mirror of
https://codeberg.org/JasterV/sentences-crud.git
synced 2026-04-27 02:15:43 +00:00
upload dataset improve
This commit is contained in:
parent
9e9d1fe844
commit
c22e3a1dd8
5 changed files with 238 additions and 45 deletions
1
data/index.txt
Normal file
1
data/index.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
Put your data file on this folder and change the path on the upload dataset file
|
||||
208
package-lock.json
generated
208
package-lock.json
generated
|
|
@ -11,6 +11,7 @@
|
|||
"dependencies": {
|
||||
"axios": "^0.21.1",
|
||||
"dotenv": "^10.0.0",
|
||||
"ejs": "^3.1.6",
|
||||
"express": "^4.17.1",
|
||||
"fastest-validator": "^1.11.1",
|
||||
"firebase-admin": "^9.11.0"
|
||||
|
|
@ -2052,6 +2053,11 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/async": {
|
||||
"version": "0.9.2",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz",
|
||||
"integrity": "sha1-rqdNXmHB+JlhO/ZL2mbUx48v0X0="
|
||||
},
|
||||
"node_modules/async-retry": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.1.tgz",
|
||||
|
|
@ -2170,8 +2176,7 @@
|
|||
"node_modules/balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"dev": true
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
|
||||
},
|
||||
"node_modules/base64-js": {
|
||||
"version": "1.5.1",
|
||||
|
|
@ -2248,7 +2253,6 @@
|
|||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
|
|
@ -2512,8 +2516,7 @@
|
|||
"node_modules/concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
|
||||
"dev": true
|
||||
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s="
|
||||
},
|
||||
"node_modules/configstore": {
|
||||
"version": "5.0.1",
|
||||
|
|
@ -2866,6 +2869,20 @@
|
|||
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
|
||||
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
|
||||
},
|
||||
"node_modules/ejs": {
|
||||
"version": "3.1.6",
|
||||
"resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.6.tgz",
|
||||
"integrity": "sha512-9lt9Zse4hPucPkoP7FHDF0LQAlGyF9JVpnClFLFH3aSSbxmyoqINRpp/9wePWJTUl4KOQwRL72Iw3InHPDkoGw==",
|
||||
"dependencies": {
|
||||
"jake": "^10.6.1"
|
||||
},
|
||||
"bin": {
|
||||
"ejs": "bin/cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=0.10.0"
|
||||
}
|
||||
},
|
||||
"node_modules/electron-to-chromium": {
|
||||
"version": "1.3.779",
|
||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.779.tgz",
|
||||
|
|
@ -3650,6 +3667,14 @@
|
|||
"node": "^10.12.0 || >=12.0.0"
|
||||
}
|
||||
},
|
||||
"node_modules/filelist": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.2.tgz",
|
||||
"integrity": "sha512-z7O0IS8Plc39rTCq6i6iHxk43duYOn8uFJiWSewIq0Bww1RNybVHSCjahmcC87ZqAm4OTvFzlzeGu3XAzG1ctQ==",
|
||||
"dependencies": {
|
||||
"minimatch": "^3.0.4"
|
||||
}
|
||||
},
|
||||
"node_modules/fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
|
|
@ -4721,6 +4746,87 @@
|
|||
"node": ">=8"
|
||||
}
|
||||
},
|
||||
"node_modules/jake": {
|
||||
"version": "10.8.2",
|
||||
"resolved": "https://registry.npmjs.org/jake/-/jake-10.8.2.tgz",
|
||||
"integrity": "sha512-eLpKyrfG3mzvGE2Du8VoPbeSkRry093+tyNjdYaBbJS9v17knImYGNXQCUV0gLxQtF82m3E8iRb/wdSQZLoq7A==",
|
||||
"dependencies": {
|
||||
"async": "0.9.x",
|
||||
"chalk": "^2.4.2",
|
||||
"filelist": "^1.0.1",
|
||||
"minimatch": "^3.0.4"
|
||||
},
|
||||
"bin": {
|
||||
"jake": "bin/cli.js"
|
||||
},
|
||||
"engines": {
|
||||
"node": "*"
|
||||
}
|
||||
},
|
||||
"node_modules/jake/node_modules/ansi-styles": {
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
|
||||
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
|
||||
"dependencies": {
|
||||
"color-convert": "^1.9.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/jake/node_modules/chalk": {
|
||||
"version": "2.4.2",
|
||||
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
|
||||
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
|
||||
"dependencies": {
|
||||
"ansi-styles": "^3.2.1",
|
||||
"escape-string-regexp": "^1.0.5",
|
||||
"supports-color": "^5.3.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/jake/node_modules/color-convert": {
|
||||
"version": "1.9.3",
|
||||
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
|
||||
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
|
||||
"dependencies": {
|
||||
"color-name": "1.1.3"
|
||||
}
|
||||
},
|
||||
"node_modules/jake/node_modules/color-name": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
|
||||
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
|
||||
},
|
||||
"node_modules/jake/node_modules/escape-string-regexp": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
|
||||
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
|
||||
"engines": {
|
||||
"node": ">=0.8.0"
|
||||
}
|
||||
},
|
||||
"node_modules/jake/node_modules/has-flag": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
|
||||
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=",
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/jake/node_modules/supports-color": {
|
||||
"version": "5.5.0",
|
||||
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
|
||||
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
|
||||
"dependencies": {
|
||||
"has-flag": "^3.0.0"
|
||||
},
|
||||
"engines": {
|
||||
"node": ">=4"
|
||||
}
|
||||
},
|
||||
"node_modules/jest": {
|
||||
"version": "27.0.6",
|
||||
"resolved": "https://registry.npmjs.org/jest/-/jest-27.0.6.tgz",
|
||||
|
|
@ -6188,7 +6294,6 @@
|
|||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
|
||||
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
|
||||
"dev": true,
|
||||
"dependencies": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
},
|
||||
|
|
@ -9883,6 +9988,11 @@
|
|||
"integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==",
|
||||
"dev": true
|
||||
},
|
||||
"async": {
|
||||
"version": "0.9.2",
|
||||
"resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz",
|
||||
"integrity": "sha1-rqdNXmHB+JlhO/ZL2mbUx48v0X0="
|
||||
},
|
||||
"async-retry": {
|
||||
"version": "1.3.1",
|
||||
"resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.1.tgz",
|
||||
|
|
@ -9980,8 +10090,7 @@
|
|||
"balanced-match": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
|
||||
"dev": true
|
||||
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
|
||||
},
|
||||
"base64-js": {
|
||||
"version": "1.5.1",
|
||||
|
|
@ -10037,7 +10146,6 @@
|
|||
"version": "1.1.11",
|
||||
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
|
||||
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"balanced-match": "^1.0.0",
|
||||
"concat-map": "0.0.1"
|
||||
|
|
@ -10242,8 +10350,7 @@
|
|||
"concat-map": {
|
||||
"version": "0.0.1",
|
||||
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
|
||||
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
|
||||
"dev": true
|
||||
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s="
|
||||
},
|
||||
"configstore": {
|
||||
"version": "5.0.1",
|
||||
|
|
@ -10526,6 +10633,14 @@
|
|||
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
|
||||
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
|
||||
},
|
||||
"ejs": {
|
||||
"version": "3.1.6",
|
||||
"resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.6.tgz",
|
||||
"integrity": "sha512-9lt9Zse4hPucPkoP7FHDF0LQAlGyF9JVpnClFLFH3aSSbxmyoqINRpp/9wePWJTUl4KOQwRL72Iw3InHPDkoGw==",
|
||||
"requires": {
|
||||
"jake": "^10.6.1"
|
||||
}
|
||||
},
|
||||
"electron-to-chromium": {
|
||||
"version": "1.3.779",
|
||||
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.779.tgz",
|
||||
|
|
@ -11156,6 +11271,14 @@
|
|||
"flat-cache": "^3.0.4"
|
||||
}
|
||||
},
|
||||
"filelist": {
|
||||
"version": "1.0.2",
|
||||
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.2.tgz",
|
||||
"integrity": "sha512-z7O0IS8Plc39rTCq6i6iHxk43duYOn8uFJiWSewIq0Bww1RNybVHSCjahmcC87ZqAm4OTvFzlzeGu3XAzG1ctQ==",
|
||||
"requires": {
|
||||
"minimatch": "^3.0.4"
|
||||
}
|
||||
},
|
||||
"fill-range": {
|
||||
"version": "7.0.1",
|
||||
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
|
||||
|
|
@ -11932,6 +12055,68 @@
|
|||
"istanbul-lib-report": "^3.0.0"
|
||||
}
|
||||
},
|
||||
"jake": {
|
||||
"version": "10.8.2",
|
||||
"resolved": "https://registry.npmjs.org/jake/-/jake-10.8.2.tgz",
|
||||
"integrity": "sha512-eLpKyrfG3mzvGE2Du8VoPbeSkRry093+tyNjdYaBbJS9v17knImYGNXQCUV0gLxQtF82m3E8iRb/wdSQZLoq7A==",
|
||||
"requires": {
|
||||
"async": "0.9.x",
|
||||
"chalk": "^2.4.2",
|
||||
"filelist": "^1.0.1",
|
||||
"minimatch": "^3.0.4"
|
||||
},
|
||||
"dependencies": {
|
||||
"ansi-styles": {
|
||||
"version": "3.2.1",
|
||||
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
|
||||
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
|
||||
"requires": {
|
||||
"color-convert": "^1.9.0"
|
||||
}
|
||||
},
|
||||
"chalk": {
|
||||
"version": "2.4.2",
|
||||
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
|
||||
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
|
||||
"requires": {
|
||||
"ansi-styles": "^3.2.1",
|
||||
"escape-string-regexp": "^1.0.5",
|
||||
"supports-color": "^5.3.0"
|
||||
}
|
||||
},
|
||||
"color-convert": {
|
||||
"version": "1.9.3",
|
||||
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
|
||||
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
|
||||
"requires": {
|
||||
"color-name": "1.1.3"
|
||||
}
|
||||
},
|
||||
"color-name": {
|
||||
"version": "1.1.3",
|
||||
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
|
||||
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
|
||||
},
|
||||
"escape-string-regexp": {
|
||||
"version": "1.0.5",
|
||||
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
|
||||
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
|
||||
},
|
||||
"has-flag": {
|
||||
"version": "3.0.0",
|
||||
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
|
||||
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0="
|
||||
},
|
||||
"supports-color": {
|
||||
"version": "5.5.0",
|
||||
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
|
||||
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
|
||||
"requires": {
|
||||
"has-flag": "^3.0.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"jest": {
|
||||
"version": "27.0.6",
|
||||
"resolved": "https://registry.npmjs.org/jest/-/jest-27.0.6.tgz",
|
||||
|
|
@ -13089,7 +13274,6 @@
|
|||
"version": "3.0.4",
|
||||
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
|
||||
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
|
||||
"dev": true,
|
||||
"requires": {
|
||||
"brace-expansion": "^1.1.7"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,7 @@
|
|||
"dependencies": {
|
||||
"axios": "^0.21.1",
|
||||
"dotenv": "^10.0.0",
|
||||
"ejs": "^3.1.6",
|
||||
"express": "^4.17.1",
|
||||
"fastest-validator": "^1.11.1",
|
||||
"firebase-admin": "^9.11.0"
|
||||
|
|
|
|||
0
scripts/aggregate_words.js
Normal file
0
scripts/aggregate_words.js
Normal file
|
|
@ -3,36 +3,43 @@ const serviceAccount = require("../.firebase.json");
|
|||
const fs = require('fs');
|
||||
const readline = require('readline');
|
||||
|
||||
const DATA_FILE_PATH = 'scripts/sentences.jsonl.txt';
|
||||
const args = process.argv.slice(2);
|
||||
|
||||
(async () => {
|
||||
admin.initializeApp({
|
||||
credential: admin.credential.cert(serviceAccount)
|
||||
});
|
||||
if (args.length != 1) {
|
||||
console.log('Dataset path required: ')
|
||||
console.log('nodejs upload_dataset.js <dataset-path>')
|
||||
process.exit(-1)
|
||||
}
|
||||
|
||||
const db = admin.firestore()
|
||||
const dataFilePath = args[0]
|
||||
(async () => {
|
||||
admin.initializeApp({
|
||||
credential: admin.credential.cert(serviceAccount)
|
||||
});
|
||||
|
||||
console.log('Firestore credentials ok')
|
||||
const db = admin.firestore()
|
||||
|
||||
const rl = readline.createInterface({
|
||||
input: fs.createReadStream(DATA_FILE_PATH),
|
||||
output: process.stdout,
|
||||
terminal: false
|
||||
});
|
||||
console.log('Firestore credentials ok')
|
||||
|
||||
console.log('Reading sentences.jsonl.txt')
|
||||
console.log('Importing data to firebase...')
|
||||
const rl = readline.createInterface({
|
||||
input: fs.createReadStream(dataFilePath),
|
||||
output: process.stdout,
|
||||
terminal: false
|
||||
});
|
||||
|
||||
rl.on('line', async (line) => {
|
||||
const sentence = JSON.parse(line)
|
||||
const entry = Object.entries(sentence.cats).find((elem) => elem['1'] == 1)
|
||||
if(!entry) {
|
||||
console.log('Sentence: ', sentence, ' does not have a category')
|
||||
return
|
||||
}
|
||||
await db.collection('sentences').doc().set({
|
||||
text: sentence.text,
|
||||
category: entry[0]
|
||||
})
|
||||
});
|
||||
})();
|
||||
console.log('Reading sentences.jsonl.txt')
|
||||
console.log('Importing data to firebase...')
|
||||
|
||||
rl.on('line', async (line) => {
|
||||
const sentence = JSON.parse(line)
|
||||
const entry = Object.entries(sentence.cats).find((elem) => elem['1'] == 1)
|
||||
if (!entry) {
|
||||
console.log('Sentence: ', sentence, ' does not have a category')
|
||||
return
|
||||
}
|
||||
await db.collection('sentences').doc().set({
|
||||
text: sentence.text,
|
||||
category: entry[0]
|
||||
})
|
||||
});
|
||||
})();
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue