upload dataset improve

This commit is contained in:
JasterV 2021-07-24 19:01:58 +02:00
commit c22e3a1dd8
5 changed files with 238 additions and 45 deletions

1
data/index.txt Normal file
View file

@ -0,0 +1 @@
Put your data file on this folder and change the path on the upload dataset file

208
package-lock.json generated
View file

@ -11,6 +11,7 @@
"dependencies": {
"axios": "^0.21.1",
"dotenv": "^10.0.0",
"ejs": "^3.1.6",
"express": "^4.17.1",
"fastest-validator": "^1.11.1",
"firebase-admin": "^9.11.0"
@ -2052,6 +2053,11 @@
"node": ">=8"
}
},
"node_modules/async": {
"version": "0.9.2",
"resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz",
"integrity": "sha1-rqdNXmHB+JlhO/ZL2mbUx48v0X0="
},
"node_modules/async-retry": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.1.tgz",
@ -2170,8 +2176,7 @@
"node_modules/balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"dev": true
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
},
"node_modules/base64-js": {
"version": "1.5.1",
@ -2248,7 +2253,6 @@
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"dev": true,
"dependencies": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
@ -2512,8 +2516,7 @@
"node_modules/concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
"dev": true
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s="
},
"node_modules/configstore": {
"version": "5.0.1",
@ -2866,6 +2869,20 @@
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
},
"node_modules/ejs": {
"version": "3.1.6",
"resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.6.tgz",
"integrity": "sha512-9lt9Zse4hPucPkoP7FHDF0LQAlGyF9JVpnClFLFH3aSSbxmyoqINRpp/9wePWJTUl4KOQwRL72Iw3InHPDkoGw==",
"dependencies": {
"jake": "^10.6.1"
},
"bin": {
"ejs": "bin/cli.js"
},
"engines": {
"node": ">=0.10.0"
}
},
"node_modules/electron-to-chromium": {
"version": "1.3.779",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.779.tgz",
@ -3650,6 +3667,14 @@
"node": "^10.12.0 || >=12.0.0"
}
},
"node_modules/filelist": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.2.tgz",
"integrity": "sha512-z7O0IS8Plc39rTCq6i6iHxk43duYOn8uFJiWSewIq0Bww1RNybVHSCjahmcC87ZqAm4OTvFzlzeGu3XAzG1ctQ==",
"dependencies": {
"minimatch": "^3.0.4"
}
},
"node_modules/fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -4721,6 +4746,87 @@
"node": ">=8"
}
},
"node_modules/jake": {
"version": "10.8.2",
"resolved": "https://registry.npmjs.org/jake/-/jake-10.8.2.tgz",
"integrity": "sha512-eLpKyrfG3mzvGE2Du8VoPbeSkRry093+tyNjdYaBbJS9v17knImYGNXQCUV0gLxQtF82m3E8iRb/wdSQZLoq7A==",
"dependencies": {
"async": "0.9.x",
"chalk": "^2.4.2",
"filelist": "^1.0.1",
"minimatch": "^3.0.4"
},
"bin": {
"jake": "bin/cli.js"
},
"engines": {
"node": "*"
}
},
"node_modules/jake/node_modules/ansi-styles": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
"dependencies": {
"color-convert": "^1.9.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/jake/node_modules/chalk": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
"dependencies": {
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/jake/node_modules/color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"dependencies": {
"color-name": "1.1.3"
}
},
"node_modules/jake/node_modules/color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
"node_modules/jake/node_modules/escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ=",
"engines": {
"node": ">=0.8.0"
}
},
"node_modules/jake/node_modules/has-flag": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0=",
"engines": {
"node": ">=4"
}
},
"node_modules/jake/node_modules/supports-color": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
"dependencies": {
"has-flag": "^3.0.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/jest": {
"version": "27.0.6",
"resolved": "https://registry.npmjs.org/jest/-/jest-27.0.6.tgz",
@ -6188,7 +6294,6 @@
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
"dev": true,
"dependencies": {
"brace-expansion": "^1.1.7"
},
@ -9883,6 +9988,11 @@
"integrity": "sha512-Z7tMw1ytTXt5jqMcOP+OQteU1VuNK9Y02uuJtKQ1Sv69jXQKKg5cibLwGJow8yzZP+eAc18EmLGPal0bp36rvQ==",
"dev": true
},
"async": {
"version": "0.9.2",
"resolved": "https://registry.npmjs.org/async/-/async-0.9.2.tgz",
"integrity": "sha1-rqdNXmHB+JlhO/ZL2mbUx48v0X0="
},
"async-retry": {
"version": "1.3.1",
"resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.1.tgz",
@ -9980,8 +10090,7 @@
"balanced-match": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz",
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==",
"dev": true
"integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw=="
},
"base64-js": {
"version": "1.5.1",
@ -10037,7 +10146,6 @@
"version": "1.1.11",
"resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz",
"integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==",
"dev": true,
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
@ -10242,8 +10350,7 @@
"concat-map": {
"version": "0.0.1",
"resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz",
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s=",
"dev": true
"integrity": "sha1-2Klr13/Wjfd5OnMDajug1UBdR3s="
},
"configstore": {
"version": "5.0.1",
@ -10526,6 +10633,14 @@
"resolved": "https://registry.npmjs.org/ee-first/-/ee-first-1.1.1.tgz",
"integrity": "sha1-WQxhFWsK4vTwJVcyoViyZrxWsh0="
},
"ejs": {
"version": "3.1.6",
"resolved": "https://registry.npmjs.org/ejs/-/ejs-3.1.6.tgz",
"integrity": "sha512-9lt9Zse4hPucPkoP7FHDF0LQAlGyF9JVpnClFLFH3aSSbxmyoqINRpp/9wePWJTUl4KOQwRL72Iw3InHPDkoGw==",
"requires": {
"jake": "^10.6.1"
}
},
"electron-to-chromium": {
"version": "1.3.779",
"resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.3.779.tgz",
@ -11156,6 +11271,14 @@
"flat-cache": "^3.0.4"
}
},
"filelist": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/filelist/-/filelist-1.0.2.tgz",
"integrity": "sha512-z7O0IS8Plc39rTCq6i6iHxk43duYOn8uFJiWSewIq0Bww1RNybVHSCjahmcC87ZqAm4OTvFzlzeGu3XAzG1ctQ==",
"requires": {
"minimatch": "^3.0.4"
}
},
"fill-range": {
"version": "7.0.1",
"resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz",
@ -11932,6 +12055,68 @@
"istanbul-lib-report": "^3.0.0"
}
},
"jake": {
"version": "10.8.2",
"resolved": "https://registry.npmjs.org/jake/-/jake-10.8.2.tgz",
"integrity": "sha512-eLpKyrfG3mzvGE2Du8VoPbeSkRry093+tyNjdYaBbJS9v17knImYGNXQCUV0gLxQtF82m3E8iRb/wdSQZLoq7A==",
"requires": {
"async": "0.9.x",
"chalk": "^2.4.2",
"filelist": "^1.0.1",
"minimatch": "^3.0.4"
},
"dependencies": {
"ansi-styles": {
"version": "3.2.1",
"resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-3.2.1.tgz",
"integrity": "sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==",
"requires": {
"color-convert": "^1.9.0"
}
},
"chalk": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
"requires": {
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
}
},
"color-convert": {
"version": "1.9.3",
"resolved": "https://registry.npmjs.org/color-convert/-/color-convert-1.9.3.tgz",
"integrity": "sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==",
"requires": {
"color-name": "1.1.3"
}
},
"color-name": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
},
"escape-string-regexp": {
"version": "1.0.5",
"resolved": "https://registry.npmjs.org/escape-string-regexp/-/escape-string-regexp-1.0.5.tgz",
"integrity": "sha1-G2HAViGQqN/2rjuyzwIAyhMLhtQ="
},
"has-flag": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/has-flag/-/has-flag-3.0.0.tgz",
"integrity": "sha1-tdRU3CGZriJWmfNGfloH87lVuv0="
},
"supports-color": {
"version": "5.5.0",
"resolved": "https://registry.npmjs.org/supports-color/-/supports-color-5.5.0.tgz",
"integrity": "sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==",
"requires": {
"has-flag": "^3.0.0"
}
}
}
},
"jest": {
"version": "27.0.6",
"resolved": "https://registry.npmjs.org/jest/-/jest-27.0.6.tgz",
@ -13089,7 +13274,6 @@
"version": "3.0.4",
"resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.0.4.tgz",
"integrity": "sha512-yJHVQEhyqPLUTgt9B83PXu6W3rx4MvvHvSUvToogpwoGDOUQ+yDrR0HRot+yOCdCO7u4hX3pWft6kWBBcqh0UA==",
"dev": true,
"requires": {
"brace-expansion": "^1.1.7"
}

View file

@ -39,6 +39,7 @@
"dependencies": {
"axios": "^0.21.1",
"dotenv": "^10.0.0",
"ejs": "^3.1.6",
"express": "^4.17.1",
"fastest-validator": "^1.11.1",
"firebase-admin": "^9.11.0"

View file

View file

@ -3,9 +3,16 @@ const serviceAccount = require("../.firebase.json");
const fs = require('fs');
const readline = require('readline');
const DATA_FILE_PATH = 'scripts/sentences.jsonl.txt';
const args = process.argv.slice(2);
(async () => {
if (args.length != 1) {
console.log('Dataset path required: ')
console.log('nodejs upload_dataset.js <dataset-path>')
process.exit(-1)
}
const dataFilePath = args[0]
(async () => {
admin.initializeApp({
credential: admin.credential.cert(serviceAccount)
});
@ -15,7 +22,7 @@ const DATA_FILE_PATH = 'scripts/sentences.jsonl.txt';
console.log('Firestore credentials ok')
const rl = readline.createInterface({
input: fs.createReadStream(DATA_FILE_PATH),
input: fs.createReadStream(dataFilePath),
output: process.stdout,
terminal: false
});
@ -26,7 +33,7 @@ const DATA_FILE_PATH = 'scripts/sentences.jsonl.txt';
rl.on('line', async (line) => {
const sentence = JSON.parse(line)
const entry = Object.entries(sentence.cats).find((elem) => elem['1'] == 1)
if(!entry) {
if (!entry) {
console.log('Sentence: ', sentence, ' does not have a category')
return
}
@ -35,4 +42,4 @@ const DATA_FILE_PATH = 'scripts/sentences.jsonl.txt';
category: entry[0]
})
});
})();
})();