Commit d2cabbeb authored by Quentin's avatar Quentin
Browse files

Change outputs to processOutputs, new endpoint corpus_process_start, and adding...

Change outputs to processOutputs, add a new corpus_process_start endpoint, and add a new endpoint, corpus_process_continue, that each module calls when it has finished (so there are no issues if a module takes too long).
parent a48d8cd0
...@@ -4,10 +4,15 @@ ...@@ -4,10 +4,15 @@
`npm install` `npm install`
## Set up MongoDB
Make sure to have a mongo instance running, with a database named _chenetal_
## Start server ## Start server
`npm start` `npm start`
If you are developing this repository, you can use the following command so that it automatically reloads when you modify a file. If you are developing this repository, you can use the following command so that it automatically reloads when you modify a file.
`npm run devstart` `npm run devstart`
\ No newline at end of file
...@@ -65,6 +65,8 @@ exports.corpus_file_upload = function (req, res) { ...@@ -65,6 +65,8 @@ exports.corpus_file_upload = function (req, res) {
fs.mkdirSync(uploadDirectory); fs.mkdirSync(uploadDirectory);
} }
let size = 0;
try { try {
if (files.length) { if (files.length) {
files.forEach(file => { files.forEach(file => {
...@@ -74,6 +76,7 @@ exports.corpus_file_upload = function (req, res) { ...@@ -74,6 +76,7 @@ exports.corpus_file_upload = function (req, res) {
} }
file.mv(filePath); file.mv(filePath);
resData.push({ name: file.name, mimetype: file.mimetype, size: file.size, path: filePath }); resData.push({ name: file.name, mimetype: file.mimetype, size: file.size, path: filePath });
size += file.size;
}) })
} }
else { else {
...@@ -84,8 +87,9 @@ exports.corpus_file_upload = function (req, res) { ...@@ -84,8 +87,9 @@ exports.corpus_file_upload = function (req, res) {
// TRY TO WRITE THE BUFFER HERE ? // TRY TO WRITE THE BUFFER HERE ?
files.mv(filePath); files.mv(filePath);
resData.push({ name: files.name, mimetype: files.mimetype, size: files.size, path: filePath }); resData.push({ name: files.name, mimetype: files.mimetype, size: files.size, path: filePath });
size += files.size;
} }
successHandler(res, { uploadedFiles: resData }); successHandler(res, { uploadedFiles: resData, size : size});
} }
catch (error) { catch (error) {
errorHandler(res, error); errorHandler(res, error);
...@@ -320,4 +324,4 @@ exports.corpus_document_report_get = function (req, res) { ...@@ -320,4 +324,4 @@ exports.corpus_document_report_get = function (req, res) {
} }
} }
}) })
} }
\ No newline at end of file
...@@ -129,18 +129,24 @@ exports.corpus_process_add_process = async function (req, res) { ...@@ -129,18 +129,24 @@ exports.corpus_process_add_process = async function (req, res) {
errorHandler(res, `No corpus processes were found with id: ${corpusProcessId}`, 404); errorHandler(res, `No corpus processes were found with id: ${corpusProcessId}`, 404);
} }
else { else {
// Add conlluCol
if (corpusProcess.conlluCols && corpusProcess.conlluCols.length > 0) { if (corpusProcess.conlluCols && corpusProcess.conlluCols.length > 0) {
corpusProcess.conlluCols.push(req.body.conlluCol); corpusProcess.conlluCols.push(req.body.conlluCol);
} }
else { else {
corpusProcess.conlluCols = [req.body.conlluCol]; corpusProcess.conlluCols = [req.body.conlluCol];
} }
if (corpusProcess.outputs && corpusProcess.outputs.length > 0) {
corpusProcess.outputs.push(req.body.output); // Add processOutput if there is one
} if (req.body.processOutput) {
else { if (corpusProcess.processOutputs && corpusProcess.processOutputs.length > 0) {
corpusProcess.outputs = [req.body.output]; corpusProcess.processOutputs.push(req.body.processOutput);
}
else {
corpusProcess.processOutputs = [req.body.processOutput];
}
} }
corpusProcess = await corpusProcess.save(); corpusProcess = await corpusProcess.save();
successHandler(res, { corpusProcess }); successHandler(res, { corpusProcess });
} }
...@@ -150,7 +156,7 @@ exports.corpus_process_add_process = async function (req, res) { ...@@ -150,7 +156,7 @@ exports.corpus_process_add_process = async function (req, res) {
} }
} }
exports.corpus_process_start = async function (req, res) { exports.corpus_process_start_old = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId; const corpusProcessId = req.params.corpusProcessId;
let corpusProcess; let corpusProcess;
let started = false; let started = false;
...@@ -196,3 +202,80 @@ exports.corpus_process_start = async function (req, res) { ...@@ -196,3 +202,80 @@ exports.corpus_process_start = async function (req, res) {
} }
} }
} }
exports.corpus_process_start = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
let corpusProcess;
let started = false;
let pipeline;
try {
corpusProcess = await CorpusProcessModel.findById(corpusProcessId);
corpusProcess.status = 'Started';
// Easier for debugging (you can just call that endpoint to restart the pipeline)
corpusProcess.currentProcessId = undefined;
await corpusProcess.save();
pipeline = await PipelineModel.findById(corpusProcess.pipelineId);
started = true;
successHandler(res, { message: "Pipeline started" });
// First of all, let's tree tag our corpus
moduleHelper.startTreeTagger(corpusProcessId);
// When it is finished, it will call corpus_process_continue
}
catch (error) {
errorHandler(res, error);
}
}
exports.corpus_process_continue = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
try {
let corpusProcess = await CorpusProcessModel.findById(corpusProcessId);
const pipeline = await PipelineModel.findById(corpusProcess.pipelineId);
// If it's not started, start with zero, else, increment
if (corpusProcess.currentProcessId == undefined) {
corpusProcess.currentProcessId = 0;
}
else {
corpusProcess.currentProcessId += 1;
}
// End of the pipeline!
if (pipeline.processes.length <= corpusProcess.currentProcessId) {
corpusProcess.currentProcessingModule = 'Exporter';
await corpusProcess.save();
successHandler(res, 'Let\'s export the conllu file');
console.log(`Starting to export CONLLU for corpus process id: ${corpusProcessId}`);
// Export the pipeline to a conllu file
moduleHelper.startModuleProcess(corpusProcessId, { moduleName: 'Exporter'});
// Should we do another endpoint 'finish'?
corpusProcess.currentProcessingModule = null;
corpusProcess.status = 'Finished';
await corpusProcess.save();
}
else {
let process = pipeline.processes[corpusProcess.currentProcessId]
corpusProcess.currentProcessingModule = process.moduleName;
await corpusProcess.save();
console.log(`Starting module ${process.moduleName} for corpus process id: ${corpusProcessId}`);
moduleHelper.startModuleProcess(corpusProcessId, process);
successHandler(res, `Module ${process.moduleName} has started`);
}
}
catch (error) {
console.error(error);
}
}
...@@ -18,7 +18,8 @@ function nameToRoute(moduleName) { ...@@ -18,7 +18,8 @@ function nameToRoute(moduleName) {
"Exporter": "http://localhost:3001", "Exporter": "http://localhost:3001",
"Modulix": "http://localhost:3002", "Modulix": "http://localhost:3002",
"SDMC": "http://localhost:3003", "SDMC": "http://localhost:3003",
"Néoveille": "http://localhost:3004" "Néoveille": "http://localhost:3004",
"Morfetik - Formes Simples": "http://localhost:3005"
} }
return routeDict[moduleName]; return routeDict[moduleName];
} }
......
{
"name": "Morfetik - Formes Simples",
"description": "Morfetik Description",
"parameters":
[
{
"name": "acceptedCategories",
"label": "Appartenance d'une ou plusieurs catégories",
"description": "Ne traite que les catégories sélectionnées",
"type": "List-Poly",
"items": ["Nom", "Verbe", "Adjectif", "Adverbe", "Conjonction", "Déterminant", "Pronom", "Préposition", "Interjection"],
"default": ["Nom", "Verbe", "Adjectif", "Adverbe", "Conjonction", "Déterminant", "Pronom", "Préposition","Interjection"]
},
{
"name": "selectedColumns",
"label": "Informations souhaitées",
"description": "selectedColumns description",
"type": "List-Poly",
"items": ["Sous-Catégorie", "Temps", "Nombre", "Genre", "Personne", "Notes"],
"default": ["Sous-Catégorie", "Temps", "Nombre", "Genre", "Personne", "Notes"]
}
]
}
\ No newline at end of file
{ {
"name": "Néoveille", "name": "Néoveille",
"description": "Détection de Néologismes", "description": "Détection de néologismes formels",
"parameters": "parameters":
[ [
{ {
"name": "nodigit", "name": "nodigit",
"label": "Exclusion des chiffres", "label": "Exclusion des chiffres",
"description": "Exclu les candidats qui contiennent des chiffres", "description": "Exclure les candidats qui contiennent des chiffres",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
}, },
{ {
"name": "lowercaseonly", "name": "lowercaseonly",
"label": "Uniquement des mots en minuscule", "label": "Uniquement les mots en minuscule",
"description": "Exclu les candidats qui ont des caractères en minuscule, cela évite le bruit dû aux noms propres", "description": "Exclure les candidats qui ont des caractères en majuscule, cela évite le bruit dû aux noms propres",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
...@@ -25,20 +25,20 @@ ...@@ -25,20 +25,20 @@
"description": "Expression régulière qui sera utilisée pour filtrer la liste des candidats, si vide, sera équivalent à '.*'", "description": "Expression régulière qui sera utilisée pour filtrer la liste des candidats, si vide, sera équivalent à '.*'",
"type": "String", "type": "String",
"items": null, "items": null,
"default": "\\w{3,}(?:-\\w{3,}){0,3}" "default": "^\\w{3,}(?:-\\w{3,}){0,3}$"
}, },
{ {
"name": "exclusiondict", "name": "exclusiondict",
"label": "Dictionnaire d'exclusion", "label": "Dictionnaire d'exclusion",
"description": "Exclu tous les candidats présents de ce dictionnaire", "description": "Exclure tous les candidats présents de ce dictionnaire",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
}, },
{ {
"name": "spellchecker", "name": "spellchecker",
"label": "Heuristique orthographique", "label": "Correcteur orthographique",
"description": "Exclu les candidats selon une heuristique basée sur Hunspell, permettant notamment d'éviter le bruit dû aux coquilles orthographiques.", "description": "Exclure les candidats selon une heuristique basée sur Hunspell, permettant notamment d'éviter le bruit dû aux coquilles orthographiques.",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
......
{
"name": "Prototype",
"description": "Prototype permettant de créer son propre module",
"parameters":
[
{
"name": "parametre1",
"label": "Le nom du paramètre comme il sera inscrit dans le frontend",
"description": "Une description détaillée pour expliquer ce paramètre, qui apparaitra quand on clique sur le point d'interrogation dans le front end",
"type": "Le type de paramètre, parmi : [List-Mono, List-Poly, Int, Bool, String]",
"items": ["La liste des paramètres disponibles, uniquement pour List-Poly et List-Mono, null sinon"],
"default": "La valeur par défaut du paramètre"
}
]
}
\ No newline at end of file
...@@ -10,7 +10,7 @@ const Schema = mongoose.Schema; ...@@ -10,7 +10,7 @@ const Schema = mongoose.Schema;
* @swagger * @swagger
* components: * components:
* schemas: * schemas:
* Output: * ProcessOutput:
* type: object * type: object
* description: data returned by a module at the end of a process * description: data returned by a module at the end of a process
* required: * required:
...@@ -45,7 +45,7 @@ const Schema = mongoose.Schema; ...@@ -45,7 +45,7 @@ const Schema = mongoose.Schema;
// Output returned by a module // Output returned by a module
// Can be a text file or something else like a model. // Can be a text file or something else like a model.
/* Should there be documentId or corpusId? */ /* Should there be documentId or corpusId? */
const OutputSchema = new Schema({ const ProcessOutputSchema = new Schema({
processId: { type: String, required: false}, processId: { type: String, required: false},
moduleName: { type: String, required: true}, // Can be built from processId moduleName: { type: String, required: true}, // Can be built from processId
content: { content: {
...@@ -212,11 +212,11 @@ const AnnotatedDocumentSchema = new Schema({ ...@@ -212,11 +212,11 @@ const AnnotatedDocumentSchema = new Schema({
* description: list of annotations per document * description: list of annotations per document
* items: * items:
* $ref: "#/components/schemas/AnnotatedDocument" * $ref: "#/components/schemas/AnnotatedDocument"
* outputs: * processOutputs:
* type: array * type: array
* description: list of output produced by the pipeline * description: list of output produced by the pipeline
* items: * items:
* $ref: "#/components/schemas/Output" * $ref: "#/components/schemas/ProcessOutput"
* currentProcessingModule: * currentProcessingModule:
* type: String * type: String
* description: Name of the module that is being executed currently by the pipeline. Is null if the pipeline has not started, failed, or finished. * description: Name of the module that is being executed currently by the pipeline. Is null if the pipeline has not started, failed, or finished.
...@@ -244,10 +244,12 @@ const CorpusProcessSchema = new Schema({ ...@@ -244,10 +244,12 @@ const CorpusProcessSchema = new Schema({
// Maybe it's better // Maybe it's better
/*annotations: { type: [AnnotationSchema]},*/ /*annotations: { type: [AnnotationSchema]},*/
annotatedDocuments: { type: [AnnotatedDocumentSchema]}, annotatedDocuments: { type: [AnnotatedDocumentSchema]},
outputs: { type: [OutputSchema]}, // The output that a module can return (describing the process that was done)
//Don't know yet if following are needed, but it seems more relevant to find these processOutputs: { type: [ProcessOutputSchema]},
//properties here rather than in Pipeline // What module is currently processing the corpus?
currentProcessingModule: { type: String }, // What module is currently processing the corpus? currentProcessingModule: { type: String },
// Get the index of the current processing module
currentProcessId: { type: Number},
status: { status: {
type: String, type: String,
enum: ['Not started yet', 'Started', 'Processing annotations', 'Finished', 'Failed'] enum: ['Not started yet', 'Started', 'Processing annotations', 'Finished', 'Failed']
......
{ {
"name": "chenetal-server", "name": "chenetal-server",
"version": "1.0.0", "version": "1.0.0",
"lockfileVersion": 1, "lockfileVersion": 2,
"requires": true, "requires": true,
"packages": {
"": {
"name": "chenetal-server",
"version": "1.0.0",
"license": "ISC",
"dependencies": {
"axios": "^0.21.0",
"cors": "^2.8.5",
"express": "^4.17.1",
"express-fileupload": "^1.2.0",
"jsonwebtoken": "^8.5.1",
"mongoose": "^5.10.5",
"morgan": "^1.10.0",
"passport-jwt": "^4.0.0",
"passport-ldapauth": "^2.1.4",
"swagger-jsdoc": "^4.2.0",
"swagger-ui-express": "^4.1.4",
"yamljs": "^0.3.0"
},
"devDependencies": {
"eslint": "^7.9.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-plugin-import": "^2.22.0",
"nodemon": "^2.0.4",
"passport": "^0.4.1"
}
},
"node_modules/@apidevtools/json-schema-ref-parser": {
"version": "9.0.6",
"resolved": "https://registry.npmjs.org/@apidevtools/json-schema-ref-parser/-/json-schema-ref-parser-9.0.6.tgz",
"integrity": "sha512-M3YgsLjI0lZxvrpeGVk9Ap032W6TPQkH6pRAZz81Ac3WUNF79VQooAFnp8umjvVzUmD93NkogxEwbSce7qMsUg==",
"dependencies": {
"@jsdevtools/ono": "^7.1.3",
"call-me-maybe": "^1.0.1",
"js-yaml": "^3.13.1"
}
},
"node_modules/@apidevtools/openapi-schemas": {
"version": "2.0.4",
"resolved": "https://registry.npmjs.org/@apidevtools/openapi-schemas/-/openapi-schemas-2.0.4.tgz",
"integrity": "sha512-ob5c4UiaMYkb24pNhvfSABShAwpREvUGCkqjiz/BX9gKZ32y/S22M+ALIHftTAuv9KsFVSpVdIDzi9ZzFh5TCA==",
"engines": {
"node": ">=10"
}
},
"node_modules/@apidevtools/swagger-methods": {
"version": "3.0.2",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz",
"integrity": "sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg=="
},
"node_modules/@apidevtools/swagger-parser": {
"version": "10.0.2",
"resolved": "https://registry.npmjs.org/@apidevtools/swagger-parser/-/swagger-parser-10.0.2.tgz",
"integrity": "sha512-JFxcEyp8RlNHgBCE98nwuTkZT6eNFPc1aosWV6wPcQph72TSEEu1k3baJD4/x1qznU+JiDdz8F5pTwabZh+Dhg==",
"dependencies": {
"@apidevtools/json-schema-ref-parser": "^9.0.6",
"@apidevtools/openapi-schemas": "^2.0.4",
"@apidevtools/swagger-methods": "^3.0.2",
"@jsdevtools/ono": "^7.1.3",
"call-me-maybe": "^1.0.1",
"z-schema": "^4.2.3"
}
},
"node_modules/@babel/code-frame": {
"version": "7.10.4",
"resolved": "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.10.4.tgz",
"integrity": "sha512-vG6SvB6oYEhvgisZNFRmRCUkLz11c7rp+tbNTynGqc6mS1d5ATd/sGyV6W0KZZnXRKMTzZDRgQT3Ou9jhpAfUg==",
"dev": true,
"dependencies": {
"@babel/highlight": "^7.10.4"
}
},
"node_modules/@babel/helper-validator-identifier": {
"version": "7.10.4",
"resolved": "https://registry.npmjs.org/@babel/helper-validator-identifier/-/helper-validator-identifier-7.10.4.tgz",
"integrity": "sha512-3U9y+43hz7ZM+rzG24Qe2mufW5KhvFg/NhnNph+i9mgCtdTCtMJuI1TMkrIUiK7Ix4PYlRF9I5dhqaLYA/ADXw==",
"dev": true
},
"node_modules/@babel/highlight": {
"version": "7.10.4",
"resolved": "https://registry.npmjs.org/@babel/highlight/-/highlight-7.10.4.tgz",
"integrity": "sha512-i6rgnR/YgPEQzZZnbTHHuZdlE8qyoBNalD6F+q4vAFlcMEcqmkoG+mPqJYJCo63qPf74+Y1UZsl3l6f7/RIkmA==",
"dev": true,
"dependencies": {
"@babel/helper-validator-identifier": "^7.10.4",
"chalk": "^2.0.0",
"js-tokens": "^4.0.0"
}
},
"node_modules/@babel/highlight/node_modules/chalk": {
"version": "2.4.2",
"resolved": "https://registry.npmjs.org/chalk/-/chalk-2.4.2.tgz",
"integrity": "sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==",
"dev": true,
"dependencies": {
"ansi-styles": "^3.2.1",
"escape-string-regexp": "^1.0.5",
"supports-color": "^5.3.0"
},
"engines": {
"node": ">=4"
}
},
"node_modules/@eslint/eslintrc": {
"version": "0.1.3",
"resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-0.1.3.tgz",
"integrity": "sha512-4YVwPkANLeNtRjMekzux1ci8hIaH5eGKktGqR0d3LWsKNn5B2X/1Z6Trxy7jQXl9EBGE6Yj02O+t09FMeRllaA==",
"dev": true,
"dependencies": {
"ajv": "^6.12.4",
"debug": "^4.1.1",
"espree": "^7.3.0",
"globals": "^12.1.0",
"ignore": "^4.0.6",
"import-fresh": "^3.2.1",
"js-yaml": "^3.13.1",
"lodash": "^4.17.19",
"minimatch": "^3.0.4",
"strip-json-comments": "^3.1.1"
},
"engines": {
"node": "^10.12.0 || >=12.0.0"
}
},
"node_modules/@jsdevtools/ono": {
"version": "7.1.3",
"resolved": "https://registry.npmjs.org/@jsdevtools/ono/-/ono-7.1.3.tgz",
"integrity": "sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg=="
},
"node_modules/@sindresorhus/is": {
"version": "0.14.0",
"resolved": "https://registry.npmjs.org/@sindresorhus/is/-/is-0.14.0.tgz",
"integrity": "sha512-9NET910DNaIPngYnLLPeg+Ogzqsi9uM4mSboU5y6p8S5DzMTVEsJZrawi+BoDNUVBa2DhJqQYUFvMDfgU062LQ==",
"dev": true,
"engines": {
"node": ">=6"
}
},
"node_modules/@szmarczak/http-timer": {
"version": "1.1.2",
"resolved": "https://registry.npmjs.org/@szmarczak/http-timer/-/http-timer-1.1.2.tgz",
"integrity": "sha512-XIB2XbzHTN6ieIjfIMV9hlVcfPU26s2vafYWQcZHWXHOxiaRZYEDKEwdl129Zyg50+foYV2jCgtrqSA6qNuNSA==",
"dev": true,
"dependencies": {
"defer-to-connect": "^1.0.1"
},
"engines": {
"node": ">=6"
}
},
"node_modules/@types/body-parser": {
"version": "1.19.0",
"resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.0.tgz",
"integrity": "sha512-W98JrE0j2K78swW4ukqMleo8R7h/pFETjM2DQ90MF6XK2i4LO4W3gQ71Lt4w3bfm2EvVSyWHplECvB5sK22yFQ==",
"dependencies": {
"@types/connect": "*",
"@types/node": "*"
}
},
"node_modules/@types/color-name": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/@types/color-name/-/color-name-1.1.1.tgz",
"integrity": "sha512-rr+OQyAjxze7GgWrSaJwydHStIhHq2lvY3BOC2Mj7KnzI7XK0Uw1TOOdI9lDoajEbSWLiYgoo4f1R51erQfhPQ==",
"dev": true
},
"node_modules/@types/connect": {
"version": "3.4.33",
"resolved": "https://registry.npmjs.org/@types/connect/-/connect-3.4.33.tgz",
"integrity": "sha512-2+FrkXY4zllzTNfJth7jOqEHC+enpLeGslEhpnTAkg21GkRrWV4SsAtqchtT4YS9/nODBU2/ZfsBY2X4J/dX7A==",
"dependencies": {
"@types/node": "*"
}
},
"node_modules/@types/express": {
"version": "4.17.8",
"resolved": "https://registry.npmjs.org/@types/express/-/express-4.17.8.tgz",
"integrity": "sha512-wLhcKh3PMlyA2cNAB9sjM1BntnhPMiM0JOBwPBqttjHev2428MLEB4AYVN+d8s2iyCVZac+o41Pflm/ZH5vLXQ==",
"dependencies": {
"@types/body-parser": "*",
"@types/express-serve-static-core": "*",
"@types/qs": "*",
"@types/serve-static": "*"
}
},