Commit d2cabbeb authored by Quentin's avatar Quentin
Browse files

Change outputs to processOutputs, new endpoint corpus_process_start, and adding...

Change outputs to processOutputs, new endpoint corpus_process_start, and adding a new one: corpus_process_continue, which each module calls when it has finished (so there are no issues if a module takes too much time)
parent a48d8cd0
...@@ -4,10 +4,15 @@ ...@@ -4,10 +4,15 @@
`npm install` `npm install`
## Set up MongoDB
Make sure to have a mongo instance running, with a database named _chenetal_
## Start server ## Start server
`npm start` `npm start`
If you are developing this repository, you can use the following command so that it automatically reloads when you modify a file. If you are developing this repository, you can use the following command so that it automatically reloads when you modify a file.
`npm run devstart` `npm run devstart`
\ No newline at end of file
...@@ -65,6 +65,8 @@ exports.corpus_file_upload = function (req, res) { ...@@ -65,6 +65,8 @@ exports.corpus_file_upload = function (req, res) {
fs.mkdirSync(uploadDirectory); fs.mkdirSync(uploadDirectory);
} }
let size = 0;
try { try {
if (files.length) { if (files.length) {
files.forEach(file => { files.forEach(file => {
...@@ -74,6 +76,7 @@ exports.corpus_file_upload = function (req, res) { ...@@ -74,6 +76,7 @@ exports.corpus_file_upload = function (req, res) {
} }
file.mv(filePath); file.mv(filePath);
resData.push({ name: file.name, mimetype: file.mimetype, size: file.size, path: filePath }); resData.push({ name: file.name, mimetype: file.mimetype, size: file.size, path: filePath });
size += file.size;
}) })
} }
else { else {
...@@ -84,8 +87,9 @@ exports.corpus_file_upload = function (req, res) { ...@@ -84,8 +87,9 @@ exports.corpus_file_upload = function (req, res) {
// TRY TO WRITE THE BUFFER HERE ? // TRY TO WRITE THE BUFFER HERE ?
files.mv(filePath); files.mv(filePath);
resData.push({ name: files.name, mimetype: files.mimetype, size: files.size, path: filePath }); resData.push({ name: files.name, mimetype: files.mimetype, size: files.size, path: filePath });
size += files.size;
} }
successHandler(res, { uploadedFiles: resData }); successHandler(res, { uploadedFiles: resData, size : size});
} }
catch (error) { catch (error) {
errorHandler(res, error); errorHandler(res, error);
...@@ -320,4 +324,4 @@ exports.corpus_document_report_get = function (req, res) { ...@@ -320,4 +324,4 @@ exports.corpus_document_report_get = function (req, res) {
} }
} }
}) })
} }
\ No newline at end of file
...@@ -129,18 +129,24 @@ exports.corpus_process_add_process = async function (req, res) { ...@@ -129,18 +129,24 @@ exports.corpus_process_add_process = async function (req, res) {
errorHandler(res, `No corpus processes were found with id: ${corpusProcessId}`, 404); errorHandler(res, `No corpus processes were found with id: ${corpusProcessId}`, 404);
} }
else { else {
// Add conlluCol
if (corpusProcess.conlluCols && corpusProcess.conlluCols.length > 0) { if (corpusProcess.conlluCols && corpusProcess.conlluCols.length > 0) {
corpusProcess.conlluCols.push(req.body.conlluCol); corpusProcess.conlluCols.push(req.body.conlluCol);
} }
else { else {
corpusProcess.conlluCols = [req.body.conlluCol]; corpusProcess.conlluCols = [req.body.conlluCol];
} }
if (corpusProcess.outputs && corpusProcess.outputs.length > 0) {
corpusProcess.outputs.push(req.body.output); // Add processOutput if there is one
} if (req.body.processOutput) {
else { if (corpusProcess.processOutputs && corpusProcess.processOutputs.length > 0) {
corpusProcess.outputs = [req.body.output]; corpusProcess.processOutputs.push(req.body.processOutput);
}
else {
corpusProcess.processOutputs = [req.body.processOutput];
}
} }
corpusProcess = await corpusProcess.save(); corpusProcess = await corpusProcess.save();
successHandler(res, { corpusProcess }); successHandler(res, { corpusProcess });
} }
...@@ -150,7 +156,7 @@ exports.corpus_process_add_process = async function (req, res) { ...@@ -150,7 +156,7 @@ exports.corpus_process_add_process = async function (req, res) {
} }
} }
exports.corpus_process_start = async function (req, res) { exports.corpus_process_start_old = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId; const corpusProcessId = req.params.corpusProcessId;
let corpusProcess; let corpusProcess;
let started = false; let started = false;
...@@ -196,3 +202,80 @@ exports.corpus_process_start = async function (req, res) { ...@@ -196,3 +202,80 @@ exports.corpus_process_start = async function (req, res) {
} }
} }
} }
exports.corpus_process_start = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
let corpusProcess;
let started = false;
let pipeline;
try {
corpusProcess = await CorpusProcessModel.findById(corpusProcessId);
corpusProcess.status = 'Started';
// Easier for debugging (you can just call that endpoint to restart the pipeline)
corpusProcess.currentProcessId = undefined;
await corpusProcess.save();
pipeline = await PipelineModel.findById(corpusProcess.pipelineId);
started = true;
successHandler(res, { message: "Pipeline started" });
// First of all, let's tree tag our corpus
moduleHelper.startTreeTagger(corpusProcessId);
// When it is finished, it will call corpus_process_continue
}
catch (error) {
errorHandler(res, error);
}
}
exports.corpus_process_continue = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
try {
let corpusProcess = await CorpusProcessModel.findById(corpusProcessId);
const pipeline = await PipelineModel.findById(corpusProcess.pipelineId);
// If it's not started, start with zero, else, increment
if (corpusProcess.currentProcessId == undefined) {
corpusProcess.currentProcessId = 0;
}
else {
corpusProcess.currentProcessId += 1;
}
// End of the pipeline!
if (pipeline.processes.length <= corpusProcess.currentProcessId) {
corpusProcess.currentProcessingModule = 'Exporter';
await corpusProcess.save();
successHandler(res, 'Let\'s export the conllu file');
console.log(`Starting to export CONLLU for corpus process id: ${corpusProcessId}`);
// Export the pipeline to a conllu file
moduleHelper.startModuleProcess(corpusProcessId, { moduleName: 'Exporter'});
// Should we do another endpoint 'finish'?
corpusProcess.currentProcessingModule = null;
corpusProcess.status = 'Finished';
await corpusProcess.save();
}
else {
let process = pipeline.processes[corpusProcess.currentProcessId]
corpusProcess.currentProcessingModule = process.moduleName;
await corpusProcess.save();
console.log(`Starting module ${process.moduleName} for corpus process id: ${corpusProcessId}`);
moduleHelper.startModuleProcess(corpusProcessId, process);
successHandler(res, `Module ${process.moduleName} has started`);
}
}
catch (error) {
console.error(error);
}
}
...@@ -18,7 +18,8 @@ function nameToRoute(moduleName) { ...@@ -18,7 +18,8 @@ function nameToRoute(moduleName) {
"Exporter": "http://localhost:3001", "Exporter": "http://localhost:3001",
"Modulix": "http://localhost:3002", "Modulix": "http://localhost:3002",
"SDMC": "http://localhost:3003", "SDMC": "http://localhost:3003",
"Néoveille": "http://localhost:3004" "Néoveille": "http://localhost:3004",
"Morfetik - Formes Simples": "http://localhost:3005"
} }
return routeDict[moduleName]; return routeDict[moduleName];
} }
......
{
"name": "Morfetik - Formes Simples",
"description": "Morfetik Description",
"parameters":
[
{
"name": "acceptedCategories",
"label": "Appartenance d'une ou plusieurs catégories",
"description": "Ne traite que les catégories sélectionnées",
"type": "List-Poly",
"items": ["Nom", "Verbe", "Adjectif", "Adverbe", "Conjonction", "Déterminant", "Pronom", "Préposition", "Interjection"],
"default": ["Nom", "Verbe", "Adjectif", "Adverbe", "Conjonction", "Déterminant", "Pronom", "Préposition","Interjection"]
},
{
"name": "selectedColumns",
"label": "Informations souhaitées",
"description": "selectedColumns description",
"type": "List-Poly",
"items": ["Sous-Catégorie", "Temps", "Nombre", "Genre", "Personne", "Notes"],
"default": ["Sous-Catégorie", "Temps", "Nombre", "Genre", "Personne", "Notes"]
}
]
}
\ No newline at end of file
{ {
"name": "Néoveille", "name": "Néoveille",
"description": "Détection de Néologismes", "description": "Détection de néologismes formels",
"parameters": "parameters":
[ [
{ {
"name": "nodigit", "name": "nodigit",
"label": "Exclusion des chiffres", "label": "Exclusion des chiffres",
"description": "Exclu les candidats qui contiennent des chiffres", "description": "Exclure les candidats qui contiennent des chiffres",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
}, },
{ {
"name": "lowercaseonly", "name": "lowercaseonly",
"label": "Uniquement des mots en minuscule", "label": "Uniquement les mots en minuscule",
"description": "Exclu les candidats qui ont des caractères en minuscule, cela évite le bruit dû aux noms propres", "description": "Exclure les candidats qui ont des caractères en minuscule, cela évite le bruit dû aux noms propres",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
...@@ -25,20 +25,20 @@ ...@@ -25,20 +25,20 @@
"description": "Expression régulière qui sera utilisée pour filtrer la liste des candidats, si vide, sera équivalent à '.*'", "description": "Expression régulière qui sera utilisée pour filtrer la liste des candidats, si vide, sera équivalent à '.*'",
"type": "String", "type": "String",
"items": null, "items": null,
"default": "\\w{3,}(?:-\\w{3,}){0,3}" "default": "^\\w{3,}(?:-\\w{3,}){0,3}$"
}, },
{ {
"name": "exclusiondict", "name": "exclusiondict",
"label": "Dictionnaire d'exclusion", "label": "Dictionnaire d'exclusion",
"description": "Exclu tous les candidats présents de ce dictionnaire", "description": "Exclure tous les candidats présents de ce dictionnaire",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
}, },
{ {
"name": "spellchecker", "name": "spellchecker",
"label": "Heuristique orthographique", "label": "Correcteur orthographique",
"description": "Exclu les candidats selon une heuristique basée sur Hunspell, permettant notamment d'éviter le bruit dû aux coquilles orthographiques.", "description": "Exclure les candidats selon une heuristique basée sur Hunspell, permettant notamment d'éviter le bruit dû aux coquilles orthographiques.",
"type": "Bool", "type": "Bool",
"items": null, "items": null,
"default": true "default": true
......
{
"name": "Prototype",
"description": "Prototype permettant de créer son propre module",
"parameters":
[
{
"name": "parametre1",
"label": "Le nom du paramètre comme il sera inscrit dans le frontend",
"description": "Une description détaillée pour expliquer ce paramètre, qui apparaitra quand on clique sur le point d'interrogation dans le front end",
"type": "Le type de paramètre, parmi : [List-Mono, List-Poly, Int, Bool, String]",
"items": ["La liste des paramètres disponibles, uniquement pour List-Poly et List-Mono, null sinon"],
"default": "La valeur par défaut du paramètre"
}
]
}
\ No newline at end of file
...@@ -10,7 +10,7 @@ const Schema = mongoose.Schema; ...@@ -10,7 +10,7 @@ const Schema = mongoose.Schema;
* @swagger * @swagger
* components: * components:
* schemas: * schemas:
* Output: * ProcessOutput:
* type: object * type: object
* description: data returned by a module at the end of a process * description: data returned by a module at the end of a process
* required: * required:
...@@ -45,7 +45,7 @@ const Schema = mongoose.Schema; ...@@ -45,7 +45,7 @@ const Schema = mongoose.Schema;
// Output returned by a module // Output returned by a module
// Can be a text file or something else like a model. // Can be a text file or something else like a model.
/* Should there be documentId or corpusId? */ /* Should there be documentId or corpusId? */
const OutputSchema = new Schema({ const ProcessOutputSchema = new Schema({
processId: { type: String, required: false}, processId: { type: String, required: false},
moduleName: { type: String, required: true}, // Can be built from processId moduleName: { type: String, required: true}, // Can be built from processId
content: { content: {
...@@ -212,11 +212,11 @@ const AnnotatedDocumentSchema = new Schema({ ...@@ -212,11 +212,11 @@ const AnnotatedDocumentSchema = new Schema({
* description: list of annotations per document * description: list of annotations per document
* items: * items:
* $ref: "#/components/schemas/AnnotatedDocument" * $ref: "#/components/schemas/AnnotatedDocument"
* outputs: * processOutputs:
* type: array * type: array
* description: list of output produced by the pipeline * description: list of output produced by the pipeline
* items: * items:
* $ref: "#/components/schemas/Output" * $ref: "#/components/schemas/ProcessOutput"
* currentProcessingModule: * currentProcessingModule:
* type: String * type: String
* description: Name of the module that is being executed currently by the pipeline. Is null if the pipeline has not started, failed, or finished. * description: Name of the module that is being executed currently by the pipeline. Is null if the pipeline has not started, failed, or finished.
...@@ -244,10 +244,12 @@ const CorpusProcessSchema = new Schema({ ...@@ -244,10 +244,12 @@ const CorpusProcessSchema = new Schema({
// Maybe it's better // Maybe it's better
/*annotations: { type: [AnnotationSchema]},*/ /*annotations: { type: [AnnotationSchema]},*/
annotatedDocuments: { type: [AnnotatedDocumentSchema]}, annotatedDocuments: { type: [AnnotatedDocumentSchema]},
outputs: { type: [OutputSchema]}, // The output that a module can return (describing the process that was done)
//Don't know yet if following are needed, but it seems more relevant to find these processOutputs: { type: [ProcessOutputSchema]},
//properties here rather than in Pipeline // What module is currently processing the corpus?
currentProcessingModule: { type: String }, // What module is currently processing the corpus? currentProcessingModule: { type: String },
// Get the index of the current processing module
currentProcessId: { type: Number},
status: { status: {
type: String, type: String,
enum: ['Not started yet', 'Started', 'Processing annotations', 'Finished', 'Failed'] enum: ['Not started yet', 'Started', 'Processing annotations', 'Finished', 'Failed']
......
This diff is collapsed.
...@@ -21,6 +21,7 @@ ...@@ -21,6 +21,7 @@
}, },
"dependencies": { "dependencies": {
"axios": "^0.21.0", "axios": "^0.21.0",
"cors": "^2.8.5",
"express": "^4.17.1", "express": "^4.17.1",
"express-fileupload": "^1.2.0", "express-fileupload": "^1.2.0",
"jsonwebtoken": "^8.5.1", "jsonwebtoken": "^8.5.1",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment