Commit d2cabbeb authored by Quentin's avatar Quentin
Browse files

Change outputs to processOutputs, new endpoint corpus_process_start, and adding...

Change outputs to processOutputs, new endpoint corpus_process_start, and adding a new one: corpus_process_continue, which each module calls when it has finished (so there are no issues if the module takes too much time)
parent a48d8cd0
......@@ -4,10 +4,15 @@
`npm install`
## Set up MongoDB
Make sure to have a mongo instance running, with a database named _chenetal_
## Start server
`npm start`
If you are developing this repository, you can use the following command so that the server automatically reloads when you modify a file.
`npm run devstart`
\ No newline at end of file
`npm run devstart`
......@@ -65,6 +65,8 @@ exports.corpus_file_upload = function (req, res) {
fs.mkdirSync(uploadDirectory);
}
let size = 0;
try {
if (files.length) {
files.forEach(file => {
......@@ -74,6 +76,7 @@ exports.corpus_file_upload = function (req, res) {
}
file.mv(filePath);
resData.push({ name: file.name, mimetype: file.mimetype, size: file.size, path: filePath });
size += file.size;
})
}
else {
......@@ -84,8 +87,9 @@ exports.corpus_file_upload = function (req, res) {
// TRY TO WRITE THE BUFFER HERE ?
files.mv(filePath);
resData.push({ name: files.name, mimetype: files.mimetype, size: files.size, path: filePath });
size += files.size;
}
successHandler(res, { uploadedFiles: resData });
successHandler(res, { uploadedFiles: resData, size : size});
}
catch (error) {
errorHandler(res, error);
......@@ -320,4 +324,4 @@ exports.corpus_document_report_get = function (req, res) {
}
}
})
}
\ No newline at end of file
}
......@@ -129,18 +129,24 @@ exports.corpus_process_add_process = async function (req, res) {
errorHandler(res, `No corpus processes were found with id: ${corpusProcessId}`, 404);
}
else {
// Add conlluCol
if (corpusProcess.conlluCols && corpusProcess.conlluCols.length > 0) {
corpusProcess.conlluCols.push(req.body.conlluCol);
}
else {
corpusProcess.conlluCols = [req.body.conlluCol];
}
if (corpusProcess.outputs && corpusProcess.outputs.length > 0) {
corpusProcess.outputs.push(req.body.output);
}
else {
corpusProcess.outputs = [req.body.output];
// Add processOutput if there is one
if (req.body.processOutput) {
if (corpusProcess.processOutputs && corpusProcess.processOutputs.length > 0) {
corpusProcess.processOutputs.push(req.body.processOutput);
}
else {
corpusProcess.processOutputs = [req.body.processOutput];
}
}
corpusProcess = await corpusProcess.save();
successHandler(res, { corpusProcess });
}
......@@ -150,7 +156,7 @@ exports.corpus_process_add_process = async function (req, res) {
}
}
exports.corpus_process_start = async function (req, res) {
exports.corpus_process_start_old = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
let corpusProcess;
let started = false;
......@@ -196,3 +202,80 @@ exports.corpus_process_start = async function (req, res) {
}
}
}
exports.corpus_process_start = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
let corpusProcess;
let started = false;
let pipeline;
try {
corpusProcess = await CorpusProcessModel.findById(corpusProcessId);
corpusProcess.status = 'Started';
// Easier for debugging (you can just call that endpoint to restart the pipeline)
corpusProcess.currentProcessId = undefined;
await corpusProcess.save();
pipeline = await PipelineModel.findById(corpusProcess.pipelineId);
started = true;
successHandler(res, { message: "Pipeline started" });
// First of all, let's tree tag our corpus
moduleHelper.startTreeTagger(corpusProcessId);
// When it is finished, it will call corpus_process_continue
}
catch (error) {
errorHandler(res, error);
}
}
exports.corpus_process_continue = async function (req, res) {
const corpusProcessId = req.params.corpusProcessId;
try {
let corpusProcess = await CorpusProcessModel.findById(corpusProcessId);
const pipeline = await PipelineModel.findById(corpusProcess.pipelineId);
// If it's not started, start with zero, else, increment
if (corpusProcess.currentProcessId == undefined) {
corpusProcess.currentProcessId = 0;
}
else {
corpusProcess.currentProcessId += 1;
}
// End of the pipeline!
if (pipeline.processes.length <= corpusProcess.currentProcessId) {
corpusProcess.currentProcessingModule = 'Exporter';
await corpusProcess.save();
successHandler(res, 'Let\'s export the conllu file');
console.log(`Starting to export CONLLU for corpus process id: ${corpusProcessId}`);
// Export the pipeline to a conllu file
moduleHelper.startModuleProcess(corpusProcessId, { moduleName: 'Exporter'});
// Should we do another endpoint 'finish'?
corpusProcess.currentProcessingModule = null;
corpusProcess.status = 'Finished';
await corpusProcess.save();
}
else {
let process = pipeline.processes[corpusProcess.currentProcessId]
corpusProcess.currentProcessingModule = process.moduleName;
await corpusProcess.save();
console.log(`Starting module ${process.moduleName} for corpus process id: ${corpusProcessId}`);
moduleHelper.startModuleProcess(corpusProcessId, process);
successHandler(res, `Module ${process.moduleName} has started`);
}
}
catch (error) {
console.error(error);
}
}
......@@ -18,7 +18,8 @@ function nameToRoute(moduleName) {
"Exporter": "http://localhost:3001",
"Modulix": "http://localhost:3002",
"SDMC": "http://localhost:3003",
"Néoveille": "http://localhost:3004"
"Néoveille": "http://localhost:3004",
"Morfetik - Formes Simples": "http://localhost:3005"
}
return routeDict[moduleName];
}
......
{
"name": "Morfetik - Formes Simples",
"description": "Morfetik Description",
"parameters":
[
{
"name": "acceptedCategories",
"label": "Appartenance d'une ou plusieurs catégories",
"description": "Ne traite que les catégories sélectionnées",
"type": "List-Poly",
"items": ["Nom", "Verbe", "Adjectif", "Adverbe", "Conjonction", "Déterminant", "Pronom", "Préposition", "Interjection"],
"default": ["Nom", "Verbe", "Adjectif", "Adverbe", "Conjonction", "Déterminant", "Pronom", "Préposition","Interjection"]
},
{
"name": "selectedColumns",
"label": "Informations souhaitées",
"description": "selectedColumns description",
"type": "List-Poly",
"items": ["Sous-Catégorie", "Temps", "Nombre", "Genre", "Personne", "Notes"],
"default": ["Sous-Catégorie", "Temps", "Nombre", "Genre", "Personne", "Notes"]
}
]
}
\ No newline at end of file
{
"name": "Néoveille",
"description": "Détection de Néologismes",
"description": "Détection de néologismes formels",
"parameters":
[
{
"name": "nodigit",
"label": "Exclusion des chiffres",
"description": "Exclu les candidats qui contiennent des chiffres",
"description": "Exclure les candidats qui contiennent des chiffres",
"type": "Bool",
"items": null,
"default": true
},
{
"name": "lowercaseonly",
"label": "Uniquement des mots en minuscule",
"description": "Exclu les candidats qui ont des caractères en minuscule, cela évite le bruit dû aux noms propres",
"label": "Uniquement les mots en minuscule",
"description": "Exclure les candidats qui ont des caractères en majuscule, cela évite le bruit dû aux noms propres",
"type": "Bool",
"items": null,
"default": true
......@@ -25,20 +25,20 @@
"description": "Expression régulière qui sera utilisée pour filtrer la liste des candidats, si vide, sera équivalent à '.*'",
"type": "String",
"items": null,
"default": "\\w{3,}(?:-\\w{3,}){0,3}"
"default": "^\\w{3,}(?:-\\w{3,}){0,3}$"
},
{
"name": "exclusiondict",
"label": "Dictionnaire d'exclusion",
"description": "Exclu tous les candidats présents de ce dictionnaire",
"description": "Exclure tous les candidats présents de ce dictionnaire",
"type": "Bool",
"items": null,
"default": true
},
{
"name": "spellchecker",
"label": "Heuristique orthographique",
"description": "Exclu les candidats selon une heuristique basée sur Hunspell, permettant notamment d'éviter le bruit dû aux coquilles orthographiques.",
"label": "Correcteur orthographique",
"description": "Exclure les candidats selon une heuristique basée sur Hunspell, permettant notamment d'éviter le bruit dû aux coquilles orthographiques.",
"type": "Bool",
"items": null,
"default": true
......
{
"name": "Prototype",
"description": "Prototype permettant de créer son propre module",
"parameters":
[
{
"name": "parametre1",
"label": "Le nom du paramètre comme il sera inscrit dans le frontend",
"description": "Une description détaillée pour expliquer ce paramètre, qui apparaitra quand on clique sur le point d'interrogation dans le front end",
"type": "Le type de paramètre, parmi : [List-Mono, List-Poly, Int, Bool, String]",
"items": ["La liste des paramètres disponibles, uniquement pour List-Poly et List-Mono, null sinon"],
"default": "La valeur par défaut du paramètre"
}
]
}
\ No newline at end of file
......@@ -10,7 +10,7 @@ const Schema = mongoose.Schema;
* @swagger
* components:
* schemas:
* Output:
* ProcessOutput:
* type: object
* description: data returned by a module at the end of a process
* required:
......@@ -45,7 +45,7 @@ const Schema = mongoose.Schema;
// Output returned by a module
// Can be a text file or something else like a model.
/* Should there be documentId or corpusId? */
const OutputSchema = new Schema({
const ProcessOutputSchema = new Schema({
processId: { type: String, required: false},
moduleName: { type: String, required: true}, // Can be built from processId
content: {
......@@ -212,11 +212,11 @@ const AnnotatedDocumentSchema = new Schema({
* description: list of annotations per document
* items:
* $ref: "#/components/schemas/AnnotatedDocument"
* outputs:
* processOutputs:
* type: array
* description: list of output produced by the pipeline
* items:
* $ref: "#/components/schemas/Output"
* $ref: "#/components/schemas/ProcessOutput"
* currentProcessingModule:
* type: string
* description: Name of the module that is being executed currently by the pipeline. Is null if the pipeline has not started, failed, or finished.
......@@ -244,10 +244,12 @@ const CorpusProcessSchema = new Schema({
// Maybe it's better
/*annotations: { type: [AnnotationSchema]},*/
annotatedDocuments: { type: [AnnotatedDocumentSchema]},
outputs: { type: [OutputSchema]},
//Don't know yet if following are needed, but it seems more relevant to find these
//properties here rather than in Pipeline
currentProcessingModule: { type: String }, // What module is currently processing the corpus?
// The output that a module can return (describing the process that was done)
processOutputs: { type: [ProcessOutputSchema]},
// What module is currently processing the corpus?
currentProcessingModule: { type: String },
// Get the index of the current processing module
currentProcessId: { type: Number},
status: {
type: String,
enum: ['Not started yet', 'Started', 'Processing annotations', 'Finished', 'Failed']
......
This diff is collapsed.
......@@ -21,6 +21,7 @@
},
"dependencies": {
"axios": "^0.21.0",
"cors": "^2.8.5",
"express": "^4.17.1",
"express-fileupload": "^1.2.0",
"jsonwebtoken": "^8.5.1",
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment