Commit 76415a20 authored by Quentin David's avatar Quentin David
Browse files

Renaming some stuff, began to implementing in detail the API, adding stuff for login

parent 61943d3f
const env = process.env.NODE_ENV || 'development';
const config = {
production: {
secret: process.env.JWT_SECRET || '',
ldap: {
url: process.env.LDAP_URL || '',
bindDN: process.env.LDAP_ADMIN_USER || '',
bindCredentials: process.env.LDAP_ADMIN_PWD || '',
searchBase: process.env.LDAP_BASE || 'ou=people,dc=univ-paris13,dc=fr',
searchFilter: process.env.LDAP_FILTER || '(&(uid={{username}})(accountStatus=active*))',
searchAttributes: process.env.LDAP_ATTRIBUTES || 'uid, mail, displayName, accountStatus'
}
},
development: {
secret: process.env.DEV_JWT_SECRET || 'lipn-cat',
ldap: {
url: process.env.DEV_LDAP_URL || 'ldap://0.0.0.0:389',
searchBase: process.env.DEV_LDAP_BASE || 'ou=people,dc=planetexpress,dc=com',
searchFilter: process.env.DEV_LDAP_FILTER || '(uid={{username}})',
searchAttributes: process.env.DEV_LDAP_ATTRIBUTES || 'uid, mail, cn',
passwordField: 'userPassword'
}
},
test: {
secret: process.env.TEST_JWT_SECRET || 'lipn-cat'
}
};
module.exports = config[env];
const LDAPStrategy = require('passport-ldapauth');
const JWTStrategy = require('passport-jwt').Strategy;
const ExtractJwt = require('passport-jwt').ExtractJwt;
const { secret, ldap } = require('./auth.js');
module.exports = function (passport) {
// JWT Configuration
const jwtOptions = {
jwtFromRequest: ExtractJwt.fromAuthHeaderAsBearerToken(),
secretOrKey: secret
};
// Use passport with JWT strategy
passport.use(new JWTStrategy(jwtOptions,
function (payload, done) {
console.log('payload received', payload);
done(null, payload);
}));
// Use passport with the LDAP strategy
const searchAttributes = ldap.searchAttributes.split(',').map(e => e.trim());
const ldapOpts = { server: { ...ldap, searchAttributes } };
passport.use(new LDAPStrategy(ldapOpts));
};
......@@ -20,14 +20,28 @@ exports.debug_corpus_list = function(req, res) {
}
// Add a new corpus
exports.corpus_create = function(req,res) {
console.log("Demande d'ajout d'un corpus:")
console.log(req.body);
const newCorpus = CorpusModel({
corpus_name: req.body.corpus_name,
content: req.body.content,
created_by: req.body.created_by
// documents: ....
createdBy: req.body.createdBy,
type: req.body.type,
creationDate: req.body.creationDate,
metadata: {
author: req.body.metadata.author,
title: req.body.metadata.title,
description: req.body.metadata.description,
date: req.body.metadata.date,
type: req.body.metadata.type,
size: req.body.metadata.size,
language: req.body.metadata.language,
additionalMetadata: req.body.metadata.additionalMetadata,
}
});
newCorpus.save((error, corpus) => {
if (error) { errorHandler(res, error); }
else {
console.log("Successfully added the corpus")
successHandler(res, { corpus: corpus}, 201);
}
})
......@@ -80,7 +94,16 @@ exports.corpus_summary = function(req,res) {
failHandler(res, { corpus: `No corpus was found with id: ${corpusId}`}, 404)
}
else {
successHandler(res, { corpus_summary: 'TBD' });
// Almost the same as the model, except that we only return the number of documents, not their content.
const corpusSummary = {
corpusId: corpusId,
createdBy: corpus.createdBy,
creationDate: corpus.creationDate,
type: corpus.type,
documentNumber: corpus.documents.length,
metadata: corpus.metadata
};
successHandler(res, { corpusSummary: corpusSummary });
}
})
}
......
......@@ -69,6 +69,20 @@ exports.corpus_delete = function(req,res) {
}
});
}
// Get the conllu of an existing corpus process
exports.corpus_process_conllu_get = function(res, req) {
const corpusProcessId = req.params.corpusProcesId;
CorpusProcessModel.findById(corpusProcessId, (error, corpusProcess) => {
if(error) { errorHandler(res, error); }
if (!corpusProcess) {
failHandler(res, { corpusProcess: `No corpus process was found with id: ${corpusProcessId}`}, 404);
}
else {
// TO BE FILLED
successHandler(res, { corpusProcess: { conllu: 'TBD'} });
}
})
}
// Get the status of an existing corpus process
exports.corpus_process_status_get = function(req,res) {
const corpusProcessId = req.params.corpusProcessId;
......
const CorpusModel = require('../../models/Corpus.js');
const { errorHandler, failHandler } = require('./responseHelper.js');
// TEST: Unknown id -> it should be 404 and no goes to successhandler of userController (corpora list)
function getSummaryFromId(res, corpusId) {
console.log('Corpus Id:');
console.log(corpusId);
CorpusModel.findById(corpusId, (error, corpus) => {
if (error) { errorHandler(res, error); }
if (!corpus) {
failHandler(res, { corpus: `No corpus was found with id: ${corpusId}`}, 404)
}
else {
console.log('Found the corpus');
// Almost the same than the model, except we only get the number of docs, and not its content.
const corpusSummary = {
corpusId: corpusId,
createdBy: corpus.createdBy,
creationDate: corpus.creationDate,
type: corpus.type,
documentNumber: corpus.documents.length,
metadata: corpus.metadata
};
console.log("Corpus Summary in its function");
console.log(corpusSummary);
return corpusSummary;
// successHandler(res, { corpusSummary: corpusSummary });
}
})
}
// Need number of documents
exports.getSummaryListFromIds = function (res, corpusIdList) {
CorpusModel.find( {'_id': {$in: corpusIdList}}, 'createdBy creationDate type metadata', (error, corpusSummaryList) => {
if (error) {
console.log(error);
errorHandler(res, error); }
// No corpora were found at all
if (!corpusSummaryList) {
// to be filled
//failHandler(res )
pass
}
// Some of the corpora were not found
if (corpusSummaryList.length < corpusIdList.length) {
// To be filled
pass
}
else {
console.log("Found corpusSummary list");
console.log(corpusSummaryList);
return corpusSummaryList;
}
})
}
exports.getSummaryListFromIds2 = function (res, corpusIdList) {
var corpusSummaryList = [];
var corpusSummary;
corpusIdList.forEach((element) => {
corpusSummary = getSummaryFromId(res, element);
console.log("corpus summary:");
console.log(corpusSummary)
corpusSummaryList.push(corpusSummary);
});
console.log("corpussummarylist");
console.log(corpusSummaryList)
return corpusSummaryList;
}
\ No newline at end of file
exports.errorHandler = function(res, error) {
// I'm not sure how Express deals with HTTP status codes and errors: does it always send 200 OK?
exports.errorHandler = function(res, error, httpCode = 500) {
res.status(httpCode);
res.json({
status: "error",
message: error
......
......@@ -9,6 +9,8 @@ const errorHandler = responseHelper.errorHandler;
const successHandler = responseHelper.successHandler;
const failHandler = responseHelper.failHandler; // For eg. "User not found" is a fail, not an error
const corpusHelper = require('./lib/corpusHelper');
/* User personal information management functions */
// DEBUG: User list
......@@ -81,17 +83,22 @@ exports.user_delete = function(req,res) {
exports.user_corpus_list = function(req,res) {
// TODO: Fetch the corpora from the corpus DB
const userId = req.params.userId;
UserModel.findById(userId, (error, user) => {
UserModel.findById(userId, async (error, user) => {
if (error) { errorHandler(res, error); }
else if (!user) {
failHandler(res, { user: `No user was found with id: ${userId}`}, 404);
}
else {
// Should fetch /corpus to get the summary ?
successHandler(res, { profileCorpora: user.profileCorpora});
// ProfileCorpora is a list of ids.
// Promise ici non ?
var profileCorporaList = await corpusHelper.getSummaryListFromIds(res, user.profileCorpora)
console.log("Juste before success handler");
console.log(typeof profileCorporaList);
successHandler(res, { profileCorpora: profileCorporaList });
}
})
}
// Add a new corpus to an existing user
exports.user_corpus_create = function(req,res) {
const userId = req.params.userId;
......@@ -102,9 +109,10 @@ exports.user_corpus_create = function(req,res) {
}
else {
const corpusId = req.body.corpusId;
const newCorpus = { corpusId: corpusId};
user.profileCorpora.push(newCorpus);
// const newCorpus = { corpusId: corpusId};
user.profileCorpora.push(corpusId);
user.save((error, _) => {
if (error) { errorHandler(res, error); }
else {
......
......@@ -46,10 +46,18 @@ Request Body:
```
The system may then validate all of this information, and will try to fill the conlluBuffer of each document by tokenizing the corpus. It will also need to compute the size of the corpus.
Informations intéressantes:
- Nombre de mots / tokens
- Taille informatique (du fichier source)
From now on, this document (as in a mongo document) will bear the id "corpusId".
## II: User set their pipeline
PRETREATMENTS:
La segmentation par phrases et par mots se fera systématiquement lors du conllu buffer, on peut donc enlever ces propriétés de preTreatments.
POST /pipelines
Request body:
......@@ -101,32 +109,32 @@ Request body:
corpusId: corpusId,
pipelineId: pipelineId,
userId: 'myUserId',
conllu: null, // Empty until filled by modules
annotatedDocuments: [
{
documentId: article1DocumentId,
corpusProcessId: null, // Same issue than in I: id not yet created
annotations: []
}
],
conllu: null, // Will be computed after the POST
visualAnnotatedDocuments: [], // It will be computed after the pipeline
outputs: null,
currentProcessModule: null,
status: 'Not started yet'
}
```
On concatène tous les conlluBuffer de chaque document du corpus «corpusId» (GET /corpus/corpusId/...)
PUT /corpusProcesses/corpusProcessId
pour ajouter le conllu concaténé.
### Pre-Treatments executions
Once it is added to the collection, it will fetch the preTreatments in the document of the corresponding pipeline.
``` javascript
preTreatments: {
sentenceSegmentation: false,
wordSegmentation: true,
//sentenceSegmentation: false,
//wordSegmentation: true,
posTagger: true,
conversionToUTF: false
codeConversionToUTF: false
}
```
wordSegmentation is supposed to be already done as we posted the corpus.
So the system will need to apply the POS Tagger to the corpus.
......@@ -143,7 +151,7 @@ Then apply the process, then returning a conllu column that we can add to corpus
Once it is added to the collection, it will fetch the first process in the list of the corresponding pipelineId and call the module, then update:
POST /modules/Neoveille
> POST /modules/Neoveille
Request body:
``` javascript
......@@ -165,17 +173,16 @@ Request body:
Request body:
``` javascript
{
currentProcessingModule: 'Neoveille',
status: 'Started'
currentProcessingModule: 'Neoveille', // null -> Neoveille
status: 'Started' // Not started yet -> Started
}
```
## IV: Module Processing
The module will need to fetch the conllu.
(need a specific GET for conllu?)
> GET /corpusProcess/corpusProcessId
> GET /corpusProcesses/corpusProcessId/conllu
Then, given parameters given from the previous paragraph, applies the process to the conllu.
......@@ -183,7 +190,7 @@ Neoveille will then give back two things:
- one output: the raw list of found neologisms
- one conllu column with
> POST /corpusProcess/corpusProcessId
> POST /corpusProcess/corpusProcessId/processOutputs
Request body:
``` javascript
......@@ -209,6 +216,8 @@ This request adds those two outputs of neoveille process to the collection corpu
### When Neoveille is finished...
(Comment le système sait quand Neoveille finit ?)
We can start the next module on the pipeline list.
> POST /modules/SDMC
......@@ -227,7 +236,7 @@ Request body:
```
Then update our corpusProcess
> PUT /corpusProcess/corpusProcessId
> PUT /corpusProcesses/corpusProcessId
Request body:
``` javascript
......@@ -238,11 +247,11 @@ Request body:
Now, similar to Neoveille, SDMC will need to fetch the conllu,
> GET /corpusProcess/corpusProcessId
> GET /corpusProcesses/corpusProcessId/conllu
Process that corpus, then adds its output to the structure
> POST /corpusProcess/corpusProcessId
> POST /corpusProcesses/corpusProcessId/processOutputs
Request body:
``` javascript
......@@ -276,12 +285,15 @@ First, the system extrates the conllu.
Then it processes it and adds to annotated documents the annotations:
> PUT /corpusProcesses/corpusProcessId (/annotation?)
> POST /corpusProcesses/corpusProcessId/visualAnnotatedDocument
On coupe le conllu pour chaque document.
On traite pour chaque document toutes ses colonnes
Request body:
``` javascript
{
annotatedDocuments:[
newVisualAnotatedDocument:[
{
documentId: 'article1DocumentId',
annotations: [
......@@ -290,6 +302,18 @@ Request body:
processId: 'NeoveilleProcess',
conlluColumnId: 'conlluNeoveilleId',
moduleName: 'Neoveille',
content: {
title: 'Annotation des Néologismes',
description: 'Trouvés par Néoveille selon ces paramètres:',
data: null // ??
},
color: null // computed later ?
},
{
documentId: 'article1DocumentId',
processId: 'SDMCProcess',
conlluColumnId: 'conlluSDMCId',
moduleName: 'Neoveille',
content: {
title: 'Annotation des Néologismes',
description: 'Trouvés par Néoveille selon ces paramètres:',
......@@ -299,4 +323,24 @@ Request body:
]
}
]
}
\ No newline at end of file
}
```
Il faudrait faire un calcul pour affecter les couleurs pour chaque annotation des documents.
Tout le process est terminé, on peut mettre à jour le status de corpusProcess
> PUT /corpusProcesses/corpusProcessId
Request body:
``` javascript
{
status: 'Finished'
}
```
Et on envoie un mail à l'utilisateur s'il a activé l'option.
( POST /users/myUserId/sendMail )
......@@ -73,7 +73,7 @@ const DocumentSchema = new Schema({
* type: String
* description: language of the corpus, can be multilingual
* example: fr
* userMetadata:
* additionalMetadata:
* type: array
* description: list of metadata added by the user
* items:
......@@ -98,7 +98,7 @@ const MetadataSchema = new Schema({
type: {type: String },
size: {type: String },
language: {type: String },
userMetadata: [{
additionalMetadata: [{
name: {type: String },
value: {type: String }
}]
......@@ -145,6 +145,15 @@ const CorpusSchema = new Schema({
metadata: { type: MetadataSchema }
});
// Virtual property `numberOfDocuments`: length of the corpus' `documents`,
// or 0 when `documents` is not set. A regular function is required so that
// `this` is the document being read.
CorpusSchema.virtual('numberOfDocuments').get(function() {
  return this.documents ? this.documents.length : 0;
});
// Let's compile as a model
const CorpusModel = mongoose.model('CorpusModel', CorpusSchema);
......
......@@ -226,6 +226,7 @@ const AnnotatedDocumentSchema = new Schema({
* - Not started yet
* - Started
* - Finished
* - Processing annotations
* - Failed
* description: Status of the pipeline
*/
......@@ -246,7 +247,7 @@ const CorpusProcessingSchema = new Schema({
currentProcessingModule: { type: String }, // What module is currently processing the corpus?
status: {
type: String,
enum: ['Not started yet', 'Started', 'Finished', 'Failed']
enum: ['Not started yet', 'Started', 'Processing annotations', 'Finished', 'Failed']
}
})
......
......@@ -70,7 +70,7 @@ const ProcessSchema = new Schema({
* posTagger:
* type: Boolean
* description: should each token be annotated with its part-of-speech?
* conversionToUTF:
* codeConversionToUTF:
* type: Boolean
* description: should the corpus be converted to UTF?
* processes:
......@@ -94,7 +94,7 @@ const PipelineSchema = new Schema({
sentenceSegmentation: Boolean,
wordSegmentation: Boolean,
posTagger: Boolean,
conversionToUTF: Boolean // imprecise... which UTF?
codeConversionToUTF: Boolean // UTF-8
},
// Moved in CorpusProcessing.js
// currentProcessingModule: { type: String }, // What module is currently processing the corpus?
......
This diff is collapsed.
......@@ -16,11 +16,15 @@
"eslint": "^7.9.0",
"eslint-config-airbnb-base": "^14.2.0",
"eslint-plugin-import": "^2.22.0",
"nodemon": "^2.0.4"
"nodemon": "^2.0.4",
"passport": "^0.4.1"
},
"dependencies": {
"express": "^4.17.1",
"jsonwebtoken": "^8.5.1",
"mongoose": "^5.10.5",
"passport-jwt": "^4.0.0",
"passport-ldapauth": "^2.1.4",
"swagger-jsdoc": "^4.2.0",
"swagger-ui-express": "^4.1.4",
"yamljs": "^0.3.0"
......
const express = require('express');
const jwt = require('jsonwebtoken');
const passport = require('passport');
const path = require('path');
const { secret } = require(path.join(__dirname, '/../config/auth.js'));
const router = express.Router();
/* Builds the JWT of a user: the user object signed with the configured secret */
function jwtToken (user) {
  const signedToken = jwt.sign(user, secret);
  return signedToken;
}
/* POST LDAP login: authenticates with the 'ldapauth' strategy (no session), then answers with the user and a JWT */
router.post('/',
  passport.authenticate('ldapauth', { session: false }),
  function (req, res) {
    // Successful authentication: the authenticated user is available as req.user
    const { user } = req;
    res.json({ user, token: jwtToken(user) });
  });
module.exports = router;
......@@ -14,6 +14,8 @@ router.get('/:corpusProcessId', corpusProcessingController.corpus_process_get)
router.put('/:corpusProcessId', corpusProcessingController.corpus_process_modify)
// DELETE /corpusProcesses/:corpusProcessId
router.delete('/:corpusProcessId', corpusProcessingController.corpus_process_delete)
// GET /corpusProcesses/:corpusProcessId/conllu
// The route was declared without a handler, so the controller was never reached.
router.get('/:corpusProcessId/conllu', corpusProcessingController.corpus_process_conllu_get)
// GET /corpusProcesses/:corpusProcessId/status
router.get('/:corpusProcessId/status', corpusProcessingController.corpus_process_status_get)
// GET /corpusProcesses/:userId
......
const express = require('express');
const passport = require('passport');
const cors = require('cors');
const auth = require('./routes/auth.js');
const corporaRoutes = require('./routes/corpora.js');
const pipelinesRoutes = require('./routes/pipelines.js');
......@@ -31,12 +35,16 @@ db.once('open', function() {
})
const app = express();
app.use(express.json());
const port = process.env.API_PORT || 3000;
// Middlewares
app.use(express.json());
app.use(cors());
app.use(passport.initialize());
require('./config/passport')(passport)
// Routes
app.get('/', (req, res) => res.send('Hello'));
app.use('/login', auth);
app.use('/corpora', corporaRoutes);
app.use('/pipelines', pipelinesRoutes);
app.use('/modules', modulesRoutes);
......@@ -70,4 +78,5 @@ app.use(
swaggerUi.setup(specs, { explorer: true} )
);
const port = process.env.API_PORT || 8081;
app.listen(port, () => console.log(`App listening at http://localhost:${port}`));
Markdown is supported
0% or .