diff --git a/src/masterworker3.cpp b/src/masterworker3.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..418f715b23dfb2d0d105fca09b309e4b6703eadf
--- /dev/null
+++ b/src/masterworker3.cpp
@@ -0,0 +1,222 @@
+#include <iostream>
+#include <mpi.h>
+#include <ginac/ginac.h>
+
+#include "products.h"
+#include "utils_parall.h"
+#include "parall_constants.h"
+#include "parall_internal.h"
+#include "utils.h"
+
+namespace gi = GiNaC;
+
+/*******************************************************************************
+ *                        Parallel 1-level decomposition                       *
+ *******************************************************************************/
+
+gi::ex multiply_1level_master3( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) { 
+    gi::ex Tens = 0;
+    unsigned int a2, a4;
+    gi::lst symbols;
+
+    MPI_Status status;
+    char* expr_c;
+    size_t expr_c_size = 0;
+    int src, np, running = 0;
+    unsigned int len;
+    parameters_2_1_t pzero( 0, 0 );
+
+    MPI_Comm_size( comm, &np );
+
+    expr_c_size = 3279;                      /* initial guess, grown on demand below */
+    expr_c = (char*) malloc( expr_c_size );
+
+    int receivedresults = 0;
+    unsigned int N = size/2;
+
+    std::vector<parameters_2_1_t> input;
+    std::vector<std::string> results_s;
+    std::vector<gi::ex> results;
+
+    /* Build a list of argument sets */
+    
+    for( a4 = 0 ; a4 < N ; a4++ ) {
+        for( a2 = 0 ; a2 < N ; a2++ ) {
+            parameters_2_1_t p( a4, a2 );
+            input.push_back( p );
+        }
+    }
+    
+    /* Compute the set of symbols */
+    /* Could be done while the first slave is working */
+    
+    symbols = all_symbols_3D( size );
+
+    /* Distribute the work */
+
+    while( input.size() > 0 ) {
+        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
+        src = status.MPI_SOURCE;
+       
+        if( status.MPI_TAG == TAG_PULL ) {
+
+            /* Nothing else will come: just send some work */
+            send_work( input, src, comm );
+
+        } else {
+            if( status.MPI_TAG == TAG_RES ){
+
+                /* The first message contains the length of what is coming next */
+                if( len != 0 ) {
+                    if( len > expr_c_size ) {
+                        expr_c_size = len;
+                        if( NULL != expr_c ) free( expr_c );
+                        expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
+                    }
+                    
+                    /* Receive the result */
+                    MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
+                        
+                    /* put it in the result queue */
+                    std::string s( expr_c );
+                    
+                    send_work( input, src, comm );
+                    
+                    /* Process what I have just received */
+                    /* Could be given to a slave... */
+                    gi::ex received = de_linearize_expression( s, symbols );
+                    Tens += received;
+#if DEBUG
+                    results.push_back( received );
+                    results_s.push_back( s );
+                    receivedresults++;
+#endif
+                } else {
+                    /* Send more work  */
+                    send_work( input, src, comm );
+                }                
+            } else {
+                std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
+            }
+        }
+    }
+
+    /* Wait until everyone is done */
+
+    running = np - 1; // all the slaves are running
+    while( running > 0 ) {
+
+        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
+        src = status.MPI_SOURCE;
+
+        if( len != 0 ) {
+            if( len > expr_c_size ) {
+                expr_c_size = len;
+                if( NULL != expr_c ) free( expr_c );
+                expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
+            }
+
+            /* Receive the result */
+            MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
+            
+            /* And send the END signal */
+            send_end( src, pzero, comm );
+            running--;
+            
+            /* Process what I have just received */
+            /* Could be given to a slave... */
+            /* put it in the result queue */
+            std::string s( expr_c );
+            gi::ex received = de_linearize_expression( s, symbols );
+            Tens += received;
+#if DEBUG
+            results.push_back( received );
+            results_s.push_back( s );
+            receivedresults++;
+#endif
+        } else {
+            send_end( src, pzero, comm );
+            running--;
+        }
+    }
+    
+#if DEBUG
+    std::cout << "Received " << receivedresults << " results" << std::endl;
+
+    std::cout << "Tpara=" << Tens << ";" << std::endl;
+#endif
+    
+    if( NULL != expr_c ) free( expr_c );
+    return Tens;
+}
+
+void multiply_1level_slave3( tensor3D_t& T, int size, MPI_Comm comm = MPI_COMM_WORLD ) {
+    gi::ex Tens;
+    unsigned int  a2, a4;
+    unsigned int len = 0;
+    
+    parameters_2_1_t params( 0, 0 );   /* must match DT_PARAMETERS_2_1 below */
+    MPI_Status status;
+    char* expr_c;
+
+    int rank;
+    MPI_Comm_rank( comm, &rank );
+    
+    /* Ask for some work */
+    
+    MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
+
+    while( true ){
+        /* Receive a set of parameters */
+        
+        MPI_Recv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm, &status );
+        
+        if( status.MPI_TAG == TAG_WORK ){
+            a4 = params.a4;
+            a2 = params.a2;
+            
+            Tens = one_level1_product( &T, size, a4, a2 );
+            send_result( Tens );
+
+        } else {
+            if( status.MPI_TAG == TAG_END ){
+                return;
+            } else {
+                std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
+            }
+        }
+    }
+}
+
+/* Communication protocol:
+   M -> W: always the same size, therefore a single message
+   W -> M: an unsigned int (the length of the expression), then the expression itself (array of chars)
+*/
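+
+/* For reference, a minimal sketch (not compiled in) of the W -> M half of the
+   protocol described above. The real sender is the send_result() helper used
+   by the slave; this illustrative version assumes the expression is
+   serialized with GiNaC's stream output (needs <sstream>), i.e. the inverse
+   of the de_linearize_expression() call on the master side. */
+#if 0
+static void send_result_sketch( const gi::ex& Tens, MPI_Comm comm = MPI_COMM_WORLD ) {
+    std::ostringstream oss;
+    oss << Tens;                        /* serialize the expression */
+    std::string s = oss.str();
+    unsigned int len = s.length() + 1;  /* the final '\0' is sent too */
+    MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_RES, comm );
+    MPI_Send( (void*) s.c_str(), len, MPI_CHAR, ROOT, TAG_EXPR, comm );
+}
+#endif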
+        
+gi::ex multiply_1level_mw3( tensor3D_t& T, int size ) {  // simpler: same dimension everywhere
+    int rank;
+    gi::ex Tens = 0;
+    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+
+    /* Create a new datatype for the parameters */
+    
+    create_parameters_datatype_2_1();
+
+    /* Here we go */
+    
+    if( 0 == rank ) {
+        Tens = multiply_1level_master3( T, size );
+    } else {
+        multiply_1level_slave3( T, size );
+    }
+
+    /* Finalize */
+    
+    free_parameters_2_1_dt();
+    return Tens;
+}
diff --git a/src/mw_addslave3.cpp b/src/mw_addslave3.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bd1761ede1a0eda01100611335d27dfe95326d98
--- /dev/null
+++ b/src/mw_addslave3.cpp
@@ -0,0 +1,237 @@
+#include <iostream>
+#include <mpi.h>
+#include <ginac/ginac.h>
+
+#include "products.h"
+#include "utils_parall.h"
+#include "parall_constants.h"
+#include "parall_internal.h"
+#include "utils.h"
+#include "profiling.h"
+
+namespace gi = GiNaC;
+
+/*******************************************************************************
+ *         Parallel 1-level decomposition with addition on a slave             *
+ *******************************************************************************/
+
+gi::ex multiply_1level_master_addslave3( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) { 
+    gi::ex Tens = 0;
+    unsigned int a2, a4;
+    gi::ex A;
+    gi::lst symbols;
+
+    MPI_Status status;
+    char* expr_c;
+    size_t expr_c_size = 0;
+    int src, np, running = 0;
+    unsigned int len;
+    parameters_2_1_t pzero( 0, 0 );
+
+    MPI_Comm_size( comm, &np );
+
+    expr_c_size = 3279;                      /* initial guess, grown on demand below */
+    expr_c = (char*) malloc( expr_c_size );
+
+
+    int receivedresults = 0;
+    unsigned int N = size/2;
+
+    std::vector<parameters_2_1_t> input;
+    std::vector<std::string> results; /* serialized expressions, waiting to be added */
+
+    /* Build a list of argument sets */
+    
+    for( a4 = 0 ; a4 < N ; a4++ ) {
+        for( a2 = 0 ; a2 < N ; a2++ ) {
+            parameters_2_1_t p( a4, a2 );
+            input.push_back( p );
+        }
+    }
+
+    /* Compute the set of symbols */
+    /* Could be done while the first slave is working */
+    
+    symbols = all_symbols_3D( size );
+
+    /* Distribute the work */
+
+    while( input.size() > 0 ) {
+        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
+        
+        if( status.MPI_TAG == TAG_PULL ) {
+            /* Nothing else will come: just send some work */
+            src = status.MPI_SOURCE;
+            send_work( input, src, comm );
+            
+        } else {
+            if( status.MPI_TAG == TAG_RES ){
+                src = status.MPI_SOURCE;
+
+                /* The first message contains the length of what is coming next */
+                if( len != 0 ) {
+                    if( len > expr_c_size ) {
+                        expr_c_size = len;
+                        if( NULL != expr_c ) free( expr_c );
+                        expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
+                    }
+                    
+                    /* Receive the result */
+                    MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
+
+                    /* Put it in the result queue */
+                    results.push_back( std::string( expr_c ) );
+#if DEBUG
+                    receivedresults++;
+#endif
+                }
+                    
+                /* Send more work  */
+                send_work_addslave( input, results, src );
+            } else {
+                std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
+            }
+            
+        }
+    }
+
+    /* Wait until everyone is done */
+
+    running = np - 1; // all the slaves are running
+    while( running > 0 ) {
+        /* TODO: all we should receive here is either TAG_EXPR, or TAG_PULL if the input is too small */
+        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
+        src = status.MPI_SOURCE;
+        
+        if( len != 0 ) {
+            if( len > expr_c_size ) {
+                expr_c_size = len;
+                if( NULL != expr_c ) free( expr_c );
+                expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
+            }
+
+            /* Receive the result */
+            MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
+
+            /* Put it in the result queue */
+            results.push_back( std::string( expr_c ) );
+#if DEBUG
+            receivedresults++;
+#endif
+        }
+        send_add_or_end_addslave( results, src, &running, pzero );
+    }
+
+    /* Add whatever I have left */
+    Tens = add_expressions( results, symbols );
+    
+#if DEBUG
+    std::cout << "Received " << receivedresults << " results" << std::endl;
+
+    std::cout << "Tpara=" << Tens << ";" << std::endl;
+#endif
+    
+    if( NULL != expr_c ) free( expr_c );
+    return Tens;
+}
+
+void multiply_1level_slave_addslave3( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
+    gi::ex Tens;
+    unsigned int a2, a4;
+    unsigned int len = 0;
+    
+    parameters_2_1_t params( 0, 0 );   /* must match DT_PARAMETERS_2_1 below */
+    MPI_Status status;
+    char* expr_c;
+
+    int rank;
+    MPI_Comm_rank( comm, &rank );
+
+    /* Ask for some work */
+    
+    MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
+
+    /* Compute the set of symbols */
+    
+    gi::lst symbols = all_symbols_3D( size );
+
+    while( true ){
+        /* Receive a set of parameters */
+
+        MPI_Recv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm, &status );
+        
+        if( status.MPI_TAG == TAG_WORK ){
+            a4 = params.a4;
+            a2 = params.a2;
+
+            Tens = one_level1_product( &T, size, a4, a2 );
+            send_result( Tens );
+
+        } else {
+            if( status.MPI_TAG == TAG_ADD ) {
+                /* Receive a set of expressions to add */
+
+                /* Number of expressions received */
+                int nb = params.a4;
+                
+                /* Length of each string */
+
+                unsigned int* lengths = (unsigned int*) malloc( nb * sizeof( unsigned int ) );
+                MPI_Recv( lengths, nb, MPI_UNSIGNED, ROOT, TAG_ADD, comm, &status ); // MPI_UNSIGNED matches the buffer type
+                std::vector<std::string> results_s;
+                char* c_str;
+                int i;
+                unsigned int slen; // local length; do not shadow the outer len
+                for( i = 0 ; i < nb ; i++ ) {
+                    slen = lengths[i] + 1;
+                    c_str = (char*) malloc( slen );
+                    MPI_Recv( c_str, slen, MPI_CHAR, ROOT, TAG_ADD, comm, &status );
+                    c_str[slen-1] = '\0';    // The master sends C++ strings, which do not contain the final '\0'
+                    results_s.push_back( std::string( c_str ) );
+                    free( c_str );
+                }
+                free( lengths );
+
+                /* Delinearize all the expressions and add them */
+
+                Tens = add_expressions( results_s, symbols );
+                
+                /* Send the result */
+
+                send_result( Tens );
+
+            } else {
+                if( status.MPI_TAG == TAG_END ){
+                    return;
+                } else {
+                    std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
+                }
+            }
+        }
+    }
+}
+
+/* Communication protocol:
+   M -> W: always the same size, therefore a single message
+           (plus, for TAG_ADD: an array of lengths, then one char array per expression)
+   W -> M: an unsigned int (the length of the expression), then the expression itself (array of chars)
+*/
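+
+/* For reference, a minimal sketch (not compiled in) of the M -> W sequence the
+   TAG_ADD branch above expects: the number of expressions travels in the a4
+   field of the parameters, then one array of string lengths, then each
+   serialized expression (without its final '\0'). The real implementation is
+   the send_work_addslave() / send_add_or_end_addslave() helpers; the names
+   below are illustrative. */
+#if 0
+static void send_expressions_to_add_sketch( std::vector<std::string>& results, int peer, MPI_Comm comm = MPI_COMM_WORLD ) {
+    int nb = results.size();
+    parameters_2_1_t p( nb, 0 );                  /* nb is carried in the a4 field */
+    MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_ADD, comm );
+
+    /* One array with the length of every string, '\0' not included */
+    unsigned int* lengths = (unsigned int*) malloc( nb * sizeof( unsigned int ) );
+    for( int i = 0 ; i < nb ; i++ ) lengths[i] = results[i].length();
+    MPI_Send( lengths, nb, MPI_UNSIGNED, peer, TAG_ADD, comm );
+
+    /* Then the strings themselves, one message each; the worker appends the '\0' */
+    for( int i = 0 ; i < nb ; i++ )
+        MPI_Send( (void*) results[i].c_str(), lengths[i], MPI_CHAR, peer, TAG_ADD, comm );
+
+    free( lengths );
+    results.clear();
+}
+#endif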
+        
+gi::ex multiply_1level_mw_addslave3( tensor3D_t& T, int size ) {  // simpler: same dimension everywhere
+    int rank;
+    gi::ex Tens = 0;
+    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+
+    /* Create a new datatype for the parameters */
+    
+    create_parameters_datatype_2_1();
+
+    /* Here we go */
+
+    if( 0 == rank ) {
+        Tens = multiply_1level_master_addslave3( T, size );
+    } else {
+        multiply_1level_slave_addslave3( T, size );
+    }
+
+    /* Finalize */
+    
+    free_parameters_2_1_dt();
+    return Tens;
+}
+