diff --git a/src/Makefile b/src/Makefile
index a257ab72d62caa572f734ec4c7967c3ed22170f8..db349944217d64bb42965ab207d409ef64936b4e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -30,7 +30,7 @@ MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp  \
          perf.cpp  sequential.cpp  tensormatrix_mpi.cpp      \
          utils.cpp  utils_parall.cpp profiling.cpp mw_combined.cpp \
 	 masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp  \
-	 masterworker3.cpp
+	 masterworker3.cpp mw_addslave3.cpp
 
 MPIOBJ= $(MPISRC:.cpp=.o)
 
diff --git a/src/tensormatrix.h b/src/tensormatrix.h
index cfb5fbc9298c3c3d445c2c10b8a6ca401e793a01..f72a89c2d95ab45751a76e000d2a6723aaf30123 100644
--- a/src/tensormatrix.h
+++ b/src/tensormatrix.h
@@ -28,6 +28,7 @@ gi::ex multiply_1level_mw2( tensor3D_t&, int );
 gi::ex multiply_1level_mw3( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
+gi::ex multiply_1level_mw_addslave3( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
 gi::ex multiply_combined( tensor3D_t&, int );
diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp
index 279d24eb8010b4b0edc23b07abb2d53174b40b95..79ea3ddf9b04b079dffcfc621a1c819e2825f7ee 100644
--- a/src/tensormatrix_mpi.cpp
+++ b/src/tensormatrix_mpi.cpp
@@ -32,6 +32,7 @@ namespace gi = GiNaC;
    - o/O: Master-Worker, middle grain -> multiply_1level_mw3
    - A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
    - B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
+   - D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave3
    - H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
    - i/I: Hierarchical master-worker, coarser grain -> multiply_1level_mw_hierarch
    - C/c: Combined -> multiply_combined
@@ -110,6 +111,10 @@ int main( int argc, char** argv ){
             case 'b':
                 tostart = 'b';
                 break;
+            case 'D':
+            case 'd':
+                tostart = 'd';
+                break;
             case 'H':
             case 'h':
                 tostart = 'h';
@@ -168,6 +173,9 @@ int main( int argc, char** argv ){
     case 'b':
         Tpara = multiply_1level_mw_addslave2( T, N );
         break;
+    case 'd':
+        Tpara = multiply_1level_mw_addslave3( T, N );
+        break;
     case 'h':
         Tpara = multiply_2levels_mw_hierarch( T, N );
         break;
diff --git a/src/utils_parall.cpp b/src/utils_parall.cpp
index 609c9c77f8984ef8beea48ca317eb2b872449d8c..a4b18ba0a02a451de75cda6c80567c555992e0ce 100644
--- a/src/utils_parall.cpp
+++ b/src/utils_parall.cpp
@@ -175,6 +175,63 @@ void send_expressions_to_add( std::vector<std::string>& results, int peer ) {
     free( lengths );
 }
 
+void send_expressions_to_add( std::vector<std::string>& results, int peer, parameters_2_1_t p ) {
+
+    /* Fill a bogus parameter object: p only selects this overload, the expression count is carried by p2 */
+    int nb = results.size();
+    int i;
+    char* expr;
+    parameters_2_1_t p2( nb, 0 );
+
+    MPI_Send( &p2, 1, DT_PARAMETERS_2_1, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the length of each string */
+    unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
+    for( i = 0 ; i < nb ; i++ ) {
+        lengths[i] = results[i].length();
+    }
+    MPI_Send( lengths, nb, MPI_INT, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the strings (should be nicely pipelined) */
+    for( i = 0 ; i < nb ; i++ ) {
+        expr = const_cast<char*>( results[i].c_str() );
+        MPI_Send( expr, results[i].length(), MPI_CHAR, peer, TAG_ADD, MPI_COMM_WORLD );
+    }
+    results.erase( results.begin(), results.end() );
+
+    free( lengths );
+
+}
+
+void send_expressions_to_add( std::vector<std::string>& results, int peer, parameters_s_t p ) {
+
+    /* Fill a bogus parameter object: p only selects this overload, the expression count is carried by p2 */
+    int nb = results.size();
+    int i;
+    char* expr;
+    parameters_s_t p2( nb );
+
+    MPI_Send( &p2, 1, DT_PARAMETERS_S, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the length of each string */
+    unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
+    for( i = 0 ; i < nb ; i++ ) {
+        lengths[i] = results[i].length();
+    }
+    MPI_Send( lengths, nb, MPI_INT, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the strings (should be nicely pipelined) */
+    for( i = 0 ; i < nb ; i++ ) {
+        expr = const_cast<char*>( results[i].c_str() );
+        MPI_Send( expr, results[i].length(), MPI_CHAR, peer, TAG_ADD, MPI_COMM_WORLD );
+    }
+
+    results.erase( results.begin(), results.end() );
+
+    free( lengths );
+
+}
+
 /* M -> W: Send either a set of expressions to add, or the end signal */
 
 void send_add_or_end_addslave(  std::vector<std::string>& results, int peer, int* running ){
@@ -196,13 +253,26 @@ void send_add_or_end_addslave(  std::vector<std::string>& results, int peer, int
 
     if( results.size() > maxresult ) {
         /* if the result queue is too big, send it */
-        send_expressions_to_add( results, peer );
+        send_expressions_to_add( results, peer, p );
     } else {
         send_end( peer, p );
         (*running)--;
     }
 }
 
+void send_add_or_end_addslave(  std::vector<std::string>& results, int peer, int* running, parameters_2_1_t p ){
+
+    /* Do I have a lot of results to be treated in the result queue? */
+
+    if( results.size() > maxresult ) {
+        /* if the result queue is too big, send it */
+        send_expressions_to_add( results, peer, p );
+    } else {
+        send_end( peer, p );
+        (*running)--;
+    }
+}
+
 /* M -> W: Send work: either a set of expressions to add, or a parameter set */
 
 void send_work_addslave(  std::vector<parameters_t>& input, std::vector<std::string>& results, int peer ) {
@@ -218,8 +288,20 @@ void send_work_addslave(  std::vector<parameters_t>& input, std::vector<std::str
 void send_work_addslave(  std::vector<parameters_s_t>& input, std::vector<std::string>& results, int peer ) {
 
     if( results.size() > maxresult ) {
+        parameters_s_t p( 0 );
         /* if the result queue is too big, send it */
-        send_expressions_to_add( results, peer );
+        send_expressions_to_add( results, peer, p );
+    } else {
+        send_work( input, peer );
+    }
+}
+
+void send_work_addslave(  std::vector<parameters_2_1_t>& input, std::vector<std::string>& results, int peer ) {
+
+    if( results.size() > maxresult ) {
+        parameters_2_1_t p( 0, 0 );
+        /* if the result queue is too big, send it */
+        send_expressions_to_add( results, peer, p );
     } else {
         send_work( input, peer );
     }    
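
Note: mw_addslave3.cpp, which drives these new parameters_2_1_t overloads, is added to the Makefile but is not part of this diff. Purely as an illustration of how the helpers above are meant to compose on the master side, here is a minimal, hedged sketch: receive_result_string() is a hypothetical placeholder for however the real master collects a worker's serialized partial result, and the loop structure is assumed from the existing addslave variants (declared in the utils_parall.h hunk below), not taken from the actual file.

/* Hedged sketch, not part of this patch: assumed master loop for the
   parameters_2_1_t "addition on a slave" variant. receive_result_string()
   is hypothetical; the other helpers are declared in utils_parall.h. */
#include <vector>
#include <string>
#include <mpi.h>
#include "utils_parall.h"

std::string receive_result_string( MPI_Status* status );  /* hypothetical helper */

void master_loop_sketch( std::vector<parameters_2_1_t>& input, int nbworkers ) {
    std::vector<std::string> results;   /* serialized partial results waiting to be added on a slave */
    int running = nbworkers;
    MPI_Status status;

    /* (initial distribution of one parameter set per worker is omitted here) */
    while( running > 0 ) {
        /* Collect a serialized partial result from any worker */
        results.push_back( receive_result_string( &status ) );
        int peer = status.MPI_SOURCE;

        if( !input.empty() ) {
            /* Hand out more work, or flush the result queue if it grew too big */
            send_work_addslave( input, results, peer );
        } else {
            /* No work left: flush the result queue or send the end signal */
            parameters_2_1_t p( 0, 0 );
            send_add_or_end_addslave( results, peer, &running, p );
        }
    }
}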
diff --git a/src/utils_parall.h b/src/utils_parall.h
index f93aef01525e0813e533b319b21582ef2993a272..45a978ec47d7f98ae27a484dbd5b66134b5cbb85 100644
--- a/src/utils_parall.h
+++ b/src/utils_parall.h
@@ -46,14 +46,18 @@ gi::ex de_linearize_expression( std::string, gi::lst );
 
 void send_work( std::vector<parameters_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_work( std::vector<parameters_2_2_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
-void send_work( std::vector<parameters_2_1_t>& input, int peer, MPI_Comm comm );
+void send_work( std::vector<parameters_2_1_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_work( std::vector<parameters_s_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 
 void send_expressions_to_add( std::vector<std::string>&, int );
+void send_expressions_to_add( std::vector<std::string>&, int, parameters_2_1_t );
+void send_expressions_to_add( std::vector<std::string>&, int, parameters_s_t );
 void send_add_or_end_addslave(  std::vector<std::string>&, int, int* );
 void send_add_or_end_addslave(  std::vector<std::string>&, int, int*, parameters_s_t );
+void send_add_or_end_addslave(  std::vector<std::string>&, int, int*, parameters_2_1_t );
 void send_work_addslave(  std::vector<parameters_t>&, std::vector<std::string>&, int ) ;
 void send_work_addslave(  std::vector<parameters_s_t>&, std::vector<std::string>&, int ) ;
+void send_work_addslave(  std::vector<parameters_2_1_t>&, std::vector<std::string>&, int );
 void send_result( gi::ex T, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
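
For completeness: the receiving side of the TAG_ADD protocol implemented by the new send_expressions_to_add() overloads above lives in the worker code of mw_addslave3.cpp, which this patch does not show. The sketch below only mirrors the three messages visible on the send side: a parameter object on TAG_ADD, an array of string lengths, then one MPI_CHAR message per expression. Function and parameter names (receive_and_add_sketch, symbols, master, nb) are illustrative; in particular, nb would come from the field of the parameters_2_1_t object the worker has just received, whose name is not visible in this diff, and TAG_ADD / de_linearize_expression() are taken from the project headers.

/* Hedged sketch, not part of this patch: worker-side counterpart of
   send_expressions_to_add( results, peer, parameters_2_1_t ). */
#include <vector>
#include <string>
#include <mpi.h>
#include <ginac/ginac.h>
#include "utils_parall.h"

namespace gi = GiNaC;

gi::ex receive_and_add_sketch( int nb, gi::lst symbols, int master ) {
    /* 1. Lengths of the serialized expressions (sent as nb MPI_INTs) */
    std::vector<unsigned int> lengths( nb );
    MPI_Recv( lengths.data(), nb, MPI_INT, master, TAG_ADD, MPI_COMM_WORLD, MPI_STATUS_IGNORE );

    /* 2. One message per expression; no terminating '\0' is transmitted */
    gi::ex sum = 0;
    for( int i = 0 ; i < nb ; i++ ) {
        std::string expr( lengths[i], '\0' );
        MPI_Recv( &expr[0], lengths[i], MPI_CHAR, master, TAG_ADD, MPI_COMM_WORLD, MPI_STATUS_IGNORE );
        sum += de_linearize_expression( expr, symbols );
    }
    return sum;   /* the accumulated sum would then go back to the master, e.g. via send_result() */
}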