diff --git a/src/mw_addslave.cpp b/src/mw_addslave.cpp
index f13546c93987450cee37fc899ea5f6c52e285db2..f35e47a2e49edb6951e3cf146287a3a8179d9f51 100644
--- a/src/mw_addslave.cpp
+++ b/src/mw_addslave.cpp
@@ -150,8 +150,6 @@ void multiply_1level_slave_addslave( tensor3D_t& T, matrix_int_t& J, unsigned in
     int rank;
     MPI_Comm_rank( comm, &rank );
 
-    double t_start, t_wait, t_compute;
-
     /* Ask for some work */
     
     MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
@@ -163,9 +161,7 @@ void multiply_1level_slave_addslave( tensor3D_t& T, matrix_int_t& J, unsigned in
     while( true ){
         /* Receive a set of parameters */
 
-	t_start = rdtsc();
         MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm, &status );
-	t_wait = rdtsc() - t_start;
         
         if( status.MPI_TAG == TAG_WORK ){
             a1 = params.a1;
@@ -174,13 +170,7 @@ void multiply_1level_slave_addslave( tensor3D_t& T, matrix_int_t& J, unsigned in
             b1 = params.b1;
             gi::symbol A( std::string( params.A  ) );
 
-	    t_start = rdtsc();
             Tens = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
-	    t_compute = rdtsc() - t_start;
-
-	    /* TODO if we waited for too long */
-	    if( t_wait > t_compute ) {}
-
             send_result( Tens );
 
         } else {
diff --git a/src/mw_combined.cpp b/src/mw_combined.cpp
index b79a9199df23dd52742c902999af32f9851d1c60..b6bc69eb49180787bf239b7d71fe4fffa3efd0d4 100644
--- a/src/mw_combined.cpp
+++ b/src/mw_combined.cpp
@@ -1,5 +1,6 @@
 #include <iostream>
 #include <mpi.h>
+#include <numeric>
 #include <ginac/ginac.h>
 
 #include "products.h"
@@ -35,7 +36,10 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
     int src, np, running = 0;
     unsigned int len;
 
-    double t_start, t_add, t_wait;
+    double t_start, t_add, t_wait, t_average;
+    std::vector<double> times; /* circular buffer of the most recent wait times, one slot per slave */
+    int times_idx;             /* next slot to overwrite in times */
+
     algo_t algo = ALGO_MW;
 
     MPI_Comm_size( comm, &np );
@@ -78,6 +82,10 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
     
     bool initialround = true;
     running = 0;
+    for( i = 0 ; i < np - 1 ; i++ )
+	times.push_back( 0.0 );
+    times_idx = 0;
+    t_average = 0.0;
     
     /* Distribute the work */
 
@@ -86,6 +94,11 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
         MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
         src = status.MPI_SOURCE;
 	t_wait = rdtsc() - t_start;
+	times[ times_idx ] = t_wait;                /* remember this wait in the circular buffer */
+	times_idx = ( times_idx + 1 ) % ( np - 1 );
+	if( !initialround )
+	    t_average = std::accumulate( times.begin(), times.end(), 0.0 ) / (double)( np - 1 );
+
 	std::cout << "wait " << t_wait << std::endl;
        
         if( status.MPI_TAG == TAG_PULL ) {
@@ -116,9 +129,7 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
                     /* put it in the result queue */
                     std::string s( expr_c );
 
-		    if( algo == ALGO_ADDSLAVE ) {
-			send_work_addslave( input, results_s, src );
-		    } else {
+		    if( algo == ALGO_MW ) {
 			send_work( input, src, comm );
                     }
 		    
@@ -135,17 +146,16 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
 			results_s.push_back( s );
 			receivedresults++;
 #endif
-			if( t_add > t_wait ) {
-			    /* We are spending too much time adding these results. Now we are going to ask a worker to do this. */
-			    // TODO use the average NP last wait time instead
-			    // double average = accumulate( v.begin(), v.end(), 0.0)/v.size(); 
-
+
+			if( !initialround && t_add > t_average ) {
+			    /* We are spending too much time adding these results: ask a worker to do it instead. */
 			    std::cout << "The master spent too much time computing the sum. Switch to ADDSLAVE algorithm" << std::endl;
 			    algo = ALGO_ADDSLAVE;
 			}
 		    } else {
 			if( ALGO_ADDSLAVE == algo ) {
 			    results_s.push_back( s );
+			    send_work_addslave( input, results_s, src ); /* hand the queued results to this slave for addition */
 			} else {
 			    std::cout << "ERROR: unknown algorithm on the master " << algo << std::endl;
 			}
@@ -221,6 +231,8 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
     MPI_Status status;
     char* expr_c;
 
+    double t_start, t_wait, t_compute; /* rdtsc() timings: time blocked in MPI_Recv vs. time computing */
+
     int rank;
     MPI_Comm_rank( comm, &rank );
     
@@ -235,7 +247,9 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
     while( true ){
         /* Receive a set of parameters */
         
+	t_start = rdtsc();
         MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm, &status );
+	t_wait = rdtsc() - t_start;
         
         if( status.MPI_TAG == TAG_WORK ){
             a1 = params.a1;
@@ -244,7 +258,13 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
             b1 = params.b1;
             gi::symbol A( std::string( params.A  ) );
             
+	    t_start = rdtsc();
             Tens = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
+	    t_compute = rdtsc() - t_start;
+
+	    /* TODO: react if we spent longer waiting than computing */
+	    if( t_wait > t_compute ) {}
+
             send_result( Tens );
 
         } else {
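
A note on the heuristic introduced in multiply_combined_master_initial: the master records
the last np - 1 wait times in a circular buffer and, once the first round is over, compares
the time spent adding results against their mean. Below is a minimal, stand-alone sketch of
that logic; the names AdaptiveSwitch, record_wait and should_switch are illustrative only
and do not appear in the patch.

#include <iostream>
#include <numeric>
#include <vector>

/* Illustrative stand-alone version of the master's switch heuristic. */
class AdaptiveSwitch {
public:
    explicit AdaptiveSwitch( size_t nslaves )
        : times( nslaves, 0.0 ), idx( 0 ), filled( 0 ) {}

    /* Record the latest wait time in the circular buffer. */
    void record_wait( double t_wait ) {
        times[idx] = t_wait;
        idx = ( idx + 1 ) % times.size();
        if( filled < times.size() ) filled++;
    }

    /* Only meaningful once every slot has been written, mirroring
       the !initialround guard in the patch. */
    bool should_switch( double t_add ) const {
        if( filled < times.size() ) return false;
        double t_average = std::accumulate( times.begin(), times.end(), 0.0 )
                         / (double)times.size();
        return t_add > t_average;
    }

private:
    std::vector<double> times;
    size_t idx, filled;
};

int main() {
    AdaptiveSwitch sw( 3 ); /* np - 1 == 3 slaves */
    const double waits[] = { 10.0, 12.0, 11.0 };
    for( double w : waits ) sw.record_wait( w );
    /* An addition that costs more than the average wait triggers the switch. */
    std::cout << ( sw.should_switch( 20.0 ) ? "ADDSLAVE" : "MW" ) << std::endl;
    return 0;
}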
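The slave side now measures both the time blocked in MPI_Recv and the time spent in
one_level1_product, but the empty if( t_wait > t_compute ) branch is still a TODO. One
conceivable reaction, sketched here with hypothetical names (WorkloadHint, grow_factor are
not part of the patch), is to derive how much larger the next work unit would need to be
for the computation to hide the communication latency.

#include <iostream>

/* Hypothetical helper: compares wait and compute times and suggests a
   scaling factor for the next work unit. */
struct WorkloadHint {
    double t_wait;    /* cycles spent blocked in MPI_Recv */
    double t_compute; /* cycles spent in one_level1_product */

    double grow_factor() const {
        if( t_compute <= 0.0 || t_wait <= t_compute )
            return 1.0;            /* compute already dominates: keep the current size */
        return t_wait / t_compute; /* waited 3x as long -> ask for ~3x the work */
    }
};

int main() {
    WorkloadHint h = { 3000.0, 1000.0 };
    std::cout << "suggested grow factor: " << h.grow_factor() << std::endl;
    return 0;
}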