diff --git a/src/Makefile b/src/Makefile
index a4eefda9d85f2267223b27af0e071049ae633546..cb3cdf2b1c47c34c7a3f8bae8ad35b4d2116399b 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -28,8 +28,8 @@ NP = 5
 
 MPISRC = masterworker.cpp mw_addslave.cpp \
 	perf.cpp sequential.cpp tensormatrix_mpi.cpp	\
-	utils.cpp utils_parall.cpp profiling.cpp
-#mw_combined.cpp hierarchical.cpp
+	utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp
+# hierarchical.cpp
 
 MPIOBJ= $(MPISRC:.cpp=.o)
diff --git a/src/mw_combined.cpp b/src/mw_combined.cpp
index b6bc69eb49180787bf239b7d71fe4fffa3efd0d4..663eda0cd765851901bb00b5c0e5359bc82ba4ce 100644
--- a/src/mw_combined.cpp
+++ b/src/mw_combined.cpp
@@ -25,8 +25,8 @@ typedef enum {
 /* This one is a "regular" master. It returns either when it is done,
    or when it decides to switch to another algorithm. */
 
-end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, unsigned int size, gi::ex& Tens, MPI_Comm comm = MPI_COMM_WORLD ) {
-    unsigned int a1, a2, a3, b1;
+end_code_t multiply_combined_master_initial( tensor3D_t& T, unsigned int size, gi::ex& Tens, MPI_Comm comm = MPI_COMM_WORLD ) {
+    unsigned int a1, a2, a4;
     gi::ex A;
     gi::lst symbols;
@@ -52,6 +52,7 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
     j = 0;
     int receivedresults = 0;
+    unsigned int N = size/2;
 
     std::vector<parameters_t> input;
     std::vector<std::string> results_s;
@@ -59,20 +60,17 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
 
     /* Build a list of argument sets */
 
-    for( a1 = 0 ; a1 < size; a1++ ){
+    for( a4 = 0 ; a4 < N ; a4++ ){
 	i=i+1;
-	for( a2 = 0; a2 < size ; a2++ ){
-	    j=j+1;
-	    for( a3 = 0 ; a3 < size ; a3++ ){
-		A = T[a1][a2][a3];
-		for( b1 = 0 ; b1 < size ; b1++ ){
-		    parameters_t p( A, a1, a2, a3, b1 );
-		    input.push_back( p );
-		}
+	for( a2 = 0; a2 < N ; a2++ ){
+	    j=j+1;
+	    for( a1 = 0 ; a1 < N ; a1++ ){
+		parameters_t p( a4, a2, a1 );
+		input.push_back( p );
+	    }
 	}
     }
-    }
-
+
     /* Compute the set of symbols */
     /* Could be done while the first slave is working */
@@ -90,31 +88,29 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
 
     /* Distribute the work */
 
     while( input.size() > 0 ) {
-	t_start = rdtsc();
+        t_start = rdtsc();
         MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
         src = status.MPI_SOURCE;
-	t_wait = rdtsc() - t_start;
-	times[ times_idx] = t_wait;
-	times_idx = ( times_idx + 1 ) % ( np - 1 );
-	if( !initialround )
-	    t_average = std::accumulate( times.begin(), times.end(), 0.0 )/(double)(np - 1);
-
-	std::cout << "wait " << t_wait << std::endl;
-
+        t_wait = rdtsc() - t_start;
+        times[ times_idx] = t_wait;
+        times_idx = ( times_idx + 1 ) % ( np - 1 );
+        if( !initialround )
+            t_average = std::accumulate( times.begin(), times.end(), 0.0 )/(double)(np - 1);
+
         if( status.MPI_TAG == TAG_PULL ) {
             /* Nothing else will come: just send wome work */
             send_work( input, src, comm );
-
-	    if( initialround ){
-		running++;
-		if( np - 1 == running ) initialround = false; // everyone is at work
-	    }
+
+            if( initialround ){
+                running++;
+                if( np - 1 == running ) initialround = false; // everyone is at work
+            }
         } else {
             if( status.MPI_TAG == TAG_RES ){
                 src = status.MPI_SOURCE;
-
+
                 /* The first message contains the length of what is coming next */
                 if( len != 0 ) {
                     if( len > expr_c_size ) {
@@ -125,67 +121,67 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
 
                     /* Receive the result */
                     MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm,
                               &status );
-
+
                     /* put it in the result queue */
                     std::string s( expr_c );
-
-		    if( algo == ALGO_MW) {
-			send_work( input, src, comm );
+
+                    if( algo == ALGO_MW) {
+                        send_work( input, src, comm );
                     }
-
+
                     /* Process what I have just received */
-
-		    if( ALGO_MW == algo ) {
-			t_start = rdtsc();
-			gi::ex received = de_linearize_expression( s, symbols );
-			Tens += received;
-			t_add = rdtsc() - t_start;
-			std::cout << "Add " << t_add << std::endl;
+
+                    if( ALGO_MW == algo ) {
+                        t_start = rdtsc();
+                        gi::ex received = de_linearize_expression( s, symbols );
+                        Tens += received;
+                        t_add = rdtsc() - t_start;
+                        std::cout << "Add " << t_add << std::endl;
 #if DEBUG
-			results.push_back( received );
-			results_s.push_back( s );
-			receivedresults++;
+                        results.push_back( received );
+                        results_s.push_back( s );
+                        receivedresults++;
 #endif
-
-			if( !initialround && t_add > t_average ) {
-			    /* We are spending too much time adding these results. Now we are going to ask a worker to do this. */
-			    std::cout << "The master spent too much time computing the sum. Switch to ADDSLAVE algorithm" << std::endl;
-			    algo = ALGO_ADDSLAVE;
-			}
-		    } else {
-			if( ALGO_ADDSLAVE == algo ) {
-			    results_s.push_back( s );
-			    send_work_addslave( input, results_s, src );
-			} else {
-			    std::cout << "ERROR: unknown algorithm on the master " << algo << std::endl;
-			}
-		    }
+
+                        if( !initialround && t_add > t_average ) {
+                            /* We are spending too much time adding these results. Now we are going to ask a worker to do this. */
+                            std::cout << "The master spent too much time computing the sum. Switch to ADDSLAVE algorithm" << std::endl;
+                            algo = ALGO_ADDSLAVE;
+                        }
+                    } else {
+                        if( ALGO_ADDSLAVE == algo ) {
+                            results_s.push_back( s );
+                            send_work_addslave( input, results_s, src );
+                        } else {
+                            std::cout << "ERROR: unknown algorithm on the master " << algo << std::endl;
+                        }
+                    }
                 } else {
                     /* Send more work */
                     send_work( input, src, comm );
-
+
                 }
             } else{
                 std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
            }
        }
    }
-
-    /* Wait until everyone is done */
-
+
+    /* Wait until everyone is done */
+
     running = np - 1; // all the slaves are running
     while( running > 0 ) {
         MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
         src = status.MPI_SOURCE;
-
+
         if( len != 0 ) {
             if( len > expr_c_size ) {
                 expr_c_size = len;
                 if( NULL != expr_c ) free( expr_c );
                 expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
             }
-
+
             /* Receive the result */
             MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
@@ -222,9 +218,9 @@ end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, uns
 
 /* The traditional slave */
 
-void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm comm = MPI_COMM_WORLD ) {
+void multiply_combined_slave_initial( tensor3D_t& T, int size, MPI_Comm comm = MPI_COMM_WORLD ) {
     gi::ex Tens;
-    int a1, a2, a3, b1;
+    int a1, a2, a4;
     //    gi::ex A;
     unsigned int len = 0;
@@ -248,21 +244,19 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
 
     while( true ){
         /* Receive a set of parameters */
-	t_start = rdtsc();
+        t_start = rdtsc();
         MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm, &status );
-	t_wait = rdtsc() - t_start;
+        t_wait = rdtsc() - t_start;
 
         if( status.MPI_TAG == TAG_WORK ){
             a1 = params.a1;
             a2 = params.a2;
-	    a3 = params.a3;
-	    b1 = params.b1;
-	    gi::symbol A( std::string( params.A ) );
+            a4 = params.a4;
+
+            t_start = rdtsc();
+            Tens = one_level1_product( &T, size, a4, a2, a1 );
+            t_compute = rdtsc() - t_start;
-	    t_start = rdtsc();
-	    Tens = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
-	    t_compute = rdtsc() - t_start;
-
             /* TODO if we waited for too long */
             if( t_wait > t_compute ) {}
@@ -273,7 +267,7 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
 
             /* Receive a set of expressions to add */
 
             /* Number of expressions received */
-            int nb = params.a1;
+            int nb = params.a4;
 
             /* Length of each string */
@@ -291,22 +285,22 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
                 results_s.push_back( std::string( c_str ) );
                 free( c_str );
             }
-
+
             /* Delinearize all the expressions and add them */
-
+
             Tens = add_expressions( results_s, symbols );
 
             /* Send the result */
-
+
             send_result( Tens );
-
+
         } else {
-	    if( status.MPI_TAG == TAG_END ){
-		return;
-	    } else {
+            if( status.MPI_TAG == TAG_END ){
+                return;
+            } else {
                 std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
-	    }
-	}
+            }
+        }
         }
     }
 }
@@ -315,31 +309,31 @@ void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size,
  * Combined master-worker                                                      *
 *******************************************************************************/
 
-gi::ex multiply_combined_master( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
+gi::ex multiply_combined_master( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
     gi::ex Tens = 0;
     end_code_t rc;
 
     /* Initially: start as a traditional M/W */
 
-    rc = multiply_combined_master_initial( T, J, size, Tens );
+    rc = multiply_combined_master_initial( T, size, Tens );
 
     switch( rc ){
     case FINISHED:
-	return Tens;
+        return Tens;
     }
     return Tens;
 }
 
-void multiply_combined_worker( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
+void multiply_combined_worker( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
     gi::ex Tens = 0;
 
     std::cout << "worker" << std::endl;
 
-    multiply_combined_slave_initial( T, J, size );
+    multiply_combined_slave_initial( T, size );
 }
 
-gi::ex multiply_combined( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
+gi::ex multiply_combined( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
     int rank;
     gi::ex Tens = 0;
@@ -352,9 +346,9 @@ gi::ex multiply_combined( tensor3D_t& T, matrix_int_t& J, int size ) { // simpl
     /* Here we go */
 
     if( 0 == rank ) {
-	Tens = multiply_combined_master( T, J, size );
+        Tens = multiply_combined_master( T, size );
     } else {
-	multiply_combined_worker( T, J, size );
+        multiply_combined_worker( T, size );
     }
 
     /* Finalize */
diff --git a/src/tensormatrix.h b/src/tensormatrix.h
index 5f471ded12bcb98d474b8a8f59c7af794dc58528..f87ac5ad24e18b14137a99bb34031fc8463d256f 100644
--- a/src/tensormatrix.h
+++ b/src/tensormatrix.h
@@ -26,7 +26,7 @@ gi::ex multiply_2levels( tensor3D_t&, matrix_int_t&, int );
 gi::ex multiply_1level_mw( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
 gi::ex multiply_1level_mw_hierarch( tensor3D_t&, matrix_int_t&, int );
-gi::ex multiply_combined( tensor3D_t&, matrix_int_t&, int );
+gi::ex multiply_combined( tensor3D_t&, int );
 
 /*******************************************************************************
  * Default values                                                              *
diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp
index f1aa1dd9581bece858df7c35f480d652b6c196e8..73889e091980cbfe3091804b3f1554c3ab2d2450 100644
--- a/src/tensormatrix_mpi.cpp
+++ b/src/tensormatrix_mpi.cpp
@@ -139,10 +139,10 @@ int main( int argc, char** argv ){
             break;
         /*case 'h':
             Tpara = multiply_1level_mw_hierarch( T, J, N );
-            break;
-        case 'c':
-            Tpara = multiply_combined( T, J, N );
             break;*/
+        case 'c':
+            Tpara = multiply_combined( T, N );
+            break;
         case 's':
             Tpara = multiply_seq( T, J, N );
             break;