diff --git a/src/Makefile b/src/Makefile
index cb3cdf2b1c47c34c7a3f8bae8ad35b4d2116399b..d9a4393e3c0d2d4a07e46d7a026a44f62dda19ed 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -26,10 +26,10 @@ LDOPT = -lginac $(TAULIB)
 
 MPIEXEC = mpiexec
 NP = 5
 
-MPISRC = masterworker.cpp mw_addslave.cpp \
+MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
 	perf.cpp sequential.cpp tensormatrix_mpi.cpp \
 	utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp
-#	hierarchical.cpp
+
 
 MPIOBJ= $(MPISRC:.cpp=.o)
diff --git a/src/hierarchical.cpp b/src/hierarchical.cpp
index ed447ccab0b12a862f5462ac99e7a8e2e00a9c92..87168ffc7e0e8bdaedd12b529563d433a3677d58 100644
--- a/src/hierarchical.cpp
+++ b/src/hierarchical.cpp
@@ -61,12 +61,16 @@ void create_communicators_hierarch( MPI_Comm& COMM_FOREMEN, MPI_Comm& COMM_TEAM
 }
 
-gi::ex multiply_1level_foreman_hierarch_distribute_work( tensor3D_t& T, matrix_int_t& J, int size, parameters_t params, gi::lst symbols, MPI_Comm comm_team, int rank_foreman /* DEBUG */ ) {
+/*******************************************************************************
+ *                                   Foreman                                   *
+ *******************************************************************************/
+
+gi::ex multiply_2levels_foreman_hierarch_distribute_work( tensor3D_t& T, int size, parameters_2_1_t params, gi::lst symbols, MPI_Comm comm_team, int rank_foreman /* DEBUG */ ) {
     gi::ex Tens = 0;
-    gi::ex A;
-    int a1, a2, a3, b1, b2, b3, c1, c2;
-    
+    int a1, a2, a4, a6;
+    int N = size / 2;
+    
     MPI_Status status;
     int unsigned len;
     char* expr_c;
@@ -78,28 +82,21 @@ gi::ex multiply_1level_foreman_hierarch_distribute_work( tensor3D_t& T, matrix_i
     /* Compute the points that need to be computed */
 
-    A = gi::symbol( params.A );
-    a1 = params.a1;
     a2 = params.a2;
-    a3 = params.a3;
-    b1 = params.b1;
+    a4 = params.a4;
 
-    std::vector<parameters_2_t> input;
+    std::vector<parameters_2_2_t> input;
     std::vector<gi::ex> results;
 
-    for( b2 = 0 ; b2 < size ; b2++ ) {
-        for( b3 = 0 ; b3 < size ; b3++ ) {
-            for( c1 = 0 ; c1 < size ; c1++ ) {
-                for( c2 = 0 ; c2 < size ; c2++ ) {
-                    parameters_2_t p( A, a1, a2, a3, b1, b2, b3, c1, c2 );
-                    input.push_back( p );
-                }
-            }
+    for( a6 = 0 ; a6 < N ; a6++ ) {
+        for( a1 = 0 ; a1 < N ; a1++ ) {
+            parameters_2_2_t p( a4, a2, a1, a6 );
+            input.push_back( p );
         }
     }
 
     /* Distribute the work */
 
-    /* Very copy/paste from multiply_1level_master -> possible refactoring here */
+    /* Very copy/paste from multiply_2levels_master -> possible refactoring here */
 
     while( input.size() > 0 ) {
         MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm_team, &status );
@@ -182,7 +179,7 @@ gi::ex multiply_1level_foreman_hierarch_distribute_work( tensor3D_t& T, matrix_i
     return Tens;
 }
 
-void multiply_1level_foreman_hierarch_finalize( MPI_Comm comm_team ) {
+void multiply_2levels_foreman_hierarch_finalize( MPI_Comm comm_team ) {
     int src, np, running = 0;
     unsigned int len;
@@ -200,13 +197,13 @@ void multiply_1level_foreman_hierarch_finalize( MPI_Comm comm_team ) {
     return;
 }
 
-void multiply_1level_foreman_hierarch( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm comm_foremen, MPI_Comm comm_team ) {
+void multiply_2levels_foreman_hierarch( tensor3D_t& T, int size, MPI_Comm comm_foremen, MPI_Comm comm_team ) {
     gi::ex Tens;
     gi::lst symbols;
     unsigned int len = 0;
 
-    parameters_t params;
+    parameters_2_1_t params;
     MPI_Status status;
     char* expr_c;
@@ -224,25 +221,25 @@ void multiply_1level_foreman_hierarch( tensor3D_t& T, matrix_int_t& J, int size,
     while( true ){
 
         /* Receive a set of parameters */
-        
-        MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm_foremen, &status );
-        
+
+        MPI_Recv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm_foremen, &status );
+
         if( status.MPI_TAG == TAG_WORK ){
 
             /* Distribute the work on my workers */
 
-            Tens = multiply_1level_foreman_hierarch_distribute_work( T, J, size, params, symbols, comm_team, rank /* DEBUG */ );
+            Tens = multiply_2levels_foreman_hierarch_distribute_work( T, size, params, symbols, comm_team, rank /* DEBUG */ );
 
-            /* Send the result to the master */
+            /* Send the result to the master */
 
-            send_result( Tens, comm_foremen);
+            send_result( Tens, comm_foremen );
 
         } else {
             if( status.MPI_TAG == TAG_END ){
-                
+
                 /* The end: forward the signal to our workers */
-                multiply_1level_foreman_hierarch_finalize( comm_team );
+                multiply_2levels_foreman_hierarch_finalize( comm_team );
                 return;
             } else {
                 std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
@@ -251,13 +248,17 @@ void multiply_1level_foreman_hierarch( tensor3D_t& T, matrix_int_t& J, int size,
     }
 }
 
-void multiply_1level_slave_hierarch( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm comm ) {
+/*******************************************************************************
+ *                                    Worker                                   *
+ *******************************************************************************/
+
+void multiply_2levels_slave_hierarch( tensor3D_t& T, int size, MPI_Comm comm ) {
     gi::ex Tens;
-    int a1, a2, a3, b1, b2, b3, c1, c2;
+    int a1, a2, a4, a6;
     unsigned int len = 0;
 
-    parameters_2_t params;
+    parameters_2_2_t params;
     MPI_Status status;
     char* expr_c;
@@ -272,21 +273,16 @@ void multiply_1level_slave_hierarch( tensor3D_t& T, matrix_int_t& J, int size, M
     while( true ){
 
         /* Receive a set of parameters */
 
-        MPI_Recv( &params, 1, DT_PARAMETERS_2, ROOT, MPI_ANY_TAG, comm, &status );
+        MPI_Recv( &params, 1, DT_PARAMETERS_2_2, ROOT, MPI_ANY_TAG, comm, &status );
+
        	if( status.MPI_TAG == TAG_WORK ){
-            a1 = params.a1;
+            a4 = params.a4;
             a2 = params.a2;
-            a3 = params.a3;
-            b1 = params.b1;
-            b2 = params.b2;
-            b3 = params.b3;
-            c1 = params.c1;
-            c2 = params.c2;
-
-            gi::symbol A( std::string( params.A ) );
-
-            Tens = one_level2_product( &T, &J, A, size, a1, a2, a3, b1, b2, b3, c1, c2 );
+            a1 = params.a1;
+            a6 = params.a6;
+
+            Tens = two_level2_product( &T, size, a4, a2, a1, a6 );
 
             if( Tens.is_zero() ) {
                 len = 0;
@@ -324,6 +320,152 @@ void multiply_1level_slave_hierarch( tensor3D_t& T, matrix_int_t& J, int size, M
 }
 
+/*******************************************************************************
+ *                                    Master                                   *
+ *******************************************************************************/
+
+gi::ex multiply_2levels_master( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
+    gi::ex Tens = 0;
+    unsigned int a2, a4;
+    gi::lst symbols;
+
+    MPI_Status status;
+    char* expr_c;
+    size_t expr_c_size = 0;
+    int src, np, running = 0;
+    unsigned int len;
+    parameters_2_1_t pzero( 0, 0 );
+
+    MPI_Comm_size( comm, &np );
+
+    expr_c = NULL;
+    expr_c = (char*) malloc( 3279 ); // TMP
+
+    int i, j;
+    i = 0;
+    j = 0;
+
+    int receivedresults = 0;
+    unsigned int N = size/2;
+
+    std::vector<parameters_2_1_t> input;
+    std::vector<std::string> results_s;
+    std::vector<gi::ex> results;
+
+    /* Build a list of argument sets */
+
+    for( a4 = 0 ; a4 < N ; a4++ ){
+        for( a2 = 0; a2 < N ; a2++ ){
+            parameters_2_1_t p( a4, a2 );
+            input.push_back( p );
+        }
+    }
+
+    /* Compute the set of symbols */
+    /* Could be done while the first slave is working */
+
+    symbols = all_symbols_3D( size );
+
+    /* Distribute the work */
+
+    while( input.size() > 0 ) {
+        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
+        src = status.MPI_SOURCE;
+
+        if( status.MPI_TAG == TAG_PULL ) {
+
+            /* Nothing else will come: just send some work */
+            send_work( input, src, comm );
+            running++;
+
+        } else {
+            if( status.MPI_TAG == TAG_RES ){
+                src = status.MPI_SOURCE;
+
+                /* The first message contains the length of what is coming next */
+                if( len != 0 ) {
+                    if( len > expr_c_size ) {
+                        expr_c_size = len;
+                        if( NULL != expr_c ) free( expr_c );
+                        expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
+                    }
+
+                    /* Receive the result */
+                    MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
+
+                    /* put it in the result queue */
+                    std::string s( expr_c );
+
+                    send_work( input, src, comm );
+
+                    /* Process what I have just received */
+                    /* Could be given to a slave... */
+                    gi::ex received = de_linearize_expression( s, symbols );
+                    Tens += received;
+#if DEBUG
+                    results.push_back( received );
+                    results_s.push_back( s );
+                    receivedresults++;
+#endif
+                } else {
+                    /* Send more work */
+                    send_work( input, src, comm );
+                }
+            } else{
+                std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
+            }
+        }
+    }
+
+    /* Wait until everyone is done */
+
+    // running = np - 1; // all the foremen are running
+    while( running > 0 ) {
+
+        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
+        src = status.MPI_SOURCE;
+
+        if( len != 0 ) {
+            if( len > expr_c_size ) {
+                expr_c_size = len;
+                if( NULL != expr_c ) free( expr_c );
+                expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
+            }
+
+            /* Receive the result */
+            MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
+
+            /* And send the END signal */
+            send_end( src, pzero, comm );
+            running--;
+
+            /* Process what I have just received */
+            /* Could be given to a slave... */
+            /* put it in the result queue */
+            std::string s( expr_c );
+            gi::ex received = de_linearize_expression( s, symbols );
+            Tens += received;
+#if DEBUG
+            results.push_back( received );
+            results_s.push_back( s );
+            receivedresults++;
+#endif
+        } else {
+            send_end( src, comm );
+            running--;
+        }
+    }
+
+#if DEBUG
+    std::cout << "Received " << receivedresults << " results" << std::endl;
+
+    std::cout << "Tpara=" << Tens << ";" << std::endl;
+#endif
+
+    if( NULL != expr_c) free( expr_c );
+    return Tens;
+}
+
 /* The master sends a parameter set to a foreman.
    The foreman computes a set of parameter sets and distributes them to the workers.
    The foreman adds the results and, when everything is done,
    sends the total to the master.
@@ -338,7 +480,7 @@ void multiply_1level_slave_hierarch( tensor3D_t& T, matrix_int_t& J, int size, M
    F -> M: Same
 */
 
-gi::ex multiply_1level_mw_hierarch( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
+gi::ex multiply_2levels_mw_hierarch( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
     int rank;
     gi::ex Tens = 0;
     MPI_Comm COMM_FOREMEN, COMM_TEAM;
@@ -370,14 +512,14 @@ gi::ex multiply_1level_mw_hierarch( tensor3D_t& T, matrix_int_t& J, int size ) {
     /* Here we go*/
 
     if( 0 == rank ) {
-        Tens = multiply_1level_master( T, J, size, COMM_FOREMEN );
+        Tens = multiply_2levels_master( T, size, COMM_FOREMEN );
     } else {
         int rank_foreman;
         MPI_Comm_rank( COMM_TEAM, &rank_foreman );
         if( 0 == rank_foreman ) {
-            multiply_1level_foreman_hierarch( T, J, size, COMM_FOREMEN, COMM_TEAM );
+            multiply_2levels_foreman_hierarch( T, size, COMM_FOREMEN, COMM_TEAM );
         } else {
-            multiply_1level_slave_hierarch( T, J, size, COMM_TEAM );
+            multiply_2levels_slave_hierarch( T, size, COMM_TEAM );
         }
     }
diff --git a/src/tensormatrix.cpp b/src/tensormatrix.cpp
index 77fdc2bf5df1bcb7a640b00821f8846c12eeefd3..eeba3e67a4e2fb28fb7e3a98c7ae146cf1202dc5 100644
--- a/src/tensormatrix.cpp
+++ b/src/tensormatrix.cpp
@@ -1,7 +1,6 @@
 #include <iostream>
 #include <cstdlib>
 #include <ginac/ginac.h>
-//#include <starpu.h>
 
 #define N 4
 
@@ -18,7 +17,7 @@ typedef std::vector<int> vector_int_t;
 /* libginac-dev ginac-tools
 
-   g++ -O3 -Wall -o tensormatrix tensormatrix.cpp -lginac
+   g++ -std=c++11 -O3 -Wall -o tensormatrix tensormatrix.cpp -lginac
 */
 
 /* Sequentiel sur Minimum
 real	3m31,034s
diff --git a/src/tensormatrix.h b/src/tensormatrix.h
index a21dca21e0d20aa55e93ab8e2fde7c34a2fd49c5..dd936bfd53c439327a22d0adff267281484c6a9f 100644
--- a/src/tensormatrix.h
+++ b/src/tensormatrix.h
@@ -25,7 +25,7 @@ gi::ex multiply_2levels( tensor3D_t&, int );
 // parallel
 gi::ex multiply_1level_mw( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
-gi::ex multiply_1level_mw_hierarch( tensor3D_t&, matrix_int_t&, int );
+gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
 gi::ex multiply_combined( tensor3D_t&, int );
 
 /*******************************************************************************
diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp
index 39a89de53e3dd0a15bc11cc1341d0c9fe270a067..9b42408df4429210ae39695fb15e936556e79945 100644
--- a/src/tensormatrix_mpi.cpp
+++ b/src/tensormatrix_mpi.cpp
@@ -114,7 +114,7 @@ int main( int argc, char** argv ){
             break;
         }
         if( argc >= 4 ) {
-            nbforemen = atoi( argv[3] ); /* these two variables are obtained from the same CI parameter but since they are */
+            nbforemen = atoi( argv[3] ); /* these two variables are obtained from the same CLI parameter but since they are */
             maxresult = atoi( argv[3] ); /* used in two functions for two purposes, use two different names */
         }
     }
@@ -138,9 +138,9 @@ int main( int argc, char** argv ){
     case 'a':
         Tpara = multiply_1level_mw_addslave( T, N );
         break;
-        /*case 'h':
-        Tpara = multiply_1level_mw_hierarch( T, J, N );
-        break;*/
+    case 'h':
+        Tpara = multiply_2levels_mw_hierarch( T, N );
+        break;
     case 'c':
         Tpara = multiply_combined( T, N );
         break;
diff --git a/src/utils_parall.cpp b/src/utils_parall.cpp
index 64c7beccccb51340839383a80d5cbf99f2015be7..3ce59790315815f7455f2601b32f1519ca5c8d26 100644
--- a/src/utils_parall.cpp
+++ b/src/utils_parall.cpp
@@ -31,9 +31,11 @@ parameters_2_1_t::parameters_2_1_t( unsigned int a4, unsigned int a2 ){
     this->a2 = a2;
 }
 
-parameters_2_2_t::parameters_2_2_t( unsigned int a4, unsigned int a2 ){
-    this->a6 = a6;
+parameters_2_2_t::parameters_2_2_t( unsigned int a4, unsigned int a2, unsigned int a1, unsigned int a6 ){
+    this->a4 = a4;
+    this->a2 = a2;
     this->a1 = a1;
+    this->a6 = a6;
 }
 
 void create_parameters_datatye(){
@@ -46,8 +48,8 @@ void create_parameters_datatye_2_1(){
     MPI_Type_commit( &DT_PARAMETERS_2_1 );
 }
 
-void create_parameters_datatye_2(){
-    MPI_Type_contiguous( 2, MPI_UNSIGNED, &DT_PARAMETERS_2_2 );
+void create_parameters_datatye_2_2(){
+    MPI_Type_contiguous( 4, MPI_UNSIGNED, &DT_PARAMETERS_2_2 );
     MPI_Type_commit( &DT_PARAMETERS_2_2 );
 }
 
@@ -79,6 +81,11 @@ void send_end( int peer, MPI_Comm comm ) {
     MPI_Send( &para, 1, DT_PARAMETERS, peer, TAG_END, comm );
 }
 
+void send_end( int peer, parameters_2_1_t p, MPI_Comm comm ) {
+    /* The parameters_2_1_t argument is not used, but needed to distinguish between the overloads */
+    MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_END, comm );
+}
+
 void send_end_batch( int peer, MPI_Comm comm ) {
     parameters_t para;
     MPI_Send( &para, 1, DT_PARAMETERS_2_1, peer, TAG_END_BATCH, comm );
@@ -98,6 +105,12 @@ void send_work( std::vector<parameters_2_1_t>& input, int peer, MPI_Comm comm ){
     MPI_Send( &para, 1, DT_PARAMETERS_2_1, peer, TAG_WORK, comm );
 }
 
+void send_work( std::vector<parameters_2_2_t>& input, int peer, MPI_Comm comm ){
+    parameters_2_2_t para = input.back();
+    input.pop_back();
+    MPI_Send( &para, 1, DT_PARAMETERS_2_2, peer, TAG_WORK, comm );
+}
+
 /* M -> W: Send a set of expressions to be added */
 
 void send_expressions_to_add( std::vector<std::string>& results, int peer ) {
diff --git a/src/utils_parall.h b/src/utils_parall.h
index bf9ebb22cd28e85d744d53c5241e45fe6237ad3e..a0083e88a1e7e5684c8f98a74a9fbb17fb030cea 100644
--- a/src/utils_parall.h
+++ b/src/utils_parall.h
@@ -21,10 +21,11 @@ public:
     parameters_2_1_t( unsigned int, unsigned int );
     parameters_2_1_t( void ){};
 };
+
 class parameters_2_2_t{
 public:
-    unsigned int a6, a1;
-    parameters_2_2_t( unsigned int, unsigned int );
+    unsigned int a4, a2, a6, a1;
+    parameters_2_2_t( unsigned int, unsigned int, unsigned int, unsigned int );
     parameters_2_2_t( void ){};
 };
 
@@ -38,12 +39,14 @@ gi::ex de_linearize_expression( std::string, gi::lst );
 
 void send_work( std::vector<parameters_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_work( std::vector<parameters_2_2_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
+void send_work( std::vector<parameters_2_1_t>& input, int peer, MPI_Comm comm );
 void send_expressions_to_add( std::vector<std::string>&, int );
 void send_add_or_end_addslave( std::vector<std::string>&, int, int* );
 void send_work_addslave( std::vector<parameters_t>&, std::vector<std::string>&, int ) ;
 void send_result( gi::ex T, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, MPI_Comm comm = MPI_COMM_WORLD );
+void send_end( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end_batch( int peer, MPI_Comm comm = MPI_COMM_WORLD );
 
 void create_parameters_datatye( void );
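
A note on the wire format introduced above: parameters_2_2_t is shipped as MPI_Type_contiguous( 4, MPI_UNSIGNED, ... ), and the foreman/worker loops are driven purely by message tags (TAG_WORK vs. TAG_END). The sketch below illustrates that pattern in isolation, reduced to one sender and one receiver. The struct, tag values and rank assignment are placeholders made up for this example, not the project's definitions in utils_parall.h / utils_parall.cpp; it only assumes, as the patch does, that the four unsigned fields are laid out contiguously.

// Standalone sketch: ship a 4-field parameter block with a contiguous MPI
// datatype and steer the receive loop with tags.
// Build: mpicxx -O2 -o params_sketch params_sketch.cpp
// Run:   mpiexec -np 2 ./params_sketch
#include <mpi.h>
#include <cstdio>

struct params_2_2 {                 // same layout idea as parameters_2_2_t
    unsigned int a4, a2, a1, a6;
};

static const int TAG_WORK_EX = 1;   // illustrative tag values, not the project's
static const int TAG_END_EX  = 2;

int main( int argc, char** argv ) {
    MPI_Init( &argc, &argv );
    int rank, np;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &np );
    if( np < 2 ) {
        if( rank == 0 ) std::fprintf( stderr, "run with at least 2 ranks\n" );
        MPI_Finalize();
        return 1;
    }

    /* One datatype covering the four unsigned fields, as in create_parameters_datatye_2_2() */
    MPI_Datatype dt_params;
    MPI_Type_contiguous( 4, MPI_UNSIGNED, &dt_params );
    MPI_Type_commit( &dt_params );

    if( rank == 0 ) {               /* "master": one work item, then the end signal */
        params_2_2 p = { 3, 1, 0, 2 };
        MPI_Send( &p, 1, dt_params, 1, TAG_WORK_EX, MPI_COMM_WORLD );
        MPI_Send( &p, 1, dt_params, 1, TAG_END_EX,  MPI_COMM_WORLD ); /* payload ignored */
    } else if( rank == 1 ) {        /* "worker": loop until TAG_END arrives */
        while( true ) {
            params_2_2 p;
            MPI_Status status;
            MPI_Recv( &p, 1, dt_params, 0, MPI_ANY_TAG, MPI_COMM_WORLD, &status );
            if( status.MPI_TAG == TAG_END_EX ) break;
            std::printf( "work item: a4=%u a2=%u a1=%u a6=%u\n", p.a4, p.a2, p.a1, p.a6 );
        }
    }

    MPI_Type_free( &dt_params );
    MPI_Finalize();
    return 0;
}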
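On the two-level topology itself (master / foremen / workers, as described in the comment block of hierarchical.cpp): the two communicators that multiply_2levels_mw_hierarch passes around can be obtained with two MPI_Comm_split calls, one grouping each team, one grouping the master together with the team leaders. The sketch below is only an assumed shape for illustration; the project's create_communicators_hierarch() and its use of the nbforemen CLI parameter may assign ranks to teams differently.

// Standalone sketch: split MPI_COMM_WORLD into per-team communicators plus a
// foremen communicator (master + one leader per team).
#include <mpi.h>
#include <cstdio>

int main( int argc, char** argv ) {
    MPI_Init( &argc, &argv );
    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    const int nbteams = 2;          /* stand-in for the nbforemen CLI parameter */

    /* Rank 0 is the master; the remaining ranks are dealt round-robin into teams. */
    int team_color = ( rank == 0 ) ? MPI_UNDEFINED : ( rank - 1 ) % nbteams;
    MPI_Comm comm_team;
    MPI_Comm_split( MPI_COMM_WORLD, team_color, rank, &comm_team );

    /* The master plus the rank-0 process of each team form the foremen communicator. */
    int team_rank = -1;
    if( comm_team != MPI_COMM_NULL ) MPI_Comm_rank( comm_team, &team_rank );
    int foremen_color = ( rank == 0 || team_rank == 0 ) ? 0 : MPI_UNDEFINED;
    MPI_Comm comm_foremen;
    MPI_Comm_split( MPI_COMM_WORLD, foremen_color, rank, &comm_foremen );

    if( rank == 0 )
        std::printf( "rank 0: master (foremen communicator only)\n" );
    else if( team_rank == 0 )
        std::printf( "rank %d: foreman of team %d\n", rank, team_color );
    else
        std::printf( "rank %d: worker in team %d\n", rank, team_color );

    if( comm_team != MPI_COMM_NULL )    MPI_Comm_free( &comm_team );
    if( comm_foremen != MPI_COMM_NULL ) MPI_Comm_free( &comm_foremen );
    MPI_Finalize();
    return 0;
}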