diff --git a/src/Makefile b/src/Makefile
index a257ab72d62caa572f734ec4c7967c3ed22170f8..db349944217d64bb42965ab207d409ef64936b4e 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -30,7 +30,7 @@ MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
 	perf.cpp sequential.cpp tensormatrix_mpi.cpp \
 	utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \
 	masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp \
-	masterworker3.cpp
+	masterworker3.cpp mw_addslave3.cpp
 
 MPIOBJ= $(MPISRC:.cpp=.o)
 
diff --git a/src/tensormatrix.h b/src/tensormatrix.h
index cfb5fbc9298c3c3d445c2c10b8a6ca401e793a01..f72a89c2d95ab45751a76e000d2a6723aaf30123 100644
--- a/src/tensormatrix.h
+++ b/src/tensormatrix.h
@@ -28,6 +28,7 @@ gi::ex multiply_1level_mw2( tensor3D_t&, int );
 gi::ex multiply_1level_mw3( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
+gi::ex multiply_1level_mw_addslave3( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
 gi::ex multiply_combined( tensor3D_t&, int );
diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp
index 279d24eb8010b4b0edc23b07abb2d53174b40b95..79ea3ddf9b04b079dffcfc621a1c819e2825f7ee 100644
--- a/src/tensormatrix_mpi.cpp
+++ b/src/tensormatrix_mpi.cpp
@@ -32,6 +32,7 @@ namespace gi = GiNaC;
  - o/O: Master-Worker, middle grain -> multiply_1level_mw3
  - A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
  - B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
+ - D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave3
 - H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
 - i/I: Hierarchical master-worker, coarser grain -> multiply_1level_mw_hierarch
 - C/c: Combined -> multiply_combined
@@ -110,6 +111,10 @@ int main( int argc, char** argv ){
     case 'b':
         tostart = 'b';
         break;
+    case 'D':
+    case 'd':
+        tostart = 'd';
+        break;
     case 'H':
     case 'h':
         tostart = 'h';
@@ -168,6 +173,9 @@ int main( int argc, char** argv ){
     case 'b':
         Tpara = multiply_1level_mw_addslave2( T, N );
         break;
+    case 'd':
+        Tpara = multiply_1level_mw_addslave3( T, N );
+        break;
     case 'h':
         Tpara = multiply_2levels_mw_hierarch( T, N );
         break;
diff --git a/src/utils_parall.cpp b/src/utils_parall.cpp
index 609c9c77f8984ef8beea48ca317eb2b872449d8c..a4b18ba0a02a451de75cda6c80567c555992e0ce 100644
--- a/src/utils_parall.cpp
+++ b/src/utils_parall.cpp
@@ -175,6 +175,63 @@ void send_expressions_to_add( std::vector<std::string>& results, int peer ) {
     free( lengths );
 }
 
+void send_expressions_to_add( std::vector<std::string>& results, int peer, parameters_2_1_t p ) {
+
+    /* Fill a bogus parameter object */
+    int nb = results.size();
+    int i;
+    char* expr;
+    parameters_2_1_t p2( nb, 0 );
+
+    MPI_Send( &p2, 1, DT_PARAMETERS_2_1, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the length of each string */
+    unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
+    for( i = 0 ; i < nb ; i++ ) {
+        lengths[i] = results[i].length();
+    }
+    MPI_Send( lengths, nb, MPI_INT, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the strings (should be nicely pipelined) */
+    for( i = 0 ; i < nb ; i++ ) {
+        expr = const_cast<char*>( results[i].c_str() );
+        MPI_Send( expr, results[i].length(), MPI_CHAR, peer, TAG_ADD, MPI_COMM_WORLD );
+    }
+    results.erase( results.begin(), results.end() );
+
+    free( lengths );
+
+}
+
+void send_expressions_to_add( std::vector<std::string>& results, int peer, parameters_s_t p ) {
+
+    /* Fill a bogus parameter object */
+    int nb = results.size();
+    int i;
+    char* expr;
+    parameters_s_t p2( nb );
+
+    MPI_Send( &p2, 1, DT_PARAMETERS_S, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the length of each string */
+    unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
+    for( i = 0 ; i < nb ; i++ ) {
+        lengths[i] = results[i].length();
+    }
+    MPI_Send( lengths, nb, MPI_INT, peer, TAG_ADD, MPI_COMM_WORLD );
+
+    /* Send the strings (should be nicely pipelined) */
+    for( i = 0 ; i < nb ; i++ ) {
+        expr = const_cast<char*>( results[i].c_str() );
+        MPI_Send( expr, results[i].length(), MPI_CHAR, peer, TAG_ADD, MPI_COMM_WORLD );
+    }
+
+    results.erase( results.begin(), results.end() );
+
+    free( lengths );
+
+}
+
 /* M -> W: Send either a set of expressions to add, or the end signal */
 void send_add_or_end_addslave( std::vector<std::string>& results, int peer, int* running ){
 
@@ -196,13 +253,26 @@ void send_add_or_end_addslave( std::vector<std::string>& results, int peer, int
 
     if( results.size() > maxresult ) {
         /* if the result queue is too big, send it */
-        send_expressions_to_add( results, peer );
+        send_expressions_to_add( results, peer, p );
     } else {
         send_end( peer, p );
         (*running)--;
     }
 }
 
+void send_add_or_end_addslave( std::vector<std::string>& results, int peer, int* running, parameters_2_1_t p ){
+
+    /* Do I have a lot of results to be treated in the result queue? */
+
+    if( results.size() > maxresult ) {
+        /* if the result queue is too big, send it */
+        send_expressions_to_add( results, peer, p );
+    } else {
+        send_end( peer, p );
+        (*running)--;
+    }
+}
+
 /* M -> W: Send work: either a set of expressions to add, or a parameter set */
 void send_work_addslave( std::vector<parameters_t>& input, std::vector<std::string>& results, int peer ) {
 
@@ -218,8 +288,20 @@ void send_work_addslave( std::vector<parameters_s_t>& input, std::vector<std::str
 
     if( results.size() > maxresult ) {
+        parameters_s_t p( 0 );
         /* if the result queue is too big, send it */
-        send_expressions_to_add( results, peer );
+        send_expressions_to_add( results, peer, p );
+    } else {
+        send_work( input, peer );
+    }
+}
+
+void send_work_addslave( std::vector<parameters_2_1_t>& input, std::vector<std::string>& results, int peer ) {
+
+    if( results.size() > maxresult ) {
+        parameters_2_1_t p( 0, 0 );
+        /* if the result queue is too big, send it */
+        send_expressions_to_add( results, peer, p );
     } else {
         send_work( input, peer );
     }
 }
diff --git a/src/utils_parall.h b/src/utils_parall.h
index f93aef01525e0813e533b319b21582ef2993a272..45a978ec47d7f98ae27a484dbd5b66134b5cbb85 100644
--- a/src/utils_parall.h
+++ b/src/utils_parall.h
@@ -46,14 +46,18 @@ gi::ex de_linearize_expression( std::string, gi::lst );
 
 void send_work( std::vector<parameters_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_work( std::vector<parameters_2_2_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
-void send_work( std::vector<parameters_2_1_t>& input, int peer, MPI_Comm comm );
+void send_work( std::vector<parameters_2_1_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_work( std::vector<parameters_s_t>& input, int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_expressions_to_add( std::vector<std::string>&, int );
+void send_expressions_to_add( std::vector<std::string>&, int, parameters_2_1_t );
+void send_expressions_to_add( std::vector<std::string>&, int, parameters_s_t );
 
 void send_add_or_end_addslave( std::vector<std::string>&, int, int* );
 void send_add_or_end_addslave( std::vector<std::string>&, int, int*, parameters_s_t );
+void send_add_or_end_addslave( std::vector<std::string>&, int, int*, parameters_2_1_t );
 void send_work_addslave( std::vector<parameters_t>&, std::vector<std::string>&, int ) ;
 void send_work_addslave( std::vector<parameters_s_t>&, std::vector<std::string>&, int ) ;
+void send_work_addslave( std::vector<parameters_2_1_t>&, std::vector<std::string>&, int );
 void send_result( gi::ex T, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
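
Note on usage: mw_addslave3.cpp itself is not part of the hunks above, so the master loop that drives the new parameters_2_1_t overloads is not visible in this patch. The sketch below only illustrates how those overloads are presumably meant to be combined, modeled on the existing addslave variants; the function name on_worker_result and its expr, peer and running parameters are hypothetical, not code from the repository.

/* Sketch only: assumed master-side reaction to one worker message in the
 * addslave3 scheme, modeled on the existing addslave variants. The function
 * name on_worker_result and its expr/peer/running parameters are hypothetical;
 * the real loop lives in mw_addslave3.cpp, which this patch does not include. */
#include <string>
#include <vector>
#include "utils_parall.h"

void on_worker_result( std::vector<parameters_2_1_t>& input,
                       std::vector<std::string>& results,
                       const std::string& expr,  /* linearized result just received */
                       int peer,                 /* rank of the worker it came from */
                       int* running ) {
    results.push_back( expr );
    if( !input.empty() ) {
        /* Hand out the next parameters_2_1_t work unit; if the result queue has
           grown past maxresult, the queue is shipped to the peer instead so the
           additions are performed on the slave. */
        send_work_addslave( input, results, peer );
    } else {
        /* No work left: either flush the pending results to the peer or send it
           the end signal and decrement the number of running peers. */
        send_add_or_end_addslave( results, peer, running, parameters_2_1_t( 0, 0 ) );
    }
}

The offload decision stays centralized in the two helpers (the maxresult threshold), so the master-side logic can remain identical across the addslave variants; the parameter type alone selects the matching overload and MPI datatype (DT_PARAMETERS_2_1 here, DT_PARAMETERS_S for the coarser-grain variant).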