diff --git a/src/Makefile b/src/Makefile
index 59876781d359430ede4b6a1f68356a1a888588ff..4e6658a78f6cba84e1ce334e9bfd52d4c6828186 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -20,7 +20,7 @@ else
 TAULIB =
 endif
 
-CFLAGS = -Wall -g -O3 -Wno-unused-variable -std=c++11 $(TAUOPT)
+CFLAGS = -Wall -g -O3 -Wno-unused-variable -std=c++11 $(TAUOPT) -DSCATTERGATHER
 LDOPT = -lginac $(TAULIB)
 
 MPIEXEC = mpiexec
diff --git a/src/mw_addslave4.cpp b/src/mw_addslave4.cpp
index 016269ebb2b27393fde92703ba47c3159c8c488e..c2a380e408ae24053578b958d80c048a7687ab86 100644
--- a/src/mw_addslave4.cpp
+++ b/src/mw_addslave4.cpp
@@ -10,6 +10,8 @@
 #include "utils.h"
 #include "profiling.h"
 
+#include <unistd.h>
+
 namespace gi = GiNaC;
 
 #define MAXLENADD 1 // 256
@@ -25,6 +27,202 @@ unsigned int maxlen( std::vector<std::string> expressions ){
     return len;
 }
 
+#ifdef SCATTERGATHER
+
+gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
+
+    gi::ex Tens = 0;
+    int i, peer, nb, len;
+    int rank, size;
+    int chunk, end;
+    char* expr_c;
+    char* total_c = NULL;
+    int expr = 0;
+    int* m_len;
+    int* m_disp;
+    char* toto = NULL;      /* root-side send buffer for MPI_Scatterv */
+    int totallen = 0;
+
+    MPI_Comm_rank( comm, &rank );
+    MPI_Comm_size( comm, &size );
+
+    if( 0 == rank ) {
+        /* If the expressions are short, compute the sum locally.
+           NB: only the root returns here; the other ranks must not have
+           entered this function, otherwise they block in the broadcast below. */
+        if( maxlen( expressions ) < MAXLENADD )
+            return add_expressions( expressions, symbols );
+
+        nb = expressions.size();
+    }
+
+    /* Broadcast the number of expressions to add (nb is an int: MPI_INT, not MPI_UNSIGNED) */
+
+    MPI_Bcast( &nb, 1, MPI_INT, 0, comm );
+
+    int* lengths = (int*) malloc( size * nb * sizeof( int ) );
+    int* displ = (int*) malloc( size * nb * sizeof( int ) );
+    /* needed on every rank: used by the scatters and gathers below */
+    m_len = (int*) malloc( size * sizeof( int ) );
+    m_disp = (int*) malloc( size * sizeof( int ) );
+
+    /* Compute all the numbers of elements and displacements, grouped by peer */
+
+    if( 0 == rank ) {
+
+        for( auto s: expressions ) {
+            chunk = ( s.length() + size - 1 ) / size;   /* ceil( length / size ) in integer arithmetic */
+
+            for( peer = 0 ; peer < size ; peer++ ) {
+
+                if( 0 == peer ) {
+                    displ[ expr ] = 0;
+                    end = chunk;
+                } else {
+                    end = displ[ peer * nb + expr ] + chunk;
+                }
+
+                /* How much are we going to send: stop at a + or - sign, which stays
+                   with the next chunk. Check the bound first so that we never read
+                   past the end of the string. */
+
+                if( end > (int) s.length() - 1 ) end = s.length() - 1;
+                while( end < (int) s.length() - 1 && !( s[end] == '+' || s[end] == '-' ) ){
+                    end++;
+                }
+                end--;
+                if( 0 == peer ) {
+                    lengths[ expr ] = end + 1;
+                } else {
+                    lengths[ peer * nb + expr ] = end - displ[ peer * nb + expr ] + 1;
+                }
+                /* the next chunk starts at the sign itself, not one character before it */
+                if( peer < size - 1 ) displ[ ( peer + 1 ) * nb + expr ] = end + 1;
+            }
+
+            expr++;
+        }
+
+        /* std::cout << "Lengths: " << std::endl;
+        for( peer = 0 ; peer < size ; peer++ ) {
+            i = 0;
+            for( auto s: expressions ) {
+                std::cout << lengths[ peer * nb + i ] << " ";
+                i++;
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "Displacements: " << std::endl;
+        for( peer = 0 ; peer < size ; peer++ ) {
+            i = 0;
+            for( auto s: expressions ) {
+                std::cout << displ[ peer * nb + i ] << " ";
+                i++;
+            }
+            std::cout << std::endl;
+        } */
+
+    }
+
+    /* Scatter the lengths and the displacements, one row per peer. MPI forbids
+       aliasing the send and receive buffers, so the root receives in place. */
+
+    MPI_Scatter( lengths, nb, MPI_INT, ( 0 == rank ) ? MPI_IN_PLACE : (void*) lengths, nb, MPI_INT, 0, comm );
+    MPI_Scatter( displ, nb, MPI_INT, ( 0 == rank ) ? MPI_IN_PLACE : (void*) displ, nb, MPI_INT, 0, comm );
+
+    /* sleep( rank );
+    std::cout << "Lengths: " << std::endl;
+    for( i = 0 ; i < nb ; i++ ) {
+        std::cout << lengths[i] << " ";
+    }
+    std::cout << std::endl;
+    std::cout << "Displacements: " << std::endl;
+    for( i = 0 ; i < nb ; i++ ) {
+        std::cout << displ[i] << " ";
+    }
+    std::cout << std::endl; */
+
+    /* Allocate the reception buffer */
+
+    unsigned int maxlen = 0;
+    std::vector<std::string> results_s;
+
+    for( expr = 0 ; expr < nb ; expr++ ) {
+        maxlen = ( maxlen < (unsigned int) lengths[ expr ] ) ? lengths[ expr ] : maxlen;
+    }
+    expr_c = (char*) malloc( maxlen + 1 );  /* room for a final '\0' */
+
+    /* Scatter the expressions */
+
+    for( expr = 0 ; expr < nb ; expr++ ) {
+
+        len = lengths[ expr ];  /* correct even for rank == 0 */
+        if( 0 == rank ) {
+            for( peer = 0 ; peer < size ; peer++ ) {
+                m_disp[ peer ] = displ[ peer * nb + expr ];
+                m_len[ peer ] = lengths[ peer * nb + expr ];
+            }
+            toto = const_cast<char*>( expressions[ expr ].c_str() );
+        }
+
+        MPI_Scatterv( toto, m_len, m_disp, MPI_CHAR,
+                      expr_c, len, MPI_CHAR,
+                      0, comm );
+        expr_c[ len ] = '\0';   /* the master sends C++ strings, which do not contain the final '\0' */
+        results_s.push_back( std::string( expr_c ) );
+
+        /* TODO: this can be overlapped with the computation of the previous addition */
+    }
+
+    /* Add them */
+
+    Tens = add_expressions( results_s, symbols );
+
+    /* Send the result to the master */
+
+    std::string expr_s = linearize_expression( Tens );
+    len = expr_s.length();
+
+    MPI_Gather( &len, 1, MPI_INT, m_len, 1, MPI_INT, 0, comm );
+
+    if( 0 == rank ) {
+        for( peer = 0 ; peer < size ; peer++ ){
+            m_disp[ peer ] = totallen;
+            totallen += m_len[ peer ];
+        }
+
+        total_c = (char*) malloc( ( totallen + 1 ) * sizeof( char ) );  /* + 1 for the final '\0' */
+    }
+
+    free( expr_c );             /* the reception buffer is not needed any more */
+    expr_c = const_cast<char*>( expr_s.c_str() );
+
+#if DEBUG
+    std::cout << expr_c[ len-5 ] << expr_c[ len-4 ] << expr_c[ len-3 ] << expr_c[ len-2 ] << expr_c[ len-1 ] << std::endl;
+#endif
+
+    MPI_Gatherv( expr_c, len, MPI_CHAR, total_c, m_len, m_disp, MPI_CHAR, 0, comm );
+
+    if( 0 == rank ){
+        /* Replace the \n's by +: each linearized expression ends with a '\n',
+           which sits just *before* the displacement of the next peer. */
+        for( peer = 1 ; peer < size ; peer++ ){
+            total_c[ m_disp[ peer ] - 1 ] = '+' ;
+        }
+        total_c[ totallen ] = '\0' ;    /* terminate the C string before building a std::string from it */
+#if DEBUG
+        std::cout << total_c[ totallen-5 ] << total_c[ totallen-4 ] << total_c[ totallen-3 ] << total_c[ totallen-2 ] << total_c[ totallen-1 ] << std::endl;
+#endif
+
+        Tens = de_linearize_expression( std::string( total_c ), symbols );
+    }
+
+    free( lengths );
+    free( displ );
+    free( m_len );      /* allocated on every rank, so freed on every rank */
+    free( m_disp );
+    if( 0 == rank ) {
+        free( total_c );
+    }
+    return Tens;
+}
+
+#else
 gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
     gi::ex Tens = 0;
     int size, i, nb, len;
@@ -119,6 +317,7 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
     free( expr_c );
     return Tens;
 }
+#endif
 
 /*******************************************************************************
  *         Parallel 1-level decomposition with addition on a slave             *
@@ -219,11 +418,17 @@ gi::ex multiply_1level_master_addslave4( tensor3D_t& T, unsigned int size, MPI_C
             /* Put it in the result queue */
             results.push_back( std::string( expr_c ) );
         }
+#ifdef SCATTERGATHER
+        send_end( src, pzero );     /* the final addition below is a collective call: release the slaves now */
+
+#else
         /* Do not send the end signal yet */
+#endif
         running--;
+
     }
 
-    /* Add whatever I have left */
+    /* Add whatever I have left */
     Tens = add_expressions_parall( results, symbols, pzero, comm );
 
 #if DEBUG
@@ -304,6 +509,10 @@ void multiply_1level_slave_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm
         } else {
             if( status.MPI_TAG == TAG_END ){
+#ifdef SCATTERGATHER
+                std::vector<std::string> toto;   /* empty: only the root contributes expressions */
+                Tens = add_expressions_parall( toto, symbols, params, comm );
+#endif
                 return;
             } else {
                 std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
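
Note on the chunking rule in add_expressions_parall (hunk @@ -25,6 +27,202 @@): the root cuts each serialized sum into size pieces of roughly equal length, then moves each cut forward to the next '+' or '-', so that no term is split and the sign travels with the piece that follows the cut. Below is a minimal, self-contained sketch of that rule with the MPI bookkeeping stripped away; the name split_at_signs and the nparts parameter are illustrative, not part of the patch:

#include <algorithm>
#include <string>
#include <vector>

/* Cut a serialized sum into at most nparts pieces, moving each cut forward
   to the next '+' or '-'; the sign stays with the piece after the cut. */
std::vector<std::string> split_at_signs( const std::string& s, std::size_t nparts )
{
    std::vector<std::string> parts;
    std::size_t chunk = ( s.length() + nparts - 1 ) / nparts;  /* ceil( length / nparts ) */
    std::size_t begin = 0;

    for( std::size_t p = 0 ; p < nparts && begin < s.length() ; p++ ) {
        std::size_t end = std::min( begin + chunk, s.length() );
        while( end < s.length() && s[end] != '+' && s[end] != '-' )
            end++;
        if( p == nparts - 1 )
            end = s.length();       /* the last piece takes whatever is left */
        parts.push_back( s.substr( begin, end - begin ) );
        begin = end;
    }
    return parts;
}

Because the pieces concatenate back to the original string and every piece after the first begins with its own sign, parsing the pieces separately and summing the results reproduces the original expression: "a*x+b*y-c*z+d" split in two gives "a*x+b*y" and "-c*z+d". Note that, as in the patch, a '-' inside a floating-point exponent such as 1e-10 would still be taken as a cut point.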
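
Note on the result collection in the same hunk: every rank linearizes its partial sum and the root reassembles the total with MPI_Gatherv, turning the separators into '+' before parsing the whole buffer back. Below is a self-contained sketch of that pattern which reserves one gap byte per rank for the separator and the final '\0' (plausibly what the totallen + size allocation in the original code was sized for). Everything here is illustrative: the partial sums are stand-in strings, and the program is built with mpic++ and run under mpiexec.

#include <mpi.h>

#include <cstdlib>
#include <iostream>
#include <string>

int main( int argc, char** argv )
{
    MPI_Init( &argc, &argv );

    int rank, size;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    std::string part = "x" + std::to_string( rank );  /* stand-in for a linearized partial sum */
    int len = (int) part.length();

    int* lens = NULL;
    int* disp = NULL;
    char* total = NULL;
    int totallen = 0;

    if( 0 == rank ) {
        lens = (int*) malloc( size * sizeof( int ) );
        disp = (int*) malloc( size * sizeof( int ) );
    }

    /* 1. gather the lengths */
    MPI_Gather( &len, 1, MPI_INT, lens, 1, MPI_INT, 0, MPI_COMM_WORLD );

    /* 2. compute the displacements, leaving one gap byte per rank */
    if( 0 == rank ) {
        for( int p = 0 ; p < size ; p++ ) {
            disp[p] = totallen + p;
            totallen += lens[p];
        }
        total = (char*) malloc( totallen + size );  /* data + separators + final '\0' */
    }

    /* 3. gather the strings themselves */
    MPI_Gatherv( part.c_str(), len, MPI_CHAR,
                 total, lens, disp, MPI_CHAR, 0, MPI_COMM_WORLD );

    /* 4. fill the gaps with '+' and terminate the C string */
    if( 0 == rank ) {
        for( int p = 1 ; p < size ; p++ )
            total[ disp[p] - 1 ] = '+';
        total[ totallen + size - 1 ] = '\0';
        std::cout << std::string( total ) << std::endl;  /* e.g. x0+x1+x2+x3 */
        free( lens );
        free( disp );
        free( total );
    }

    MPI_Finalize();
    return 0;
}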