Commit d8aa6421 authored by Camille Coti

Alternative implementation of the parallel final addition. A small bug remains in some cases.

parent 6d574e28
......@@ -20,7 +20,7 @@ else
TAULIB =
endif
CFLAGS = -Wall -g -O3 -Wno-unused-variable -std=c++11 $(TAUOPT)
CFLAGS = -Wall -g -O3 -Wno-unused-variable -std=c++11 $(TAUOPT) -DSCATTERGATHER
LDOPT = -lginac $(TAULIB)
MPIEXEC = mpiexec
......
......@@ -10,6 +10,8 @@
#include "utils.h"
#include "profiling.h"
#include <unistd.h>
namespace gi = GiNaC;
#define MAXLENADD 1 // 256
......@@ -25,6 +27,202 @@ unsigned int maxlen( std::vector<std::string> expressions ){
return len;
}
#ifdef SCATTERGATHER
gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
int i, peer, nb, len;
int rank, size;
int chunk, end;
char* expr_c;
char* total_c = NULL;
int expr = 0;
int* m_len;
int* m_disp;
char* toto = NULL; /* on the root, points at the characters of the current expression */
int totallen = 0;
MPI_Comm_rank( comm, &rank );
MPI_Comm_size( comm, &size );
if( 0 == rank ) {
/* If the expressions are short, compute the sum locally */
if( maxlen( expressions ) < MAXLENADD )
return add_expressions( expressions, symbols );
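/* NB: only the root evaluates this shortcut; the other ranks go on to the collective calls below */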
nb = expressions.size();
}
/* Broadcast the number of expressions to add */
MPI_Bcast( &nb, 1, MPI_INT, 0, comm );
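/* Layout on the root: entry [ peer * nb + expr ] describes the chunk of expression expr assigned to rank peer; the other ranks only use the first nb entries they receive */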
int* lengths = ( int*) malloc( size * nb * sizeof( int ) );
int* displ = ( int*) malloc( size * nb * sizeof( int ) );
/* m_len and m_disp hold per-peer lengths and displacements; they are allocated on every rank, although only the root uses their contents */
m_len = ( int*) malloc( size * sizeof( int ) );
m_disp = ( int*) malloc( size * sizeof( int ) );
/* Send all the number of elements and displacements, grouped by peer */
if( 0 == rank ) {
// i = 0;
for( auto s: expressions ) {
chunk = ( s.length() + size - 1 ) / size; /* ceiling of length / size, computed in integer arithmetic */
for( peer = 0 ; peer < size ; peer++ ) {
if( 0 == peer ) {
displ[ expr ] = 0;
end = chunk;
} else {
end = displ[ peer * nb + expr] + chunk;
}
/* How much are we going to send: stop at a + or - sign (and keep the sign) */
if( end > (int) s.length() - 1 ) end = s.length() - 1; /* do not scan past the end of the expression */
while( !( s[end] == '+' || s[end] == '-' || end >= (int) s.length() - 1 ) ){
end++;
}
if( end < (int) s.length() - 1 ) end--; /* end the chunk just before the sign, so the sign starts the next chunk */
if( 0 == peer ) {
lengths[ expr ] = end + 1;
} else {
// std::cout << "peer " << peer << " expr " << expr << " end " << end << std::endl;
lengths[ peer * nb + expr ] = end - displ[ peer * nb + expr ] + 1;
}
if( peer < size - 1 ) displ[ ( peer + 1 ) * nb + expr ] = end + 1; /* the next chunk starts at the sign itself */
}
expr++;
}
/* std::cout << "Lengths: " << std::endl;
for( peer = 0 ; peer < size ; peer++ ) {
i = 0;
for( auto s: expressions ) {
std::cout << lengths[peer*nb+i] << " ";
i++;
}
std::cout << std::endl;
}
std::cout << "Displacements: " << std::endl;
for( peer = 0 ; peer < size ; peer++ ) {
i = 0;
for( auto s: expressions ) {
std::cout << displ[peer*nb+i] << " ";
i++;
}
std::cout << std::endl;
} */
}
// std::cout << nb << " expressions to receive" << std::endl;
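/* Scatter the per-expression chunk lengths and displacements; the root's own chunk is the first block of each array */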
MPI_Scatter( lengths, nb, MPI_INT, lengths, nb, MPI_INT, 0, comm );
MPI_Scatter( displ, nb, MPI_INT, displ, nb, MPI_INT, 0, comm );
/* sleep( rank );
std::cout << "Lengths: " << std::endl;
for( i = 0 ; i < nb ; i++ ) {
std::cout << lengths[i] << " ";
}
std::cout << std::endl;
std::cout << "Displacements: " << std::endl;
for( i = 0 ; i < nb ; i++ ) {
std::cout << displ[i] << " ";
}
std::cout << std::endl;*/
/* Allocate the reception buffer */
unsigned int maxlen = 0;
std::vector<std::string> results_s;
for( expr = 0 ; expr < nb ; expr++ ) {
maxlen = ( maxlen < lengths[ expr ] ) ? lengths[ expr ] : maxlen; //std::max( maxlen, lengths[ expr ] );
}
expr_c = (char*)malloc( maxlen + 1 ); // Add a final \0
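/* expr_c receives this rank's chunk of each expression, one expression at a time */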
/* Send the expressions */
for( expr = 0 ; expr < nb ; expr++ ) {
len = lengths[expr]; // correct even for rank == 0
if( 0 == rank ) {
for( peer = 0 ; peer < size ; peer++ ) {
m_disp[ peer ] = displ[ peer * nb + expr];
m_len[ peer ] = lengths[ peer * nb + expr];
}
}
if( 0 == rank ) toto = const_cast<char*>( expressions[expr].c_str() );
MPI_Scatterv( toto, m_len, m_disp, MPI_CHAR,
expr_c, len, MPI_CHAR,
0, comm );
expr_c[ len ] = '\0'; // The master sends C++ strings, which do not contain the final '\0'
results_s.push_back( std::string( expr_c ) );
/* TODO: this can be overlapped with the computation of the previous addition */
}
/* Add them */
Tens = add_expressions( results_s, symbols );
/* Send the result to the master */
std::string expr_s = linearize_expression( Tens );
len = expr_s.length();
MPI_Gather( &len, 1, MPI_INT, m_len, 1, MPI_INT, 0, comm );
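/* The root computes where each partial result lands in the gathered buffer and allocates it, with extra room for separators */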
if( 0 == rank ) {
for( peer = 0 ; peer < size ; peer++ ){
m_disp[peer] = totallen;
totallen += m_len[peer];
}
total_c = (char*) malloc( ( totallen + size ) * sizeof( char ) );
}
free( expr_c ); /* the reception buffer is no longer needed */
expr_c = const_cast<char*>( expr_s.c_str() );
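/* Debug output: the last few characters of the local partial result */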
std::cout << expr_c[ len-5 ] << expr_c[ len-4 ] << expr_c[ len-3 ] << expr_c[ len-2 ] << expr_c[ len-1 ] << std::endl;
MPI_Gatherv( expr_c, len, MPI_CHAR, total_c, m_len, m_disp, MPI_CHAR, 0, comm );
if( 0 == rank ){ /* replace the \n's by + */
for( peer = 1 ; peer < size ; peer++ ){
total_c[ m_disp[peer] ] = '+' ;
}
std::cout << total_c[ totallen-5 ] << total_c[ totallen-4 ] << total_c[ totallen-3 ] << total_c[ totallen-2 ] << total_c[ totallen-1 ] << std::endl;
total_c[ totallen ] = '\0'; /* terminate the gathered buffer before de-linearizing it */
// std::cout << total_c << std::endl;
Tens = de_linearize_expression( std::string( total_c ), symbols );
}
free( lengths );
free( displ );
/* m_len and m_disp were allocated on every rank, so free them everywhere */
free( m_len );
free( m_disp );
if( 0 == rank ) {
free( total_c );
}
return Tens;
}
#else
gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
int size, i, nb, len;
......@@ -119,6 +317,7 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
free( expr_c );
return Tens;
}
#endif
/*******************************************************************************
* Parallel 1-level decomposition with addition on a slave *
......@@ -219,11 +418,17 @@ gi::ex multiply_1level_master_addslave4( tensor3D_t& T, unsigned int size, MPI_C
/* Put it in the result queue */
results.push_back( std::string( expr_c ) );
}
#ifdef SCATTERGATHER
send_end( src, pzero );
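/* In the scatter/gather variant the slave then joins the collective final addition */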
#else
/* Do not send the end signal yet */
#endif
running--;
}
/* Add whatever I have left */
Tens = add_expressions_parall( results, symbols, pzero, comm );
#if DEBUG
......@@ -304,6 +509,10 @@ void multiply_1level_slave_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm
} else {
if( status.MPI_TAG == TAG_END ){
#ifdef SCATTERGATHER
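/* Join the collective final addition with an empty local contribution; the expressions come from the master */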
std::vector<std::string> toto;
Tens = add_expressions_parall( toto, symbols, params, comm );
#endif
return;
} else {
std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
......