Commit a403e776 authored by Camille Coti

Master-worker: the slaves keep their partial results locally; a global tree-based addition combines them at the end.

parent 6eb8b453
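The approach introduced here: each worker accumulates its own partial sum locally, and at the end all ranks combine their partial results with a pairwise reduction tree (implemented by add_tree() in mw_tree.cpp below). As a rough illustration of the communication pattern only, here is a minimal sketch that applies the same scheme to a plain double with point-to-point MPI; the function name tree_sum and the message tag are made up for this example, and the real code exchanges serialized GiNaC expressions through send_result()/recv_result() instead.

#include <mpi.h>
#include <cstdio>

/* Pairwise tree reduction: at step s, ranks that are multiples of 2^(s+1)
 * receive from rank + 2^s and add; the others send their partial sum once
 * and are done. Rank 0 ends up with the global sum. */
double tree_sum( double mine, MPI_Comm comm ) {
    int rank, size, step = 0;
    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    double acc = mine;
    while( ( 0x1 << step ) < size ) {
        if( 0 == ( rank % ( 0x1 << ( step + 1 ) ) ) ) {
            int buddy = rank + ( 0x1 << step );
            if( buddy < size ) {                /* receive my buddy's partial sum */
                double other;
                MPI_Recv( &other, 1, MPI_DOUBLE, buddy, 0, comm, MPI_STATUS_IGNORE );
                acc += other;
            }
        } else {
            int buddy = rank - ( 0x1 << step ); /* send my partial sum and stop */
            MPI_Send( &acc, 1, MPI_DOUBLE, buddy, 0, comm );
            return acc;
        }
        step++;
    }
    return acc; /* meaningful on rank 0 only */
}

int main( int argc, char** argv ) {
    MPI_Init( &argc, &argv );
    int rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    double total = tree_sum( (double) rank, MPI_COMM_WORLD );
    if( 0 == rank ) printf( "sum = %g\n", total );
    MPI_Finalize();
    return 0;
}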
......@@ -30,7 +30,8 @@ MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
perf.cpp sequential.cpp tensormatrix_mpi.cpp \
utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \
masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp \
masterworker3.cpp mw_addslave3.cpp mw_addslave4.cpp
masterworker3.cpp mw_addslave3.cpp mw_addslave4.cpp \
mw_tree.cpp
MPIOBJ= $(MPISRC:.cpp=.o)
......
......@@ -32,7 +32,6 @@ unsigned int maxlen( std::vector<std::string> expressions ){
gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
int i, peer, nb, len;
int rank, size;
......@@ -67,21 +66,17 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
int* lengths = ( int*) malloc( size * nb * sizeof( int ) );
int* displ = ( int*) malloc( size * nb * sizeof( int ) );
//if( 0 == rank ) {
m_len = ( int*) malloc( size * sizeof( int ) );
m_disp = ( int*) malloc( size * sizeof( int ) );
// }
/* Send all the number of elements and displacements, grouped by peer */
if( 0 == rank ) {
// i = 0;
for( auto s: expressions ) {
chunk = ceil( s.length() / size );
end = 0;
for( peer = 0 ; peer < size ; peer++ ) {
if( 0 == peer ) {
......@@ -97,72 +92,30 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
end++;
}
end--;
// std::cout << s[end-4] << s[end-3] << s[end-2] << s[end-1] << s[end] << std::endl;
if( 0 == peer ) {
lengths[ expr ] = end + 1;
} else {
// std::cout << "peer " << peer << " expr " << expr << " end " << end << std::endl;
lengths[ peer * nb + expr ] = end - displ[ peer * nb + expr ] + 1;
}
// if( peer < size - 1 ) displ[ ( peer + 1 ) * nb + expr] = end;
// std::cout << "Disp " << displ[ peer * nb + expr] << std::endl;
// std::cout << s << std::endl;
// std::cout << s[ displ[ peer * nb + expr] ] << s[ displ[ peer * nb + expr] + 1 ] << s[ displ[ peer * nb + expr] + 2 ] << s[ displ[ peer * nb + expr] + 3 ] << s[ displ[ peer * nb + expr] + 4 ] << std::endl;
}
expr++;
}
/* std::cout << "Lengths: " << std::endl;
for( peer = 0 ; peer < size ; peer++ ) {
i = 0;
for( auto s: expressions ) {
std::cout << lengths[peer*nb+i] << " ";
i++;
}
std::cout << std::endl;
}
std::cout << "Displacements: " << std::endl;
for( peer = 0 ; peer < size ; peer++ ) {
i = 0;
for( auto s: expressions ) {
std::cout << displ[peer*nb+i] << " ";
i++;
}
std::cout << std::endl;
} */
}
// std::cout << nb << " expressions to receive" << std::endl;
MPI_Scatter( lengths, nb, MPI_INT, lengths, nb, MPI_INT, 0, comm );
MPI_Scatter( displ, nb, MPI_INT, displ, nb, MPI_INT, 0, comm );
/* sleep( rank );
std::cout << "Lengths: " << std::endl;
for( i = 0 ; i < nb ; i++ ) {
std::cout << lengths[i] << " ";
}
std::cout << std::endl;
std::cout << "Displacements: " << std::endl;
for( i = 0 ; i < nb ; i++ ) {
std::cout << displ[i] << " ";
}
std::cout << std::endl;*/
/* Allocate the reception buffer */
int maxlen = 0;
std::vector<std::string> results_s;
for( expr = 0 ; expr < nb ; expr++ ) {
maxlen = ( maxlen < lengths[ expr ] ) ? lengths[ expr ] : maxlen; //std::max( maxlen, lengths[ expr ] );
maxlen = ( maxlen < lengths[ expr ] ) ? lengths[ expr ] : maxlen;
}
expr_c = (char*)malloc( maxlen + 1 ); // Add a final \0
expr_2 = (char*)malloc( maxlen + 1 ); // Add a final \0
expr_2 = (char*)malloc( maxlen + 1 );
/* Send the expressions */
......@@ -181,7 +134,6 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
MPI_Iscatterv( toto, m_len, m_disp, MPI_CHAR,
expr_c, len, MPI_CHAR,
0, comm , &req );
// results_s.push_back( std::string( expr_c ) );
if( 0 != expr ) {
Tens += de_linearize_expression( std::string( expr_2 ), symbols );
......@@ -190,30 +142,12 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
/* get the new expression in expr_c and swap the pointers so that we can work with expr_2 and receive in expr_c */
MPI_Wait( &req, &stat );
expr_c[len ] = '\0'; // The master sends C++ strings, which do not contain the final '\0'
// std::cout << "Got iteration " << expr << std::endl;
tmp = expr_2;
expr_2 = expr_c;
expr_c = tmp;
}
Tens += de_linearize_expression( std::string( expr_2 ), symbols );
// }
// if( rank == 0 ){
// for( peer = 0 ; peer < size ; peer++ ) {
// std::cout << "==" << toto[ m_disp[peer] ] << toto[ m_disp[peer] +1] << toto[ m_disp[peer] +2] << toto[ m_disp[peer] +3] << toto[ m_disp[peer] +4] << std::endl;
// }
// }
// std::cout << expr_c << std::endl;
/* TODO: this can be overlapped with the computation of the previous addition */
// }
/* Add them */
// Tens = add_expressions( results_s, symbols );
// std::cout << Tens << std::endl;
/* Send the result to the master */
......@@ -232,27 +166,18 @@ gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst sym
total_c = (char*) malloc( ( totallen + size ) * sizeof( char ) );
}
// sleep( rank );
expr_c = const_cast<char*>( expr_s.c_str() );
// std::cout << rank << " | " << expr_c << std::endl;
//std::cout << expr_s << std::endl;
// std::cout << expr_c[ len-5 ] << expr_c[ len-4 ] << expr_c[ len-3 ] << expr_c[ len-2 ] << expr_c[ len-1 ] << std::endl;
// std::cout << expr_c[ 0 ] << expr_c[ 1 ] << expr_c[ 2 ] << expr_c[ 3 ] << expr_c[ 4 ] << std::endl;
MPI_Gatherv( expr_c, len, MPI_CHAR, total_c, m_len, m_disp, MPI_CHAR, 0, comm );
if( 0 == rank ){ /* replace the \n's by + */
for( peer = 1 ; peer < size ; peer++ ){
total_c[ m_disp[peer] - 1 ] = '+' ;
}
// std::cout << total_c[ totallen-5 ] << total_c[ totallen-4 ] << total_c[ totallen-3 ] << total_c[ totallen-2 ] << total_c[ totallen-1 ] << std::endl;
total_c[ totallen + size - 1 ] = '\0' ; /* terminate the gathered string */
//x std::cout <<"Total: " << total_c << std::endl;
// Tens = de_linearize_expression( std::string( total_c ), symbols );
}
// Tens = de_linearize_expression( std::string( expr_c ), symbols );
free( lengths );
free( displ );
if( 0 == rank ) {
......
#include <iostream>
#include <mpi.h>
#include <ginac/ginac.h>
#include "products.h"
#include "utils_parall.h"
#include "parall_constants.h"
#include "parall_internal.h"
#include "utils.h"
#include "profiling.h"
namespace gi = GiNaC;
/*******************************************************************************
* Parallel 1-level decomposition. *
* Each worker keeps its result. Add them on a tree at the end *
*******************************************************************************/
gi::ex add_tree( gi::ex mine, gi::lst symbols, MPI_Comm comm = MPI_COMM_WORLD ){
int rank, size, step, buddy;
gi::ex Tens = mine;
MPI_Comm_size( comm, &size );
MPI_Comm_rank( comm, &rank );
step = 0;
while( ( 0x1 << step ) < size ) {
if( 0 == ( rank % ( 0x1 << ( step + 1 ) ) ) ) {
/* even number: I am a receiver */
buddy = rank + (0x1 << step );
if( buddy < size ) {
/* receive the result and add it */
Tens += recv_result( buddy, symbols, comm );
}
} else {
/* odd number: I am a sender */
buddy = rank - (0x1 << step );
send_result( Tens, buddy, comm );
return Tens; /* I have sent my result, I am done here */
}
step++;
}
return Tens;
}
gi::ex multiply_1level_master_local( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
unsigned int a2, a4;
gi::ex A;
gi::lst symbols;
MPI_Status status;
char* expr_c;
size_t expr_c_size = 0;
int src, np, running = 0;
unsigned int len;
parameters_2_1_t pzero( 0, 0 );
MPI_Comm_size( comm, &np );
expr_c = NULL;
expr_c = (char*) malloc( 3279 );
int receivedresults = 0;
unsigned int N = size/2;
std::vector<parameters_2_1_t> input;
std::vector<std::string> results; /* length and char* */
/* Build a list of argument sets */
for( a4 = 0 ; a4 < N ; a4++ ){
for( a2 = 0; a2 < N ; a2++ ){
parameters_2_1_t p( a4, a2 );
input.push_back( p );
}
}
/* Compute the set of symbols */
/* Could be done while the first slave is working */
symbols = all_symbols_3D( size );
/* Distribute the work */
while( input.size() > 0 ) {
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, TAG_PULL, comm, &status );
/* Nothing else can arrive here but pull requests: just send some work */
src = status.MPI_SOURCE;
send_work( input, src );
}
/* Wait until everyone is done */
running = np - 1; // all the slaves are running
while( running > 0 ) {
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, TAG_PULL, comm, &status );
src = status.MPI_SOURCE;
send_end( src, pzero, comm );
running--;
}
/* Take part in the final tree-based addition */
Tens = add_tree( Tens, symbols, comm );
#if DEBUG
std::cout << "Received " << receivedresults << " results" << std::endl;
std::cout << "Tpara=" << Tens << ";" << std::endl;
#endif
if( NULL != expr_c) free( expr_c );
return Tens;
}
void multiply_1level_slave_local( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
int a2, a4;
unsigned int len = 0;
parameters_2_1_t params;
MPI_Status status;
char* expr_c;
MPI_Request req;
std::vector<gi::ex> results;
int rank;
MPI_Comm_rank( comm, &rank );
/* Ask for some work */
MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
/* Compute the set of symbols */
gi::lst symbols = all_symbols_3D( size );
while( true ){
/* Receive a set of parameters */
MPI_Irecv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm, &req );
if( results.size() >= 1 ) {
for( auto e: results ) {
Tens += e;
}
results.clear();
}
MPI_Wait( &req, &status );
/* TODO overlap with an addition */
if( status.MPI_TAG == TAG_WORK ){
a4 = params.a4;
a2 = params.a2;
/* Accumulate the product without clobbering the running sum kept in Tens */
results.push_back( one_level1_product( &T, size, a4, a2 ) );
MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
} else {
if( status.MPI_TAG == TAG_END ){
Tens = add_tree( Tens, symbols, comm );
return;
} else {
std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
}
}
}
}
/* Communication protocol:
M -> W: always the same size, therefore a single message
W -> M: send an unsigned int (the size of the expression), then the expression itself (array of chars)
*/
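/* Illustrative sketch only (not code from this commit): the W -> M side of the
 * protocol described above, for a hypothetical serialized expression `expr`:
 *     unsigned int len = expr.length() + 1;                    // size first...
 *     MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_RES, comm );
 *     MPI_Send( expr.c_str(), len, MPI_CHAR, ROOT, TAG_EXPR, comm ); // ...then the chars
 * The actual implementation is send_result() / recv_result() in utils_parall.cpp
 * (see the corresponding hunk further down). */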
gi::ex multiply_1level_mw_local( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
int rank;
gi::ex Tens = 0;
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
/* Create a new datatype for the parameters */
create_parameters_datatype_2_1();
/* Here we go */
if( 0 == rank ) {
Tens = multiply_1level_master_local( T, size );
} else {
multiply_1level_slave_local( T, size );
}
/* Finalize */
free_parameters_2_1_dt();
return Tens;
}
......@@ -30,6 +30,7 @@ gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave3( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave4( tensor3D_t&, int );
gi::ex multiply_1level_mw_local( tensor3D_t&, int );
gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
gi::ex multiply_combined( tensor3D_t&, int );
......
......@@ -34,6 +34,7 @@ namespace gi = GiNaC;
- B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
- D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave3
- E/e: Master-Worker, middle grain, addition on a slave, parallel final addition -> multiply_1level_mw_addslave4
- T/t: Master-Worker, middle grain, the slaves keep their results locally, tree-based final addition -> multiply_1level_mw_local
- H/h: Hierarchical master-worker -> multiply_2levels_mw_hierarch
- i/I: Hierarchical master-worker, coarser grain -> multiply_2levels_mw_hierarch2
- C/c: Combined -> multiply_combined
......@@ -128,6 +129,10 @@ int main( int argc, char** argv ){
case 'i':
tostart = 'i';
break;
case 'T':
case 't':
tostart = 't';
break;
case 'C':
case 'c':
tostart = 'c';
......@@ -190,6 +195,9 @@ int main( int argc, char** argv ){
case 'i':
Tpara = multiply_2levels_mw_hierarch2( T, N );
break;
case 't':
Tpara = multiply_1level_mw_local( T, N );
break;
case 'c':
Tpara = multiply_combined( T, N );
break;
......
......@@ -336,6 +336,56 @@ void send_result( gi::ex T, MPI_Comm comm ){
}
}
void send_result( gi::ex T, int dst, MPI_Comm comm ){
unsigned int len;
char* expr_c;
if( T.is_zero() ) {
len = 0;
expr_c = NULL; // silence g++'s maybe-uninitialized warning: it is not used when len == 0
} else {
/* linearize the result */
std::string expr = linearize_expression( T );
/* Send it to the master */
len = expr.length() + 1;
expr_c = (char*) malloc( len );
memcpy( expr_c, expr.c_str(), len-1 );
expr_c[len-1] = '\0'; // C++ std::strings do not terminate with \0
}
MPI_Send( &len, 1, MPI_UNSIGNED, dst, TAG_RES, comm );
if( len != 0 ) {
MPI_Send( expr_c, len, MPI_CHAR, dst, TAG_EXPR, comm );
free( expr_c );
}
}
/* Receive a result (sent by send_result) */
gi::ex recv_result( int peer, gi::lst symbols, MPI_Comm comm ){
MPI_Status status;
unsigned int len;
char* expr_c = NULL;
MPI_Recv( &len, 1, MPI_UNSIGNED, peer, TAG_RES, comm, &status );
if( 0 == len ) {
return 0; /* the peer had nothing to contribute: its result was zero */
}
expr_c = (char*)malloc( len ); // the trailing '\0' was added by the sender
MPI_Recv( expr_c, len, MPI_CHAR, peer, TAG_EXPR, comm, &status );
std::string s( expr_c );
free( expr_c ); /* do not leak the reception buffer */
return de_linearize_expression( s, symbols );
}
/* Create communicators for the hierarchical decomposition */
......
......@@ -62,6 +62,7 @@ void send_work_addslave( std::vector<parameters_t>&, std::vector<std::string>&,
void send_work_addslave( std::vector<parameters_s_t>&, std::vector<std::string>&, int ) ;
void send_work_addslave( std::vector<parameters_2_1_t>&, std::vector<std::string>&, int );
void send_result( gi::ex T, MPI_Comm comm = MPI_COMM_WORLD );
void send_result( gi::ex T, int dst, MPI_Comm comm = MPI_COMM_WORLD );
void send_end( int peer, MPI_Comm comm = MPI_COMM_WORLD );
void send_end( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
void send_end( int peer, parameters_2_2_t p, MPI_Comm comm = MPI_COMM_WORLD );
......@@ -69,6 +70,8 @@ void send_end( int peer, parameters_s_t p, MPI_Comm comm = MPI_COMM_WORLD );
void send_end_batch( int peer, MPI_Comm comm = MPI_COMM_WORLD );
void send_end_batch( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
gi::ex recv_result( int peer, gi::lst symbols, MPI_Comm comm = MPI_COMM_WORLD );
void create_parameters_datatype( void );
void create_parameters_datatype_s( void );
void create_parameters_datatype_2_1( void );
......