Commit 6d574e28 authored by Camille Coti's avatar Camille Coti
Browse files

parallelize the final addition

parent e1fcbab3
......@@ -30,7 +30,7 @@ MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
perf.cpp sequential.cpp tensormatrix_mpi.cpp \
utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \
masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp \
masterworker3.cpp mw_addslave3.cpp
masterworker3.cpp mw_addslave3.cpp mw_addslave4.cpp
MPIOBJ= $(MPISRC:.cpp=.o)
......
......@@ -31,10 +31,6 @@ gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm =
expr_c = NULL;
expr_c = (char*) malloc( 3279 ); // TMP
int i, j;
i = 0;
j = 0;
int receivedresults = 0;
unsigned int N = size/2;
......@@ -45,9 +41,7 @@ gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm =
/* Build a list of argument sets */
for( a4 = 0 ; a4 < N ; a4++ ){
i=i+1;
for( a2 = 0; a2 < N ; a2++ ){
j=j+1;
for( a1 = 0 ; a1 < N ; a1++ ){
parameters_t p( a4, a2, a1 );
input.push_back( p );
......
......@@ -42,6 +42,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
std::vector<parameters_t> input;
std::vector<std::string> results; /* length and char* */
double t1 = getTime();
/* Build a list of argument sets */
for( a4 = 0 ; a4 < N ; a4++ ){
......@@ -60,6 +62,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
symbols = all_symbols_3D( size );
double t2 = getTime();
/* Distribute the work */
while( input.size() > 0 ) {
......@@ -98,6 +102,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
}
}
double t3 = getTime();
/* Wait until everyone is done */
running = np - 1; // all the slaves are running
......@@ -122,9 +128,17 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
send_add_or_end_addslave( results, src, &running );
}
double t4 = getTime();
/* Add whatever I have left */
Tens = add_expressions( results, symbols );
double t5 = getTime();
std::cout << "Init: " << t2 - t1 << std::endl;
std::cout << "Loop: " << t3 - t2 << std::endl;
std::cout << "Fini: " << t4 - t3 << std::endl;
std::cout << "Add: " << t5 - t4 << std::endl;
#if DEBUG
std::cout << "Received " << receivedresults << " results" << std::endl;
......
......@@ -183,8 +183,10 @@ void multiply_1level_slave_addslave2( tensor3D_t& T, unsigned int size, MPI_Comm
/* Delinearize all the expressions and add them */
double t1 = getTime();
Tens = add_expressions( results_s, symbols );
std::cout << "Addition: " << getTime() - t1 << std::endl;
/* Send the result */
send_result( Tens );
......
#include <iostream>
#include <mpi.h>
#include <ginac/ginac.h>
#include <math.h> // for ceil
#include "products.h"
#include "utils_parall.h"
#include "parall_constants.h"
#include "parall_internal.h"
#include "utils.h"
#include "profiling.h"
namespace gi = GiNaC;
#define MAXLENADD 1 // 256
/* Return the length of the longest string in a list of linearized expressions.
 * Used to decide whether a parallel addition is worth the communication cost.
 * Takes the vector by const reference: the original signature copied every
 * expression just to measure lengths. Returns 0 for an empty vector. */
unsigned int maxlen( const std::vector<std::string>& expressions ){
    unsigned int len = 0;
    for( const auto& s: expressions ) {
        len = std::max( len, (unsigned int) s.length() );
    }
    return len;
}
/* Parallel addition of a list of linearized expressions.
 * The master cuts every expression into (size-1) chunks, splitting only at a
 * '+' or '-' sign so that each chunk is a valid sub-expression (the sign is
 * left for the next chunk). Each slave receives one set of chunks (tagged
 * TAG_ADD), adds them, and sends back a partial sum; the master concatenates
 * the partial sums and de-linearizes the final result.
 * Falls back to a purely local addition when the expressions are short.
 * Assumes size >= 2 (at least one slave). */
gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
    gi::ex Tens = 0;
    int size, i, nb;
    unsigned int len;
    unsigned int chunk, end;
    std::vector<unsigned int> cut;   /* per expression: next position to send */
    unsigned int* lengths;           /* per expression: length of the current chunk */
    std::string result;
    char* expr;
    MPI_Status status;
    size_t expr_c_size = 0;
    char* expr_c = NULL;

    /* If the expressions are short, compute the sum locally */
    if( maxlen( expressions ) < MAXLENADD )
        return add_expressions( expressions, symbols );

    MPI_Comm_size( comm, &size );
    nb = expressions.size();
    lengths = (unsigned int*) malloc( nb * sizeof( unsigned int ) );
    for( i = 0 ; i < nb ; i++ ) {
        cut.push_back( 0 );
        lengths[i] = 0;
    }
    unsigned int running = size - 1;
    p.setParams( nb, 1 );

    /* TODO ca se factorise avec send_expressions_to_add */
    for( int peer = 1 ; peer < size ; peer++ ) {
        i = 0;
        for( auto& s: expressions ) {
            /* How much are we going to send: stop at a + or - sign
               (and leave the sign for the next chunk) */
            chunk = ( s.length() + size - 2 ) / ( size - 1 ); /* integer ceiling; ceil() on an int division was a no-op */
            end = cut[i] + chunk;
            if( end >= s.length() ) {
                /* Last chunk of this expression: send everything that is left */
                end = s.length() - 1;
            } else {
                while( !( s[end] == '+' || s[end] == '-' || end == s.length() - 1 ) ){
                    end++;
                }
                /* Decrement only when we stopped on a sign: the original
                   unconditional end-- dropped the final character */
                if( end != s.length() - 1 ) end--;
            }
            lengths[i] = end - cut[i] + 1;
            i++;
        }
        /* Send the lengths (sent as MPI_INT to match the slave's receive) */
        MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_ADD, comm );
        MPI_Send( lengths, nb, MPI_INT, peer, TAG_ADD, comm );
        /* Send the strings */
        for( i = 0 ; i < nb ; i++ ) {
            expr = const_cast<char*>( expressions[i].c_str() );
            MPI_Send( &( expr[ cut[i] ] ), lengths[i], MPI_CHAR, peer, TAG_ADD, comm );
            cut[i] += lengths[i];
        }
    }

    /* Receive the partial sums and concatenate them */
    while( running > 0 ) {
        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
        int src = status.MPI_SOURCE;
        /* len counts the final '\0' added by the slave (same protocol as the
           master's receive loops) */
        if( len != 0 ) {
            if( len > expr_c_size ) {
                expr_c_size = len;
                if( NULL != expr_c ) free( expr_c );
                expr_c = (char*)malloc( expr_c_size );
            }
            /* Receive the result */
            MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
            /* Defensive: guarantee termination (was '\n', which left the
               buffer unterminated when the slave did not send a '\0') */
            expr_c[len - 1] = '\0';
            /* Concatenate the result; insert a '+' between partial sums */
            if( expr_c[0] != '-' && !result.empty() ) result += '+';
            result += expr_c;
        }
        running--;
        send_end( src, p );
    }
    Tens = de_linearize_expression( result, symbols );
    free( lengths );
    if( NULL != expr_c ) free( expr_c );
    return Tens;
}
/*******************************************************************************
* Parallel 1-level decomposition with addition on a slave *
*******************************************************************************/
/* Master side of the 1-level decomposition with addition on a slave and a
 * parallel final addition.
 * Builds the (a4, a2) work list, distributes it to the slaves, collects the
 * partial results (a length message, then the expression string), and finally
 * delegates the remaining additions to the slaves via add_expressions_parall,
 * which is also responsible for sending the end signal.
 * Returns the resulting tensor expression. */
gi::ex multiply_1level_master_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
    gi::ex Tens = 0;
    unsigned int a2, a4;
    gi::lst symbols;
    MPI_Status status;
    /* Reception buffer, grown on demand. The original pre-allocated 3279
       bytes without recording it in expr_c_size, so that buffer was freed
       and re-allocated on the very first result anyway. */
    char* expr_c = NULL;
    size_t expr_c_size = 0;
    int src, np;
    unsigned int len, running = 0;
    parameters_2_1_t pzero( 0, 0 );

    MPI_Comm_size( comm, &np );

    int receivedresults = 0;
    unsigned int N = size/2;

    std::vector<parameters_2_1_t> input;
    std::vector<std::string> results; /* length and char* */

    /* Build a list of argument sets */
    for( a4 = 0 ; a4 < N ; a4++ ){
        for( a2 = 0; a2 < N ; a2++ ){
            parameters_2_1_t p( a4, a2 );
            input.push_back( p );
        }
    }

    /* Compute the set of symbols */
    /* Could be done while the first slave is working */
    symbols = all_symbols_3D( size );

    /* Distribute the work */
    while( input.size() > 0 ) {
        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
        src = status.MPI_SOURCE;
        if( status.MPI_TAG == TAG_PULL ) {
            /* Nothing else will come: just send some work */
            send_work( input, src );
        } else if( status.MPI_TAG == TAG_RES ) {
            /* The first message contains the length of what is coming next */
            if( len != 0 ) {
                if( len > expr_c_size ) {
                    expr_c_size = len;
                    if( NULL != expr_c ) free( expr_c );
                    expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
                }
                /* Receive the result */
                MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
                /* Put it in the result queue */
                results.push_back( std::string( expr_c ) );
                receivedresults++; /* was never incremented: DEBUG count was always 0 */
            }
            /* Send more work */
            send_work_addslave( input, results, src );
        } else {
            std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
        }
    }

    /* Wait until everyone is done */
    running = np - 1; // all the slaves are running
    while( running > 0 ) {
        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
        src = status.MPI_SOURCE;
        if( len != 0 ) {
            if( len > expr_c_size ) {
                expr_c_size = len;
                if( NULL != expr_c ) free( expr_c );
                expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
            }
            /* Receive the result */
            MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
            /* Put it in the result queue */
            results.push_back( std::string( expr_c ) );
            receivedresults++;
        }
        /* Do not send the end signal yet: the slaves take part in the
           parallel final addition below */
        running--;
    }

    /* Add whatever I have left, in parallel on the slaves */
    Tens = add_expressions_parall( results, symbols, pzero, comm );

#if DEBUG
    std::cout << "Received " << receivedresults << " results" << std::endl;
    std::cout << "Tpara=" << Tens << ";" << std::endl;
#endif
    if( NULL != expr_c ) free( expr_c );
    return Tens;
}
/* Slave side of the 1-level decomposition with addition on a slave and a
 * parallel final addition.
 * Pulls work from the master, computes one product per (a4, a2) parameter
 * set (TAG_WORK), or adds a batch of received expressions (TAG_ADD), and
 * sends each result back. Returns when the end signal (TAG_END) arrives. */
void multiply_1level_slave_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
    gi::ex Tens;
    int a2, a4;
    unsigned int len = 0;
    parameters_2_1_t params;
    MPI_Status status;
    int rank;
    MPI_Comm_rank( comm, &rank );

    /* Ask for some work */
    MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );

    /* Compute the set of symbols */
    gi::lst symbols = all_symbols_3D( size );

    while( true ){
        /* Receive a set of parameters */
        MPI_Recv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm, &status );
        if( status.MPI_TAG == TAG_WORK ){
            a4 = params.a4;
            a2 = params.a2;
            Tens = one_level1_product( &T, size, a4, a2 );
            send_result( Tens );
        } else if( status.MPI_TAG == TAG_ADD ) {
            /* Receive a set of expressions to add.
               The parameters are reused: a4 carries the number of expressions. */
            int nb = params.a4;
            /* Length of each string (sent by the master as MPI_INT) */
            unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
            MPI_Recv( lengths, nb, MPI_INT, ROOT, TAG_ADD, comm, &status );
            std::vector<std::string> results_s;
            for( int i = 0 ; i < nb ; i++ ) {
                /* slen: renamed from len, which shadowed the outer variable */
                int slen = lengths[i] + 1;
                char* c_str = (char*) malloc( slen );
                MPI_Recv( c_str, slen - 1, MPI_CHAR, ROOT, TAG_ADD, comm, &status );
                c_str[slen - 1] = '\0'; // The master sends C++ strings, which do not contain the final '\0'
                results_s.push_back( std::string( c_str ) );
                free( c_str );
            }
            free( lengths ); /* was leaked on every TAG_ADD round */
            /* Delinearize all the expressions and add them */
            Tens = add_expressions( results_s, symbols );
            /* Send the result */
            send_result( Tens );
        } else if( status.MPI_TAG == TAG_END ){
            return;
        } else {
            std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
        }
    }
}
/* Communication protocol:
M -> W: always the same size, therefore unique communication
W -> M: send an unsigned int (size of the expression), then the expression (table of chars)
*/
/* Entry point for the master-worker product with slave-side additions and a
 * parallel final addition. Rank 0 acts as the master and returns the full
 * tensor expression; every other rank runs the slave loop and returns 0. */
gi::ex multiply_1level_mw_addslave4( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
    gi::ex Tens = 0;
    int rank;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* The parameter sets travel in a dedicated MPI datatype */
    create_parameters_datatype_2_1();

    /* Here we go */
    if( rank != 0 ) {
        multiply_1level_slave_addslave4( T, size );
    } else {
        Tens = multiply_1level_master_addslave4( T, size );
    }

    /* Finalize */
    free_parameters_2_1_dt();
    return Tens;
}
......@@ -29,6 +29,7 @@ gi::ex multiply_1level_mw3( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave3( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave4( tensor3D_t&, int );
gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
gi::ex multiply_combined( tensor3D_t&, int );
......
......@@ -32,7 +32,8 @@ namespace gi = GiNaC;
- o/O: Master-Worker, middle grain -> multiply_1level_mw3
- A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
- B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
- D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave2
- D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave3
- E/e: Master-Worker, middle grain, addition on a slave, parallel final addition -> multiply_1level_mw_addslave4
- H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
- i/I: Hierarchical master-worker, coarser grain -> multiply_1level_mw_hierarch
- C/c: Combined -> multiply_combined
......@@ -115,6 +116,10 @@ int main( int argc, char** argv ){
case 'd':
tostart = 'd';
break;
case 'E':
case 'e':
tostart = 'e';
break;
case 'H':
case 'h':
tostart = 'h';
......@@ -176,6 +181,9 @@ int main( int argc, char** argv ){
case 'd':
Tpara = multiply_1level_mw_addslave3( T, N );
break;
case 'e':
Tpara = multiply_1level_mw_addslave4( T, N );
break;
case 'h':
Tpara = multiply_2levels_mw_hierarch( T, N );
break;
......
......@@ -27,6 +27,9 @@ public:
unsigned int a4, a2;
parameters_2_1_t( unsigned int, unsigned int );
parameters_2_1_t( void ){};
void setA4( unsigned int _a4 ) { this->a4 = _a4; }
void setA2( unsigned int _a2 ) { this->a2 = _a2; }
void setParams( unsigned int _a4, unsigned int _a2 ) { this->a4 = _a4; this->a2 = _a2; } ;
};
class parameters_2_2_t{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment