Commit 6d574e28 authored by Camille Coti

parallelize the final addition

parent e1fcbab3
@@ -30,7 +30,7 @@ MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
perf.cpp sequential.cpp tensormatrix_mpi.cpp \
utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \
masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp \
-masterworker3.cpp mw_addslave3.cpp
+masterworker3.cpp mw_addslave3.cpp mw_addslave4.cpp
MPIOBJ= $(MPISRC:.cpp=.o)
@@ -31,10 +31,6 @@ gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm =
expr_c = NULL;
expr_c = (char*) malloc( 3279 ); // TMP
-int i, j;
-i = 0;
-j = 0;
int receivedresults = 0;
unsigned int N = size/2;
@@ -45,9 +41,7 @@ gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm =
/* Build a list of argument sets */
for( a4 = 0 ; a4 < N ; a4++ ){
-i=i+1;
for( a2 = 0; a2 < N ; a2++ ){
-j=j+1;
for( a1 = 0 ; a1 < N ; a1++ ){
parameters_t p( a4, a2, a1 );
input.push_back( p );
@@ -42,6 +42,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Comm comm =
std::vector<parameters_t> input;
std::vector<std::string> results; /* length and char* */
+double t1 = getTime();
/* Build a list of argument sets */
for( a4 = 0 ; a4 < N ; a4++ ){
@@ -60,6 +62,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Comm comm =
symbols = all_symbols_3D( size );
+double t2 = getTime();
/* Distribute the work */
while( input.size() > 0 ) {
@@ -98,6 +102,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Comm comm =
}
}
+double t3 = getTime();
/* Wait until everyone is done */
running = np - 1; // all the slaves are running
@@ -122,9 +128,17 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Comm comm =
send_add_or_end_addslave( results, src, &running );
}
+double t4 = getTime();
/* Add whatever I have left */
Tens = add_expressions( results, symbols );
+double t5 = getTime();
+std::cout << "Init: " << t2 - t1 << std::endl;
+std::cout << "Loop: " << t3 - t2 << std::endl;
+std::cout << "Fini: " << t4 - t3 << std::endl;
+std::cout << "Add: " << t5 - t4 << std::endl;
#if DEBUG
std::cout << "Received " << receivedresults << " results" << std::endl;
@@ -183,8 +183,10 @@ void multiply_1level_slave_addslave2( tensor3D_t& T, unsigned int size, MPI_Comm comm =
/* Delinearize all the expressions and add them */
+double t1 = getTime();
Tens = add_expressions( results_s, symbols );
std::cout << "Addition: " << getTime() - t1 << std::endl;
/* Send the result */
send_result( Tens );
mw_addslave4.cpp (new file)
#include <iostream>
#include <mpi.h>
#include <ginac/ginac.h>
#include <math.h> // for ceil
#include "products.h"
#include "utils_parall.h"
#include "parall_constants.h"
#include "parall_internal.h"
#include "utils.h"
#include "profiling.h"
namespace gi = GiNaC;
#define MAXLENADD 1 // 256
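/* Expressions whose longest string is below MAXLENADD characters are added locally rather than distributed (see add_expressions_parall below); the value 1 forces the parallel path. */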
unsigned int maxlen( const std::vector<std::string>& expressions ){
unsigned int len = 0;
for( auto s: expressions ) {
unsigned int l2 = s.length();
if( len < l2 ) {
len = l2;
}
}
return len;
}
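/* Parallel final addition, as sketched by the code below: each expression string is split into size-1 roughly equal chunks, cut at a '+' or '-' sign so that every chunk is a well-formed sub-expression. Peer p receives the p-th chunk of every expression and returns one partial sum; the master concatenates the partial sums and delinearizes the result once. */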
gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
int size, i, nb;
unsigned int len; // received with MPI_UNSIGNED below
unsigned int chunk, end;
std::vector<unsigned int> cut;
unsigned int* lengths;
std::string result;
char* expr;
MPI_Status status;
size_t expr_c_size = 0;
char* expr_c;
/* If the expressions are short, compute the sum locally */
if( maxlen( expressions ) < MAXLENADD )
return add_expressions( expressions, symbols );
MPI_Comm_size( comm, &size );
nb = expressions.size();
lengths = (unsigned int*) malloc( nb * sizeof( unsigned int ) );
for( i = 0 ; i < nb ; i++ ) {
cut.push_back( 0 );
lengths[i] = 0;
}
unsigned int running = size - 1;
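/* The fixed-size parameters message is reused for the ADD phase: a4 carries the number of expressions each peer will receive, a2 is unused here. */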
p.setParams( nb, 1 );
/* TODO: this could share code with send_expressions_to_add */
for( int peer = 1 ; peer < size ; peer++ ) {
i = 0;
for( auto s: expressions ) {
/* How much are we going to send: stop at a + or - sign (and keep the sign for the next chunk) */
if( s.length() == 0 ) { lengths[i] = 0; i++; continue; } // nothing left to send for this expression
chunk = ceil( s.length() / (double)( size - 1 ) ); // cast: with integer division the ceil is a no-op
end = cut[i] + chunk;
if( end >= s.length() ) { // last chunk: take everything that is left
end = s.length() - 1;
} else {
while( !( s[end] == '+' || s[end] == '-' || end == s.length() - 1 ) ){
end++;
}
if( end < s.length() - 1 ) end--; // step back before the sign, but never drop the final character
}
lengths[i] = end - cut[i] + 1;
i++;
}
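/* Worked example (hypothetical input): with 2 peers and s = "2*a+b-c" (length 7), chunk = ceil(7/2) = 4; the first cut advances from index 4 to the '-' and steps back, so peer 1 gets "2*a+b" and peer 2 gets the remainder "-c", which keeps its sign. */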
/* Send the lengths */
MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_ADD, comm );
MPI_Send( lengths, nb, MPI_UNSIGNED, peer, TAG_ADD, comm ); // lengths is an array of unsigned int
/* Send the strings */
for( int j = 0 ; j < nb ; j++ ) {
expr = const_cast<char*>( expressions[j].c_str() );
MPI_Send( &( expr[ cut[j] ] ), lengths[j], MPI_CHAR, peer, TAG_ADD, comm );
cut[j] += lengths[j];
}
}
/* Receive the results */
expr_c = NULL;
while( running > 0 ) {
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
int src = status.MPI_SOURCE;
if( len != 0 ) {
len++; // make room for the terminator; the emptiness test must come first, after ++ it would always pass
if( len > expr_c_size ) {
expr_c_size = len;
if( NULL != expr_c ) free( expr_c );
expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
}
/* Receive the result */
MPI_Recv( expr_c, len-1, MPI_CHAR, src, TAG_EXPR, comm, &status );
expr_c[len - 1] = '\0'; // terminate the buffer: the std::string constructor below needs '\0', not '\n'
/* Concatenate the result */
std::string recvs( expr_c );
if( expr_c[0] != '-' ) result += '+';
result += recvs;
}
running--;
send_end( src, p );
}
Tens = de_linearize_expression( result, symbols );
free( lengths );
free( expr_c );
return Tens;
}
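/* Note that the master never parses the partial sums individually: the strings are concatenated and de_linearize_expression is called once, so a single GiNaC parse is all that remains on the master. */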
/*******************************************************************************
* Parallel 1-level decomposition with addition on a slave *
*******************************************************************************/
gi::ex multiply_1level_master_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
unsigned int a2, a4;
gi::lst symbols;
MPI_Status status;
char* expr_c;
size_t expr_c_size = 0;
int src, np;
unsigned int len, running = 0;
parameters_2_1_t pzero( 0, 0 );
MPI_Comm_size( comm, &np );
expr_c = (char*) malloc( 3279 ); // TMP: initial guess, reallocated on demand below
int receivedresults = 0;
unsigned int N = size/2;
std::vector<parameters_2_1_t> input;
std::vector<std::string> results; /* length and char* */
/* Build a list of argument sets */
for( a4 = 0 ; a4 < N ; a4++ ){
for( a2 = 0; a2 < N ; a2++ ){
parameters_2_1_t p( a4, a2 );
input.push_back( p );
}
}
/* Compute the set of symbols */
/* Could be done while the first slave is working */
symbols = all_symbols_3D( size );
/* Distribute the work */
while( input.size() > 0 ) {
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
if( status.MPI_TAG == TAG_PULL ) {
/* Nothing else will come: just send some work */
src = status.MPI_SOURCE;
send_work( input, src );
} else {
if( status.MPI_TAG == TAG_RES ){
src = status.MPI_SOURCE;
/* The first message contains the length of what is coming next */
if( len != 0 ) {
if( len > expr_c_size ) {
expr_c_size = len;
if( NULL != expr_c ) free( expr_c );
expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
}
/* Receive the result */
MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
/* Put it in the result queue */
results.push_back( std::string( expr_c ) );
receivedresults++;
}
/* Send more work */
send_work_addslave( input, results, src );
} else {
std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
}
}
}
/* Wait until everyone is done */
running = np - 1; // all the slaves are running
while( running > 0 ) {
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
src = status.MPI_SOURCE;
if( len != 0 ) {
if( len > expr_c_size ) {
expr_c_size = len;
if( NULL != expr_c ) free( expr_c );
expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
}
/* Receive the result */
MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
/* Put it in the result queue */
results.push_back( std::string( expr_c ) );
receivedresults++;
}
/* Do not send the end signal yet */
running--;
}
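/* The workers are deliberately kept alive: add_expressions_parall below reuses them for the final addition and sends TAG_END itself. */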
/* Add whatever I have left */
Tens = add_expressions_parall( results, symbols, pzero, comm );
#if DEBUG
std::cout << "Received " << receivedresults << " results" << std::endl;
std::cout << "Tpara=" << Tens << ";" << std::endl;
#endif
if( NULL != expr_c) free( expr_c );
return Tens;
}
void multiply_1level_slave_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens;
int a2, a4;
unsigned int len = 0;
parameters_2_1_t params;
MPI_Status status;
char* expr_c;
int rank;
MPI_Comm_rank( comm, &rank );
/* Ask for some work */
MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
/* Compute the set of symbols */
gi::lst symbols = all_symbols_3D( size );
while( true ){
/* Receive a set of parameters */
MPI_Recv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm, &status );
if( status.MPI_TAG == TAG_WORK ){
a4 = params.a4;
a2 = params.a2;
Tens = one_level1_product( &T, size, a4, a2 );
send_result( Tens );
} else {
if( status.MPI_TAG == TAG_ADD ) {
/* Receive a set of expressions to add */
/* Number of expressions received */
int nb = params.a4;
a2 = params.a2;
/* Length of each string */
unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
MPI_Recv( lengths, nb, MPI_UNSIGNED, ROOT, TAG_ADD, comm, &status );
std::vector<std::string> results_s;
char* c_str;
int i;
int len;
for( i = 0 ; i < nb ; i++ ) {
len = lengths[i] + 1 ;
c_str = (char*) malloc( len );
MPI_Recv( c_str, len - 1, MPI_CHAR, ROOT, TAG_ADD, comm, &status );
c_str[len - 1] = '\0'; // The master sends C++ strings, which do not contain the final '\0'
results_s.push_back( std::string( c_str ) );
free( c_str );
}
/* Delinearize all the expressions and add them */
Tens = add_expressions( results_s, symbols );
/* Send the result */
send_result( Tens );
} else {
if( status.MPI_TAG == TAG_END ){
return;
} else {
std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
}
}
}
}
}
/* Communication protocol:
M -> W: always the same size, therefore unique communication
W -> M: send an unsigned int (size of the expression), then the expression (array of chars)
*/
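/* Sketch of one round trip under this protocol (tags as used in this file; send_work and send_result are assumed to wrap the corresponding sends):
W -> M: MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm ); // ask for work
M -> W: MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_WORK, comm ); // fixed-size parameter set
W -> M: MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_RES, comm ); // length of the result...
W -> M: MPI_Send( expr_c, len, MPI_CHAR, ROOT, TAG_EXPR, comm ); // ...then the characters, '\0' included
*/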
gi::ex multiply_1level_mw_addslave4( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
int rank;
gi::ex Tens = 0;
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
/* Create a new datatype for the parameters */
create_parameters_datatype_2_1();
/* Here we go */
if( 0 == rank ) {
Tens = multiply_1level_master_addslave4( T, size );
} else {
multiply_1level_slave_addslave4( T, size );
}
/* Finalize */
free_parameters_2_1_dt();
return Tens;
}
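/* All ranks call multiply_1level_mw_addslave4 collectively; only rank 0 returns the assembled tensor, the workers return the initial 0. */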
@@ -29,6 +29,7 @@ gi::ex multiply_1level_mw3( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave3( tensor3D_t&, int );
+gi::ex multiply_1level_mw_addslave4( tensor3D_t&, int );
gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
gi::ex multiply_combined( tensor3D_t&, int );
@@ -32,7 +32,8 @@ namespace gi = GiNaC;
- o/O: Master-Worker, middle grain -> multiply_1level_mw3
- A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
- B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
-- D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave2
+- D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave3
+- E/e: Master-Worker, middle grain, addition on a slave, parallel final addition -> multiply_1level_mw_addslave4
- H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
- i/I: Hierarchical master-worker, coarser grain -> multiply_1level_mw_hierarch
- C/c: Combined -> multiply_combined
@@ -115,6 +116,10 @@ int main( int argc, char** argv ){
case 'd':
tostart = 'd';
break;
+case 'E':
+case 'e':
+tostart = 'e';
+break;
case 'H':
case 'h':
tostart = 'h';
@@ -176,6 +181,9 @@ int main( int argc, char** argv ){
case 'd':
Tpara = multiply_1level_mw_addslave3( T, N );
break;
+case 'e':
+Tpara = multiply_1level_mw_addslave4( T, N );
+break;
case 'h':
Tpara = multiply_2levels_mw_hierarch( T, N );
break;
@@ -27,6 +27,9 @@ public:
unsigned int a4, a2;
parameters_2_1_t( unsigned int, unsigned int );
parameters_2_1_t( void ){};
+void setA4( unsigned int _a4 ) { this->a4 = _a4; }
+void setA2( unsigned int _a2 ) { this->a2 = _a2; }
+void setParams( unsigned int _a4, unsigned int _a2 ) { this->a4 = _a4; this->a2 = _a2; }
};
class parameters_2_2_t{