Commit d4b11de4 authored by Camille Coti's avatar Camille Coti

Beginning of the implementation of a combined algorithm

parent 42445948
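
The combined algorithm starts as a regular master-worker and switches to the "addition on a slave" variant once the master spends more time adding partial results than waiting for them (see multiply_combined_master_initial in the diff below). A minimal, self-contained sketch of that switching heuristic follows; maybe_switch and the plain double timings are hypothetical stand-ins and not part of the actual code, which measures cycles with rdtsc().

#include <iostream>

// Mirrors the algo_t enum introduced in this commit.
typedef enum {
    ALGO_MW,       /* Regular master-worker: the master adds the results itself */
    ALGO_ADDSLAVE  /* The additions are delegated to a slave */
} algo_t;

// Hypothetical helper: decide whether to switch algorithms, given the time
// spent waiting for the last result (t_wait) and the time spent adding it (t_add).
algo_t maybe_switch( algo_t algo, double t_wait, double t_add ) {
    if( ALGO_MW == algo && t_add > t_wait ) {
        // The master has become the bottleneck: hand the additions to a slave.
        return ALGO_ADDSLAVE;
    }
    return algo;
}

int main() {
    algo_t algo = ALGO_MW;
    algo = maybe_switch( algo, 1000.0, 2500.0 ); // the addition took longer than the wait
    std::cout << ( ALGO_ADDSLAVE == algo ? "ADDSLAVE" : "MW" ) << std::endl;
    return 0;
}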
@@ -31,7 +31,7 @@ gi::ex multiply_1level_master( tensor3D_t& T, matrix_int_t& J, unsigned int size
expr_c = NULL;
expr_c = (char*) malloc( 3279 ); // TMP
int i, j;
int i, j;
i = 0;
j = 0;
@@ -45,18 +45,18 @@ gi::ex multiply_1level_master( tensor3D_t& T, matrix_int_t& J, unsigned int size
for( a1 = 0 ; a1 < size; a1++ ){
i=i+1;
for( a2 = 0; a2 < size ; a2++ ){
j=j+1;
for( a3 = 0 ; a3 < size ; a3++ ){
A = T[a1][a2][a3];
for( b1 = 0 ; b1 < size ; b1++ ){
parameters_t p( A, a1, a2, a3, b1 );
input.push_back( p );
}
}
for( a2 = 0; a2 < size ; a2++ ){
j=j+1;
for( a3 = 0 ; a3 < size ; a3++ ){
A = T[a1][a2][a3];
for( b1 = 0 ; b1 < size ; b1++ ){
parameters_t p( A, a1, a2, a3, b1 );
input.push_back( p );
}
}
}
}
/* Compute the set of symbols */
/* Could be done while the first slave is working */
......
@@ -7,6 +7,7 @@
#include "parall_constants.h"
#include "parall_internal.h"
#include "utils.h"
#include "profiling.h"
namespace gi = GiNaC;
@@ -149,18 +150,22 @@ void multiply_1level_slave_addslave( tensor3D_t& T, matrix_int_t& J, unsigned in
int rank;
MPI_Comm_rank( comm, &rank );
/* Compute the set of symbols */
gi::lst symbols = all_symbols_3D( size );
double t_start, t_wait, t_compute;
/* Ask for some work */
MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
/* Compute the set of symbols */
gi::lst symbols = all_symbols_3D( size );
while( true ){
/* Receive a set of parameters */
t_start = rdtsc();
MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm, &status );
t_wait = rdtsc() - t_start;
if( status.MPI_TAG == TAG_WORK ){
a1 = params.a1;
@@ -168,8 +173,13 @@ void multiply_1level_slave_addslave( tensor3D_t& T, matrix_int_t& J, unsigned in
a3 = params.a3;
b1 = params.b1;
gi::symbol A( std::string( params.A ) );
t_start = rdtsc();
Tens = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
t_compute = rdtsc() - t_start;
/* TODO if we waited for too long */
if( t_wait > t_compute ) {}
send_result( Tens );
......
#include <iostream>
#include <mpi.h>
#include <ginac/ginac.h>
#include "products.h"
#include "utils_parall.h"
#include "parall_constants.h"
#include "parall_internal.h"
#include "utils.h"
#include "profiling.h"
namespace gi = GiNaC;
typedef enum {
LONG_ADD_M, /* The addition on the master took a long time */
FINISHED, /* Computation finished normally */
} end_code_t;
typedef enum {
ALGO_MW, /* Regular master-worker */
ALGO_ADDSLAVE, /* Do the addition on a slave */
} algo_t;
/* This one is a "regular" master. It returns either when it is done, or when it decides to switch to another algorithm.
*/
end_code_t multiply_combined_master_initial( tensor3D_t& T, matrix_int_t& J, unsigned int size, gi::ex& Tens, MPI_Comm comm = MPI_COMM_WORLD ) {
unsigned int a1, a2, a3, b1;
gi::ex A;
gi::lst symbols;
MPI_Status status;
char* expr_c;
size_t expr_c_size = 0;
int src, np, running = 0;
unsigned int len;
double t_start, t_add, t_wait;
algo_t algo = ALGO_MW;
MPI_Comm_size( comm, &np );
expr_c = NULL;
expr_c = (char*) malloc( 3279 ); // TMP
int i, j;
i = 0;
j = 0;
int receivedresults = 0;
std::vector<parameters_t> input;
std::vector<std::string> results_s;
std::vector<gi::ex> results;
/* Build a list of argument sets */
for( a1 = 0 ; a1 < size; a1++ ){
i=i+1;
for( a2 = 0; a2 < size ; a2++ ){
j=j+1;
for( a3 = 0 ; a3 < size ; a3++ ){
A = T[a1][a2][a3];
for( b1 = 0 ; b1 < size ; b1++ ){
parameters_t p( A, a1, a2, a3, b1 );
input.push_back( p );
}
}
}
}
/* Compute the set of symbols */
/* Could be done while the first slave is working */
symbols = all_symbols_3D( size );
/* Workers that have yet to send their first request */
bool initialround = true;
running = 0;
/* Distribute the work */
while( input.size() > 0 ) {
t_start = rdtsc();
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
src = status.MPI_SOURCE;
t_wait = rdtsc() - t_start;
std::cout << "wait " << t_wait << std::endl;
if( status.MPI_TAG == TAG_PULL ) {
/* Nothing else will come: just send some work */
send_work( input, src, comm );
if( initialround ){
running++;
if( np - 1 == running ) initialround = false; // everyone is at work
}
} else {
if( status.MPI_TAG == TAG_RES ){
src = status.MPI_SOURCE;
/* The first message contains the length of what is coming next */
if( len != 0 ) {
if( len > expr_c_size ) {
expr_c_size = len;
if( NULL != expr_c ) free( expr_c );
expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
}
/* Receive the result */
MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
/* put it in the result queue */
std::string s( expr_c );
if( algo == ALGO_ADDSLAVE ) {
send_work_addslave( input, results_s, src );
} else {
send_work( input, src, comm );
}
/* Process what I have just received */
if( ALGO_MW == algo ) {
t_start = rdtsc();
gi::ex received = de_linearize_expression( s, symbols );
Tens += received;
t_add = rdtsc() - t_start;
std::cout << "Add " << t_add << std::endl;
#if DEBUG
results.push_back( received );
results_s.push_back( s );
receivedresults++;
#endif
if( t_add > t_wait ) {
/* We are spending too much time adding these results. Now we are going to ask a worker to do this. */
// TODO use the average NP last wait time instead
// double average = accumulate( v.begin(), v.end(), 0.0)/v.size();
std::cout << "The master spent too much time computing the sum. Switch to ADDSLAVE algorithm" << std::endl;
algo = ALGO_ADDSLAVE;
}
} else {
if( ALGO_ADDSLAVE == algo ) {
results_s.push_back( s );
} else {
std::cout << "ERROR: unknown algorithm on the master " << algo << std::endl;
}
}
} else {
/* Send more work */
send_work( input, src, comm );
}
} else{
std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
}
}
}
/* Wait until everyone is done */
running = np - 1; // all the slaves are running
while( running > 0 ) {
MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
src = status.MPI_SOURCE;
if( len != 0 ) {
if( len > expr_c_size ) {
expr_c_size = len;
if( NULL != expr_c ) free( expr_c );
expr_c = (char*)malloc( expr_c_size ); // The \0 was added by the slave
}
/* Receive the result */
MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );
/* And send the END signal */
send_end( src, comm );
running--;
/* Process what I have just received */
/* Could be given to a slave... */
/* put it in the result queue */
std::string s( expr_c );
gi::ex received = de_linearize_expression( s, symbols );
Tens += received;
#if DEBUG
results.push_back( received );
results_s.push_back( s );
receivedresults++;
#endif
} else {
send_end( src, comm );
running--;
}
}
#if DEBUG
std::cout << "Received " << receivedresults << " results" << std::endl;
std::cout << "Tpara=" << Tens << ";" << std::endl;
#endif
if( NULL != expr_c) free( expr_c );
return FINISHED;
}
/* The traditional slave */
void multiply_combined_slave_initial( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens;
int a1, a2, a3, b1;
// gi::ex A;
unsigned int len = 0;
parameters_t params;
MPI_Status status;
char* expr_c;
int rank;
MPI_Comm_rank( comm, &rank );
/* Ask for some work */
MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );
/* Compute the set of symbols */
gi::lst symbols = all_symbols_3D( size );
while( true ){
/* Receive a set of parameters */
MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm, &status );
if( status.MPI_TAG == TAG_WORK ){
a1 = params.a1;
a2 = params.a2;
a3 = params.a3;
b1 = params.b1;
gi::symbol A( std::string( params.A ) );
Tens = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
send_result( Tens );
} else {
if( status.MPI_TAG == TAG_ADD ) {
/* Receive a set of expressions to add */
/* Number of expressions received */
int nb = params.a1;
/* Length of each string */
unsigned int* lengths = (unsigned int*) malloc( nb*sizeof( unsigned int ) );
MPI_Recv( lengths, nb, MPI_INT, ROOT, TAG_ADD, comm, &status );
std::vector<std::string> results_s;
char* c_str;
int i;
int len;
for( i = 0 ; i < nb ; i++ ) {
len = lengths[i] + 1;
c_str = (char*) malloc( len );
MPI_Recv( c_str, len, MPI_CHAR, ROOT, TAG_ADD, comm, &status );
c_str[len-1] = '\0'; // The master sends C++ strings, which do not contain the final '\0'
results_s.push_back( std::string( c_str ) );
free( c_str );
}
/* Delinearize all the expressions and add them */
Tens = add_expressions( results_s, symbols );
/* Send the result */
send_result( Tens );
} else {
if( status.MPI_TAG == TAG_END ){
return;
} else {
std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
}
}
}
}
}
/*******************************************************************************
* Combined master-worker *
*******************************************************************************/
gi::ex multiply_combined_master( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
gi::ex Tens = 0;
end_code_t rc;
/* Initially: start as a traditional M/W */
rc = multiply_combined_master_initial( T, J, size, Tens );
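/* Only FINISHED is handled below for now; the LONG_ADD_M case (switching to another algorithm after the initial phase) is not implemented yet. */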
switch( rc ){
case FINISHED:
return Tens;
}
return Tens;
}
void multiply_combined_worker( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
gi::ex Tens = 0;
std::cout << "worker" << std::endl;
multiply_combined_slave_initial( T, J, size );
}
gi::ex multiply_combined( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
int rank;
gi::ex Tens = 0;
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
/* Create a new datatype for the parameters */
create_parameters_datatye();
/* Here we go */
if( 0 == rank ) {
Tens = multiply_combined_master( T, J, size );
} else {
multiply_combined_worker( T, J, size );
}
/* Finalize */
free_parameters_dt();
return Tens;
}
@@ -35,15 +35,15 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: s
j=j+1;
// printf("j = %d\n", j);
for( a3 = 0 ; a3 < size ; a3++ ){
TAU_START( timerA );
A = T[a1][a2][a3];
/* Beyond this point, a2 and a3 are only used in the symplectic matrix */
TAU_START( timerA );
A = T[a1][a2][a3];
/* Beyond this point, a2 and a3 are only used in the symplectic matrix */
for( b1 = 0 ; b1 < size ; b1++ ){
TAB = J[a1][b1];
for( b2 = 0 ; b2 < size ; b2++ ){
for( b3 = 0 ; b3 < size ; b3++ ){
TAU_START( timerB );
/* Beyond this point, b1 is not used anymore */
/* Beyond this point, b1 is not used anymore */
TABB = TAB * A*T[b1][b2][b3];
for( c1 = 0 ; c1 < size ; c1++ ){
for( c2 = 0 ; c2 < size ; c2++ ){
@@ -60,33 +60,33 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: s
Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
t_end = rdtsc();
TAU_STOP( timeradd );
TAU_STOP( timeradd );
#ifdef TAUPROF
// std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
printf( "add %lf %lu len %d\n", getTimeSpent( timeradd ), t_end - t_start, Tens.nops() );
printf( "add %lf %lu len %d\n", getTimeSpent( timeradd ), t_end - t_start, Tens.nops() );
// std::cout << Tens << std::endl;
#endif // TAUPROF
}
}
}
}
}
}
}
TAU_STOP( timerB );
TAU_STOP( timerB );
#ifdef TAUPROF
std::cout << "B " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
std::cout << "B " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
#endif // TAUPROF
}
}
}
}
TAU_STOP( timerA );
TAU_STOP( timerA );
#ifdef TAUPROF
std::cout << "A " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
std::cout << "A " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
#endif // TAUPROF
}
}
}
}
return Tens;
}
@@ -115,15 +115,16 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler
j=j+1;
// printf("j = %d\n", j);
for( a3 = 0 ; a3 < size ; a3++ ){
TAU_START( "b" );
// TAU_START( "b" );
A = T[a1][a2][a3];
/* Beyond this point, a2 and a3 are only used in the symplectic matrix */
for( b1 = 0 ; b1 < size ; b1++ ){
Tn = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
Tens += Tn;
}
TAU_STOP( "b" );
#ifdef TAUPROF
// TAU_STOP( "b" );
//#ifdef TAUPROF
#if 0
time = getTimeSpent( "b" );
#endif // TAUPROF
}
@@ -147,7 +148,7 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i
TABB = TAB * A*(*T)[b1][b2][b3];
T5 = 0;
/* Beyond this point, b1 is not used anymore */
TAU_START( timerB );
// TAU_START( timerB );
for( c1 = 0 ; c1 < size ; c1++ ){
T4 = 0;
for( c2 = 0 ; c2 < size ; c2++ ){
@@ -176,7 +177,7 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i
T5 += T4;
}
Tens += T5;
TAU_STOP( timerB );
// TAU_STOP( timerB );
}
}
......
@@ -26,6 +26,7 @@ gi::ex multiply_2levels( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_1level_mw( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_1level_mw_addslave( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_1level_mw_hierarch( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_combined( tensor3D_t&, matrix_int_t&, int );
/*******************************************************************************
* Default values *
......
@@ -26,6 +26,7 @@ namespace gi = GiNaC;
- M/m: Master-Worker -> multiply_1level_mw
- A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
- H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
- C/c: Combined -> multiply_combined
*/
/* Sequential on Minimum
@@ -86,6 +87,10 @@ int main( int argc, char** argv ){
case 'h':
tostart = 'h';
break;
case 'C':
case 'c':
tostart = 'c';
break;
case 'S':
case 's':
tostart = 's';
@@ -126,6 +131,9 @@ int main( int argc, char** argv ){
case 'h':
Tpara = multiply_1level_mw_hierarch( T, J, N );
break;
case 'c':
Tpara = multiply_combined( T, J, N );
break;
case 's':
Tpara = multiply_seq( T, J, N );
break;
......