Commit a51b9c94 authored by Camille Coti
Browse files

Implemented the two level sum and regular master-worker

parent 0e4ebad2
......@@ -26,9 +26,10 @@ LDOPT = -lginac $(TAULIB)
MPIEXEC = mpiexec
NP = 5
MPISRC = hierarchical.cpp masterworker.cpp mw_addslave.cpp \
MPISRC = masterworker.cpp \
perf.cpp sequential.cpp tensormatrix_mpi.cpp \
utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp
utils.cpp utils_parall.cpp profiling.cpp
#mw_combined.cpp mw_addslave.cpp hierarchical.cpp
MPIOBJ= $(MPISRC:.cpp=.o)
......
......@@ -14,9 +14,9 @@ namespace gi = GiNaC;
* Parallel 1-level decomposition *
*******************************************************************************/
gi::ex multiply_1level_master( tensor3D_t& T, matrix_int_t& J, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens = 0;
unsigned int a1, a2, a3, b1;
unsigned int a1, a2, a4;
gi::ex A;
gi::lst symbols;
......@@ -36,6 +36,7 @@ gi::ex multiply_1level_master( tensor3D_t& T, matrix_int_t& J, unsigned int size
j = 0;
int receivedresults = 0;
unsigned int N = size/2;
std::vector<parameters_t> input;
std::vector<std::string> results_s;
......@@ -43,19 +44,17 @@ gi::ex multiply_1level_master( tensor3D_t& T, matrix_int_t& J, unsigned int size
/* Build a list of argument sets */
for( a1 = 0 ; a1 < size; a1++ ){
for( a4 = 0 ; a4 < N ; a4++ ){
i=i+1;
for( a2 = 0; a2 < size ; a2++ ){
for( a2 = 0; a2 < N ; a2++ ){
j=j+1;
for( a3 = 0 ; a3 < size ; a3++ ){
A = T[a1][a2][a3];
for( b1 = 0 ; b1 < size ; b1++ ){
parameters_t p( A, a1, a2, a3, b1 );
for( a1 = 0 ; a1 < N ; a1++ ){
parameters_t p( a4, a2, a1 );
input.push_back( p );
}
}
}
}
/* Compute the set of symbols */
/* Could be done while the first slave is working */
......@@ -161,9 +160,9 @@ gi::ex multiply_1level_master( tensor3D_t& T, matrix_int_t& J, unsigned int size
return Tens;
}
void multiply_1level_slave( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm comm = MPI_COMM_WORLD ) {
void multiply_1level_slave( tensor3D_t& T, int size, MPI_Comm comm = MPI_COMM_WORLD ) {
gi::ex Tens;
int a1, a2, a3, b1;
unsigned int a1, a2, a4;
// gi::ex A;
unsigned int len = 0;
......@@ -184,13 +183,11 @@ void multiply_1level_slave( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm c
MPI_Recv( &params, 1, DT_PARAMETERS, ROOT, MPI_ANY_TAG, comm, &status );
if( status.MPI_TAG == TAG_WORK ){
a1 = params.a1;
a4 = params.a4;
a2 = params.a2;
a3 = params.a3;
b1 = params.b1;
gi::symbol A( std::string( params.A ) );
a1 = params.a1;
Tens = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
Tens = one_level1_product( &T, size, a4, a2, a1 );
send_result( Tens );
} else {
......@@ -208,7 +205,7 @@ void multiply_1level_slave( tensor3D_t& T, matrix_int_t& J, int size, MPI_Comm c
W -> M: send an unsigned int (size of the expression), then the expression (table of chars)
*/
gi::ex multiply_1level_mw( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere
gi::ex multiply_1level_mw( tensor3D_t& T, int size ) { // simpler: same dimension everywhere
int rank;
gi::ex Tens = 0;
MPI_Comm_rank( MPI_COMM_WORLD, &rank );
......@@ -220,9 +217,9 @@ gi::ex multiply_1level_mw( tensor3D_t& T, matrix_int_t& J, int size ) { // simp
/* Here we go */
if( 0 == rank ) {
Tens = multiply_1level_master( T, J, size );
Tens = multiply_1level_master( T, size );
} else {
multiply_1level_slave( T, J, size );
multiply_1level_slave( T, size );
}
/* Finalize */
......
......@@ -7,8 +7,8 @@
namespace gi = GiNaC;
// internal (sequential) routines
gi::ex one_level1_product( tensor3D_t*, matrix_int_t*, gi::ex, int, int, int, int, int );
gi::ex one_level2_product( tensor3D_t*, matrix_int_t*, gi::ex, int, int, int, int, int, int, int, int, int );
gi::ex one_level1_product( tensor3D_t*, int, int, int, int );
gi::ex one_level2_product( tensor3D_t*, int, int, int, int, int );
gi::ex two_level1_product( tensor3D_t*, matrix_int_t*, gi::ex, int, int, int, int, int );
gi::ex two_level2_product( tensor3D_t*, matrix_int_t*, gi::ex, int, int, int, int, int, int );
......
......@@ -99,12 +99,7 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler
gi::ex Tens = 0;
gi::ex Tn;
int a1, a2, a3, b1;
gi::ex A;
int i, j;
i = 0;
j = 0;
int a1, a2, a4;
double time;
......@@ -113,22 +108,17 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler
TAU_PROFILER_CREATE( ptr, "b","", TAU_USER );
#endif
for( a1 = 0 ; a1 < size; a1++ ){
i=i+1;
// std::cout << "Tens: " << Tens << std::endl;
// printf("i = %d\n", i);
for( a2 = 0; a2 < size ; a2++ ){
j=j+1;
// printf("j = %d\n", j);
for( a3 = 0 ; a3 < size ; a3++ ){
int N = size/2;
for( a4 = 0 ; a4 < N ; a4++ ) {
for( a2 = 0 ; a2 < N ; a2++ ) {
#ifdef TAUPROF
TAU_START( "b" );
TAU_PROFILER_START( ptr );
#endif
A = T[a1][a2][a3];
/* Beyond this point, a2 and a3 are only used in the simplectic matrix */
for( b1 = 0 ; b1 < size ; b1++ ){
Tn = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
for( a1 = 0 ; a1 < N ; a1++ ){
Tn = one_level1_product( &T, size, a4, a2, a1 );
Tens += Tn;
}
#ifdef TAUPROF
......@@ -158,58 +148,85 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler
#endif // TAUPROF
}
}
}
return Tens;
}
gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, int a1, int a2, int a3, int b1){
gi::ex TAB, TABB, TABC, TABCC, TABCD, TABCDD;
int b2, b3, c1, c2, c3, d1, d2, d3;
gi::ex one_level1_product( tensor3D_t* T, int size, int a4, int a2, int a1 ){
gi::ex Tens = 0;
gi::ex T0, T1, T2, T3, T4, T5;
gi::ex W1, W2, W3, W4, W5, W6, W7;
gi::ex Z1, Z2, Z6, t5, tE, t1, t12, t123, t126, t13, t134, t14, t16, t2, t23, t24, t26, t3, t4, X7Y5;
gi::ex TE, T1, T2, T3, T4, T5, T12, T13, T14, T16, T23, T24, T26, T123, T126, T134;
const char timerB[] = "B";
TAB = (*J)[a1][b1];
for( b2 = 0 ; b2 < size ; b2++ ){
for( b3 = 0 ; b3 < size ; b3++ ){
TABB = TAB * A*(*T)[b1][b2][b3];
T5 = 0;
/* Beyond this point, b1 is not used anymore */
// TAU_START( timerB );
for( c1 = 0 ; c1 < size ; c1++ ){
T4 = 0;
for( c2 = 0 ; c2 < size ; c2++ ){
TABC = TABB * (*J)[a2][c2];
T3 = 0;
for( c3 = 0 ; c3 < size ; c3++ ){
TABCC = TABC * (*T)[c1][c2][c3] * (*J)[b3][c3] ;
T2 = 0;
for( d1 = 0 ; d1 < size ; d1++ ){
TABCD = TABCC * (*J)[c1][d1];
T1 = 0;
for( d2 = 0 ; d2 < size ; d2++ ){
TABCDD = TABCD * (*J)[b2][d2];
T0 = 0;
for( d3 = 0 ; d3 < size ; d3++ ){
T0 += TABCDD * (*T)[d1][d2][d3]*(*J)[a3][d3];
}
T1 += T0;
}
T2 += T1;
}
T3 += T2;
}
T4 += T3;
}
T5 += T4;
int a3, a5, a6;
int A1, A2, A3, A4, A5, A6;
TE = T1 = T2 = T3 = T4 = T5 = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T123 = T126 = T134 = 0;
int N = size/2;
A1 = a1 + N;
A4 = a4 + N;
A2 = a2 + N;
for( a6 = 0 ; a6 < N ; a6++ ) {
A6 = a6 + N;
W1 = (*T)[a4][a2][a6];
W2 = (*T)[a4][A2][a6];
W3 = (*T)[a4][a2][A6];
W4 = (*T)[A4][A2][a6];
W5 = (*T)[a4][A2][A6];
W6 = (*T)[A4][a2][A6];
W7 = (*T)[A4][A2][A6];
for( a5 = 0 ; a5 < N ; a5++ ) {
A5 = a5 + N;
Z1 = (*T)[a1][a5][a6];
Z2 = (*T)[A1][a5][a6];
Z6 = (*T)[A1][a5][A6];
t5 = W3*(*T)[a1][A5][a6];
tE = W4*(*T)[A1][A5][A6];
t1 = W3*Z2;
t13 = t1;
t2 = W5*Z1;
t23 = t2;
t3 = W3*Z1;
t4 = W6*Z1;
t12 = W5*Z2;
t14 = W6*Z2;
t134 = t14 ;
t16 = W1*Z6;
t24 = W7*Z1;
t26 = W2*(*T)[a1][a5][A6];
t123 = W5*Z2;
t126 = W2*Z6;
for( a3 = 0 ; a3 < N ; a3++ ) {
A3 = a3 + N;
TE = TE + tE*(*T)[a1][a2][a3]*(*T)[a4][a5][A3];
T5 = T5 + t5*(*T)[A1][A2][A3]*(*T)[A4][a5][a3];
X7Y5 = (*T)[a1][A2][A3]*(*T)[A4][A5][a3];
T1 = T1 + t1*X7Y5;
T16 = T16 + t16*X7Y5;
T2 = T2 + t2*(*T)[A1][a2][A3]*(*T)[A4][A5][a3];
T3 = T3 + t3*(*T)[A1][A2][a3]*(*T)[A4][A5][A3];
T4 = T4 + t4*(*T)[A1][A2][A3]*(*T)[a4][A5][a3];
T12 = T12 + t12*(*T)[a1][a2][A3]*(*T)[A4][A5][a3];
T13 = T13 + t13*(*T)[a1][A2][a3]*(*T)[A4][A5][A3];
T14 = T14 + t14*(*T)[a1][A2][A3]*(*T)[a4][A5][a3];
T23 = T23 + t23*(*T)[A1][a2][a3]*(*T)[A4][A5][A3];
T24 = T24 + t24*(*T)[A1][a2][A3]*(*T)[a4][A5][a3];
T26 = T26 + t26*(*T)[A1][a2][A3]*(*T)[A4][A5][a3];
T123 = T123 + t123*(*T)[a1][a2][a3]*(*T)[A4][A5][A3];
T126 = T126 + t126*(*T)[a1][a2][A3]*(*T)[A4][A5][a3];
T134 = T134 + t134*(*T)[a1][A2][a3]*(*T)[a4][A5][A3];
}
Tens += T5;
// TAU_STOP( timerB );
}
}
Tens = 4*(TE+T12+T13+T14+T16+T23+T24+T26 - (T1 + T2 + T3 + T4 + T5 +T123 + T126 + T134));
#if 0
std::ostringstream oss;
oss << "output_" << getpid();
......@@ -224,35 +241,80 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i
return Tens;
}
/*
 * Partial sum of the tensor contraction for one fixed index tuple
 * (a4, a2, a1, a6).
 *
 * Same term structure as one_level1_product, except that a6 is supplied by
 * the caller instead of being looped over here; only a5 and a3 are iterated,
 * each over [0, N) with N = size/2.
 *
 * Naming convention used throughout: an upper-case index is the lower-case
 * one shifted by N (A1 = a1 + N, ...).
 *
 * T          : pointer to the 3D tensor; indexed up to 2*N - 1 in each
 *              dimension (assumes T has at least that extent -- TODO confirm).
 * size       : full dimension; only size/2 is used.
 * a4, a2, a1 : fixed outer indices.
 * a6         : fixed inner index.
 *
 * Returns 4 * ( (TE + T12 + T13 + T14 + T16 + T23 + T24 + T26)
 *             - (T1 + T2 + T3 + T4 + T5 + T123 + T126 + T134) ).
 */
gi::ex one_level2_product( tensor3D_t* T, int size, int a4, int a2, int a1, int a6 ){
    int a3, a5;
    int A1, A2, A3, A4, A5, A6;

    gi::ex W1, W2, W3, W4, W5, W6, W7;
    gi::ex Z1, Z2, Z6, t5, tE, t1, t12, t123, t126, t13, t134, t14, t16, t2, t23, t24, t26, t3, t4, X7Y5;
    gi::ex TE, T1, T2, T3, T4, T5, T12, T13, T14, T16, T23, T24, T26, T123, T126, T134;

    /* All sixteen accumulators start from zero. */
    TE = T1 = T2 = T3 = T4 = T5 = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T123 = T126 = T134 = 0;

    int N = size/2;

    A1 = a1 + N;
    A4 = a4 + N;
    A2 = a2 + N;
    A6 = a6 + N;

    /* Factors that depend only on the fixed indices (a4, a2, a6). */
    W1 = (*T)[a4][a2][a6];
    W2 = (*T)[a4][A2][a6];
    W3 = (*T)[a4][a2][A6];
    W4 = (*T)[A4][A2][a6];
    W5 = (*T)[a4][A2][A6];
    W6 = (*T)[A4][a2][A6];
    W7 = (*T)[A4][A2][A6];

    for( a5 = 0 ; a5 < N ; a5++ ) {
        A5 = a5 + N;

        /* Factors that additionally depend on a5; hoisted out of the a3 loop. */
        Z1 = (*T)[a1][a5][a6];
        Z2 = (*T)[A1][a5][a6];
        Z6 = (*T)[A1][a5][A6];

        t5 = W3*(*T)[a1][A5][a6];
        tE = W4*(*T)[A1][A5][A6];
        t1 = W3*Z2;
        t13 = t1;
        t2 = W5*Z1;
        t23 = t2;
        t3 = W3*Z1;
        t4 = W6*Z1;
        t12 = W5*Z2;
        t14 = W6*Z2;
        t134 = t14 ;
        t16 = W1*Z6;
        t24 = W7*Z1;
        t26 = W2*(*T)[a1][a5][A6];
        /* NOTE(review): t123 is the same product as t12 (W5*Z2) -- confirm
           this is intended and not a transcription slip in the formula. */
        t123 = W5*Z2;
        t126 = W2*Z6;

        for( a3 = 0 ; a3 < N ; a3++ ) {
            A3 = a3 + N;

            TE = TE + tE*(*T)[a1][a2][a3]*(*T)[a4][a5][A3];
            T5 = T5 + t5*(*T)[A1][A2][A3]*(*T)[A4][a5][a3];

            /* Shared by the T1 and T16 terms. */
            X7Y5 = (*T)[a1][A2][A3]*(*T)[A4][A5][a3];
            T1 = T1 + t1*X7Y5;
            T16 = T16 + t16*X7Y5;

            T2 = T2 + t2*(*T)[A1][a2][A3]*(*T)[A4][A5][a3];
            T3 = T3 + t3*(*T)[A1][A2][a3]*(*T)[A4][A5][A3];
            T4 = T4 + t4*(*T)[A1][A2][A3]*(*T)[a4][A5][a3];
            T12 = T12 + t12*(*T)[a1][a2][A3]*(*T)[A4][A5][a3];
            T13 = T13 + t13*(*T)[a1][A2][a3]*(*T)[A4][A5][A3];
            T14 = T14 + t14*(*T)[a1][A2][A3]*(*T)[a4][A5][a3];
            T23 = T23 + t23*(*T)[A1][a2][a3]*(*T)[A4][A5][A3];
            T24 = T24 + t24*(*T)[A1][a2][A3]*(*T)[a4][A5][a3];
            T26 = T26 + t26*(*T)[A1][a2][A3]*(*T)[A4][A5][a3];
            T123 = T123 + t123*(*T)[a1][a2][a3]*(*T)[A4][A5][A3];
            T126 = T126 + t126*(*T)[a1][a2][A3]*(*T)[A4][A5][a3];
            T134 = T134 + t134*(*T)[a1][A2][a3]*(*T)[a4][A5][A3];
        }
    }

    /* Signed combination of the sixteen partial sums. */
    gi::ex Tens = 4*(TE+T12+T13+T14+T16+T23+T24+T26 - (T1 + T2 + T3 + T4 + T5 +T123 + T126 + T134));

    return Tens;
}
......
......@@ -23,7 +23,7 @@ gi::ex multiply_seq( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_1level( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_2levels( tensor3D_t&, matrix_int_t&, int );
// parallel
gi::ex multiply_1level_mw( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_1level_mw( tensor3D_t&, int );
gi::ex multiply_1level_mw_addslave( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_1level_mw_hierarch( tensor3D_t&, matrix_int_t&, int );
gi::ex multiply_combined( tensor3D_t&, matrix_int_t&, int );
......
......@@ -132,9 +132,9 @@ int main( int argc, char** argv ){
tv_start = getTime();
switch( tostart ){
case 'm':
Tpara = multiply_1level_mw( T, J, N );
Tpara = multiply_1level_mw( T, N );
break;
case 'a':
/* case 'a':
Tpara = multiply_1level_mw_addslave( T, J, N );
break;
case 'h':
......@@ -142,16 +142,13 @@ int main( int argc, char** argv ){
break;
case 'c':
Tpara = multiply_combined( T, J, N );
break;
break;*/
case 's':
Tpara = multiply_seq( T, J, N );
break;
case '1':
Tpara = multiply_1level( T, J, N );
break;
case '2':
Tpara = multiply_2levels( T, J, N );
break;
default:
std::cerr << "Wrong function called" << std::endl;
}
......
......@@ -20,12 +20,10 @@ gi::ex de_linearize_expression( std::string s, gi::lst symbols ){
return gi::ex( s, symbols );
}
/*
 * Build a work item from the three indices (a4, a2, a1).
 * The values are stored unchanged; the initialiser list follows the member
 * declaration order (a4, a2, a1).
 */
parameters_t::parameters_t( unsigned int a4, unsigned int a2, unsigned int a1 )
    : a4( a4 ), a2( a2 ), a1( a1 ) {
}
parameters_2_t::parameters_2_t( gi::ex A, unsigned int a1, unsigned int a2, unsigned int a3, unsigned int b1, unsigned int b2, unsigned int b3, unsigned int c1, unsigned int c2 ){
......@@ -41,38 +39,12 @@ parameters_2_t::parameters_2_t( gi::ex A, unsigned int a1, unsigned int a2, uns
}
/*
 * Create and commit DT_PARAMETERS, the MPI datatype used to ship one
 * parameters_t: three contiguous unsigned ints (a4, a2, a1).
 */
void create_parameters_datatye(){
    /* The element count below is hard-coded; fail the build if parameters_t
       stops being exactly three unsigned ints. */
    static_assert( sizeof( parameters_t ) == 3 * sizeof( unsigned int ),
                   "DT_PARAMETERS must match the layout of parameters_t" );

    MPI_Type_contiguous( 3, MPI_UNSIGNED, &DT_PARAMETERS );
    MPI_Type_commit( &DT_PARAMETERS );
}
/*
 * Create and commit DT_PARAMETERS_2, the MPI datatype for the 2-level
 * parameter set: four contiguous unsigned ints.
 *
 * The type is built into DT_PARAMETERS_2, the same handle that is committed.
 * Building it into DT_PARAMETERS would overwrite the 1-level datatype and
 * commit a handle that was never created.
 *
 * NOTE(review): the parameters_2_t constructor in this file still takes an
 * expression plus eight indices; confirm that a count of 4 matches the
 * struct actually sent with this datatype.
 */
void create_parameters_datatye_2(){
    MPI_Type_contiguous( 4, MPI_UNSIGNED, &DT_PARAMETERS_2 );
    MPI_Type_commit( &DT_PARAMETERS_2 );
}
......@@ -126,7 +98,7 @@ void send_expressions_to_add( std::vector<std::string>& results, int peer ) {
/* Fill a bogus parameter object */
int nb = results.size();
int i;
parameters_t p( gi::symbol( "" ), nb, 0, 0, 0 );
parameters_t p( 0, 0, 0 );
char* expr;
MPI_Send( &p, 1, DT_PARAMETERS, peer, TAG_ADD, MPI_COMM_WORLD );
......
......@@ -10,9 +10,8 @@ namespace gi = GiNaC;
/*
 * One unit of work sent from the master to a worker: the three indices
 * (a4, a2, a1).
 *
 * The members are three unsigned ints declared in the order a4, a2, a1 and
 * nothing else, so the object can be described to MPI as three contiguous
 * MPI_UNSIGNED values. Keep the member list and that datatype in sync.
 */
class parameters_t{
public:
    /* Zero by default so a default-constructed object never holds
       indeterminate values. */
    unsigned int a4 = 0;
    unsigned int a2 = 0;
    unsigned int a1 = 0;

    /* Arguments are a4, a2, a1, in that order. */
    parameters_t( unsigned int, unsigned int, unsigned int );
    /* Used for receive buffers that are filled in afterwards. */
    parameters_t( void ){}
};
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment