diff --git a/src/Makefile b/src/Makefile
index ac171997860a93c588bcda956bf1d9d09f773169..a1f518d3b8fbc38920ad4e383d96472541c02192 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -29,7 +29,7 @@ NP = 5
 MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
 	perf.cpp sequential.cpp tensormatrix_mpi.cpp \
 	utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \
-	masterworker2.cpp mw_addslave2.cpp
+	masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp
 
 MPIOBJ= $(MPISRC:.cpp=.o)
diff --git a/src/hierarchical.cpp b/src/hierarchical.cpp
index 64b3039b2ff64dd73274d5a7062bddd9706e43fd..b55fa07de325decc800da34c90e117e714605494 100644
--- a/src/hierarchical.cpp
+++ b/src/hierarchical.cpp
@@ -1,7 +1,6 @@
 #include <iostream>
 #include <mpi.h>
 #include <ginac/ginac.h>
-#include <math.h> // ceil
 
 #include "products.h"
 #include "utils_parall.h"
@@ -15,52 +14,6 @@ namespace gi = GiNaC;
  * Parallel hierarchical decomposition                                         *
 *******************************************************************************/
 
-void create_communicators_hierarch( MPI_Comm& COMM_FOREMEN, MPI_Comm& COMM_TEAM ){
-    int rank, np;
-    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
-    MPI_Comm_size( MPI_COMM_WORLD, &np );
-
-    /* Create the communicators: master and foremen
-       color_foreman is set to 1 if I am the root or a foreman */
-
-    int color_foreman, workers_per_foreman;
-    workers_per_foreman = ceil( (double)( np - 1) / (double)nbforemen );
-    if( ROOT == rank ) {
-        color_foreman = 1;
-    } else {
-        if( 1 == (rank % workers_per_foreman ) ){
-            color_foreman = 1;
-        } else {
-            color_foreman = 0;
-        }
-    }
-
-    MPI_Comm_split( MPI_COMM_WORLD, color_foreman, rank, &COMM_FOREMEN );
-
-    /* Create the communicator between the workers and their foreman */
-    /* There is one problematic case here: when the last foreman ends up alone in its communicator */
-
-    int color_team;
-    if( ROOT == rank ) {
-        color_team = 0;
-    } else {
-        color_team = 1 + floor( ( (rank-1) / workers_per_foreman ) );
-    }
-    MPI_Comm_split( MPI_COMM_WORLD, color_team, rank, &COMM_TEAM );
-
-#if DEBUG
-    if( 1 == color_foreman ) {
-        int rank_foreman;
-        MPI_Comm_rank( COMM_FOREMEN, &rank_foreman );
-        std::cout << rank << " I am rank " << rank_foreman << " among the foremen" << std::endl;
-    }
-    int rank_team;
-    MPI_Comm_rank( COMM_TEAM, &rank_team );
-    std::cout << rank << " my team " << color_team << " and I am rank " << rank_team << std::endl;
-#endif
-
-}
-
 /*******************************************************************************
  *                                   Foreman                                   *
 *******************************************************************************/
@@ -184,13 +137,14 @@ void multiply_2levels_foreman_hierarch_finalize( MPI_Comm comm_team ) {
     int src, np, running = 0;
     unsigned int len;
     MPI_Status status;
+    parameters_2_2_t pzero( 0, 0, 0, 0 );
 
     MPI_Comm_size( comm_team, &np );
     running = np - 1;
 
     while( running > 0 ) {
         MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm_team, &status );
         src = status.MPI_SOURCE;
-        send_end( src, comm_team );
+        send_end( src, pzero, comm_team );
         running--;
     }
diff --git a/src/products.h b/src/products.h
index d03b3ce34b80973648a0beea07e9b75a89f60cd2..1c6bc3ad5ed7c4a8ed6c0495ce69401916346712 100644
--- a/src/products.h
+++ b/src/products.h
@@ -12,5 +12,6 @@ gi::ex one_level1_product( tensor3D_t*, int, int );
 gi::ex one_level2_product( tensor3D_t*, int, int, int, int, int );
 gi::ex two_level1_product( tensor3D_t*, int, int, int );
 gi::ex two_level2_product( tensor3D_t*, int, int, int, int, int );
+gi::ex two_level2_product( tensor3D_t*, int, int, int );
 
 #endif // _PRODUCTS_H_
diff --git a/src/sequential.cpp b/src/sequential.cpp
index d6337b2d113d7eb91ab06c91a036aae0b996773f..11da914f02b9117b65878387e43eec28829a1d2a 100644
--- a/src/sequential.cpp
+++ b/src/sequential.cpp
@@ -437,7 +437,7 @@ gi::ex multiply_2levels( tensor3D_t& T, int size ) {  // simpler: same dimension
     return Tens;
 }
 
-gi::ex two_level1_product( tensor3D_t* T, int size, int a2, int a4 ){
+gi::ex two_level1_product( tensor3D_t* T, int size, int a4, int a2 ){
     gi::ex Tens = 0;
     int a1, a6;
 
@@ -453,7 +453,7 @@ gi::ex two_level1_product( tensor3D_t* T, int size, int a2, int a4 ){
     return Tens;
 }
 
-gi::ex two_level2_product( tensor3D_t* T, int size, int a2, int a4, int a6, int a1 ) {
+gi::ex two_level2_product( tensor3D_t* T, int size, int a4, int a2, int a1, int a6 ) {
     gi::ex Tens = 0;
     int a3, a5;
     int A1, A2, A3, A4, A5, A6;
@@ -526,3 +526,86 @@ gi::ex two_level2_product( tensor3D_t* T, int size, int a2, int a4, int a6, int
     return Tens;
 }
 
+gi::ex two_level2_product( tensor3D_t* T, int size, int a4, int a2 ) {
+    gi::ex Tens = 0;
+    int a3, a5, a1, a6;
+    int A1, A2, A3, A4, A5, A6;
+    gi::ex W1, W2, W3, W4, W5, W6, W7;
+    gi::ex Z1, Z2, Z6, t5, tE, t1, t12, t123, t126, t13, t134, t14, t16, t2, t23, t24, t26, t3, t4, X7Y5;
+
+    gi::ex TE, T1, T2, T3, T4, T5, T12, T13, T14, T16, T23, T24, T26, T123, T126, T134;
+    TE = T1 = T2 = T3 = T4 = T5 = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T123 = T126 = T134 = 0;
+    gi::ex Ti0, Ti1;
+
+    int N = size/2;
+
+    A4 = a4 + N;
+    A2 = a2 + N;
+
+    for( a6 = 0 ; a6 < N ; a6++ ) {
+        A6 = a6 + N;
+
+        W1 = (*T)[a4][a2][a6];
+        W2 = (*T)[a4][A2][a6];
+        W3 = (*T)[a4][a2][A6];
+        W4 = (*T)[A4][A2][a6];
+        W5 = (*T)[a4][A2][A6];
+        W6 = (*T)[A4][a2][A6];
+        W7 = (*T)[A4][A2][A6];
+
+        Ti1 = 0;
+        for( a1 = 0 ; a1 < N ; a1++ ) {
+            A1 = a1 + N;
+            Ti0 = TE = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T1 = T2 = T3 = T4 = T5 = T123 = T126 = T134 = 0;
+            for( a5 = 0 ; a5 < N ; a5++ ) {
+                A5 = a5 + N;
+                Z1 = (*T)[a1][a5][a6];
+                Z2 = (*T)[A1][a5][a6];
+                Z6 = (*T)[A1][a5][A6];
+                t5 = W3*(*T)[a1][A5][a6];
+                tE = W4*(*T)[A1][A5][A6];
+                t1 = W3*Z2;
+                t13 = t1;
+                t2 = W5*Z1;
+                t23 = t2;
+                t3 = W3*Z1;
+                t4 = W6*Z1;
+                t12 = W5*Z2;
+                t14 = W6*Z2;
+                t134 = t14;
+                t16 = W1*Z6;
+                t24 = W7*Z1;
+                t26 = W2*(*T)[a1][a5][A6];
+                t123 = W5*Z2;
+                t126 = W2*Z6;
+
+                for( a3 = 0 ; a3 < N ; a3++ ) {
+                    A3 = a3 + N;
+                    TE = TE + tE*(*T)[a1][a2][a3]*(*T)[a4][a5][A3];
+                    T5 = T5 + t5*(*T)[A1][A2][A3]*(*T)[A4][a5][a3];
+                    X7Y5 = (*T)[a1][A2][A3]*(*T)[A4][A5][a3];
+                    T1 = T1 + t1*X7Y5;
+                    T16 = T16 + t16*X7Y5;
+                    T2 = T2 + t2*(*T)[A1][a2][A3]*(*T)[A4][A5][a3];
+                    T3 = T3 + t3*(*T)[A1][A2][a3]*(*T)[A4][A5][A3];
+                    T4 = T4 + t4*(*T)[A1][A2][A3]*(*T)[a4][A5][a3];
+                    T12 = T12 + t12*(*T)[a1][a2][A3]*(*T)[A4][A5][a3];
+                    T13 = T13 + t13*(*T)[a1][A2][a3]*(*T)[A4][A5][A3];
+                    T14 = T14 + t14*(*T)[a1][A2][A3]*(*T)[a4][A5][a3];
+                    T23 = T23 + t23*(*T)[A1][a2][a3]*(*T)[A4][A5][A3];
+                    T24 = T24 + t24*(*T)[A1][a2][A3]*(*T)[a4][A5][a3];
+                    T26 = T26 + t26*(*T)[A1][a2][A3]*(*T)[A4][A5][a3];
+                    T123 = T123 + t123*(*T)[a1][a2][a3]*(*T)[A4][A5][A3];
+                    T126 = T126 + t126*(*T)[a1][a2][A3]*(*T)[A4][A5][a3];
+                    T134 = T134 + t134*(*T)[a1][A2][a3]*(*T)[a4][A5][A3];
+                }
+                Ti0 += ( 4*(TE+T12+T13+T14+T16+T23+T24+T26 - (T1 + T2 + T3 + T4 + T5 + T123 + T126 + T134)) );
+            }
+            Ti1 += Ti0;
+        }
+        Tens += Ti1;
+    }
+
+    return Tens;
+}
+
diff --git a/src/tensormatrix.h b/src/tensormatrix.h
index 889933220ea4bef8a4e19004cb1eefde091b3f80..05bca94a3b19eac71ec31f3e346a9b76209ae1a2 100644
--- a/src/tensormatrix.h
+++ b/src/tensormatrix.h
@@ -28,6 +28,7 @@ gi::ex multiply_1level_mw2( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
+gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
 gi::ex multiply_combined( tensor3D_t&, int );
 
 /*******************************************************************************
diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp
index b0d949978106e4e5f734f06b455ace7c9f2a0b47..a0482ae69d22560ca255570e706210bd3d9d2717 100644
--- a/src/tensormatrix_mpi.cpp
+++ b/src/tensormatrix_mpi.cpp
@@ -32,6 +32,7 @@ namespace gi = GiNaC;
    - A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
    - B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
    - H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
+   - i/I: Hierarchical master-worker, coarser grain -> multiply_2levels_mw_hierarch2
    - C/c: Combined -> multiply_combined
 */
@@ -108,6 +109,10 @@ int main( int argc, char** argv ){
         case 'h':
             tostart = 'h';
             break;
+        case 'I':
+        case 'i':
+            tostart = 'i';
+            break;
         case 'C':
         case 'c':
             tostart = 'c';
@@ -158,6 +163,9 @@ int main( int argc, char** argv ){
     case 'h':
         Tpara = multiply_2levels_mw_hierarch( T, N );
         break;
+    case 'i':
+        Tpara = multiply_2levels_mw_hierarch2( T, N );
+        break;
     case 'c':
         Tpara = multiply_combined( T, N );
         break;
diff --git a/src/utils_parall.cpp b/src/utils_parall.cpp
index d50d750f60098cf7d928b5f0138d04a82714c2d2..609c9c77f8984ef8beea48ca317eb2b872449d8c 100644
--- a/src/utils_parall.cpp
+++ b/src/utils_parall.cpp
@@ -1,6 +1,7 @@
 #include <iostream>
 #include <sstream>
 #include <mpi.h>
+#include <math.h> // ceil
 
 #include "utils_parall.h"
 #include "parall_constants.h"
@@ -99,6 +100,11 @@ void send_end( int peer, parameters_2_1_t p, MPI_Comm comm ) {
     MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_END, comm );
 }
 
+void send_end( int peer, parameters_2_2_t p, MPI_Comm comm ) {
+    /* The parameters_2_2_t argument is not used, but needed to distinguish between functions */
+    MPI_Send( &p, 1, DT_PARAMETERS_2_2, peer, TAG_END, comm );
+}
+
 void send_end( int peer, parameters_s_t p, MPI_Comm comm ) {
     /* The parameters_s_t argument is not used, but needed to distinguish between functions */
     MPI_Send( &p, 1, DT_PARAMETERS_S, peer, TAG_END, comm );
@@ -106,7 +112,11 @@ void send_end( int peer, parameters_s_t p, MPI_Comm comm ) {
 
 void send_end_batch( int peer, MPI_Comm comm ) {
     parameters_t para;
-    MPI_Send( &para, 1, DT_PARAMETERS_2_1, peer, TAG_END_BATCH, comm );
+    MPI_Send( &para, 1, DT_PARAMETERS, peer, TAG_END_BATCH, comm );
+}
+
+void send_end_batch( int peer, parameters_2_1_t p, MPI_Comm comm ) {
+    MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_END_BATCH, comm );
 }
 
 /* M -> W: Send some work: just a parameter set */
@@ -245,3 +255,51 @@ void send_result( gi::ex T, MPI_Comm comm ){
 }
 
+/* Create communicators for the hierarchical decomposition */
+
+void create_communicators_hierarch( MPI_Comm& COMM_FOREMEN, MPI_Comm& COMM_TEAM ){
+    int rank, np;
+    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
+    MPI_Comm_size( MPI_COMM_WORLD, &np );
+
+    /* Create the communicators: master and foremen
+       color_foreman is set to 1 if I am the root or a foreman */
+
+    int color_foreman, workers_per_foreman;
+    workers_per_foreman = ceil( (double)( np - 1) / (double)nbforemen );
+    if( ROOT == rank ) {
+        color_foreman = 1;
+    } else {
+        if( 1 == (rank % workers_per_foreman ) ){
+            color_foreman = 1;
+        } else {
+            color_foreman = 0;
+        }
+    }
+
+    MPI_Comm_split( MPI_COMM_WORLD, color_foreman, rank, &COMM_FOREMEN );
+
+    /* Create the communicator between the workers and their foreman */
+    /* There is one problematic case here: when the last foreman ends up alone in its communicator */
+
+    int color_team;
+    if( ROOT == rank ) {
+        color_team = 0;
+    } else {
+        color_team = 1 + floor( ( (rank-1) / workers_per_foreman ) );
+    }
+    MPI_Comm_split( MPI_COMM_WORLD, color_team, rank, &COMM_TEAM );
+
+#if DEBUG
+    if( 1 == color_foreman ) {
+        int rank_foreman;
+        MPI_Comm_rank( COMM_FOREMEN, &rank_foreman );
+        std::cout << rank << " I am rank " << rank_foreman << " among the foremen" << std::endl;
+    }
+    int rank_team;
+    MPI_Comm_rank( COMM_TEAM, &rank_team );
+    std::cout << rank << " my team " << color_team << " and I am rank " << rank_team << std::endl;
+#endif
+
+}
+
diff --git a/src/utils_parall.h b/src/utils_parall.h
index 8c438ce08273db28eeb6e1d3149ca25dd1e6542d..f93aef01525e0813e533b319b21582ef2993a272 100644
--- a/src/utils_parall.h
+++ b/src/utils_parall.h
@@ -57,8 +57,10 @@ void send_work_addslave( std::vector<parameters_s_t>&, std::vector<std::string>
 void send_result( gi::ex T, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
+void send_end( int peer, parameters_2_2_t p, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end( int peer, parameters_s_t p, MPI_Comm comm = MPI_COMM_WORLD );
 void send_end_batch( int peer, MPI_Comm comm = MPI_COMM_WORLD );
+void send_end_batch( int peer, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD );
 
 void create_parameters_datatype( void );
 void create_parameters_datatype_s( void );
@@ -71,6 +73,8 @@ void free_parameters_s_dt( void );
 
 gi::ex add_expressions( std::vector<std::string>, gi::lst );
 
+void create_communicators_hierarch( MPI_Comm&, MPI_Comm& );
+
 /*******************************************************************************
  *                              Global variables                               *
 *******************************************************************************/
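
Note on the relocated create_communicators_hierarch (now in utils_parall.cpp): it carves two views out of MPI_COMM_WORLD, one communicator gathering the root and the foremen, and one communicator per team of workers around its foreman. The standalone sketch below reproduces that coloring scheme for illustration only; it is not part of the patch, and NB_FOREMEN and ROOT are placeholder constants standing in for the project's nbforemen global and root-rank constant.

/* Minimal sketch of the two-level MPI_Comm_split coloring used by
   create_communicators_hierarch. NB_FOREMEN and ROOT are assumed values. */
#include <cmath>
#include <iostream>
#include <mpi.h>

static const int ROOT = 0;        /* assumed root rank */
static const int NB_FOREMEN = 2;  /* stands in for the nbforemen global */

int main( int argc, char** argv ) {
    MPI_Init( &argc, &argv );

    int rank, np;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &np );

    /* Each foreman leads a block of workers_per_foreman consecutive ranks. */
    int workers_per_foreman = (int) ceil( (double)( np - 1 ) / (double)NB_FOREMEN );

    /* Foremen communicator: the root plus the first rank of each block. */
    int color_foreman = ( ROOT == rank || 1 == ( rank % workers_per_foreman ) ) ? 1 : 0;
    MPI_Comm comm_foremen;
    MPI_Comm_split( MPI_COMM_WORLD, color_foreman, rank, &comm_foremen );

    /* Team communicators: the root sits alone in team 0, every other rank
       joins the team of the foreman that owns its block. */
    int color_team = ( ROOT == rank ) ? 0 : 1 + ( rank - 1 ) / workers_per_foreman;
    MPI_Comm comm_team;
    MPI_Comm_split( MPI_COMM_WORLD, color_team, rank, &comm_team );

    int rank_team;
    MPI_Comm_rank( comm_team, &rank_team );
    std::cout << rank << ": team " << color_team << ", team rank " << rank_team
              << ( 1 == color_foreman ? " (foreman)" : "" ) << std::endl;

    MPI_Comm_free( &comm_team );
    MPI_Comm_free( &comm_foremen );
    MPI_Finalize();
    return 0;
}

As the comment in the patch points out, when np - 1 is not a multiple of the number of foremen the last block is shorter, and the last foreman can end up alone in its team communicator.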