diff --git a/src/Makefile b/src/Makefile index a1f518d3b8fbc38920ad4e383d96472541c02192..a257ab72d62caa572f734ec4c7967c3ed22170f8 100644 --- a/src/Makefile +++ b/src/Makefile @@ -29,7 +29,8 @@ NP = 5 MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \ perf.cpp sequential.cpp tensormatrix_mpi.cpp \ utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \ - masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp + masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp \ + masterworker3.cpp MPIOBJ= $(MPISRC:.cpp=.o) diff --git a/src/products.h b/src/products.h index 1c6bc3ad5ed7c4a8ed6c0495ce69401916346712..932876255010c44c4c4bc961ca8ff62bd9284c0c 100644 --- a/src/products.h +++ b/src/products.h @@ -8,6 +8,7 @@ namespace gi = GiNaC; // internal (sequential) routines gi::ex one_level1_product( tensor3D_t*, int, int, int, int ); +gi::ex one_level1_product( tensor3D_t*, int, int, int ); gi::ex one_level1_product( tensor3D_t*, int, int ); gi::ex one_level2_product( tensor3D_t*, int, int, int, int, int ); gi::ex two_level1_product( tensor3D_t*, int, int, int ); diff --git a/src/sequential.cpp b/src/sequential.cpp index 11da914f02b9117b65878387e43eec28829a1d2a..0634853ff8bc76b49c97652abc5e01d58b3d2c44 100644 --- a/src/sequential.cpp +++ b/src/sequential.cpp @@ -341,6 +341,91 @@ gi::ex one_level1_product( tensor3D_t* T, int size, int a4 ){ return Tens; } +gi::ex one_level1_product( tensor3D_t* T, int size, int a4, int a2 ){ + + gi::ex Tens = 0; + gi::ex Ti0, Ti1, Ti2; + gi::ex W1, W2, W3, W4, W5, W6, W7; + gi::ex Z1, Z2, Z6, t5, tE, t1, t12, t123, t126, t13, t134, t14, t16, t2, t23, t24, t26, t3, t4, X7Y5; + gi::ex TE, T1, T2, T3, T4, T5, T12, T13, T14, T16, T23, T24, T26, T123, T126, T134; + const char timerB[] = "B"; + + int a1, a3, a5, a6; + int A1, A2, A3, A4, A5, A6; + TE = T1 = T2 = T3 = T4 = T5 = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T123 = T126 = T134 = 0; + Ti0 = Ti1 = 0; + + int N = size/2; + + A4 = a4 + N; + A2 = a2 + N; + for( a6 = 0 ; a6 < N ; a6++ ) { + A6 = a6 + N; + + W1 = (*T)[a4][a2][a6]; + W2 = (*T)[a4][A2][a6]; + W3 = (*T)[a4][a2][A6]; + W4 = (*T)[A4][A2][a6]; + W5 = (*T)[a4][A2][A6]; + W6 = (*T)[A4][a2][A6]; + W7 = (*T)[A4][A2][A6]; + + Ti1 = 0; + for( a1 = 0 ; a1 < N ; a1++ ) { + A1 = a1 + N; + Ti0 = TE = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T1 = T2 = T3 = T4 = T5 = T123 = T126 = T134 = 0; + for( a5 = 0 ; a5 < N ; a5++ ) { + A5 = a5 + N; + Z1 = (*T)[a1][a5][a6]; + Z2 = (*T)[A1][a5][a6]; + Z6 = (*T)[A1][a5][A6]; + t5 = W3*(*T)[a1][A5][a6]; + tE = W4*(*T)[A1][A5][A6]; + t1 = W3*Z2; + t13 = t1; + t2 = W5*Z1; + t23 = t2; + t3 = W3*Z1; + t4 = W6*Z1; + t12 = W5*Z2; + t14 = W6*Z2; + t134 = t14 ; + t16 = W1*Z6; + t24 = W7*Z1; + t26 = W2*(*T)[a1][a5][A6]; + t123 = W5*Z2; + t126 = W2*Z6; + + for( a3 = 0 ; a3 < N ; a3++ ) { + A3 = a3 + N; + TE = TE + tE*(*T)[a1][a2][a3]*(*T)[a4][a5][A3]; + T5 = T5 + t5*(*T)[A1][A2][A3]*(*T)[A4][a5][a3]; + X7Y5 = (*T)[a1][A2][A3]*(*T)[A4][A5][a3]; + T1 = T1 + t1*X7Y5; + T16 = T16 + t16*X7Y5; + T2 = T2 + t2*(*T)[A1][a2][A3]*(*T)[A4][A5][a3]; + T3 = T3 + t3*(*T)[A1][A2][a3]*(*T)[A4][A5][A3]; + T4 = T4 + t4*(*T)[A1][A2][A3]*(*T)[a4][A5][a3]; + T12 = T12 + t12*(*T)[a1][a2][A3]*(*T)[A4][A5][a3]; + T13 = T13 + t13*(*T)[a1][A2][a3]*(*T)[A4][A5][A3]; + T14 = T14 + t14*(*T)[a1][A2][A3]*(*T)[a4][A5][a3]; + T23 = T23 + t23*(*T)[A1][a2][a3]*(*T)[A4][A5][A3]; + T24 = T24 + t24*(*T)[A1][a2][A3]*(*T)[a4][A5][a3]; + T26 = T26 + t26*(*T)[A1][a2][A3]*(*T)[A4][A5][a3]; + T123 = T123 + t123*(*T)[a1][a2][a3]*(*T)[A4][A5][A3]; + T126 = T126 + t126*(*T)[a1][a2][A3]*(*T)[A4][A5][a3]; + T134 = T134 + t134*(*T)[a1][A2][a3]*(*T)[a4][A5][A3]; + } + Ti0 += ( 4*(TE+T12+T13+T14+T16+T23+T24+T26 - (T1 + T2 + T3 + T4 + T5 +T123 + T126 + T134)) ); + } + Ti1 += Ti0; + } + Tens += Ti1; + } + + return Tens; +} + gi::ex one_level2_product( tensor3D_t* T, int size, int a4, int a2, int a1, int a6 ){ int a3, a5; diff --git a/src/tensormatrix.h b/src/tensormatrix.h index 05bca94a3b19eac71ec31f3e346a9b76209ae1a2..cfb5fbc9298c3c3d445c2c10b8a6ca401e793a01 100644 --- a/src/tensormatrix.h +++ b/src/tensormatrix.h @@ -25,6 +25,7 @@ gi::ex multiply_2levels( tensor3D_t&, int ); // parallel gi::ex multiply_1level_mw( tensor3D_t&, int ); gi::ex multiply_1level_mw2( tensor3D_t&, int ); +gi::ex multiply_1level_mw3( tensor3D_t&, int ); gi::ex multiply_1level_mw_addslave( tensor3D_t&, int ); gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int ); gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int ); diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp index a0482ae69d22560ca255570e706210bd3d9d2717..279d24eb8010b4b0edc23b07abb2d53174b40b95 100644 --- a/src/tensormatrix_mpi.cpp +++ b/src/tensormatrix_mpi.cpp @@ -28,7 +28,8 @@ namespace gi = GiNaC; tensormatrix_mpi [N] [Function name] [Nb of foremen] Function names being: - M/m: Master-Worker -> multiply_1level_mw - - n/n: Master-Worker, coarser grain -> multiply_1level_mw2 + - n/N: Master-Worker, coarser grain -> multiply_1level_mw2 + - o/O: Master-Worker, middle grain -> multiply_1level_mw3 - A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave - B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2 - H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch @@ -97,6 +98,10 @@ int main( int argc, char** argv ){ case 'n': tostart = 'n'; break; + case 'O': + case 'o': + tostart = 'o'; + break; case 'A': case 'a': tostart = 'a'; @@ -154,6 +159,9 @@ int main( int argc, char** argv ){ case 'n': Tpara = multiply_1level_mw2( T, N ); break; + case 'o': + Tpara = multiply_1level_mw3( T, N ); + break; case 'a': Tpara = multiply_1level_mw_addslave( T, N ); break;