diff --git a/src/products.h b/src/products.h index 4a6b6de4b7f52715141e41fc9e6cdee1d9f61bfc..3d54d90925041b3096c64ac4db7963e66bb8467f 100644 --- a/src/products.h +++ b/src/products.h @@ -9,7 +9,7 @@ namespace gi = GiNaC; // internal (sequential) routines gi::ex one_level1_product( tensor3D_t*, int, int, int, int ); gi::ex one_level2_product( tensor3D_t*, int, int, int, int, int ); -gi::ex two_level1_product( tensor3D_t*, matrix_int_t*, gi::ex, int, int, int, int, int ); -gi::ex two_level2_product( tensor3D_t*, matrix_int_t*, gi::ex, int, int, int, int, int, int ); +gi::ex two_level1_product( tensor3D_t*, int, int, int ); +gi::ex two_level2_product( tensor3D_t*, int, int, int, int, int ); #endif // _PRODUCTS_H_ diff --git a/src/sequential.cpp b/src/sequential.cpp index cb00a2ba15a52d4658276d6772e71f029b088e8c..54fb8be5e7327e1dc3fa84cc067f6859fe62ad33 100644 --- a/src/sequential.cpp +++ b/src/sequential.cpp @@ -10,7 +10,7 @@ namespace gi = GiNaC; * Big multiplication * *******************************************************************************/ -gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { +gi::ex multiply_seq( tensor3D_t& T, int size ) { gi::ex Tens = 0; int a1, a2, a3, a4, a5, a6; @@ -95,7 +95,7 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { * 1-level decomposition * *******************************************************************************/ -gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere +gi::ex multiply_1level( tensor3D_t& T, int size ) { // simpler: same dimension everywhere gi::ex Tens = 0; gi::ex Tn; @@ -322,87 +322,107 @@ gi::ex one_level2_product( tensor3D_t* T, int size, int a4, int a2, int a1, int * 2-level decomposition * *******************************************************************************/ -gi::ex multiply_2levels( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: same dimension everywhere - +gi::ex multiply_2levels( tensor3D_t& T, int size ) { // simpler: same dimension everywhere + gi::ex Tens = 0; - int a1, a2, a3, b1; - gi::ex A; + int a2, a4; - int i, j; - i = 0; - j = 0; + int N = size/2; - for( a1 = 0 ; a1 < size; a1++ ){ - i=i+1; - for( a2 = 0; a2 < size ; a2++ ){ - j=j+1; - for( a3 = 0 ; a3 < size ; a3++ ){ - A = T[a1][a2][a3]; - /* Beyond this point, a2 and a3 are only used in the simplectic matrix */ - for( b1 = 0 ; b1 < size ; b1++ ){ - Tens += two_level1_product( &T, &J, A, size, a1, a2, a3, b1 ); - } - } + for( a4 = 0 ; a4 < N ; a4++ ) { + for( a2 = 0 ; a2 < N ; a2++ ) { + Tens += two_level1_product( &T, size, a2, a4 ); } } return Tens; } -gi::ex two_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, int a1, int a2, int a3, int b1){ - gi::ex TAB, TABB, TABC; - int b2, b3, c1, c2; +gi::ex two_level1_product( tensor3D_t* T, int size, int a2, int a4 ){ gi::ex Tens = 0; - gi::ex T0, T1, T2; - - TAB = (*J)[a1][b1]; - for( b2 = 0 ; b2 < size ; b2++ ){ - T2 = 0; - for( b3 = 0 ; b3 < size ; b3++ ){ - TABB = TAB * A*(*T)[b1][b2][b3]; - T1 = 0; - /* Beyond this point, b1 is not used anymore */ - for( c1 = 0 ; c1 < size ; c1++ ){ - T0 = 0; - for( c2 = 0 ; c2 < size ; c2++ ){ - TABC = TABB * (*J)[a2][c2]; - T0 += two_level2_product( T, J, TABC, size, a3, b2, b3, c1, c2 ); - } - T1 += T0; - } - T2 += T1; + int a1, a6; + + int N = size/2; + for( a6 = 0 ; a6 < N ; a6++ ) { + for( a1 = 0 ; a1 < N ; a1++ ) { + + Tens += two_level2_product( T, size, a2, a4, a6, a1 ); } - Tens += T2; } + return Tens; } -gi::ex two_level2_product( tensor3D_t* T, matrix_int_t *J, gi::ex TABC, int size, int a3, int b2, int b3, int c1, int c2 ){ +gi::ex two_level2_product( tensor3D_t* T, int size, int a2, int a4, int a6, int a1 ) { + gi::ex Tens = 0; + int a3, a5; + int A1, A2, A3, A4, A5, A6; + gi::ex W1, W2, W3, W4, W5, W6, W7; + gi::ex Z1, Z2, Z6, t5, tE, t1, t12, t123, t126, t13, t134, t14, t16, t2, t23, t24, t26, t3, t4, X7Y5; + + gi::ex TE, T1, T2, T3, T4, T5, T12, T13, T14, T16, T23, T24, T26, T123, T126, T134; + TE = T1 = T2 = T3 = T4 = T5 = T12 = T13 = T14 = T16 = T23 = T24 = T26 = T123 = T126 = T134 = 0; - int c3, d1, d2, d3; - gi::ex TABCC, TABCD, TABCDD; + int N = size/2; + + A4 = a4 + N; + A2 = a2 + N; + A1 = a1 + N; + A6 = a6 + N; - gi::ex Tens = 0; - gi::ex T0, T1, T2; - - for( c3 = 0 ; c3 < size ; c3++ ){ - TABCC = TABC * (*T)[c1][c2][c3] * (*J)[b3][c3] ; - T2 = 0; - for( d1 = 0 ; d1 < size ; d1++ ){ - TABCD = TABCC * (*J)[c1][d1]; - T1 = 0; - for( d2 = 0 ; d2 < size ; d2++ ){ - TABCDD = TABCD * (*J)[b2][d2]; - T0 = 0; - for( d3 = 0 ; d3 < size ; d3++ ){ - T0 += TABCDD * (*T)[d1][d2][d3]*(*J)[a3][d3]; - } - T1 += T0; - } - T2 += T1; + W1 = (*T)[a4][a2][a6]; + W2 = (*T)[a4][A2][a6]; + W3 = (*T)[a4][a2][A6]; + W4 = (*T)[A4][A2][a6]; + W5 = (*T)[a4][A2][A6]; + W6 = (*T)[A4][a2][A6]; + W7 = (*T)[A4][A2][A6]; + + for( a5 = 0 ; a5 < N ; a5++ ) { + A5 = a5 + N; + Z1 = (*T)[a1][a5][a6]; + Z2 = (*T)[A1][a5][a6]; + Z6 = (*T)[A1][a5][A6]; + t5 = W3*(*T)[a1][A5][a6]; + tE = W4*(*T)[A1][A5][A6]; + t1 = W3*Z2; + t13 = t1; + t2 = W5*Z1; + t23 = t2; + t3 = W3*Z1; + t4 = W6*Z1; + t12 = W5*Z2; + t14 = W6*Z2; + t134 = t14 ; + t16 = W1*Z6; + t24 = W7*Z1; + t26 = W2*(*T)[a1][a5][A6]; + t123 = W5*Z2; + t126 = W2*Z6; + + for( a3 = 0 ; a3 < N ; a3++ ) { + A3 = a3 + N; + TE = TE + tE*(*T)[a1][a2][a3]*(*T)[a4][a5][A3]; + T5 = T5 + t5*(*T)[A1][A2][A3]*(*T)[A4][a5][a3]; + X7Y5 = (*T)[a1][A2][A3]*(*T)[A4][A5][a3]; + T1 = T1 + t1*X7Y5; + T16 = T16 + t16*X7Y5; + T2 = T2 + t2*(*T)[A1][a2][A3]*(*T)[A4][A5][a3]; + T3 = T3 + t3*(*T)[A1][A2][a3]*(*T)[A4][A5][A3]; + T4 = T4 + t4*(*T)[A1][A2][A3]*(*T)[a4][A5][a3]; + T12 = T12 + t12*(*T)[a1][a2][A3]*(*T)[A4][A5][a3]; + T13 = T13 + t13*(*T)[a1][A2][a3]*(*T)[A4][A5][A3]; + T14 = T14 + t14*(*T)[a1][A2][A3]*(*T)[a4][A5][a3]; + T23 = T23 + t23*(*T)[A1][a2][a3]*(*T)[A4][A5][A3]; + T24 = T24 + t24*(*T)[A1][a2][A3]*(*T)[a4][A5][a3]; + T26 = T26 + t26*(*T)[A1][a2][A3]*(*T)[A4][A5][a3]; + T123 = T123 + t123*(*T)[a1][a2][a3]*(*T)[A4][A5][A3]; + T126 = T126 + t126*(*T)[a1][a2][A3]*(*T)[A4][A5][a3]; + T134 = T134 + t134*(*T)[a1][A2][a3]*(*T)[a4][A5][A3]; } - Tens += T2; - } + } + + Tens = 4*(TE+T12+T13+T14+T16+T23+T24+T26 - (T1 + T2 + T3 + T4 + T5 +T123 + T126 + T134)); return Tens; } diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp index f1aa1dd9581bece858df7c35f480d652b6c196e8..270c63e78e94679113c43b27e062d59e551ed382 100644 --- a/src/tensormatrix_mpi.cpp +++ b/src/tensormatrix_mpi.cpp @@ -42,7 +42,8 @@ real 3m31,034s *******************************************************************************/ MPI_Datatype DT_PARAMETERS; -MPI_Datatype DT_PARAMETERS_2; +MPI_Datatype DT_PARAMETERS_2_1; +MPI_Datatype DT_PARAMETERS_2_2; unsigned int nbforemen = NBFOREMEN; /* Number of foremen to use with the hierarchical M/W */ unsigned int maxresult = MAXRESULT; /* Maximum results in the result queue, addslave version */ @@ -144,10 +145,13 @@ int main( int argc, char** argv ){ Tpara = multiply_combined( T, J, N ); break;*/ case 's': - Tpara = multiply_seq( T, J, N ); + Tpara = multiply_seq( T, N ); break; case '1': - Tpara = multiply_1level( T, J, N ); + Tpara = multiply_1level( T, N ); + break; + case '2': + Tpara = multiply_2levels( T, N ); break; default: std::cerr << "Wrong function called" << std::endl;