diff --git a/src/Makefile b/src/Makefile index 027747984d52f208ba62129843893af67ba2c125..b79136cbe5ad82ff19fbf69655a9fdb8924114e9 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,16 +1,34 @@ +TAU = 0 + CXX = g++ MPICXX = mpic++ LD = g++ MPILD = mpic++ -CFLAGS = -Wall -g -O3 -Wno-unused-variable -std=c++11 -LDOPT = -lginac + +ifeq ($(TAU),1) + TAUDIR = $(HOME)/logiciels/tau-2.29/x86_64 + TAUOPT = -DTAUPROF -I$(TAUDIR)/../include + TAULIB = -L$(TAUDIR)/lib -lTAU + + MPICXX = tau_cxx.sh + CXX = tau_cxx.sh + LD = tau_cxx.sh + MPILD = tau_cxx.sh +# export TAU_MAKEFILE=$(TAUDIR)/lib/Makefile.tau-papi-mpi-pdt-openmp +else + TAUOPT = + TAULIB = +endif + +CFLAGS = -Wall -g -O3 -Wno-unused-variable -std=c++11 $(TAUOPT) +LDOPT = -lginac $(TAULIB) MPIEXEC = mpiexec NP = 5 MPISRC = hierarchical.cpp masterworker.cpp mw_addslave.cpp \ perf.cpp sequential.cpp tensormatrix_mpi.cpp \ - utils.cpp utils_parall.cpp + utils.cpp utils_parall.cpp profiling.cpp MPIOBJ= $(MPISRC:.cpp=.o) diff --git a/src/profiling.cpp b/src/profiling.cpp new file mode 100644 index 0000000000000000000000000000000000000000..dc165f44071e7bd9048c3dea6228eea0230c42ec --- /dev/null +++ b/src/profiling.cpp @@ -0,0 +1,59 @@ +#include <iostream> +#include "profiling.h" + +#ifdef TAUPROF +double getTimeSpent( const char* cnt ){ + + const char **inFuncs; + /* The first dimension is functions, and the + second dimension is counters */ + double **counterExclusiveValues; + double **counterInclusiveValues; + int *numOfCalls; + int *numOfSubRoutines; + const char **counterNames; + int numOfCouns; + int numOfFunctions; + const char ** functionList; + int i; + + TAU_GET_FUNC_NAMES(functionList, numOfFunctions); + + inFuncs = (const char **) malloc(sizeof(const char *) * numOfFunctions ); + + for( i = 0 ; i < numOfFunctions ; i++ ) { + inFuncs[i] = functionList[i]; + } + + //Just to show consistency. + TAU_DB_DUMP(); + + TAU_GET_FUNC_VALS( inFuncs, numOfFunctions, + counterExclusiveValues, + counterInclusiveValues, + numOfCalls, + numOfSubRoutines, + counterNames, + numOfCouns ); + + TAU_DUMP_FUNC_VALS_INCR( inFuncs, numOfFunctions ); + + for( i = 0 ; i < numOfFunctions ; i++ ) { + if( 0 == strcmp( cnt, inFuncs[i] ) ) { + return counterExclusiveValues[i][0]; + } + } + + TAU_DB_DUMP_INCR(); + + std::cerr << "Timer " << cnt << " not found" << std::endl; + return -1; +} + + + +#else +double getTimeSpent( const char* cnt ){ + return 0; +} +#endif // def TAUPROF diff --git a/src/profiling.h b/src/profiling.h new file mode 100644 index 0000000000000000000000000000000000000000..11080f00fa4053d3625dc9fbda0097b5edad3e91 --- /dev/null +++ b/src/profiling.h @@ -0,0 +1,13 @@ +#ifndef _PROFILING_H_ +#define _PROFILING_H_ + +#ifdef TAUPROF +#include <TAU.h> +#else +#define TAU_START( a ) do { ;; }while( 0 ); +#define TAU_STOP( a ) do { ;; } while( 0 ); +#endif // TAUPROF + +double getTimeSpent( const char* cnt ); + +#endif // _PROFILING_H_ diff --git a/src/sequential.cpp b/src/sequential.cpp index f7933d9fa0bca85c0fd34169cfce7e7b0bcc3ccd..0c3f1f5051c70839faed7fbaf640b328d24d9088 100644 --- a/src/sequential.cpp +++ b/src/sequential.cpp @@ -1,5 +1,6 @@ #include <ginac/ginac.h> +#include "profiling.h" #include "tensormatrix.h" #include "products.h" @@ -19,23 +20,30 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: s int i, j; i = 0; j = 0; + + const char timerA[] = "A"; + const char timerB[] = "B"; + const char timeradd[] = "add"; + double timeA, timeB; for( a1 = 0 ; a1 < size; a1++ ){ i=i+1; // std::cout << "Tens: " << Tens << std::endl; - printf("i = %d\n", i); + // printf("i = %d\n", i); for( a2 = 0; a2 < size ; a2++ ){ j=j+1; - printf("j = %d\n", j); + // printf("j = %d\n", j); for( a3 = 0 ; a3 < size ; a3++ ){ + TAU_START( timerA ); A = T[a1][a2][a3]; /* Beyond this point, a2 and a3 are only used in the simplectic matrix */ for( b1 = 0 ; b1 < size ; b1++ ){ TAB = J[a1][b1]; for( b2 = 0 ; b2 < size ; b2++ ){ for( b3 = 0 ; b3 < size ; b3++ ){ - TABB = TAB * A*T[b1][b2][b3]; + TAU_START( timerB ); /* Beyond this point, b1 is not used anymore */ + TABB = TAB * A*T[b1][b2][b3]; for( c1 = 0 ; c1 < size ; c1++ ){ for( c2 = 0 ; c2 < size ; c2++ ){ TABC = TABB * J[a2][c2]; @@ -46,16 +54,31 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: s for( d2 = 0 ; d2 < size ; d2++ ){ TABCDD = TABCD * J[b2][d2]; for( d3 = 0 ; d3 < size ; d3++ ){ + TAU_START( timeradd ); Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3]; - } + TAU_STOP( timeradd ); +#ifdef TAUPROF + std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl; + + std::cout << Tens << std::endl; +#endif // TAUPROF + } } } } } } + TAU_STOP( timerB ); +#ifdef TAUPROF + std::cout << "B " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl; +#endif // TAUPROF } } } + TAU_STOP( timerA ); +#ifdef TAUPROF + std::cout << "A " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl; +#endif // TAUPROF } } } @@ -76,24 +99,32 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler int i, j; i = 0; j = 0; + + double time; for( a1 = 0 ; a1 < size; a1++ ){ i=i+1; // std::cout << "Tens: " << Tens << std::endl; - printf("i = %d\n", i); + // printf("i = %d\n", i); for( a2 = 0; a2 < size ; a2++ ){ j=j+1; - printf("j = %d\n", j); + // printf("j = %d\n", j); for( a3 = 0 ; a3 < size ; a3++ ){ + TAU_START( "b" ); A = T[a1][a2][a3]; /* Beyond this point, a2 and a3 are only used in the simplectic matrix */ for( b1 = 0 ; b1 < size ; b1++ ){ Tn = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 ); Tens += Tn; } + TAU_STOP( "b" ); +#ifdef TAUPROF + time = getTimeSpent( "b" ); +#endif // TAUPROF } } } + return Tens; } @@ -103,13 +134,15 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i gi::ex Tens = 0; gi::ex T0, T1, T2, T3, T4, T5; - + const char timerB[] = "B"; + TAB = (*J)[a1][b1]; for( b2 = 0 ; b2 < size ; b2++ ){ for( b3 = 0 ; b3 < size ; b3++ ){ TABB = TAB * A*(*T)[b1][b2][b3]; T5 = 0; /* Beyond this point, b1 is not used anymore */ + TAU_START( timerB ); for( c1 = 0 ; c1 < size ; c1++ ){ T4 = 0; for( c2 = 0 ; c2 < size ; c2++ ){ @@ -138,6 +171,7 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i T5 += T4; } Tens += T5; + TAU_STOP( timerB ); } }