diff --git a/src/profiling.cpp b/src/profiling.cpp
index c2f880c1e485b4ca7d80b806dd29b72ddab2bd59..8ba041f565964828dbfcc2e2041e782450618812 100644
--- a/src/profiling.cpp
+++ b/src/profiling.cpp
@@ -2,6 +2,13 @@
 #include "profiling.h"
 
 #ifdef TAUPROF
+/* Value read by the previous getTimeSpent() call (baseline for the next
+ * delta). It is a single baseline shared by every timer name, so the deltas
+ * are only meaningful while callers poll one timer. Kept file-local. */
+static double previous = 0;
 
+/* Queries TAU for the timer named cnt and returns how much element [0][0] of
+ * the exclusive values reported by TAU_GET_FUNC_VALS grew since the previous
+ * call to this function (the first call returns the raw value). */
 double getTimeSpent( const char* cnt ){
     const char **inFuncs;
@@ -16,17 +23,9 @@ double getTimeSpent( const char* cnt ){
     int numOfFunctions;
-    const char ** functionList;
-    int i;
-
-    TAU_GET_FUNC_NAMES(functionList, numOfFunctions);
-
-    inFuncs = (const char **) malloc(sizeof(const char *) * numOfFunctions );
-
-    for( i = 0 ; i < numOfFunctions ; i++ ) {
-        inFuncs[i] = functionList[i];
-    }
-
-    //Just to show consistency.
-    TAU_DB_DUMP();
+
+    /* Ask TAU about this one timer only; cnt itself serves as the
+     * one-element name array, so nothing has to be allocated. */
+    numOfFunctions = 1;
+    inFuncs = &cnt;
 
     TAU_GET_FUNC_VALS( inFuncs, numOfFunctions,
                        counterExclusiveValues,
@@ -36,25 +35,24 @@ double getTimeSpent( const char* cnt ){
                        counterNames,
                        numOfCouns );
 
-    TAU_DUMP_FUNC_VALS_INCR( inFuncs, numOfFunctions );
-
-    for( i = 0 ; i < numOfFunctions ; i++ ) {
-        if( 0 == strcmp( cnt, inFuncs[i] ) ) {
-            TAU_DB_DUMP_INCR();
-            return counterExclusiveValues[i][0];
-        }
-    }
-
-    TAU_DB_DUMP_INCR();
-
-    std::cerr << "Timer " << cnt << " not found" << std::endl;
-    return -1;
+    /* Only one name was queried, so its values sit at index 0. */
+    const double current = counterExclusiveValues[0][0];
+    const double dur = current - previous;
+    previous = current;
+    return dur;
 }
-
-
-
 #else
 double getTimeSpent( const char* cnt ){
     return 0;
 }
 #endif // def TAUPROF
+
+/* Reads the processor time-stamp counter with the x86 `rdtsc` instruction,
+ * which delivers the low half in EAX and the high half in EDX, and returns
+ * the two halves combined into one 64-bit value. Relies on GCC-style inline
+ * assembly for x86 / x86-64. */
+uint64_t rdtsc(){
+    unsigned int lo, hi;
+    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
+    return ( static_cast<uint64_t>( hi ) << 32 ) | lo;
+}
diff --git a/src/profiling.h b/src/profiling.h
index 11080f00fa4053d3625dc9fbda0097b5edad3e91..47e86dd54404c2025730934dc535e449dfb33ffb 100644
--- a/src/profiling.h
+++ b/src/profiling.h
@@ -8,6 +8,12 @@
 #define TAU_STOP( a ) do { ;; } while( 0 );
 #endif // TAUPROF
 
+#include <stdint.h> /* uint64_t, needed by the rdtsc() declaration */
+
+/* Growth of the TAU exclusive value of timer cnt since the previous call;
+ * always 0 when built without TAUPROF. */
 double getTimeSpent( const char* cnt );
+/* Raw x86 time-stamp counter value (a tick count, not seconds). */
+uint64_t rdtsc( void );
 
 #endif // _PROFILING_H_
diff --git a/src/sequential.cpp b/src/sequential.cpp
index 0c3f1f5051c70839faed7fbaf640b328d24d9088..855104a7f570fe315a8f9b61fb1a463509afb7e0 100644
--- a/src/sequential.cpp
+++ b/src/sequential.cpp
@@ -25,6 +25,7 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: s
     const char timerB[] = "B";
     const char timeradd[] = "add";
     double timeA, timeB;
+    uint64_t t_start, t_end;
 
     for( a1 = 0 ; a1 < size; a1++ ){
         i=i+1;
@@ -54,13 +55,17 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) { // simpler: s
                 for( d2 = 0 ; d2 < size ; d2++ ){
                     TABCDD = TABCD * J[b2][d2];
                     for( d3 = 0 ; d3 < size ; d3++ ){
                         TAU_START( timeradd );
+                        t_start = rdtsc();
                         Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
+                        t_end = rdtsc();
                         TAU_STOP( timeradd );
 #ifdef TAUPROF
-                        std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
-
-                        std::cout << Tens << std::endl;
+                        /* Casts make each argument match its conversion
+                         * specifier (%llu, %zu) on every platform. */
+                        printf( "add %lf %llu len %zu\n", getTimeSpent( timeradd ),
+                                static_cast<unsigned long long>( t_end - t_start ),
+                                static_cast<std::size_t>( Tens.nops() ) );
 #endif // TAUPROF
                     }
                 }