Fixed the TAU profiling function (getTimeSpent) and added an rdtsc() helper

42445948 · Camille Coti · 9b9420a6 · 42445948 · 42445948 · 42445948
Commit 42445948 authored 5 years ago by Camille Coti
--- a/src/profiling.cpp
+++ b/src/profiling.cpp
@@ -2,6 +2,7 @@
 #include "profiling.h"

 #ifdef TAUPROF
+double previous = 0;
 double getTimeSpent( const char* cnt ){
    
    const char **inFuncs;
@@ -16,17 +17,10 @@ double getTimeSpent( const char* cnt ){
    int numOfFunctions;
    const char ** functionList;
    int i;
-    
-    TAU_GET_FUNC_NAMES(functionList, numOfFunctions);
-    
+
+    numOfFunctions = 1;
    inFuncs = (const char **) malloc(sizeof(const char *) * numOfFunctions );
-    
-    for( i = 0 ; i < numOfFunctions ; i++ ) {
-        inFuncs[i] = functionList[i];
-    }
-        
-    //Just to show consistency.
-    TAU_DB_DUMP();
+    inFuncs[0] = cnt;
    
    TAU_GET_FUNC_VALS( inFuncs, numOfFunctions,
                       counterExclusiveValues,
@@ -36,25 +30,29 @@ double getTimeSpent( const char* cnt ){
                       counterNames,
                       numOfCouns );
    
-    TAU_DUMP_FUNC_VALS_INCR( inFuncs,  numOfFunctions );
-    
-    for( i = 0 ; i < numOfFunctions ; i++ ) {
-        if( 0 == strcmp( cnt, inFuncs[i] ) ) {
-            TAU_DB_DUMP_INCR();
-            return counterExclusiveValues[i][0];
-        }
+    i = 0;
+    if( 0 == strcmp( cnt, inFuncs[i] ) ) {
+      free( inFuncs );
+      /*      double dur = numOfCalls[i] - previous;
+	      previous = numOfCalls[i];*/
+      double dur = counterExclusiveValues[i][0] - previous;
+      previous = counterExclusiveValues[i][0];
+      return dur;
    }
    
-    TAU_DB_DUMP_INCR();
-    
    std::cerr << "Timer " << cnt << " not found" << std::endl;
+    free( inFuncs );
    return -1;
 }
-
-
-
 #else
 double getTimeSpent( const char* cnt ){
    return 0;
 }
 #endif // def TAUPROF
+
+uint64_t rdtsc(){  // Read the x86 time-stamp counter via the RDTSC instruction and return it as one 64-bit value.
+    unsigned int lo,hi;  // receive the low and high halves of the counter from the asm statement below
+    __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));  // "=a" binds lo to EAX (low half), "=d" binds hi to EDX (high half)
+    return ((uint64_t)hi << 32) | lo;  // widen hi to 64 bits before shifting so the upper half is not lost
+}
+ 
--- a/src/profiling.h
+++ b/src/profiling.h
@@ -8,6 +8,7 @@
 #define TAU_STOP( a ) do { ;; } while( 0 );
 #endif // TAUPROF

-double getTimeSpent( const char* cnt );
+double getTimeSpent( const char* );
+uint64_t rdtsc( void );

 #endif // _PROFILING_H_
--- a/src/sequential.cpp
+++ b/src/sequential.cpp
@@ -25,6 +25,7 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
    const char timerB[] = "B";
    const char timeradd[] = "add";
    double timeA, timeB;
+    uint64_t t_start, t_end;

    for( a1 = 0 ; a1 < size; a1++ ){
 		i=i+1; 
@@ -41,7 +42,7 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
 					TAB = J[a1][b1]; 
 					for( b2 = 0 ; b2 < size ; b2++ ){
 						for( b3 = 0 ; b3 < size ; b3++ ){
-                            TAU_START( timerB );
+						  TAU_START( timerB );
                           /* Beyond this point, b1 is not used anymore */
 							TABB =  TAB * A*T[b1][b2][b3];
 							for( c1 = 0 ; c1 < size ; c1++ ){
@@ -54,13 +55,17 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
 											for( d2 = 0 ; d2 < size ; d2++ ){
 												TABCDD = TABCD * J[b2][d2];
 												for( d3 = 0 ; d3 < size ; d3++ ){
-                                                    TAU_START( timeradd );
-													Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
+												  TAU_START( timeradd );
+												  t_start = rdtsc();
+												  Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
+												  t_end = rdtsc();
+												  
                                                    TAU_STOP( timeradd );
 #ifdef TAUPROF
-                                                    std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+						    //                                                    std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+                                                    printf( "add %lf %lu len %d\n", getTimeSpent( timeradd ), t_end - t_start, Tens.nops() );

-                                                    std::cout << Tens << std::endl;
+						    //                                                    std::cout << Tens << std::endl;
 #endif // TAUPROF
   												}
                                            }