diff --git a/src/profiling.cpp b/src/profiling.cpp
index c2f880c1e485b4ca7d80b806dd29b72ddab2bd59..8ba041f565964828dbfcc2e2041e782450618812 100644
--- a/src/profiling.cpp
+++ b/src/profiling.cpp
@@ -2,6 +2,7 @@
 #include "profiling.h"
 
 #ifdef TAUPROF
+double previous = 0;  // last counterExclusiveValues[0][0] read by getTimeSpent(), used as the baseline for the next difference; NOTE(review): one baseline is shared by every timer name passed in - confirm only a single timer is ever queried
 double getTimeSpent( const char* cnt ){
     
     const char **inFuncs;
@@ -16,17 +17,10 @@ double getTimeSpent( const char* cnt ){
     int numOfFunctions;
     const char ** functionList;
     int i;
-    
-    TAU_GET_FUNC_NAMES(functionList, numOfFunctions);
-    
+
+    numOfFunctions = 1;
     inFuncs = (const char **) malloc(sizeof(const char *) * numOfFunctions );
-    
-    for( i = 0 ; i < numOfFunctions ; i++ ) {
-        inFuncs[i] = functionList[i];
-    }
-        
-    //Just to show consistency.
-    TAU_DB_DUMP();
+    inFuncs[0] = cnt;
     
     TAU_GET_FUNC_VALS( inFuncs, numOfFunctions,
                        counterExclusiveValues,
@@ -36,25 +30,29 @@ double getTimeSpent( const char* cnt ){
                        counterNames,
                        numOfCouns );
     
-    TAU_DUMP_FUNC_VALS_INCR( inFuncs,  numOfFunctions );
-    
-    for( i = 0 ; i < numOfFunctions ; i++ ) {
-        if( 0 == strcmp( cnt, inFuncs[i] ) ) {
-            TAU_DB_DUMP_INCR();
-            return counterExclusiveValues[i][0];
-        }
+    i = 0;
+    if( 0 == strcmp( cnt, inFuncs[i] ) ) {
+      free( inFuncs );
+      /*      double dur = numOfCalls[i] - previous;
+	      previous = numOfCalls[i];*/
+      double dur = counterExclusiveValues[i][0] - previous;
+      previous = counterExclusiveValues[i][0];
+      return dur;
     }
     
-    TAU_DB_DUMP_INCR();
-    
     std::cerr << "Timer " << cnt << " not found" << std::endl;
+    free( inFuncs );
     return -1;
 }
-
-
-
 #else
 double getTimeSpent( const char* cnt ){
     return 0;
 }
 #endif // def TAUPROF
+
+// Executes the x86 "rdtsc" instruction and returns its EDX:EAX result as a single 64-bit value.
+uint64_t rdtsc(){
+    unsigned int eax, edx;  // low and high 32-bit halves, bound to EAX ("=a") and EDX ("=d")
+    __asm__ __volatile__ ( "rdtsc" : "=a" ( eax ), "=d" ( edx ) );
+    return ( static_cast<uint64_t>( edx ) << 32 ) | eax;
+}
diff --git a/src/profiling.h b/src/profiling.h
index 11080f00fa4053d3625dc9fbda0097b5edad3e91..47e86dd54404c2025730934dc535e449dfb33ffb 100644
--- a/src/profiling.h
+++ b/src/profiling.h
@@ -8,6 +8,7 @@
 #define TAU_STOP( a ) do { ;; } while( 0 );
 #endif // TAUPROF
 
-double getTimeSpent( const char* cnt );
+double getTimeSpent( const char* );  // argument: timer name; with TAUPROF defined returns the change in that timer's counterExclusiveValues[0][0] since the previous call, otherwise returns 0
+uint64_t rdtsc( void );  // 64-bit value built from the two 32-bit halves produced by the x86 "rdtsc" instruction; NOTE(review): uint64_t needs <cstdint>/<stdint.h> - confirm this header includes it
 
 #endif // _PROFILING_H_
diff --git a/src/sequential.cpp b/src/sequential.cpp
index 0c3f1f5051c70839faed7fbaf640b328d24d9088..855104a7f570fe315a8f9b61fb1a463509afb7e0 100644
--- a/src/sequential.cpp
+++ b/src/sequential.cpp
@@ -25,6 +25,7 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
     const char timerB[] = "B";
     const char timeradd[] = "add";
     double timeA, timeB;
+    uint64_t t_start, t_end;
 
     for( a1 = 0 ; a1 < size; a1++ ){
 		i=i+1; 
@@ -41,7 +42,7 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
 					TAB = J[a1][b1]; 
 					for( b2 = 0 ; b2 < size ; b2++ ){
 						for( b3 = 0 ; b3 < size ; b3++ ){
-                            TAU_START( timerB );
+						  TAU_START( timerB );
                            /* Beyond this point, b1 is not used anymore */
 							TABB =  TAB * A*T[b1][b2][b3];
 							for( c1 = 0 ; c1 < size ; c1++ ){
@@ -54,13 +55,17 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
 											for( d2 = 0 ; d2 < size ; d2++ ){
 												TABCDD = TABCD * J[b2][d2];
 												for( d3 = 0 ; d3 < size ; d3++ ){
-                                                    TAU_START( timeradd );
-													Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
+												  TAU_START( timeradd );
+												  t_start = rdtsc();
+												  Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
+												  t_end = rdtsc();
+												  
                                                     TAU_STOP( timeradd );
 #ifdef TAUPROF
-                                                    std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+						    //                                                    std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+                                                    printf( "add %lf %lu len %d\n", getTimeSpent( timeradd ), t_end - t_start, Tens.nops() );
 
-                                                    std::cout << Tens << std::endl;
+						    //                                                    std::cout << Tens << std::endl;
 #endif // TAUPROF
    												}
                                             }