diff --git a/src/profiling.cpp b/src/profiling.cpp
index 8ba041f565964828dbfcc2e2041e782450618812..de8c1524d3b1955efd0706386c8a97d99d21f3f7 100644
--- a/src/profiling.cpp
+++ b/src/profiling.cpp
@@ -3,6 +3,31 @@
 
 #ifdef TAUPROF
 double previous = 0;
+
+// Prints the call counts and per-counter exclusive/inclusive values TAU reports
+// for profiler handle `ptr`, then returns the inclusive value of counter 0.
+double getTimeSpent( void* ptr ){
+  // Everything is zero-initialized so that nothing printed or returned below is
+  // indeterminate should a TAU query leave its output untouched.
+  long calls = 0, childcalls = 0;
+  double incl[TAU_MAX_COUNTERS] = { 0 }, excl[TAU_MAX_COUNTERS] = { 0 };
+  const char **counters = NULL;
+  int numcounters = 0;
+
+  TAU_PROFILER_GET_CALLS(ptr, &calls);
+  TAU_PROFILER_GET_CHILD_CALLS(ptr, &childcalls);
+  TAU_PROFILER_GET_INCLUSIVE_VALUES(ptr, &incl);
+  TAU_PROFILER_GET_EXCLUSIVE_VALUES(ptr, &excl);
+  TAU_PROFILER_GET_COUNTER_INFO(&counters, &numcounters);
+  printf("Calls = %ld, child = %ld\n", calls, childcalls);
+  printf("numcounters = %d\n", numcounters);
+  for( int j = 0 ; j < numcounters ; j++ ){
+    printf(">>>counter [%d] = %s\n", j, counters[j]);
+    printf(" excl [%d] = %g, incl [%d] = %g\n", j, excl[j], j, incl[j]);
+  }
+  return incl[0];   // 0 when no counter value was filled in
+}
+
+
 double getTimeSpent( const char* cnt ){
     
     const char **inFuncs;
diff --git a/src/profiling.h b/src/profiling.h
index 47e86dd54404c2025730934dc535e449dfb33ffb..d06538febcc84214df882e854513a350d0f779a5 100644
--- a/src/profiling.h
+++ b/src/profiling.h
@@ -8,6 +8,7 @@
 #define TAU_STOP( a ) do { ;; } while( 0 );
 #endif // TAUPROF
 
+double getTimeSpent( void* ); // overload taking a TAU profiler handle; returns the inclusive value of counter 0
 double getTimeSpent( const char* );
 uint64_t rdtsc( void );
 
diff --git a/src/sequential.cpp b/src/sequential.cpp
index 0aa406557902abdeae64b064047c47ee88a19649..1f81cd4414fe339fd7807ef7a82eca7b67f346c5 100644
--- a/src/sequential.cpp
+++ b/src/sequential.cpp
@@ -106,7 +106,12 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler
     j = 0;
 
     double time;
-    
+
+#ifdef TAUPROF
+    void* ptr;
+    TAU_PROFILER_CREATE( ptr, "b","", TAU_USER );
+#endif
+
     for( a1 = 0 ; a1 < size; a1++ ){
         i=i+1; 
         //   std::cout << "Tens: " << Tens << std::endl;
@@ -115,17 +120,40 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler
 			j=j+1; 
             //	printf("j = %d\n", j);
 			for( a3 = 0 ; a3 < size ; a3++ ){
-			  //                TAU_START( "b" );
+#ifdef TAUPROF
+                TAU_START( "b" );
+                TAU_PROFILER_START( ptr );
+#endif
 				A = T[a1][a2][a3];
                 /* Beyond this point, a2 and a3 are only used in the simplectic matrix */
 				for( b1 = 0 ; b1 < size ; b1++ ){
                     Tn = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
                     Tens += Tn;
                 }
-				//                TAU_STOP( "b" );
-				//#ifdef TAUPROF
-				#if 0
-                time = getTimeSpent( "b" );
+#ifdef TAUPROF
+                TAU_STOP( "b" );
+                //                time = getTimeSpent( ptr );
+                
+                  long calls, childcalls;
+  double incl[TAU_MAX_COUNTERS], excl[TAU_MAX_COUNTERS];
+  const char **counters;
+  int numcounters, i, j; 
+
+  TAU_PROFILER_GET_CALLS(ptr, &calls);
+  TAU_PROFILER_GET_CHILD_CALLS(ptr, &childcalls);
+  TAU_PROFILER_GET_INCLUSIVE_VALUES(ptr, &incl);
+  TAU_PROFILER_GET_EXCLUSIVE_VALUES(ptr, &excl);
+
+  TAU_PROFILER_GET_COUNTER_INFO(&counters, &numcounters);
+  printf("Calls = %ld, child = %ld\n", calls, childcalls);
+  printf("numcounters = %d\n", numcounters);
+  for (j = 0; j < numcounters ; j++)  {
+    printf(">>>");
+    printf("counter [%d] = %s\n", j, counters[j]);
+    printf(" excl [%d] = %g, incl [%d] = %g\n", j, excl[j], j, incl[j]);
+  }
+
+//                time = getTimeSpent( "b" );
 #endif // TAUPROF                
 			}
 		}
diff --git a/src/tensormatrix_mpi.cpp b/src/tensormatrix_mpi.cpp
index e72db52db810e224783ab809487347aa1a41813c..e1ec0feb076237d3ee08ff11466ca8721039e046 100644
--- a/src/tensormatrix_mpi.cpp
+++ b/src/tensormatrix_mpi.cpp
@@ -11,6 +11,10 @@
 #include "tensormatrix.h"
 #include "utils.h"
 
+#ifdef TAUPROF
+#include <TAU.h>
+#endif
+
 #define DEBUG 0
 
 namespace gi = GiNaC;
@@ -59,7 +63,12 @@ int main( int argc, char** argv ){
 
     double tv_start, tv_para, tv_seq;
     int rank;
-
+    
+#ifdef TAUPROF
+    TAU_INIT(&argc, &argv);
+    TAU_PROFILE_SET_NODE(0);
+#endif
+    
     MPI_Init( &argc, &argv );
     MPI_Comm_rank( MPI_COMM_WORLD, &rank );
     //    std::cout << "Process " << rank << " has pid " << getpid() << std::endl;