diff --git a/src/Makefile b/src/Makefile
index 027747984d52f208ba62129843893af67ba2c125..b79136cbe5ad82ff19fbf69655a9fdb8924114e9 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -1,16 +1,34 @@
+TAU = 0
+
 CXX = g++
 MPICXX = mpic++
 LD = g++
 MPILD = mpic++
-CFLAGS = -Wall -g -O3  -Wno-unused-variable -std=c++11
-LDOPT = -lginac
+
+ifeq ($(TAU),1)
+	TAUDIR = $(HOME)/logiciels/tau-2.29/x86_64
+	TAUOPT = -DTAUPROF -I$(TAUDIR)/../include
+	TAULIB = -L$(TAUDIR)/lib -lTAU
+
+	MPICXX = tau_cxx.sh
+	CXX = tau_cxx.sh
+	LD = tau_cxx.sh
+	MPILD = tau_cxx.sh
+#	export TAU_MAKEFILE=$(TAUDIR)/lib/Makefile.tau-papi-mpi-pdt-openmp
+else
+	TAUOPT =
+	TAULIB =
+endif
+
+CFLAGS = -Wall -g -O3  -Wno-unused-variable -std=c++11 $(TAUOPT)
+LDOPT = -lginac $(TAULIB)
 
 MPIEXEC = mpiexec
 NP = 5
 
 MPISRC = hierarchical.cpp  masterworker.cpp  mw_addslave.cpp \
          perf.cpp  sequential.cpp  tensormatrix_mpi.cpp      \
-         utils.cpp  utils_parall.cpp
+         utils.cpp  utils_parall.cpp profiling.cpp
 MPIOBJ= $(MPISRC:.cpp=.o)
 
 
diff --git a/src/profiling.cpp b/src/profiling.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..dc165f44071e7bd9048c3dea6228eea0230c42ec
--- /dev/null
+++ b/src/profiling.cpp
@@ -0,0 +1,59 @@
+#include <iostream>
+#include "profiling.h"
+
+#ifdef TAUPROF
+double getTimeSpent( const char* cnt ){
+    
+    const char **inFuncs;
+    /* The first dimension is functions, and the 
+       second dimension is counters */
+    double **counterExclusiveValues;
+    double **counterInclusiveValues;
+    int *numOfCalls;
+    int *numOfSubRoutines;
+    const char **counterNames;
+    int numOfCouns;
+    int numOfFunctions;
+    const char ** functionList;
+    int i;
+    
+    TAU_GET_FUNC_NAMES(functionList, numOfFunctions);
+    
+    inFuncs = (const char **) malloc(sizeof(const char *) * numOfFunctions );
+    
+    for( i = 0 ; i < numOfFunctions ; i++ ) {
+        inFuncs[i] = functionList[i];
+    }
+        
+    //Just to show consistency.
+    TAU_DB_DUMP();
+    
+    TAU_GET_FUNC_VALS( inFuncs, numOfFunctions,
+                       counterExclusiveValues,
+                       counterInclusiveValues,
+                       numOfCalls,
+                       numOfSubRoutines,
+                       counterNames,
+                       numOfCouns );
+    
+    TAU_DUMP_FUNC_VALS_INCR( inFuncs,  numOfFunctions );
+    
+    for( i = 0 ; i < numOfFunctions ; i++ ) {
+        if( 0 == strcmp( cnt, inFuncs[i] ) ) {
+            return counterExclusiveValues[i][0];
+        }
+    }
+    
+    TAU_DB_DUMP_INCR();
+    
+    std::cerr << "Timer " << cnt << " not found" << std::endl;
+    return -1;
+}
+
+
+
+#else
+double getTimeSpent( const char* cnt ){
+    return 0;
+}
+#endif // def TAUPROF
diff --git a/src/profiling.h b/src/profiling.h
new file mode 100644
index 0000000000000000000000000000000000000000..11080f00fa4053d3625dc9fbda0097b5edad3e91
--- /dev/null
+++ b/src/profiling.h
@@ -0,0 +1,13 @@
+#ifndef _PROFILING_H_
+#define _PROFILING_H_
+
+#ifdef TAUPROF
+#include <TAU.h>
+#else
+#define TAU_START( a ) do { ;; }while( 0 );
+#define TAU_STOP( a ) do { ;; } while( 0 );
+#endif // TAUPROF
+
+double getTimeSpent( const char* cnt );
+
+#endif // _PROFILING_H_
diff --git a/src/sequential.cpp b/src/sequential.cpp
index f7933d9fa0bca85c0fd34169cfce7e7b0bcc3ccd..0c3f1f5051c70839faed7fbaf640b328d24d9088 100644
--- a/src/sequential.cpp
+++ b/src/sequential.cpp
@@ -1,5 +1,6 @@
 #include <ginac/ginac.h>
 
+#include "profiling.h"
 #include "tensormatrix.h"
 #include "products.h"
 
@@ -19,23 +20,30 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
  	int i, j;
     i = 0;
     j = 0;
+    
+    const char timerA[] = "A";
+    const char timerB[] = "B";
+    const char timeradd[] = "add";
+    double timeA, timeB;
 
     for( a1 = 0 ; a1 < size; a1++ ){
 		i=i+1; 
         //  std::cout << "Tens: " << Tens << std::endl;
-		printf("i = %d\n", i); 
+        //	printf("i = %d\n", i); 
 		for( a2 = 0; a2 < size ; a2++ ){
 			j=j+1; 
-			printf("j = %d\n", j);
+            //			printf("j = %d\n", j);
 			for( a3 = 0 ; a3 < size ; a3++ ){
+                TAU_START( timerA );
 				A = T[a1][a2][a3];
                 /* Beyond this point, a2 and a3 are only used in the simplectic matrix */
 				for( b1 = 0 ; b1 < size ; b1++ ){
 					TAB = J[a1][b1]; 
 					for( b2 = 0 ; b2 < size ; b2++ ){
 						for( b3 = 0 ; b3 < size ; b3++ ){
-							TABB =  TAB * A*T[b1][b2][b3];
+                            TAU_START( timerB );
                            /* Beyond this point, b1 is not used anymore */
+							TABB =  TAB * A*T[b1][b2][b3];
 							for( c1 = 0 ; c1 < size ; c1++ ){
 								for( c2 = 0 ; c2 < size ; c2++ ){
 									TABC = TABB * J[a2][c2];
@@ -46,16 +54,31 @@ gi::ex multiply_seq( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler: s
 											for( d2 = 0 ; d2 < size ; d2++ ){
 												TABCDD = TABCD * J[b2][d2];
 												for( d3 = 0 ; d3 < size ; d3++ ){
+                                                    TAU_START( timeradd );
 													Tens = Tens + TABCDD * T[d1][d2][d3]*J[a3][d3];
-												}
+                                                    TAU_STOP( timeradd );
+#ifdef TAUPROF
+                                                    std::cout << "add " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+
+                                                    std::cout << Tens << std::endl;
+#endif // TAUPROF
+   												}
                                             }
 										}
 									}
 								}
 							}
+                            TAU_STOP( timerB );
+#ifdef TAUPROF
+                            std::cout << "B " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+#endif // TAUPROF
 						}
                     }
 				}
+                TAU_STOP( timerA );
+#ifdef TAUPROF
+                std::cout << "A " << getTimeSpent( timeradd ) << " len " << Tens.nops() << std::endl;
+#endif // TAUPROF
 			}
 		}
 	}
@@ -76,24 +99,32 @@ gi::ex multiply_1level( tensor3D_t& T, matrix_int_t& J, int size ) {  // simpler
  	int i, j;
     i = 0;
     j = 0;
+
+    double time;
     
     for( a1 = 0 ; a1 < size; a1++ ){
         i=i+1; 
         //   std::cout << "Tens: " << Tens << std::endl;
-		printf("i = %d\n", i); 
+        //		printf("i = %d\n", i); 
 		for( a2 = 0; a2 < size ; a2++ ){
 			j=j+1; 
-			printf("j = %d\n", j);
+            //	printf("j = %d\n", j);
 			for( a3 = 0 ; a3 < size ; a3++ ){
+                TAU_START( "b" );
 				A = T[a1][a2][a3];
                 /* Beyond this point, a2 and a3 are only used in the simplectic matrix */
 				for( b1 = 0 ; b1 < size ; b1++ ){
                     Tn = one_level1_product( &T, &J, A, size, a1, a2, a3, b1 );
                     Tens += Tn;
                 }
+                TAU_STOP( "b" );
+#ifdef TAUPROF
+                time = getTimeSpent( "b" );
+#endif // TAUPROF                
 			}
 		}
 	}
+    
     return Tens;
 }
 
@@ -103,13 +134,15 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i
 
     gi::ex Tens = 0;
     gi::ex T0, T1, T2, T3, T4, T5;
-    
+    const char timerB[] = "B";
+
     TAB = (*J)[a1][b1]; 
     for( b2 = 0 ; b2 < size ; b2++ ){
         for( b3 = 0 ; b3 < size ; b3++ ){
             TABB =  TAB * A*(*T)[b1][b2][b3];
             T5 = 0;
             /* Beyond this point, b1 is not used anymore */
+            TAU_START( timerB );
             for( c1 = 0 ; c1 < size ; c1++ ){
                 T4 = 0;
                 for( c2 = 0 ; c2 < size ; c2++ ){
@@ -138,6 +171,7 @@ gi::ex one_level1_product( tensor3D_t* T, matrix_int_t *J, gi::ex A, int size, i
                 T5 += T4;
             }
             Tens += T5;
+            TAU_STOP( timerB );
         }
     }