Commit 5fd5144a authored by Daniel Torres's avatar Daniel Torres
Browse files

Initial commit

parent 5bacaf94
ifdef TAU_EXEC
MPICC?=$(TAU)/bin/tau_cc.sh
else
MPICC?=mpicc
endif
CFLAGS?=-g -O3 -no-pie
FLAGS?=-lm -lopenblas
OBJECTS=ft_tslu.o matrix_data.o lapack.o mpi_data.o matrix_util.o util.o timers.o
DIRLIB=../lib/
LIBS=${DIRLIB}libfttslu.a
INCLIB=../include/
HEADERS=ft_tslu.h matrix_data.h lapack.h mpi_data.h matrix_util.h util.h timers.h
HEADS=${INCLIB}ft_tslu.h ${INCLIB}matrix_data.h ${INCLIB}lapack.h ${INCLIB}mpi_data.h ${INCLIB}matrix_util.h ${INCLIB}util.h ${INCLIB}timers.h
all: ${OBJECTS} ${LIBS} ${HEADS}
ft_tslu.o: ft_tslu.c ft_tslu.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
matrix_data.o: matrix_data.c matrix_data.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
lapack.o: lapack.c lapack.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
mpi_data.o: mpi_data.c mpi_data.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
matrix_util.o: matrix_util.c matrix_util.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
util.o: util.c util.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
timers.o: timers.c timers.h
${MPICC} ${OMPI} ${TAU_EXEC} ${CFLAGS} -c $< -o $@ ${FLAGS}
${LIBS}: ${OBJECTS}
mkdir -p ${DIRLIB}; ar rc $@ $^; ranlib $@
${HEADS}:
mkdir -p ${INCLIB}; cp ${HEADERS} ${INCLIB}
clean:
rm -rf *.o
This diff is collapsed.
#ifndef _FT_TSLU_H_
#define _FT_TSLU_H_
#include "lapack.h"
#include "mpi_data.h"
#include "matrix_data.h"
#define WAIT_TIME 10000
#define MU_POS 0
#define NU_POS 1
#define MAX_CONCAT_SIZES 2
typedef struct _fttslu_data{
matrix_data *md;
MPI_data *mpid;
}fttslu_data;
int ft_tslu_init(fttslu_data *ftd,double *A,int M,int N,int l_c,char *in,int argc,char **argv);
int ft_tslu_spawned(fttslu_data *ftd);
int ft_tslu(fttslu_data *ftd,int l_c);
int restore_data(fttslu_data *ftd);
int first_middle_step(matrix_data *md_sub,matrix_data *md_subR,MPI_data *mpid);
int last_step(matrix_data *md_sub);
void backup_results(matrix_data *md,matrix_data *md_sub,int index,matrix_data *md_subR,int indexD,char up);
void concatenate_matrices(matrix_data *md_sub,matrix_data *md_subR,int step,char up);
void free_fttslu_data(fttslu_data *ftd);
void init_variables(int l_c,int world_size);
int concat_sizes[MAX_CONCAT_SIZES];
int l_calc;
int step_lim;
int MU,NU;
double *concatU;
#ifndef _OMPI_
int MU_prev,NU_prev;
double *concatU_prev;
double *concatU_ptr;
#endif /* _OMPI_ */
#endif /* _FT_TSLU_H_ */
#include "lapack.h"
void lu_matrix(double *A,int M,int N,int *IPIV){
int LDA=M;
int INFO=0;
#ifdef _TAU_
TAU_START(TAU_time_tslu);
#endif /* _TAU_ */
time_tslu=MPI_Wtime();
dgetrf_(&M,&N,A,&LDA,IPIV,&INFO);
time_tslu_final+=MPI_Wtime()-time_tslu;
#ifdef _TAU_
TAU_STOP(TAU_time_tslu);
#endif /* _TAU_ */
}
void mult_matrix(double *A,int MA,int NA,double *B,int MB,int NB,double *C,int index){
char TRANSA='N';
char TRANSB='N';
double ALPHA=1.0;
double BETA=1.0;
int LDAA=MA;
int LDAB=MB;
int LDAC=MA;
#ifdef _TAU_
TAU_START(TAU_time_mult);
#endif /* _TAU_ */
time_mult=MPI_Wtime();
dgemm_(&TRANSA,&TRANSB,&MA,&NB,&NA,&ALPHA,A,&LDAA,B+index,&LDAB,&BETA,C,&LDAC);
time_mult_final+=MPI_Wtime()-time_mult;
#ifdef _TAU_
TAU_STOP(TAU_time_mult);
#endif /* _TAU_ */
}
void solve_matrix(double *A,double *U,int M,int N){
char SIDE='R';
char UPLO='U';
char TRANSA='N';
char DIAG='N';
double ALPHA=1.0;
int LDA=M;
int LDB=M;
#ifdef _TAU_
TAU_START(TAU_time_solv);
#endif /* _TAU_ */
time_solv=MPI_Wtime();
dtrsm_(&SIDE,&UPLO,&TRANSA,&DIAG,&M,&N,&ALPHA,U,&LDA,A,&LDB);
time_solv_final+=MPI_Wtime()-time_solv;
#ifdef _TAU_
TAU_STOP(TAU_time_solv);
#endif /* _TAU_ */
}
void inv_matrix(double *A,int M,int N,int *IPIV){
int LDA=M;
int INFO=0;
int LWORK=N*N;
double *WORK=(double *)malloc(LWORK*sizeof(double));
#ifdef _TAU_
TAU_START(TAU_time_inv);
#endif /* _TAU_ */
time_inv=MPI_Wtime();
dgetri_(&N,A,&LDA,IPIV,WORK,&LWORK,&INFO);
time_inv_final+=MPI_Wtime()-time_inv;
#ifdef _TAU_
TAU_STOP(TAU_time_inv);
#endif /* _TAU_ */
free(WORK);
}
void add_matrix(double *A,double *B,int M,int N,char op){
double DA=1.0;
int INCX=1;
int INCY=1;
int MN=M*N;
int i,j;
#ifdef _TAU_
TAU_START(TAU_time_add);
#endif /* _TAU_ */
time_add=MPI_Wtime();
if(op=='S'){
for(i=0;i<M;i++)
for(j=0;j<N;j++)
B[j*M+i]*=-1.0;
}
daxpy_(&MN,&DA,A,&INCX,B,&INCY);
time_add_final+=MPI_Wtime()-time_add;
#ifdef _TAU_
TAU_STOP(TAU_time_add);
#endif /* _TAU_ */
}
void rand_matrix(double *A,int M,int N,int r){
int seed[4]={0,0,0,2*r+1};
int ONE=1;
int MN=M*N;
dlarnv_(&ONE,seed,&MN,A);
}
void swap_sub_block(double *A,int MA,int index,int M,int N,int *IPIV,int INCX){
int LDA=M;
int K1=1;
int K2=min(M,N);
double *sub=(double *)calloc(M*N,sizeof(double));
get_sub_block(A,MA,index,sub,M,N);
#ifdef _TAU_
TAU_START(TAU_time_swap);
#endif /* _TAU_ */
time_swap=MPI_Wtime();
dlaswp_(&N,sub,&LDA,&K1,&K2,IPIV,&INCX);
time_swap_final+=MPI_Wtime()-time_swap;
#ifdef _TAU_
TAU_STOP(TAU_time_swap);
#endif /* _TAU_ */
set_sub_block(A,MA,index,sub,M,N);
free(sub);
}
int random_number(){
int i;
int SIZE=100;
int IDIST=1;
int SEED_SIZE=4;
int SEED[SEED_SIZE];
int SEED_MAX=4096;
int sum=0;
for(i=0;i<SEED_SIZE;i++)
SEED[i]=rand()%SEED_MAX;
SEED[SEED_SIZE-1]=SEED[SEED_SIZE-1]+((SEED[SEED_SIZE-1]&1) ? 0 : 1);
double *X=(double *)malloc(SIZE*sizeof(double));
dlarnv_(&IDIST,SEED,&SIZE,X);
for(i=0;i<SIZE;i++)
sum+=(int)floor(X[i]*SIZE*SEED_MAX);
free(X);
return(sum%SIZE);
}
#ifndef _LAPACK_H_
#define _LAPACK_H_
#include "util.h"
#include "matrix_util.h"
void lu_matrix(double *A,int M,int N,int *IPIV);
void mult_matrix(double *A,int MA,int NA,double *B,int MB,int NB,double *C,int index);
void solve_matrix(double *A,double *U,int M,int N);
void inv_matrix(double *A,int M,int N,int *IPIV);
void add_matrix(double *A,double *B,int M,int N,char op);
void rand_matrix(double *A,int M,int N,int r);
void swap_sub_block(double *A,int MA,int index,int M,int N,int *IPIV,int INCX);
int random_number();
void dgetrf_(int *M,int *N,double *A,int *LDA,int *IPIV, int *INFO);
void dgemm_(char *TRANSA,char *TRANSB,int *M,int *N,int *K,double *ALPHA,double *A,int *LDA,double *B,int *LDB,double *BETA,double *C,int *LDC);
void dtrsm_(char *SIDE,char *UPLO,char *TRANSA,char *DIAG,int *M,int *N,double *ALPHA,double *A,int *LDA,double *B,int *LDB);
void dgetri_(int *N,double *A,int *LDA,int *IPIV,double *WORK,int *LWORK,int *INFO);
void daxpy_(int *N,double *DA,double *DX,int *INCX,double *DY,int *INCY);
void dlarnv_(int *IDIST,int *ISEED,int *N,double *X);
void dlaswp_(int *N,double *A,int *LDA,int *K1,int *K2,int *IPIV,int *INCX);
#endif /* _LAPACK_H_ */
#include "matrix_data.h"
matrix_data *matrix_data_init_with_random(int M,int N,int Mb,int Nb,int rank,int copy){
matrix_data *m=matrix_data_init(M,N,Mb,Nb,copy);
rand_matrix(m->A,M,N,rank);
if(copy)
set_A_init(m);
return(m);
}
matrix_data *matrix_data_init_with_file(int M,int N,int Mb,int Nb,char *file,int copy){
FILE *in=fopen(file,"r");
if(in==NULL)
return(NULL);
matrix_data *m=matrix_data_init(M,N,Mb,Nb,copy);
int i,j;
for(i=0;i<m->M;i++)
for(j=0;j<m->N;j++)
fscanf(in,"%lf",&(m->A[j*m->M+i]));
fclose(in);
if(copy)
set_A_init(m);
return(m);
}
matrix_data *matrix_data_init_with_matrix(int M,int N,int Mb,int Nb,double *A,int copy){
matrix_data *m=matrix_data_init(M,N,Mb,Nb,copy);
free(m->A);
m->A=A;
if(copy)
set_A_init(m);
return(m);
}
matrix_data *matrix_data_init(int M,int N,int Mb,int Nb,int copy){
matrix_data *m=(matrix_data *)calloc(1,sizeof(matrix_data));
m->M=M;
m->N=N;
m->A=(double *)calloc(m->M*m->N,sizeof(double));
if(copy)
m->A_init=(double *)calloc(m->M*m->N,sizeof(double));
m->LDA=m->M;
m->IPIV=(int *)calloc(min(m->M,m->N),sizeof(int));
m->INFO=0;
m->U=(double *)calloc(min(m->M,m->N)*min(m->M,m->N),sizeof(double));
m->Mb=Mb;
m->Nb=Nb;
return(m);
}
void matrix_data_free(matrix_data *md){
if(md->U!=NULL)
free(md->U);
if(md->IPIV!=NULL)
free(md->IPIV);
if(md->A_init!=NULL)
free(md->A_init);
if(md->A!=NULL)
free(md->A);
if(md!=NULL)
free(md);
}
void set_U(matrix_data *md){
int i,j,min_md=min(md->M,md->N);
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(i=0;i<min_md;i++)
for(j=0;j<min_md;j++)
md->U[j*min_md+i]=(j>=i) ? md->A[j*md->M+i] : 0;
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
}
void set_A(matrix_data *md,int src){
int i,j,min_md=min(md->M,md->N);
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(i=0;i<md->M;i++)
for(j=0;j<md->N;j++)
md->A[j*md->M+i]=(j<i && src==0) ? 0 : (j<i && src==1) ? md->A_init[j*md->M+i] : md->U[j*min_md+i];
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
}
void set_A_init(matrix_data *md){
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
memcpy(md->A_init,md->A,md->M*md->N*sizeof(double));
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
}
void print_matrix_data(matrix_data *md){
int i,j,min_md=min(md->M,md->N);
for(i=0;i<md->M;i++){
for(j=0;j<md->N;j++){
if(md->A[j*md->M+i]>=0.0)
printf("+");
printf("%.4lf ",md->A[j*md->M+i]);
}
if(i<min_md){
printf("\t|\t");
for(j=0;j<min_md;j++){
if(md->U[j*min_md+i]>=0.0)
printf("+");
printf("%.4lf ",md->U[j*min_md+i]);
}
}
printf("\n");
}
}
#ifndef _MATRIX_DATA_H_
#define _MATRIX_DATA_H_
#include "lapack.h"
typedef struct _matrix_data{
int M;
int N;
double *A;
double *A_init;
int LDA;
int *IPIV;
int INFO;
double *U;
int Mb;
int Nb;
}matrix_data;
matrix_data *matrix_data_init_with_random(int M,int N,int Mb,int Nb,int rank,int copy);
matrix_data *matrix_data_init_with_file(int M,int N,int Mb,int Nb,char *file,int copy);
matrix_data *matrix_data_init_with_matrix(int M,int N,int Mb,int Nb,double *A,int copy);
matrix_data *matrix_data_init(int M,int N,int Mb,int Nb,int copy);
void matrix_data_free(matrix_data *md);
void set_U(matrix_data *md);
void set_A(matrix_data *md,int src);
void set_A_init(matrix_data *md);
void print_matrix_data(matrix_data *md);
#endif /* _MATRIX_DATA_H_ */
#include "matrix_util.h"
double *concat_U_blocks_first_step(double *A,int MA,int NA,double *B,int MB,int NB,char up){
int M=NA+NB;
int i,j,upperM,lowerM;
double *upper;
double *lower;
double *AB=(double *)calloc(M*NA,sizeof(double));
if(up=='A'){
upperM=MA;
lowerM=MB;
upper=A;
lower=B;
}
else{
upperM=MB;
lowerM=MA;
upper=B;
lower=A;
}
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(j=0;j<NA;j++){
for(i=0;i<=j;i++){
AB[j*M+i]=upper[j*upperM+i];
AB[j*M+(i+NA)]=lower[j*lowerM+i];
}
}
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
return(AB);
}
double *concat_U_blocks(double *A,int NA,double *B,int NB,char up){
int M=NA+NB;
int i,upperM,lowerM;
double *upper;
double *lower;
double *AB=(double *)calloc(M*NA,sizeof(double));
if(up=='A'){
upperM=NA;
lowerM=NB;
upper=A;
lower=B;
}
else{
upperM=NB;
lowerM=NA;
upper=B;
lower=A;
}
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(i=0;i<NA;i++){
memcpy(AB+i*M,upper+i*upperM,upperM*sizeof(double));
memcpy(AB+i*M+upperM,lower+i*lowerM,lowerM*sizeof(double));
}
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
return(AB);
}
double *concat_U_blocks_horizontal(double *A,int MA,int NA,double *B,int MB,int NB,char l){
int N=NA+NB;
int leftM,rightM,leftN,rightN;
double *left;
double *right;
double *AB=(double *)calloc(MA*N,sizeof(double));
if(l=='A'){
leftM=MA;
rightM=MB;
leftN=NA;
rightN=NB;
left=A;
right=B;
}
else{
leftM=MB;
rightM=MA;
leftN=NB;
rightN=NA;
left=B;
right=A;
}
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
memcpy(AB,left,leftM*leftN*sizeof(double));
memcpy(AB+leftM*leftN,right,rightM*rightN*sizeof(double));
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
return(AB);
}
double *pack_U(double *U,int M,int N){
int i,j,k=0;
int size=N+((int)((N*(N-1))/2));
double *packed=(double *)calloc(size,sizeof(double));
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(j=0;j<N;j++)
for(i=0;i<=j;i++,k++)
packed[k]=U[j*M+i];
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
return(packed);
}
double *unpack_U(double *U,int N){
int i,j,k=0;
double *matrix=(double *)calloc(N*N,sizeof(double));
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(j=0;j<N;j++)
for(i=0;i<=j;i++,k++)
matrix[j*N+i]=U[k];
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
return(matrix);
}
double *pad_U(double *U,int MU,int M,int N){
int i,j;
double *U_new=(double *)malloc(M*N*sizeof(double));
memset(U_new,0,M*N*sizeof(double));
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
for(i=0;i<MU;i++){
for(j=i;j<MU;j++){
U_new[j*M+i]=U[j*MU+i];
}
}
time_copy_final+=MPI_Wtime()-time_copy;
#ifdef _TAU_
TAU_STOP(TAU_time_copy);
#endif /* _TAU_ */
return(U_new);
}
void get_sub_block(double *A,int MA,int index,double *sub,int M,int N){
int sub_tam=M*N;
int count=0;
#ifdef _TAU_
TAU_START(TAU_time_copy);
#endif /* _TAU_ */
time_copy=MPI_Wtime();
while(count<sub_tam){
memcpy(sub+count,A+index,M*sizeof(double));
count+=M;
index+=MA;
}
time_copy_final+=MPI_Wtime()-time_copy;