Camille Coti / TensorJoseph / Commits

Commit 6d574e28, authored Jan 30, 2020 by Camille Coti

    parallelize the final addition

Parent: e1fcbab3
Changes: 8
src/Makefile
@@ -30,7 +30,7 @@ MPISRC = masterworker.cpp mw_addslave.cpp hierarchical.cpp \
 	perf.cpp sequential.cpp tensormatrix_mpi.cpp \
 	utils.cpp utils_parall.cpp profiling.cpp mw_combined.cpp \
 	masterworker2.cpp mw_addslave2.cpp hierarchical2.cpp \
-	masterworker3.cpp mw_addslave3.cpp
+	masterworker3.cpp mw_addslave3.cpp mw_addslave4.cpp

 MPIOBJ = $(MPISRC:.cpp=.o)
src/masterworker.cpp
@@ -31,10 +31,6 @@ gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm =
-    expr_c = NULL;
-    expr_c = (char*) malloc( 3279 ); // TMP
-    int i, j;
-    i = 0; j = 0;
     int receivedresults = 0;
     unsigned int N = size / 2;
@@ -45,9 +41,7 @@ gi::ex multiply_1level_master( tensor3D_t& T, unsigned int size, MPI_Comm comm =
     /* Build a list of argument sets */
     for( a4 = 0 ; a4 < N ; a4++ ){
-        i = i + 1;
         for( a2 = 0 ; a2 < N ; a2++ ){
-            j = j + 1;
             for( a1 = 0 ; a1 < N ; a1++ ){
                 parameters_t p( a4, a2, a1 );
                 input.push_back( p );
src/mw_addslave.cpp
@@ -42,6 +42,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
     std::vector<parameters_t> input;
     std::vector<std::string> results; /* length and char* */
+
+    double t1 = getTime();
     /* Build a list of argument sets */
     for( a4 = 0 ; a4 < N ; a4++ ){
@@ -60,6 +62,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
     symbols = all_symbols_3D( size );
+
+    double t2 = getTime();
     /* Distribute the work */
     while( input.size() > 0 ) {
@@ -98,6 +102,8 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
         }
     }
+
+    double t3 = getTime();
     /* Wait until everyone is done */
     running = np - 1; // all the slaves are running
@@ -122,9 +128,17 @@ gi::ex multiply_1level_master_addslave( tensor3D_t& T, unsigned int size, MPI_Co
         send_add_or_end_addslave( results, src, &running );
     }
+    double t4 = getTime();

     /* Add whatever I have left */
     Tens = add_expressions( results, symbols );
+    double t5 = getTime();
+
+    std::cout << "Init: " << t2 - t1 << std::endl;
+    std::cout << "Loop: " << t3 - t2 << std::endl;
+    std::cout << "Fini: " << t4 - t3 << std::endl;
+    std::cout << "Add: " << t5 - t4 << std::endl;
+
 #if DEBUG
     std::cout << "Received " << receivedresults << " results" << std::endl;
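The timing added above relies on getTime() from profiling.h, whose implementation is not part of this diff. A minimal stand-in, assuming a plain wall-clock helper returning seconds (inside an MPI program, MPI_Wtime() would serve the same purpose):

    #include <sys/time.h>

    // Hypothetical stand-in for the getTime() declared in profiling.h:
    // wall-clock time in seconds, with microsecond resolution.
    double getTime( void ) {
        struct timeval tv;
        gettimeofday( &tv, NULL );
        return (double) tv.tv_sec + 1e-6 * (double) tv.tv_usec;
    }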
src/mw_addslave2.cpp
@@ -183,8 +183,10 @@ void multiply_1level_slave_addslave2( tensor3D_t& T, unsigned int size, MPI_Comm
             /* Delinearize all the expressions and add them */
+            double t1 = getTime();
             Tens = add_expressions( results_s, symbols );
+            std::cout << "Addition: " << getTime() - t1 << std::endl;

             /* Send the result */
             send_result( Tens );
src/mw_addslave4.cpp
new file mode 100644
#include <iostream>

#include <mpi.h>
#include <ginac/ginac.h>
#include <math.h> // for ceil

#include "products.h"
#include "utils_parall.h"
#include "parall_constants.h"
#include "parall_internal.h"
#include "utils.h"
#include "profiling.h"

namespace gi = GiNaC;

#define MAXLENADD 1 // 256

unsigned int maxlen( std::vector<std::string> expressions ){
    unsigned int len = 0;
    for( auto s: expressions ) {
        unsigned int l2 = s.length();
        if( len < l2 ) {
            len = l2;
        }
    }
    return len;
}
gi::ex add_expressions_parall( std::vector<std::string> expressions, gi::lst symbols, parameters_2_1_t p, MPI_Comm comm = MPI_COMM_WORLD ) {
    gi::ex Tens = 0;
    int size, i, nb, len;
    unsigned int chunk, end;
    std::vector<unsigned int> cut;
    unsigned int* lengths;
    std::string result;
    char* expr;
    MPI_Status status;
    size_t expr_c_size = 0;
    char* expr_c;

    /* If the expressions are short, compute the sum locally */
    if( maxlen( expressions ) < MAXLENADD )
        return add_expressions( expressions, symbols );

    MPI_Comm_size( comm, &size );

    nb = expressions.size();
    lengths = (unsigned int*) malloc( nb * sizeof( unsigned int ) );
    for( i = 0 ; i < nb ; i++ ) {
        cut.push_back( 0 );
        lengths[i] = 0;
    }
    unsigned int running = size - 1;

    p.setParams( nb, 1 );
    /* TODO: this could be factored out with send_expressions_to_add */
    for( int peer = 1 ; peer < size ; peer++ ) {
        i = 0;
        for( auto s: expressions ) {
            /* How much are we going to send: stop at a + or - sign (and keep the sign) */
            chunk = ceil( s.length() / ( size - 1 ) );
            end = cut[i] + chunk;
            while( !( s[end] == '+' || s[end] == '-' || end == s.length() - 1 ) ){
                end++;
            }
            end--;
            lengths[i] = end - cut[i] + 1;
            i++;
        }

        /* Send the lengths */
        MPI_Send( &p, 1, DT_PARAMETERS_2_1, peer, TAG_ADD, comm );
        MPI_Send( lengths, nb, MPI_INT, peer, TAG_ADD, comm );

        /* Send the strings */
        for( unsigned int j = 0 ; j < nb ; j++ ) {
            expr = const_cast<char*>( expressions[j].c_str() );
            MPI_Send( &( expr[ cut[j] ] ), lengths[j], MPI_CHAR, peer, TAG_ADD, comm );
            cut[j] += lengths[j];
        }
    }

    /* Receive the results */
    expr_c = NULL;
    while( running > 0 ) {
        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
        int src = status.MPI_SOURCE;
        len++;
        if( len != 0 ) {
            if( len > expr_c_size ) {
                expr_c_size = len;
                if( NULL != expr_c ) free( expr_c );
                expr_c = (char*) malloc( expr_c_size ); // The \0 was added by the slave
            }

            /* Receive the result */
            MPI_Recv( expr_c, len - 1, MPI_CHAR, src, TAG_EXPR, comm, &status );
            expr_c[ len - 1 ] = '\n';

            /* Concatenate the result */
            std::string recvs( expr_c );
            if( expr_c[0] != '-' )
                result += '+';
            result += recvs;
        }
        running--;
        send_end( src, p );
    }

    Tens = de_linearize_expression( result, symbols );
    free( lengths );
    free( expr_c );
    return Tens;
}
/*******************************************************************************
* Parallel 1-level decomposition with addition on a slave *
*******************************************************************************/
gi::ex multiply_1level_master_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
    gi::ex Tens = 0;
    unsigned int a2, a4;
    gi::lst symbols;
    MPI_Status status;
    char* expr_c;
    size_t expr_c_size = 0;
    int src, np;
    unsigned int len, running = 0;
    parameters_2_1_t pzero( 0, 0 );

    MPI_Comm_size( comm, &np );

    expr_c = NULL;
    expr_c = (char*) malloc( 3279 );
    int receivedresults = 0;
    unsigned int N = size / 2;

    std::vector<parameters_2_1_t> input;
    std::vector<std::string> results; /* length and char* */

    /* Build a list of argument sets */
    for( a4 = 0 ; a4 < N ; a4++ ){
        for( a2 = 0 ; a2 < N ; a2++ ){
            parameters_2_1_t p( a4, a2 );
            input.push_back( p );
        }
    }

    /* Compute the set of symbols */
    /* Could be done while the first slave is working */
    symbols = all_symbols_3D( size );

    /* Distribute the work */
    while( input.size() > 0 ) {
        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
        if( status.MPI_TAG == TAG_PULL ) {
            /* Nothing else will come: just send some work */
            src = status.MPI_SOURCE;
            send_work( input, src );
        } else {
            if( status.MPI_TAG == TAG_RES ){
                src = status.MPI_SOURCE;
                /* The first message contains the length of what is coming next */
                if( len != 0 ) {
                    if( len > expr_c_size ) {
                        expr_c_size = len;
                        if( NULL != expr_c ) free( expr_c );
                        expr_c = (char*) malloc( expr_c_size ); // The \0 was added by the slave
                    }

                    /* Receive the result */
                    MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );

                    /* Put it in the result queue */
                    results.push_back( std::string( expr_c ) );
                }
                /* Send more work */
                send_work_addslave( input, results, src );
            } else {
                std::cerr << "Wrong tag received " << status.MPI_TAG << std::endl;
            }
        }
    }

    /* Wait until everyone is done */
    running = np - 1; // all the slaves are running
    while( running > 0 ) {
        MPI_Recv( &len, 1, MPI_UNSIGNED, MPI_ANY_SOURCE, MPI_ANY_TAG, comm, &status );
        src = status.MPI_SOURCE;
        if( len != 0 ) {
            if( len > expr_c_size ) {
                expr_c_size = len;
                if( NULL != expr_c ) free( expr_c );
                expr_c = (char*) malloc( expr_c_size ); // The \0 was added by the slave
            }

            /* Receive the result */
            MPI_Recv( expr_c, len, MPI_CHAR, src, TAG_EXPR, comm, &status );

            /* Put it in the result queue */
            results.push_back( std::string( expr_c ) );
        }
        /* Do not send the end signal yet */
        running--;
    }

    /* Add whatever I have left */
    Tens = add_expressions_parall( results, symbols, pzero, comm );

#if DEBUG
    std::cout << "Received " << receivedresults << " results" << std::endl;
    std::cout << "Tpara=" << Tens << ";" << std::endl;
#endif

    if( NULL != expr_c ) free( expr_c );
    return Tens;
}
void multiply_1level_slave_addslave4( tensor3D_t& T, unsigned int size, MPI_Comm comm = MPI_COMM_WORLD ) {
    gi::ex Tens;
    int a2, a4;
    unsigned int len = 0;

    parameters_2_1_t params;
    MPI_Status status;
    char* expr_c;

    int rank;
    MPI_Comm_rank( comm, &rank );

    /* Ask for some work */
    MPI_Send( &len, 1, MPI_UNSIGNED, ROOT, TAG_PULL, comm );

    /* Compute the set of symbols */
    gi::lst symbols = all_symbols_3D( size );

    while( true ){
        /* Receive a set of parameters */
        MPI_Recv( &params, 1, DT_PARAMETERS_2_1, ROOT, MPI_ANY_TAG, comm, &status );

        if( status.MPI_TAG == TAG_WORK ){
            a4 = params.a4;
            a2 = params.a2;

            Tens = one_level1_product( &T, size, a4, a2 );
            send_result( Tens );
        } else {
            if( status.MPI_TAG == TAG_ADD ) {
                /* Receive a set of expressions to add */

                /* Number of expressions received */
                int nb = params.a4;
                a2 = params.a2;

                /* Length of each string */
                unsigned int* lengths = (unsigned int*) malloc( nb * sizeof( unsigned int ) );
                MPI_Recv( lengths, nb, MPI_INT, ROOT, TAG_ADD, comm, &status );

                std::vector<std::string> results_s;
                char* c_str;
                int i;
                int len;
                for( i = 0 ; i < nb ; i++ ) {
                    len = lengths[i] + 1;
                    c_str = (char*) malloc( len );
                    MPI_Recv( c_str, len - 1, MPI_CHAR, ROOT, TAG_ADD, comm, &status );
                    c_str[ len - 1 ] = '\0'; // The master sends C++ strings, which do not contain the final '\0'
                    results_s.push_back( std::string( c_str ) );
                    free( c_str );
                }

                /* Delinearize all the expressions and add them */
                Tens = add_expressions( results_s, symbols );

                /* Send the result */
                send_result( Tens );
            } else {
                if( status.MPI_TAG == TAG_END ){
                    return;
                } else {
                    std::cerr << "Wrong tag received on slave " << status.MPI_TAG << std::endl;
                }
            }
        }
    }
}
/* Communication protocol:
M -> W: always the same size, therefore unique communication
W -> M: send an unsigned int (size of the expression), then the expression (table of chars)
*/
gi::ex multiply_1level_mw_addslave4( tensor3D_t& T, int size ) {
    // simpler: same dimension everywhere
    int rank;
    gi::ex Tens = 0;
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );

    /* Create a new datatype for the parameters */
    create_parameters_datatype_2_1();

    /* Here we go */
    if( 0 == rank ) {
        Tens = multiply_1level_master_addslave4( T, size );
    } else {
        multiply_1level_slave_addslave4( T, size );
    }

    /* Finalize */
    free_parameters_2_1_dt();

    return Tens;
}
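The core of the parallel final addition is the chunking rule in add_expressions_parall(): each linearized expression is cut into roughly equal pieces, and every cut is moved forward to the next '+' or '-' so that a worker always receives whole terms. The following standalone sketch illustrates that rule only; the split_at_signs helper is hypothetical, and the commit itself keeps per-expression cut offsets and ships the pieces over MPI instead of collecting them in a vector:

    #include <cmath>
    #include <iostream>
    #include <string>
    #include <vector>

    // Illustrative sketch of the chunking rule used in add_expressions_parall():
    // split a linearized expression into nworkers pieces of roughly equal size,
    // each piece ending just before a '+' or '-' sign.
    std::vector<std::string> split_at_signs( const std::string& s, int nworkers ) {
        std::vector<std::string> chunks;
        size_t chunk = (size_t) std::ceil( (double) s.length() / nworkers );
        size_t begin = 0;
        while( begin < s.length() ) {
            size_t end = begin + chunk;
            if( end >= s.length() ) {
                end = s.length();                 // last piece: take what is left
            } else {
                while( end < s.length() && s[end] != '+' && s[end] != '-' )
                    end++;                        // stop right before the next sign
            }
            chunks.push_back( s.substr( begin, end - begin ) );
            begin = end;                          // next piece starts with its sign
        }
        return chunks;
    }

    int main() {
        std::string expr = "2*x*y+3*y^2-z+4*x-5*y*z+7";
        for( const std::string& c : split_at_signs( expr, 3 ) )
            std::cout << c << std::endl;
        return 0;
    }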
src/tensormatrix.h
@@ -29,6 +29,7 @@ gi::ex multiply_1level_mw3( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave2( tensor3D_t&, int );
 gi::ex multiply_1level_mw_addslave3( tensor3D_t&, int );
+gi::ex multiply_1level_mw_addslave4( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch( tensor3D_t&, int );
 gi::ex multiply_2levels_mw_hierarch2( tensor3D_t&, int );
 gi::ex multiply_combined( tensor3D_t&, int );
src/tensormatrix_mpi.cpp
@@ -32,7 +32,8 @@ namespace gi = GiNaC;
    - o/O: Master-Worker, middle grain -> multiply_1level_mw3
    - A/a: Master-Worker, addition on a slave -> multiply_1level_mw_addslave
    - B/b: Master-Worker, coarser grain, addition on a slave -> multiply_1level_mw_addslave2
-   - D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave2
+   - D/d: Master-Worker, middle grain, addition on a slave -> multiply_1level_mw_addslave3
+   - E/e: Master-Worker, middle grain, addition on a slave, parallel final addition -> multiply_1level_mw_addslave4
    - H/h: Hierarchical master-worker -> multiply_1level_mw_hierarch
    - i/I: Hierarchical master-worker, coarser grain -> multiply_1level_mw_hierarch
    - C/c: Combined -> multiply_combined
@@ -115,6 +116,10 @@ int main( int argc, char** argv ){
         case 'd':
             tostart = 'd';
             break;
+        case 'E':
+        case 'e':
+            tostart = 'e';
+            break;
         case 'H':
         case 'h':
             tostart = 'h';
@@ -176,6 +181,9 @@ int main( int argc, char** argv ){
         case 'd':
             Tpara = multiply_1level_mw_addslave3( T, N );
             break;
+        case 'e':
+            Tpara = multiply_1level_mw_addslave4( T, N );
+            break;
         case 'h':
             Tpara = multiply_2levels_mw_hierarch( T, N );
             break;
src/utils_parall.h
@@ -27,6 +27,9 @@ public:
     unsigned int a4, a2;
     parameters_2_1_t( unsigned int, unsigned int );
     parameters_2_1_t( void ){};
+    void setA4( unsigned int _a4 ) { this->a4 = _a4; }
+    void setA2( unsigned int _a2 ) { this->a2 = _a2; }
+    void setParams( unsigned int _a4, unsigned int _a2 ) { this->a4 = _a4; this->a2 = _a2; };
};

class parameters_2_2_t {
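These descriptors travel over MPI as DT_PARAMETERS_2_1 (see mw_addslave4.cpp above); the datatype is built by create_parameters_datatype_2_1(), whose definition is not part of this diff. A possible sketch, assuming parameters_2_1_t is plain data whose only members are the two unsigned ints a4 and a2:

    #include <mpi.h>

    // Hypothetical sketch only; the real create_parameters_datatype_2_1() and
    // free_parameters_2_1_dt() used by the commit are defined elsewhere.
    MPI_Datatype DT_PARAMETERS_2_1;

    void create_parameters_datatype_2_1( void ) {
        MPI_Type_contiguous( 2, MPI_UNSIGNED, &DT_PARAMETERS_2_1 );
        MPI_Type_commit( &DT_PARAMETERS_2_1 );
    }

    void free_parameters_2_1_dt( void ) {
        MPI_Type_free( &DT_PARAMETERS_2_1 );
    }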