Commit 60e42923 authored by Jaime Arias's avatar Jaime Arias
Browse files

fix: plot all combination when comparing tools

parent c8481f0a
%% Cell type:code id: tags:
``` python
import os
import pandas as pd
import plotly.io as pio
import plotly.express as px
import plotly.graph_objs as go
from itertools import combinations
from plotly.subplots import make_subplots
import plotly.io as pio
# Figures are rendered inside the notebook
pio.renderers.default = "notebook_connected"
# The same template is used by plotly express and by plotly.io figures
px.defaults.template = pio.templates.default = "simple_white"
# Layout shared by all the comparison figures: fixed 500x500 canvas,
# mirrored axes with power-style exponents, y axis anchored 1:1 to the
# x axis, and a centered title.
LAYOUT_FIGURES = {
    'autosize': False,
    'width': 500,
    'height': 500,
    'xaxis': {
        'constrain': "domain",
        'mirror': True,
        'showexponent': "all",
        'exponentformat': "power",
    },
    'yaxis': {
        'scaleanchor': "x",
        'scaleratio': 1,
        'mirror': True,
        'showexponent': "all",
        'exponentformat': "power",
    },
    'title': {
        'y': 0.9,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
    },
}
```
%% Cell type:markdown id: tags:
# Auxiliary Functions
%% Cell type:code id: tags:
``` python
def create_folder(path):
    """Creates a folder (and its missing parents) if it does not exist

    Parameters
    ----------
    path : str
        Path of the new folder

    Raises
    ------
    FileExistsError
        If `path` already exists but is not a directory

    Examples
    --------
    >>> create_folder('./results')
    """
    # exist_ok avoids the race between checking for the folder and creating it
    os.makedirs(path, exist_ok=True)
```
%% Cell type:code id: tags:
``` python
def create_figure(df, model):
    """Builds a scatter figure with the time spent by each tool on each formula of a model

    Parameters
    ----------
    df : pandas.Dataframe
        Dataframe containing the results of the experiments. The columns
        `model`, `formula`, `time` and `tool` are read.
    model : string
        Model to be plotted; only the rows of this model are kept

    Returns
    -------
    plotly.graph_objects.Figure
        Scatter figure

    Examples
    --------
    >>> import os
    >>> import pandas as pd
    >>> csv_file = os.path.join("results", "output.csv")
    >>> df = pd.read_csv(csv_file)
    >>> fig = create_figure(df, 'philo10')
    """
    rows_of_model = df[df.model == model]
    scatter = px.scatter(
        rows_of_model,
        x="formula",
        y="time",
        color="tool",
        title=model,
        symbol_sequence=['x'],
    )
    # reuse the title placement shared by all the figures
    scatter.update_layout(title=LAYOUT_FIGURES['title'], yaxis_title="time (s)")
    return scatter
```
%% Cell type:code id: tags:
``` python
def get_axis_title(experiment, show_strategy=True):
    """Get the axis title of a figure depending on the experiment being plotted

    The experiment string is split on '_'. The first field is the tool name
    and the last three fields are read as strategy, number of nodes and
    number of cores. When there are exactly five fields, the second one is
    the parallelization library.

    Parameters
    ----------
    experiment : str
        String with the experiment information
    show_strategy : bool, optional
        Flag to show the information related to the strategy used by the tool

    Returns
    -------
    str
        axis title

    Examples
    --------
    >>> get_axis_title('pmc-sog_otfL_couv99-default_1_1', True)
    'pmc-sog (Lace, strategy: couv99-default, # cores: 1)'
    """
    library_names = {
        'otfL': 'Lace',
        'otfP': 'Pthreads',
        'otfC': 'Cthreads',
        'otf': 'Hybrid'
    }

    fields = experiment.split('_')
    tool_name = fields[0]
    details = []

    # only five-field experiments carry a parallelization library
    if len(fields) == 5:
        library = fields[1]
        # an unknown library is shown with its raw identifier instead of failing
        details.append(library_names.get(library, library))

    if show_strategy:
        details.append('strategy: {}'.format(fields[-3]))

    # the number of nodes is only shown for runs using more than one node
    nb_nodes = int(fields[-2])
    if nb_nodes > 1:
        details.append('# nodes: {}'.format(nb_nodes))

    details.append('# cores: {}'.format(fields[-1]))

    return '{} ({})'.format(tool_name, ', '.join(details))
```
%% Cell type:code id: tags:
``` python
def create_log_figure(table, table_errors, model, tool_x, tool_y, show_strategy=True):
    """Creates a Scatter figure in logarithmic scale comparing the performance of two tools

    Parameters
    ----------
    table : pandas.Dataframe
        Dataframe with the times of each experiment. It must also provide a
        `property` column, which is used to color the points.
    table_errors : pandas.Dataframe
        Dataframe with the errors of each experiment
    model : string
        Model to be analyzed
    tool_x : string
        Tool to be compared and plotted on the x-axis
    tool_y : string
        Tool to be compared and plotted on the y-axis
    show_strategy : bool
        Flag to show the strategy used by the tools

    Returns
    -------
    plotly.graph_objects.Figure or None
        Scatter figure, or None when the figure could not be built. In that
        case the error is printed and not raised.

    Examples
    --------
    >>> import os
    >>> import pandas as pd
    >>> csv_file = os.path.join("results", "output.csv")
    >>> df = pd.read_csv(csv_file)
    >>> table = df.set_index(['model', 'formula', 'tool'], drop=True).unstack('tool')
    >>> fig = create_log_figure(table['time'], table['error'], 'philo10', 'pmc-sog_otfL_couv99-default_1_8', 'pmc-sog_otfP_couv99-default_1_8')
    """
    try:
        # rows of the requested model, looked up once and reused below
        model_table = table.loc[model]
        times_x = model_table[tool_x]
        times_y = model_table[tool_y]

        # Both axes share the same range, computed only from the two compared
        # columns. The lower bound is halved, presumably to leave a margin
        # below the smallest point.
        min_value = min(times_x.min(), times_y.min()) / 2.
        max_value = max(times_x.max(), times_y.max())

        figure = px.scatter(model_table,
                            title=model,
                            x=tool_x, y=tool_y,
                            log_x=True, log_y=True,
                            range_x=[min_value, max_value],
                            range_y=[min_value, max_value],
                            color="property",
                            hover_data=[
                                ['formula #{}'.format(i) for i in model_table.index],
                                table_errors.loc[model, tool_x],
                                table_errors.loc[model, tool_y]
                            ],
                            color_discrete_map={"T": "green", "F": "red", "U": "black"},
                            symbol_sequence=["circle-open"])

        # diagonal x = y: points on it took the same time with both tools
        line = go.Scatter(x=[min_value, max_value],
                          y=[min_value, max_value],
                          mode='lines', showlegend=False,
                          line=dict(color='black', width=1))
        figure.add_traces(line)

        figure.update_layout(LAYOUT_FIGURES,
                             xaxis_title=get_axis_title(tool_x, show_strategy),
                             yaxis_title=get_axis_title(tool_y, show_strategy))
        return figure
    except Exception as e:
        # best effort: a failing comparison is reported and skipped, not raised
        print("Error when ploting model: {} - tool_x: {} - tool_y: {}".format(model, tool_x, tool_y))
        print(e)
        return None
```
%% Cell type:code id: tags:
``` python
# Experiment filters
def versus_dfs(experiments):
    """Tells whether at least one experiment of the pair has 'dfs' as its second field"""
    first, second = experiments
    second_fields = (first.split('_')[1], second.split('_')[1])
    return 'dfs' in second_fields
def versus_sequential(experiments):
    """Tells whether both experiments ran on one node and at least one of them used a single thread"""
    first, second = experiments
    nodes_a, threads_a = first.split('_')[-2:]
    nodes_b, threads_b = second.split('_')[-2:]
    # both experiments must run on exactly one node
    if nodes_a != '1' or nodes_b != '1':
        return False
    return '1' in (threads_a, threads_b)
def same_tool(experiments, tool):
    """Tells whether the tool name (first field) of both experiments starts with `tool`"""
    first, second = experiments
    tool_names = (first.split('_')[0], second.split('_')[0])
    return all(name.startswith(tool) for name in tool_names)
def same_number_threads(experiments):
    """Tells whether both experiments use the same number of nodes and the same number of threads"""
    first, second = experiments
    nodes_a, threads_a = first.split('_')[-2:]
    nodes_b, threads_b = second.split('_')[-2:]
    return (nodes_a, threads_a) == (nodes_b, threads_b)
def same_thread_library(experiments):
    """Tells whether both experiments have the same second field (the parallelization library)"""
    first, second = experiments
    libraries = [name.split('_')[1] for name in (first, second)]
    return libraries[0] == libraries[1]
def same_strategy(experiments):
    """Tells whether both experiments have the same third field (the strategy)"""
    first, second = experiments
    strategy_a = first.split('_')[2]
    strategy_b = second.split('_')[2]
    return not (strategy_a != strategy_b)
def only_couvreur_strategy(experiments):
    """Tells whether the strategy (third field) of both experiments starts with 'couv99'"""
    first, second = experiments
    strategies = (first.split('_')[2], second.split('_')[2])
    return all(strategy.startswith('couv99') for strategy in strategies)
def compare_threads_library(experiments):
    """Compares the parallelization libraries used in pmc-sog.

    It selects pairs where both tools are pmc-sog, the strategy, the number of
    nodes and the number of threads are the same, and the parallelization
    library differs.
    """
    if not same_tool(experiments, 'pmc-sog'):
        return False
    if not same_strategy(experiments):
        return False
    if not same_number_threads(experiments):
        return False
    return not same_thread_library(experiments)
def compare_couvreur_strategies(experiments):
    """Compares the couvreur strategies.

    It selects pairs where both strategies are couvreur ones and the
    parallelization library, the number of nodes and the number of threads
    are the same. The tool name itself is not checked here.
    """
    checks = (only_couvreur_strategy, same_thread_library, same_number_threads)
    return all(check(experiments) for check in checks)
def same_distributed_number_threads(experiments):
    """Tells whether nodes multiplied by threads gives the same total for both experiments"""
    first, second = experiments
    nodes_a, threads_a = first.split('_')[-2:]
    nodes_b, threads_b = second.split('_')[-2:]
    total_a = int(nodes_a) * int(threads_a)
    total_b = int(nodes_b) * int(threads_b)
    return total_a == total_b
def compare_tools(experiments):
    """Compares pmc-sog and pnml2lts-mc using the DFS algorithm.

    It selects pairs where the two tools are not the same and at least one of
    the experiments uses the DFS exploration algorithm. The number of nodes
    and cores is not constrained, so all combinations are plotted.
    """
    # a pair is rejected when both experiments come from the same tool
    same_tools = same_tool(experiments, 'pmc-sog') or same_tool(experiments, 'pnml2lts-mc')
    return not same_tools and versus_dfs(experiments)
def compare_multithreading(experiments):
    """Compares the sequential and multi-core versions of pmc-sog.

    It selects pairs where both tools are pmc-sog, the parallelization library
    and the strategy are the same, both experiments run on a single node and
    at least one of them uses a single thread.
    """
    checks = (same_thread_library, same_strategy, versus_sequential)
    return same_tool(experiments, 'pmc-sog') and all(check(experiments) for check in checks)
def against_hybrid(experiments):
    """Tells whether at least one experiment of the pair uses the hybrid library ('otf')"""
    first, second = experiments
    libraries = (first.split('_')[1], second.split('_')[1])
    return 'otf' in libraries
def compare_distributed(experiments):
    """Compares the hybrid version of pmc-sog.

    It selects pairs where both tools are pmc-sog, the strategy is the same,
    nodes multiplied by cores gives the same total, and at least one of the
    experiments uses the hybrid mode.
    """
    if not same_tool(experiments, 'pmc-sog'):
        return False
    if not same_strategy(experiments):
        return False
    if not same_distributed_number_threads(experiments):
        return False
    return against_hybrid(experiments)
# Plots to be created: name of the plot -> filter selecting the pairs of experiments
plots = dict(
    compare_thread_library=compare_threads_library,
    compare_couvreur_algorithm=compare_couvreur_strategies,
    compare_tools=compare_tools,
    compare_multicore=compare_multithreading,
    compare_distributed=compare_distributed,
)
```
%% Cell type:markdown id: tags:
# Load Data
%% Cell type:code id: tags:
``` python
# Root folder: the parent of the current working directory
PROJECT_FOLDER = os.path.abspath(os.pardir)

# Folder with the results of the experiments
results_folder = os.path.join(PROJECT_FOLDER, "results")

# csv file with the output
csv_file = os.path.join(results_folder, "output.csv")

# Output folder, created if it is missing
OUTPUT_FOLDER = os.path.join(results_folder, "figures")
create_folder(OUTPUT_FOLDER)
```
%% Cell type:code id: tags:
``` python
# read data
df = pd.read_csv(csv_file)

# columns describing the experiment (strategy, # nodes, # threads); they are
# appended to the tool column, joined by '_', and then dropped
experiment_columns = ['strategy', 'num_nodes', 'num_threads']
df['tool'] = df[['tool'] + experiment_columns].astype(str).apply('_'.join, axis=1)
df = df.drop(columns=experiment_columns)

df.head()
```
%%%% Output: execute_result
model formula tool time property error
0 philo10 1 pmc-sog_otfL_couv99-default_1_16 6.572 F OK
1 philo10 1 pmc-sog_otfL_couv99-shy_1_16 5.926 F OK
2 philo10 1 pmc-sog_otfP_couv99-default_1_16 6.811 F OK
3 philo10 1 pmc-sog_otfP_couv99-shy_1_16 6.862 F OK
4 philo10 1 pmc-sog_otf_couv99-default_2_8 4.761 F OK
%% Cell type:code id: tags:
``` python
# ground truth for properties: results of pnml2lts-mc with ndfs, 1 node and 16 threads
raw_df = pd.read_csv(csv_file)
is_reference = (
    (raw_df.tool == 'pnml2lts-mc') &
    (raw_df.strategy == 'ndfs') &
    (raw_df.num_nodes == 1) &
    (raw_df.num_threads == 16)
)

# only the property column is needed; missing values become 'U'
p_df = (
    raw_df[is_reference]
    .drop(columns=['tool', 'strategy', 'num_nodes', 'num_threads', 'time', 'error'])
    .fillna('U')
    .set_index(['model', 'formula'])
    .sort_index()
)

p_df.head()
```
%%%% Output: execute_result
property
model formula
philo10 1 F
2 F
3 F
4 F
5 F
%% Cell type:code id: tags:
``` python
# table with time, verification output and error of each experiment:
# one row per (model, formula) and one column per (measure, tool)
indexed_df = df.set_index(['model', 'formula', 'tool'], drop=True)
table = indexed_df.unstack('tool')
table.head()
```
%%%% Output: execute_result
time \
tool pmc-sog_otfL_couv99-default_1_16 pmc-sog_otfL_couv99-shy_1_16
model formula
philo10 1 6.572 5.926
2 5.179 5.090
3 2.620 4.383
4 2.379 4.183
5 4.260 7.320
\
tool pmc-sog_otfP_couv99-default_1_16 pmc-sog_otfP_couv99-shy_1_16
model formula
philo10 1 6.811 6.862
2 4.575 4.350
3 3.489 4.143
4 4.322 4.149
5 7.995 7.629
\
tool pmc-sog_otf_couv99-default_2_8 pmc-sog_otf_couv99-default_8_2
model formula
philo10 1 4.761 11.652
2 6.304 6.811
3 5.428 NaN
4 4.717 9.407
5 7.344 13.582
\
tool pmc-sog_otf_couv99-shy_2_8 pmc-sog_otf_couv99-shy_8_2
model formula
philo10 1 6.675 11.119
2 6.286 NaN
3 6.441 10.935
4 5.468 9.346
5 6.954 13.567
... \
tool pnml2lts-mc_dfs_1_16 pnml2lts-mc_ndfs_1_16 ...
model formula ...
philo10 1 0.81 0.19 ...
2 0.43 0.12 ...
3 2.36 0.16 ...
4 0.54 0.18 ...
5 0.67 0.08 ...
error \
tool pmc-sog_otfL_couv99-default_1_16 pmc-sog_otfL_couv99-shy_1_16
model formula
philo10 1 OK OK
2 OK OK
3 OK OK
4 OK OK
5 OK OK
\
tool pmc-sog_otfP_couv99-default_1_16 pmc-sog_otfP_couv99-shy_1_16
model formula
philo10 1 OK OK
2 OK OK
3 OK OK
4 OK OK
5 OK OK
\
tool pmc-sog_otf_couv99-default_2_8 pmc-sog_otf_couv99-default_8_2
model formula
philo10 1 OK OK
2 OK OK
3 OK SEGMENTATION FAULT
4 OK OK
5 OK OK
\
tool pmc-sog_otf_couv99-shy_2_8 pmc-sog_otf_couv99-shy_8_2
model formula
philo10 1 OK OK
2 OK SEGMENTATION FAULT
3 OK OK
4 OK OK
5 OK OK
tool pnml2lts-mc_dfs_1_16 pnml2lts-mc_ndfs_1_16
model formula
philo10 1 OK OK
2 OK OK
3 OK OK
4 OK OK
5 OK OK
[5 rows x 30 columns]
%% Cell type:markdown id: tags:
# Preprocessing of data
%% Cell type:code id: tags:
``` python
# value standing in for a time of 0.0; note that this is 1e-4 (10e-5), not 1e-5
ZERO = 1e-4
# 10 minutes = 600 seconds
TIMEOUT = 600
```
%% Cell type:code id: tags:
``` python
# table with the time of each experiment:
# - unfinished experiments (missing time) get the dummy value TIMEOUT
# - times equal to 0.0 are replaced by ZERO, since log(0) cannot be plotted
table_time = table['time'].fillna(TIMEOUT).replace(0.0, ZERO)

# add the verification output (ground truth) as an extra column
table_time = pd.concat([table_time, p_df], axis=1)

table_time.head()
```
%%%% Output: execute_result
pmc-sog_otfL_couv99-default_1_16 \
model formula
philo10 1 6.572
2 5.179
3 2.620
4 2.379
5 4.260
pmc-sog_otfL_couv99-shy_1_16 \
model formula
philo10 1 5.926
2 5.090
3 4.383
4 4.183
5 7.320
pmc-sog_otfP_couv99-default_1_16 \
model formula
philo10 1 6.811
2 4.575
3 3.489
4 4.322
5 7.995
pmc-sog_otfP_couv99-shy_1_16 pmc-sog_otf_couv99-default_2_8 \
model formula
philo10 1 6.862 4.761
2 4.350 6.304
3 4.143 5.428
4 4.149 4.717
5 7.629 7.344
pmc-sog_otf_couv99-default_8_2 pmc-sog_otf_couv99-shy_2_8 \
model formula
philo10 1 11.652 6.675
2 6.811 6.286
3 600.000 6.441
4 9.407 5.468
5 13.582 6.954
pmc-sog_otf_couv99-shy_8_2 pnml2lts-mc_dfs_1_16 \
model formula
philo10 1 11.119 0.81
2 600.000 0.43
3 10.935 2.36
4 9.346 0.54
5 13.567 0.67
pnml2lts-mc_ndfs_1_16 property
model formula
philo10 1 0.19 F
2 0.12 F
3 0.16 F
4 0.18 F
5 0.08 F
%% Cell type:code id: tags:
``` python
# table with the verification output of each experiment;
# unfinished experiments (missing output) get the dummy value 'U'
table_property = table['property'].fillna('U')

# add the ground truth as an extra column
table_property = pd.concat([table_property, p_df], axis=1)