Skip to content
Snippets Groups Projects
plot-results.py 14.9 KiB
Newer Older
  • Learn to ignore specific revisions
  • #!/usr/bin/env python
    # coding: utf-8
    
    
    
    
    import os
    import pandas as pd
    import plotly.io as pio
    import plotly.express as px
    import plotly.graph_objs as go
    from itertools import combinations 
    from plotly.subplots import make_subplots
    import plotly.io as pio
    
    # default renderer used when a figure is shown: plotly's "notebook_connected"
    pio.renderers.default = "notebook_connected"
    
    # use the "simple_white" template for plotly express and for plotly in general
    px.defaults.template = "simple_white"
    pio.templates.default = "simple_white"
    
    # Layout options shared by the figures: a fixed 500x500 canvas, mirrored
    # axes with power-style exponents, a y-axis locked 1:1 to the x-axis and a
    # title anchored at the top centre.
    LAYOUT_FIGURES = {
        "autosize": False,
        "width": 500,
        "height": 500,
        "xaxis": {
            "constrain": "domain",
            "mirror": True,
            "showexponent": "all",
            "exponentformat": "power",
        },
        "yaxis": {
            "scaleanchor": "x",
            "scaleratio": 1,
            "mirror": True,
            "showexponent": "all",
            "exponentformat": "power",
        },
        "title": {
            "y": 0.9,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        },
    }
    
    
    # # Auxiliary Functions
    
    
    
    
    def create_folder(path):
        """Create a folder (and any missing parent folders) if it does not exist.
        
        Calling it on a folder that already exists is a no-op.
        
        Parameters
        ----------
        path : str
            Path of the new folder
        
        Raises
        ------
        FileExistsError
            If ``path`` already exists but is not a directory
        
        Examples
        --------
        
        >>> create_folder('./results')
        """
        # exist_ok=True replaces the "check, then create" sequence, which could
        # fail if the folder was created between the check and the creation
        os.makedirs(path, exist_ok=True)
    
    
    
    
    
    def create_figure(df, model):
        """Build a scatter figure with the time each tool took on every formula of a model.
        
        Only the rows of ``df`` whose ``model`` column equals ``model`` are
        plotted: ``formula`` on the x-axis, ``time`` on the y-axis, one colour
        per ``tool``.
        
        Parameters
        ----------
        df : pandas.DataFrame
            Dataframe containing the results of the experiments
        model : str
            Model to be plotted; also used as the figure title
        
        Returns
        -------
        plotly.graph_objects.Figure
            Scatter figure
        
        Examples
        --------
        
        >>> import os
        >>> import pandas as pd
        >>> csv_file = os.path.join("results", "output.csv")
        >>> df = pd.read_csv(csv_file)
        >>> fig = create_figure(df, 'philo10')
        """
        rows = df[df.model == model]
    
        scatter = px.scatter(
            rows,
            x="formula",
            y="time",
            title=model,
            color="tool",
            symbol_sequence=['x'],
        )
    
        # label the y-axis and reuse the shared title placement
        scatter.update_layout(yaxis_title="time (s)", title=LAYOUT_FIGURES['title'])
        return scatter
    
    
    
    
    
    def get_axis_title(experiment, show_strategy=True):
        """Build the axis title describing the experiment being plotted.
        
        The experiment identifier is split on ``'_'``. Its first field is the
        tool name, its last two fields are the number of nodes and the number
        of cores, and the field before those is the strategy. When the
        identifier has exactly five fields, the second one is the
        parallelization library.
        
        Parameters
        ----------
        experiment : str
            String with the experiment information
        show_strategy : bool, optional
            Flag to show the information related to the strategy used by the tool
        
        Returns
        -------
        str
            axis title
        
        Raises
        ------
        KeyError
            If a five-field identifier names an unknown parallelization library
            
        Examples
        --------
        
        >>> get_axis_title('pmc-sog_otfL_couv99-default_1_1', True)
        'pmc-sog (Lace, strategy: couv99-default, # cores: 1)'
        """
        library_names = {
            'otfL': 'Lace',
            'otfP': 'Pthreads',
            'otfC': 'Cthreads',
            'otf': 'Hybrid',
        }
    
        fields = experiment.split('_')
        details = []
    
        # only five-field identifiers carry a parallelization library
        if len(fields) == 5:
            details.append(library_names[fields[1]])
    
        if show_strategy:
            details.append('strategy: {}'.format(fields[-3]))
    
        # the number of nodes is only worth mentioning for distributed runs
        node_count = int(fields[-2])
        if node_count > 1:
            details.append('# nodes: {}'.format(node_count))
    
        details.append('# cores: {}'.format(fields[-1]))
    
        return '{} ({})'.format(fields[0], ', '.join(details))
    
    
    
    
    
    def create_log_figure(table, table_errors, model, tool_x, tool_y, show_strategy=True):
        """Build a log-log scatter figure comparing the times of two tools on one model.
        
        Each point is a formula of ``model``, placed at the time of ``tool_x``
        (x-axis) and of ``tool_y`` (y-axis) and coloured by the ``property``
        column of ``table``. A black diagonal is added as a visual reference
        for equal times.
        
        Parameters
        ----------
        table : pandas.DataFrame
            Dataframe with the times of each experiment
        table_errors : pandas.DataFrame
            Dataframe with the errors of each experiment
        model : str
            Model to be analyzed
        tool_x : str
            Tool to be compared and plotted on the x-axis
        tool_y : str
            Tool to be compared and plotted on the y-axis
        show_strategy : bool
            Flag to show the strategy used by the tools in the axis titles
            
        Returns
        -------
        plotly.graph_objects.Figure or None
            Scatter figure, or None when any exception occurred while building
            it (the error is printed instead of being raised)
            
        Examples
        --------
        >>> import os
        >>> import pandas as pd
        >>> csv_file = os.path.join("results", "output.csv")
        >>> df = pd.read_csv(csv_file)
        >>> table = df.set_index(['model', 'formula', 'tool'], drop=True).unstack('tool')
        >>> fig = create_log_figure(table['time'], table['error'], 'philo10', 'pmc-sog_otfL_couv99-default_1_8', 'pmc-sog_otfP_couv99-default_1_8')
        """
        try:
            model_times = table.loc[model]
            column_minima = model_times.min()
            column_maxima = model_times.max()
    
            # both axes share the same range; the lower bound is halved
            low = min(column_minima[tool_x], column_minima[tool_y])/2.
            high = max(column_maxima[tool_x], column_maxima[tool_y])
    
            hover_columns = [
                ['formula #{}'.format(i) for i in model_times.index],
                table_errors.loc[model, tool_x],
                table_errors.loc[model, tool_y]
            ]
    
            scatter = px.scatter(
                model_times,
                title=model,
                x=tool_x,
                y=tool_y,
                log_x=True,
                log_y=True,
                range_x=[low, high],
                range_y=[low, high],
                color="property",
                hover_data=hover_columns,
                color_discrete_map={"T": "green", "F": "red", "U": "black"},
                symbol_sequence=["circle-open"],
            )
    
            # diagonal from the lower to the upper bound of the shared range
            diagonal = go.Scatter(
                x=[low, high],
                y=[low, high],
                mode='lines',
                showlegend=False,
                line=dict(color='black', width=1),
            )
            scatter.add_traces(diagonal)
    
            scatter.update_layout(
                LAYOUT_FIGURES,
                xaxis_title=get_axis_title(tool_x, show_strategy),
                yaxis_title=get_axis_title(tool_y, show_strategy),
            )
    
            return scatter
        except Exception as e:
            # best effort: report the failure and implicitly return None
            print("Error when ploting model: {} - tool_x: {} - tool_y: {}".format(model, tool_x, tool_y))
            print(e)
    
    
    
    
    
    # Experiment filters
    
    def versus_dfs(experiments):
        """Tell whether at least one experiment of the pair uses the DFS strategy.
        
        The strategy is read from the second '_'-separated field of each
        experiment identifier.
        """
        first, second = experiments
        strategies = (first.split('_')[1], second.split('_')[1])
        return 'dfs' in strategies
    
    def versus_sequential(experiments):
        """Tell whether the pair compares against a sequential run.
        
        Both experiments must run on a single node and at least one of them
        must use a single thread. Nodes and threads are the last two
        '_'-separated fields of each experiment identifier.
        """
        first, second = experiments
        first_nodes, first_threads = first.split('_')[-2:]
        second_nodes, second_threads = second.split('_')[-2:]
    
        if first_nodes != '1' or second_nodes != '1':
            return False
        return first_threads == '1' or second_threads == '1'
    
    def same_tool(experiments, tool):
        """Tell whether both experiments of the pair were run with the given tool.
        
        The tool name is the first '_'-separated field of each experiment
        identifier; it only has to start with ``tool``.
        """
        first, second = experiments
        tool_names = [first.split('_')[0], second.split('_')[0]]
        return all(name.startswith(tool) for name in tool_names)
    
    def same_number_threads(experiments):
        """Tell whether both experiments use the same number of nodes and of threads.
        
        Nodes and threads are the last two '_'-separated fields of each
        experiment identifier and are compared as strings.
        """
        first, second = experiments
        first_nodes, first_threads = first.split('_')[-2:]
        second_nodes, second_threads = second.split('_')[-2:]
        return (first_nodes, first_threads) == (second_nodes, second_threads)
    
    def same_thread_library(experiments):
        """Tell whether both experiments use the same parallelization library.
        
        The library is read from the second '_'-separated field of each
        experiment identifier.
        """
        first, second = experiments
        return first.split('_')[1] == second.split('_')[1]
    
    def same_strategy(experiments):
        """Tell whether both experiments use the same strategy.
        
        The strategy is read from the third '_'-separated field of each
        experiment identifier.
        """
        first, second = experiments
        return first.split('_')[2] == second.split('_')[2]
    
    def only_couvreur_strategy(experiments):
        """Tell whether both experiments use a couvreur emptiness check algorithm.
        
        The third '_'-separated field of each experiment identifier must start
        with 'couv99'.
        """
        first, second = experiments
        strategies = [first.split('_')[2], second.split('_')[2]]
        return all(strategy.startswith('couv99') for strategy in strategies)
    
    def compare_threads_library(experiments):
        """Select pairs comparing the parallelization libraries used in pmc-sog.
        
        A pair is selected when both experiments use pmc-sog with the same
        strategy, the same number of nodes and threads, and different
        parallelization libraries.
        """
        if not same_tool(experiments, 'pmc-sog'):
            return False
        if not same_strategy(experiments):
            return False
        if not same_number_threads(experiments):
            return False
        return not same_thread_library(experiments)
    
    def compare_couvreur_strategies(experiments):
        """Select pairs comparing the couvreur strategies.
        
        A pair is selected when both experiments use a couvreur strategy, the
        same parallelization library and the same number of nodes and threads.
        """
        required = (only_couvreur_strategy, same_thread_library, same_number_threads)
        # evaluated lazily, so the first failing check stops the evaluation
        return all(check(experiments) for check in required)
    
    
    def same_distributed_number_threads(experiments):
        """Tell whether both experiments use the same total of nodes times threads.
        
        Nodes and threads are the last two '_'-separated fields of each
        experiment identifier and are parsed as integers.
        """
        first, second = experiments
        first_nodes, first_threads = first.split('_')[-2:]
        second_nodes, second_threads = second.split('_')[-2:]
    
        first_total = int(first_nodes) * int(first_threads)
        second_total = int(second_nodes) * int(second_threads)
        return first_total == second_total
    
    
    def compare_tools(experiments):
        """Select pairs comparing pmc-sog and pnml2lts-mc using the DFS algorithm.
        
        A pair is selected when the product of nodes and threads is the same
        for both experiments, the experiments are neither both pmc-sog nor both
        pnml2lts-mc, and at least one of them uses the DFS strategy.
        """
        if not same_distributed_number_threads(experiments):
            return False
        if same_tool(experiments, 'pmc-sog') or same_tool(experiments,'pnml2lts-mc'):
            return False
        return versus_dfs(experiments)
    
    
    def compare_multithreading(experiments):
        """Select pairs comparing the sequential and multi-core versions of pmc-sog.
        
        A pair is selected when both experiments use pmc-sog with the same
        parallelization library and the same strategy, both run on a single
        node and at least one of them uses a single thread.
        """
        if not same_tool(experiments, 'pmc-sog'):
            return False
        remaining = (same_thread_library, same_strategy, versus_sequential)
        # evaluated lazily, so the first failing check stops the evaluation
        return all(check(experiments) for check in remaining)
    
    
    def against_hybrid(experiments):
        """Tell whether at least one experiment of the pair runs in hybrid mode.
        
        Hybrid mode is identified by 'otf' in the second '_'-separated field of
        the experiment identifier.
        """
        first, second = experiments
        libraries = (first.split('_')[1], second.split('_')[1])
        return 'otf' in libraries
    
    
    
    def compare_distributed(experiments):
        """Select pairs comparing the hybrid version of pmc-sog.
        
        A pair is selected when both experiments use pmc-sog with the same
        strategy and the same product of nodes and threads, and at least one of
        them runs in hybrid mode.
        """
        if not same_tool(experiments, 'pmc-sog'):
            return False
        if not same_strategy(experiments):
            return False
        if not same_distributed_number_threads(experiments):
            return False
        return against_hybrid(experiments)
    
    # Plots to be created: maps the name of each group of plots (also used as
    # the name of its output folder) to the filter that selects the pairs of
    # experiments compared in that group
    plots = {
        'compare_thread_library': compare_threads_library,
        'compare_couvreur_algorithm': compare_couvreur_strategies,
        'compare_tools': compare_tools,
        'compare_multicore': compare_multithreading,
        'compare_distributed': compare_distributed,
    }
    
    
    
    # Root folder: the parent of the current working directory
    PROJECT_FOLDER = os.path.abspath(os.pardir)
    
    # csv file with the output of the experiments
    csv_file = os.path.join(PROJECT_FOLDER, "results", "output.csv")
    
    # Output folder for the generated figures (created if it does not exist)
    OUTPUT_FOLDER = os.path.join(PROJECT_FOLDER,"results", "figures")
    create_folder(OUTPUT_FOLDER)
    
    
    
    
    
    # read the results of the experiments
    df = pd.read_csv(csv_file)
    
    # merge the information related to the experiment (strategy, # nodes, # threads) into the
    # tool column, joined with '_', so that each experiment is identified by a single string
    df['tool'] = df[['tool', 'strategy', 'num_nodes', 'num_threads']].astype(str).apply('_'.join, axis=1)
    df = df.drop(columns=['strategy', 'num_nodes', 'num_threads'])
    
    # preview of the first rows (the result is discarded when run as a script)
    df.head()
    
    
    
    
    
    # ground truth for properties: the results of pnml2lts-mc with the ndfs
    # strategy on 1 node and 16 threads are taken as the reference
    p_df = pd.read_csv(csv_file)
    p_df =p_df[
        (p_df.tool=='pnml2lts-mc') & 
        (p_df.strategy == 'ndfs') & 
        (p_df.num_nodes == 1) & 
    
        (p_df.num_threads == 16)]
    
    
    # only property column is needed
    p_df = p_df.drop(columns=['tool', 'strategy', 'num_nodes', 'num_threads', 'time', 'error'])
    # missing values become 'U' (presumably "unknown" - TODO confirm)
    p_df.fillna('U', inplace=True)
    # index by (model, formula) so it can be aligned with the per-experiment tables
    p_df.set_index(['model', 'formula'], inplace=True)
    p_df.sort_index(inplace=True)
    
    # preview of the first rows (the result is discarded when run as a script)
    p_df.head()
    
    
    
    
    
    # table with times, verification output and error for each experiment:
    # rows are indexed by (model, formula) and the tool level is moved to the columns
    table = df.set_index(['model', 'formula', 'tool'], drop=True).unstack('tool')
    table.head()
    
    
    # # Preprocessing of data
    
    
    
    
    ZERO = 10e-5  # replacement for 0.0 times in the log plots; note that 10e-5 == 1e-4
    TIMEOUT = 10 * 60 # 10 minutes = 600 seconds
    
    
    
    
    
    # table with times for each experiment
    table_time = table['time'].copy()
    
    # replace non finished experiments with a dummy value, e.g. timeout
    table_time.fillna(TIMEOUT, inplace=True)
    
    # replace 0.00 times with ZERO (10e-5, i.e. 10^(-4)), we cannot plot log(0)
    table_time.replace(0.0, ZERO, inplace=True)
    
    # add verification output to the table
    table_time = pd.concat([table_time, p_df], axis=1)
    
    table_time.head()
    
    
    
    
    
    # table with verification output for each experiment
    table_property = table['property'].copy()
    
    # replace non finished experiments with the dummy value 'U'
    table_property.fillna('U', inplace=True)
    
    # add ground truth to the table (as extra columns, aligned on the index)
    table_property = pd.concat([table_property, p_df], axis=1)
    
    table_property.head()
    
    
    
    
    
    # table with error for each experiment (missing values are left untouched)
    table_error = table['error'].copy()
    
    table_error.head()
    
    
    # # Examples
    
    
    # example 1: time taken by each tool on every formula of a model
    fig = create_figure(df, "philo10")
    
    # example 2: log-scale comparison of two experiments on the same model
    fig = create_log_figure(table_time, table_error, "philo10", "pmc-sog_otf_couv99-default_2_8", "pnml2lts-mc_dfs_1_16")
    
    # create_log_figure prints the error and returns None when the figure
    # cannot be built (e.g. the model or an experiment is missing from the data)
    if fig is not None:
        fig.show()
    
    
    # # Generate Figures
    
    
    
    
    # models: distinct values of the model column
    models = df.model.unique()
    
    # tools: distinct experiment identifiers (tool merged with strategy, # nodes and # threads)
    tools = df.tool.unique()
    
    
    # In[52]:
    
    
    # create all the figures formula vs time, one per model, inside the
    # 'time-plots' subfolder of the output folder
    
    folder = os.path.join(OUTPUT_FOLDER, 'time-plots')
    create_folder(folder)
    
    for model in models:
        try:
            fig = create_figure(df, model)
            
            # save figures in html and pdf
            fig.write_html(os.path.join(folder, model + '.html'))
            fig.write_image(os.path.join(folder, model + '.pdf'))
        except KeyError:
            # a KeyError skips this model; any other exception stops the script
            print("Error: {} was not plotted".format(model))
    
    
    
    # In[ ]:
    
    
    
    # create all the log figures: for every group of plots, every model and
    # every pair of experiments selected by the filter of the group
    
    # every unordered pair of experiments, each pair sorted alphabetically
    tools_pairs = [sorted(t) for t in combinations(tools, 2)]
    
    for plot, filter_method in plots.items():
        # pairs of experiments to be compared in this group of plots
        axes = list(filter(filter_method, tools_pairs))
    
        # the strategy is only shown in the axis titles when strategies are compared
        show_strategy = plot == 'compare_couvreur_algorithm'
    
        for model in models:
            folder = os.path.join(OUTPUT_FOLDER, plot, model)
            create_folder(folder)
    
            for axe in axes:
                try:
                    fig = create_log_figure(table_time, table_error, model, axe[0], axe[1], show_strategy)
    
                    # create_log_figure reports its own errors and returns None
                    # when the figure could not be built: nothing to save then
                    if fig is None:
                        continue
    
                    # save figures in html and pdf
                    figure_name = os.path.join(folder, '{}-{}-VS-{}-log'.format(model, axe[0], axe[1]))
                    fig.write_html(figure_name + '.html')
                    fig.write_image(figure_name + '.pdf')
                except KeyError:
                    print("Error: {} was not plotted".format(model))
    
    
    # In[ ]: