[analysis] mean values for non-unique config runs

2025-10-12 23:37:07 +03:00 · 2025-10-12 23:37:07 +03:00 · 628f0439b7
commit 628f0439b7
parent 900aca9bd5
3 changed files with 172 additions and 72 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,7 @@
 #project ignore:
 plots/
 results/
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
--- a/latencyAnalysis.py
+++ b/latencyAnalysis.py
@ -2,11 +2,14 @@ import pandas as pd
 import matplotlib.pyplot as plt
 import numpy as np
 import os
 import re
 import argparse
 import logging
 # Configure logging to show informational messages
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
 def parse_args():
    parser = argparse.ArgumentParser(prog=__file__)
@ -25,13 +28,17 @@ def parse_args():
                        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args()
 cmd_args = None
 def get_args():
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args
 def plot_latency_data(df):
    def create_labels(df):
        """Combines MultiIndex levels (L0-L3) into a single string for notes."""
@ -53,20 +60,26 @@ def plot_latency_data(df):
    r3 = [x + bar_width for x in r2]
    fig = plt.figure(figsize=(10, 6), dpi=300)
    # Create the bars
-    plt.bar(r1, df['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency')
+    plt.bar(r1, df['max'], color='red', width=bar_width,
-    plt.bar(r2, df['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency')
+            edgecolor='grey', label='Max Latency')
-    plt.bar(r3, df['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency')
+    plt.bar(r2, df['avg'], color='blue', width=bar_width,
            edgecolor='grey', label='Avg Latency')
    plt.bar(r3, df['median'], color='green', width=bar_width,
            edgecolor='grey', label='Median Latency')
    # Add labels and ticks
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
-    plt.xticks([r + bar_width for r in range(num_configs)], [str(i + 1) for i in range(num_configs)])
+    plt.xticks([r + bar_width for r in range(num_configs)],
-    plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
+               [str(i + 1) for i in range(num_configs)])
    plt.title(
        f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
-    plt.savefig(get_args().plot_dir + f'combined_top_configurations_plot_{encoder_name}.png')
+    plt.savefig(get_args().plot_dir +
                f'combined_top_configurations_plot_{encoder_name}.png')
    plt.close()
    # Output Notes (for user interpretation)
@ -74,6 +87,7 @@ def plot_latency_data(df):
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")
 def plot_start_latency(df):
    fig = plt.figure(figsize=(10, 6), dpi=300)
    r1 = np.arange(len(df))
@ -86,6 +100,7 @@ def plot_start_latency(df):
    plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png")
    plt.close()
 def analyze_latency_data(csv_path: str):
    """
    Analyzes latency data to find the top 10 components (rows) contributing most
@ -97,7 +112,8 @@ def analyze_latency_data(csv_path: str):
    # --- 1. Load Data with Multi-level Headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
-        logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
+        logging.info(
            f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
        if df.index.name == 'Unnamed: 0':
            df.index.name = 'component'
    except FileNotFoundError:
@ -110,18 +126,46 @@ def analyze_latency_data(csv_path: str):
    # calculate summary along the rows
    sumDf = df.sum()
    if get_args().compensate == True:
-        logging.info("Filesrc latency compensation is ON")
+        logging.info("Filesrc and rawvideoparse latency compensation is ON")
        filesrcData = df.loc["filesrc0"]
        rawvideoparseData = df.loc["rawvideoparse0"]
        sumDf -= filesrcData
-    print(sumDf.head())
+        sumDf -= rawvideoparseData
-    # return
+    logging.debug(f"\n{sumDf.head()}")
    # calculate mean across non-unique runs:
    def get_base_metric(metric):
        """Strips suffixes like '.1' or '.2' from the metric name."""
        return re.sub(r'\.\d+$', '', str(metric))
    metric_level_values = sumDf.index.get_level_values(-1)
    base_metrics_key = metric_level_values.map(get_base_metric)
    config_levels = list(range(sumDf.index.nlevels - 1)
                         )  # This gives [0, 1, 2, 3]
    grouping_keys = sumDf.index.droplevel(config_levels)  # type: ignore
    grouping_keys = [
        sumDf.index.get_level_values(i) for i in config_levels
    ] + [base_metrics_key]
    # 3. Perform Grouping and Mean Calculation
    # This command groups all entries that share the same (Config + Base Metric),
    # collapsing (avg, avg.1, avg.2) into a single average.
    averaged_sumDf = sumDf.groupby(grouping_keys).mean()
    logging.info(f"\n{averaged_sumDf.head(10)}")
    sumDf = averaged_sumDf
    df_summary = sumDf.unstack(level=-1)  # or level='Metric' if names are set
    # 2. Sort the resulting DataFrame by the desired metric column.
-    df_sorted_by_max = df_summary.sort_values(by='max', ascending=True)
+    df_sorted_by_max = df_summary.sort_values(
-    df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True)
+        by='max', ascending=True)  # type: ignore
-    df_sorted_by_median = df_summary.sort_values(by='median', ascending=True)
+    df_sorted_by_avg = df_summary.sort_values(
        by='avg', ascending=True)  # type: ignore
    df_sorted_by_median = df_summary.sort_values(
        by='median', ascending=True)  # type: ignore
    print("SORTED BY MAX")
    print(df_sorted_by_max)
@ -139,7 +183,8 @@ def analyze_latency_data(csv_path: str):
    # 2. Find the intersection (common elements) of the three sets of indices
    # max is the main index because it commonly introduces the largest amount of latency to the stream
-    common_indices = max_indices.intersection(avg_indices).intersection(median_indices)
+    common_indices = max_indices.intersection(
        avg_indices).intersection(median_indices)
    # 3. Filter the original summary DataFrame (df_summary) using the common indices
    df_common_top_performers = df_summary.loc[common_indices]
@ -156,8 +201,8 @@ def analyze_latency_data(csv_path: str):
    top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv")
    return
 if __name__ == '__main__':
    os.makedirs(get_args().csv_dir, exist_ok=True)
    os.makedirs(get_args().plot_dir, exist_ok=True)
    analyze_latency_data(get_args().latency_csv)
--- a/qualityAnalysis.py
+++ b/qualityAnalysis.py
@ -4,9 +4,12 @@ import numpy as np
 import logging
 import argparse
 import os
 import re
 # Configure logging to show informational messages
-logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
+logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')
 def parse_args():
    parser = argparse.ArgumentParser(prog=__file__)
@ -26,13 +29,17 @@ def parse_args():
                        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args()
 cmd_args = None
 def get_args():
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args
 def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
    """
    Draws a bar plot comparing PSNR and SSIM for the top 10 video configurations.
@ -118,12 +125,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
    # 7. Final Plot appearance
    fig.suptitle(title)
-    fig.tight_layout(rect=[0, 0.03, 1, 0.95]) 
+    fig.tight_layout(rect={0.0, 0.03, 1.0, 0.95})  # type: ignore
    # Combine legends from both axes
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
-    ax1.legend(lines1 + lines2, labels1 + labels2, bbox_to_anchor=(0.6, 1.1), ncol=2)
+    ax1.legend(lines1 + lines2, labels1 + labels2,
               bbox_to_anchor=(0.6, 1.1), ncol=2)
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    plt.savefig(f'{file_name}.png')
@ -133,11 +141,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
    for index, note in quality_notes.items():
        print(f"Index {index}: {note}")
 def analyze_quality_report(csv_path: str):
    # --- 1. Load Data with Multi-level Headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
-        logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
+        logging.info(
            f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
        if df.index.name == 'Unnamed: 0':
            df.index.name = 'component'
    except FileNotFoundError:
@ -149,13 +159,42 @@ def analyze_quality_report(csv_path: str):
    # Get row with average results
    avgDf = df.loc["Average"]
-    avgDf = avgDf.unstack(level=-1)
+    logging.info(f"\n{avgDf.head(10)}")
    # calculate mean across non-unique runs:
    def get_base_metric(metric):
        """Strips suffixes like '.1' or '.2' from the metric name."""
        return re.sub(r'\.\d+$', '', str(metric))
    metric_level_values = avgDf.index.get_level_values(-1)
    base_metrics_key = metric_level_values.map(get_base_metric)
    config_levels = list(range(avgDf.index.nlevels - 1)
                         )  # This gives [0, 1, 2, 3]
    grouping_keys = avgDf.index.droplevel(config_levels)  # type: ignore
    grouping_keys = [
        avgDf.index.get_level_values(i) for i in config_levels
    ] + [base_metrics_key]
    # 3. Perform Grouping and Mean Calculation
    # This command groups all entries that share the same (Config + Base Metric),
    # collapsing e.g. (PSNR, PSNR.1, PSNR.2) into a single average.
    averaged_sumDf = avgDf.groupby(grouping_keys).mean()
    logging.info(f"\n{averaged_sumDf.head(10)}")
    avgDf = averaged_sumDf
    logging.info(f"\n{avgDf.head(10)}")
    avgDf = avgDf.unstack(level=-1)
    encoder_name = avgDf.index.get_level_values(0)[0]
    logging.debug(f"encoder_name={encoder_name}")
-    dfPSNRsorted = avgDf.sort_values(by="PSNR", ascending=False)
+    dfPSNRsorted = avgDf.sort_values(
-    dfSSIMsorted = avgDf.sort_values(by="SSIM", ascending=False)
+        by="PSNR", ascending=False)  # type: ignore
    dfSSIMsorted = avgDf.sort_values(
        by="SSIM", ascending=False)  # type: ignore
    indexPSNR = dfPSNRsorted.index
    indexSSIM = dfSSIMsorted.index
@ -168,13 +207,15 @@ def analyze_quality_report(csv_path: str):
    # Convert the MultiIndex (encoder, profile, video, parameters) into columns
    df_quality_results = intersectedDf.reset_index()
    # Rename the columns to match the latency report's structure
-    df_quality_results.columns = ['encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM']
+    df_quality_results.columns = [
-    logging.debug(f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}")
+        'encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM']
-
+    logging.debug(
        f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}")
    # Now intersected with latency report
    latency_df = pd.read_csv(f'results/{encoder_name}.csv')
-    columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile', 'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
+    columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile',
               'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
    latency_df.rename(columns=columns, inplace=True)
    logging.debug(f"\n{latency_df.head()}")
@ -185,12 +226,14 @@ def analyze_quality_report(csv_path: str):
        df_quality_results,
        latency_df,
        on=merge_keys,
-        how='inner' # Only keep records present in both (i.e., the top quality configurations)
+        # Only keep records present in both (i.e., the top quality configurations)
        how='inner'
    )
    logging.info("=" * 70)
    logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---")
-    logging.info(f"Number of common configuration entries found: {len(merged_df)}")
+    logging.info(
        f"Number of common configuration entries found: {len(merged_df)}")
    logging.info("=" * 70)
    # Prepare for display
@ -200,21 +243,29 @@ def analyze_quality_report(csv_path: str):
    display_columns = [
        'encoder', 'profile', 'video', 'parameters',
        'PSNR', 'SSIM',  # Quality metrics
-        'avg', 'max', 'median', 'std' # Latency metrics (assuming these are in the latency report)
+        # Latency metrics (assuming these are in the latency report)
        'avg', 'max', 'median', 'std'
    ]
-    final_cols = [col for col in display_columns if col in merged_df_display.columns]
+    final_cols = [
        col for col in display_columns if col in merged_df_display.columns]
    print(f"\n{merged_df_display[final_cols].to_string()}")
-    plot_top_configurations(merged_df_display, get_args().plot_dir +  f"top_quality_configurations_by_latency_{encoder_name}", f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}")
+    plot_top_configurations(merged_df_display,
                            get_args().plot_dir +
                            f"top_quality_configurations_by_latency_{encoder_name}",
                            f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}")
-    plot_top_configurations(df_quality_results, get_args().plot_dir + f"top_quality_configurations_{encoder_name}", f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}")
+    plot_top_configurations(df_quality_results,
                            get_args().plot_dir +
                            f"top_quality_configurations_{encoder_name}",
                            f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}")
    return
 if __name__ == '__main__':
    os.makedirs(get_args().csv_dir, exist_ok=True)
    os.makedirs(get_args().plot_dir, exist_ok=True)
    analyze_quality_report(get_args().quality_csv)