# gstAutotest/qualityAnalysis.py

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import logging
import argparse
import os
import re

# Root logger setup: emit INFO-and-above records, each one prefixed with
# its timestamp and severity level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)


def parse_args(argv: 'list[str] | None' = None) -> argparse.Namespace:
    """
    Parses the command-line options of the quality analysis script.

    Args:
        argv: Argument strings to parse. When None (the default) argparse
            reads them from sys.argv[1:], which is what the script entry
            point relies on; an explicit list allows programmatic use.

    Returns:
        Namespace with the attributes quality_csv, plot_dir and csv_dir.
    """
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument(
        '--quality-csv',
        type=str,
        default='sample/qualityResultsnvh264enc.csv',
        help='Path to the quality results CSV file.'
    )
    parser.add_argument('-pd', '--plot-dir',
                        type=str,
                        default='plots/',
                        help='Path to directory in which resulted plots should be saved')
    parser.add_argument('-csv', '--csv-dir',
                        type=str,
                        default='results/',
                        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args(argv)


# Module-level cache for the parsed command-line arguments; stays None
# until get_args() is called for the first time.
cmd_args = None


def get_args():
    """
    Returns the parsed command-line arguments, parsing them on first use.

    The result of parse_args() is stored in the module-level cmd_args, so
    every later call returns that same object without parsing again.
    """
    global cmd_args
    if cmd_args is not None:
        return cmd_args
    cmd_args = parse_args()
    return cmd_args


def _annotate_bars(ax, bars, label: str, precision: int):
    """Writes '<label>=<height>' as rotated white text inside each bar of *bars*."""
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{label}={height:.{precision}f}',
                    # Anchor at two thirds of the bar height so the text
                    # is drawn over the coloured bar, not above it.
                    xy=(bar.get_x() + bar.get_width() / 2, height / 1.5),
                    xytext=(0, 0),  # no additional offset from the anchor
                    textcoords="offset points", transform_rotates_text=True,
                    rotation=90,
                    ha='center', va='bottom', fontsize=10, color='White')


def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
    """
    Draws a bar plot comparing PSNR and SSIM for the first 10 rows of *df*.

    The plot uses a primary Y-axis for PSNR and a secondary Y-axis for SSIM
    due to their different value ranges. The X-axis uses simple numerical
    indices (1..N); the mapping from each index to its configuration is
    printed to stdout after the plot has been saved.

    Nothing is plotted (only a warning is logged) when *df* is empty.

    Args:
        df: DataFrame with the configurations in the order they should be
            plotted. Must have the columns 'encoder', 'profile', 'video',
            'parameters', 'PSNR' and 'SSIM'.
        file_name: Output path without extension; '.png' is appended.
        title: Title of the plot.

    Raises:
        KeyError: If one of the required columns is missing.
    """
    # Only the first 10 rows are plotted
    plot_df = df.head(10)

    if plot_df.empty:
        logging.warning("DataFrame is empty, cannot generate plot.")
        return

    # Bar positions on the x-axis (0-based) and their 1-based tick labels
    config_indices = np.arange(len(plot_df))
    x_labels_simple = [str(i + 1) for i in config_indices]

    # Notes mapping each 1-based tick label to
    # "encoder | profile | video | parameters". Values are passed through
    # str() so that non-string cells (numbers, NaN) do not break the join.
    note_columns = ['encoder', 'profile', 'video', 'parameters']
    quality_notes = {
        position: " | ".join(str(part) for part in parts)
        for position, parts in enumerate(
            plot_df[note_columns].itertuples(index=False, name=None), start=1)
    }

    fig, ax1 = plt.subplots(figsize=(10, 6), dpi=300)
    try:
        bar_width = 0.35

        # PSNR bars on the primary (left) axis, shifted half a bar to the left
        psnr_bars = ax1.bar(config_indices - bar_width/2, plot_df['PSNR'], bar_width,
                            label='PSNR (dB)', color='Blue', edgecolor='grey')
        ax1.set_xlabel('Configuration Index', fontsize=12)
        ax1.set_ylabel('PSNR (dB)', color='Black', fontsize=12)
        ax1.tick_params(axis='y', labelcolor='Black')
        ax1.set_xticks(config_indices)
        ax1.set_xticklabels(x_labels_simple, fontsize=10)
        _annotate_bars(ax1, psnr_bars, 'PSNR', 2)

        # SSIM bars on a secondary (right) axis sharing the same x-axis,
        # shifted half a bar to the right
        ax2 = ax1.twinx()
        ssim_bars = ax2.bar(config_indices + bar_width/2, plot_df['SSIM'], bar_width,
                            label='SSIM', color='Red', edgecolor='grey')
        ax2.set_ylabel('SSIM (Structural Similarity)', color='Black', fontsize=12)
        ax2.tick_params(axis='y', labelcolor='Black')
        _annotate_bars(ax2, ssim_bars, 'SSIM', 4)

        # Final plot appearance; the layout rectangle leaves the top 5% of
        # the figure free for the suptitle.
        fig.suptitle(title)
        fig.tight_layout(rect=(0.0, 0.0, 1.0, 0.95))

        # Combine the legends of both axes into a single one
        lines1, labels1 = ax1.get_legend_handles_labels()
        lines2, labels2 = ax2.get_legend_handles_labels()
        ax1.legend(lines1 + lines2, labels1 + labels2,
                   bbox_to_anchor=(0.6, 1.1), ncol=2)

        plt.grid(axis='y', linestyle='--', alpha=0.7)
        fig.savefig(f'{file_name}.png')
    finally:
        # Release the figure; pyplot otherwise keeps every created figure
        # alive for the lifetime of the process.
        plt.close(fig)

    # Output notes (for user interpretation)
    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in quality_notes.items():
        print(f"Index {index}: {note}")


def analyze_quality_report(csv_path: str):
    """
    Analyzes a quality (PSNR/SSIM) report and plots the best configurations.

    Steps:
      1. Load *csv_path* (five header rows, first column as index) and take
         its "Average" row.
      2. Average entries whose last header level differs only by a numeric
         suffix ('.1', '.2', ...), i.e. repeated runs of the same metric.
      3. Build a flat quality table with the columns
         encoder, profile, video, parameters, PSNR, SSIM.
      4. Inner-merge it with the latency report '<csv-dir>/<encoder>.csv',
         print the merged table and save two plots into '<plot-dir>'.

    Problems with the input (missing file, unreadable CSV, no "Average" row,
    no PSNR/SSIM metrics, missing latency report) are logged as errors and
    the function returns without producing plots.

    Args:
        csv_path: Path to the quality results CSV file.
    """
    # --- 1. Load data with multi-level headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
        logging.info(
            f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
        if df.index.name == 'Unnamed: 0':
            df.index.name = 'component'
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    # Get the row with average results
    try:
        avgDf = df.loc["Average"]
    except KeyError:
        logging.error(f"Error: No 'Average' row found in '{csv_path}'.")
        return
    logging.info(f"\n{avgDf.head(10)}")

    # --- 2. Calculate the mean across non-unique runs ---
    def get_base_metric(metric):
        """Strips suffixes like '.1' or '.2' from the metric name."""
        return re.sub(r'\.\d+$', '', str(metric))

    base_metrics_key = avgDf.index.get_level_values(-1).map(get_base_metric)

    # Every index level except the last one identifies the configuration.
    config_levels = range(avgDf.index.nlevels - 1)
    grouping_keys = [
        avgDf.index.get_level_values(i) for i in config_levels
    ] + [base_metrics_key]

    # Group all entries that share the same (configuration + base metric),
    # collapsing e.g. (PSNR, PSNR.1, PSNR.2) into a single mean value.
    avgDf = avgDf.groupby(grouping_keys).mean()
    logging.info(f"\n{avgDf.head(10)}")

    # Move the metric level into columns: one row per configuration.
    avgDf = avgDf.unstack(level=-1)
    missing_metrics = [m for m in ('PSNR', 'SSIM') if m not in avgDf.columns]
    if missing_metrics:
        logging.error(
            f"Error: Metrics {missing_metrics} not found in '{csv_path}'.")
        return
    encoder_name = avgDf.index.get_level_values(0)[0]
    logging.debug(f"encoder_name={encoder_name}")

    dfPSNRsorted = avgDf.sort_values(
        by="PSNR", ascending=False)  # type: ignore
    dfSSIMsorted = avgDf.sort_values(
        by="SSIM", ascending=False)  # type: ignore

    # NOTE(review): both sorted frames hold the same rows, so this
    # intersection looks like it keeps every configuration rather than a
    # top-N subset -- confirm the intent.
    commonIndex = dfPSNRsorted.index.intersection(dfSSIMsorted.index)

    intersectedDf = avgDf.loc[commonIndex]
    logging.debug(intersectedDf.head(10))

    # --- 3. Prepare the quality data for the merge ---
    # Keep only the two plotted metrics (so additional metrics in the report
    # do not break the renaming below) and turn the index levels
    # (encoder, profile, video, parameters) into regular columns.
    df_quality_results = intersectedDf[['PSNR', 'SSIM']].reset_index()
    # Rename the columns to match the latency report's structure
    df_quality_results.columns = [
        'encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM']
    logging.debug(
        f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}")

    # --- 4. Load the latency report of the same encoder ---
    # The report is looked up in the --csv-dir directory (default 'results/').
    latency_path = os.path.join(get_args().csv_dir, f'{encoder_name}.csv')
    try:
        latency_df = pd.read_csv(latency_path)
    except FileNotFoundError:
        logging.error(f"Error: The file '{latency_path}' was not found.")
        return

    logging.info(latency_df.head())
    # The identifier columns have no header in the latency CSV, so pandas
    # names them 'Unnamed: N'.
    columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile',
               'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
    latency_df.rename(columns=columns, inplace=True)  # type: ignore
    logging.debug(f"\n{latency_df.head()}")

    # --- 5. Merge quality and latency reports ---
    # Inner merge on the four identifier columns: only configurations
    # present in both reports are kept.
    merge_keys = ['encoder', 'profile', 'video', 'parameters']
    merged_df = pd.merge(
        df_quality_results,
        latency_df,
        on=merge_keys,
        how='inner'
    )

    logging.info("=" * 70)
    logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---")
    logging.info(
        f"Number of common configuration entries found: {len(merged_df)}")
    logging.info("=" * 70)

    # Prepare for display
    merged_df_display = merged_df.sort_values(by='PSNR', ascending=False)

    # Select and display key metrics; latency columns are shown only if the
    # latency report actually provides them.
    display_columns = [
        'encoder', 'profile', 'video', 'parameters',
        'PSNR', 'SSIM',  # Quality metrics
        'avg', 'max', 'median', 'std'  # Latency metrics
    ]

    final_cols = [
        col for col in display_columns if col in merged_df_display.columns]

    print(f"\n{merged_df_display[final_cols].to_string()}")

    # os.path.join keeps the paths valid whether or not --plot-dir ends
    # with a path separator.
    plot_dir = get_args().plot_dir
    plot_top_configurations(merged_df_display,
                            os.path.join(
                                plot_dir,
                                f"top_quality_configurations_by_latency_{encoder_name}"),
                            f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}")

    plot_top_configurations(df_quality_results,
                            os.path.join(
                                plot_dir,
                                f"top_quality_configurations_{encoder_name}"),
                            f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}")


if __name__ == '__main__':
    # Script entry point: make sure both output directories exist, then
    # analyze the quality report given on the command line.
    args = get_args()
    for out_dir in (args.csv_dir, args.plot_dir):
        os.makedirs(out_dir, exist_ok=True)
    analyze_quality_report(args.quality_csv)