# gstAutotest/latencyAnalysis.py

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import argparse
import logging

# Root logger setup: emit INFO and above, each record prefixed with its
# timestamp and level name.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)

def parse_args():
    """Parse the script's command-line options from ``sys.argv``.

    Returns:
        argparse.Namespace: carries ``compensate`` (bool, False unless
        ``-c``/``--compensate`` is given) and the string options
        ``latency_csv``, ``plot_dir`` and ``csv_dir``.
    """
    cli = argparse.ArgumentParser(prog=__file__)

    # Plain on/off switch, no value expected.
    cli.add_argument('-c', '--compensate', action="store_true")

    # (option strings, default value, help text) for every path-like option,
    # registered in this order so the generated --help output is unchanged.
    path_options = (
        (('--latency-csv',),
         'sample/latencyDataframenvh264enc.csv',
         'Path to the latency results CSV file.'),
        (('-pd', '--plot-dir'),
         'plots/',
         'Path to directory in which resulted plots should be saved'),
        (('-csv', '--csv-dir'),
         'results/',
         'Path to directory in which resulted csv data should be saved'),
    )
    for option_strings, default_value, help_text in path_options:
        cli.add_argument(*option_strings,
                         type=str,
                         default=default_value,
                         help=help_text)

    return cli.parse_args()

# Module-level cache for the parsed command line; stays None until the first
# get_args() call.
cmd_args = None
def get_args():
    """Return the parsed command-line arguments, parsing them on first use only."""
    global cmd_args
    if cmd_args is not None:
        return cmd_args
    # First call: parse once and remember the result for every later caller.
    cmd_args = parse_args()
    return cmd_args

def plot_latency_data(df):
    """Save a grouped bar chart of max/avg/median latency for the first 10 rows of *df*.

    Rows are plotted in the order given; any sorting is up to the caller. The
    image is written to
    ``<plot_dir>/combined_top_configurations_plot_<encoder>.png``, where
    ``<plot_dir>`` comes from the command-line arguments and ``<encoder>`` is
    the first value of index level 0. A legend mapping every 1-based x-axis
    position to its full index entry is printed to stdout afterwards.

    Args:
        df: DataFrame with ``max``, ``avg`` and ``median`` columns; each index
            entry (MultiIndex tuple or scalar) identifies one configuration.

    Returns:
        None. An empty *df* is logged and skipped instead of raising.
    """
    def create_labels(frame):
        """Map 1-based row position to an ``L0:v0 | L1:v1 | ...`` rendering of its index entry."""
        labels = {}
        for position, key in enumerate(frame.index, start=1):
            # A flat index yields scalars; wrap them so a string key is not
            # split into one "level" per character.
            levels = key if isinstance(key, tuple) else (key,)
            labels[position] = " | ".join(f"L{j}:{val}" for j, val in enumerate(levels))
        return labels

    if df.empty:
        # Without rows there is no encoder name to read and nothing to draw.
        logging.warning("No configurations to plot; skipping the combined latency plot.")
        return

    df = df.head(10)
    encoder_name = df.index.get_level_values(0)[0]
    max_notes = create_labels(df)

    bar_width = 0.25
    num_configs = len(df)
    positions = np.arange(num_configs)
    # (column, bar colour, legend label); one bar per metric, side by side.
    series = (
        ('max', 'red', 'Max Latency'),
        ('avg', 'blue', 'Avg Latency'),
        ('median', 'green', 'Median Latency'),
    )

    fig = plt.figure(figsize=(10, 6), dpi=300)
    try:
        # Create the bars, shifting each metric by one bar width.
        for slot, (column, color, label) in enumerate(series):
            plt.bar(positions + slot * bar_width, df[column], color=color,
                    width=bar_width, edgecolor='grey', label=label)

        # Add labels and ticks; ticks sit under the middle bar of each group.
        plt.xlabel('Индекс конфигурации', fontweight='bold')
        plt.ylabel('Общая задержка [мс]', fontweight='bold')
        plt.xticks(positions + bar_width, [str(i + 1) for i in range(num_configs)])
        plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
        plt.legend()
        plt.grid(axis='y', linestyle='--', alpha=0.6)

        plt.tight_layout()
        # os.path.join keeps the file inside plot_dir even when the directory
        # was passed without a trailing separator.
        plt.savefig(os.path.join(get_args().plot_dir,
                                 f'combined_top_configurations_plot_{encoder_name}.png'))
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)

    # Output Notes (for user interpretation)
    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")

def plot_start_latency(df):
    """Save a line plot of the ``max`` column across all rows of *df*.

    The x axis is the 0-based row position, so the curve follows whatever
    order the caller passed in. The image is written to
    ``<plot_dir>/start_latency_<encoder>.png``, where ``<plot_dir>`` comes from
    the command-line arguments and ``<encoder>`` is the first value of index
    level 0.

    Args:
        df: DataFrame with a ``max`` column; one row per configuration.

    Returns:
        None. An empty *df* is logged and skipped instead of raising.
    """
    if df.empty:
        # Without rows there is no encoder name to read and nothing to draw.
        logging.warning("No configurations to plot; skipping the start latency plot.")
        return

    encoder_name = df.index.get_level_values(0)[0]

    fig = plt.figure(figsize=(10, 6), dpi=300)
    try:
        plt.plot(np.arange(len(df)), df['max'])
        plt.xlabel('Индекс конфигурации', fontweight='bold')
        plt.ylabel('Общая задержка [мс]', fontweight='bold')
        plt.title(f"Результаты стартовой задержки для {encoder_name}")
        plt.tight_layout()
        # os.path.join keeps the file inside plot_dir even when the directory
        # was passed without a trailing separator.
        plt.savefig(os.path.join(get_args().plot_dir, f"start_latency_{encoder_name}.png"))
    finally:
        # Release the figure even if drawing or saving failed.
        plt.close(fig)

def analyze_latency_data(csv_path: str):
    """
    Rank configurations by summed latency, plot them and export the 10 best.

    The CSV is read with a five-row column header and its first column as the
    row index (renamed to ``component`` when unnamed). Every column is summed
    over all rows; with ``--compensate`` the ``filesrc0`` row is subtracted
    from those sums. The last header level is then unstacked into columns
    (``max``, ``avg`` and ``median`` are required), giving one row per
    configuration. The table is printed sorted by each metric, passed to
    ``plot_latency_data`` and ``plot_start_latency`` in ascending ``max``
    order, and its first 10 rows are written to ``<csv_dir>/<encoder>.csv``,
    where ``<encoder>`` is the first value of index level 0.

    Args:
        csv_path (str): The path to the input CSV file.

    Returns:
        None. Read failures, a missing ``filesrc0`` row (when compensation is
        requested), missing metric columns and an empty summary are logged and
        end the analysis early instead of raising.
    """
    # --- 1. Load Data with Multi-level Headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
    if df.index.name == 'Unnamed: 0':
        df.index.name = 'component'

    args = get_args()

    # --- 2. Sum every column over all rows (components) ---
    column_totals = df.sum()
    if args.compensate:
        logging.info("Filesrc latency compensation is ON")
        try:
            filesrc_latency = df.loc["filesrc0"]
        except KeyError:
            # Continuing uncompensated would silently produce numbers the
            # user did not ask for, so stop here.
            logging.error("Latency compensation was requested but the CSV has no 'filesrc0' row.")
            return
        column_totals = column_totals - filesrc_latency
    print(column_totals.head())

    # --- 3. One row per configuration, one column per metric ---
    df_summary = column_totals.unstack(level=-1)  # or level='Metric' if names are set

    missing_metrics = [m for m in ('max', 'avg', 'median') if m not in df_summary.columns]
    if missing_metrics:
        logging.error(f"The CSV header does not provide the required metric column(s): {missing_metrics}")
        return
    if df_summary.empty:
        logging.error("The CSV contains no configurations to analyze.")
        return

    # --- 4. Sort by each metric, lowest latency first ---
    df_sorted_by_max = df_summary.sort_values(by='max', ascending=True)
    df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True)
    df_sorted_by_median = df_summary.sort_values(by='median', ascending=True)

    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    # --- 5. Combine the three orderings ---
    # The intersection is called on the max-sorted index, so max is the
    # primary ordering; it commonly introduces the largest amount of latency
    # to the stream.
    # NOTE(review): each index still holds every configuration (no top-N cut
    # is applied before intersecting), so nothing is filtered out here.
    # Confirm whether intersecting the per-metric top 10 was intended.
    max_indices = df_sorted_by_max.index
    avg_indices = df_sorted_by_avg.index
    median_indices = df_sorted_by_median.index
    common_indices = max_indices.intersection(avg_indices).intersection(median_indices)

    df_common_top_performers = df_summary.loc[common_indices]
    encoder_name = df_common_top_performers.index.get_level_values(0)[0]

    print(df_common_top_performers.head())

    # --- 6. Plot ---
    plot_latency_data(df_common_top_performers)

    plot_start_latency(df_common_top_performers)

    # --- 7. Save top performers to csv ---
    top_10_df = df_common_top_performers.head(10)
    # os.path.join keeps the file inside csv_dir even when the directory was
    # passed without a trailing separator.
    top_10_df.to_csv(os.path.join(args.csv_dir, f"{encoder_name}.csv"))

if __name__ == '__main__':
    # Script entry point: make sure both output directories exist (CSV
    # directory first, then plots), then analyze the requested latency CSV.
    args = get_args()
    for output_dir in (args.csv_dir, args.plot_dir):
        os.makedirs(output_dir, exist_ok=True)
    analyze_latency_data(args.latency_csv)