"""Summarise summed pipeline latency per configuration and plot the results.

The script reads a latency CSV with five header levels, sums the latency of
all rows for every column, collapses repeated measurements of the same
statistic (``avg``, ``avg.1``, ...) into min / mean / max, ranks the
configurations and writes bar/line plots plus a CSV of the first entries.
"""
import argparse
import logging
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Number of leading (best-ranked) configurations that are plotted and exported.
TOP_CONFIG_COUNT = 10

# Statistic names the summary is sorted and plotted by.
_METRICS = ('max', 'avg', 'median')

# Rows subtracted from the summed latency when --compensate is given.
_COMPENSATED_ROWS = ('filesrc0', 'rawvideoparse0')

# Trailing ".<digits>" suffix on repeated column names, e.g. "avg.1".
_METRIC_SUFFIX = re.compile(r'\.\d+$')


def parse_args():
    """Parse the command-line options of the script.

    Returns:
        argparse.Namespace with ``compensate``, ``latency_csv``, ``plot_dir``
        and ``csv_dir`` attributes.
    """
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument(
        '-c', '--compensate', action="store_true",
        help='Subtract the filesrc0 and rawvideoparse0 rows from the summed '
             'latency.')
    parser.add_argument(
        '--latency-csv', type=str,
        default='sample/latencyDataframenvh264enc.csv',
        help='Path to the latency results CSV file.')
    parser.add_argument(
        '-pd', '--plot-dir', type=str, default='plots/',
        help='Path to directory in which resulted plots should be saved')
    parser.add_argument(
        '-csv', '--csv-dir', type=str, default='results/',
        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args()


cmd_args = None


def get_args():
    """Return the parsed command-line options, parsing them on first use."""
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args


def _asymmetric_errors(df, metric):
    """Return a 2xN array of (lower, upper) error-bar lengths for *metric*.

    The bar is centred on ``('mean', metric)`` and reaches down to
    ``('left', metric)`` and up to ``('right', metric)``.
    """
    center = df[('mean', metric)]
    lower = center - df[('left', metric)]
    upper = df[('right', metric)] - center
    # Float rounding can make the mean of identical samples differ from their
    # min/max by one ulp; matplotlib rejects negative error lengths.
    return np.clip(np.array([lower.values, upper.values]), 0, None)


def _describe_configurations(df):
    """Map the 1-based row position to a readable "level: value | ..." note."""
    level_names = df.index.names
    notes = {}
    for position, key in enumerate(df.index, start=1):
        # A single-level index yields scalars; wrap them so that a string key
        # is not enumerated character by character.
        values = key if isinstance(key, tuple) else (key,)
        notes[position] = " | ".join(
            f"{level_names[j] or f'L{j}'}: {value}"
            for j, value in enumerate(values)
        )
    return notes


def plot_latency_data(df, top_n=TOP_CONFIG_COUNT):
    """Draw a grouped bar chart of max/avg/median latency for the first rows.

    Args:
        df: Summary frame whose columns contain ``('left'|'mean'|'right',
            'max'|'avg'|'median')`` and whose first index level holds the
            name used in the title and in the output file name.
        top_n: Number of leading rows of *df* to plot.

    The figure is saved into the ``--plot-dir`` directory and a legend that
    maps the x-axis numbers to configurations is printed to stdout.
    """
    if df.empty:
        logging.warning("No configurations to plot; skipping bar chart.")
        return

    df_top_n = df.head(top_n).copy()
    encoder_name = df_top_n.index.get_level_values(0)[0]
    notes = _describe_configurations(df_top_n)

    num_configs = len(df_top_n)
    bar_width = 0.25
    positions = np.arange(num_configs)

    # Resolve all data before a figure exists so that a missing column does
    # not leave an open figure behind.
    series = [
        (metric, color, label, df_top_n[('mean', metric)],
         _asymmetric_errors(df_top_n, metric))
        for metric, color, label in (
            ('max', 'red', 'Максимальная задержка'),
            ('avg', 'blue', 'Средняя задержка'),
            ('median', 'green', 'Медианная задержка'),
        )
    ]

    fig = plt.figure(figsize=(12, 7), dpi=300)
    for slot, (_, color, label, heights, errors) in enumerate(series):
        plt.bar(
            positions + slot * bar_width,
            heights,
            yerr=errors,
            capsize=5,
            color=color,
            width=bar_width,
            edgecolor='grey',
            label=label,
        )

    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    # Ticks sit under the middle bar of every group of three.
    plt.xticks(positions + bar_width,
               [str(i + 1) for i in range(num_configs)])
    plt.title(
        f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
    plt.savefig(os.path.join(
        get_args().plot_dir,
        f'combined_top_configurations_with_errors_{encoder_name}.png'))
    plt.close(fig)

    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in notes.items():
        print(f"Index {index}: {note}")


def plot_start_latency(df):
    """Plot the ``max`` latency of every row of *df* as a line with error bars.

    Args:
        df: Summary frame with ``('left'|'mean'|'right', 'max')`` columns; the
            first index level holds the name used in the title and file name.

    The figure is saved into the ``--plot-dir`` directory.
    """
    if df.empty:
        logging.warning("No configurations to plot; skipping line chart.")
        return

    mean_values = df[('mean', 'max')]
    y_error = _asymmetric_errors(df, 'max')
    encoder_name = df.index.get_level_values(0)[0]

    fig = plt.figure(figsize=(10, 6), dpi=300)
    plt.errorbar(
        np.arange(len(df)), mean_values,
        yerr=y_error,
        fmt='.-',
        color='darkblue',
        ecolor='red',
        capsize=3,
        linewidth=1,
    )
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    plt.title(f"Результаты стартовой задержки для {encoder_name}")
    plt.tight_layout()
    plt.savefig(os.path.join(get_args().plot_dir,
                             f"start_latency_{encoder_name}.png"))
    plt.close(fig)


def get_base_metric(metric):
    """Strips suffixes like '.1' or '.2' from the metric name."""
    return _METRIC_SUFFIX.sub('', str(metric))


def analyze_latency_data(csv_path: str):
    """Rank configurations by summed latency, then plot and export them.

    The CSV is read with five header levels and the first column as index.
    All rows are summed per column; columns that differ only by a numeric
    suffix in the last header level are collapsed into their min ('left'),
    mean ('mean') and max ('right'). The result is printed sorted by max, avg
    and median, plotted, and its first ``TOP_CONFIG_COUNT`` rows are written
    to ``<csv-dir>/<name>.csv``.

    Problems (unreadable file, missing rows/columns, empty result) are logged
    and make the function return early without raising.

    Args:
        csv_path (str): The path to the input CSV file.
    """
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    logging.info(
        "Successfully loaded '%s' with multi-level headers. Shape: %s",
        csv_path, df.shape)
    if df.index.name == 'Unnamed: 0':
        df.index.name = 'component'

    totals = df.sum()

    if get_args().compensate:
        logging.info("Filesrc and rawvideoparse latency compensation is ON")
        missing_rows = [row for row in _COMPENSATED_ROWS
                        if row not in df.index]
        if missing_rows:
            logging.error(
                "Cannot compensate: rows %s are missing from '%s'.",
                missing_rows, csv_path)
            return
        for row in _COMPENSATED_ROWS:
            totals = totals - df.loc[row]

    logging.debug("\n%s", totals.head())

    # Group by every configuration level plus the suffix-free metric name so
    # that (avg, avg.1, avg.2) of one configuration land in one group.
    base_metrics_key = totals.index.get_level_values(-1).map(get_base_metric)
    grouping_keys = [
        totals.index.get_level_values(level)
        for level in range(totals.index.nlevels - 1)
    ] + [base_metrics_key]
    grouped = totals.groupby(grouping_keys)

    averaged = grouped.mean()
    highest = grouped.max()
    lowest = grouped.min()

    logging.debug("\n%s", highest.head(10))
    logging.debug("\n%s", lowest.head(10))
    logging.info("\n%s", averaged.head(10))

    merged = pd.concat(
        [lowest, averaged, highest],
        axis=1,
        keys=['left', 'mean', 'right'],
    )
    # Move the metric name from the index into the columns:
    # columns become (left|mean|right, metric).
    df_summary = merged.unstack(level=-1)

    missing_columns = [('mean', metric) for metric in _METRICS
                       if ('mean', metric) not in df_summary.columns]
    if missing_columns:
        logging.error("Required statistics %s are missing from '%s'.",
                      missing_columns, csv_path)
        return

    df_sorted_by_max = df_summary.sort_values(
        by=('mean', 'max'), ascending=True)  # type: ignore
    df_sorted_by_avg = df_summary.sort_values(
        by=('mean', 'avg'), ascending=True)  # type: ignore
    df_sorted_by_median = df_summary.sort_values(
        by=('mean', 'median'), ascending=True)  # type: ignore

    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    # Intersect the three rankings. The max ranking is the left operand
    # because max latency commonly contributes the largest share of the
    # latency added to the stream.
    common_indices = (
        df_sorted_by_max.index
        .intersection(df_sorted_by_avg.index)
        .intersection(df_sorted_by_median.index)
    )

    # Filter the summary frame using the common indices.
    df_common_top_performers = df_summary.loc[common_indices]
    if df_common_top_performers.empty:
        logging.warning("No configurations found in '%s'; nothing to plot.",
                        csv_path)
        return

    encoder_name = df_common_top_performers.index.get_level_values(0)[0]

    print(df_common_top_performers.head())

    plot_latency_data(df_common_top_performers)
    plot_start_latency(df_common_top_performers)

    # Save top performers to csv
    top_df = df_common_top_performers.head(TOP_CONFIG_COUNT)
    top_df.to_csv(os.path.join(get_args().csv_dir, f"{encoder_name}.csv"))
    return


def main():
    """Create the output directories and analyse the configured CSV file."""
    args = get_args()
    os.makedirs(args.csv_dir, exist_ok=True)
    os.makedirs(args.plot_dir, exist_ok=True)
    analyze_latency_data(args.latency_csv)


if __name__ == '__main__':
    main()