import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import re
import argparse
import functools
import logging

# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')


def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        The parsed namespace with ``compensate``, ``latency_csv``,
        ``plot_dir`` and ``csv_dir`` attributes.
    """
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument('-c', '--compensate', action="store_true")
    parser.add_argument('--latency-csv', type=str,
                        default='sample/latencyDataframenvh264enc.csv',
                        help='Path to the latency results CSV file.')
    parser.add_argument('-pd', '--plot-dir', type=str, default='plots/',
                        help='Path to directory in which resulted plots should be saved')
    parser.add_argument('-csv', '--csv-dir', type=str, default='results/',
                        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args()


# Lazily populated cache of the parsed command-line arguments (see get_args).
cmd_args = None
# Name of the encoder under analysis; overwritten by analyze_latency_data
# with the first value of the top column level of the input CSV.
encoder_name = "default"


def get_args() -> argparse.Namespace:
    """Return the parsed command-line arguments, parsing them on first use."""
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args


def _create_labels(df_slice: pd.DataFrame) -> dict:
    """Map 1-based row positions of *df_slice* to a readable index description.

    Each label lists ``<level name>: <value>`` pairs joined by `` | ``;
    unnamed levels fall back to ``L<level number>``.
    """
    level_names = df_slice.index.names
    labels = {}
    for position, key in enumerate(df_slice.index, start=1):
        # A flat (non-Multi) index yields scalars; wrap them so that the
        # per-level enumeration below does not iterate over characters.
        values = key if isinstance(key, tuple) else (key,)
        parts = [f"{level_names[j] or f'L{j}'}: {val}"
                 for j, val in enumerate(values)]
        labels[position] = " | ".join(parts)
    return labels


def _asymmetric_errors(df: pd.DataFrame, metric: str) -> np.ndarray:
    """Return a ``(2, n)`` array of lower/upper error-bar lengths for *metric*.

    The lower length is ``mean - left`` and the upper one ``right - mean``,
    read from the ``('left' | 'mean' | 'right', metric)`` columns of *df*.
    """
    centre = df[('mean', metric)]
    lower = centre - df[('left', metric)]
    upper = df[('right', metric)] - centre
    return np.array([lower.values, upper.values])


def _annotate_bars(bars) -> None:
    """Write each bar's height (in ms) as rotated text inside the bar."""
    for rect in bars:
        height = rect.get_height()
        plt.annotate(f'{height:.2f} мс',
                     xy=(rect.get_x() + rect.get_width() / 2, height / 2),
                     xytext=(0, 0),  # no offset: anchor text at mid-height
                     textcoords="offset points",
                     transform_rotates_text=True,
                     rotation=90,
                     ha='center', va='bottom',
                     fontsize=10,
                     color='White')


def plot_latency_data(df):
    """Plot max/avg/median latency of the first 10 rows of *df* as grouped bars.

    *df* must provide ``('left' | 'mean' | 'right', 'max' | 'avg' | 'median')``
    columns; the ``left``/``right`` values are drawn as asymmetric error bars
    around ``mean``. The figure is saved into the ``--plot-dir`` directory and
    a legend mapping x-axis indices to configurations is printed to stdout.
    """
    df_top_n = df.head(10).copy()
    max_notes = _create_labels(df_top_n)

    bar_width = 0.25
    num_configs = len(df_top_n)
    r1 = np.arange(num_configs)

    # (metric, colour, legend label) in left-to-right drawing order.
    series = (
        ('max', 'red', 'Максимальная задержка'),
        ('avg', 'blue', 'Средняя задержка'),
        ('median', 'green', 'Медианная задержка'),
    )

    plt.figure(figsize=(12, 7), dpi=300)
    for slot, (metric, colour, label) in enumerate(series):
        bars = plt.bar(
            r1 + slot * bar_width,
            df_top_n[('mean', metric)],
            yerr=_asymmetric_errors(df_top_n, metric),
            capsize=5,
            color=colour,
            width=bar_width,
            edgecolor='grey',
            label=label
        )
        _annotate_bars(bars)

    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')

    # Ticks sit under the middle bar of every group.
    center_pos = r1 + bar_width
    plt.xticks(center_pos, [str(i + 1) for i in range(num_configs)])

    plt.title(
        f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
    # os.path.join keeps the file inside plot_dir even when the directory
    # was given without a trailing separator.
    plt.savefig(os.path.join(
        get_args().plot_dir,
        f'combined_top_configurations_with_errors_{encoder_name}.png'))
    plt.close()

    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")


def plot_start_latency(df):
    """Plot the ``avg`` latency of every row of *df* as a line with error bars.

    Uses ``('mean', 'avg')`` as the line and ``('left', 'avg')`` /
    ``('right', 'avg')`` as the lower/upper error bounds. The figure is saved
    into the ``--plot-dir`` directory.
    """
    plt.figure(figsize=(10, 6), dpi=300)
    r1 = np.arange(len(df))

    mean_values = df[('mean', 'avg')]
    lower_error = mean_values - df[('left', 'avg')]
    upper_error = df[('right', 'avg')] - mean_values
    y_error = [lower_error.values, upper_error.values]

    plt.errorbar(r1,
                 mean_values,
                 yerr=y_error,
                 fmt='-',
                 color='darkblue',
                 ecolor='red',
                 capsize=3,
                 linewidth=1
                 )
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    plt.title(f"Результаты стартовой задержки для {encoder_name}")
    plt.tight_layout()
    plt.savefig(os.path.join(get_args().plot_dir,
                             f"start_latency_{encoder_name}.png"))
    plt.close()


def get_base_metric(metric):
    """Strips suffixes like '.1' or '.2' from the metric name."""
    return re.sub(r'\.\d+$', '', str(metric))


def compensate(func):
    """Decorator: drop the file-source rows when ``--compensate`` is set.

    After calling *func*, the ``filesrc0`` and ``rawvideoparse0`` rows are
    removed from the returned frame if the ``compensate`` flag is on.
    A ``KeyError`` is raised by pandas if either row is absent.
    """
    @functools.wraps(func)
    def wrapper(df: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame:
        logging.debug("Inside transpose decorator")
        res_df = func(df, group)
        if get_args().compensate:
            logging.info(
                "Filesrc and rawvideoparse latency compensation is ON")
            res_df = res_df.drop(['filesrc0', 'rawvideoparse0'], axis=0)
        return res_df
    return wrapper


def log_result(func):
    """Decorator: log the wrapped function's return value at INFO level."""
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        res = func(*args, **kwargs)
        logging.info(f"\n{res}")
        return res
    return wrapper


@compensate
def transpose_and_set(df: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame:
    """Transpose *group* and label its columns with *df*'s column level names.

    Args:
        df: Frame whose ``columns.names`` are reused for the result.
        group: Aggregated frame whose index holds the column tuples.

    Returns:
        ``group.T`` with a ``MultiIndex`` built from ``group.index``.
    """
    new_column_index_data = group.index
    res_df = group.T
    res_df.columns = pd.MultiIndex.from_tuples(
        new_column_index_data, names=df.columns.names
    )
    return res_df


@log_result
def do_sum_and_change(df: pd.DataFrame):
    """Sum every column of *df* after substituting the encoder's avg latency.

    For the ``"<encoder_name>0"`` row the ``avg`` columns are overwritten with
    the ``max`` columns before summing, so the recorded encoder avg latency
    is replaced by its max latency to obtain the full pipeline latency.
    Assumes five column levels with the metric name in the last one.

    The substitution is done on a copy; the caller's frame is left untouched.

    Returns:
        The column-wise sum of the adjusted frame (``DataFrame.sum()``).
    """
    idx = pd.IndexSlice
    encoder_row = f"{encoder_name}0"

    # Work on a copy so the in-place assignment below does not leak out.
    df = df.copy()
    mean_max_values = df.loc[encoder_row,
                             idx[:, :, :, :, 'max']]  # type: ignore
    logging.info(mean_max_values.values)
    df.loc[encoder_row, idx[:, :, :, :, 'avg']
           ] = mean_max_values.values  # type: ignore
    return df.sum()


def get_grouping_keys(df: pd.DataFrame):
    """Build the group-by keys used to merge repeated metric columns.

    Returns a list with the values of every column level except the last,
    followed by the last level with numeric suffixes (``.1``, ``.2``, ...)
    stripped via :func:`get_base_metric`.
    """
    metric_level_values = df.columns.get_level_values(-1)
    base_metrics_key = metric_level_values.map(get_base_metric)

    config_levels = range(df.columns.nlevels - 1)
    grouping_keys = [df.columns.get_level_values(i) for i in config_levels]
    grouping_keys.append(base_metrics_key)
    return grouping_keys


def analyze_latency_data(csv_path: str):
    """Rank configurations by summed latency, plot them and export the top 10.

    The CSV is read with five header rows and the first column as index.
    Columns that differ only by a numeric metric suffix are aggregated with
    mean/min/max, each aggregate is summed over the index rows (see
    :func:`do_sum_and_change`), and the three results are combined under the
    ``mean``/``left``/``right`` keys. The summary is printed sorted by max,
    avg and median, plotted, and the ``mean`` part of its first 10 rows is
    written to ``<csv_dir>/<encoder_name>.csv``.

    Read errors are logged and cause an early return.

    Args:
        csv_path (str): The path to the input CSV file.
    """
    global encoder_name

    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    logging.info(
        f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
    if df.index.name == 'Unnamed: 0':
        df.index.name = 'component'

    encoder_name = df.columns.get_level_values(0)[0]
    logging.info(f"encoder name={encoder_name}")

    grouping_keys = get_grouping_keys(df)
    mean_df = transpose_and_set(df, df.T.groupby(grouping_keys).mean())
    min_df = transpose_and_set(df, df.T.groupby(grouping_keys).min())
    max_df = transpose_and_set(df, df.T.groupby(grouping_keys).max())

    logging.info(f"\n{mean_df}")

    # From here on the data is summed over the index rows; values for
    # individual rows are no longer accessible.
    mean_sum = do_sum_and_change(mean_df)
    min_sum = do_sum_and_change(min_df)
    max_sum = do_sum_and_change(max_df)

    merged_sum = pd.concat(
        [min_sum, mean_sum, max_sum],
        axis=1,
        keys=['left', 'mean', 'right']
    )
    # Move the metric level (max/avg/median) from the index into the columns.
    df_summary = merged_sum.unstack(level=-1)

    df_sorted_by_max = df_summary.sort_values(
        by=('mean', 'max'), ascending=True)  # type: ignore
    df_sorted_by_avg = df_summary.sort_values(
        by=('mean', 'avg'), ascending=True)  # type: ignore
    df_sorted_by_median = df_summary.sort_values(
        by=('mean', 'median'), ascending=True)  # type: ignore

    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    # Intersect the three orderings, starting from the avg ordering.
    # NOTE(review): all three frames hold the same rows, so this keeps every
    # row; the result is expected to follow the avg ordering - confirm.
    common_indices = (
        df_sorted_by_avg.index
        .intersection(df_sorted_by_max.index)
        .intersection(df_sorted_by_median.index)
    )

    # Select the summary rows in that order.
    df_common_top_performers = df_summary.loc[common_indices]
    print(df_common_top_performers.head())

    plot_latency_data(df_common_top_performers)
    plot_start_latency(df_common_top_performers)

    # Save the 'mean' values of the first 10 rows to CSV.
    top_10_df = df_common_top_performers.head(10)["mean"]
    top_10_df.to_csv(os.path.join(get_args().csv_dir, f"{encoder_name}.csv"))
    return


def main() -> None:
    """Create the output directories and run the analysis."""
    args = get_args()
    os.makedirs(args.csv_dir, exist_ok=True)
    os.makedirs(args.plot_dir, exist_ok=True)
    analyze_latency_data(args.latency_csv)


if __name__ == '__main__':
    main()