diff --git a/latencyAnalysis.py b/latencyAnalysis.py
index 7d1741e..273f11b 100644
--- a/latencyAnalysis.py
+++ b/latencyAnalysis.py
@@ -40,38 +40,96 @@ def get_args():
 
 
 def plot_latency_data(df):
-    def create_labels(df):
-        """Combines MultiIndex levels (L0-L3) into a single string for notes."""
+    def create_labels(df_slice):
         labels = {}
-        for i, index in enumerate(df.index):
-            # Format: L#:value | L#:value | ...
-            label_parts = [f"L{j}:{val}" for j, val in enumerate(index)]
+        for i, index in enumerate(df_slice.index):
+            label_parts = [f"{df.index.names[j] or f'L{j}'}: {val}"
+                           for j, val in enumerate(index)]
             labels[i + 1] = " | ".join(label_parts)
         return labels
 
-    df = df.head(10)
-    encoder_name = df.index.get_level_values(0)[0]
-    max_notes = create_labels(df)
+    mean_max_key = ('mean', 'max')
+    mean_avg_key = ('mean', 'avg')
+    mean_median_key = ('mean', 'median')
+
+    min_max_key = ('left', 'max')
+    max_max_key = ('right', 'max')
+
+    min_avg_key = ('left', 'avg')
+    max_avg_key = ('right', 'avg')
+
+    min_median_key = ('left', 'median')
+    max_median_key = ('right', 'median')
+
+    df_top_n = df.head(10).copy()
+
+    mean_max_values = df_top_n[mean_max_key]
+    yerr_lower_max = mean_max_values - df_top_n[min_max_key]
+    yerr_upper_max = df_top_n[max_max_key] - mean_max_values
+    yerr_max_orig = np.array([yerr_lower_max.values, yerr_upper_max.values])
+
+    mean_avg_values = df_top_n[mean_avg_key]
+    yerr_lower_avg = mean_avg_values - df_top_n[min_avg_key]
+    yerr_upper_avg = df_top_n[max_avg_key] - mean_avg_values
+    yerr_avg = np.array([yerr_lower_avg.values, yerr_upper_avg.values])
+
+    mean_median_values = df_top_n[mean_median_key]
+    yerr_lower_median = mean_median_values - df_top_n[min_median_key]
+    yerr_upper_median = df_top_n[max_median_key] - mean_median_values
+    yerr_median = np.array(
+        [yerr_lower_median.values, yerr_upper_median.values])
+
+    encoder_name = df_top_n.index.get_level_values(0)[0]
+    max_notes = create_labels(df_top_n)
 
     bar_width = 0.25
-    num_configs = len(df)
+    num_configs = len(df_top_n)
 
     r1 = np.arange(num_configs)
-    r2 = [x + bar_width for x in r1]
-    r3 = [x + bar_width for x in r2]
-    fig = plt.figure(figsize=(10, 6), dpi=300)
-    # Create the bars
-    plt.bar(r1, df['max'], color='red', width=bar_width,
-            edgecolor='grey', label='Max Latency')
-    plt.bar(r2, df['avg'], color='blue', width=bar_width,
-            edgecolor='grey', label='Avg Latency')
-    plt.bar(r3, df['median'], color='green', width=bar_width,
-            edgecolor='grey', label='Median Latency')
-    # Add labels and ticks
+    r_max_orig = r1
+    r_avg = [x + bar_width for x in r1]
+    r_median = [x + bar_width for x in r_avg]
+
+    fig = plt.figure(figsize=(12, 7), dpi=300)
+
+    plt.bar(
+        r_max_orig,
+        df_top_n[mean_max_key],
+        yerr=yerr_max_orig,
+        capsize=5,
+        color='red',
+        width=bar_width,
+        edgecolor='grey',
+        label='Максимальная задержка'
+    )
+
+    plt.bar(
+        r_avg,
+        df_top_n[mean_avg_key],
+        yerr=yerr_avg,
+        capsize=5,
+        color='blue',
+        width=bar_width,
+        edgecolor='grey',
+        label='Средняя задержка'
+    )
+
+    plt.bar(
+        r_median,
+        df_top_n[mean_median_key],
+        yerr=yerr_median,
+        capsize=5,
+        color='green',
+        width=bar_width,
+        edgecolor='grey',
+        label='Медианная задержка'
+    )
+
     plt.xlabel('Индекс конфигурации', fontweight='bold')
     plt.ylabel('Общая задержка [мс]', fontweight='bold')
-    plt.xticks([r + bar_width for r in range(num_configs)],
-               [str(i + 1) for i in range(num_configs)])
+    center_pos = [r + bar_width for r in r1]
+    plt.xticks(center_pos, [str(i + 1) for i in range(num_configs)])
+
     plt.title(
         f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
     plt.legend()
@@ -79,10 +137,9 @@ def plot_latency_data(df):
 
     plt.tight_layout()
     plt.savefig(get_args().plot_dir +
-                f'combined_top_configurations_plot_{encoder_name}.png')
+                f'combined_top_configurations_with_errors_{encoder_name}.png')
     plt.close()
 
-    # Output Notes (for user interpretation)
     print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
     for index, note in max_notes.items():
         print(f"Index {index}: {note}")
@@ -91,7 +148,29 @@ def plot_start_latency(df):
     fig = plt.figure(figsize=(10, 6), dpi=300)
     r1 = np.arange(len(df))
-    plt.plot(r1, df['max'])
+
+    mean_col = ('mean', 'max')
+    min_col = ('left', 'max')
+    max_col = ('right', 'max')
+
+    mean_values = df[mean_col]
+    min_values = df[min_col]
+    max_values = df[max_col]
+
+    lower_error = mean_values - min_values
+    upper_error = max_values - mean_values
+
+    y_error = [lower_error.values, upper_error.values]
+
+    plt.errorbar(r1,
+                 mean_values,
+                 yerr=y_error,
+                 fmt='.-',
+                 color='darkblue',
+                 ecolor='red',
+                 capsize=3,
+                 linewidth=1
+                 )
 
     plt.xlabel('Индекс конфигурации', fontweight='bold')
     plt.ylabel('Общая задержка [мс]', fontweight='bold')
     encoder_name = df.index.get_level_values(0)[0]
@@ -109,7 +188,7 @@ def analyze_latency_data(csv_path: str):
     Args:
         csv_path (str): The path to the input CSV file.
     """
-    # --- 1. Load Data with Multi-level Headers ---
+
    try:
         df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
         logging.info(
@@ -123,7 +202,6 @@ def analyze_latency_data(csv_path: str):
         logging.error(f"An error occurred while reading the CSV file: {e}")
         return
 
-    # calculate summary along the rows
     sumDf = df.sum()
     if get_args().compensate == True:
         logging.info("Filesrc and rawvideoparse latency compensation is ON")
@@ -133,7 +211,6 @@ def analyze_latency_data(csv_path: str):
         sumDf -= rawvideoparseData
     logging.debug(f"\n{sumDf.head()}")
 
-    # calculate mean accross non-unique runs:
     def get_base_metric(metric):
         """Strips suffixes like '.1' or '.2' from the metric name."""
         return re.sub(r'\.\d+$', '', str(metric))
@@ -142,8 +219,7 @@ def analyze_latency_data(csv_path: str):
 
     base_metrics_key = metric_level_values.map(get_base_metric)
 
-    config_levels = list(range(sumDf.index.nlevels - 1)
-                         )  # This gives [0, 1, 2, 3]
+    config_levels = list(range(sumDf.index.nlevels - 1))
 
     grouping_keys = sumDf.index.droplevel(config_levels)  # type: ignore
     grouping_keys = [
@@ -153,19 +229,31 @@ def analyze_latency_data(csv_path: str):
     # 3. Perform Grouping and Mean Calculation
     # This command groups all entries that share the same (Config + Base Metric),
     # collapsing (avg, avg.1, avg.2) into a single average.
-    averaged_sumDf = sumDf.groupby(grouping_keys).mean()
+    sumDfgrouping = sumDf.groupby(grouping_keys)
+    averaged_sumDf = sumDfgrouping.mean()
+    max_sumDf = sumDfgrouping.max()
+    min_sumDf = sumDfgrouping.min()
+
+    logging.debug(f"\n{max_sumDf.head(10)}")
+    logging.debug(f"\n{min_sumDf.head(10)}")
     logging.info(f"\n{averaged_sumDf.head(10)}")
 
-    sumDf = averaged_sumDf
-    df_summary = sumDf.unstack(level=-1)  # or level='Metric' if names are set
+    merged_sumDf = pd.concat(
+        [min_sumDf, averaged_sumDf, max_sumDf],
+        axis=1,
+        keys=['left', 'mean', 'right']
+    )
+
+    sumDf = merged_sumDf
+
+    df_summary = sumDf.unstack(level=-1)
 
-    # 2. Sort the resulting DataFrame by the desired metric column.
     df_sorted_by_max = df_summary.sort_values(
-        by='max', ascending=True)  # type: ignore
+        by=('mean', 'max'), ascending=True)  # type: ignore
     df_sorted_by_avg = df_summary.sort_values(
-        by='avg', ascending=True)  # type: ignore
+        by=('mean', 'avg'), ascending=True)  # type: ignore
     df_sorted_by_median = df_summary.sort_values(
-        by='median', ascending=True)  # type: ignore
+        by=('mean', 'median'), ascending=True)  # type: ignore
 
     print("SORTED BY MAX")
     print(df_sorted_by_max)
@@ -176,7 +264,6 @@ def analyze_latency_data(csv_path: str):
     print("SORTED BY MEDIAN")
     print(df_sorted_by_median)
 
-    # 1. Get the indices (configurations) for each top 10 list
     max_indices = df_sorted_by_max.index
    avg_indices = df_sorted_by_avg.index
     median_indices = df_sorted_by_median.index
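
Note: a minimal standalone sketch of the data shaping the diff relies on, assuming a toy summary Series with a config level and a metric level where repeated runs appear as 'max', 'max.1', 'avg', 'avg.1'; the names and values below are illustrative only, not taken from the repository. It shows how grouping over repeated runs, pd.concat with keys=['left', 'mean', 'right'], and the final unstack produce the ('mean', 'max') / ('left', 'max') / ('right', 'max') columns that the new error-bar plotting indexes.

import pandas as pd

# Toy stand-in for sumDf: one config level plus a metric level in which
# repeated runs show up as suffixed duplicates ('max', 'max.1', ...).
idx = pd.MultiIndex.from_tuples(
    [('cfgA', 'max'), ('cfgA', 'max.1'), ('cfgA', 'avg'), ('cfgA', 'avg.1')],
    names=['config', 'metric'])
sumDf = pd.Series([12.0, 14.0, 7.0, 9.0], index=idx)

# Strip the '.N' run suffix so repeated runs share one grouping key.
base_metric = sumDf.index.get_level_values('metric').str.replace(
    r'\.\d+$', '', regex=True)
grouped = sumDf.groupby([sumDf.index.get_level_values('config'), base_metric])

# min / mean / max across runs, stitched side by side under 'left'/'mean'/'right'.
merged = pd.concat([grouped.min(), grouped.mean(), grouped.max()],
                   axis=1, keys=['left', 'mean', 'right'])

# Unstacking the metric level yields MultiIndex columns such as ('mean', 'max'),
# ('left', 'max') and ('right', 'max') -- the keys the error-bar code looks up.
summary = merged.unstack(level=-1)
print(summary[('mean', 'max')])                               # mean of per-run max
print(summary[('right', 'max')] - summary[('mean', 'max')])   # upper error bar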