From 628f0439b7411fc6956c49be73e81a4eccbec114 Mon Sep 17 00:00:00 2001 From: Artur Mukhamadiev Date: Sun, 12 Oct 2025 23:37:07 +0300 Subject: [PATCH] [analysis] mean values for non-unique config runs --- .gitignore | 4 ++ latencyAnalysis.py | 93 ++++++++++++++++++++-------- qualityAnalysis.py | 147 ++++++++++++++++++++++++++++++--------------- 3 files changed, 172 insertions(+), 72 deletions(-) diff --git a/.gitignore b/.gitignore index 0dbf2f2..431a1c8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +#project ignore: +plots/ +results/ + # ---> Python # Byte-compiled / optimized / DLL files __pycache__/ diff --git a/latencyAnalysis.py b/latencyAnalysis.py index ef5a7fa..7d1741e 100644 --- a/latencyAnalysis.py +++ b/latencyAnalysis.py @@ -2,20 +2,23 @@ import pandas as pd import matplotlib.pyplot as plt import numpy as np import os +import re import argparse import logging # Configure logging to show informational messages -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s') + def parse_args(): parser = argparse.ArgumentParser(prog=__file__) parser.add_argument('-c', '--compensate', action="store_true") - parser.add_argument('--latency-csv', - type=str, + parser.add_argument('--latency-csv', + type=str, default='sample/latencyDataframenvh264enc.csv', help='Path to the latency results CSV file.') - parser.add_argument('-pd','--plot-dir', + parser.add_argument('-pd', '--plot-dir', type=str, default='plots/', help='Path to directory in which resulted plots should be saved') @@ -25,13 +28,17 @@ def parse_args(): help='Path to directory in which resulted csv data should be saved') return parser.parse_args() + cmd_args = None + + def get_args(): global cmd_args if cmd_args is None: cmd_args = parse_args() return cmd_args + def plot_latency_data(df): def create_labels(df): """Combines MultiIndex levels (L0-L3) into a single string 
for notes.""" @@ -40,7 +47,7 @@ def plot_latency_data(df): # Format: L#:value | L#:value | ... label_parts = [f"L{j}:{val}" for j, val in enumerate(index)] labels[i + 1] = " | ".join(label_parts) - return labels + return labels df = df.head(10) encoder_name = df.index.get_level_values(0)[0] @@ -53,20 +60,26 @@ def plot_latency_data(df): r3 = [x + bar_width for x in r2] fig = plt.figure(figsize=(10, 6), dpi=300) # Create the bars - plt.bar(r1, df['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency') - plt.bar(r2, df['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency') - plt.bar(r3, df['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency') + plt.bar(r1, df['max'], color='red', width=bar_width, + edgecolor='grey', label='Max Latency') + plt.bar(r2, df['avg'], color='blue', width=bar_width, + edgecolor='grey', label='Avg Latency') + plt.bar(r3, df['median'], color='green', width=bar_width, + edgecolor='grey', label='Median Latency') # Add labels and ticks plt.xlabel('Индекс конфигурации', fontweight='bold') plt.ylabel('Общая задержка [мс]', fontweight='bold') - plt.xticks([r + bar_width for r in range(num_configs)], [str(i + 1) for i in range(num_configs)]) - plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}') + plt.xticks([r + bar_width for r in range(num_configs)], + [str(i + 1) for i in range(num_configs)]) + plt.title( + f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}') plt.legend() plt.grid(axis='y', linestyle='--', alpha=0.6) plt.tight_layout() - plt.savefig(get_args().plot_dir + f'combined_top_configurations_plot_{encoder_name}.png') + plt.savefig(get_args().plot_dir + + f'combined_top_configurations_plot_{encoder_name}.png') plt.close() # Output Notes (for user interpretation) @@ -74,6 +87,7 @@ def plot_latency_data(df): for index, note in max_notes.items(): print(f"Index {index}: 
{note}") + def plot_start_latency(df): fig = plt.figure(figsize=(10, 6), dpi=300) r1 = np.arange(len(df)) @@ -86,6 +100,7 @@ def plot_start_latency(df): plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png") plt.close() + def analyze_latency_data(csv_path: str): """ Analyzes latency data to find the top 10 components (rows) contributing most @@ -96,8 +111,9 @@ def analyze_latency_data(csv_path: str): """ # --- 1. Load Data with Multi-level Headers --- try: - df = pd.read_csv(csv_path, header=[0,1, 2, 3, 4], index_col=0) - logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") + df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0) + logging.info( + f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") if df.index.name == 'Unnamed: 0': df.index.name = 'component' except FileNotFoundError: @@ -107,22 +123,50 @@ def analyze_latency_data(csv_path: str): logging.error(f"An error occurred while reading the CSV file: {e}") return - #calculate summary along the rows + # calculate summary along the rows sumDf = df.sum() if get_args().compensate == True: - logging.info("Filesrc latency compensation is ON") + logging.info("Filesrc and rawvideoparse latency compensation is ON") filesrcData = df.loc["filesrc0"] + rawvideoparseData = df.loc["rawvideoparse0"] sumDf -= filesrcData - print(sumDf.head()) - # return + sumDf -= rawvideoparseData + logging.debug(f"\n{sumDf.head()}") - df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set + # calculate mean across non-unique runs: + def get_base_metric(metric): + """Strips suffixes like '.1' or '.2' from the metric name.""" + return re.sub(r'\.\d+$', '', str(metric)) + + metric_level_values = sumDf.index.get_level_values(-1) + + base_metrics_key = metric_level_values.map(get_base_metric) + + config_levels = list(range(sumDf.index.nlevels - 1) + ) # This gives [0, 1, 2, 3] + + grouping_keys = 
sumDf.index.droplevel(config_levels) # type: ignore + grouping_keys = [ + sumDf.index.get_level_values(i) for i in config_levels + ] + [base_metrics_key] + + # 3. Perform Grouping and Mean Calculation + # This command groups all entries that share the same (Config + Base Metric), + # collapsing (avg, avg.1, avg.2) into a single average. + averaged_sumDf = sumDf.groupby(grouping_keys).mean() + logging.info(f"\n{averaged_sumDf.head(10)}") + sumDf = averaged_sumDf + + df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set # 2. Sort the resulting DataFrame by the desired metric column. - df_sorted_by_max = df_summary.sort_values(by='max', ascending=True) - df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True) - df_sorted_by_median = df_summary.sort_values(by='median', ascending=True) - + df_sorted_by_max = df_summary.sort_values( + by='max', ascending=True) # type: ignore + df_sorted_by_avg = df_summary.sort_values( + by='avg', ascending=True) # type: ignore + df_sorted_by_median = df_summary.sort_values( + by='median', ascending=True) # type: ignore + print("SORTED BY MAX") print(df_sorted_by_max) print("---------------") @@ -139,7 +183,8 @@ def analyze_latency_data(csv_path: str): # 2. Find the intersection (common elements) of the three sets of indices # max is main index because it is commonly introduces the largest amount of latency to the stream - common_indices = max_indices.intersection(avg_indices).intersection(median_indices) + common_indices = max_indices.intersection( + avg_indices).intersection(median_indices) # 3. 
Filter the original summary DataFrame (df_summary) using the common indices df_common_top_performers = df_summary.loc[common_indices] @@ -156,8 +201,8 @@ def analyze_latency_data(csv_path: str): top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv") return + if __name__ == '__main__': os.makedirs(get_args().csv_dir, exist_ok=True) os.makedirs(get_args().plot_dir, exist_ok=True) analyze_latency_data(get_args().latency_csv) - diff --git a/qualityAnalysis.py b/qualityAnalysis.py index e8d8fd1..7d89d35 100644 --- a/qualityAnalysis.py +++ b/qualityAnalysis.py @@ -4,19 +4,22 @@ import numpy as np import logging import argparse import os +import re # Configure logging to show informational messages -logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') +logging.basicConfig(level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s') + def parse_args(): parser = argparse.ArgumentParser(prog=__file__) parser.add_argument( - '--quality-csv', - type=str, + '--quality-csv', + type=str, default='sample/qualityResultsnvh264enc.csv', help='Path to the quality results CSV file.' ) - parser.add_argument('-pd','--plot-dir', + parser.add_argument('-pd', '--plot-dir', type=str, default='plots/', help='Path to directory in which resulted plots should be saved') @@ -26,21 +29,25 @@ def parse_args(): help='Path to directory in which resulted csv data should be saved') return parser.parse_args() + cmd_args = None + + def get_args(): global cmd_args if cmd_args is None: cmd_args = parse_args() return cmd_args + def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): """ Draws a bar plot comparing PSNR and SSIM for the top 10 video configurations. - + The plot uses a primary Y-axis for PSNR and a secondary Y-axis for SSIM due to their different value ranges. The X-axis uses simple numerical indices, with detailed configuration notes printed separately below the plot. 
- + Args: df: DataFrame containing the top configurations, must have 'PSNR' and 'SSIM' columns. file_name: Name of the file to which plot would be saved. @@ -48,50 +55,50 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): """ # Use the top 10 rows for plotting plot_df = df.head(10).copy() - + if plot_df.empty: logging.warning("DataFrame is empty, cannot generate plot.") return # Create the index for the x-axis (0 to 9 for bar plotting) config_indices = np.arange(len(plot_df)) - + # 1. Create simple numerical labels for the X-axis (1 to 10) x_labels_simple = [str(i + 1) for i in config_indices] - + # 2. Generate notes mapping index to configuration details (similar to the template) quality_notes = {} for i, row in plot_df.iterrows(): # Format: Index: encoder | profile | video | parameters note_parts = [ - row['encoder'], - row['profile'], - row['video'], + row['encoder'], + row['profile'], + row['video'], row['parameters'] ] quality_notes[len(quality_notes) + 1] = " | ".join(note_parts) - + # 3. Setup the figure and the primary axis (ax1) fig, ax1 = plt.subplots(figsize=(12, 6)) - + # Define bar width and positions bar_width = 0.35 - + # 4. 
Plot PSNR on the primary axis (left) - bar1 = ax1.bar(config_indices - bar_width/2, plot_df['PSNR'], bar_width, + bar1 = ax1.bar(config_indices - bar_width/2, plot_df['PSNR'], bar_width, label='PSNR (dB)', color='Blue', edgecolor='grey') - ax1.set_xlabel('Configuration Index', fontsize=12) # Simplified X-label + ax1.set_xlabel('Configuration Index', fontsize=12) # Simplified X-label ax1.set_ylabel('PSNR (dB)', color='Black', fontsize=12) ax1.tick_params(axis='y', labelcolor='Black') ax1.set_xticks(config_indices) # Use simple numerical labels for the X-axis - ax1.set_xticklabels(x_labels_simple, fontsize=10) - + ax1.set_xticklabels(x_labels_simple, fontsize=10) + # Add PSNR value labels above the bars for rect in bar1: height = rect.get_height() ax1.annotate(f'PSNR={height:.2f}', - xy=(rect.get_x() + rect.get_width() / 2, height / 1.5 ), + xy=(rect.get_x() + rect.get_width() / 2, height / 1.5), xytext=(0, 0), # 3 points vertical offset textcoords="offset points", transform_rotates_text=True, rotation=90, @@ -99,18 +106,18 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): # 5. Create a secondary axis (ax2) for SSIM (twinx) ax2 = ax1.twinx() - + # 6. 
Plot SSIM on the secondary axis (right) - bar2 = ax2.bar(config_indices + bar_width/2, plot_df['SSIM'], bar_width, + bar2 = ax2.bar(config_indices + bar_width/2, plot_df['SSIM'], bar_width, label='SSIM', color='Red', edgecolor='grey') ax2.set_ylabel('SSIM (Structural Similarity)', color='Black', fontsize=12) ax2.tick_params(axis='y', labelcolor='Black') - + # Add SSIM value labels above the bars for rect in bar2: height = rect.get_height() ax2.annotate(f'SSIM={height:.4f}', - xy=(rect.get_x() + rect.get_width() / 2, height / 1.5 ), + xy=(rect.get_x() + rect.get_width() / 2, height / 1.5), xytext=(0, 0), # 3 points vertical offset textcoords="offset points", transform_rotates_text=True, rotation=90, @@ -118,12 +125,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): # 7. Final Plot appearance fig.suptitle(title) - fig.tight_layout(rect=[0, 0.03, 1, 0.95]) - + fig.tight_layout(rect={0.0, 0.03, 1.0, 0.95}) # type: ignore + # Combine legends from both axes lines1, labels1 = ax1.get_legend_handles_labels() lines2, labels2 = ax2.get_legend_handles_labels() - ax1.legend(lines1 + lines2, labels1 + labels2, bbox_to_anchor=(0.6, 1.1), ncol=2) + ax1.legend(lines1 + lines2, labels1 + labels2, + bbox_to_anchor=(0.6, 1.1), ncol=2) plt.grid(axis='y', linestyle='--', alpha=0.7) plt.savefig(f'{file_name}.png') @@ -133,11 +141,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): for index, note in quality_notes.items(): print(f"Index {index}: {note}") + def analyze_quality_report(csv_path: str): # --- 1. Load Data with Multi-level Headers --- try: df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0) - logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") + logging.info( + f"Successfully loaded '{csv_path}' with multi-level headers. 
Shape: {df.shape}") if df.index.name == 'Unnamed: 0': df.index.name = 'component' except FileNotFoundError: @@ -146,16 +156,45 @@ def analyze_quality_report(csv_path: str): except Exception as e: logging.error(f"An error occurred while reading the CSV file: {e}") return - + # Get row with average results avgDf = df.loc["Average"] + logging.info(f"\n{avgDf.head(10)}") + # calculate mean across non-unique runs: + + def get_base_metric(metric): + """Strips suffixes like '.1' or '.2' from the metric name.""" + return re.sub(r'\.\d+$', '', str(metric)) + + metric_level_values = avgDf.index.get_level_values(-1) + + base_metrics_key = metric_level_values.map(get_base_metric) + + config_levels = list(range(avgDf.index.nlevels - 1) + ) # This gives [0, 1, 2, 3] + + grouping_keys = avgDf.index.droplevel(config_levels) # type: ignore + grouping_keys = [ + avgDf.index.get_level_values(i) for i in config_levels + ] + [base_metrics_key] + + # 3. Perform Grouping and Mean Calculation + # This command groups all entries that share the same (Config + Base Metric), + # collapsing (e.g. PSNR, PSNR.1, PSNR.2) into a single average. 
+ averaged_sumDf = avgDf.groupby(grouping_keys).mean() + logging.info(f"\n{averaged_sumDf.head(10)}") + avgDf = averaged_sumDf + + logging.info(f"\n{avgDf.head(10)}") + avgDf = avgDf.unstack(level=-1) - encoder_name = avgDf.index.get_level_values(0)[0] logging.debug(f"encoder_name={encoder_name}") - dfPSNRsorted = avgDf.sort_values(by="PSNR", ascending=False) - dfSSIMsorted = avgDf.sort_values(by="SSIM", ascending=False) + dfPSNRsorted = avgDf.sort_values( + by="PSNR", ascending=False) # type: ignore + dfSSIMsorted = avgDf.sort_values( + by="SSIM", ascending=False) # type: ignore indexPSNR = dfPSNRsorted.index indexSSIM = dfSSIMsorted.index @@ -168,53 +207,65 @@ def analyze_quality_report(csv_path: str): # Convert the MultiIndex (encoder, profile, video, parameters) into columns df_quality_results = intersectedDf.reset_index() # Rename the columns to match the latency report's structure - df_quality_results.columns = ['encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM'] - logging.debug(f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}") - + df_quality_results.columns = [ + 'encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM'] + logging.debug( + f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}") # Now intersected with latency report latency_df = pd.read_csv(f'results/{encoder_name}.csv') - columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile', 'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'} + columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile', + 'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'} latency_df.rename(columns=columns, inplace=True) logging.debug(f"\n{latency_df.head()}") - # --- 4. Merge Quality and Latency Reports --- + # --- 4. Merge Quality and Latency Reports --- # Use an inner merge on the four identifier columns to combine the data. 
merge_keys = ['encoder', 'profile', 'video', 'parameters'] merged_df = pd.merge( df_quality_results, latency_df, on=merge_keys, - how='inner' # Only keep records present in both (i.e., the top quality configurations) + # Only keep records present in both (i.e., the top quality configurations) + how='inner' ) logging.info("=" * 70) logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---") - logging.info(f"Number of common configuration entries found: {len(merged_df)}") + logging.info( + f"Number of common configuration entries found: {len(merged_df)}") logging.info("=" * 70) # Prepare for display merged_df_display = merged_df.sort_values(by='PSNR', ascending=False) - + # Select and display key metrics display_columns = [ - 'encoder', 'profile', 'video', 'parameters', - 'PSNR', 'SSIM', # Quality metrics - 'avg', 'max', 'median', 'std' # Latency metrics (assuming these are in the latency report) + 'encoder', 'profile', 'video', 'parameters', + 'PSNR', 'SSIM', # Quality metrics + # Latency metrics (assuming these are in the latency report) + 'avg', 'max', 'median', 'std' ] - - final_cols = [col for col in display_columns if col in merged_df_display.columns] + + final_cols = [ + col for col in display_columns if col in merged_df_display.columns] print(f"\n{merged_df_display[final_cols].to_string()}") - plot_top_configurations(merged_df_display, get_args().plot_dir + f"top_quality_configurations_by_latency_{encoder_name}", f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}") + plot_top_configurations(merged_df_display, + get_args().plot_dir + + f"top_quality_configurations_by_latency_{encoder_name}", + f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}") - plot_top_configurations(df_quality_results, get_args().plot_dir + f"top_quality_configurations_{encoder_name}", f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}") + plot_top_configurations(df_quality_results, + get_args().plot_dir + 
+ f"top_quality_configurations_{encoder_name}", + f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}") return + if __name__ == '__main__': os.makedirs(get_args().csv_dir, exist_ok=True) os.makedirs(get_args().plot_dir, exist_ok=True) analyze_quality_report(get_args().quality_csv) -