"""Rank encoder configurations by PSNR/SSIM and relate them to latency data.

The script reads a quality-results CSV with five header rows, averages
repeated runs of each configuration, joins the result with a per-encoder
latency CSV and saves two bar charts of the best configurations.
"""

import argparse
import logging
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')

# Columns that together identify one configuration in both reports.
_CONFIG_COLUMNS = ['encoder', 'profile', 'video', 'parameters']

# Trailing ".<digits>" suffix on a metric name (e.g. "PSNR.1").
_RUN_SUFFIX_RE = re.compile(r'\.\d+$')


def parse_args():
    """Parse the command-line options of the script.

    Returns:
        The argparse namespace with ``quality_csv``, ``plot_dir`` and
        ``csv_dir`` attributes.
    """
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument(
        '--quality-csv',
        type=str,
        default='sample/qualityResultsnvh264enc.csv',
        help='Path to the quality results CSV file.'
    )
    parser.add_argument('-pd', '--plot-dir', type=str, default='plots/',
                        help='Path to directory in which resulted plots should be saved')
    parser.add_argument('-csv', '--csv-dir', type=str, default='results/',
                        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args()


# Parsed arguments, filled in lazily by get_args().
cmd_args = None


def get_args():
    """Return the parsed command-line arguments, parsing them on first use."""
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args


def _base_metric_name(metric) -> str:
    """Strip a trailing suffix like '.1' or '.2' from the metric name."""
    return _RUN_SUFFIX_RE.sub('', str(metric))


def _annotate_bars(ax, bars, metric: str, precision: int) -> None:
    """Write '<metric>=<value>' vertically inside every bar of ``bars``."""
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{metric}={height:.{precision}f}',
                    # Anchor the label at two thirds of the bar height so the
                    # white text sits on the coloured bar.
                    xy=(bar.get_x() + bar.get_width() / 2, height / 1.5),
                    xytext=(0, 0),  # no extra offset from the anchor point
                    textcoords="offset points",
                    transform_rotates_text=True,
                    rotation=90,
                    ha='center', va='bottom', fontsize=10, color='White')


def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str,
                            top_n: int = 10) -> None:
    """
    Draws a bar plot comparing PSNR and SSIM for the first ``top_n`` rows.

    The plot uses a primary Y-axis for PSNR and a secondary Y-axis for SSIM
    due to their different value ranges. The X-axis uses simple numerical
    indices; the configuration behind each index is printed to stdout.

    Args:
        df: DataFrame with 'PSNR' and 'SSIM' columns plus the 'encoder',
            'profile', 'video' and 'parameters' columns used for the notes.
            Rows are plotted in the order given; no sorting is done here.
        file_name: Output path without extension; '.png' is appended.
        title: Title of the plot.
        top_n: Number of leading rows to plot (default 10).
    """
    plot_df = df.head(top_n).copy()

    if plot_df.empty:
        logging.warning("DataFrame is empty, cannot generate plot.")
        return

    # Bar positions 0..n-1, shown to the user as labels 1..n.
    config_indices = np.arange(len(plot_df))
    x_labels_simple = [str(i + 1) for i in config_indices]

    # Map each 1-based label to "encoder | profile | video | parameters".
    # str() keeps the join working for non-string cells (NaN, numbers).
    quality_notes = {
        position: " | ".join(str(row[column]) for column in _CONFIG_COLUMNS)
        for position, (_, row) in enumerate(plot_df.iterrows(), start=1)
    }

    fig, ax1 = plt.subplots(figsize=(10, 6), dpi=300)
    bar_width = 0.35

    # PSNR on the primary (left) axis.
    psnr_bars = ax1.bar(config_indices - bar_width / 2, plot_df['PSNR'],
                        bar_width, label='PSNR (dB)', color='Blue',
                        edgecolor='grey')
    ax1.set_xlabel('Configuration Index', fontsize=12)
    ax1.set_ylabel('PSNR (dB)', color='Black', fontsize=12)
    ax1.tick_params(axis='y', labelcolor='Black')
    ax1.set_xticks(config_indices)
    ax1.set_xticklabels(x_labels_simple, fontsize=10)
    _annotate_bars(ax1, psnr_bars, 'PSNR', 2)

    # SSIM on a secondary (right) axis sharing the same X-axis.
    ax2 = ax1.twinx()
    ssim_bars = ax2.bar(config_indices + bar_width / 2, plot_df['SSIM'],
                        bar_width, label='SSIM', color='Red',
                        edgecolor='grey')
    ax2.set_ylabel('SSIM (Structural Similarity)', color='Black', fontsize=12)
    ax2.tick_params(axis='y', labelcolor='Black')
    _annotate_bars(ax2, ssim_bars, 'SSIM', 4)

    # Final appearance: leave the top 5% of the figure for the title.
    fig.suptitle(title)
    fig.tight_layout(rect=(0.0, 0.0, 1.0, 0.95))

    # Combine legends from both axes into one.
    lines1, labels1 = ax1.get_legend_handles_labels()
    lines2, labels2 = ax2.get_legend_handles_labels()
    ax1.legend(lines1 + lines2, labels1 + labels2,
               bbox_to_anchor=(0.6, 1.1), ncol=2)

    # plt.grid acts on pyplot's current axes.
    plt.grid(axis='y', linestyle='--', alpha=0.7)
    fig.savefig(f'{file_name}.png')
    # Release the figure; otherwise every call keeps a 300-dpi figure alive.
    plt.close(fig)

    # Output notes (for user interpretation).
    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in quality_notes.items():
        print(f"Index {index}: {note}")


def analyze_quality_report(csv_path: str) -> None:
    """Average the quality results, merge them with latency data and plot.

    Steps:
      1. Load ``csv_path`` (five header rows, first column as index) and take
         its "Average" row.
      2. Average entries that differ only by a numeric suffix on the metric
         name (e.g. 'PSNR', 'PSNR.1').
      3. Inner-join the result with ``<csv-dir>/<encoder>.csv`` on
         encoder/profile/video/parameters.
      4. Print the merged table and save two plots into ``--plot-dir``.

    Problems with the input files are logged and end the analysis early
    instead of raising.

    Args:
        csv_path: Path to the quality results CSV file.
    """
    # --- 1. Load data with multi-level headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    logging.info(
        f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
    if df.index.name == 'Unnamed: 0':
        df.index.name = 'component'

    if "Average" not in df.index:
        logging.error(f"Error: '{csv_path}' has no 'Average' row.")
        return

    # Row with average results; its index is the 5-level column header.
    avgDf = df.loc["Average"]
    logging.info(f"\n{avgDf.head(10)}")

    # --- 2. Mean across repeated runs of the same configuration ---
    # Group by the four configuration levels plus the metric name with its
    # numeric suffix removed, collapsing e.g. (PSNR, PSNR.1, PSNR.2).
    base_metrics_key = avgDf.index.get_level_values(-1).map(_base_metric_name)
    config_levels = list(range(avgDf.index.nlevels - 1))  # [0, 1, 2, 3]
    grouping_keys = [
        avgDf.index.get_level_values(level) for level in config_levels
    ] + [base_metrics_key]

    avgDf = avgDf.groupby(grouping_keys).mean()
    logging.info(f"\n{avgDf.head(10)}")

    # Turn the metric level into columns: one row per configuration.
    avgDf = avgDf.unstack(level=-1)
    if avgDf.empty:
        logging.error(f"Error: '{csv_path}' contains no averaged results.")
        return

    encoder_name = avgDf.index.get_level_values(0)[0]
    logging.debug(f"encoder_name={encoder_name}")

    dfPSNRsorted = avgDf.sort_values(
        by="PSNR", ascending=False)  # type: ignore
    dfSSIMsorted = avgDf.sort_values(
        by="SSIM", ascending=False)  # type: ignore

    # NOTE(review): both sorted frames hold the same configurations, so this
    # intersection looks like it keeps every row - confirm whether a top-N
    # cut per metric was intended before intersecting.
    commonIndex = dfPSNRsorted.index.intersection(dfSSIMsorted.index)
    intersectedDf = avgDf.loc[commonIndex]
    logging.debug(intersectedDf.head(10))

    # --- 3. Prepare quality data for the merge ---
    # Keep only the two metrics that are plotted, then turn the MultiIndex
    # (encoder, profile, video, parameters) into ordinary columns.
    df_quality_results = intersectedDf[['PSNR', 'SSIM']].reset_index()
    df_quality_results.columns = _CONFIG_COLUMNS + ['PSNR', 'SSIM']
    logging.debug(
        f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}")

    # --- 4. Load the latency report ---
    args = get_args()
    # Honour --csv-dir (default 'results/') instead of a hard-coded folder.
    latency_path = os.path.join(args.csv_dir, f'{encoder_name}.csv')
    try:
        latency_df = pd.read_csv(latency_path)
    except FileNotFoundError:
        logging.error(
            f"Error: The latency report '{latency_path}' was not found.")
        return
    logging.info(latency_df.head())

    # The first four columns of the latency report are unnamed in the CSV;
    # give them the identifier names used by the quality data.
    columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile',
               'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
    latency_df.rename(columns=columns, inplace=True)  # type: ignore
    logging.debug(f"\n{latency_df.head()}")

    # --- 5. Merge quality and latency reports ---
    # Inner merge: only configurations present in both reports are kept.
    merged_df = pd.merge(
        df_quality_results,
        latency_df,
        on=_CONFIG_COLUMNS,
        how='inner'
    )

    logging.info("=" * 70)
    logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---")
    logging.info(
        f"Number of common configuration entries found: {len(merged_df)}")
    logging.info("=" * 70)

    merged_df_display = merged_df.sort_values(by='PSNR', ascending=False)

    # Quality metrics first, then whichever of the latency statistics the
    # latency report actually provides.
    display_columns = _CONFIG_COLUMNS + [
        'PSNR', 'SSIM',
        'avg', 'max', 'median', 'std'
    ]
    final_cols = [
        col for col in display_columns if col in merged_df_display.columns]
    print(f"\n{merged_df_display[final_cols].to_string()}")

    # os.path.join works whether or not --plot-dir ends with a separator.
    plot_top_configurations(
        merged_df_display,
        os.path.join(
            args.plot_dir,
            f"top_quality_configurations_by_latency_{encoder_name}"),
        f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}")
    plot_top_configurations(
        df_quality_results,
        os.path.join(args.plot_dir,
                     f"top_quality_configurations_{encoder_name}"),
        f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}")


if __name__ == '__main__':
    os.makedirs(get_args().csv_dir, exist_ok=True)
    os.makedirs(get_args().plot_dir, exist_ok=True)
    analyze_quality_report(get_args().quality_csv)