import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import argparse
import logging

# Configure logging to show informational messages with timestamps.
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')


def parse_args():
    """Parse and return the command-line arguments for the latency analysis."""
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument('-c', '--compensate', action="store_true",
                        help='Subtract filesrc latency from the per-config sums.')
    parser.add_argument('--latency-csv', type=str,
                        default='sample/latencyDataframenvh264enc.csv',
                        help='Path to the latency results CSV file.')
    parser.add_argument('-pd', '--plot-dir', type=str, default='plots/',
                        help='Path to directory in which resulted plots should be saved')
    parser.add_argument('-csv', '--csv-dir', type=str, default='results/',
                        help='Path to directory in which resulted csv data should be saved')
    return parser.parse_args()


# Lazily-initialized singleton holding the parsed CLI arguments.
cmd_args = None


def get_args():
    """Return the parsed CLI arguments, parsing them exactly once on first use."""
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args


def plot_latency_data(df):
    """Plot a grouped bar chart (max/avg/median latency) for the top 10 rows of *df*.

    *df* is expected to be a MultiIndex-ed summary frame with 'max', 'avg'
    and 'median' columns. The figure is saved under the configured plot
    directory; a legend mapping x-axis indices to full configurations is
    printed to stdout.
    """

    def create_labels(df):
        """Combine all MultiIndex levels of each row into one 'L#:value | ...' string."""
        labels = {}
        for i, index in enumerate(df.index):
            label_parts = [f"L{j}:{val}" for j, val in enumerate(index)]
            labels[i + 1] = " | ".join(label_parts)
        return labels

    # Keep only the 10 best configurations for readability.
    df = df.head(10)
    # First index level carries the encoder name (used for title/filename).
    encoder_name = df.index.get_level_values(0)[0]
    max_notes = create_labels(df)

    bar_width = 0.25
    num_configs = len(df)
    # Three bar groups per configuration, offset by one bar width each.
    r1 = np.arange(num_configs)
    r2 = [x + bar_width for x in r1]
    r3 = [x + bar_width for x in r2]

    plt.figure(figsize=(10, 6), dpi=300)
    # Create the bars
    plt.bar(r1, df['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency')
    plt.bar(r2, df['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency')
    plt.bar(r3, df['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency')

    # Add labels and ticks (axis labels/title are intentionally in Russian).
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    plt.xticks([r + bar_width for r in range(num_configs)],
               [str(i + 1) for i in range(num_configs)])
    plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
    # os.path.join is robust to plot-dir values without a trailing slash.
    plt.savefig(os.path.join(get_args().plot_dir,
                             f'combined_top_configurations_plot_{encoder_name}.png'))
    plt.close()

    # Output Notes (for user interpretation)
    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")


def plot_start_latency(df):
    """Plot the 'max' latency column of *df* as a line chart and save it.

    The output filename is derived from the encoder name found in the first
    index level of *df*.
    """
    plt.figure(figsize=(10, 6), dpi=300)
    r1 = np.arange(len(df))
    plt.plot(r1, df['max'])
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    encoder_name = df.index.get_level_values(0)[0]
    plt.title(f"Результаты стартовой задержки для {encoder_name}")
    plt.tight_layout()
    plt.savefig(os.path.join(get_args().plot_dir, f"start_latency_{encoder_name}.png"))
    plt.close()


def analyze_latency_data(csv_path: str):
    """
    Analyzes latency data to find the top 10 components (rows) contributing
    most to latency, and plots histograms of their summed avg, median, and
    max latencies.

    Args:
        csv_path (str): The path to the input CSV file.
    """
    # --- 1. Load Data with Multi-level Headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
        logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
        if df.index.name == 'Unnamed: 0':
            df.index.name = 'component'
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    # Calculate the per-column sums across all components (rows).
    sumDf = df.sum()
    if get_args().compensate:
        logging.info("Filesrc latency compensation is ON")
        # Remove the filesrc element's contribution from every column sum.
        filesrcData = df.loc["filesrc0"]
        sumDf -= filesrcData
        print(sumDf.head())

    # Pivot the innermost header level (metric: max/avg/median) into columns.
    df_summary = sumDf.unstack(level=-1)  # or level='Metric' if names are set

    # 2. Sort the resulting DataFrame by each metric (ascending: lowest
    # latency first, i.e. best configurations at the top).
    df_sorted_by_max = df_summary.sort_values(by='max', ascending=True)
    df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True)
    df_sorted_by_median = df_summary.sort_values(by='median', ascending=True)

    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    # 1. Get the indices (configurations) for each sorted view.
    max_indices = df_sorted_by_max.index
    avg_indices = df_sorted_by_avg.index
    median_indices = df_sorted_by_median.index

    # 2. Find the intersection (common elements) of the three sets of indices.
    # max is the main index because it commonly introduces the largest amount
    # of latency to the stream, so the max-sorted order is preserved.
    # NOTE(review): all three indexes come from the same frame, so this
    # intersection currently keeps every row; the top-10 cut happens later in
    # plot_latency_data — confirm whether a head(10) per metric was intended.
    common_indices = max_indices.intersection(avg_indices).intersection(median_indices)

    # 3. Filter the original summary DataFrame (df_summary) using the common indices.
    df_common_top_performers = df_summary.loc[common_indices]
    encoder_name = df_common_top_performers.index.get_level_values(0)[0]
    print(df_common_top_performers.head())
    plot_latency_data(df_common_top_performers)
    plot_start_latency(df_common_top_performers)

    # 4. Save top performers to csv.
    top_10_df = df_common_top_performers.head(10)
    top_10_df.to_csv(os.path.join(get_args().csv_dir, f"{encoder_name}.csv"))


if __name__ == '__main__':
    # Ensure output directories exist before any plot/CSV is written.
    os.makedirs(get_args().csv_dir, exist_ok=True)
    os.makedirs(get_args().plot_dir, exist_ok=True)
    analyze_latency_data(get_args().latency_csv)