import argparse
import logging
import os
from typing import Optional

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# String prefix (here a directory with a trailing slash) prepended to the
# file name of every generated plot image.
prefixImage = 'histograms/'


def parse_args():
    """Parse the command-line options of this script.

    Returns:
        argparse.Namespace: Namespace with the boolean attribute ``compensate``.
    """
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument(
        '-c', '--compensate', action="store_true",
        help="subtract the 'filesrc0' row from the per-configuration sums",
    )
    return parser.parse_args()


# Cache for the parsed command line; filled lazily by get_args().
cmd_args = None


def get_args():
    """Return the parsed command-line arguments, parsing them on first use only."""
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args


def plot_latency_data(df):
    """Plot max/avg/median latency bars for the first (up to) 10 rows of ``df``.

    The image is saved as
    ``<prefixImage>combined_top_configurations_plot_<encoder>.png`` where
    ``<encoder>`` is the first value of index level 0, and a legend mapping
    the 1-based X-axis positions to the full index labels is printed.

    Args:
        df: DataFrame with the columns ``'max'``, ``'avg'`` and ``'median'``;
            rows are plotted in the order given.
    """

    def create_labels(frame):
        """Combine the index levels of every row into a single string for notes."""
        labels = {}
        for position, index in enumerate(frame.index, start=1):
            # A single-level index yields scalars; wrap them so that a string
            # label is not split into its individual characters.
            levels = index if isinstance(index, tuple) else (index,)
            # Format: L#:value | L#:value | ...
            labels[position] = " | ".join(
                f"L{level}:{value}" for level, value in enumerate(levels)
            )
        return labels

    df = df.head(10)
    if df.empty:
        # Nothing to draw; reading the encoder name below would raise IndexError.
        logging.warning("No configurations to plot; skipping plot generation.")
        return

    encoder_name = df.index.get_level_values(0)[0]
    max_notes = create_labels(df)

    bar_width = 0.25
    num_configs = len(df)

    # Left edge positions of the three bars drawn side by side per configuration.
    r1 = np.arange(num_configs)
    r2 = r1 + bar_width
    r3 = r2 + bar_width

    plt.figure(figsize=(10, 6), dpi=300)

    # Create the bars
    plt.bar(r1, df['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency')
    plt.bar(r2, df['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency')
    plt.bar(r3, df['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency')

    # Add labels and ticks (ticks sit under the middle bar of each group)
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    plt.xticks(r1 + bar_width, [str(i + 1) for i in range(num_configs)])

    plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()

    # savefig does not create missing directories, so make sure the target exists.
    output_dir = os.path.dirname(prefixImage)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    plt.savefig(prefixImage + f'combined_top_configurations_plot_{encoder_name}.png')
    plt.close()

    # Output Notes (for user interpretation)
    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")


def analyze_latency_data(csv_path: str, compensate: Optional[bool] = None):
    """Rank configurations by summed latency and plot the lowest-latency ones.

    The CSV is read with a five-row column header. All rows are summed per
    column, the innermost header level is unstacked into columns (expected to
    contain ``'max'``, ``'avg'`` and ``'median'``), the result is sorted
    ascending by each metric and printed, and the configurations are handed
    to ``plot_latency_data`` which plots the first 10.

    Errors while loading or a missing metric column are logged and the
    function returns without plotting.

    Args:
        csv_path (str): The path to the input CSV file.
        compensate: When true, the ``'filesrc0'`` row is subtracted from the
            sums. ``None`` (default) takes the value from the command line
            (``-c/--compensate``).
    """
    # --- 1. Load Data with Multi-level Headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
    except FileNotFoundError:
        logging.error("Error: The file '%s' was not found.", csv_path)
        return
    except Exception as e:  # top-level boundary: report any parse problem and stop
        logging.error("An error occurred while reading the CSV file: %s", e)
        return

    logging.info("Successfully loaded '%s' with multi-level headers. Shape: %s", csv_path, df.shape)
    if df.index.name == 'Unnamed: 0':
        df.index.name = 'component'

    # Sum over all rows: one value per (multi-level) column.
    sumDf = df.sum()

    if compensate is None:
        compensate = get_args().compensate
    if compensate:
        if "filesrc0" in df.index:
            logging.info("Filesrc latency compensation is ON")
            sumDf -= df.loc["filesrc0"]
        else:
            logging.warning("Filesrc latency compensation requested, but no 'filesrc0' row was found; skipping it.")

    print(sumDf.head())

    # Move the innermost header level into columns: one row per configuration.
    df_summary = sumDf.unstack(level=-1)

    missing = [name for name in ('max', 'avg', 'median') if name not in df_summary.columns]
    if missing:
        logging.error("Missing metric column(s) %s in '%s'; cannot rank configurations.", missing, csv_path)
        return

    # --- 2. Sort the summary by each metric (lowest latency first) ---
    df_sorted_by_max = df_summary.sort_values(by='max', ascending=True)
    df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True)
    df_sorted_by_median = df_summary.sort_values(by='median', ascending=True)

    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    # --- 3. Select the configurations to plot ---
    # NOTE(review): the three indices hold the same configurations in different
    # order (no top-10 cut is applied here), so the intersection keeps every
    # configuration. The max-sorted index is the receiver because max commonly
    # introduces the largest amount of latency to the stream; the result is
    # expected to follow its order - confirm against the pandas version in use.
    max_indices = df_sorted_by_max.index
    avg_indices = df_sorted_by_avg.index
    median_indices = df_sorted_by_median.index
    common_indices = max_indices.intersection(avg_indices).intersection(median_indices)

    # Filter the summary DataFrame using the common indices
    df_common_top_performers = df_summary.loc[common_indices]
    print(df_common_top_performers.head())

    plot_latency_data(df_common_top_performers)


if __name__ == '__main__':
    # Parse (and cache) the command line once, so invalid options fail before
    # any data is loaded.
    get_args()

    # Set the path to your CSV file here.
    csv_filename = 'sample/latencyDataframenvh264enc.csv'

    analyze_latency_data(csv_filename)