"""
gstAutotest/latencyAnalysis.py
Artur Mukhamadiev 8b9190bb86 [init] just move from pipelines repo
:Release Notes:
-

:Detailed Notes:
-

:Testing Performed:
-

:QA Notes:
-

:Issues Addressed:
-
2025-10-11 18:41:28 +03:00

113 lines
4.5 KiB
Python
"""
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import logging
# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
def analyze_latency_data(csv_path: str, top_n: int = 10):
    """
    Find the configurations whose summed latency ranks among the lowest for
    all three metrics (max, avg, median) and plot them as a grouped bar chart.

    The CSV is expected to carry a 5-level column MultiIndex (the innermost
    level holding the metric name: 'avg' / 'median' / 'max') and the
    component names in the first column.

    Args:
        csv_path (str): Path to the input CSV file.
        top_n (int): How many best (lowest-latency) configurations to keep
            from each metric's ranking before intersecting them. Defaults
            to 10, matching the original behavior.

    Returns:
        None. Side effects: prints the sorted summaries and the
        index-to-configuration notes, and saves
        'combined_top_configurations_plot_<encoder>.png'.
    """
    # --- 1. Load data with multi-level headers ---
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
        logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
        if df.index.name == 'Unnamed: 0':
            df.index.name = 'component'
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    # --- 2. Sum latency over all components (rows) per configuration ---
    sum_series = df.sum()
    # Series.info() prints its report itself and returns None;
    # print(sum_series.info()) would additionally emit a stray 'None'.
    sum_series.info()
    # Pivot the innermost header level (the metric) into columns.
    df_summary = sum_series.unstack(level=-1)

    # --- 3. Rank configurations by each metric (lowest latency first) ---
    df_sorted_by_max = df_summary.sort_values(by='max', ascending=True)
    df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True)
    df_sorted_by_median = df_summary.sort_values(by='median', ascending=True)
    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    # --- 4. Keep only configurations present in all three top-N lists ---
    # Bug fix: the original intersected the FULL index of each sorted frame,
    # which always yields the complete set of configurations (a no-op).
    # Slice the top-N of each ranking first, as the comments intended.
    # 'max' is the primary ranking because it commonly introduces the
    # largest amount of latency to the stream.
    max_indices = df_sorted_by_max.head(top_n).index
    avg_indices = df_sorted_by_avg.head(top_n).index
    median_indices = df_sorted_by_median.head(top_n).index
    common_indices = max_indices.intersection(avg_indices).intersection(median_indices)

    df_common_top_performers = df_summary.loc[common_indices]
    print(df_common_top_performers.head())
    if df_common_top_performers.empty:
        # Nothing common to all three rankings -> nothing meaningful to plot.
        logging.warning("No configuration appears in all three top-%d lists; nothing to plot.", top_n)
        return

    def create_labels(frame):
        """Combine the MultiIndex levels (L0-L3) of each row into a single
        'L0:v | L1:v | ...' string, keyed by the 1-based bar position used
        on the plot's x-axis."""
        return {
            pos + 1: " | ".join(f"L{lvl}:{val}" for lvl, val in enumerate(idx))
            for pos, idx in enumerate(frame.index)
        }

    df_common_top_performers = df_common_top_performers.head(top_n)
    # All plotted rows share the encoder (index level 0); take it from row 0.
    encoder_name = df_common_top_performers.index.get_level_values(0)[0]
    max_notes = create_labels(df_common_top_performers)

    # --- 5. Grouped bar chart: max / avg / median per configuration ---
    bar_width = 0.25
    num_configs = len(df_common_top_performers)
    r1 = np.arange(num_configs)
    r2 = [x + bar_width for x in r1]
    r3 = [x + bar_width for x in r2]
    fig = plt.figure(figsize=(10, 6), dpi=300)
    plt.bar(r1, df_common_top_performers['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency')
    plt.bar(r2, df_common_top_performers['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency')
    plt.bar(r3, df_common_top_performers['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency')
    # User-facing labels/title are Russian runtime strings; kept verbatim.
    plt.xlabel('Индекс конфигурации', fontweight='bold')
    plt.ylabel('Общая задержка [мс]', fontweight='bold')
    plt.xticks([r + bar_width for r in range(num_configs)], [str(i + 1) for i in range(num_configs)])
    plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)
    plt.tight_layout()
    plt.savefig(f'combined_top_configurations_plot_{encoder_name}.png')
    plt.close(fig)

    # Notes mapping the numeric x-axis indices back to full configurations.
    print("\n--- Notes for MAX Plot (X-Axis Index to Configuration) ---")
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")
if __name__ == '__main__':
    # Set the path to your CSV file here.
    csv_filename = 'results/latencyDataframenvv4l2h264enc.csv'
    analyze_latency_data(csv_filename)