import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import re
import argparse
import logging

# Configure logging to show informational messages
logging.basicConfig(level=logging.INFO,
                    format='%(asctime)s - %(levelname)s - %(message)s')


def parse_args():
    parser = argparse.ArgumentParser(prog=__file__)
    parser.add_argument('-c', '--compensate', action="store_true",
                        help='Subtract filesrc/rawvideoparse latency from the totals.')
    parser.add_argument('--latency-csv',
                        type=str,
                        default='sample/latencyDataframenvh264enc.csv',
                        help='Path to the latency results CSV file.')
    parser.add_argument('-pd', '--plot-dir',
                        type=str,
                        default='plots/',
                        help='Path to the directory in which the resulting plots should be saved.')
    parser.add_argument('-csv', '--csv-dir',
                        type=str,
                        default='results/',
                        help='Path to the directory in which the resulting CSV data should be saved.')
    return parser.parse_args()


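# Example invocation (the script filename and the sample paths are illustrative,
# matching the argparse defaults above rather than files shipped with the script):
#   python this_script.py --latency-csv sample/latencyDataframenvh264enc.csv \
#       --plot-dir plots/ --csv-dir results/ --compensate
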
cmd_args = None


def get_args():
    """Parse the CLI arguments once and cache them for subsequent calls."""
    global cmd_args
    if cmd_args is None:
        cmd_args = parse_args()
    return cmd_args


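# The two plotting helpers below expect the summary frame built in
# analyze_latency_data(): rows indexed by configuration (with the encoder name
# as the first index level) and columns keyed by (statistic, metric) tuples,
# where statistic is 'left'/'mean'/'right' (min/mean/max across the repeated
# measurement columns) and metric is 'max'/'avg'/'median'.
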
def plot_latency_data(df):
    def create_labels(df_slice):
        labels = {}
        for i, index in enumerate(df_slice.index):
            label_parts = [f"{df.index.names[j] or f'L{j}'}: {val}"
                           for j, val in enumerate(index)]
            labels[i + 1] = " | ".join(label_parts)
        return labels

    mean_max_key = ('mean', 'max')
    mean_avg_key = ('mean', 'avg')
    mean_median_key = ('mean', 'median')

    min_max_key = ('left', 'max')
    max_max_key = ('right', 'max')

    min_avg_key = ('left', 'avg')
    max_avg_key = ('right', 'avg')

    min_median_key = ('left', 'median')
    max_median_key = ('right', 'median')

    df_top_n = df.head(10).copy()

    mean_max_values = df_top_n[mean_max_key]
    yerr_lower_max = mean_max_values - df_top_n[min_max_key]
    yerr_upper_max = df_top_n[max_max_key] - mean_max_values
    yerr_max_orig = np.array([yerr_lower_max.values, yerr_upper_max.values])

    mean_avg_values = df_top_n[mean_avg_key]
    yerr_lower_avg = mean_avg_values - df_top_n[min_avg_key]
    yerr_upper_avg = df_top_n[max_avg_key] - mean_avg_values
    yerr_avg = np.array([yerr_lower_avg.values, yerr_upper_avg.values])

    mean_median_values = df_top_n[mean_median_key]
    yerr_lower_median = mean_median_values - df_top_n[min_median_key]
    yerr_upper_median = df_top_n[max_median_key] - mean_median_values
    yerr_median = np.array(
        [yerr_lower_median.values, yerr_upper_median.values])

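    # The yerr arrays above follow matplotlib's asymmetric error-bar convention:
    # shape (2, N), where the first row is the distance from the mean down to the
    # observed minimum ('left') and the second row is the distance up to the
    # observed maximum ('right').
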
    encoder_name = df_top_n.index.get_level_values(0)[0]
    max_notes = create_labels(df_top_n)

    bar_width = 0.25
    num_configs = len(df_top_n)
    r1 = np.arange(num_configs)

    r_max_orig = r1
    r_avg = [x + bar_width for x in r1]
    r_median = [x + bar_width for x in r_avg]

    plt.figure(figsize=(12, 7), dpi=300)

    plt.bar(
        r_max_orig,
        df_top_n[mean_max_key],
        yerr=yerr_max_orig,
        capsize=5,
        color='red',
        width=bar_width,
        edgecolor='grey',
        label='Maximum latency'
    )

    plt.bar(
        r_avg,
        df_top_n[mean_avg_key],
        yerr=yerr_avg,
        capsize=5,
        color='blue',
        width=bar_width,
        edgecolor='grey',
        label='Average latency'
    )

    plt.bar(
        r_median,
        df_top_n[mean_median_key],
        yerr=yerr_median,
        capsize=5,
        color='green',
        width=bar_width,
        edgecolor='grey',
        label='Median latency'
    )

    plt.xlabel('Configuration index', fontweight='bold')
    plt.ylabel('Total latency [ms]', fontweight='bold')
    center_pos = [r + bar_width for r in r1]
    plt.xticks(center_pos, [str(i + 1) for i in range(num_configs)])

    plt.title(
        f'Latency comparison of the top {num_configs} configurations for {encoder_name}')
    plt.legend()
    plt.grid(axis='y', linestyle='--', alpha=0.6)

    plt.tight_layout()
    plt.savefig(os.path.join(
        get_args().plot_dir,
        f'combined_top_configurations_with_errors_{encoder_name}.png'))
    plt.close()

    print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
    for index, note in max_notes.items():
        print(f"Index {index}: {note}")


def plot_start_latency(df):
    plt.figure(figsize=(10, 6), dpi=300)
    r1 = np.arange(len(df))

    mean_col = ('mean', 'max')
    min_col = ('left', 'max')
    max_col = ('right', 'max')

    mean_values = df[mean_col]
    min_values = df[min_col]
    max_values = df[max_col]

    lower_error = mean_values - min_values
    upper_error = max_values - mean_values

    y_error = [lower_error.values, upper_error.values]

    plt.errorbar(r1,
                 mean_values,
                 yerr=y_error,
                 fmt='.-',
                 color='darkblue',
                 ecolor='red',
                 capsize=3,
                 linewidth=1
                 )
    plt.xlabel('Configuration index', fontweight='bold')
    plt.ylabel('Total latency [ms]', fontweight='bold')
    encoder_name = df.index.get_level_values(0)[0]
    plt.title(f"Start latency results for {encoder_name}")
    plt.tight_layout()
    plt.savefig(os.path.join(get_args().plot_dir,
                             f"start_latency_{encoder_name}.png"))
    plt.close()


def analyze_latency_data(csv_path: str):
    """
    Analyzes latency data to find the configurations with the lowest total
    pipeline latency, plots bar charts of their summed avg, median, and max
    latencies with min/max error bars, and saves the top 10 to CSV.

    Args:
        csv_path (str): The path to the input CSV file.
    """
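    # Assumed CSV layout (inferred from the read_csv call below rather than a
    # documented schema): the first column holds the pipeline component name
    # (e.g. filesrc0, rawvideoparse0) and the five header rows encode the
    # encoder configuration plus the metric name (avg/median/max, possibly
    # suffixed .1, .2, ... for repeated runs).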
    try:
        df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
        logging.info(
            f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
        if df.index.name == 'Unnamed: 0':
            df.index.name = 'component'
    except FileNotFoundError:
        logging.error(f"Error: The file '{csv_path}' was not found.")
        return
    except Exception as e:
        logging.error(f"An error occurred while reading the CSV file: {e}")
        return

    # Sum the per-component latencies into a total per (configuration, metric).
    sumDf = df.sum()
    if get_args().compensate:
        logging.info("Filesrc and rawvideoparse latency compensation is ON")
        filesrcData = df.loc["filesrc0"]
        rawvideoparseData = df.loc["rawvideoparse0"]
        sumDf -= filesrcData
        sumDf -= rawvideoparseData
    logging.debug(f"\n{sumDf.head()}")

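    # The last index level of sumDf (the metric name) can contain de-duplicated
    # names such as 'avg.1', 'avg.2', typically produced when pandas disambiguates
    # repeated names in a header row; the helper below strips that suffix so
    # repeated runs can be grouped under their base metric.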
    def get_base_metric(metric):
        """Strips suffixes like '.1' or '.2' from the metric name."""
        return re.sub(r'\.\d+$', '', str(metric))

    metric_level_values = sumDf.index.get_level_values(-1)

    base_metrics_key = metric_level_values.map(get_base_metric)

    config_levels = list(range(sumDf.index.nlevels - 1))

    grouping_keys = [
        sumDf.index.get_level_values(i) for i in config_levels
    ] + [base_metrics_key]

    # Group all entries that share the same (configuration + base metric),
    # collapsing repeated measurements such as (avg, avg.1, avg.2) into a
    # single mean, minimum, and maximum.
    sumDfgrouping = sumDf.groupby(grouping_keys)
    averaged_sumDf = sumDfgrouping.mean()
    max_sumDf = sumDfgrouping.max()
    min_sumDf = sumDfgrouping.min()

    logging.debug(f"\n{max_sumDf.head(10)}")
    logging.debug(f"\n{min_sumDf.head(10)}")
    logging.info(f"\n{averaged_sumDf.head(10)}")

    merged_sumDf = pd.concat(
        [min_sumDf, averaged_sumDf, max_sumDf],
        axis=1,
        keys=['left', 'mean', 'right']
    )

    sumDf = merged_sumDf

    # Move the metric level into the columns: one row per configuration,
    # columns keyed by (statistic, metric).
    df_summary = sumDf.unstack(level=-1)

    df_sorted_by_max = df_summary.sort_values(
        by=('mean', 'max'), ascending=True)  # type: ignore
    df_sorted_by_avg = df_summary.sort_values(
        by=('mean', 'avg'), ascending=True)  # type: ignore
    df_sorted_by_median = df_summary.sort_values(
        by=('mean', 'median'), ascending=True)  # type: ignore

    print("SORTED BY MAX")
    print(df_sorted_by_max)
    print("---------------")
    print("SORTED BY AVERAGE")
    print(df_sorted_by_avg)
    print("---------------")
    print("SORTED BY MEDIAN")
    print(df_sorted_by_median)

    max_indices = df_sorted_by_max.index
    avg_indices = df_sorted_by_avg.index
    median_indices = df_sorted_by_median.index

    # Intersect the three orderings. The max-sorted index leads because the
    # maximum latency usually contributes the most to end-to-end stream latency.
    common_indices = max_indices.intersection(
        avg_indices).intersection(median_indices)

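    # Note: all three sorted frames contain the same configurations, so the
    # intersection keeps every row; with the default sort=False the result is
    # expected to follow the order of the calling (max-sorted) index, which is
    # what the top-10 selection below relies on.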
    # Filter the summary DataFrame down to the common configurations.
    df_common_top_performers = df_summary.loc[common_indices]
    encoder_name = df_common_top_performers.index.get_level_values(0)[0]

    print(df_common_top_performers.head())

    plot_latency_data(df_common_top_performers)

    plot_start_latency(df_common_top_performers)

    # Save the top performers to CSV.
    top_10_df = df_common_top_performers.head(10)
    top_10_df.to_csv(os.path.join(get_args().csv_dir, f"{encoder_name}.csv"))
    return


if __name__ == '__main__':
    os.makedirs(get_args().csv_dir, exist_ok=True)
    os.makedirs(get_args().plot_dir, exist_ok=True)
    analyze_latency_data(get_args().latency_csv)