add min/max range (error bars) to plot results

This commit is contained in:
Artur Mukhamadiev 2025-10-13 23:50:03 +03:00
parent 5910377893
commit ec7d82d344

View File

@ -40,38 +40,96 @@ def get_args():
def plot_latency_data(df):
def create_labels(df):
"""Combines MultiIndex levels (L0-L3) into a single string for notes."""
def create_labels(df_slice):
labels = {}
for i, index in enumerate(df.index):
# Format: L#:value | L#:value | ...
label_parts = [f"L{j}:{val}" for j, val in enumerate(index)]
for i, index in enumerate(df_slice.index):
label_parts = [f"{df.index.names[j] or f'L{j}'}: {val}"
for j, val in enumerate(index)]
labels[i + 1] = " | ".join(label_parts)
return labels
df = df.head(10)
encoder_name = df.index.get_level_values(0)[0]
max_notes = create_labels(df)
mean_max_key = ('mean', 'max')
mean_avg_key = ('mean', 'avg')
mean_median_key = ('mean', 'median')
min_max_key = ('left', 'max')
max_max_key = ('right', 'max')
min_avg_key = ('left', 'avg')
max_avg_key = ('right', 'avg')
min_median_key = ('left', 'median')
max_median_key = ('right', 'median')
df_top_n = df.head(10).copy()
mean_max_values = df_top_n[mean_max_key]
yerr_lower_max = mean_max_values - df_top_n[min_max_key]
yerr_upper_max = df_top_n[max_max_key] - mean_max_values
yerr_max_orig = np.array([yerr_lower_max.values, yerr_upper_max.values])
mean_avg_values = df_top_n[mean_avg_key]
yerr_lower_avg = mean_avg_values - df_top_n[min_avg_key]
yerr_upper_avg = df_top_n[max_avg_key] - mean_avg_values
yerr_avg = np.array([yerr_lower_avg.values, yerr_upper_avg.values])
mean_median_values = df_top_n[mean_median_key]
yerr_lower_median = mean_median_values - df_top_n[min_median_key]
yerr_upper_median = df_top_n[max_median_key] - mean_median_values
yerr_median = np.array(
[yerr_lower_median.values, yerr_upper_median.values])
encoder_name = df_top_n.index.get_level_values(0)[0]
max_notes = create_labels(df_top_n)
bar_width = 0.25
num_configs = len(df)
num_configs = len(df_top_n)
r1 = np.arange(num_configs)
r2 = [x + bar_width for x in r1]
r3 = [x + bar_width for x in r2]
fig = plt.figure(figsize=(10, 6), dpi=300)
# Create the bars
plt.bar(r1, df['max'], color='red', width=bar_width,
edgecolor='grey', label='Max Latency')
plt.bar(r2, df['avg'], color='blue', width=bar_width,
edgecolor='grey', label='Avg Latency')
plt.bar(r3, df['median'], color='green', width=bar_width,
edgecolor='grey', label='Median Latency')
# Add labels and ticks
r_max_orig = r1
r_avg = [x + bar_width for x in r1]
r_median = [x + bar_width for x in r_avg]
fig = plt.figure(figsize=(12, 7), dpi=300)
plt.bar(
r_max_orig,
df_top_n[mean_max_key],
yerr=yerr_max_orig,
capsize=5,
color='red',
width=bar_width,
edgecolor='grey',
label='Максимальная задержка'
)
plt.bar(
r_avg,
df_top_n[mean_avg_key],
yerr=yerr_avg,
capsize=5,
color='blue',
width=bar_width,
edgecolor='grey',
label='Средняя задержка'
)
plt.bar(
r_median,
df_top_n[mean_median_key],
yerr=yerr_median,
capsize=5,
color='green',
width=bar_width,
edgecolor='grey',
label='Медианная задержка'
)
plt.xlabel('Индекс конфигурации', fontweight='bold')
plt.ylabel('Общая задержка [мс]', fontweight='bold')
plt.xticks([r + bar_width for r in range(num_configs)],
[str(i + 1) for i in range(num_configs)])
center_pos = [r + bar_width for r in r1]
plt.xticks(center_pos, [str(i + 1) for i in range(num_configs)])
plt.title(
f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
plt.legend()
@ -79,10 +137,9 @@ def plot_latency_data(df):
plt.tight_layout()
plt.savefig(get_args().plot_dir +
f'combined_top_configurations_plot_{encoder_name}.png')
f'combined_top_configurations_with_errors_{encoder_name}.png')
plt.close()
# Output Notes (for user interpretation)
print("\n--- Notes for Plot (X-Axis Index to Configuration) ---")
for index, note in max_notes.items():
print(f"Index {index}: {note}")
@ -91,7 +148,29 @@ def plot_latency_data(df):
def plot_start_latency(df):
fig = plt.figure(figsize=(10, 6), dpi=300)
r1 = np.arange(len(df))
plt.plot(r1, df['max'])
mean_col = ('mean', 'max')
min_col = ('left', 'max')
max_col = ('right', 'max')
mean_values = df[mean_col]
min_values = df[min_col]
max_values = df[max_col]
lower_error = mean_values - min_values
upper_error = max_values - mean_values
y_error = [lower_error.values, upper_error.values]
plt.errorbar(r1,
mean_values,
yerr=y_error,
fmt='.-',
color='darkblue',
ecolor='red',
capsize=3,
linewidth=1
)
plt.xlabel('Индекс конфигурации', fontweight='bold')
plt.ylabel('Общая задержка [мс]', fontweight='bold')
encoder_name = df.index.get_level_values(0)[0]
@ -109,7 +188,7 @@ def analyze_latency_data(csv_path: str):
Args:
csv_path (str): The path to the input CSV file.
"""
# --- 1. Load Data with Multi-level Headers ---
try:
df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
logging.info(
@ -123,7 +202,6 @@ def analyze_latency_data(csv_path: str):
logging.error(f"An error occurred while reading the CSV file: {e}")
return
# calculate summary along the rows
sumDf = df.sum()
if get_args().compensate == True:
logging.info("Filesrc and rawvideoparse latency compensation is ON")
@ -133,7 +211,6 @@ def analyze_latency_data(csv_path: str):
sumDf -= rawvideoparseData
logging.debug(f"\n{sumDf.head()}")
# calculate mean across non-unique runs:
def get_base_metric(metric):
"""Strips suffixes like '.1' or '.2' from the metric name."""
return re.sub(r'\.\d+$', '', str(metric))
@ -142,8 +219,7 @@ def analyze_latency_data(csv_path: str):
base_metrics_key = metric_level_values.map(get_base_metric)
config_levels = list(range(sumDf.index.nlevels - 1)
) # This gives [0, 1, 2, 3]
config_levels = list(range(sumDf.index.nlevels - 1))
grouping_keys = sumDf.index.droplevel(config_levels) # type: ignore
grouping_keys = [
@ -153,19 +229,31 @@ def analyze_latency_data(csv_path: str):
# 3. Perform Grouping and Mean Calculation
# This command groups all entries that share the same (Config + Base Metric),
# collapsing (avg, avg.1, avg.2) into a single average.
averaged_sumDf = sumDf.groupby(grouping_keys).mean()
sumDfgrouping = sumDf.groupby(grouping_keys)
averaged_sumDf = sumDfgrouping.mean()
max_sumDf = sumDfgrouping.max()
min_sumDf = sumDfgrouping.min()
logging.debug(f"\n{max_sumDf.head(10)}")
logging.debug(f"\n{min_sumDf.head(10)}")
logging.info(f"\n{averaged_sumDf.head(10)}")
sumDf = averaged_sumDf
df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set
merged_sumDf = pd.concat(
[min_sumDf, averaged_sumDf, max_sumDf],
axis=1,
keys=['left', 'mean', 'right']
)
sumDf = merged_sumDf
df_summary = sumDf.unstack(level=-1)
# 2. Sort the resulting DataFrame by the desired metric column.
df_sorted_by_max = df_summary.sort_values(
by='max', ascending=True) # type: ignore
by=('mean', 'max'), ascending=True) # type: ignore
df_sorted_by_avg = df_summary.sort_values(
by='avg', ascending=True) # type: ignore
by=('mean', 'avg'), ascending=True) # type: ignore
df_sorted_by_median = df_summary.sort_values(
by='median', ascending=True) # type: ignore
by=('mean', 'median'), ascending=True) # type: ignore
print("SORTED BY MAX")
print(df_sorted_by_max)
@ -176,7 +264,6 @@ def analyze_latency_data(csv_path: str):
print("SORTED BY MEDIAN")
print(df_sorted_by_median)
# 1. Get the indices (configurations) for each top 10 list
max_indices = df_sorted_by_max.index
avg_indices = df_sorted_by_avg.index
median_indices = df_sorted_by_median.index