[analysis] mean values for non-unique config runs

This commit is contained in:
Artur Mukhamadiev 2025-10-12 23:37:07 +03:00
parent 900aca9bd5
commit 628f0439b7
3 changed files with 172 additions and 72 deletions

4
.gitignore vendored
View File

@ -1,3 +1,7 @@
#project ignore:
plots/
results/
# ---> Python # ---> Python
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

View File

@ -2,11 +2,14 @@ import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import os import os
import re
import argparse import argparse
import logging import logging
# Configure logging to show informational messages # Configure logging to show informational messages
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(prog=__file__) parser = argparse.ArgumentParser(prog=__file__)
@ -25,13 +28,17 @@ def parse_args():
help='Path to directory in which resulted csv data should be saved') help='Path to directory in which resulted csv data should be saved')
return parser.parse_args() return parser.parse_args()
cmd_args = None cmd_args = None
def get_args(): def get_args():
global cmd_args global cmd_args
if cmd_args is None: if cmd_args is None:
cmd_args = parse_args() cmd_args = parse_args()
return cmd_args return cmd_args
def plot_latency_data(df): def plot_latency_data(df):
def create_labels(df): def create_labels(df):
"""Combines MultiIndex levels (L0-L3) into a single string for notes.""" """Combines MultiIndex levels (L0-L3) into a single string for notes."""
@ -53,20 +60,26 @@ def plot_latency_data(df):
r3 = [x + bar_width for x in r2] r3 = [x + bar_width for x in r2]
fig = plt.figure(figsize=(10, 6), dpi=300) fig = plt.figure(figsize=(10, 6), dpi=300)
# Create the bars # Create the bars
plt.bar(r1, df['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency') plt.bar(r1, df['max'], color='red', width=bar_width,
plt.bar(r2, df['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency') edgecolor='grey', label='Max Latency')
plt.bar(r3, df['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency') plt.bar(r2, df['avg'], color='blue', width=bar_width,
edgecolor='grey', label='Avg Latency')
plt.bar(r3, df['median'], color='green', width=bar_width,
edgecolor='grey', label='Median Latency')
# Add labels and ticks # Add labels and ticks
plt.xlabel('Индекс конфигурации', fontweight='bold') plt.xlabel('Индекс конфигурации', fontweight='bold')
plt.ylabel('Общая задержка [мс]', fontweight='bold') plt.ylabel('Общая задержка [мс]', fontweight='bold')
plt.xticks([r + bar_width for r in range(num_configs)], [str(i + 1) for i in range(num_configs)]) plt.xticks([r + bar_width for r in range(num_configs)],
plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}') [str(i + 1) for i in range(num_configs)])
plt.title(
f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
plt.legend() plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.6) plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout() plt.tight_layout()
plt.savefig(get_args().plot_dir + f'combined_top_configurations_plot_{encoder_name}.png') plt.savefig(get_args().plot_dir +
f'combined_top_configurations_plot_{encoder_name}.png')
plt.close() plt.close()
# Output Notes (for user interpretation) # Output Notes (for user interpretation)
@ -74,6 +87,7 @@ def plot_latency_data(df):
for index, note in max_notes.items(): for index, note in max_notes.items():
print(f"Index {index}: {note}") print(f"Index {index}: {note}")
def plot_start_latency(df): def plot_start_latency(df):
fig = plt.figure(figsize=(10, 6), dpi=300) fig = plt.figure(figsize=(10, 6), dpi=300)
r1 = np.arange(len(df)) r1 = np.arange(len(df))
@ -86,6 +100,7 @@ def plot_start_latency(df):
plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png") plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png")
plt.close() plt.close()
def analyze_latency_data(csv_path: str): def analyze_latency_data(csv_path: str):
""" """
Analyzes latency data to find the top 10 components (rows) contributing most Analyzes latency data to find the top 10 components (rows) contributing most
@ -97,7 +112,8 @@ def analyze_latency_data(csv_path: str):
# --- 1. Load Data with Multi-level Headers --- # --- 1. Load Data with Multi-level Headers ---
try: try:
df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0) df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") logging.info(
f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
if df.index.name == 'Unnamed: 0': if df.index.name == 'Unnamed: 0':
df.index.name = 'component' df.index.name = 'component'
except FileNotFoundError: except FileNotFoundError:
@ -110,18 +126,46 @@ def analyze_latency_data(csv_path: str):
# calculate summary along the rows # calculate summary along the rows
sumDf = df.sum() sumDf = df.sum()
if get_args().compensate == True: if get_args().compensate == True:
logging.info("Filesrc latency compensation is ON") logging.info("Filesrc and rawvideoparse latency compensation is ON")
filesrcData = df.loc["filesrc0"] filesrcData = df.loc["filesrc0"]
rawvideoparseData = df.loc["rawvideoparse0"]
sumDf -= filesrcData sumDf -= filesrcData
print(sumDf.head()) sumDf -= rawvideoparseData
# return logging.debug(f"\n{sumDf.head()}")
# calculate mean across non-unique runs:
def get_base_metric(metric):
"""Strips suffixes like '.1' or '.2' from the metric name."""
return re.sub(r'\.\d+$', '', str(metric))
metric_level_values = sumDf.index.get_level_values(-1)
base_metrics_key = metric_level_values.map(get_base_metric)
config_levels = list(range(sumDf.index.nlevels - 1)
) # This gives [0, 1, 2, 3]
grouping_keys = sumDf.index.droplevel(config_levels) # type: ignore
grouping_keys = [
sumDf.index.get_level_values(i) for i in config_levels
] + [base_metrics_key]
# 3. Perform Grouping and Mean Calculation
# This command groups all entries that share the same (Config + Base Metric),
# collapsing (avg, avg.1, avg.2) into a single average.
averaged_sumDf = sumDf.groupby(grouping_keys).mean()
logging.info(f"\n{averaged_sumDf.head(10)}")
sumDf = averaged_sumDf
df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set
# 2. Sort the resulting DataFrame by the desired metric column. # 2. Sort the resulting DataFrame by the desired metric column.
df_sorted_by_max = df_summary.sort_values(by='max', ascending=True) df_sorted_by_max = df_summary.sort_values(
df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True) by='max', ascending=True) # type: ignore
df_sorted_by_median = df_summary.sort_values(by='median', ascending=True) df_sorted_by_avg = df_summary.sort_values(
by='avg', ascending=True) # type: ignore
df_sorted_by_median = df_summary.sort_values(
by='median', ascending=True) # type: ignore
print("SORTED BY MAX") print("SORTED BY MAX")
print(df_sorted_by_max) print(df_sorted_by_max)
@ -139,7 +183,8 @@ def analyze_latency_data(csv_path: str):
# 2. Find the intersection (common elements) of the three sets of indices # 2. Find the intersection (common elements) of the three sets of indices
# max is the main index because it commonly introduces the largest amount of latency to the stream # max is the main index because it commonly introduces the largest amount of latency to the stream
common_indices = max_indices.intersection(avg_indices).intersection(median_indices) common_indices = max_indices.intersection(
avg_indices).intersection(median_indices)
# 3. Filter the original summary DataFrame (df_summary) using the common indices # 3. Filter the original summary DataFrame (df_summary) using the common indices
df_common_top_performers = df_summary.loc[common_indices] df_common_top_performers = df_summary.loc[common_indices]
@ -156,8 +201,8 @@ def analyze_latency_data(csv_path: str):
top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv") top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv")
return return
if __name__ == '__main__': if __name__ == '__main__':
os.makedirs(get_args().csv_dir, exist_ok=True) os.makedirs(get_args().csv_dir, exist_ok=True)
os.makedirs(get_args().plot_dir, exist_ok=True) os.makedirs(get_args().plot_dir, exist_ok=True)
analyze_latency_data(get_args().latency_csv) analyze_latency_data(get_args().latency_csv)

View File

@ -4,9 +4,12 @@ import numpy as np
import logging import logging
import argparse import argparse
import os import os
import re
# Configure logging to show informational messages # Configure logging to show informational messages
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(prog=__file__) parser = argparse.ArgumentParser(prog=__file__)
@ -26,13 +29,17 @@ def parse_args():
help='Path to directory in which resulted csv data should be saved') help='Path to directory in which resulted csv data should be saved')
return parser.parse_args() return parser.parse_args()
cmd_args = None cmd_args = None
def get_args(): def get_args():
global cmd_args global cmd_args
if cmd_args is None: if cmd_args is None:
cmd_args = parse_args() cmd_args = parse_args()
return cmd_args return cmd_args
def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
""" """
Draws a bar plot comparing PSNR and SSIM for the top 10 video configurations. Draws a bar plot comparing PSNR and SSIM for the top 10 video configurations.
@ -118,12 +125,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
# 7. Final Plot appearance # 7. Final Plot appearance
fig.suptitle(title) fig.suptitle(title)
fig.tight_layout(rect=[0, 0.03, 1, 0.95]) fig.tight_layout(rect={0.0, 0.03, 1.0, 0.95}) # type: ignore
# Combine legends from both axes # Combine legends from both axes
lines1, labels1 = ax1.get_legend_handles_labels() lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels() lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, bbox_to_anchor=(0.6, 1.1), ncol=2) ax1.legend(lines1 + lines2, labels1 + labels2,
bbox_to_anchor=(0.6, 1.1), ncol=2)
plt.grid(axis='y', linestyle='--', alpha=0.7) plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig(f'{file_name}.png') plt.savefig(f'{file_name}.png')
@ -133,11 +141,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
for index, note in quality_notes.items(): for index, note in quality_notes.items():
print(f"Index {index}: {note}") print(f"Index {index}: {note}")
def analyze_quality_report(csv_path: str): def analyze_quality_report(csv_path: str):
# --- 1. Load Data with Multi-level Headers --- # --- 1. Load Data with Multi-level Headers ---
try: try:
df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0) df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") logging.info(
f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
if df.index.name == 'Unnamed: 0': if df.index.name == 'Unnamed: 0':
df.index.name = 'component' df.index.name = 'component'
except FileNotFoundError: except FileNotFoundError:
@ -149,13 +159,42 @@ def analyze_quality_report(csv_path: str):
# Get row with average results # Get row with average results
avgDf = df.loc["Average"] avgDf = df.loc["Average"]
avgDf = avgDf.unstack(level=-1) logging.info(f"\n{avgDf.head(10)}")
# calculate mean across non-unique runs:
def get_base_metric(metric):
"""Strips suffixes like '.1' or '.2' from the metric name."""
return re.sub(r'\.\d+$', '', str(metric))
metric_level_values = avgDf.index.get_level_values(-1)
base_metrics_key = metric_level_values.map(get_base_metric)
config_levels = list(range(avgDf.index.nlevels - 1)
) # This gives [0, 1, 2, 3]
grouping_keys = avgDf.index.droplevel(config_levels) # type: ignore
grouping_keys = [
avgDf.index.get_level_values(i) for i in config_levels
] + [base_metrics_key]
# 3. Perform Grouping and Mean Calculation
# This command groups all entries that share the same (Config + Base Metric),
# collapsing (avg, avg.1, avg.2) into a single average.
averaged_sumDf = avgDf.groupby(grouping_keys).mean()
logging.info(f"\n{averaged_sumDf.head(10)}")
avgDf = averaged_sumDf
logging.info(f"\n{avgDf.head(10)}")
avgDf = avgDf.unstack(level=-1)
encoder_name = avgDf.index.get_level_values(0)[0] encoder_name = avgDf.index.get_level_values(0)[0]
logging.debug(f"encoder_name={encoder_name}") logging.debug(f"encoder_name={encoder_name}")
dfPSNRsorted = avgDf.sort_values(by="PSNR", ascending=False) dfPSNRsorted = avgDf.sort_values(
dfSSIMsorted = avgDf.sort_values(by="SSIM", ascending=False) by="PSNR", ascending=False) # type: ignore
dfSSIMsorted = avgDf.sort_values(
by="SSIM", ascending=False) # type: ignore
indexPSNR = dfPSNRsorted.index indexPSNR = dfPSNRsorted.index
indexSSIM = dfSSIMsorted.index indexSSIM = dfSSIMsorted.index
@ -168,13 +207,15 @@ def analyze_quality_report(csv_path: str):
# Convert the MultiIndex (encoder, profile, video, parameters) into columns # Convert the MultiIndex (encoder, profile, video, parameters) into columns
df_quality_results = intersectedDf.reset_index() df_quality_results = intersectedDf.reset_index()
# Rename the columns to match the latency report's structure # Rename the columns to match the latency report's structure
df_quality_results.columns = ['encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM'] df_quality_results.columns = [
logging.debug(f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}") 'encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM']
logging.debug(
f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}")
# Now intersected with latency report # Now intersected with latency report
latency_df = pd.read_csv(f'results/{encoder_name}.csv') latency_df = pd.read_csv(f'results/{encoder_name}.csv')
columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile', 'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'} columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile',
'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
latency_df.rename(columns=columns, inplace=True) latency_df.rename(columns=columns, inplace=True)
logging.debug(f"\n{latency_df.head()}") logging.debug(f"\n{latency_df.head()}")
@ -185,12 +226,14 @@ def analyze_quality_report(csv_path: str):
df_quality_results, df_quality_results,
latency_df, latency_df,
on=merge_keys, on=merge_keys,
how='inner' # Only keep records present in both (i.e., the top quality configurations) # Only keep records present in both (i.e., the top quality configurations)
how='inner'
) )
logging.info("=" * 70) logging.info("=" * 70)
logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---") logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---")
logging.info(f"Number of common configuration entries found: {len(merged_df)}") logging.info(
f"Number of common configuration entries found: {len(merged_df)}")
logging.info("=" * 70) logging.info("=" * 70)
# Prepare for display # Prepare for display
@ -200,21 +243,29 @@ def analyze_quality_report(csv_path: str):
display_columns = [ display_columns = [
'encoder', 'profile', 'video', 'parameters', 'encoder', 'profile', 'video', 'parameters',
'PSNR', 'SSIM', # Quality metrics 'PSNR', 'SSIM', # Quality metrics
'avg', 'max', 'median', 'std' # Latency metrics (assuming these are in the latency report) # Latency metrics (assuming these are in the latency report)
'avg', 'max', 'median', 'std'
] ]
final_cols = [col for col in display_columns if col in merged_df_display.columns] final_cols = [
col for col in display_columns if col in merged_df_display.columns]
print(f"\n{merged_df_display[final_cols].to_string()}") print(f"\n{merged_df_display[final_cols].to_string()}")
plot_top_configurations(merged_df_display, get_args().plot_dir + f"top_quality_configurations_by_latency_{encoder_name}", f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}") plot_top_configurations(merged_df_display,
get_args().plot_dir +
f"top_quality_configurations_by_latency_{encoder_name}",
f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}")
plot_top_configurations(df_quality_results, get_args().plot_dir + f"top_quality_configurations_{encoder_name}", f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}") plot_top_configurations(df_quality_results,
get_args().plot_dir +
f"top_quality_configurations_{encoder_name}",
f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}")
return return
if __name__ == '__main__': if __name__ == '__main__':
os.makedirs(get_args().csv_dir, exist_ok=True) os.makedirs(get_args().csv_dir, exist_ok=True)
os.makedirs(get_args().plot_dir, exist_ok=True) os.makedirs(get_args().plot_dir, exist_ok=True)
analyze_quality_report(get_args().quality_csv) analyze_quality_report(get_args().quality_csv)