[analysis] mean values for non-unique config runs

This commit is contained in:
Artur Mukhamadiev 2025-10-12 23:37:07 +03:00
parent 900aca9bd5
commit 628f0439b7
3 changed files with 172 additions and 72 deletions

4
.gitignore vendored
View File

@ -1,3 +1,7 @@
#project ignore:
plots/
results/
# ---> Python # ---> Python
# Byte-compiled / optimized / DLL files # Byte-compiled / optimized / DLL files
__pycache__/ __pycache__/

View File

@ -2,20 +2,23 @@ import pandas as pd
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import numpy as np import numpy as np
import os import os
import re
import argparse import argparse
import logging import logging
# Configure logging to show informational messages # Configure logging to show informational messages
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(prog=__file__) parser = argparse.ArgumentParser(prog=__file__)
parser.add_argument('-c', '--compensate', action="store_true") parser.add_argument('-c', '--compensate', action="store_true")
parser.add_argument('--latency-csv', parser.add_argument('--latency-csv',
type=str, type=str,
default='sample/latencyDataframenvh264enc.csv', default='sample/latencyDataframenvh264enc.csv',
help='Path to the latency results CSV file.') help='Path to the latency results CSV file.')
parser.add_argument('-pd','--plot-dir', parser.add_argument('-pd', '--plot-dir',
type=str, type=str,
default='plots/', default='plots/',
help='Path to directory in which resulted plots should be saved') help='Path to directory in which resulted plots should be saved')
@ -25,13 +28,17 @@ def parse_args():
help='Path to directory in which resulted csv data should be saved') help='Path to directory in which resulted csv data should be saved')
return parser.parse_args() return parser.parse_args()
cmd_args = None cmd_args = None
def get_args():
    """Return the parsed command-line arguments, parsing them only once.

    The result is cached in the module-level ``cmd_args`` variable, so every
    later call returns the same namespace without touching ``sys.argv`` again.
    """
    global cmd_args
    # Guard clause: reuse the cached namespace when it already exists.
    if cmd_args is not None:
        return cmd_args
    cmd_args = parse_args()
    return cmd_args
def plot_latency_data(df): def plot_latency_data(df):
def create_labels(df): def create_labels(df):
"""Combines MultiIndex levels (L0-L3) into a single string for notes.""" """Combines MultiIndex levels (L0-L3) into a single string for notes."""
@ -40,7 +47,7 @@ def plot_latency_data(df):
# Format: L#:value | L#:value | ... # Format: L#:value | L#:value | ...
label_parts = [f"L{j}:{val}" for j, val in enumerate(index)] label_parts = [f"L{j}:{val}" for j, val in enumerate(index)]
labels[i + 1] = " | ".join(label_parts) labels[i + 1] = " | ".join(label_parts)
return labels return labels
df = df.head(10) df = df.head(10)
encoder_name = df.index.get_level_values(0)[0] encoder_name = df.index.get_level_values(0)[0]
@ -53,20 +60,26 @@ def plot_latency_data(df):
r3 = [x + bar_width for x in r2] r3 = [x + bar_width for x in r2]
fig = plt.figure(figsize=(10, 6), dpi=300) fig = plt.figure(figsize=(10, 6), dpi=300)
# Create the bars # Create the bars
plt.bar(r1, df['max'], color='red', width=bar_width, edgecolor='grey', label='Max Latency') plt.bar(r1, df['max'], color='red', width=bar_width,
plt.bar(r2, df['avg'], color='blue', width=bar_width, edgecolor='grey', label='Avg Latency') edgecolor='grey', label='Max Latency')
plt.bar(r3, df['median'], color='green', width=bar_width, edgecolor='grey', label='Median Latency') plt.bar(r2, df['avg'], color='blue', width=bar_width,
edgecolor='grey', label='Avg Latency')
plt.bar(r3, df['median'], color='green', width=bar_width,
edgecolor='grey', label='Median Latency')
# Add labels and ticks # Add labels and ticks
plt.xlabel('Индекс конфигурации', fontweight='bold') plt.xlabel('Индекс конфигурации', fontweight='bold')
plt.ylabel('Общая задержка [мс]', fontweight='bold') plt.ylabel('Общая задержка [мс]', fontweight='bold')
plt.xticks([r + bar_width for r in range(num_configs)], [str(i + 1) for i in range(num_configs)]) plt.xticks([r + bar_width for r in range(num_configs)],
plt.title(f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}') [str(i + 1) for i in range(num_configs)])
plt.title(
f'Сравнение производительности {num_configs} лучших конфигураций по задержке для {encoder_name}')
plt.legend() plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.6) plt.grid(axis='y', linestyle='--', alpha=0.6)
plt.tight_layout() plt.tight_layout()
plt.savefig(get_args().plot_dir + f'combined_top_configurations_plot_{encoder_name}.png') plt.savefig(get_args().plot_dir +
f'combined_top_configurations_plot_{encoder_name}.png')
plt.close() plt.close()
# Output Notes (for user interpretation) # Output Notes (for user interpretation)
@ -74,6 +87,7 @@ def plot_latency_data(df):
for index, note in max_notes.items(): for index, note in max_notes.items():
print(f"Index {index}: {note}") print(f"Index {index}: {note}")
def plot_start_latency(df): def plot_start_latency(df):
fig = plt.figure(figsize=(10, 6), dpi=300) fig = plt.figure(figsize=(10, 6), dpi=300)
r1 = np.arange(len(df)) r1 = np.arange(len(df))
@ -86,6 +100,7 @@ def plot_start_latency(df):
plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png") plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png")
plt.close() plt.close()
def analyze_latency_data(csv_path: str): def analyze_latency_data(csv_path: str):
""" """
Analyzes latency data to find the top 10 components (rows) contributing most Analyzes latency data to find the top 10 components (rows) contributing most
@ -96,8 +111,9 @@ def analyze_latency_data(csv_path: str):
""" """
# --- 1. Load Data with Multi-level Headers --- # --- 1. Load Data with Multi-level Headers ---
try: try:
df = pd.read_csv(csv_path, header=[0,1, 2, 3, 4], index_col=0) df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") logging.info(
f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
if df.index.name == 'Unnamed: 0': if df.index.name == 'Unnamed: 0':
df.index.name = 'component' df.index.name = 'component'
except FileNotFoundError: except FileNotFoundError:
@ -107,22 +123,50 @@ def analyze_latency_data(csv_path: str):
logging.error(f"An error occurred while reading the CSV file: {e}") logging.error(f"An error occurred while reading the CSV file: {e}")
return return
#calculate summary along the rows # calculate summary along the rows
sumDf = df.sum() sumDf = df.sum()
if get_args().compensate == True: if get_args().compensate == True:
logging.info("Filesrc latency compensation is ON") logging.info("Filesrc and rawvideoparse latency compensation is ON")
filesrcData = df.loc["filesrc0"] filesrcData = df.loc["filesrc0"]
rawvideoparseData = df.loc["rawvideoparse0"]
sumDf -= filesrcData sumDf -= filesrcData
print(sumDf.head()) sumDf -= rawvideoparseData
# return logging.debug(f"\n{sumDf.head()}")
df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set # calculate mean across non-unique runs:
def get_base_metric(metric):
"""Strips suffixes like '.1' or '.2' from the metric name."""
return re.sub(r'\.\d+$', '', str(metric))
metric_level_values = sumDf.index.get_level_values(-1)
base_metrics_key = metric_level_values.map(get_base_metric)
config_levels = list(range(sumDf.index.nlevels - 1)
) # This gives [0, 1, 2, 3]
grouping_keys = sumDf.index.droplevel(config_levels) # type: ignore
grouping_keys = [
sumDf.index.get_level_values(i) for i in config_levels
] + [base_metrics_key]
# 3. Perform Grouping and Mean Calculation
# This command groups all entries that share the same (Config + Base Metric),
# collapsing (avg, avg.1, avg.2) into a single average.
averaged_sumDf = sumDf.groupby(grouping_keys).mean()
logging.info(f"\n{averaged_sumDf.head(10)}")
sumDf = averaged_sumDf
df_summary = sumDf.unstack(level=-1) # or level='Metric' if names are set
# 2. Sort the resulting DataFrame by the desired metric column. # 2. Sort the resulting DataFrame by the desired metric column.
df_sorted_by_max = df_summary.sort_values(by='max', ascending=True) df_sorted_by_max = df_summary.sort_values(
df_sorted_by_avg = df_summary.sort_values(by='avg', ascending=True) by='max', ascending=True) # type: ignore
df_sorted_by_median = df_summary.sort_values(by='median', ascending=True) df_sorted_by_avg = df_summary.sort_values(
by='avg', ascending=True) # type: ignore
df_sorted_by_median = df_summary.sort_values(
by='median', ascending=True) # type: ignore
print("SORTED BY MAX") print("SORTED BY MAX")
print(df_sorted_by_max) print(df_sorted_by_max)
print("---------------") print("---------------")
@ -139,7 +183,8 @@ def analyze_latency_data(csv_path: str):
# 2. Find the intersection (common elements) of the three sets of indices # 2. Find the intersection (common elements) of the three sets of indices
# max is the main index because it commonly introduces the largest amount of latency to the stream # max is the main index because it commonly introduces the largest amount of latency to the stream
common_indices = max_indices.intersection(avg_indices).intersection(median_indices) common_indices = max_indices.intersection(
avg_indices).intersection(median_indices)
# 3. Filter the original summary DataFrame (df_summary) using the common indices # 3. Filter the original summary DataFrame (df_summary) using the common indices
df_common_top_performers = df_summary.loc[common_indices] df_common_top_performers = df_summary.loc[common_indices]
@ -156,8 +201,8 @@ def analyze_latency_data(csv_path: str):
top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv") top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv")
return return
if __name__ == '__main__':
    # Script entry point: make sure both output directories exist, then run
    # the latency analysis on the CSV selected on the command line.
    args = get_args()
    for out_dir in (args.csv_dir, args.plot_dir):
        os.makedirs(out_dir, exist_ok=True)
    analyze_latency_data(args.latency_csv)

View File

@ -4,19 +4,22 @@ import numpy as np
import logging import logging
import argparse import argparse
import os import os
import re
# Configure logging to show informational messages # Configure logging to show informational messages
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s')
def parse_args(): def parse_args():
parser = argparse.ArgumentParser(prog=__file__) parser = argparse.ArgumentParser(prog=__file__)
parser.add_argument( parser.add_argument(
'--quality-csv', '--quality-csv',
type=str, type=str,
default='sample/qualityResultsnvh264enc.csv', default='sample/qualityResultsnvh264enc.csv',
help='Path to the quality results CSV file.' help='Path to the quality results CSV file.'
) )
parser.add_argument('-pd','--plot-dir', parser.add_argument('-pd', '--plot-dir',
type=str, type=str,
default='plots/', default='plots/',
help='Path to directory in which resulted plots should be saved') help='Path to directory in which resulted plots should be saved')
@ -26,21 +29,25 @@ def parse_args():
help='Path to directory in which resulted csv data should be saved') help='Path to directory in which resulted csv data should be saved')
return parser.parse_args() return parser.parse_args()
cmd_args = None cmd_args = None
def get_args():
    """Return the parsed command-line arguments, parsing them only once.

    The result is cached in the module-level ``cmd_args`` variable, so every
    later call returns the same namespace without touching ``sys.argv`` again.
    """
    global cmd_args
    # Guard clause: reuse the cached namespace when it already exists.
    if cmd_args is not None:
        return cmd_args
    cmd_args = parse_args()
    return cmd_args
def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
""" """
Draws a bar plot comparing PSNR and SSIM for the top 10 video configurations. Draws a bar plot comparing PSNR and SSIM for the top 10 video configurations.
The plot uses a primary Y-axis for PSNR and a secondary Y-axis for SSIM The plot uses a primary Y-axis for PSNR and a secondary Y-axis for SSIM
due to their different value ranges. The X-axis uses simple numerical indices, due to their different value ranges. The X-axis uses simple numerical indices,
with detailed configuration notes printed separately below the plot. with detailed configuration notes printed separately below the plot.
Args: Args:
df: DataFrame containing the top configurations, must have 'PSNR' and 'SSIM' columns. df: DataFrame containing the top configurations, must have 'PSNR' and 'SSIM' columns.
file_name: Name of the file to which plot would be saved. file_name: Name of the file to which plot would be saved.
@ -48,50 +55,50 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
""" """
# Use the top 10 rows for plotting # Use the top 10 rows for plotting
plot_df = df.head(10).copy() plot_df = df.head(10).copy()
if plot_df.empty: if plot_df.empty:
logging.warning("DataFrame is empty, cannot generate plot.") logging.warning("DataFrame is empty, cannot generate plot.")
return return
# Create the index for the x-axis (0 to 9 for bar plotting) # Create the index for the x-axis (0 to 9 for bar plotting)
config_indices = np.arange(len(plot_df)) config_indices = np.arange(len(plot_df))
# 1. Create simple numerical labels for the X-axis (1 to 10) # 1. Create simple numerical labels for the X-axis (1 to 10)
x_labels_simple = [str(i + 1) for i in config_indices] x_labels_simple = [str(i + 1) for i in config_indices]
# 2. Generate notes mapping index to configuration details (similar to the template) # 2. Generate notes mapping index to configuration details (similar to the template)
quality_notes = {} quality_notes = {}
for i, row in plot_df.iterrows(): for i, row in plot_df.iterrows():
# Format: Index: encoder | profile | video | parameters # Format: Index: encoder | profile | video | parameters
note_parts = [ note_parts = [
row['encoder'], row['encoder'],
row['profile'], row['profile'],
row['video'], row['video'],
row['parameters'] row['parameters']
] ]
quality_notes[len(quality_notes) + 1] = " | ".join(note_parts) quality_notes[len(quality_notes) + 1] = " | ".join(note_parts)
# 3. Setup the figure and the primary axis (ax1) # 3. Setup the figure and the primary axis (ax1)
fig, ax1 = plt.subplots(figsize=(12, 6)) fig, ax1 = plt.subplots(figsize=(12, 6))
# Define bar width and positions # Define bar width and positions
bar_width = 0.35 bar_width = 0.35
# 4. Plot PSNR on the primary axis (left) # 4. Plot PSNR on the primary axis (left)
bar1 = ax1.bar(config_indices - bar_width/2, plot_df['PSNR'], bar_width, bar1 = ax1.bar(config_indices - bar_width/2, plot_df['PSNR'], bar_width,
label='PSNR (dB)', color='Blue', edgecolor='grey') label='PSNR (dB)', color='Blue', edgecolor='grey')
ax1.set_xlabel('Configuration Index', fontsize=12) # Simplified X-label ax1.set_xlabel('Configuration Index', fontsize=12) # Simplified X-label
ax1.set_ylabel('PSNR (dB)', color='Black', fontsize=12) ax1.set_ylabel('PSNR (dB)', color='Black', fontsize=12)
ax1.tick_params(axis='y', labelcolor='Black') ax1.tick_params(axis='y', labelcolor='Black')
ax1.set_xticks(config_indices) ax1.set_xticks(config_indices)
# Use simple numerical labels for the X-axis # Use simple numerical labels for the X-axis
ax1.set_xticklabels(x_labels_simple, fontsize=10) ax1.set_xticklabels(x_labels_simple, fontsize=10)
# Add PSNR value labels above the bars # Add PSNR value labels above the bars
for rect in bar1: for rect in bar1:
height = rect.get_height() height = rect.get_height()
ax1.annotate(f'PSNR={height:.2f}', ax1.annotate(f'PSNR={height:.2f}',
xy=(rect.get_x() + rect.get_width() / 2, height / 1.5 ), xy=(rect.get_x() + rect.get_width() / 2, height / 1.5),
xytext=(0, 0), # zero offset: the text sits exactly at the xy anchor xytext=(0, 0), # zero offset: the text sits exactly at the xy anchor
textcoords="offset points", transform_rotates_text=True, textcoords="offset points", transform_rotates_text=True,
rotation=90, rotation=90,
@ -99,18 +106,18 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
# 5. Create a secondary axis (ax2) for SSIM (twinx) # 5. Create a secondary axis (ax2) for SSIM (twinx)
ax2 = ax1.twinx() ax2 = ax1.twinx()
# 6. Plot SSIM on the secondary axis (right) # 6. Plot SSIM on the secondary axis (right)
bar2 = ax2.bar(config_indices + bar_width/2, plot_df['SSIM'], bar_width, bar2 = ax2.bar(config_indices + bar_width/2, plot_df['SSIM'], bar_width,
label='SSIM', color='Red', edgecolor='grey') label='SSIM', color='Red', edgecolor='grey')
ax2.set_ylabel('SSIM (Structural Similarity)', color='Black', fontsize=12) ax2.set_ylabel('SSIM (Structural Similarity)', color='Black', fontsize=12)
ax2.tick_params(axis='y', labelcolor='Black') ax2.tick_params(axis='y', labelcolor='Black')
# Add SSIM value labels above the bars # Add SSIM value labels above the bars
for rect in bar2: for rect in bar2:
height = rect.get_height() height = rect.get_height()
ax2.annotate(f'SSIM={height:.4f}', ax2.annotate(f'SSIM={height:.4f}',
xy=(rect.get_x() + rect.get_width() / 2, height / 1.5 ), xy=(rect.get_x() + rect.get_width() / 2, height / 1.5),
xytext=(0, 0), # zero offset: the text sits exactly at the xy anchor xytext=(0, 0), # zero offset: the text sits exactly at the xy anchor
textcoords="offset points", transform_rotates_text=True, textcoords="offset points", transform_rotates_text=True,
rotation=90, rotation=90,
@ -118,12 +125,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
# 7. Final Plot appearance # 7. Final Plot appearance
fig.suptitle(title) fig.suptitle(title)
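fig.tight_layout(rect=[0, 0.03, 1, 0.95]) fig.tight_layout(rect={0.0, 0.03, 1.0, 0.95}) # type: ignore  # NOTE(review): rect is now a set literal, which is unordered; the previous list kept the (left, bottom, right, top) order — confirm a tuple/list was intended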
# Combine legends from both axes # Combine legends from both axes
lines1, labels1 = ax1.get_legend_handles_labels() lines1, labels1 = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels() lines2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(lines1 + lines2, labels1 + labels2, bbox_to_anchor=(0.6, 1.1), ncol=2) ax1.legend(lines1 + lines2, labels1 + labels2,
bbox_to_anchor=(0.6, 1.1), ncol=2)
plt.grid(axis='y', linestyle='--', alpha=0.7) plt.grid(axis='y', linestyle='--', alpha=0.7)
plt.savefig(f'{file_name}.png') plt.savefig(f'{file_name}.png')
@ -133,11 +141,13 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
for index, note in quality_notes.items(): for index, note in quality_notes.items():
print(f"Index {index}: {note}") print(f"Index {index}: {note}")
def analyze_quality_report(csv_path: str): def analyze_quality_report(csv_path: str):
# --- 1. Load Data with Multi-level Headers --- # --- 1. Load Data with Multi-level Headers ---
try: try:
df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0) df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
logging.info(f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}") logging.info(
f"Successfully loaded '{csv_path}' with multi-level headers. Shape: {df.shape}")
if df.index.name == 'Unnamed: 0': if df.index.name == 'Unnamed: 0':
df.index.name = 'component' df.index.name = 'component'
except FileNotFoundError: except FileNotFoundError:
@ -146,16 +156,45 @@ def analyze_quality_report(csv_path: str):
except Exception as e: except Exception as e:
logging.error(f"An error occurred while reading the CSV file: {e}") logging.error(f"An error occurred while reading the CSV file: {e}")
return return
# Get row with average results # Get row with average results
avgDf = df.loc["Average"] avgDf = df.loc["Average"]
logging.info(f"\n{avgDf.head(10)}")
# calculate mean across non-unique runs:
def get_base_metric(metric):
"""Strips suffixes like '.1' or '.2' from the metric name."""
return re.sub(r'\.\d+$', '', str(metric))
metric_level_values = avgDf.index.get_level_values(-1)
base_metrics_key = metric_level_values.map(get_base_metric)
config_levels = list(range(avgDf.index.nlevels - 1)
) # This gives [0, 1, 2, 3]
grouping_keys = avgDf.index.droplevel(config_levels) # type: ignore
grouping_keys = [
avgDf.index.get_level_values(i) for i in config_levels
] + [base_metrics_key]
# 3. Perform Grouping and Mean Calculation
# This command groups all entries that share the same (Config + Base Metric),
# collapsing (avg, avg.1, avg.2) into a single average.
averaged_sumDf = avgDf.groupby(grouping_keys).mean()
logging.info(f"\n{averaged_sumDf.head(10)}")
avgDf = averaged_sumDf
logging.info(f"\n{avgDf.head(10)}")
avgDf = avgDf.unstack(level=-1) avgDf = avgDf.unstack(level=-1)
encoder_name = avgDf.index.get_level_values(0)[0] encoder_name = avgDf.index.get_level_values(0)[0]
logging.debug(f"encoder_name={encoder_name}") logging.debug(f"encoder_name={encoder_name}")
dfPSNRsorted = avgDf.sort_values(by="PSNR", ascending=False) dfPSNRsorted = avgDf.sort_values(
dfSSIMsorted = avgDf.sort_values(by="SSIM", ascending=False) by="PSNR", ascending=False) # type: ignore
dfSSIMsorted = avgDf.sort_values(
by="SSIM", ascending=False) # type: ignore
indexPSNR = dfPSNRsorted.index indexPSNR = dfPSNRsorted.index
indexSSIM = dfSSIMsorted.index indexSSIM = dfSSIMsorted.index
@ -168,53 +207,65 @@ def analyze_quality_report(csv_path: str):
# Convert the MultiIndex (encoder, profile, video, parameters) into columns # Convert the MultiIndex (encoder, profile, video, parameters) into columns
df_quality_results = intersectedDf.reset_index() df_quality_results = intersectedDf.reset_index()
# Rename the columns to match the latency report's structure # Rename the columns to match the latency report's structure
df_quality_results.columns = ['encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM'] df_quality_results.columns = [
logging.debug(f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}") 'encoder', 'profile', 'video', 'parameters', 'PSNR', 'SSIM']
logging.debug(
f"Prepared quality results dataframe columns: {df_quality_results.columns.tolist()}")
# Now intersected with latency report # Now intersected with latency report
latency_df = pd.read_csv(f'results/{encoder_name}.csv') latency_df = pd.read_csv(f'results/{encoder_name}.csv')
columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile', 'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'} columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile',
'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
latency_df.rename(columns=columns, inplace=True) latency_df.rename(columns=columns, inplace=True)
logging.debug(f"\n{latency_df.head()}") logging.debug(f"\n{latency_df.head()}")
# --- 4. Merge Quality and Latency Reports --- # --- 4. Merge Quality and Latency Reports ---
# Use an inner merge on the four identifier columns to combine the data. # Use an inner merge on the four identifier columns to combine the data.
merge_keys = ['encoder', 'profile', 'video', 'parameters'] merge_keys = ['encoder', 'profile', 'video', 'parameters']
merged_df = pd.merge( merged_df = pd.merge(
df_quality_results, df_quality_results,
latency_df, latency_df,
on=merge_keys, on=merge_keys,
how='inner' # Only keep records present in both (i.e., the top quality configurations) # Only keep records present in both (i.e., the top quality configurations)
how='inner'
) )
logging.info("=" * 70) logging.info("=" * 70)
logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---") logging.info("--- Intersected Quality (PSNR/SSIM) and Latency Report ---")
logging.info(f"Number of common configuration entries found: {len(merged_df)}") logging.info(
f"Number of common configuration entries found: {len(merged_df)}")
logging.info("=" * 70) logging.info("=" * 70)
# Prepare for display # Prepare for display
merged_df_display = merged_df.sort_values(by='PSNR', ascending=False) merged_df_display = merged_df.sort_values(by='PSNR', ascending=False)
# Select and display key metrics # Select and display key metrics
display_columns = [ display_columns = [
'encoder', 'profile', 'video', 'parameters', 'encoder', 'profile', 'video', 'parameters',
'PSNR', 'SSIM', # Quality metrics 'PSNR', 'SSIM', # Quality metrics
'avg', 'max', 'median', 'std' # Latency metrics (assuming these are in the latency report) # Latency metrics (assuming these are in the latency report)
'avg', 'max', 'median', 'std'
] ]
final_cols = [col for col in display_columns if col in merged_df_display.columns] final_cols = [
col for col in display_columns if col in merged_df_display.columns]
print(f"\n{merged_df_display[final_cols].to_string()}") print(f"\n{merged_df_display[final_cols].to_string()}")
plot_top_configurations(merged_df_display, get_args().plot_dir + f"top_quality_configurations_by_latency_{encoder_name}", f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}") plot_top_configurations(merged_df_display,
get_args().plot_dir +
f"top_quality_configurations_by_latency_{encoder_name}",
f"Результаты качества для 10 лучших конфигураций по задержкам для {encoder_name}")
plot_top_configurations(df_quality_results, get_args().plot_dir + f"top_quality_configurations_{encoder_name}", f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}") plot_top_configurations(df_quality_results,
get_args().plot_dir +
f"top_quality_configurations_{encoder_name}",
f"10 лучших конфигураций по PSNR и SSIM для {encoder_name}")
return return
if __name__ == '__main__':
    # Script entry point: make sure both output directories exist, then run
    # the quality analysis on the CSV selected on the command line.
    args = get_args()
    for out_dir in (args.csv_dir, args.plot_dir):
        os.makedirs(out_dir, exist_ok=True)
    analyze_quality_report(args.quality_csv)