From 75274ee2bed39f754731c2eab4ebe6d4698274ab Mon Sep 17 00:00:00 2001
From: Artur Mukhamadiev <muhamadiev1@gmail.com>
Date: Tue, 14 Oct 2025 19:28:17 +0300
Subject: [PATCH] [research] changed encoder max with avg for latency calc

---
 latencyAnalysis.py | 153 +++++++++++++++++++++++++++++++--------------
 qualityAnalysis.py |   9 ++-
 2 files changed, 111 insertions(+), 51 deletions(-)

diff --git a/latencyAnalysis.py b/latencyAnalysis.py
index 419cc5d..5dae01e 100644
--- a/latencyAnalysis.py
+++ b/latencyAnalysis.py
@@ -30,6 +30,7 @@ def parse_args():
 
 
 cmd_args = None
+encoder_name = "default"
 
 
 def get_args():
@@ -40,6 +41,8 @@ def get_args():
 
 
 def plot_latency_data(df):
+    global encoder_name
+
     def create_labels(df_slice):
         labels = {}
         for i, index in enumerate(df_slice.index):
@@ -78,8 +81,6 @@ def plot_latency_data(df):
     yerr_upper_median = df_top_n[max_median_key] - mean_median_values
     yerr_median = np.array(
         [yerr_lower_median.values, yerr_upper_median.values])
-
-    encoder_name = df_top_n.index.get_level_values(0)[0]
     max_notes = create_labels(df_top_n)
 
     bar_width = 0.25
@@ -92,7 +93,17 @@ def plot_latency_data(df):
 
     fig = plt.figure(figsize=(12, 7), dpi=300)
 
-    plt.bar(
+    def add_annotation(bar):
+        for rect in bar:
+            height = rect.get_height()
+            plt.annotate(f'{height:.2f} мс',
+                         xy=(rect.get_x() + rect.get_width() / 2, height / 2),
+                         xytext=(0, 0),  # 3 points vertical offset
+                         textcoords="offset points", transform_rotates_text=True,
+                         rotation=90,
+                         ha='center', va='bottom', fontsize=10, color='White')
+
+    bar1 = plt.bar(
         r_max_orig,
         df_top_n[mean_max_key],
         yerr=yerr_max_orig,
@@ -102,8 +113,9 @@ def plot_latency_data(df):
         edgecolor='grey',
         label='Максимальная задержка'
     )
+    add_annotation(bar1)
 
-    plt.bar(
+    bar2 = plt.bar(
         r_avg,
         df_top_n[mean_avg_key],
         yerr=yerr_avg,
@@ -113,8 +125,9 @@ def plot_latency_data(df):
         edgecolor='grey',
         label='Средняя задержка'
     )
+    add_annotation(bar2)
 
-    plt.bar(
+    bar3 = plt.bar(
         r_median,
         df_top_n[mean_median_key],
         yerr=yerr_median,
@@ -124,6 +137,7 @@ def plot_latency_data(df):
         edgecolor='grey',
         label='Медианная задержка'
     )
+    add_annotation(bar3)
 
     plt.xlabel('Индекс конфигурации', fontweight='bold')
     plt.ylabel('Общая задержка [мс]', fontweight='bold')
@@ -146,12 +160,13 @@ def plot_latency_data(df):
 
 
 def plot_start_latency(df):
+    global encoder_name
     fig = plt.figure(figsize=(10, 6), dpi=300)
     r1 = np.arange(len(df))
 
-    mean_col = ('mean', 'max')
-    min_col = ('left', 'max')
-    max_col = ('right', 'max')
+    mean_col = ('mean', 'avg')
+    min_col = ('left', 'avg')
+    max_col = ('right', 'avg')
 
     mean_values = df[mean_col]
     min_values = df[min_col]
@@ -173,13 +188,77 @@ def plot_start_latency(df):
                  )
     plt.xlabel('Индекс конфигурации', fontweight='bold')
     plt.ylabel('Общая задержка [мс]', fontweight='bold')
-    encoder_name = df.index.get_level_values(0)[0]
     plt.title(f"Результаты стартовой задержки для {encoder_name}")
     plt.tight_layout()
     plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png")
     plt.close()
 
 
+def get_base_metric(metric):
+    """Strips suffixes like '.1' or '.2' from the metric name."""
+    return re.sub(r'\.\d+$', '', str(metric))
+
+
+def compensate(func):
+    def wrapper(df: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame:
+        logging.debug("Inside transpose decorator")
+        res_df = func(df, group)
+        if get_args().compensate == True:
+            logging.info(
+                "Filesrc and rawvideoparse latency compensation is ON")
+            res_df = res_df.drop('filesrc0', axis=0)
+            res_df = res_df.drop('rawvideoparse0', axis=0)
+        return res_df
+    return wrapper
+
+
+def log_result(func):
+    def wrapper(*args, **kwargs):
+        res = func(*args, **kwargs)
+        logging.info(f"\n{res}")
+        return res
+    return wrapper
+
+
+@compensate
+def transpose_and_set(df: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame:
+    new_column_index_data = group.index
+    res_df = group.T
+    res_df.columns = pd.MultiIndex.from_tuples(
+        new_column_index_data,
+        names=df.columns.names
+    )
+    return res_df
+
+
+@log_result
+def do_sum_and_change(df: pd.DataFrame):
+    global encoder_name
+    idx = pd.IndexSlice
+    # Workaround: overwrite the encoder row's 'avg' entries with its 'max' entries,
+    # so the plotting code can read the 'avg' columns without special-casing the encoder.
+    mean_max_values = df.loc[f"{encoder_name}0",
+                             idx[:, :, :, :, 'max']]  # type: ignore
+    logging.info(mean_max_values.values)
+    df.loc[f"{encoder_name}0", idx[:, :, :, :, 'avg']
+           ] = mean_max_values.values  # type: ignore
+    # The encoder's recorded avg latency is replaced by its max latency so the sum gives the full pipeline latency.
+    res_df = df.sum()
+    return res_df
+
+
+def get_grouping_keys(df: pd.DataFrame):
+    metric_level_values = df.columns.get_level_values(-1)
+    base_metrics_key = metric_level_values.map(get_base_metric)
+
+    config_levels_to_group = list(range(df.columns.nlevels - 1))
+
+    grouping_keys = [
+        df.columns.get_level_values(i) for i in config_levels_to_group
+    ] + [base_metrics_key]
+    return grouping_keys
+
+
 def analyze_latency_data(csv_path: str):
     """
     Analyzes latency data to find the top 10 components (rows) contributing most
@@ -188,6 +267,7 @@ def analyze_latency_data(csv_path: str):
     Args:
         csv_path (str): The path to the input CSV file.
     """
+    global encoder_name
 
     try:
         df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0)
@@ -201,45 +281,23 @@ def analyze_latency_data(csv_path: str):
     except Exception as e:
         logging.error(f"An error occurred while reading the CSV file: {e}")
         return
+    encoder_name = df.columns.get_level_values(0)[0]
+    logging.info(f"encoder name={encoder_name}")
+    grouping_keys = get_grouping_keys(df)
 
-    sumDf = df.sum()
-    if get_args().compensate == True:
-        logging.info("Filesrc and rawvideoparse latency compensation is ON")
-        filesrcData = df.loc["filesrc0"]
-        rawvideoparseData = df.loc["rawvideoparse0"]
-        sumDf -= filesrcData
-        sumDf -= rawvideoparseData
-    logging.debug(f"\n{sumDf.head()}")
+    mean_df = transpose_and_set(df, df.T.groupby(grouping_keys).mean())
+    min_df = transpose_and_set(df, df.T.groupby(grouping_keys).min())
+    max_df = transpose_and_set(df, df.T.groupby(grouping_keys).max())
 
-    def get_base_metric(metric):
-        """Strips suffixes like '.1' or '.2' from the metric name."""
-        return re.sub(r'\.\d+$', '', str(metric))
+    logging.info(f"\n{mean_df}")
 
-    metric_level_values = sumDf.index.get_level_values(-1)
-
-    base_metrics_key = metric_level_values.map(get_base_metric)
-
-    config_levels = list(range(sumDf.index.nlevels - 1))
-
-    grouping_keys = sumDf.index.droplevel(config_levels)  # type: ignore
-    grouping_keys = [
-        sumDf.index.get_level_values(i) for i in config_levels
-    ] + [base_metrics_key]
-
-    # 3. Perform Grouping and Mean Calculation
-    # This command groups all entries that share the same (Config + Base Metric),
-    # collapsing (avg, avg.1, avg.2) into a single average.
-    sumDfgrouping = sumDf.groupby(grouping_keys)
-    averaged_sumDf = sumDfgrouping.mean()
-    max_sumDf = sumDfgrouping.max()
-    min_sumDf = sumDfgrouping.min()
-
-    logging.debug(f"\n{max_sumDf.head(10)}")
-    logging.debug(f"\n{min_sumDf.head(10)}")
-    logging.info(f"\n{averaged_sumDf.head(10)}")
+    # From here on the dataframes are summed, so per-element data is no longer accessible.
+    mean_sumDf = do_sum_and_change(mean_df)
+    min_sumDf = do_sum_and_change(min_df)
+    max_sumDf = do_sum_and_change(max_df)
 
     merged_sumDf = pd.concat(
-        [min_sumDf, averaged_sumDf, max_sumDf],
+        [min_sumDf, mean_sumDf, max_sumDf],
         axis=1,
         keys=['left', 'mean', 'right']
     )
@@ -269,13 +327,12 @@ def analyze_latency_data(csv_path: str):
     median_indices = df_sorted_by_median.index
 
     # 2. Find the intersection (common elements) of the three sets of indices
-    # max is main index because it is commonly introduces the largest amount of latency to the stream
-    common_indices = max_indices.intersection(
-        avg_indices).intersection(median_indices)
+    # avg is the main index because it commonly introduces the largest amount of latency to the stream
+    common_indices = avg_indices.intersection(
+        max_indices).intersection(median_indices)
 
     # 3. Filter the original summary DataFrame (df_summary) using the common indices
     df_common_top_performers = df_summary.loc[common_indices]
-    encoder_name = df_common_top_performers.index.get_level_values(0)[0]
 
     print(df_common_top_performers.head())
 
@@ -284,7 +341,7 @@ def analyze_latency_data(csv_path: str):
     plot_start_latency(df_common_top_performers)
 
     # 4. Save top performers to csv
-    top_10_df = df_common_top_performers.head(10)
+    top_10_df = df_common_top_performers.head(10)["mean"]
     top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv")
     return
 
diff --git a/qualityAnalysis.py b/qualityAnalysis.py
index 7d89d35..0688a6d 100644
--- a/qualityAnalysis.py
+++ b/qualityAnalysis.py
@@ -79,7 +79,7 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
         quality_notes[len(quality_notes) + 1] = " | ".join(note_parts)
 
     # 3. Setup the figure and the primary axis (ax1)
-    fig, ax1 = plt.subplots(figsize=(12, 6))
+    fig, ax1 = plt.subplots(figsize=(10, 6), dpi=300)
 
     # Define bar width and positions
     bar_width = 0.35
@@ -125,7 +125,8 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str):
 
     # 7. Final Plot appearance
     fig.suptitle(title)
-    fig.tight_layout(rect={0.0, 0.03, 1.0, 0.95})  # type: ignore
+    rect = tuple([0.0, 0.0, 1.0, 0.95])
+    fig.tight_layout(rect=rect)  # type: ignore
 
     # Combine legends from both axes
     lines1, labels1 = ax1.get_legend_handles_labels()
@@ -214,9 +215,11 @@ def analyze_quality_report(csv_path: str):
 
     # Now intersected with latency report
     latency_df = pd.read_csv(f'results/{encoder_name}.csv')
+
+    logging.info(latency_df.head())
     columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile',
                'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'}
-    latency_df.rename(columns=columns, inplace=True)
+    latency_df.rename(columns=columns, inplace=True)  # type: ignore
     logging.debug(f"\n{latency_df.head()}")
 
     # --- 4. Merge Quality and Latency Reports ---