From 75274ee2bed39f754731c2eab4ebe6d4698274ab Mon Sep 17 00:00:00 2001 From: Artur Mukhamadiev Date: Tue, 14 Oct 2025 19:28:17 +0300 Subject: [PATCH] [research] changed encoder max with avg for latency calc --- latencyAnalysis.py | 153 +++++++++++++++++++++++++++++++-------------- qualityAnalysis.py | 9 ++- 2 files changed, 111 insertions(+), 51 deletions(-) diff --git a/latencyAnalysis.py b/latencyAnalysis.py index 419cc5d..5dae01e 100644 --- a/latencyAnalysis.py +++ b/latencyAnalysis.py @@ -30,6 +30,7 @@ def parse_args(): cmd_args = None +encoder_name = "default" def get_args(): @@ -40,6 +41,8 @@ def get_args(): def plot_latency_data(df): + global encoder_name + def create_labels(df_slice): labels = {} for i, index in enumerate(df_slice.index): @@ -78,8 +81,6 @@ def plot_latency_data(df): yerr_upper_median = df_top_n[max_median_key] - mean_median_values yerr_median = np.array( [yerr_lower_median.values, yerr_upper_median.values]) - - encoder_name = df_top_n.index.get_level_values(0)[0] max_notes = create_labels(df_top_n) bar_width = 0.25 @@ -92,7 +93,17 @@ def plot_latency_data(df): fig = plt.figure(figsize=(12, 7), dpi=300) - plt.bar( + def add_annotation(bar): + for rect in bar: + height = rect.get_height() + plt.annotate(f'{height:.2f} мс', + xy=(rect.get_x() + rect.get_width() / 2, height / 2), + xytext=(0, 0), # 3 points vertical offset + textcoords="offset points", transform_rotates_text=True, + rotation=90, + ha='center', va='bottom', fontsize=10, color='White') + + bar1 = plt.bar( r_max_orig, df_top_n[mean_max_key], yerr=yerr_max_orig, @@ -102,8 +113,9 @@ def plot_latency_data(df): edgecolor='grey', label='Максимальная задержка' ) + add_annotation(bar1) - plt.bar( + bar2 = plt.bar( r_avg, df_top_n[mean_avg_key], yerr=yerr_avg, @@ -113,8 +125,9 @@ def plot_latency_data(df): edgecolor='grey', label='Средняя задержка' ) + add_annotation(bar2) - plt.bar( + bar3 = plt.bar( r_median, df_top_n[mean_median_key], yerr=yerr_median, @@ -124,6 +137,7 @@ def plot_latency_data(df): edgecolor='grey', label='Медианная задержка' ) + add_annotation(bar3) plt.xlabel('Индекс конфигурации', fontweight='bold') plt.ylabel('Общая задержка [мс]', fontweight='bold') @@ -146,12 +160,13 @@ def plot_latency_data(df): def plot_start_latency(df): + global encoder_name fig = plt.figure(figsize=(10, 6), dpi=300) r1 = np.arange(len(df)) - mean_col = ('mean', 'max') - min_col = ('left', 'max') - max_col = ('right', 'max') + mean_col = ('mean', 'avg') + min_col = ('left', 'avg') + max_col = ('right', 'avg') mean_values = df[mean_col] min_values = df[min_col] @@ -173,13 +188,77 @@ def plot_start_latency(df): ) plt.xlabel('Индекс конфигурации', fontweight='bold') plt.ylabel('Общая задержка [мс]', fontweight='bold') - encoder_name = df.index.get_level_values(0)[0] plt.title(f"Результаты стартовой задержки для {encoder_name}") plt.tight_layout() plt.savefig(get_args().plot_dir + f"start_latency_{encoder_name}.png") plt.close() +def get_base_metric(metric): + """Strips suffixes like '.1' or '.2' from the metric name.""" + return re.sub(r'\.\d+$', '', str(metric)) + + +def compensate(func): + def wrapper(df: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame: + logging.debug("Inside transpose decorator") + res_df = func(df, group) + if get_args().compensate == True: + logging.info( + "Filesrc and rawvideoparse latency compensation is ON") + res_df = res_df.drop('filesrc0', axis=0) + res_df = res_df.drop('rawvideoparse0', axis=0) + return res_df + return wrapper + + +def log_result(func): + def wrapper(*args, **kwargs): + res = func(*args, **kwargs) + logging.info(f"\n{res}") + return res + return wrapper + + +@compensate +def transpose_and_set(df: pd.DataFrame, group: pd.DataFrame) -> pd.DataFrame: + new_column_index_data = group.index + res_df = group.T + res_df.columns = pd.MultiIndex.from_tuples( + new_column_index_data, + names=df.columns.names + ) + return res_df + + +@log_result +def do_sum_and_change(df: pd.DataFrame): + global encoder_name + idx = pd.IndexSlice + # some shit, to be fair, so here we are trying to replace value with correct one, + # so we will have zero pain in ass on data plotting + mean_max_values = df.loc[f"{encoder_name}0", + idx[:, :, :, :, 'max']] # type: ignore + logging.info(mean_max_values.values) + df.loc[f"{encoder_name}0", idx[:, :, :, :, 'avg'] + ] = mean_max_values.values # type: ignore + # we want to change recorded encoder avg latency with max latency to get full pipeline latency + res_df = df.sum() + return res_df + + +def get_grouping_keys(df: pd.DataFrame): + metric_level_values = df.columns.get_level_values(-1) + base_metrics_key = metric_level_values.map(get_base_metric) + + config_levels_to_group = list(range(df.columns.nlevels - 1)) + + grouping_keys = [ + df.columns.get_level_values(i) for i in config_levels_to_group + ] + [base_metrics_key] + return grouping_keys + + def analyze_latency_data(csv_path: str): """ Analyzes latency data to find the top 10 components (rows) contributing most @@ -188,6 +267,7 @@ def analyze_latency_data(csv_path: str): Args: csv_path (str): The path to the input CSV file. """ + global encoder_name try: df = pd.read_csv(csv_path, header=[0, 1, 2, 3, 4], index_col=0) @@ -201,45 +281,23 @@ def analyze_latency_data(csv_path: str): except Exception as e: logging.error(f"An error occurred while reading the CSV file: {e}") return + encoder_name = df.columns.get_level_values(0)[0] + logging.info(f"encoder name={encoder_name}") + grouping_keys = get_grouping_keys(df) - sumDf = df.sum() - if get_args().compensate == True: - logging.info("Filesrc and rawvideoparse latency compensation is ON") - filesrcData = df.loc["filesrc0"] - rawvideoparseData = df.loc["rawvideoparse0"] - sumDf -= filesrcData - sumDf -= rawvideoparseData - logging.debug(f"\n{sumDf.head()}") + mean_df = transpose_and_set(df, df.T.groupby(grouping_keys).mean()) + min_df = transpose_and_set(df, df.T.groupby(grouping_keys).min()) + max_df = transpose_and_set(df, df.T.groupby(grouping_keys).max()) - def get_base_metric(metric): - """Strips suffixes like '.1' or '.2' from the metric name.""" - return re.sub(r'\.\d+$', '', str(metric)) + logging.info(f"\n{mean_df}") - metric_level_values = sumDf.index.get_level_values(-1) - - base_metrics_key = metric_level_values.map(get_base_metric) - - config_levels = list(range(sumDf.index.nlevels - 1)) - - grouping_keys = sumDf.index.droplevel(config_levels) # type: ignore - grouping_keys = [ - sumDf.index.get_level_values(i) for i in config_levels - ] + [base_metrics_key] - - # 3. Perform Grouping and Mean Calculation - # This command groups all entries that share the same (Config + Base Metric), - # collapsing (avg, avg.1, avg.2) into a single average. - sumDfgrouping = sumDf.groupby(grouping_keys) - averaged_sumDf = sumDfgrouping.mean() - max_sumDf = sumDfgrouping.max() - min_sumDf = sumDfgrouping.min() - - logging.debug(f"\n{max_sumDf.head(10)}") - logging.debug(f"\n{min_sumDf.head(10)}") - logging.info(f"\n{averaged_sumDf.head(10)}") + # at this stage our dataframe is summarized no per element data is accessible + mean_sumDf = do_sum_and_change(mean_df) + min_sumDf = do_sum_and_change(min_df) + max_sumDf = do_sum_and_change(max_df) merged_sumDf = pd.concat( - [min_sumDf, averaged_sumDf, max_sumDf], + [min_sumDf, mean_sumDf, max_sumDf], axis=1, keys=['left', 'mean', 'right'] ) @@ -269,13 +327,12 @@ def analyze_latency_data(csv_path: str): median_indices = df_sorted_by_median.index # 2. Find the intersection (common elements) of the three sets of indices - # max is main index because it is commonly introduces the largest amount of latency to the stream - common_indices = max_indices.intersection( - avg_indices).intersection(median_indices) + # avg is main index because it is commonly introduces the largest amount of latency to the stream + common_indices = avg_indices.intersection( + max_indices).intersection(median_indices) # 3. Filter the original summary DataFrame (df_summary) using the common indices df_common_top_performers = df_summary.loc[common_indices] - encoder_name = df_common_top_performers.index.get_level_values(0)[0] print(df_common_top_performers.head()) @@ -284,7 +341,7 @@ def analyze_latency_data(csv_path: str): plot_start_latency(df_common_top_performers) # 4. Save top performers to csv - top_10_df = df_common_top_performers.head(10) + top_10_df = df_common_top_performers.head(10)["mean"] top_10_df.to_csv(get_args().csv_dir + f"{encoder_name}.csv") return diff --git a/qualityAnalysis.py b/qualityAnalysis.py index 7d89d35..0688a6d 100644 --- a/qualityAnalysis.py +++ b/qualityAnalysis.py @@ -79,7 +79,7 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): quality_notes[len(quality_notes) + 1] = " | ".join(note_parts) # 3. Setup the figure and the primary axis (ax1) - fig, ax1 = plt.subplots(figsize=(12, 6)) + fig, ax1 = plt.subplots(figsize=(10, 6), dpi=300) # Define bar width and positions bar_width = 0.35 @@ -125,7 +125,8 @@ def plot_top_configurations(df: pd.DataFrame, file_name: str, title: str): # 7. Final Plot appearance fig.suptitle(title) - fig.tight_layout(rect={0.0, 0.03, 1.0, 0.95}) # type: ignore + rect = tuple([0.0, 0.0, 1.0, 0.95]) + fig.tight_layout(rect=rect) # type: ignore # Combine legends from both axes lines1, labels1 = ax1.get_legend_handles_labels() @@ -214,9 +215,11 @@ def analyze_quality_report(csv_path: str): # Now intersected with latency report latency_df = pd.read_csv(f'results/{encoder_name}.csv') + + logging.info(latency_df.head()) columns = {'Unnamed: 0': 'encoder', 'Unnamed: 1': 'profile', 'Unnamed: 2': 'video', 'Unnamed: 3': 'parameters'} - latency_df.rename(columns=columns, inplace=True) + latency_df.rename(columns=columns, inplace=True) # type: ignore logging.debug(f"\n{latency_df.head()}") # --- 4. Merge Quality and Latency Reports ---