Skip to content

Commit

Permalink
Follow up changes for bug #3170343. (#3248)
Browse files Browse the repository at this point in the history
# Description

This PR contains follow up changes for PR
#3214

# All Promptflow Contribution checklist:
- [x] **The pull request does not introduce [breaking changes].**
- [x] **CHANGELOG is updated for new features, bug fixes or other
significant changes.**
- [x] **I have read the [contribution guidelines](../CONTRIBUTING.md).**
- [x] **Create an issue and link to the pull request to get dedicated
review from promptflow team. Learn more: [suggested
workflow](../CONTRIBUTING.md#suggested-workflow).**

## General Guidelines and Best Practices
- [x] Title of the pull request is clear and informative.
- [x] There are a small number of commits, each of which has an
informative message. This means that previously merged commits do not
appear in the history of the PR. For more information on cleaning up the
commits in your PR, [see this
page](https://github.com/Azure/azure-powershell/blob/master/documentation/development-docs/cleaning-up-commits.md).

### Testing Guidelines
- [x] Pull request includes test coverage for the included changes.
  • Loading branch information
nick863 authored May 14, 2024
1 parent f4c51dd commit 235cb01
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 16 deletions.
34 changes: 20 additions & 14 deletions src/promptflow-evals/promptflow/evals/evaluate/_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,20 +140,17 @@ def _apply_target_to_data(
# Sometimes user data may contain a column named the same as one generated by the target.
# In this case we will not rename the column.
generated_columns = set(rename_dict.values())
tgt_out_and_input = set()
for col in initial_data.columns:
if col in generated_columns:
tgt_out = f'{prefix}{col}'
del rename_dict[tgt_out]
tgt_out_and_input.add(tgt_out)
tgt_out_and_input.add(col)
# Sort output by line numbers
target_output.set_index(f"inputs.{LINE_NUMBER}", inplace=True)
target_output.sort_index(inplace=True)
target_output.reset_index(inplace=True, drop=False)
# target_output contains only input columns, taken by function,
# so we need to concatenate it to the input data frame.
drop_columns = set(target_output.columns) - set(rename_dict.keys()) - tgt_out_and_input
drop_columns = list(filter(lambda x: x.startswith('inputs'), target_output.columns))
target_output.drop(drop_columns, inplace=True, axis=1)
# Remove outputs. prefix
target_output.rename(columns=rename_dict, inplace=True)
Expand All @@ -162,10 +159,19 @@ def _apply_target_to_data(
return target_output, generated_columns, run


def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace: bool = False):
def _apply_column_mapping(
source_df: pd.DataFrame, mapping_config: dict, inplace: bool = False) -> pd.DataFrame:
"""
Apply column mapping to source_df based on mapping_config.
This function is used for pre-validation of input data for evaluators
:param source_df: the data frame to be changed.
:type source_df: pd.DataFrame
:param mapping_config: The configuration, containing column mapping.
:type mapping_config: dict.
:param inplace: If true, the source_df will be changed inplace.
:type inplace: bool
:return: The modified data frame.
"""
result_df = source_df

Expand All @@ -188,12 +194,12 @@ def _apply_column_mapping(source_df: pd.DataFrame, mapping_config: dict, inplace
# target, the column will have "outputs." prefix. We will use
# target-generated column for validation.
if col_outputs in source_df.columns:
# If column needs to be mapped to already existing column.
if map_to_key in source_df.columns:
columns_to_drop.add(map_to_key)
column_mapping[col_outputs] = map_to_key
else:
column_mapping[map_from_key] = map_to_key
map_from_key = col_outputs

# If column needs to be mapped to already existing column.
if map_to_key in source_df.columns:
columns_to_drop.add(map_to_key)
column_mapping[map_from_key] = map_to_key
# If a column that was marked to be dropped can actually be renamed,
# we will not drop it.
columns_to_drop = columns_to_drop - set(column_mapping.keys())
Expand Down Expand Up @@ -224,13 +230,13 @@ def _process_evaluator_config(evaluator_config: Dict[str, Dict[str, str]]):
"Ensure only ${target.} and ${data.} are used."
)

# Replace ${target.} with ${run.}
# Replace ${target.} with ${run.outputs.}
processed_config[evaluator][map_to_key] = map_value.replace("${target.", "${run.outputs.")

return processed_config


def _rename_columns_maybe(df: pd.DataFrame, target_generated: Set[str]):
def _rename_columns_conditionally(df: pd.DataFrame, target_generated: Set[str]):
"""
Change the column names for data frame. The change happens inplace.
Expand Down Expand Up @@ -371,7 +377,7 @@ def evaluate(
# Rename columns, generated by template function to outputs instead of inputs.
# If target generates columns, already present in the input data, these columns
# will be marked as outputs already so we do not need to rename them.
input_data_df = _rename_columns_maybe(input_data_df, target_generated_columns)
input_data_df = _rename_columns_conditionally(input_data_df, target_generated_columns)

result_df = pd.concat([input_data_df, evaluators_result_df], axis=1, verify_integrity=True)
metrics = _calculate_mean(evaluators_result_df)
Expand Down
4 changes: 2 additions & 2 deletions src/promptflow-evals/tests/evals/unittests/test_evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from promptflow.evals.evaluate._evaluate import (
_apply_column_mapping,
_apply_target_to_data,
_rename_columns_maybe
_rename_columns_conditionally
)
from promptflow.evals.evaluators import F1ScoreEvaluator, GroundednessEvaluator

Expand Down Expand Up @@ -276,7 +276,7 @@ def test_renaming_column(self):
'outputs.generated': ['Generaged by target'],
'inputs.outputs.before': ['Despite prefix this column was before target.']
})
df_actuals = _rename_columns_maybe(df, {'presnt_generated', 'generated'})
df_actuals = _rename_columns_conditionally(df, {'presnt_generated', 'generated'})
assert_frame_equal(df_actuals.sort_index(axis=1), df_expected.sort_index(axis=1))

def test_evaluate_output_path(self, evaluate_test_data_jsonl_file, tmpdir):
Expand Down

0 comments on commit 235cb01

Please sign in to comment.