Azure · v-pbavanari · Feb 16, 2022
diff --git a/...tion-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb b/...tion-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb
diff --git a/...-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb b/...-learning/classification-credit-card-fraud/auto-ml-classification-credit-card-fraud.ipynb
diff --git a/.../automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.ipynb b/.../automated-machine-learning/classification-text-dnn/auto-ml-classification-text-dnn.ipynb
diff --git a/how-to-use-azureml/automated-machine-learning/classification-text-dnn/helper.py b/how-to-use-azureml/automated-machine-learning/classification-text-dnn/helper.py
@@ -4,52 +4,65 @@
 from azureml.core.run import Run
 
 
-def run_inference(test_experiment, compute_target, script_folder, train_run,
-                  test_dataset, target_column_name, model_name):
+def run_inference(
+    test_experiment,
+    compute_target,
+    script_folder,
+    train_run,
+    test_dataset,
+    target_column_name,
+    model_name,
+):
 
     inference_env = train_run.get_environment()
 
-    est = Estimator(source_directory=script_folder,
-                    entry_script='infer.py',
-                    script_params={
-                        '--target_column_name': target_column_name,
-                        '--model_name': model_name
-                    },
-                    inputs=[
-                        test_dataset.as_named_input('test_data')
-                    ],
-                    compute_target=compute_target,
-                    environment_definition=inference_env)
+    est = Estimator(
+        source_directory=script_folder,
+        entry_script="infer.py",
+        script_params={
+            "--target_column_name": target_column_name,
+            "--model_name": model_name,
+        },
+        inputs=[test_dataset.as_named_input("test_data")],
+        compute_target=compute_target,
+        environment_definition=inference_env,
+    )
 
     run = test_experiment.submit(
-        est, tags={
-            'training_run_id': train_run.id,
-            'run_algorithm': train_run.properties['run_algorithm'],
-            'valid_score': train_run.properties['score'],
-            'primary_metric': train_run.properties['primary_metric']
-        })
-
-    run.log("run_algorithm", run.tags['run_algorithm'])
+        est,
+        tags={
+            "training_run_id": train_run.id,
+            "run_algorithm": train_run.properties["run_algorithm"],
+            "valid_score": train_run.properties["score"],
+            "primary_metric": train_run.properties["primary_metric"],
+        },
+    )
+
+    run.log("run_algorithm", run.tags["run_algorithm"])
     return run
 
 
 def get_result_df(remote_run):
 
     children = list(remote_run.get_children(recursive=True))
-    summary_df = pd.DataFrame(index=['run_id', 'run_algorithm',
-                                     'primary_metric', 'Score'])
+    summary_df = pd.DataFrame(
+        index=["run_id", "run_algorithm", "primary_metric", "Score"]
+    )
     goal_minimize = False
     for run in children:
-        if('run_algorithm' in run.properties and 'score' in run.properties):
-            summary_df[run.id] = [run.id, run.properties['run_algorithm'],
-                                  run.properties['primary_metric'],
-                                  float(run.properties['score'])]
-            if('goal' in run.properties):
-                goal_minimize = run.properties['goal'].split('_')[-1] == 'min'
+        if "run_algorithm" in run.properties and "score" in run.properties:
+            summary_df[run.id] = [
+                run.id,
+                run.properties["run_algorithm"],
+                run.properties["primary_metric"],
+                float(run.properties["score"]),
+            ]
+            if "goal" in run.properties:
+                goal_minimize = run.properties["goal"].split("_")[-1] == "min"
 
     summary_df = summary_df.T.sort_values(
-        'Score',
-        ascending=goal_minimize).drop_duplicates(['run_algorithm'])
-    summary_df = summary_df.set_index('run_algorithm')
+        "Score", ascending=goal_minimize
+    ).drop_duplicates(["run_algorithm"])
+    summary_df = summary_df.set_index("run_algorithm")
 
     return summary_df
diff --git a/how-to-use-azureml/automated-machine-learning/classification-text-dnn/infer.py b/how-to-use-azureml/automated-machine-learning/classification-text-dnn/infer.py
@@ -12,33 +12,39 @@
 
 parser = argparse.ArgumentParser()
 parser.add_argument(
-    '--target_column_name', type=str, dest='target_column_name',
-    help='Target Column Name')
+    "--target_column_name",
+    type=str,
+    dest="target_column_name",
+    help="Target Column Name",
+)
 parser.add_argument(
-    '--model_name', type=str, dest='model_name',
-    help='Name of registered model')
+    "--model_name", type=str, dest="model_name", help="Name of registered model"
+)
 
 args = parser.parse_args()
 target_column_name = args.target_column_name
 model_name = args.model_name
 
-print('args passed are: ')
-print('Target column name: ', target_column_name)
-print('Name of registered model: ', model_name)
+print("args passed are: ")
+print("Target column name: ", target_column_name)
+print("Name of registered model: ", model_name)
 
 model_path = Model.get_model_path(model_name)
 # deserialize the model file back into a sklearn model
 model = joblib.load(model_path)
 
 run = Run.get_context()
 # get input dataset by name
-test_dataset = run.input_datasets['test_data']
+test_dataset = run.input_datasets["test_data"]
 
-X_test_df = test_dataset.drop_columns(columns=[target_column_name]) \
-                        .to_pandas_dataframe()
-y_test_df = test_dataset.with_timestamp_columns(None) \
-                        .keep_columns(columns=[target_column_name]) \
-                        .to_pandas_dataframe()
+X_test_df = test_dataset.drop_columns(
+    columns=[target_column_name]
+).to_pandas_dataframe()
+y_test_df = (
+    test_dataset.with_timestamp_columns(None)
+    .keep_columns(columns=[target_column_name])
+    .to_pandas_dataframe()
+)
 
 predicted = model.predict_proba(X_test_df)
 
@@ -47,11 +53,13 @@
 
 # Use the AutoML scoring module
 train_labels = model.classes_
-class_labels = np.unique(np.concatenate((y_test_df.values, np.reshape(train_labels, (-1, 1)))))
+class_labels = np.unique(
+    np.concatenate((y_test_df.values, np.reshape(train_labels, (-1, 1))))
+)
 classification_metrics = list(constants.CLASSIFICATION_SCALAR_SET)
-scores = scoring.score_classification(y_test_df.values, predicted,
-                                      classification_metrics,
-                                      class_labels, train_labels)
+scores = scoring.score_classification(
+    y_test_df.values, predicted, classification_metrics, class_labels, train_labels
+)
 
 print("scores:")
 print(scores)