From d8f0d7e580c60f733a7c3fdf7e2cc7c457a24899 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Thu, 31 Oct 2024 14:55:24 +0000
Subject: [PATCH 001/112] Update generate_final_report.py

---
 tools/submission/generate_final_report.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py
index ba2c368cd..79d9fe076 100644
--- a/tools/submission/generate_final_report.py
+++ b/tools/submission/generate_final_report.py
@@ -79,7 +79,7 @@ def main():
     df["p#"] = df.apply(lambda x: int(x["host_processors_per_node"]), axis=1)

     # details url
-    base_url = f"https://github.com/mlcommons/{args.repository}/tree/main"
+    base_url = f"https://github.com/{args.repository_owner}/{args.repository}/tree/{args.repository_branch}"
     df["Details"] = df.apply(
         lambda x: '=HYPERLINK("{}","details")'.format(
             "/".join(

From 6b1a0f87f46288d7b4b487f89e18f3151422694c Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Thu, 7 Nov 2024 11:54:48 +0000
Subject: [PATCH 002/112] Fix sdxl (#1911)

* Fix typo in fid_score.py, fail_safe for SDXL short runs

* [Automated Commit] Format Codebase

* Fix typo in fid_score.py, fail_safe for SDXL short runs

* Fix dlrmv2 reference implementation | Update run_local.sh
---
 recommendation/dlrm_v2/pytorch/run_local.sh |  4 +++-
 text_to_image/coco.py                       | 24 ++++++++++++---------
 text_to_image/tools/fid/fid_score.py        |  2 +-
 3 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/recommendation/dlrm_v2/pytorch/run_local.sh b/recommendation/dlrm_v2/pytorch/run_local.sh
index 0d054c6c4..3bc8ec667 100755
--- a/recommendation/dlrm_v2/pytorch/run_local.sh
+++ b/recommendation/dlrm_v2/pytorch/run_local.sh
@@ -2,7 +2,9 @@

 source ./run_common.sh

-common_opt="--mlperf_conf ../../../mlperf.conf"
+#mlperf.conf is now automatically loaded by loadgen
+#common_opt="--mlperf_conf ../../../mlperf.conf"
+
 OUTPUT_DIR=`pwd`/output/$name
 if [ ! -d $OUTPUT_DIR ]; then
     mkdir -p $OUTPUT_DIR
diff --git a/text_to_image/coco.py b/text_to_image/coco.py
index cb3956a01..e9499b0e6 100644
--- a/text_to_image/coco.py
+++ b/text_to_image/coco.py
@@ -176,20 +176,24 @@ def __call__(self, results, ids, expected=None, result_dict=None):
     def save_images(self, ids, ds):
         info = []
         idx = {}
-        for i, id in enumerate(self.content_ids):
-            if id in ids:
-                idx[id] = i
+        for i, image_id in enumerate(self.content_ids):
+            if image_id in ids:
+                idx[image_id] = i
         if not os.path.exists("images/"):
             os.makedirs("images/", exist_ok=True)
-        for id in ids:
-            caption = ds.get_caption(id)
-            generated = Image.fromarray(self.results[idx[id]])
-            image_path_tmp = f"images/{self.content_ids[idx[id]]}.png"
+        for image_id in ids:
+            if not idx.get(image_id):
+                print(
+                    f"image id {image_id} is missing in the results. Hence not saved.")
+                continue
+            caption = ds.get_caption(image_id)
+            generated = Image.fromarray(self.results[idx[image_id]])
+            image_path_tmp = f"images/{self.content_ids[idx[image_id]]}.png"
             generated.save(image_path_tmp)
-            info.append((self.content_ids[idx[id]], caption))
+            info.append((self.content_ids[idx[image_id]], caption))
         with open("images/captions.txt", "w+") as f:
-            for id, caption in info:
-                f.write(f"{id} {caption}\n")
+            for image_id, caption in info:
+                f.write(f"{image_id} {caption}\n")

     def start(self):
         self.results = []
diff --git a/text_to_image/tools/fid/fid_score.py b/text_to_image/tools/fid/fid_score.py
index febc12ff5..8e486c8b7 100644
--- a/text_to_image/tools/fid/fid_score.py
+++ b/text_to_image/tools/fid/fid_score.py
@@ -44,7 +44,7 @@
 import pathlib
 import os
 import sys
-sys.path.insert("..", 0)
+sys.path.insert(0, "..")

 from inception import InceptionV3  # noqa: E402
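[Note on the fail-safe in patch 002: `idx.get(image_id)` returns the stored
position, and position 0 is falsy, so an image that happens to sit at index 0
would be reported as missing. A membership test avoids that edge case. A
minimal runnable sketch of the distinction (illustrative data, not a further
patch to coco.py):

    content_ids = [42, 7, 99]
    results = ["img-for-42", "img-for-7", "img-for-99"]
    idx = {image_id: i for i, image_id in enumerate(content_ids)}

    for image_id in [42, 7, 13]:
        # `if not idx.get(image_id):` would also skip id 42, whose index is 0.
        if image_id not in idx:
            print(f"image id {image_id} is missing in the results. Hence not saved.")
            continue
        print(image_id, "->", results[idx[image_id]])
]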
Hence not saved.") + continue + caption = ds.get_caption(image_id) + generated = Image.fromarray(self.results[idx[image_id]]) + image_path_tmp = f"images/{self.content_ids[idx[image_id]]}.png" generated.save(image_path_tmp) - info.append((self.content_ids[idx[id]], caption)) + info.append((self.content_ids[idx[image_id]], caption)) with open("images/captions.txt", "w+") as f: - for id, caption in info: - f.write(f"{id} {caption}\n") + for image_id, caption in info: + f.write(f"{image_id} {caption}\n") def start(self): self.results = [] diff --git a/text_to_image/tools/fid/fid_score.py b/text_to_image/tools/fid/fid_score.py index febc12ff5..8e486c8b7 100644 --- a/text_to_image/tools/fid/fid_score.py +++ b/text_to_image/tools/fid/fid_score.py @@ -44,7 +44,7 @@ import pathlib import os import sys -sys.path.insert("..", 0) +sys.path.insert(0, "..") from inception import InceptionV3 # noqa: E402 From a4ba51fb2244f2efc703c341b13411676297e299 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 7 Nov 2024 20:07:07 +0530 Subject: [PATCH 003/112] Fixes for filtering invalid results --- tools/submission/preprocess_submission.py | 28 +++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py index 1e26b81ca..9d44b91d6 100644 --- a/tools/submission/preprocess_submission.py +++ b/tools/submission/preprocess_submission.py @@ -141,6 +141,22 @@ def change_folder_name_in_path(path, old_folder_name, new_folder_name): new_path = os.path.join(*path_parts) return new_path +def clean_model_dir(model_results_dir): + model_measurements_dir = change_folder_name_in_path(model_results_dir, "results", "measurements") + model_compliance_dir = change_folder_name_in_path(model_results_dir, "results", "compliance") + + print(f"rmtree {model_results_dir}") + shutil.rmtree(model_results_dir) + shutil.rmtree(model_measurements_dir) + shutil.rmtree(model_compliance_dir) + sut_results_dir = os.path.dirname(model_results_dir) + if not os.listdir(sut_results_dir): + #clean sut dir + sut = os.path.basename(sut_results_dir) + print(f"No benchmark results remaining for {sut}. rmtree {sut_results_dir}") + shutil.rmtree(sut_results_dir) + shutil.rmtree(os.path.dirname(model_measurements_dir)) + shutil.rmtree(os.path.dirname(model_compliance_dir)) def clean_invalid_results(args, log_path, config, system_desc, system_json, model, mlperf_model, division, system_id_json, is_closed_or_network): @@ -176,6 +192,7 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json, except Exception as e: log.warning(e) perf_is_valid = False + compliance_is_valid = False if perf_is_valid: power_path = os.path.join(scenario_path, "performance", "power") has_power = os.path.exists(power_path) @@ -260,9 +277,11 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json, # if only accuracy or compliance failed, result is valid # for open if not perf_is_valid: - shutil.rmtree(scenario_path) log.warning( f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} and open divisions. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. 
Removing it...") + shutil.rmtree(scenario_path) + scenario_measurements_path = change_folder_name_in_path(scenario_path, "results", "measurements") + shutil.rmtree(scenario_measurements_path) if not os.path.exists(target_results_path): shutil.copytree( model_results_path, target_results_path) @@ -288,9 +307,7 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json, log.warning(f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} division. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. Compliance: {compliance_is_valid}. Moving other scenario results of {model} to open...") else: log.warning(f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} division. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. Removing all dependent scenario results...") - shutil.rmtree(model_results_path) - shutil.rmtree(model_measurements_path) - shutil.rmtree(model_compliance_path) + clean_model_dir(model_results_path) else: # delete this result # delete other scenario results too shutil.rmtree(scenario_path) @@ -517,6 +534,9 @@ def main(): infer_scenario_results(args, config) + if not args.nodelete_empty_dirs: + delete_empty_dirs(os.path.join(src_dir)) + return 0 From 451b310ef42a28e015ce5abc4e43ba6033ff8d4a Mon Sep 17 00:00:00 2001 From: arjunsuresh Date: Thu, 7 Nov 2024 14:44:42 +0000 Subject: [PATCH 004/112] [Automated Commit] Format Codebase --- tools/submission/preprocess_submission.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py index 9d44b91d6..7803cf568 100644 --- a/tools/submission/preprocess_submission.py +++ b/tools/submission/preprocess_submission.py @@ -141,9 +141,12 @@ def change_folder_name_in_path(path, old_folder_name, new_folder_name): new_path = os.path.join(*path_parts) return new_path + def clean_model_dir(model_results_dir): - model_measurements_dir = change_folder_name_in_path(model_results_dir, "results", "measurements") - model_compliance_dir = change_folder_name_in_path(model_results_dir, "results", "compliance") + model_measurements_dir = change_folder_name_in_path( + model_results_dir, "results", "measurements") + model_compliance_dir = change_folder_name_in_path( + model_results_dir, "results", "compliance") print(f"rmtree {model_results_dir}") shutil.rmtree(model_results_dir) @@ -151,13 +154,15 @@ def clean_model_dir(model_results_dir): shutil.rmtree(model_compliance_dir) sut_results_dir = os.path.dirname(model_results_dir) if not os.listdir(sut_results_dir): - #clean sut dir + # clean sut dir sut = os.path.basename(sut_results_dir) - print(f"No benchmark results remaining for {sut}. rmtree {sut_results_dir}") + print( + f"No benchmark results remaining for {sut}. rmtree {sut_results_dir}") shutil.rmtree(sut_results_dir) shutil.rmtree(os.path.dirname(model_measurements_dir)) shutil.rmtree(os.path.dirname(model_compliance_dir)) + def clean_invalid_results(args, log_path, config, system_desc, system_json, model, mlperf_model, division, system_id_json, is_closed_or_network): # cleanup invalid results @@ -280,7 +285,8 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json, log.warning( f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} and open divisions. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. 
Removing it...") shutil.rmtree(scenario_path) - scenario_measurements_path = change_folder_name_in_path(scenario_path, "results", "measurements") + scenario_measurements_path = change_folder_name_in_path( + scenario_path, "results", "measurements") shutil.rmtree(scenario_measurements_path) if not os.path.exists(target_results_path): shutil.copytree( From 4c109ea8b5b17d0c422d4b8a08a55070142c68ae Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 7 Nov 2024 14:58:42 +0000 Subject: [PATCH 005/112] Update preprocess_submission.py --- tools/submission/preprocess_submission.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py index 7803cf568..a1678c79d 100644 --- a/tools/submission/preprocess_submission.py +++ b/tools/submission/preprocess_submission.py @@ -2,10 +2,6 @@ Tool to infer scenario results and cleanup submission tree """ -from __future__ import division -from __future__ import print_function -from __future__ import unicode_literals - import argparse import logging import os @@ -156,7 +152,7 @@ def clean_model_dir(model_results_dir): if not os.listdir(sut_results_dir): # clean sut dir sut = os.path.basename(sut_results_dir) - print( + log.info( f"No benchmark results remaining for {sut}. rmtree {sut_results_dir}") shutil.rmtree(sut_results_dir) shutil.rmtree(os.path.dirname(model_measurements_dir)) From 40c1fe0c28364b243b5944b3569000611ddf2b7d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 7 Nov 2024 21:20:52 +0530 Subject: [PATCH 006/112] Added an option to pass in sample_ids.txt for SDXL accuracy check --- text_to_image/tools/accuracy_coco.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py index 2d7c36506..8740ee172 100644 --- a/text_to_image/tools/accuracy_coco.py +++ b/text_to_image/tools/accuracy_coco.py @@ -51,6 +51,10 @@ def get_args(): required=False, help="path to dump 10 stable diffusion xl compliance images", ) + #Do not use for official MLPerf inference submissions as only the default one is valid + parser.add_argument( + "--ids-path", help="Path to 10 caption ids to dump as compliance images", default="os.path.join(os.path.dirname(__file__), 'sample_ids.txt')" + ) parser.add_argument("--device", default="cpu", choices=["gpu", "cpu"]) parser.add_argument( "--low_memory", @@ -97,8 +101,9 @@ def main(): os.makedirs(args.compliance_images_path) dump_compliance_images = True compliance_images_idx_list = [] + sample_ids_file_path = args.ids_path if args.ids_path else os.path.join(os.path.dirname(__file__), "sample_ids.txt") with open( - os.path.join(os.path.dirname(__file__), "sample_ids.txt"), "r" + os.path.join(sample_ids_file_path, "r" ) as compliance_id_file: for line in compliance_id_file: idx = int(line.strip()) From 89a2ffe257bc8c4c0d8e81cb5c1fec4e15080b2a Mon Sep 17 00:00:00 2001 From: arjunsuresh Date: Thu, 7 Nov 2024 15:51:36 +0000 Subject: [PATCH 007/112] [Automated Commit] Format Codebase --- text_to_image/tools/accuracy_coco.py | 88 ++++++++++++++-------------- 1 file changed, 45 insertions(+), 43 deletions(-) diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py index 8740ee172..bc3f87d04 100644 --- a/text_to_image/tools/accuracy_coco.py +++ b/text_to_image/tools/accuracy_coco.py @@ -51,7 +51,8 @@ def get_args(): required=False, help="path to dump 10 stable diffusion xl compliance images", ) - #Do not use for 
From 89a2ffe257bc8c4c0d8e81cb5c1fec4e15080b2a Mon Sep 17 00:00:00 2001
From: arjunsuresh
Date: Thu, 7 Nov 2024 15:51:36 +0000
Subject: [PATCH 007/112] [Automated Commit] Format Codebase

---
 text_to_image/tools/accuracy_coco.py | 88 ++++++++++++++--------------
 1 file changed, 45 insertions(+), 43 deletions(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index 8740ee172..bc3f87d04 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -51,7 +51,8 @@ def get_args():
         required=False,
         help="path to dump 10 stable diffusion xl compliance images",
     )
-    #Do not use for official MLPerf inference submissions as only the default one is valid
+    # Do not use for official MLPerf inference submissions as only the default
+    # one is valid
     parser.add_argument(
         "--ids-path", help="Path to 10 caption ids to dump as compliance images", default="os.path.join(os.path.dirname(__file__), 'sample_ids.txt')"
     )
@@ -101,12 +102,13 @@ def main():
             os.makedirs(args.compliance_images_path)
         dump_compliance_images = True
         compliance_images_idx_list = []
-        sample_ids_file_path = args.ids_path if args.ids_path else os.path.join(os.path.dirname(__file__), "sample_ids.txt")
+        sample_ids_file_path = args.ids_path if args.ids_path else os.path.join(
+            os.path.dirname(__file__), "sample_ids.txt")
         with open(
             os.path.join(sample_ids_file_path, "r"
         ) as compliance_id_file:
             for line in compliance_id_file:
-                idx = int(line.strip())
+                idx=int(line.strip())
                 compliance_images_idx_list.append(idx)
         # Dump caption.txt
         with open(
@@ -153,28 +155,28 @@ def compute_accuracy(
     statistics_path,
 ):
     # Load torchmetrics modules
-    clip = CLIPEncoder(device=device)
-    clip_scores = []
-    seen = set()
-    result_list = []
-    result_dict = {}
+    clip=CLIPEncoder(device=device)
+    clip_scores=[]
+    seen=set()
+    result_list=[]
+    result_dict={}

     # Load model outputs
     with open(mlperf_accuracy_file, "r") as f:
-        results = json.load(f)
+        results=json.load(f)

     for j in tqdm(results):
-        idx = j["qsl_idx"]
+        idx=j["qsl_idx"]
         if idx in seen:
             continue
         seen.add(idx)

         # Load generated image
-        generated_img = np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
+        generated_img=np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
             1024, 1024, 3
         )
         result_list.append(generated_img)
-        generated_img = Image.fromarray(generated_img)
+        generated_img=Image.fromarray(generated_img)

         # Dump compliance images
         if dump_compliance_images and idx in compliance_images_idx_list:
@@ -185,16 +187,16 @@ def compute_accuracy(
         # generated_img = torch.Tensor(generated_img).to(torch.uint8).to(device)

         # Load Ground Truth
-        caption = df_captions.iloc[idx]["caption"]
+        caption=df_captions.iloc[idx]["caption"]
         clip_scores.append(
             100 *
             clip.get_clip_score(
                 caption, generated_img).item())

-    fid_score = compute_fid(result_list, statistics_path, device)
+    fid_score=compute_fid(result_list, statistics_path, device)

-    result_dict["FID_SCORE"] = fid_score
-    result_dict["CLIP_SCORE"] = np.mean(clip_scores)
+    result_dict["FID_SCORE"]=fid_score
+    result_dict["CLIP_SCORE"]=np.mean(clip_scores)
     print(f"Accuracy Results: {result_dict}")

     with open(output_file, "w") as fp:
@@ -216,43 +218,43 @@ def compute_accuracy_low_memory(
 ):
     if num_workers is None:
         try:
-            num_cpus = len(os.sched_getaffinity(0))
+            num_cpus=len(os.sched_getaffinity(0))
         except AttributeError:
             # os.sched_getaffinity is not available under Windows, use
             # os.cpu_count instead (which may not return the *available* number
             # of CPUs).
-            num_cpus = os.cpu_count()
+            num_cpus=os.cpu_count()

-        num_workers = min(num_cpus, 8) if num_cpus is not None else 0
+        num_workers=min(num_cpus, 8) if num_cpus is not None else 0
     else:
-        num_workers = num_workers
+        num_workers=num_workers

     # Load torchmetrics modules
-    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[inception_dims]
-    inception_model = InceptionV3([block_idx]).to(device)
-    clip_model = CLIPEncoder(device=device)
+    block_idx=InceptionV3.BLOCK_INDEX_BY_DIM[inception_dims]
+    inception_model=InceptionV3([block_idx]).to(device)
+    clip_model=CLIPEncoder(device=device)

-    clip_scores = []
-    seen = set()
-    result_batch = []
-    result_dict = {}
-    activations = np.empty((0, inception_dims))
+    clip_scores=[]
+    seen=set()
+    result_batch=[]
+    result_dict={}
+    activations=np.empty((0, inception_dims))

     # Load model outputs
     with open(mlperf_accuracy_file, "r") as f:
-        results = ijson.items(f, "item")
+        results=ijson.items(f, "item")
     for j in tqdm(results):
-        idx = j["qsl_idx"]
+        idx=j["qsl_idx"]
         if idx in seen:
             continue
         seen.add(idx)

         # Load generated image
-        generated_img = np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
+        generated_img=np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
             1024, 1024, 3
         )
-        generated_img = Image.fromarray(generated_img)
+        generated_img=Image.fromarray(generated_img)

         # Dump compliance images
         if dump_compliance_images and idx in compliance_images_idx_list:
@@ -262,7 +264,7 @@ def compute_accuracy_low_memory(
                     f"{idx}.png"))

         # Load Ground Truth
-        caption = df_captions.iloc[idx]["caption"]
+        caption=df_captions.iloc[idx]["caption"]
         clip_scores.append(
             100 * clip_model.get_clip_score(caption, generated_img).item()
         )
@@ -270,7 +272,7 @@ def compute_accuracy_low_memory(
         result_batch.append(generated_img.convert("RGB"))

         if len(result_batch) == batch_size:
-            act = get_activations(
+            act=get_activations(
                 result_batch,
                 inception_model,
                 batch_size,
@@ -278,12 +280,12 @@ def compute_accuracy_low_memory(
                 device,
                 num_workers,
             )
-            activations = np.append(activations, act, axis=0)
+            activations=np.append(activations, act, axis=0)
             result_batch.clear()

     # Remaining data for last batch
     if len(result_batch) > 0:
-        act = get_activations(
+        act=get_activations(
             result_batch,
             inception_model,
             len(result_batch),
@@ -291,9 +293,9 @@ def compute_accuracy_low_memory(
             device,
             num_workers,
         )
-        activations = np.append(activations, act, axis=0)
+        activations=np.append(activations, act, axis=0)

-    m1, s1 = compute_statistics_of_path(
+    m1, s1=compute_statistics_of_path(
         statistics_path,
         inception_model,
         batch_size,
@@ -304,13 +306,13 @@ def compute_accuracy_low_memory(
         None,
     )

-    m2 = np.mean(activations, axis=0)
-    s2 = np.cov(activations, rowvar=False)
+    m2=np.mean(activations, axis=0)
+    s2=np.cov(activations, rowvar=False)

-    fid_score = calculate_frechet_distance(m1, s1, m2, s2)
+    fid_score=calculate_frechet_distance(m1, s1, m2, s2)

-    result_dict["FID_SCORE"] = fid_score
-    result_dict["CLIP_SCORE"] = np.mean(clip_scores)
+    result_dict["FID_SCORE"]=fid_score
+    result_dict["CLIP_SCORE"]=np.mean(clip_scores)

     print(f"Accuracy Results: {result_dict}")
     with open(output_file, "w") as fp:

From 69ffdc0aa783f9127af612a7de57c6329703c1dc Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Thu, 7 Nov 2024 20:19:13 +0000
Subject: [PATCH 008/112] Update accuracy_coco.py

---
 text_to_image/tools/accuracy_coco.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index bc3f87d04..0d0c01560 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -105,7 +105,7 @@ def main():
         sample_ids_file_path = args.ids_path if args.ids_path else os.path.join(
             os.path.dirname(__file__), "sample_ids.txt")
         with open(
-            os.path.join(sample_ids_file_path, "r"
+            os.path.join(sample_ids_file_path, "r")
         ) as compliance_id_file:
             for line in compliance_id_file:
                 idx=int(line.strip())
From d1d642e06f91e5b8f56088f8d1a4b127a65d962c Mon Sep 17 00:00:00 2001
From: arjunsuresh
Date: Thu, 7 Nov 2024 20:19:47 +0000
Subject: [PATCH 009/112] [Automated Commit] Format Codebase

---
 text_to_image/tools/accuracy_coco.py | 82 ++++++++++++++--------------
 1 file changed, 41 insertions(+), 41 deletions(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index 0d0c01560..d73325897 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -108,7 +108,7 @@ def main():
             os.path.join(sample_ids_file_path, "r")
         ) as compliance_id_file:
             for line in compliance_id_file:
-                idx=int(line.strip())
+                idx = int(line.strip())
                 compliance_images_idx_list.append(idx)
         # Dump caption.txt
         with open(
@@ -155,28 +155,28 @@ def compute_accuracy(
     statistics_path,
 ):
     # Load torchmetrics modules
-    clip=CLIPEncoder(device=device)
-    clip_scores=[]
-    seen=set()
-    result_list=[]
-    result_dict={}
+    clip = CLIPEncoder(device=device)
+    clip_scores = []
+    seen = set()
+    result_list = []
+    result_dict = {}

     # Load model outputs
     with open(mlperf_accuracy_file, "r") as f:
-        results=json.load(f)
+        results = json.load(f)

     for j in tqdm(results):
-        idx=j["qsl_idx"]
+        idx = j["qsl_idx"]
         if idx in seen:
             continue
         seen.add(idx)

         # Load generated image
-        generated_img=np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
+        generated_img = np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
             1024, 1024, 3
         )
         result_list.append(generated_img)
-        generated_img=Image.fromarray(generated_img)
+        generated_img = Image.fromarray(generated_img)

         # Dump compliance images
         if dump_compliance_images and idx in compliance_images_idx_list:
@@ -187,16 +187,16 @@ def compute_accuracy(
         # generated_img = torch.Tensor(generated_img).to(torch.uint8).to(device)

         # Load Ground Truth
-        caption=df_captions.iloc[idx]["caption"]
+        caption = df_captions.iloc[idx]["caption"]
         clip_scores.append(
             100 *
             clip.get_clip_score(
                 caption, generated_img).item())

-    fid_score=compute_fid(result_list, statistics_path, device)
+    fid_score = compute_fid(result_list, statistics_path, device)

-    result_dict["FID_SCORE"]=fid_score
-    result_dict["CLIP_SCORE"]=np.mean(clip_scores)
+    result_dict["FID_SCORE"] = fid_score
+    result_dict["CLIP_SCORE"] = np.mean(clip_scores)
     print(f"Accuracy Results: {result_dict}")

     with open(output_file, "w") as fp:
@@ -218,43 +218,43 @@ def compute_accuracy_low_memory(
 ):
     if num_workers is None:
         try:
-            num_cpus=len(os.sched_getaffinity(0))
+            num_cpus = len(os.sched_getaffinity(0))
         except AttributeError:
             # os.sched_getaffinity is not available under Windows, use
             # os.cpu_count instead (which may not return the *available* number
             # of CPUs).
-            num_cpus=os.cpu_count()
+            num_cpus = os.cpu_count()

-        num_workers=min(num_cpus, 8) if num_cpus is not None else 0
+        num_workers = min(num_cpus, 8) if num_cpus is not None else 0
     else:
-        num_workers=num_workers
+        num_workers = num_workers

     # Load torchmetrics modules
-    block_idx=InceptionV3.BLOCK_INDEX_BY_DIM[inception_dims]
-    inception_model=InceptionV3([block_idx]).to(device)
-    clip_model=CLIPEncoder(device=device)
+    block_idx = InceptionV3.BLOCK_INDEX_BY_DIM[inception_dims]
+    inception_model = InceptionV3([block_idx]).to(device)
+    clip_model = CLIPEncoder(device=device)

-    clip_scores=[]
-    seen=set()
-    result_batch=[]
-    result_dict={}
-    activations=np.empty((0, inception_dims))
+    clip_scores = []
+    seen = set()
+    result_batch = []
+    result_dict = {}
+    activations = np.empty((0, inception_dims))

     # Load model outputs
     with open(mlperf_accuracy_file, "r") as f:
-        results=ijson.items(f, "item")
+        results = ijson.items(f, "item")
     for j in tqdm(results):
-        idx=j["qsl_idx"]
+        idx = j["qsl_idx"]
         if idx in seen:
             continue
         seen.add(idx)

         # Load generated image
-        generated_img=np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
+        generated_img = np.frombuffer(bytes.fromhex(j["data"]), np.uint8).reshape(
             1024, 1024, 3
         )
-        generated_img=Image.fromarray(generated_img)
+        generated_img = Image.fromarray(generated_img)

         # Dump compliance images
         if dump_compliance_images and idx in compliance_images_idx_list:
@@ -264,7 +264,7 @@ def compute_accuracy_low_memory(
                     f"{idx}.png"))

         # Load Ground Truth
-        caption=df_captions.iloc[idx]["caption"]
+        caption = df_captions.iloc[idx]["caption"]
         clip_scores.append(
             100 * clip_model.get_clip_score(caption, generated_img).item()
         )
@@ -272,7 +272,7 @@ def compute_accuracy_low_memory(
         result_batch.append(generated_img.convert("RGB"))

         if len(result_batch) == batch_size:
-            act=get_activations(
+            act = get_activations(
                 result_batch,
                 inception_model,
                 batch_size,
@@ -280,12 +280,12 @@ def compute_accuracy_low_memory(
                 device,
                 num_workers,
             )
-            activations=np.append(activations, act, axis=0)
+            activations = np.append(activations, act, axis=0)
             result_batch.clear()

     # Remaining data for last batch
     if len(result_batch) > 0:
-        act=get_activations(
+        act = get_activations(
             result_batch,
             inception_model,
             len(result_batch),
@@ -293,9 +293,9 @@ def compute_accuracy_low_memory(
             device,
             num_workers,
         )
-        activations=np.append(activations, act, axis=0)
+        activations = np.append(activations, act, axis=0)

-    m1, s1=compute_statistics_of_path(
+    m1, s1 = compute_statistics_of_path(
         statistics_path,
         inception_model,
         batch_size,
@@ -306,13 +306,13 @@ def compute_accuracy_low_memory(
         None,
     )

-    m2=np.mean(activations, axis=0)
-    s2=np.cov(activations, rowvar=False)
+    m2 = np.mean(activations, axis=0)
+    s2 = np.cov(activations, rowvar=False)

-    fid_score=calculate_frechet_distance(m1, s1, m2, s2)
+    fid_score = calculate_frechet_distance(m1, s1, m2, s2)

-    result_dict["FID_SCORE"]=fid_score
-    result_dict["CLIP_SCORE"]=np.mean(clip_scores)
+    result_dict["FID_SCORE"] = fid_score
+    result_dict["CLIP_SCORE"] = np.mean(clip_scores)

     print(f"Accuracy Results: {result_dict}")
     with open(output_file, "w") as fp:

From 8d3b8ab09ac392b5a8656ad07d37fb8d7942595b Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 8 Nov 2024 03:44:42 +0530
Subject: [PATCH 010/112] Fix typo

---
 text_to_image/tools/accuracy_coco.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index d73325897..42ef8efe3 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -105,7 +105,7 @@ def main():
         sample_ids_file_path = args.ids_path if args.ids_path else os.path.join(
             os.path.dirname(__file__), "sample_ids.txt")
         with open(
-            os.path.join(sample_ids_file_path, "r")
+            sample_ids_file_path, "r"
         ) as compliance_id_file:
             for line in compliance_id_file:
                 idx = int(line.strip())
From b09b1efef4e5225d33618432cf71550ac135f501 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 8 Nov 2024 15:47:17 +0530
Subject: [PATCH 011/112] Not use default for sample_ids.txt

---
 text_to_image/tools/accuracy_coco.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index 42ef8efe3..b5f1be378 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -54,7 +54,7 @@ def get_args():
     # Do not use for official MLPerf inference submissions as only the default
     # one is valid
     parser.add_argument(
-        "--ids-path", help="Path to 10 caption ids to dump as compliance images", default="os.path.join(os.path.dirname(__file__), 'sample_ids.txt')"
+        "--ids-path", help="Path to 10 caption ids to dump as compliance images"
     )
     parser.add_argument("--device", default="cpu", choices=["gpu", "cpu"])
     parser.add_argument(
         "--low_memory",

From df5049d4dbec41862fef6dd7edf9fb064a779bd6 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Thu, 14 Nov 2024 23:12:35 +0000
Subject: [PATCH 012/112] Update requirements.txt (#1907)

Updating the pip packages
---
 text_to_image/requirements.txt | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/text_to_image/requirements.txt b/text_to_image/requirements.txt
index a0a850773..857de950e 100644
--- a/text_to_image/requirements.txt
+++ b/text_to_image/requirements.txt
@@ -1,8 +1,8 @@
-diffusers==0.21.2
-transformers==4.33.2
-accelerate==0.23.0
-open-clip-torch==2.7.0
-opencv-python==4.8.1.78
+diffusers==0.30.3
+transformers==4.45.2
+accelerate==1.0.1
+open-clip-torch==2.26.1
+opencv-python==4.10.0.84
 pycocotools==2.0.7
-torchmetrics[image]==1.2.0
-scipy==1.9.1
+torchmetrics[image]==1.4.3
+scipy==1.10.1

From a7e8c8ad2766e3fb64a31eb42c8cde724f7b055d Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 15 Nov 2024 16:46:53 +0530
Subject: [PATCH 013/112] Fix a bug in preprocess_submission

---
 tools/submission/preprocess_submission.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py
index a1678c79d..ec3aa1f7a 100644
--- a/tools/submission/preprocess_submission.py
+++ b/tools/submission/preprocess_submission.py
@@ -84,7 +84,6 @@ def delete_empty_dirs(src):
     """
     if not os.path.isdir(src):
         return False
-
     if all([delete_empty_dirs(os.path.join(src, file))
            for file in os.listdir(src)]):
         log.info("Removing empty dir: (%s)", src)
@@ -529,10 +528,12 @@ def main():
     if not args.nodelete_empty_dirs:
         delete_empty_dirs(os.path.join(src_dir))

+    run_dir = os.getcwd()
     os.chdir(src_dir)

     infer_scenario_results(args, config)

+    os.chdir(run_dir)
+
     if not args.nodelete_empty_dirs:
         delete_empty_dirs(os.path.join(src_dir))

     return 0

From 8915a90ea0fed700afbffbc75908cd2fbf103104 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 16 Nov 2024 22:04:18 +0000
Subject: [PATCH 014/112] Update submission_checker.py | Removed TEST05

---
 tools/submission/submission_checker.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py
index 5f2e27267..deff9eb8c 100755
--- a/tools/submission/submission_checker.py
+++ b/tools/submission/submission_checker.py
@@ -188,6 +188,7 @@
         "sample_index_rng_seed": 198141574272810017,
         "schedule_rng_seed": 7575108116881280410,
     },
+    # not required for v5.0+
     "test05_seeds": {
         # TODO: Update random seeds
         "qsl_rng_seed": 2376919268182438552,
@@ -2880,7 +2881,7 @@ def check_compliance_dir(
     compliance_perf_pass = True
     compliance_perf_dir_pass = True
     compliance_acc_pass = True
-    test_list = ["TEST01", "TEST04", "TEST05"]
+    test_list = ["TEST01", "TEST04"]

     if model in [
         "bert-99",
@@ -2899,7 +2900,7 @@ def check_compliance_dir(
     ]:
         test_list.remove("TEST04")

-    if model in [
+    if config.version in ["v4.0", "v4.1"] and model not in [
         "gptj-99",
         "gptj-99.9",
         "llama2-70b-99",
@@ -2907,7 +2908,7 @@ def check_compliance_dir(
         "llama2-70b-99.9",
         "stable-diffusion-xl",
         "mixtral-8x7b",
     ]:
-        test_list.remove("TEST05")
+        test_list.append("TEST05")

     if model in [
         "gptj-99",
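[Note on patch 014: the inverted condition is easy to misread. Before, TEST05
was removed from the list for the LLM-style benchmarks; now it is appended
only for v4.0/v4.1 submissions, and still never for those benchmarks, so
v5.0 submissions run only TEST01 and TEST04. A condensed, runnable model of
the resulting logic (the `llm_models` parameter is an illustrative stand-in
for the hard-coded list in the checker):

    def compliance_tests(version, model, llm_models):
        tests = ["TEST01", "TEST04"]
        if version in ("v4.0", "v4.1") and model not in llm_models:
            tests.append("TEST05")
        return tests

    assert "TEST05" not in compliance_tests("v5.0", "resnet", ["gptj-99"])
    assert "TEST05" in compliance_tests("v4.1", "resnet", ["gptj-99"])
]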
From da9e6bb95635d501f5f112dfc91c48f62465f5c0 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 00:24:22 +0530
Subject: [PATCH 015/112] Fix to SDXL accuracy output

---
 text_to_image/tools/accuracy_coco.py | 4 ++--
 text_to_image/tools/fid/fid_score.py | 4 ++++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index b5f1be378..dcf7ed889 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -195,8 +195,8 @@ def compute_accuracy(
             generated_img).item())

     fid_score = compute_fid(result_list, statistics_path, device)
-    result_dict["FID_SCORE"] = fid_score
-    result_dict["CLIP_SCORE"] = np.mean(clip_scores)
+    result_dict["FID_SCORE"] = f"{fid_score}"
+    result_dict["CLIP_SCORE"] = f"{np.mean(clip_scores)}"
     print(f"Accuracy Results: {result_dict}")

     with open(output_file, "w") as fp:
diff --git a/text_to_image/tools/fid/fid_score.py b/text_to_image/tools/fid/fid_score.py
index 8e486c8b7..e4a90c83f 100644
--- a/text_to_image/tools/fid/fid_score.py
+++ b/text_to_image/tools/fid/fid_score.py
@@ -356,6 +356,10 @@ def compute_fid(
         imgs, model, batch_size, dims, device, num_workers
     )

+    # Ensure dimensions match before calculating FID
+    assert s1.shape == s2.shape, f"Covariance shapes mismatch: {s1.shape} vs {s2.shape}"
+    assert m1.shape == m2.shape, f"Mean shapes mismatch: {m1.shape} vs {m2.shape}"
+
     fid_value = calculate_frechet_distance(m1, s1, m2, s2)
     return fid_value

From 13cd2e0abc98a36601e125f60dc96bc13738dc62 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 02:06:53 +0530
Subject: [PATCH 016/112] Added exists checks for rmtree in
 preprocess_submission script

---
 tools/submission/preprocess_submission.py | 55 +++++++++++++++--------
 1 file changed, 36 insertions(+), 19 deletions(-)

diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py
index a737c1f16..9587752cc 100644
--- a/tools/submission/preprocess_submission.py
+++ b/tools/submission/preprocess_submission.py
@@ -136,7 +136,6 @@ def change_folder_name_in_path(path, old_folder_name, new_folder_name):
     new_path = os.path.join(*path_parts)
     return new_path

-
 def clean_model_dir(model_results_dir):
     model_measurements_dir = change_folder_name_in_path(
         model_results_dir, "results", "measurements")
@@ -144,18 +143,25 @@ def clean_model_dir(model_results_dir):
         model_results_dir, "results", "compliance")

     print(f"rmtree {model_results_dir}")
-    shutil.rmtree(model_results_dir)
-    shutil.rmtree(model_measurements_dir)
-    shutil.rmtree(model_compliance_dir)
+    if os.path.exists(model_results_dir):
+        shutil.rmtree(model_results_dir)
+    if os.path.exists(model_measurements_dir):
+        shutil.rmtree(model_measurements_dir)
+    if os.path.exists(model_compliance_dir):
+        shutil.rmtree(model_compliance_dir)
+
     sut_results_dir = os.path.dirname(model_results_dir)
-    if not os.listdir(sut_results_dir):
+    if os.path.exists(sut_results_dir) and not os.listdir(sut_results_dir):
         # clean sut dir
         sut = os.path.basename(sut_results_dir)
         log.info(
             f"No benchmark results remaining for {sut}. rmtree {sut_results_dir}")
-        shutil.rmtree(sut_results_dir)
-        shutil.rmtree(os.path.dirname(model_measurements_dir))
-        shutil.rmtree(os.path.dirname(model_compliance_dir))
+        if os.path.exists(sut_results_dir):
+            shutil.rmtree(sut_results_dir)
+        if os.path.exists(os.path.dirname(model_measurements_dir)):
+            shutil.rmtree(os.path.dirname(model_measurements_dir))
+        if os.path.exists(os.path.dirname(model_compliance_dir)):
+            shutil.rmtree(os.path.dirname(model_compliance_dir))


 def clean_invalid_results(args, log_path, config, system_desc, system_json,
@@ -200,7 +206,7 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json,
                 ranging_path = os.path.join(
                     scenario_path, "performance", "ranging")
                 try:
-                    ranging_r = get_performance_metric(
+                    ranging_r = checker.get_performance_metric(
                         config,
                         mlperf_model,
                         ranging_path,
@@ -224,9 +230,13 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json,
                 if not power_is_valid:
                     log.warning(
                         f"Power result is invalid for {system_desc}: {model} {scenario} scenario in {division} division. Removing...")
-                    shutil.rmtree(power_path)
-                    shutil.rmtree(ranging_path)
-                    shutil.rmtree(os.path.join(perf_path, "spl.txt"))
+                    if os.path.exists(power_path):
+                        shutil.rmtree(power_path)
+                    if os.path.exists(ranging_path):
+                        shutil.rmtree(ranging_path)
+                    spl_path = os.path.join(perf_path, "spl.txt")
+                    if os.path.exists(spl_path):
+                        os.remove(spl_path)

         compliance_is_valid = True
         if is_closed_or_network:
@@ -253,9 +263,12 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json,
                     scenario_path, "results", "compliance")
                 log.warning(
                     f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} division. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. Removing...")
-                shutil.rmtree(scenario_path)
-                shutil.rmtree(scenario_measurements_path)
-                shutil.rmtree(scenario_compliance_path)
+                if os.path.exists(scenario_path):
+                    shutil.rmtree(scenario_path)
+                if os.path.exists(scenario_measurements_path):
+                    shutil.rmtree(scenario_measurements_path)
+                if os.path.exists(scenario_compliance_path):
+                    shutil.rmtree(scenario_compliance_path)
             elif division in ["closed", "network"]:
                 model_results_path = os.path.dirname(scenario_path)
                 model_measurements_path = change_folder_name_in_path(
@@ -279,10 +292,12 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json,
                     if not perf_is_valid:
                         log.warning(
                             f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} and open divisions. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. Removing it...")
-                        shutil.rmtree(scenario_path)
+                        if os.path.exists(scenario_path):
+                            shutil.rmtree(scenario_path)
                         scenario_measurements_path = change_folder_name_in_path(
                             scenario_path, "results", "measurements")
-                        shutil.rmtree(scenario_measurements_path)
+                        if os.path.exists(scenario_measurements_path):
+                            shutil.rmtree(scenario_measurements_path)
                         if not os.path.exists(target_results_path):
                             shutil.copytree(
                                 model_results_path, target_results_path)
@@ -311,9 +326,11 @@ def clean_invalid_results(args, log_path, config, system_desc, system_json,
                         clean_model_dir(model_results_path)
                 else:  # delete this result
                     # delete other scenario results too
-                    shutil.rmtree(scenario_path)
+                    if os.path.exists(scenario_path):
+                        shutil.rmtree(scenario_path)
                     # delete other scenario results too
-                    shutil.rmtree(scenario_measurements_path)
+                    if os.path.exists(scenario_measurements_path):
+                        shutil.rmtree(scenario_measurements_path)
                     log.warning(
                         f"{scenario} scenario result is invalid for {system_desc}: {model} in {division} division. Accuracy: {accuracy_is_valid}, Performance: {perf_is_valid}. Removing it...")

From db5e7370119b96e97f988e4ad0016d8eab246bb4 Mon Sep 17 00:00:00 2001
From: arjunsuresh
Date: Thu, 21 Nov 2024 20:37:27 +0000
Subject: [PATCH 017/112] [Automated Commit] Format Codebase

---
 tools/submission/preprocess_submission.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py
index 9587752cc..977af4d47 100644
--- a/tools/submission/preprocess_submission.py
+++ b/tools/submission/preprocess_submission.py
@@ -136,6 +136,7 @@ def change_folder_name_in_path(path, old_folder_name, new_folder_name):
     new_path = os.path.join(*path_parts)
     return new_path

+
 def clean_model_dir(model_results_dir):
     model_measurements_dir = change_folder_name_in_path(
         model_results_dir, "results", "measurements")

From dbecd2bfde2905f44fcedc347039d010744ea744 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Thu, 21 Nov 2024 21:03:14 +0000
Subject: [PATCH 018/112] Delete .github/workflows/format.yml

---
 .github/workflows/format.yml | 50 ------------------------------------
 1 file changed, 50 deletions(-)
 delete mode 100644 .github/workflows/format.yml

diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
deleted file mode 100644
index 829a3fcb7..000000000
--- a/.github/workflows/format.yml
+++ /dev/null
@@ -1,50 +0,0 @@
-# Automatic code formatting
-name: "format"
-on:
-  pull_request:
-    branches: [ master ]
-    types: [opened, closed, synchronize]
-
-
-env:
-  python_version: "3.9"
-  HEAD_REF: ${{ github.head_ref }}
-
-jobs:
-  format-code:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v3
-        with:
-          fetch-depth: 0
-      - name: Set up Python ${{ env.python_version }}
-        uses: actions/setup-python@v3
-        with:
-          python-version: ${{ env.python_version }}
-
-      - name: Install dependencies
-        run: |
-          python3 -m pip install autopep8
-
-      - name: Grant permissions
-        run: |
-          chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-cpp.sh"
-          chmod +x "${GITHUB_WORKSPACE}/.github/scripts/format-py.sh"
-
-      - name: Format Codebase
-        run: |
-          git remote add upstream ${{ github.event.pull_request.base.repo.clone_url }}
-          git fetch upstream ${{ github.event.pull_request.base.ref }}
-          ".github/scripts/format-cpp.sh" "upstream" "${{ github.event.pull_request.base.ref }}"
-          ".github/scripts/format-py.sh" "upstream" "${{ github.event.pull_request.base.ref }}"
-
-      - name: Commit
-        run: |
-          HAS_CHANGES=$(git diff --staged --name-only)
-          if [ ${#HAS_CHANGES} -gt 0 ]; then
-            git checkout -B "$HEAD_REF"
-            git config --global user.email "${{ github.actor }}@users.noreply.github.com"
-            git config --global user.name "${{ github.actor }}"
-            git commit -m '[Automated Commit] Format Codebase'
-            git push origin "$HEAD_REF"
-          fi
From 011a0d15273c0d7f52621aa638a91658341f4b49 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Thu, 21 Nov 2024 21:03:31 +0000
Subject: [PATCH 019/112] Delete .github/scripts directory

---
 .github/scripts/format-cpp.sh | 26 --------------------------
 .github/scripts/format-py.sh  | 26 --------------------------
 2 files changed, 52 deletions(-)
 delete mode 100755 .github/scripts/format-cpp.sh
 delete mode 100755 .github/scripts/format-py.sh

diff --git a/.github/scripts/format-cpp.sh b/.github/scripts/format-cpp.sh
deleted file mode 100755
index c0a237489..000000000
--- a/.github/scripts/format-cpp.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#================================================================
-# HEADER
-#================================================================
-# DESCRIPTION
-#    This is a script containing some commands to automatically
-#    format the c/c++ code contained in one folder.
-#    This will help to maintain good quality code in the github
-#    repository.
-# SET UP
-#    You need to allow the correct permissions for this file.
-#    This can be done by running:
-#    chmod u+x
-# REQUIREMENTS
-#    clang-format
-#================================================================
-# END_OF_HEADER
-#================================================================
-
-
-# Checks all the modified c/c++ files, format them and adds them
-# to the commit.
-for FILE in $(git diff $1/$2 --name-only | grep -E '.*\.(cc|cpp|h|hpp)$')
-do
-  clang-format -i -style=file $FILE
-  git add $FILE
-done
diff --git a/.github/scripts/format-py.sh b/.github/scripts/format-py.sh
deleted file mode 100755
index ca3ca5671..000000000
--- a/.github/scripts/format-py.sh
+++ /dev/null
@@ -1,26 +0,0 @@
-#================================================================
-# HEADER
-#================================================================
-# DESCRIPTION
-#    This is a script containing some commands to automatically
-#    format the c/c++ code contained in one folder.
-#    This will help to maintain good quality code in the github
-#    repository.
-# SET UP
-#    You need to allow the correct permissions for this file.
-#    This can be done by running:
-#    chmod u+x
-# REQUIREMENTS
-#    clang-format
-#================================================================
-# END_OF_HEADER
-#================================================================
-
-
-# Checks all the modified c/c++ files, format them and adds them
-# to the commit.
-for FILE in $(git diff $1/$2 --name-only | grep -E '.*\.py$')
-do
-  autopep8 --in-place -a $FILE
-  git add $FILE
-done

From d04ae63d8dfd85d61cdc3660c327e19551d771cd Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 01:28:08 +0000
Subject: [PATCH 020/112] Update build_wheels.yml | Added src distribution

---
 .github/workflows/build_wheels.yml | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index e62f010df..9ade9a2b5 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -84,6 +84,11 @@ jobs:
       - name: Install requirements
         run: python -m pip install cibuildwheel twine

+      - name: Build src dist
+        if: ${{ matrix.os == 'ubuntu-latest' }}
+        run: |
+          python -m build --sdist --outdir wheels loadgen
+
       - name: Build wheels
         run: git pull && python -m cibuildwheel loadgen/ --output-dir wheels

From 1923b01c9a3dec2ee83efc0a2d8cf1d5d083ccc7 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 01:28:28 +0000
Subject: [PATCH 021/112] Update VERSION.txt

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index 59fe035d9..2afe9065c 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.23
+4.1.24

From dc1acd3ec167802cfd5fd36471af2ba9ccd2b346 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 01:30:03 +0000
Subject: [PATCH 022/112] Update build_wheels.yml

---
 .github/workflows/build_wheels.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 9ade9a2b5..445a6e606 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -82,7 +82,7 @@ jobs:
       - uses: actions/setup-python@v3

       - name: Install requirements
-        run: python -m pip install cibuildwheel twine
+        run: python -m pip install cibuildwheel twine build

From dac78f559aeae7659d0d4f997672d178d56a4fb3 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 01:31:15 +0000
Subject: [PATCH 023/112] Update VERSION.txt

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index 2afe9065c..76d70ac3d 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.24
+4.1.25

From 5443e072dca4a5241faf2bdfaac78377be0333a5 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 12:10:13 +0000
Subject: [PATCH 024/112] Update pyproject.toml

---
 loadgen/pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/loadgen/pyproject.toml b/loadgen/pyproject.toml
index 6f0ae06f0..b7a16848e 100755
--- a/loadgen/pyproject.toml
+++ b/loadgen/pyproject.toml
@@ -5,3 +5,6 @@ build-backend = "setuptools.build_meta:__legacy__"
 [tool.cibuildwheel]
 environment = "CFLAGS='-std=c++14'"
 build = "cp3{7,8,9,10,11,12,13}-*"
+
+[tool.setuptools]
+include-package-data = true

From cae5eb275efa390015a50eb3904ec81d6f2ab3b1 Mon Sep 17 00:00:00 2001
From: arjunsuresh
Date: Fri, 22 Nov 2024 12:10:33 +0000
Subject: [PATCH 025/112] Increment version to 4.1.26

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index 76d70ac3d..f0c129e17 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.25
+4.1.26
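[Note on patches 020 through 025: a source distribution only contains the
files setuptools is told to package, so `python -m build --sdist` can produce
an archive that fails at install time if build-time code reads a file that
never made it in. If, as the repeated VERSION.txt bumps in this series
suggest, the loadgen build reads its version from that file, the dependency
looks roughly like this (an illustrative sketch, not the actual setup.py):

    from pathlib import Path

    # If VERSION.txt is absent from the sdist, this raises at install time,
    # which is why MANIFEST.in (patch 026 below) must list it explicitly.
    version = Path(__file__).with_name("VERSION.txt").read_text().strip()
    print(version)  # e.g. "4.1.26"
]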
From 3b1031fe83d61bd3267b017a64a7eafd51ca7585 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 12:12:17 +0000
Subject: [PATCH 026/112] Update MANIFEST.in

---
 loadgen/MANIFEST.in | 1 +
 1 file changed, 1 insertion(+)

diff --git a/loadgen/MANIFEST.in b/loadgen/MANIFEST.in
index 74282fcee..152b53111 100644
--- a/loadgen/MANIFEST.in
+++ b/loadgen/MANIFEST.in
@@ -1 +1,2 @@
 include VERSION.txt
+include mlperf.conf

From 15581e20aea79bbe4da7962f931828fc30852be7 Mon Sep 17 00:00:00 2001
From: arjunsuresh
Date: Fri, 22 Nov 2024 12:12:28 +0000
Subject: [PATCH 027/112] Increment version to 4.1.27

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index f0c129e17..1c12ed8e6 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.26
+4.1.27

From 0511f951a1471b5db94cb61595658784a5a7f01c Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 12:18:02 +0000
Subject: [PATCH 028/112] Update pyproject.toml

---
 loadgen/pyproject.toml | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/loadgen/pyproject.toml b/loadgen/pyproject.toml
index b7a16848e..6f0ae06f0 100755
--- a/loadgen/pyproject.toml
+++ b/loadgen/pyproject.toml
@@ -5,6 +5,3 @@ build-backend = "setuptools.build_meta:__legacy__"
 [tool.cibuildwheel]
 environment = "CFLAGS='-std=c++14'"
 build = "cp3{7,8,9,10,11,12,13}-*"
-
-[tool.setuptools]
-include-package-data = true

From 03911ac7150e7c356b0cf49ab16aa1c4214294b4 Mon Sep 17 00:00:00 2001
From: arjunsuresh
Date: Fri, 22 Nov 2024 12:18:19 +0000
Subject: [PATCH 029/112] Increment version to 4.1.28

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index 1c12ed8e6..d6d90e831 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.27
+4.1.28

From 2418e8128f7a454f4b60de76a2f6895007dc8fea Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 13:16:39 +0000
Subject: [PATCH 030/112] Update build_wheels.yml

---
 .github/workflows/build_wheels.yml | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index 445a6e606..6f67f56de 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -74,7 +74,7 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        os: [ubuntu-latest, windows-latest, macos-latest]
+        os: [ubuntu-latest, windows-latest, macos-latest, macos-13]

     steps:
       - uses: actions/checkout@v3
@@ -115,12 +115,18 @@ jobs:
         with:
           name: wheels-ubuntu-latest
           path: wheels
-      # Download the built wheels from macOS
-      - name: Download macOS wheels
+      # Download the built wheels from macOS-latest
+      - name: Download macOS-latest wheels
         uses: actions/download-artifact@v4
         with:
           name: wheels-macos-latest
           path: wheels
+      # Download the built wheels from macOS-13 (x86)
+      - name: Download macOS-13 (x86) wheels
+        uses: actions/download-artifact@v4
+        with:
+          name: wheels-macos-13
+          path: wheels
       # Download the built wheels from Windows
       - name: Download Windows wheels
         uses: actions/download-artifact@v4

From 6e1bcfd74f4c70dfae7a899d963e644625208653 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Fri, 22 Nov 2024 13:50:19 +0000
Subject: [PATCH 031/112] Update VERSION.txt

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index d6d90e831..e7c468635 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.28
+4.1.29
From 62949d0b2ee5eb710f165bf4dad4efa2124ed845 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sun, 24 Nov 2024 11:07:34 +0000
Subject: [PATCH 032/112] Update accuracy_coco.py

---
 text_to_image/tools/accuracy_coco.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/text_to_image/tools/accuracy_coco.py b/text_to_image/tools/accuracy_coco.py
index dcf7ed889..0fedd278b 100644
--- a/text_to_image/tools/accuracy_coco.py
+++ b/text_to_image/tools/accuracy_coco.py
@@ -311,8 +311,8 @@ def compute_accuracy_low_memory(

     fid_score = calculate_frechet_distance(m1, s1, m2, s2)

-    result_dict["FID_SCORE"] = fid_score
-    result_dict["CLIP_SCORE"] = np.mean(clip_scores)
+    result_dict["FID_SCORE"] = f"{fid_score}"
+    result_dict["CLIP_SCORE"] = f"{np.mean(clip_scores)}"

     print(f"Accuracy Results: {result_dict}")
     with open(output_file, "w") as fp:

From 5314c5cb7b4294114d064764bf5faa3ceab171af Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sat, 30 Nov 2024 15:07:13 +0530
Subject: [PATCH 033/112] Making sdxl run thread safe

---
 text_to_image/backend_pytorch.py | 23 +++++++++++++----------
 text_to_image/main.py            | 28 +++++++++++++++-------------
 2 files changed, 28 insertions(+), 23 deletions(-)

diff --git a/text_to_image/backend_pytorch.py b/text_to_image/backend_pytorch.py
index 36e2b8009..027e15565 100644
--- a/text_to_image/backend_pytorch.py
+++ b/text_to_image/backend_pytorch.py
@@ -5,6 +5,7 @@ import backend
 from diffusers import StableDiffusionXLPipeline
 from diffusers import EulerDiscreteScheduler
+import threading

 logging.basicConfig(level=logging.INFO)
 log = logging.getLogger("backend-pytorch")
@@ -24,6 +25,7 @@ def __init__(
     ):
         super(BackendPytorch, self).__init__()
         self.model_path = model_path
+        self.lock = threading.Lock()
         if model_id == "xl":
             self.model_id = "stabilityai/stable-diffusion-xl-base-1.0"
         else:
@@ -385,15 +387,16 @@ def predict(self, inputs):
                 pooled_prompt_embeds,
                 negative_pooled_prompt_embeds,
             ) = self.prepare_inputs(inputs, i)
-            generated = self.pipe(
-                prompt_embeds=prompt_embeds,
-                negative_prompt_embeds=negative_prompt_embeds,
-                pooled_prompt_embeds=pooled_prompt_embeds,
-                negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
-                guidance_scale=self.guidance,
-                num_inference_steps=self.steps,
-                output_type="pt",
-                latents=latents_input,
-            ).images
+            with lock:
+                generated = self.pipe(
+                    prompt_embeds=prompt_embeds,
+                    negative_prompt_embeds=negative_prompt_embeds,
+                    pooled_prompt_embeds=pooled_prompt_embeds,
+                    negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
+                    guidance_scale=self.guidance,
+                    num_inference_steps=self.steps,
+                    output_type="pt",
+                    latents=latents_input,
+                ).images
             images.extend(generated)
         return images
diff --git a/text_to_image/main.py b/text_to_image/main.py
index 6aa7c15e7..b8c98dc9c 100644
--- a/text_to_image/main.py
+++ b/text_to_image/main.py
@@ -250,6 +250,7 @@ def run_one_item(self, qitem: Item):
             log.error("thread: failed on contentid=%s, %s", src, ex)
             # since post_process will not run, fake empty responses
             processed_results = [[]] * len(qitem.query_id)
+            raise
         finally:
             response_array_refs = []
             response = []
@@ -402,19 +403,20 @@ def main():

     # count = ds.get_item_count()

-    # warmup
-    syntetic_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit"
-    latents_pt = torch.rand(ds.latents.shape, dtype=dtype).to(args.device)
-    warmup_samples = [
-        {
-            "input_tokens": ds.preprocess(syntetic_str, model.pipe.tokenizer),
-            "input_tokens_2": ds.preprocess(syntetic_str, model.pipe.tokenizer_2),
-            "latents": latents_pt,
-        }
-        for _ in range(args.max_batchsize)
-    ]
-    for i in range(5):
-        _ = backend.predict(warmup_samples)
+    if os.environ.get('FORCE_NO_WARMUP', '').lower() not in [ "yes", "true", "1" ]:
+        # warmup
+        syntetic_str = "Lorem ipsum dolor sit amet, consectetur adipiscing elit"
+        latents_pt = torch.rand(ds.latents.shape, dtype=dtype).to(args.device)
+        warmup_samples = [
+            {
+                "input_tokens": ds.preprocess(syntetic_str, model.pipe.tokenizer),
+                "input_tokens_2": ds.preprocess(syntetic_str, model.pipe.tokenizer_2),
+                "latents": latents_pt,
+            }
+            for _ in range(args.max_batchsize)
+        ]
+        for i in range(5):
+            _ = backend.predict(warmup_samples)
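[Note on patch 033: the warmup gate accepts several truthy spellings, and the
check in isolation is just the snippet below. Note also that `with lock:` in
the backend change references a name that is never defined in that scope
(only `self.lock` is assigned); patch 035 below corrects it.

    import os

    def warmup_disabled():
        # Mirrors the FORCE_NO_WARMUP check: "yes", "true" or "1",
        # in any letter case, skips the warmup passes.
        return os.environ.get("FORCE_NO_WARMUP", "").lower() in ("yes", "true", "1")

    print(warmup_disabled())
]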
From 509f555b7509defe94a1872cacf6181d23359d35 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Wed, 4 Dec 2024 14:48:25 +0000
Subject: [PATCH 034/112] Create format.yml | Run format on push instead of PR

---
 .github/workflows/format.yml | 60 ++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)
 create mode 100644 .github/workflows/format.yml

diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
new file mode 100644
index 000000000..45ebb521b
--- /dev/null
+++ b/.github/workflows/format.yml
@@ -0,0 +1,60 @@
+# Automatic code formatting
+name: "Code formatting"
+on:
+  push:
+    branches:
+      - "**"
+
+env:
+  python_version: "3.9"
+
+jobs:
+  format-code:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ssh-key: ${{ secrets.DEPLOY_KEY }}
+      - name: Set up Python ${{ env.python_version }}
+        uses: actions/setup-python@v3
+        with:
+          python-version: ${{ env.python_version }}
+
+      - name: Format modified python files
+        env:
+          filter: ${{ github.event.before }}
+        run: |
+          python3 -m pip install autopep8
+          for FILE in $(git diff --name-only $filter | grep -E '.*\.py$')
+          do
+            # Check if the file still exists in the working tree
+            if [ -f "$FILE" ]; then
+              autopep8 --in-place -a "$FILE"
+              git add "$FILE"
+            fi
+          done
+
+      - name: Format modified C++ files
+        env:
+          filter: ${{ github.event.before }}
+        run: |
+          for FILE in $(git diff --name-only $filter | grep -E '.*\.(cc|cpp|h|hpp)$')
+          do
+            # Check if the file still exists in the working tree
+            if [ -f "$FILE" ]; then
+              clang-format -i -style=file $FILE
+              git add $FILE
+            fi
+          done
+
+      - name: Commit and push changes
+        run: |
+          HAS_CHANGES=$(git diff --staged --name-only)
+          if [ ${#HAS_CHANGES} -gt 0 ]; then
+            git config --global user.name mlcommons-bot
+            git config --global user.email "mlcommons-bot@users.noreply.github.com"
+            # Commit changes
+            git commit -m '[Automated Commit] Format Codebase'
+            git push
+          fi

From 17dc9aabb277ced22231d4c5245ec8f2a92d2269 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Sun, 8 Dec 2024 20:47:58 +0000
Subject: [PATCH 035/112] Update backend_pytorch.py | Fix lock usage

---
 text_to_image/backend_pytorch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/text_to_image/backend_pytorch.py b/text_to_image/backend_pytorch.py
index 027e15565..8e52e0a61 100644
--- a/text_to_image/backend_pytorch.py
+++ b/text_to_image/backend_pytorch.py
@@ -387,7 +387,7 @@ def predict(self, inputs):
                 pooled_prompt_embeds,
                 negative_pooled_prompt_embeds,
             ) = self.prepare_inputs(inputs, i)
-            with lock:
+            with self.lock:
                 generated = self.pipe(
                     prompt_embeds=prompt_embeds,
                     negative_prompt_embeds=negative_prompt_embeds,
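[Note: with patch 035 applied, the serialization pattern started in patch 033
is complete. Reduced to its essentials, and with an illustrative class and
callable standing in for the real backend and diffusion pipeline:

    import threading

    class Backend:
        def __init__(self, pipe):
            self.pipe = pipe
            self.lock = threading.Lock()  # one lock per backend instance

        def predict(self, inputs):
            outputs = []
            for item in inputs:
                # Serialize access to the shared pipeline across worker
                # threads; the bare `with lock:` from patch 033 raised
                # NameError because only the attribute self.lock exists.
                with self.lock:
                    outputs.append(self.pipe(item))
            return outputs

    print(Backend(lambda x: x * 2).predict([1, 2, 3]))
]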
From dc7be47856b091c55fd56706265fa45a21f620ee Mon Sep 17 00:00:00 2001
From: Zhihan Jiang <68881590+nvzhihanj@users.noreply.github.com>
Date: Mon, 9 Dec 2024 11:32:32 -0800
Subject: [PATCH 036/112] Upgrade loadgen version to 5.0 (#1962)

---
 loadgen/VERSION.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt
index d9b366bb7..0062ac971 100644
--- a/loadgen/VERSION.txt
+++ b/loadgen/VERSION.txt
@@ -1 +1 @@
-4.1.30
+5.0.0

From d64509d006c4d73a4d562a07966331dede07ae22 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 10 Dec 2024 04:35:20 +0530
Subject: [PATCH 037/112] Fix loadgen build for version numbers having "0"
 (#1967)

* Fix loadgen build for version numbers having "0"

* Update test-resnet50.yml

* Update test-retinanet.yml

* Update test-bert.yml
---
 .github/workflows/test-bert.yml      | 2 +-
 .github/workflows/test-resnet50.yml  | 2 +-
 .github/workflows/test-retinanet.yml | 2 +-
 loadgen/CMakeLists.txt               | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-bert.yml b/.github/workflows/test-bert.yml
index 2ce156c45..b7012a6e8 100755
--- a/.github/workflows/test-bert.yml
+++ b/.github/workflows/test-bert.yml
@@ -36,4 +36,4 @@ jobs:
         python3 -m pip install cm4mlops
     - name: Test BERT and end to end submission generation
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.loadgen.version=custom
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=bert-99 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom
diff --git a/.github/workflows/test-resnet50.yml b/.github/workflows/test-resnet50.yml
index c942258b2..50404b0d9 100755
--- a/.github/workflows/test-resnet50.yml
+++ b/.github/workflows/test-resnet50.yml
@@ -38,4 +38,4 @@ jobs:
         python3 -m pip install cm4mlops
     - name: Test Resnet50 and end to end submission generation
      run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }}
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=resnet50 --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src.version=custom --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }}
diff --git a/.github/workflows/test-retinanet.yml b/.github/workflows/test-retinanet.yml
index 648266626..5b18619de 100755
--- a/.github/workflows/test-retinanet.yml
+++ b/.github/workflows/test-retinanet.yml
@@ -36,4 +36,4 @@ jobs:
         python3 -m pip install cm4mlops
     - name: Test Retinanet and end to end submission generation
       run: |
-        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.loadgen.version=custom
+        cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --hw_name=default --model=retinanet --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=10 --adr.compiler.tags=gcc --adr.inference-src.version=custom --adr.inference-src.tags=_repo.${{ github.event.pull_request.head.repo.html_url }},_branch.$PR_HEAD_REF --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom
diff --git a/loadgen/CMakeLists.txt b/loadgen/CMakeLists.txt
index 37270ac24..4fec0e44f 100644
--- a/loadgen/CMakeLists.txt
+++ b/loadgen/CMakeLists.txt
@@ -14,7 +14,7 @@ set(mlperf_loadgen_VERSION_MINOR "${CMAKE_MATCH_2}")
 set(mlperf_loadgen_VERSION_PATCH "${CMAKE_MATCH_3}")

 # Check if the version format was parsed correctly
-if(NOT mlperf_loadgen_VERSION_MAJOR OR NOT mlperf_loadgen_VERSION_MINOR OR NOT mlperf_loadgen_VERSION_PATCH)
+if(NOT DEFINED mlperf_loadgen_VERSION_MAJOR OR NOT DEFINED mlperf_loadgen_VERSION_MINOR OR NOT DEFINED mlperf_loadgen_VERSION_PATCH)
   message(FATAL_ERROR "Version format in VERSION.txt is incorrect. Expected format: MAJOR.MINOR.PATCH")
 endif()
Expected format: MAJOR.MINOR.PATCH") endif() From b149bec616610c95b64a72d4a1071d3c196aa1c3 Mon Sep 17 00:00:00 2001 From: pgmpablo157321 Date: Mon, 9 Dec 2024 23:05:32 +0000 Subject: [PATCH 038/112] Increment version to 5.0.1 --- loadgen/VERSION.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt index 0062ac971..6b244dcd6 100644 --- a/loadgen/VERSION.txt +++ b/loadgen/VERSION.txt @@ -1 +1 @@ -5.0.0 +5.0.1 From 0a9b2e4a40fd29a79f390c8505d4d34313e84fbb Mon Sep 17 00:00:00 2001 From: Zhihan Jiang <68881590+nvzhihanj@users.noreply.github.com> Date: Mon, 9 Dec 2024 21:48:24 -0800 Subject: [PATCH 039/112] Fix Dockerfile for 405B (#1960) Co-authored-by: Miro --- language/llama3-405b/Dockerfile | 5 ++++- language/llama3-405b/requirements.txt | 4 ++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/language/llama3-405b/Dockerfile b/language/llama3-405b/Dockerfile index 7987d4851..67edcc46b 100644 --- a/language/llama3-405b/Dockerfile +++ b/language/llama3-405b/Dockerfile @@ -22,7 +22,7 @@ ENV TZ=US/Pacific ENV DEBIAN_FRONTEND=noninteractive RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone -RUN rm -rf /var/lib/apt/lists/* && rm /etc/apt/sources.list.d/* \ +RUN rm -rf /var/lib/apt/lists/* && rm -rf /etc/apt/sources.list.d/* \ && apt update \ && apt install -y --no-install-recommends build-essential autoconf \ libtool git ccache curl wget pkg-config sudo ca-certificates \ @@ -46,3 +46,6 @@ RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86 ENV PATH="$PATH:/opt/miniconda3/bin" RUN conda create -n llama3-405b python=3.10 RUN chmod -R 777 /opt/miniconda3 + +# Set the env variable for vLLM +ENV VLLM_WORKER_MULTIPROC_METHOD=spawn diff --git a/language/llama3-405b/requirements.txt b/language/llama3-405b/requirements.txt index d8f5fbd12..a62f68e7b 100644 --- a/language/llama3-405b/requirements.txt +++ b/language/llama3-405b/requirements.txt @@ -3,7 +3,7 @@ nltk==3.8.1 evaluate==0.4.0 absl-py==1.4.0 rouge-score==0.1.2 -sentencepiece==0.1.99 +sentencepiece==0.2.0 accelerate==0.21.0 vllm==0.6.3 -pybind11==2.10.4 \ No newline at end of file +pybind11==2.10.4 From 47422d1345d9c925fc59554921c008627c75538e Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Wed, 11 Dec 2024 17:23:47 -0500 Subject: [PATCH 040/112] Add llama3 metrics + remove llama3-99.9 (#1973) --- language/llama3-405b/README.md | 11 +++- tools/submission/submission_checker.py | 69 +++++++------------------- 2 files changed, 28 insertions(+), 52 deletions(-) diff --git a/language/llama3-405b/README.md b/language/llama3-405b/README.md index 8c04c202e..dcc5344c4 100644 --- a/language/llama3-405b/README.md +++ b/language/llama3-405b/README.md @@ -193,5 +193,12 @@ The ServerSUT was not tested for GPU runs. 
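The `NOT DEFINED` change in patch 037 above deserves a word, because the failure mode is subtle: CMake's `if(NOT <var>)` tests the variable's value, and the string `0` is falsy, so any version with a zero component (such as the new 5.0.0) took the fatal-error branch even though the regex had matched. `if(NOT DEFINED <var>)` instead asks whether the capture exists at all. The same trap, sketched in Python for concreteness (illustrative code, not part of the repository):

```python
import re

def parse_version(text):
    match = re.match(r"^(\d+)\.(\d+)\.(\d+)$", text.strip())
    if match is None:
        # Analogous to `if(NOT DEFINED ...)`: reject only a failed capture.
        raise ValueError("Version format is incorrect. Expected MAJOR.MINOR.PATCH")
    major, minor, patch = (int(g) for g in match.groups())
    # The equivalent of the old CMake guard would be
    # `if not major or not minor or not patch`, which wrongly rejects the
    # legitimate zero components in "5.0.0", just as `if(NOT ...)` did.
    return major, minor, patch

assert parse_version("5.0.0") == (5, 0, 0)
```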
## Accuracy Target -Running the GPU implementation in FP16 precision resulted in the following FP16 accuracy targets (normalized to a 0-100 -scale from a 0.0-1.0 scale): +Running the GPU implementation in FP16 precision resulted in the following FP16 accuracy targets: +``` +{ + 'rougeL': 21.6666, + 'exact_match': 90.1335, + 'tokens_per_sample': 684.68, +} +``` + diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 51f8c7aab..37cd3a0f8 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -271,8 +271,7 @@ "llama2-70b-99.9", "stable-diffusion-xl", "mixtral-8x7b", - "llama3-405b-99", - "llama3-405b-99.9", + "llama3-405b", "rgat", # TODO: add automotive ], @@ -289,8 +288,7 @@ "llama2-70b-99.9": ["Server", "Offline"], "stable-diffusion-xl": ["Server", "Offline"], "mixtral-8x7b": ["Server", "Offline"], - "llama3-405b-99": ["Server", "Offline"], - "llama3-405b-99.9": ["Server", "Offline"], + "llama3-405b": ["Server", "Offline"], "rgat": ["Offline"], }, "optional-scenarios-datacenter": {}, @@ -320,8 +318,7 @@ "llama2-70b-99.9": ["Server", "Offline"], "stable-diffusion-xl": ["SingleStream", "Offline", "Server"], "mixtral-8x7b": ["Server", "Offline"], - "llama3-405b-99": ["Server", "Offline"], - "llama3-405b-99.9": ["Server", "Offline"], + "llama3-405b": ["Server", "Offline"], "rgat": ["Offline"], }, "optional-scenarios-datacenter-edge": {}, @@ -395,22 +392,13 @@ "mbxp_accuracy", 60.12 * 0.99, ), - # TODO: Get llama3 metrics - "llama3-405b-99": ( + "llama3-405b": ( "ROUGEL", - 1 * 0.99, + 21.6666 * 0.99, "exact_match", - 1 * 0.99, + 90.1335 * 0.99, "TOKENS_PER_SAMPLE", - 1000 * 0.9, - ), - "llama3-405b-99.9": ( - "ROUGEL", - 1 * 0.99, - "exact_match", - 1 * 0.99, - "TOKENS_PER_SAMPLE", - 20000 * 0.9, + 684.68 * 0.9, ), "rgat": ("acc", 0.7286 * 0.99), }, @@ -424,8 +412,7 @@ "llama2-70b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), "llama2-70b-99.9": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), "mixtral-8x7b": ("TOKENS_PER_SAMPLE", 145.9 * 1.1), - "llama3-405b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), - "llama3-405b-99.9": ("TOKENS_PER_SAMPLE", 20000 * 1.1), + "llama3-405b": ("TOKENS_PER_SAMPLE", 684.68 * 1.1), }, "accuracy-delta-perc": { "stable-diffusion-xl": {"CLIP_SCORE": 1, "FID_SCORE": 2} @@ -445,8 +432,7 @@ "llama2-70b-99.9": 24576, "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, - "llama3-405b-99": 8312, - "llama3-405b-99.9": 8312, + "llama3-405b": 8312, "rgat": 788379 }, @@ -510,8 +496,7 @@ "Offline": 1, }, "mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "llama3-405b-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "llama3-405b-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "llama3-405b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, "rgat": {"SingleStream": 1024, "Server": 270336, "Offline": 1} }, }, @@ -600,7 +585,7 @@ "llama2-70b-99.9": 24576, "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, - "llama3-405b-99": 8312, + "llama3-405b": 8312, "llama2-405b-99.9": 8312, "rgat": 788379, } @@ -680,11 +665,7 @@ "Offline": "result_tokens_per_second", "Server": "result_completed_tokens_per_second", }, - "llama3-405b-99": { - "Offline": "result_tokens_per_second", - "Server": "result_completed_tokens_per_second", - }, - "llama3-405b-99.9": { + "llama3-405b": { "Offline": "result_tokens_per_second", "Server": "result_completed_tokens_per_second", }, @@ -699,10 +680,7 @@ "conversational": {"ttft": 2000 * 1000000, "tpot": 200 * 1000000} }, 
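To make the llama3-405b targets in this patch concrete: the two accuracy metrics must reach 99% of the FP16 baseline, while `TOKENS_PER_SAMPLE` is bracketed between 90% and 110% of it, so a submission can fail by generating either too little or too much text. A small sketch of how the bounds fall out of the baseline values (an illustrative helper, not the checker's actual code):

```python
# FP16 reference values for llama3-405b, as recorded in this patch.
BASELINE = {"ROUGEL": 21.6666, "exact_match": 90.1335, "TOKENS_PER_SAMPLE": 684.68}

def llama3_405b_passes(result):
    """Mirror of the thresholds above: 99% floors on the accuracy metrics
    and a two-sided 90%-110% bracket on generation length."""
    return (
        result["ROUGEL"] >= BASELINE["ROUGEL"] * 0.99
        and result["exact_match"] >= BASELINE["exact_match"] * 0.99
        and BASELINE["TOKENS_PER_SAMPLE"] * 0.9
        <= result["TOKENS_PER_SAMPLE"]
        <= BASELINE["TOKENS_PER_SAMPLE"] * 1.1
    )
```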
"mixtral-8x7b": {"conversational": {"ttft": 2000 * 1000000, "tpot": 200 * 1000000}}, - "llama3-405b-99": { - "conversational": {"ttft": 6000 * 1000000, "tpot": 175 * 1000000} - }, - "llama3-405b-99.9": { + "llama3-405b": { "conversational": {"ttft": 6000 * 1000000, "tpot": 175 * 1000000} }, } @@ -986,8 +964,7 @@ def requires_equal_issue(self, model, division): "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b", - "llama3-405b-99", - "llama3-405b-99.9", + "llama3-405b", ] and self.version not in ["v4.0", "v4.1"] ) @@ -1355,7 +1332,7 @@ def check_performance_dir( ) if model in ["llama2-70b-99", "llama2-70b-99.9", - "mixtral-8x7b", "llama3-405b-99", "llama3-405b-99.9"]: + "mixtral-8x7b", "llama3-405b"]: llama_constraint, is_valid = extra_check_llm( mlperf_log, scenario_fixed, model) @@ -1895,13 +1872,7 @@ def log_result( "Offline": "Tokens/s", "Server": "Tokens/s", }, - "llama3-405b-99": { - "SingleStream": "Latency (ms)", - "MultiStream": "Latency (ms)", - "Offline": "Tokens/s", - "Server": "Tokens/s", - }, - "llama3-405b-99.9": { + "llama3-405b": { "SingleStream": "Latency (ms)", "MultiStream": "Latency (ms)", "Offline": "Tokens/s", @@ -2986,8 +2957,7 @@ def check_compliance_dir( "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b", - "llama3-405b-99", - "llama3-405b-99.9", + "llama3-405b", "rgat", ]: test_list.remove("TEST04") @@ -3008,8 +2978,7 @@ def check_compliance_dir( "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b", - "llama3-405b-99", - "llama3-405b-99.9", + "llama3-405b", ]: test_list.remove("TEST01") @@ -3018,7 +2987,7 @@ def check_compliance_dir( test_list.remove("TEST04") if model in ["llama2-70b-99", "llama2-70b-99.9", - "mixtral-8x7b", "llama3-405b-99", "llama3-405b-99.9"]: + "mixtral-8x7b", "llama3-405b"]: test_list.append("TEST06") if test_list and not os.path.exists(compliance_dir): From 070ef4d8f2197053cfc1d041bcb9ab8cfef4bc8c Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 12 Dec 2024 20:15:56 +0530 Subject: [PATCH 041/112] Fix submission checker for v5.0 rgat (#1974) * Fix submission checker for v5.0 rgat * Update submission_checker.py | Updates for v5.0 * [Automated Commit] Format Codebase * Update submission_checker.py | Fixes latency_constraints for v5.0 * [Automated Commit] Format Codebase --------- Co-authored-by: mlcommons-bot --- tools/submission/submission_checker.py | 46 +++++++++++--------------- 1 file changed, 19 insertions(+), 27 deletions(-) diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 37cd3a0f8..35b70a4e7 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -1,4 +1,4 @@ -"""A checker for MLPerf Inference submissions from v4.0 onwards (for checking older submissions please use the submission checker from the respective release) +"""A checker for MLPerf Inference submissions from v4.1 onwards (for checking older submissions please use the submission checker from the respective release) """ from __future__ import division @@ -196,13 +196,11 @@ "resnet50": "resnet", }, "seeds": { - # TODO: Update random seeds "qsl_rng_seed": 3066443479025735752, "sample_index_rng_seed": 10688027786191513374, "schedule_rng_seed": 14962580496156340209, }, "test05_seeds": { - # TODO: Update random seeds "qsl_rng_seed": 16799458546791641818, "sample_index_rng_seed": 5453809927556429288, "schedule_rng_seed": 5435552105434836064, @@ -220,8 +218,7 @@ "llama2-70b-99": {"Server": 20000000000}, "llama2-70b-99.9": {"Server": 20000000000}, "stable-diffusion-xl": {"Server": 
20000000000}, - # TODO: Mixtral metrics - # "mixtral-8x7b" : {"Server": 20000000000} + "mixtral-8x7b": {"Server": 20000000000} }, "min-queries": { "resnet": { @@ -260,7 +257,6 @@ "retinanet", "bert-99", "bert-99.9", - # TODO: remove dlrm? "dlrm-v2-99", "dlrm-v2-99.9", "3d-unet-99", @@ -273,7 +269,7 @@ "mixtral-8x7b", "llama3-405b", "rgat", - # TODO: add automotive + # TODO: add automotive? ], "required-scenarios-datacenter": { "resnet": ["Server", "Offline"], @@ -296,6 +292,7 @@ "resnet": ["SingleStream", "MultiStream", "Offline"], "retinanet": ["SingleStream", "MultiStream", "Offline"], "bert-99": ["SingleStream", "Offline"], + "bert-99.9": ["SingleStream", "Offline"], "3d-unet-99": ["SingleStream", "Offline"], "3d-unet-99.9": ["SingleStream", "Offline"], "gptj-99": ["SingleStream", "Offline"], @@ -306,8 +303,8 @@ "required-scenarios-datacenter-edge": { "resnet": ["SingleStream", "Offline", "MultiStream", "Server"], "retinanet": ["SingleStream", "Offline", "MultiStream", "Server"], - "bert-99": ["SingleStream", "Offline", "Server"], - "bert-99.9": ["Offline", "Server"], + "bert-99": ["SingleStream", "Offline"], + "bert-99.9": ["SingleStream", "Offline"], "dlrm-v2-99": ["Offline", "Server"], "dlrm-v2-99.9": ["Offline", "Server"], "3d-unet-99": ["SingleStream", "Offline"], @@ -436,10 +433,11 @@ "rgat": 788379 }, - # TODO: Update this list. + # model_mapping.json is expected in the root directory of the + # submission folder for open submissions and so the below dictionary is + # not really needed "model_mapping": { # map model names to the official mlperf model class - "ssd-resnet34": "retinanet", "mobilenet": "resnet", "resnet50": "resnet", }, @@ -449,23 +447,19 @@ "sample_index_rng_seed": 10688027786191513374, "schedule_rng_seed": 14962580496156340209, }, - "test05_seeds": { - # TODO: Update random seeds - "qsl_rng_seed": 16799458546791641818, - "sample_index_rng_seed": 5453809927556429288, - "schedule_rng_seed": 5435552105434836064, - }, "ignore_errors": [], "latency-constraint": { "resnet": {"Server": 15000000}, "retinanet": {"Server": 100000000}, - "bert-99": {"Server": 130000000}, - "bert-99.9": {"Server": 130000000}, "dlrm-v2-99": {"Server": 60000000}, "dlrm-v2-99.9": {"Server": 60000000}, "gptj-99": {"Server": 20000000000}, "gptj-99.9": {"Server": 20000000000}, "stable-diffusion-xl": {"Server": 20000000000}, + "llama2-70b-99": {"Server": 20000000000}, + "llama2-70b-99.9": {"Server": 20000000000}, + "mixtral-8x7b": {"Server": 20000000000}, + "llama3-405b": {"Server": 60000000000} }, "min-queries": { "resnet": { @@ -480,8 +474,8 @@ "Server": 270336, "Offline": 1, }, - "bert-99": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "bert-99.9": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "bert-99": {"SingleStream": 1024, "Offline": 1}, + "bert-99.9": {"SingleStream": 1024, "Offline": 1}, "dlrm-v2-99": {"Server": 270336, "Offline": 1}, "dlrm-v2-99.9": {"Server": 270336, "Offline": 1}, "3d-unet-99": {"SingleStream": 1024, "Offline": 1}, @@ -497,7 +491,7 @@ }, "mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, "llama3-405b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "rgat": {"SingleStream": 1024, "Server": 270336, "Offline": 1} + "rgat": {"SingleStream": 1024, "Offline": 1} }, }, } @@ -605,17 +599,15 @@ } RESULT_FIELD_NEW = { - "v4.0": { + "v4.1": { "Offline": "result_samples_per_second", "SingleStream": "early_stopping_latency_ss", - "MultiStreamLegacy": "effective_samples_per_query", "MultiStream": "early_stopping_latency_ms", - 
"Server": "result_scheduled_samples_per_sec", + "Server": "result_completed_samples_per_sec", }, - "v4.1": { + "v5.0": { "Offline": "result_samples_per_second", "SingleStream": "early_stopping_latency_ss", - "MultiStreamLegacy": "effective_samples_per_query", "MultiStream": "early_stopping_latency_ms", "Server": "result_completed_samples_per_sec", }, From 2da8aeed4f008a97df54eeef6b8c810a6c48ecee Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 00:43:35 +0530 Subject: [PATCH 042/112] Fix test05 seeds missing error for v5.0 submission checker (#1976) --- tools/submission/submission_checker.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 35b70a4e7..a5165b381 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -827,7 +827,8 @@ def __init__( self.version = version self.models = self.base["models"] self.seeds = self.base["seeds"] - self.test05_seeds = self.base["test05_seeds"] + if self.base.get("test05_seeds"): + self.test05_seeds = self.base["test05_seeds"] self.accuracy_target = self.base["accuracy-target"] self.accuracy_delta_perc = self.base["accuracy-delta-perc"] self.accuracy_upper_limit = self.base.get("accuracy-upper-limit", {}) @@ -968,7 +969,7 @@ def get_args(): parser.add_argument("--input", required=True, help="submission directory") parser.add_argument( "--version", - default="v4.1", + default="v5.0", choices=list(MODEL_CONFIG.keys()), help="mlperf version", ) From b4d72fbe772e699f9aea17549b4ff8a08dc6fd0b Mon Sep 17 00:00:00 2001 From: Zhihan Jiang <68881590+nvzhihanj@users.noreply.github.com> Date: Thu, 12 Dec 2024 20:38:37 -0800 Subject: [PATCH 043/112] Fix llama3-405B docker workflow and performance sample count (#1978) * Fix llama3-405B docker workflow * Fix the performance sample count from 8312 to 8313 * More fixes --- .gitignore | 1 + language/llama3-405b/README.md | 81 ++++++++++++++----- .../{launch.sh => launch_docker.sh} | 0 language/llama3-405b/main.py | 2 +- language/llama3-405b/run_accuracy.sh | 5 +- language/llama3-405b/run_offline.sh | 17 ++-- language/llama3-405b/run_server.sh | 17 ++-- language/llama3-405b/with_the_same_user | 27 +++++++ loadgen/mlperf.conf | 8 +- tools/submission/submission_checker.py | 5 +- 10 files changed, 116 insertions(+), 47 deletions(-) rename language/llama3-405b/{launch.sh => launch_docker.sh} (100%) create mode 100755 language/llama3-405b/with_the_same_user diff --git a/.gitignore b/.gitignore index 9545a7977..eba8bb341 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ loadgen/build/ libmlperf_loadgen.a __pycache__/ generated/ +*.swp diff --git a/language/llama3-405b/README.md b/language/llama3-405b/README.md index dcc5344c4..8df2a81f1 100644 --- a/language/llama3-405b/README.md +++ b/language/llama3-405b/README.md @@ -9,34 +9,64 @@ Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3-405b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. - + ## Prepare environment -Copy the mlperf.conf file to this folder. -``` -cp ../../mlperf.conf . +### Local Environment Run + +The following steps were tested in Ubuntu 22.04 with python 3.10 + +- **Prerrequisite for GPU runs:** Install Nvidia Driver and cuda 12.1. 
+ +The following links contain the commands for installing the [NVIDIA Driver](https://developer.nvidia.com/datacenter-driver-downloads?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) and [Cuda](https://developer.nvidia.com/cuda-12-1-0-download-archive?target_os=Linux&target_arch=x86_64&Distribution=Ubuntu&target_version=22.04&target_type=deb_local) + +- **Prerrequisite:** Install conda. + +```bash +mkdir -p ~/miniconda3 +wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86_64.sh -O ~/miniconda3/miniconda.sh +bash ~/miniconda3/miniconda.sh -b -u -p ~/miniconda3 +rm ~/miniconda3/miniconda.sh +~/miniconda3/bin/conda init ``` -For a CPU-only run: +- Set the following helper variables +```bash +export ROOT=$PWD/inference +export LLAMA_FOLDER=$PWD/inference/language/llama3-405b +export LOADGEN_FOLDER=$PWD/inference/loadgen +export DATASET_FOLDER=$PWD/inference/language/llama3-405b/dataset +``` +- Clone the inference repository: +```bash +git clone --recurse-submodules https://github.com/mlcommons/inference.git \ + --depth 1 ``` -conda create -n llama3-405b python=3.9 + +- Create a conda environment: +```bash +conda create -y -n llama3-405b python=3.10 conda activate llama3-405b +conda install -y -c conda-forge libstdcxx-ng=12 +``` +- Install requirements and loadgen: +```bash +cd $LLAMA_FOLDER # Install packages pip install -r requirements.txt +``` -export CUR_DIR=${PWD} -cd /loadgen - - -python -m pip install . +```bash +cd $LOADGEN_FOLDER +pip install -e . ``` -For a GPU-based run: +### Docker Run A dockerfile is provided, along with scripts to help launch it. First, add any docker volume mounts you want in -`launch.sh`. There is a section at the top of the file that looks like: +`launch_docker.sh`. There is a section at the top of the file that looks like: ``` # Add any volume mounts here with the following syntax # /path/to/src:/path/to/dir/in/container @@ -54,10 +84,13 @@ MOUNTS=( /raid/data:/raid/data ) ``` -Once you have added all your mounts, launch the container with `bash launch.sh`. +Once you have added all your mounts, build and launch the container with `bash launch.sh`. -Inside the container, set up the environment with `bash build.sh`. This will install all the dependencies from the -CPU-only setup, as well as any GPU versions for applicable libraries like PyTorch. 
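The `MOUNTS` array convention described above maps one entry to one `docker run -v` flag. Roughly what the launch script can be expected to assemble, sketched under that assumption (the image tag and the script's internals are illustrative, not taken from the repository):

```python
import shlex

# Each entry uses the "/path/on/host:/path/in/container" syntax shown above.
MOUNTS = ["/home/username/datasets:/datasets", "/raid/data:/raid/data"]

cmd = ["docker", "run", "--rm", "-it", "--gpus", "all"]
for mount in MOUNTS:
    cmd += ["-v", mount]
cmd.append("llama3-405b")  # hypothetical image tag

print(shlex.join(cmd))
# docker run --rm -it --gpus all -v /home/username/datasets:/datasets -v /raid/data:/raid/data llama3-405b
```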
+Now install all the dependencies: +``` +pip install -r requirements.txt +pip install -e ../../loadgen +``` ## Get Model @@ -73,7 +106,7 @@ TODO: Host model and grant access to submitters export CHECKPOINT_PATH=Meta-Llama-3.1-405B-Instruct git lfs install git clone https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct ${CHECKPOINT_PATH} - +cd ${CHECKPOINT_PATH} && git checkout be673f326cab4cd22ccfef76109faf68e41aa5f1 ``` ## Get Dataset @@ -109,9 +142,10 @@ rclone copy mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama ``` python -u main.py --scenario Offline \ --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ --dtype float16 \ --user-conf user.conf \ - --total-sample-count 8312 \ + --total-sample-count 8313 \ --dataset-path ${DATASET_PATH} \ --output-log-dir output \ --tensor-parallel-size ${GPU_COUNT} \ @@ -123,9 +157,10 @@ python -u main.py --scenario Offline \ ``` python -u main.py --scenario Server \ --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ --dtype float16 \ --user-conf user.conf \ - --total-sample-count 8312 \ + --total-sample-count 8313 \ --dataset-path ${DATASET_PATH} \ --output-log-dir output \ --tensor-parallel-size ${GPU_COUNT} \ @@ -145,10 +180,11 @@ mkdir -p "run_outputs" # The script will dump all the outputs to 'run_outputs'. python -u main.py --scenario Offline \ --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ --accuracy \ --dtype float16 \ --user-conf user.conf \ - --total-sample-count 8312 \ + --total-sample-count 8313 \ --dataset-path ${DATASET_PATH} \ --output-log-dir output \ --tensor-parallel-size ${GPU_COUNT} \ @@ -172,10 +208,11 @@ OUTPUT_LOG_DIR=server-accuracy-logs python -u main.py --scenario Server \ --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ --accuracy \ --dtype float16 \ --user-conf user.conf \ - --total-sample-count 8312 \ + --total-sample-count 8313 \ --dataset-path ${DATASET_PATH} \ --output-log-dir output \ --tensor-parallel-size ${GPU_COUNT} \ @@ -201,4 +238,4 @@ Running the GPU implementation in FP16 precision resulted in the following FP16 'tokens_per_sample': 684.68, } ``` - +The accuracy target is 99% for rougeL and exact_match, and 90% for tokens_per_sample diff --git a/language/llama3-405b/launch.sh b/language/llama3-405b/launch_docker.sh similarity index 100% rename from language/llama3-405b/launch.sh rename to language/llama3-405b/launch_docker.sh diff --git a/language/llama3-405b/main.py b/language/llama3-405b/main.py index 26d5726b3..f7802687e 100644 --- a/language/llama3-405b/main.py +++ b/language/llama3-405b/main.py @@ -77,7 +77,7 @@ def get_args(): parser.add_argument( "--total-sample-count", type=int, - default=8312, + default=8313, help="Number of samples to use in benchmark.", ) parser.add_argument( diff --git a/language/llama3-405b/run_accuracy.sh b/language/llama3-405b/run_accuracy.sh index 075245913..9a54d8f13 100644 --- a/language/llama3-405b/run_accuracy.sh +++ b/language/llama3-405b/run_accuracy.sh @@ -5,10 +5,11 @@ mkdir -p "run_outputs" python3 -u main.py --scenario Offline \ --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ --accuracy \ --mlperf-conf mlperf.conf \ --user-conf user.conf \ - --total-sample-count 8312 \ + --total-sample-count 8313 \ --dataset-path ${DATASET_PATH} \ --output-log-dir offline_accuracy_loadgen_logs \ --dtype float32 | tee offline_accuracy_log.log @@ -17,5 +18,3 @@ python3 evaluate-accuracy.py --checkpoint-path ${CHECKPOINT_PATH} \ --mlperf-accuracy-file offline_accuracy_loadgen_logs/mlperf_log_accuracy.json \ --dataset-file ${DATASET_PATH} \ 
--dtype int32 - -python3 consolidate_results.py --dataset-path ${DATASET_PATH} --model-dir ${CHECKPOINT_PATH} diff --git a/language/llama3-405b/run_offline.sh b/language/llama3-405b/run_offline.sh index 89fa9e45f..6b3a56e01 100644 --- a/language/llama3-405b/run_offline.sh +++ b/language/llama3-405b/run_offline.sh @@ -1,10 +1,13 @@ CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" -DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" +DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" python -u main.py --scenario Offline \ - --model-path ${CHECKPOINT_PATH} \ - --mlperf-conf mlperf.conf \ - --user-conf user.conf \ - --total-sample-count 8312 \ - --dataset-path ${DATASET_PATH} \ - --device cpu 2>&1 | tee server_log.log + --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ + --dtype float16 \ + --user-conf user.conf \ + --total-sample-count 8313 \ + --dataset-path ${DATASET_PATH} \ + --output-log-dir output \ + --tensor-parallel-size ${GPU_COUNT} \ + --vllm 2>&1 | tee offline.log diff --git a/language/llama3-405b/run_server.sh b/language/llama3-405b/run_server.sh index fe2a31c43..010a359de 100644 --- a/language/llama3-405b/run_server.sh +++ b/language/llama3-405b/run_server.sh @@ -1,12 +1,15 @@ CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}" -DATASET_PATH="${DATASET_PATH:-open-orca-val-set.pkl}" +DATASET_PATH="${DATASET_PATH:mlperf_llama3.1_405b_dataset_8318.pkl}" python -u main.py --scenario Server \ - --model-path ${CHECKPOINT_PATH} \ - --mlperf-conf mlperf.conf \ - --user-conf user.conf \ - --total-sample-count 8312 \ - --dataset-path ${DATASET_PATH} \ - --device cpu 2>&1 | tee server_log.log + --model-path ${CHECKPOINT_PATH} \ + --batch-size 16 \ + --dtype float16 \ + --user-conf user.conf \ + --total-sample-count 8313 \ + --dataset-path ${DATASET_PATH} \ + --output-log-dir output \ + --tensor-parallel-size ${GPU_COUNT} \ + --vllm 2>&1 | tee server.log diff --git a/language/llama3-405b/with_the_same_user b/language/llama3-405b/with_the_same_user new file mode 100755 index 000000000..cfa57902f --- /dev/null +++ b/language/llama3-405b/with_the_same_user @@ -0,0 +1,27 @@ +#!/usr/bin/env bash +# wkong: manually set the user info in env first + +set -ex + +if [ -z "$@" ]; then + COMMAND=(bash) +else + COMMAND=("$@") +fi + +apt-get update && apt-get install -y sudo + +getent group "${CI_BUILD_GID}" || addgroup --gid "${CI_BUILD_GID}" "${CI_BUILD_GROUP}" +getent passwd "${CI_BUILD_UID}" || adduser --gid "${CI_BUILD_GID}" --uid "${CI_BUILD_UID}" --gecos "${CI_BUILD_USER} (generated by with_the_same_user script)" --disabled-password --quiet "${CI_BUILD_USER}" + +usermod -a -G dip "${CI_BUILD_USER}" +usermod -a -G sudo "${CI_BUILD_USER}" +usermod -a -G root "${CI_BUILD_USER}" + +echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers + +sudo -H -u "#${CI_BUILD_UID}" --preserve-env \ + PATH="${PATH}" \ + LD_LIBRARY_PATH="${LD_LIBRARY_PATH}" \ + PYTHONPATH="${PYTHONPATH}" \ + ${COMMAND[@]} diff --git a/loadgen/mlperf.conf b/loadgen/mlperf.conf index 31ad5ef62..1fe202253 100644 --- a/loadgen/mlperf.conf +++ b/loadgen/mlperf.conf @@ -14,7 +14,7 @@ dlrm-v2.*.performance_sample_count_override = 204800 rnnt.*.performance_sample_count_override = 2513 gptj.*.performance_sample_count_override = 13368 llama2-70b.*.performance_sample_count_override = 24576 -llama3-405b.*.performance_sample_count_override = 8312 +llama3-405b.*.performance_sample_count_override = 8313 stable-diffusion-xl.*.performance_sample_count_override = 5000 
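One caveat on the run scripts in this patch: assignments like `CHECKPOINT_PATH="${CHECKPOINT_PATH:Meta-Llama-3.1-405B-Instruct}"` are missing the dash of bash's default-value expansion. `${VAR:-default}` substitutes a fallback when `VAR` is unset or empty, whereas `${VAR:offset}` is substring expansion whose offset must be an arithmetic expression, so these lines error out instead of supplying a default. The intended behavior, written out in Python for clarity (illustrative, not repository code):

```python
import os

# What run_offline.sh and run_server.sh intend: use the environment
# variable when it is set and non-empty, otherwise fall back to a default.
checkpoint_path = os.environ.get("CHECKPOINT_PATH") or "Meta-Llama-3.1-405B-Instruct"
dataset_path = os.environ.get("DATASET_PATH") or "mlperf_llama3.1_405b_dataset_8318.pkl"
```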
rgat.*.performance_sample_count_override = 788379 # set to 0 to let entire sample set to be performance sample @@ -84,8 +84,8 @@ llama3-405b.Server.tpot_latency = 175 *.Offline.min_duration = 600000 # In Offline scenario, we always have one query. But LoadGen maps this to -# min_sample_count internally in Offline scenario. If the dataset size is larger -# than 24576 we limit the min_query_count to 24576 and otherwise we use +# min_sample_count internally in Offline scenario. If the dataset size is larger +# than 24576 we limit the min_query_count to 24576 and otherwise we use # the dataset size as the limit resnet50.Offline.min_query_count = 24576 @@ -97,7 +97,7 @@ rnnt.Offline.min_query_count = 2513 3d-unet.Offline.min_query_count = 43 stable-diffusion-xl.Offline.min_query_count = 5000 llama2-70b.Offline.min_query_count = 24576 -llama3-405b.Offline.min_query_count = 8312 +llama3-405b.Offline.min_query_count = 8313 mixtral-8x7b.Offline.min_query_count = 15000 rgat.Offline.min_query_count = 788379 diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index a5165b381..43fa1350c 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -429,7 +429,7 @@ "llama2-70b-99.9": 24576, "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, - "llama3-405b": 8312, + "llama3-405b": 8313, "rgat": 788379 }, @@ -579,8 +579,7 @@ "llama2-70b-99.9": 24576, "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, - "llama3-405b": 8312, - "llama2-405b-99.9": 8312, + "llama3-405b": 8313, "rgat": 788379, } From a2c8a361af08da2fcc8ff77c57713130f101036d Mon Sep 17 00:00:00 2001 From: mrmhodak Date: Fri, 13 Dec 2024 04:38:49 +0000 Subject: [PATCH 044/112] Increment version to 5.0.2 --- loadgen/VERSION.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt index 6b244dcd6..a1ef0cae1 100644 --- a/loadgen/VERSION.txt +++ b/loadgen/VERSION.txt @@ -1 +1 @@ -5.0.1 +5.0.2 From a988a327d572c8b2ec2fe5977a57360ec73e644d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 13 Dec 2024 20:57:22 +0530 Subject: [PATCH 045/112] Fix submission generation for v5.0 (#1981) * Fix submission checker for v5.0 rgat * Fix accuracy pattern for rgat, report-generator for v5.0 --- tools/submission/generate_final_report.py | 99 +++++++++++++++-------- tools/submission/submission_checker.py | 2 +- 2 files changed, 67 insertions(+), 34 deletions(-) diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index 5e5d22c45..34ae82fb1 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -160,39 +160,72 @@ def main(): ], ] - filter_scenarios = { - "datacenter": { - "resnet": ["Server", "Offline"], - "retinanet": ["Server", "Offline"], - "rnnt": ["Server", "Offline"], - "bert-99": ["Server", "Offline"], - "bert-99.9": ["Server", "Offline"], - "dlrm-v2-99": ["Server", "Offline"], - "dlrm-v2-99.9": ["Server", "Offline"], - "3d-unet-99": ["Offline"], - "3d-unet-99.9": ["Offline"], - "gptj-99": ["Server", "Offline"], - "gptj-99.9": ["Server", "Offline"], - "stable-diffusion-xl": ["Server", "Offline"], - "llama2-70b-99": ["Server", "Offline"], - "llama2-70b-99.9": ["Server", "Offline"], - "mixtral-8x7b": ["Server", "Offline"], - }, - "edge": { - "resnet": ["SingleStream", "MultiStream", "Offline"], - "retinanet": ["SingleStream", "MultiStream", "Offline"], - "rnnt": ["SingleStream", "Offline"], - "bert-99": ["SingleStream", 
"Offline"], - "bert-99.9": [], - "dlrm-v2-99": [], - "dlrm-v2-99.9": [], - "3d-unet-99": ["SingleStream", "Offline"], - "3d-unet-99.9": ["SingleStream", "Offline"], - "gptj-99": ["SingleStream", "Offline"], - "gptj-99.9": ["SingleStream", "Offline"], - "stable-diffusion-xl": ["SingleStream", "Offline"], - }, - } + if args.version == "4.1": + filter_scenarios = { + "datacenter": { + "resnet": ["Server", "Offline"], + "retinanet": ["Server", "Offline"], + "rnnt": ["Server", "Offline"], + "bert-99": ["Server", "Offline"], + "bert-99.9": ["Server", "Offline"], + "dlrm-v2-99": ["Server", "Offline"], + "dlrm-v2-99.9": ["Server", "Offline"], + "3d-unet-99": ["Offline"], + "3d-unet-99.9": ["Offline"], + "gptj-99": ["Server", "Offline"], + "gptj-99.9": ["Server", "Offline"], + "stable-diffusion-xl": ["Server", "Offline"], + "llama2-70b-99": ["Server", "Offline"], + "llama2-70b-99.9": ["Server", "Offline"], + "mixtral-8x7b": ["Server", "Offline"], + }, + "edge": { + "resnet": ["SingleStream", "MultiStream", "Offline"], + "retinanet": ["SingleStream", "MultiStream", "Offline"], + "rnnt": ["SingleStream", "Offline"], + "bert-99": ["SingleStream", "Offline"], + "bert-99.9": [], + "dlrm-v2-99": [], + "dlrm-v2-99.9": [], + "3d-unet-99": ["SingleStream", "Offline"], + "3d-unet-99.9": ["SingleStream", "Offline"], + "gptj-99": ["SingleStream", "Offline"], + "gptj-99.9": ["SingleStream", "Offline"], + "stable-diffusion-xl": ["SingleStream", "Offline"], + }, + } + else: + filter_scenarios = { + "datacenter": { + "resnet": ["Server", "Offline"], + "retinanet": ["Server", "Offline"], + "rnnt": ["Server", "Offline"], + "dlrm-v2-99": ["Server", "Offline"], + "dlrm-v2-99.9": ["Server", "Offline"], + "3d-unet-99": ["Offline"], + "3d-unet-99.9": ["Offline"], + "gptj-99": ["Server", "Offline"], + "gptj-99.9": ["Server", "Offline"], + "stable-diffusion-xl": ["Server", "Offline"], + "llama2-70b-99": ["Server", "Offline"], + "llama2-70b-99.9": ["Server", "Offline"], + "mixtral-8x7b": ["Server", "Offline"], + "rgat": ["Offline"], + "llama3-405b": ["Offline", "Server"] + }, + "edge": { + "resnet": ["SingleStream", "MultiStream", "Offline"], + "retinanet": ["SingleStream", "MultiStream", "Offline"], + "rnnt": ["SingleStream", "Offline"], + "bert-99": ["SingleStream", "Offline"], + "bert-99.9": ["SingleStream", "Offline"], + "3d-unet-99": ["SingleStream", "Offline"], + "3d-unet-99.9": ["SingleStream", "Offline"], + "gptj-99": ["SingleStream", "Offline"], + "gptj-99.9": ["SingleStream", "Offline"], + "stable-diffusion-xl": ["SingleStream", "Offline"], + }, + } def MakeWorksheet(df, index, filter_dict, sheet_name, outjsondata=[]): for key, value in filter_dict.items(): diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 43fa1350c..4a463f304 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -677,7 +677,7 @@ } ACC_PATTERN = { - "acc": r"^accuracy=([\d\.]+).*", + "acc": r"^(?:\{\"accuracy|accuracy)[\": ]*=?\s*([\d\.]+).*", "AUC": r"^AUC=([\d\.]+).*", "mAP": r"^mAP=([\d\.]+).*", "bleu": r"^BLEU\:\s*([\d\.]+).*", From e24ba2147205c0d310afc8f90934ff92670e292d Mon Sep 17 00:00:00 2001 From: Zhihan Jiang <68881590+nvzhihanj@users.noreply.github.com> Date: Tue, 17 Dec 2024 12:27:19 -0800 Subject: [PATCH 046/112] More minor fixes for llama3.1-405b (#1983) * More minor fixes * Fix indentation for stats report --- language/llama3-405b/SUT_VLLM.py | 22 +++++++++++----------- language/llama3-405b/dataset.py | 2 +- 
language/llama3-405b/evaluate-accuracy.py | 2 +- 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/language/llama3-405b/SUT_VLLM.py b/language/llama3-405b/SUT_VLLM.py index e64999d09..f5a802021 100644 --- a/language/llama3-405b/SUT_VLLM.py +++ b/language/llama3-405b/SUT_VLLM.py @@ -31,7 +31,7 @@ def __init__( model_path=None, dtype="bfloat16", batch_size=None, - total_sample_count=8312, + total_sample_count=8313, dataset_path=None, use_cached_outputs=False, # Set this to True *only for test accuracy runs* in case your prior @@ -140,16 +140,16 @@ def process_queries(self): n_tokens)] lg.QuerySamplesComplete(response) - tok = time.time() + tok = time.time() - with self.sample_counter_lock: - self.sample_counter += len(qitem) - log.info(f"Samples run: {self.sample_counter}") - if tik1: - log.info(f"\tBatchMaker time: {tik2 - tik1}") - log.info(f"\tInference time: {tik3 - tik2}") - log.info(f"\tPostprocess time: {tok - tik3}") - log.info(f"\t==== Total time: {tok - tik1}") + with self.sample_counter_lock: + self.sample_counter += len(qitem) + log.info(f"Samples run: {self.sample_counter}") + if tik1: + log.info(f"\tBatchMaker time: {tik2 - tik1}") + log.info(f"\tInference time: {tik3 - tik2}") + log.info(f"\tPostprocess time: {tok - tik3}") + log.info(f"\t==== Total time: {tok - tik1}") def load_model(self): log.info("Loading model...") @@ -194,7 +194,7 @@ def __init__( self, model_path=None, dtype="bfloat16", - total_sample_count=8312, + total_sample_count=8313, dataset_path=None, batch_size=None, workers=1, diff --git a/language/llama3-405b/dataset.py b/language/llama3-405b/dataset.py index 04fe9c4b2..084f13208 100644 --- a/language/llama3-405b/dataset.py +++ b/language/llama3-405b/dataset.py @@ -24,7 +24,7 @@ class Dataset: def __init__( self, model_name=None, - total_sample_count=8312, + total_sample_count=8313, perf_count_override=None, dataset_path=None, dtype="bfloat16" diff --git a/language/llama3-405b/evaluate-accuracy.py b/language/llama3-405b/evaluate-accuracy.py index ccc87f71f..f5677820e 100644 --- a/language/llama3-405b/evaluate-accuracy.py +++ b/language/llama3-405b/evaluate-accuracy.py @@ -141,7 +141,7 @@ def main(): tokenizer = AutoTokenizer.from_pretrained( checkpoint_path, - model_max_length=2048, + model_max_length=22000, padding_side="left", use_fast=False, ) From 659e563e81e9a66c4a7e2e2b824a2aeccc742f84 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Wed, 18 Dec 2024 12:28:57 -0500 Subject: [PATCH 047/112] Remove unused rgat files (#1961) Co-authored-by: Miro --- graph/R-GAT/igbh/tiny/models/dataloader.py | 82 ------ graph/R-GAT/igbh/tiny/models/gnn.py | 296 --------------------- graph/R-GAT/igbh/tiny/models/main.py | 79 ------ graph/R-GAT/igbh/tiny/models/utils.py | 224 ---------------- 4 files changed, 681 deletions(-) delete mode 100644 graph/R-GAT/igbh/tiny/models/dataloader.py delete mode 100644 graph/R-GAT/igbh/tiny/models/gnn.py delete mode 100644 graph/R-GAT/igbh/tiny/models/main.py delete mode 100644 graph/R-GAT/igbh/tiny/models/utils.py diff --git a/graph/R-GAT/igbh/tiny/models/dataloader.py b/graph/R-GAT/igbh/tiny/models/dataloader.py deleted file mode 100644 index cc64d1466..000000000 --- a/graph/R-GAT/igbh/tiny/models/dataloader.py +++ /dev/null @@ -1,82 +0,0 @@ -import torch -from torch_geometric.data import InMemoryDataset, Data -from dgl.data import DGLDataset - -from utils import IGL260MDataset - -# TODO: Make a PyG dataloader for large datasets - - -class IGL260M_PyG(InMemoryDataset): - def __init__(self, args): - super().__init__(root, 
transform, pre_transform, pre_filter) - - def process(self): - dataset = IGL260MDataset(root=self.dir, size=args.dataset_size, - in_memory=args.in_memory, classes=args.type_classes, synthetic=args.synthetic) - node_features = torch.from_numpy(dataset.paper_feat) - node_edges = torch.from_numpy(dataset.paper_edge).T - node_labels = torch.from_numpy(dataset.paper_label).to(torch.long) - data = Data(x=node_features, edge_index=node_edges, y=node_labels) - - n_nodes = node_features.shape[0] - - n_train = int(n_nodes * 0.6) - n_val = int(n_nodes * 0.2) - - train_mask = torch.zeros(n_nodes, dtype=torch.bool) - val_mask = torch.zeros(n_nodes, dtype=torch.bool) - test_mask = torch.zeros(n_nodes, dtype=torch.bool) - - train_mask[:n_train] = True - val_mask[n_train:n_train + n_val] = True - test_mask[n_train + n_val:] = True - - data.train_mask = train_mask - data.val_mask = val_mask - data.test_mask = test_mask - - -class IGL260M_DGL(DGLDataset): - def __init__(self, args): - self.dir = args.path - super().__init__(name='IGB260M') - - def process(self): - dataset = IGL260MDataset(root=self.dir, size=args.dataset_size, - in_memory=args.in_memory, classes=args.type_classes, synthetic=args.synthetic) - node_features = torch.from_numpy(dataset.paper_feat) - node_edges = torch.from_numpy(dataset.paper_edge) - node_labels = torch.from_numpy(dataset.paper_label).to(torch.long) - - self.graph = dgl.graph( - (node_edges[:, 0], node_edges[:, 1]), num_nodes=node_features.shape[0]) - - self.graph.ndata['feat'] = node_features - self.graph.ndata['label'] = node_labels - - self.graph = dgl.remove_self_loop(self.graph) - self.graph = dgl.add_self_loop(self.graph) - - n_nodes = node_features.shape[0] - - n_train = int(n_nodes * 0.6) - n_val = int(n_nodes * 0.2) - - train_mask = torch.zeros(n_nodes, dtype=torch.bool) - val_mask = torch.zeros(n_nodes, dtype=torch.bool) - test_mask = torch.zeros(n_nodes, dtype=torch.bool) - - train_mask[:n_train] = True - val_mask[n_train:n_train + n_val] = True - test_mask[n_train + n_val:] = True - - self.graph.ndata['train_mask'] = train_mask - self.graph.ndata['val_mask'] = val_mask - self.graph.ndata['test_mask'] = test_mask - - def __getitem__(self, i): - return self.graph - - def __len__(self): - return 1 diff --git a/graph/R-GAT/igbh/tiny/models/gnn.py b/graph/R-GAT/igbh/tiny/models/gnn.py deleted file mode 100644 index 20d5ecd72..000000000 --- a/graph/R-GAT/igbh/tiny/models/gnn.py +++ /dev/null @@ -1,296 +0,0 @@ -from utils import IGL260MDataset -import warnings -from tqdm import tqdm -import numpy as np -import time -import torch.nn.functional as F -import torch.optim as optim -import torch.nn as nn -import dgl -from dgl.data import DGLDataset -import dgl.nn.pytorch as dglnn -from dgl.nn.pytorch import GATConv, GraphConv, SAGEConv -import os.path as osp -from sys import getsizeof - - -import torch -torch.manual_seed(0) -dgl.seed(0) -warnings.filterwarnings("ignore") - - -class GCN(nn.Module): - def __init__(self, - in_feats, - n_hidden, - n_classes, - n_layers, - activation, - dropout): - super(GCN, self).__init__() - self.layers = nn.ModuleList() - self.n_layers = n_layers - self.n_hidden = n_hidden - self.n_classes = n_classes - # input layer - self.layers.append( - GraphConv( - in_feats, - n_hidden, - activation=activation)) - # hidden layers - for i in range(n_layers - 1): - self.layers.append( - GraphConv( - n_hidden, - n_hidden, - activation=activation)) - # output layer - self.layers.append(GraphConv(n_hidden, n_classes)) - self.dropout = nn.Dropout(p=dropout) - 
self.activation = activation - - def forward(self, blocks, x): - h = x - for l, (layer, block) in enumerate(zip(self.layers, blocks)): - if l != len(self.layers) - 1: - # h = self.activation(h) - h = self.dropout(h) - h = layer(block, h) - return h - - def inference(self, g, x, batch_size, device): - """ - Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). - g : the entire graph. - x : the input of entire node set. - The inference code is written in a fashion that it could handle any number of nodes and - layers. - """ - # During inference with sampling, multi-layer blocks are very inefficient because - # lots of computations in the first few layers are repeated. - # Therefore, we compute the representation of all nodes layer by layer. The nodes - # on each layer are of course splitted in batches. - # TODO: can we standardize this? - for l, layer in enumerate(self.layers): - y = torch.zeros(g.number_of_nodes(), self.n_hidden if l != - len(self.layers) - 1 else self.n_classes) - - sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) - dataloader = dgl.dataloading.NodeDataLoader( - g, - torch.arange(g.number_of_nodes()), - sampler, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=4) - - for input_nodes, output_nodes, blocks in dataloader: - block = blocks[0] - - block = block.int().to(device) - h = x[input_nodes].to(device) - h = layer(block, h) - if l != len(self.layers) - 1: - h = self.activation(h) - h = self.dropout(h) - - y[output_nodes] = h.cpu() - - x = y - return y - - -class GAT(nn.Module): - def __init__( - self, in_feats, n_hidden, n_classes, n_layers, num_heads, activation - ): - super().__init__() - self.n_layers = n_layers - self.n_hidden = n_hidden - self.n_classes = n_classes - self.layers = nn.ModuleList() - self.layers.append( - dglnn.GATConv( - (in_feats, in_feats), - n_hidden, - num_heads=num_heads, - activation=activation, - ) - ) - for i in range(1, n_layers - 1): - self.layers.append( - dglnn.GATConv( - (n_hidden * num_heads, n_hidden * num_heads), - n_hidden, - num_heads=num_heads, - activation=activation, - ) - ) - self.layers.append( - dglnn.GATConv( - (n_hidden * num_heads, n_hidden * num_heads), - n_classes, - num_heads=num_heads, - activation=None, - ) - ) - - def forward(self, blocks, x): - h = x - for l, (layer, block) in enumerate(zip(self.layers, blocks)): - # We need to first copy the representation of nodes on the RHS from the - # appropriate nodes on the LHS. - # Note that the shape of h is (num_nodes_LHS, D) and the shape of h_dst - # would be (num_nodes_RHS, D) - h_dst = h[: block.num_dst_nodes()] - # Then we compute the updated representation on the RHS. - # The shape of h now becomes (num_nodes_RHS, D) - if l < self.n_layers - 1: - h = layer(block, (h, h_dst)).flatten(1) - else: - h = layer(block, (h, h_dst)) - h = h.mean(1) - return h.log_softmax(dim=-1) - - def inference(self, g, x, batch_size, device): - """ - Inference with the GAT model on full neighbors (i.e. without neighbor sampling). - g : the entire graph. - x : the input of entire node set. - The inference code is written in a fashion that it could handle any number of nodes and - layers. - """ - # During inference with sampling, multi-layer blocks are very inefficient because - # lots of computations in the first few layers are repeated. - # Therefore, we compute the representation of all nodes layer by layer. The nodes - # on each layer are of course splitted in batches. - # TODO: can we standardize this? 
- # TODO: make thiw into a variable - num_heads = 2 - for l, layer in enumerate(self.layers): - if l < self.n_layers - 1: - y = torch.zeros( - g.num_nodes(), - self.n_hidden * num_heads - if l != len(self.layers) - 1 - else self.n_classes, - ) - else: - y = torch.zeros( - g.num_nodes(), - self.n_hidden - if l != len(self.layers) - 1 - else self.n_classes, - ) - - sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) - dataloader = dgl.dataloading.DataLoader( - g, - torch.arange(g.num_nodes()), - sampler, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=4, - ) - - for input_nodes, output_nodes, blocks in dataloader: - block = blocks[0].int().to(device) - - h = x[input_nodes].to(device) - h_dst = h[: block.num_dst_nodes()] - if l < self.n_layers - 1: - h = layer(block, (h, h_dst)).flatten(1) - else: - h = layer(block, (h, h_dst)) - h = h.mean(1) - h = h.log_softmax(dim=-1) - - y[output_nodes] = h.cpu() - - x = y - return y - - -class SAGE(nn.Module): - def __init__(self, - in_feats, - n_hidden, - n_classes, - n_layers, - activation, - dropout, - aggregator_type): - super().__init__() - self.n_layers = n_layers - self.n_hidden = n_hidden - self.n_classes = n_classes - self.layers = nn.ModuleList() - self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, aggregator_type)) - for i in range(1, n_layers - 1): - self.layers.append( - dglnn.SAGEConv( - n_hidden, - n_hidden, - aggregator_type)) - self.layers.append( - dglnn.SAGEConv( - n_hidden, - n_classes, - aggregator_type)) - self.dropout = nn.Dropout(dropout) - self.activation = activation - - def forward(self, blocks, x): - h = x - for l, (layer, block) in enumerate(zip(self.layers, blocks)): - h = layer(block, h) - if l != len(self.layers) - 1: - h = self.activation(h) - h = self.dropout(h) - return h - - def inference(self, g, x, batch_size, device): - """ - Inference with the GraphSAGE model on full neighbors (i.e. without neighbor sampling). - g : the entire graph. - x : the input of entire node set. - The inference code is written in a fashion that it could handle any number of nodes and - layers. - """ - # During inference with sampling, multi-layer blocks are very inefficient because - # lots of computations in the first few layers are repeated. - # Therefore, we compute the representation of all nodes layer by layer. The nodes - # on each layer are of course splitted in batches. - # TODO: can we standardize this? 
- for l, layer in enumerate(self.layers): - y = torch.zeros(g.number_of_nodes(), self.n_hidden if l != - len(self.layers) - 1 else self.n_classes) - - sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1) - dataloader = dgl.dataloading.NodeDataLoader( - g, - torch.arange(g.number_of_nodes()), - sampler, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=4) - - for input_nodes, output_nodes, blocks in dataloader: - block = blocks[0] - - block = block.int().to(device) - h = x[input_nodes].to(device) - h = layer(block, h) - if l != len(self.layers) - 1: - h = self.activation(h) - h = self.dropout(h) - - y[output_nodes] = h.cpu() - - x = y - return y diff --git a/graph/R-GAT/igbh/tiny/models/main.py b/graph/R-GAT/igbh/tiny/models/main.py deleted file mode 100644 index 4ab22eb75..000000000 --- a/graph/R-GAT/igbh/tiny/models/main.py +++ /dev/null @@ -1,79 +0,0 @@ -import argparse - - -def main(): - parser = argparse.ArgumentParser() - - # Input/output paths - parser.add_argument('--path', type=str, default='/gnndataset/') - parser.add_argument('--modelpath', type=str, default='gcn_19.pt') - - # Dataset selection - parser.add_argument( - '--dataset_size', - type=str, - default='experimental', - choices=[ - 'experimental', - 'small', - 'medium', - 'large', - 'full']) - parser.add_argument( - '--type_classes', - type=int, - default=19, - choices=[ - 19, - 292, - 2983]) - - # Hyperparameters - parser.add_argument('--hidden_channels', type=int, default=16) - parser.add_argument('--fan_out', type=str, default='5,10') - parser.add_argument('--num_layers', type=int, default=2) - parser.add_argument('--learning_rate', type=int, default=0.01) - parser.add_argument('--decay', type=int, default=0.001) - parser.add_argument('--num_workers', type=int, default=4) - parser.add_argument('--batch_size', type=int, default=2048 * 16) - parser.add_argument('--dropout', type=float, default=0.2) - parser.add_argument('--epochs', type=int, default=20) - parser.add_argument( - '--model_type', - type=str, - default='gcn', - choices=[ - 'gat', - 'sage', - 'gcn']) - parser.add_argument('--in_memory', type=int, default=0) - parser.add_argument('--synthetic', type=int, default=0) - parser.add_argument('--device', type=str, default='1') - args = parser.parse_args() - - print("Dataset_size: " + args.dataset_size) - print("Model : " + args.model) - print("Num_classes : " + str(args.num_classes)) - print() - - device = f'cuda:' + args.device if torch.cuda.is_available() else 'cpu' - - dataset = IGL260M_DGL(args) - g = dataset[0] - - best_test_acc, train_acc, test_acc = track_acc(g, args) - - print( - f"Train accuracy: {np.mean(train_acc):.2f} \u00B1 {np.std(train_acc):.2f} \t Best: {np.max(train_acc) * 100:.4f}%") - print( - f"Test accuracy: {np.mean(test_acc):.2f} \u00B1 {np.std(test_acc):.2f} \t Best: {np.max(test_acc) * 100:.4f}%") - print() - print(" -------- For debugging --------- ") - print("Parameters: ", args) - print(g) - print("Train accuracy: ", train_acc) - print("Test accuracy: ", test_acc) - - -if __name__ == '__main__': - main() diff --git a/graph/R-GAT/igbh/tiny/models/utils.py b/graph/R-GAT/igbh/tiny/models/utils.py deleted file mode 100644 index 5e9e1a25d..000000000 --- a/graph/R-GAT/igbh/tiny/models/utils.py +++ /dev/null @@ -1,224 +0,0 @@ -import numpy as np -import torch - - -class IGL260MDataset(object): - def __init__(self, root: str, size: str, in_memory: int, - classes: int, synthetic: int): - self.dir = root - self.size = size - self.synthetic = synthetic - self.in_memory = 
in_memory - self.num_classes = classes - self.__meta__ = torch.load(osp.join(self.dir, self.size, 'meta.pt')) - - self.num_features = self.__meta__['paper']['emb_dim'] - self.num_nodes = self.__meta__['paper']['num_node'] - self.num_edges = self.__meta__['cites']['num_edge'] - - @property - def paper_feat(self) -> np.ndarray: - if self.synthetic: - return np.random((self.num_nodes, self.num_edges)) - - path = osp.join( - self.dir, - self.size, - 'processed', - 'paper', - 'node_feat.npy') - if self.in_memory: - return np.load(path) - else: - return np.load(path, mmap_mode='r') - - @property - def paper_label(self) -> np.ndarray: - if self.num_classes == 19: - path = osp.join( - self.dir, - self.size, - 'processed', - 'paper', - 'node_label_19.npy') - else: - path = osp.join( - self.dir, - self.size, - 'processed', - 'paper', - 'node_label_2K.npy') - if self.in_memory: - return np.load(path) - else: - return np.load(path, mmap_mode='r') - - @property - def paper_edge(self) -> np.ndarray: - path = osp.join( - self.dir, - self.size, - 'processed', - 'paper__cites__paper', - 'edge_index.npy') - if self.in_memory: - return np.load(path) - else: - return np.load(path, mmap_mode='r') - - -def compute_acc(pred, labels): - """ - Compute the accuracy of prediction given the labels. - """ - labels = labels.long() - return (torch.argmax(pred, dim=1) == labels).float().sum() / len(pred) - - -def evaluate(model, g, inputs, labels, val_nid, batch_size, device): - """ - Evaluate the model on the validation set specified by ``val_nid``. - g : The entire graph. - inputs : The features of all the nodes. - labels : The labels of all the nodes. - val_nid : the node Ids for validation. - batch_size : Number of nodes to compute at the same time. - device : The GPU device to evaluate on. - """ - model.eval() - with torch.no_grad(): - pred = model.inference(g, inputs, batch_size, device) - model.train() - return compute_acc(pred[val_nid], labels[val_nid]) - - -def load_subtensor(g, seeds, input_nodes, device): - """ - Copys features and labels of a set of nodes onto GPU. - """ - batch_inputs = g.ndata['features'][input_nodes].to(device) - batch_labels = g.ndata['labels'][seeds].to(device) - return batch_inputs, batch_labels - - -def track_acc(g, args): - train_accuracy = [] - test_accuracy = [] - g.ndata['features'] = g.ndata['feat'] - g.ndata['labels'] = g.ndata['label'] - in_feats = g.ndata['features'].shape[1] - n_classes = args.num_classes - - # Create csr/coo/csc formats before launching training processes with multi-gpu. - # This avoids creating certain formats in each sub-process, which saves - # momory and CPU. 
- g.create_formats_() - - num_epochs = args.epochs - num_hidden = args.hidden_channels - num_layers = args.num_layers - fan_out = args.fan_out - batch_size = args.batch_size - lr = args.learning_rate - dropout = args.dropout - num_workers = args.num_workers - - train_nid = torch.nonzero(g.ndata['train_mask'], as_tuple=True)[0] - - # Create PyTorch DataLoader for constructing blocks - sampler = dgl.dataloading.MultiLayerNeighborSampler( - [int(fanout) for fanout in fan_out.split(',')]) - - dataloader = dgl.dataloading.NodeDataLoader( - g, - train_nid, - sampler, - batch_size=batch_size, - shuffle=True, - drop_last=False, - num_workers=num_workers) - - # Define model and optimizer - if args.model_type == 'gcn': - model = GCN(in_feats, num_hidden, n_classes, 1, F.relu, dropout) - if args.model_type == 'sage': - model = SAGE( - in_feats, - num_hidden, - n_classes, - num_layers, - F.relu, - dropout, - 'gcn') - if args.model_type == 'gat': - model = GAT(in_feats, num_hidden, n_classes, num_layers, 2, F.relu) - - model = model.to(device) - loss_fcn = nn.CrossEntropyLoss() - loss_fcn = loss_fcn.to(device) - optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=args.decay) - - # Training loop - avg = 0 - best_test_acc = 0 - log_every = 1 - training_start = time.time() - for epoch in (range(num_epochs)): - # Loop over the dataloader to sample the computation dependency graph as a list of - # blocks. - epoch_loss = 0 - gpu_mem_alloc = 0 - epoch_start = time.time() - for step, (input_nodes, seeds, blocks) in (enumerate(dataloader)): - # Load the input features as well as output labels - # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device) - blocks = [block.int().to(device) for block in blocks] - batch_inputs = blocks[0].srcdata['features'] - batch_labels = blocks[-1].dstdata['labels'] - - # Compute loss and prediction - batch_pred = model(blocks, batch_inputs) - loss = loss_fcn(batch_pred, batch_labels) - optimizer.zero_grad() - loss.backward() - optimizer.step() - - epoch_loss += loss.detach() - - gpu_mem_alloc += ( - torch.cuda.max_memory_allocated() / 1000000 - if torch.cuda.is_available() - else 0 - ) - - train_g = g - train_nid = torch.nonzero( - train_g.ndata['train_mask'], as_tuple=True)[0] - train_acc = evaluate( - model, train_g, train_g.ndata['features'], train_g.ndata['labels'], train_nid, batch_size, device) - - test_g = g - test_nid = torch.nonzero( - test_g.ndata['test_mask'], as_tuple=True)[0] - test_acc = evaluate( - model, test_g, test_g.ndata['features'], test_g.ndata['labels'], test_nid, batch_size, device) - - if test_acc.item() > best_test_acc: - best_test_acc = test_acc.item() - tqdm.write( - "Epoch {:05d} | Loss {:.4f} | Train Acc {:.4f} | Test Acc {:.4f} | Time {:.2f}s | GPU {:.1f} MB".format( - epoch, - epoch_loss, - train_acc.item(), - test_acc.item(), - time.time() - epoch_start, - gpu_mem_alloc - ) - ) - test_accuracy.append(test_acc.item()) - train_accuracy.append(train_acc.item()) - torch.save(model.state_dict(), args.modelpath) - print() - print("Total time taken: ", time.time() - training_start) - - return best_test_acc, train_accuracy, test_accuracy From f4c1f9a847cf41df78e33ada79034863529f7c68 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Wed, 18 Dec 2024 12:31:23 -0500 Subject: [PATCH 048/112] Update docker GPU, avoid long build time (#1966) Co-authored-by: Miro --- graph/R-GAT/README.md | 5 +++-- graph/R-GAT/dockerfile.gpu | 6 ++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/graph/R-GAT/README.md 
b/graph/R-GAT/README.md index 569233ac6..aecf7ffe9 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -181,9 +181,10 @@ docker build . -f dockerfile.gpu -t rgat-gpu ``` Run docker container: ```bash -docker run --rm -it -v $(pwd):/root --gpus all rgat-gpu +docker run --rm -it -v $(pwd):/workspace/root --gpus all rgat-gpu ``` -Run benchmark inside the docker container: +Go inside the root folder and run benchmark inside the docker container: ```bash +cd root python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full --device gpu [--model-path ] [--in-memory] [--dtype ] [--scenario ] ``` diff --git a/graph/R-GAT/dockerfile.gpu b/graph/R-GAT/dockerfile.gpu index fae65081f..f600028fe 100644 --- a/graph/R-GAT/dockerfile.gpu +++ b/graph/R-GAT/dockerfile.gpu @@ -26,6 +26,8 @@ RUN apt install -y --no-install-recommends rsync # Upgrade pip RUN python3 -m pip install --upgrade pip +RUN pip install torch-geometric torch-scatter torch-sparse -f https://pytorch-geometric.com/whl/torch-2.1.0+cu121.html +RUN pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html COPY requirements.txt requirements.txt RUN pip install -r requirements.txt @@ -35,10 +37,6 @@ RUN cd /tmp && \ pip install pybind11 && \ CFLAGS="-std=c++14" python3 setup.py install -RUN export TORCH_VERSION=$(python -c "import torch; print(torch.__version__)") -RUN pip install torch-geometric torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-${TORCH_VERSION}.html -RUN pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html - # Clean up RUN rm -rf mlperf \ rm requirements.txt \ No newline at end of file From ea5153f7920a6c463dc03c134b1eba620cc4e708 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Wed, 18 Dec 2024 13:15:33 -0500 Subject: [PATCH 049/112] Require equal issue mode for R-GAT (#1968) * Require equal issue mode for R-GAT * Add equal issue note in readme --------- Co-authored-by: Miro --- graph/R-GAT/README.md | 2 ++ loadgen/mlperf.conf | 3 +++ tools/submission/submission_checker.py | 1 + 3 files changed, 6 insertions(+) diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index aecf7ffe9..69883c0d1 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -188,3 +188,5 @@ Go inside the root folder and run benchmark inside the docker container: cd root python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full --device gpu [--model-path ] [--in-memory] [--dtype ] [--scenario ] ``` + +**NOTE:** For official submissions, this benchmark is required to run in equal issue mode. Please make sure that the flag `rgat.*.sample_concatenate_permutation` is set to one in the [mlperf.conf](../../loadgen/mlperf.conf) file when loadgen is built. 
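For context, equal issue mode only takes effect once a harness loads the per-benchmark settings. Below is a minimal sketch of that plumbing, assuming the loadgen Python bindings used elsewhere in this series (the `FromConfig` call mirrors the one in the llama3.1-405b `main.py`; the QSL size of 788,379 matches the rgat entries in mlperf.conf, while `user.conf` and the callback bodies are placeholders):

```python
# Minimal sketch, not part of this patch: with
# rgat.*.sample_concatenate_permutation = 1 compiled into loadgen via
# mlperf.conf, the harness only needs to load the rgat settings and run.
import mlperf_loadgen as lg

def issue_queries(query_samples):
    # A real harness would run R-GAT on the sampled node ids here; empty
    # responses are used only to show the completion contract.
    lg.QuerySamplesComplete(
        [lg.QuerySampleResponse(qs.id, 0, 0) for qs in query_samples])

def flush_queries():
    pass

settings = lg.TestSettings()
settings.scenario = lg.TestScenario.Offline
settings.mode = lg.TestMode.PerformanceOnly
settings.FromConfig("user.conf", "rgat", "Offline")  # file name assumed

sut = lg.ConstructSUT(issue_queries, flush_queries)
qsl = lg.ConstructQSL(788379, 788379, lambda ids: None, lambda ids: None)
lg.StartTest(sut, qsl, settings)
lg.DestroyQSL(qsl)
lg.DestroySUT(sut)
```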
diff --git a/loadgen/mlperf.conf b/loadgen/mlperf.conf index 1fe202253..95cc08351 100644 --- a/loadgen/mlperf.conf +++ b/loadgen/mlperf.conf @@ -42,6 +42,9 @@ retinanet.MultiStream.target_latency = 528 # 3D-UNet uses equal issue mode because it has non-uniform inputs 3d-unet.*.sample_concatenate_permutation = 1 +# R-GAT uses equal issue mode because it may have non-uniform inputs +rgat.*.sample_concatenate_permutation = 1 + # LLM benchmarks have non-uniform inputs and outputs, and use equal issue mode for all latency scenario gptj.*.sample_concatenate_permutation = 1 llama2-70b.*.sample_concatenate_permutation = 1 diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 4a463f304..dcdad1180 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -957,6 +957,7 @@ def requires_equal_issue(self, model, division): "llama2-70b-99.9", "mixtral-8x7b", "llama3-405b", + "rgat", ] and self.version not in ["v4.0", "v4.1"] ) From 281189a334cce8d7d31c26886bc825892712f0f3 Mon Sep 17 00:00:00 2001 From: mrmhodak Date: Wed, 18 Dec 2024 18:15:47 +0000 Subject: [PATCH 050/112] Increment version to 5.0.3 --- loadgen/VERSION.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt index a1ef0cae1..50e2274e6 100644 --- a/loadgen/VERSION.txt +++ b/loadgen/VERSION.txt @@ -1 +1 @@ -5.0.2 +5.0.3 From ab052330e58768f53984cc3c595537a5620525b5 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 18 Dec 2024 18:17:07 +0000 Subject: [PATCH 051/112] Docs update for r-gat (#1969) * Fixes #1648, restrict loadgen uncommitted error message to within the loadgen directory * Update test-rnnt.yml (#1688) Stopping the github action for rnnt * Added docs init Added github action for website publish Update benchmark documentation Update publish.yaml Update publish.yaml Update benchmark documentation Improved the submission documentation Fix taskname Removed unused images * Fix benchmark URLs * Fix links * Add _full variation to run commands * Added script flow diagram * Added docker setup command for CM, extra run options * Added support for docker options in the docs * Added --quiet to the CM run_cmds in docs * Fix the test query count for cm commands * Support ctuning-cpp implementation * Added commands for mobilenet models * Docs cleanup * Docs cleanup * Added separate files for dataset and models in the docs * Remove redundant tab in the docs * Fixes some WIP models in the docs * Use the official docs page for CM installation * Fix the deadlink in docs * Fix indendation issue in docs * Added dockerinfo for nvidia implementation * Added run options for gptj * Added execution environment tabs * Cleanup of the docs * Cleanup of the docs * Reordered the sections of the docs page * Removed an unnecessary heading in the docs * Fixes the commands for datacenter * Fix the build --sdist for loadgen * Fixes #1761, llama2 and mixtral runtime error on CPU systems * Added mixtral to the benchmark list, improved benchmark docs * Update docs for MLPerf inference v4.1 * Update docs for MLPerf inference v4.1 * Fix typo * Gave direct link to implementation readmes * Added tables detailing implementations * Update vision README.md, split the frameworks into separate rows * Update README.md * pointed links to specific frameworks * pointed links to specific frameworks * Update Submission_Guidelines.md * Update Submission_Guidelines.md * Update Submission_Guidelines.md * api support llama2 * Added request 
module and reduced max token len * Fix for llama2 api server * Update SUT_API offline to work for OpenAI * Update SUT_API.py * Minor fixes * Fix json import in SUT_API.py * Fix llama2 token length * Added model name verification with server * clean temp files * support num_workers in LLAMA2 SUTs * Remove batching from Offline SUT_API.py * Update SUT_API.py * Minor fixes for llama2 API * Fix for llama2 API * removed table of contents * enabled llama2-nvidia + vllm-NM : WIP * enabled dlrm for intel * lower cased implementation * added raw data input * corrected data download commands * renamed filename * changes for bert and vllm * documentation to work on custom repo and branch * benchmark index page update * enabled sdxl for nvidia and intel * updated vllm server run cmd * benchmark page information addition * fix indendation issue * Added submission categories * update submission page - generate submission with or w/o using CM for benchmarking * Updated kits dataset documentation * Updated model parameters * updation of information * updated non cm based benchmark * added info about hf password * added links to model and access tokens * Updated reference results structuree tree * submission docs cleanup * Some cleanups for benchmark info * Some cleanups for benchmark info * Some cleanups for benchmark info * added generic stubs deepsparse * Some cleanups for benchmark info * Some cleanups for benchmark info * Some cleanups for benchmark info * Some cleanups for benchmark info (FID and CLIP data added) * typo fix for bert deepsparse framework * added min system requirements for models * fixed code version * changes for displaying reference and intel implementation tip * added reference to installation page * updated neural magic documentation * Added links to the install page, redirect benchmarks page * added tips about batch size and dataset for nvidia llama2 * fix conditions logic * modified tips and additional run cmds * sentence corrections * Minor fix for the documentation * fixed bug in deepsparse generic model stubs + styling * added more information to stubs * Added SCC24 readme, support reproducibility in the docs * Made clear the custom CM repo URL format * Support conditional implementation, setup and run tips * Support rocm for sdxl * Fix _short tag support * Fix install URL * Expose bfloat16 and float16 options for sdxl * Expose download model to host option for sdxl * IndySCC24 documentation added * Improve the SCC24 docs * Improve the support of short variation * Improved the indyscc24 documentation * Updated scc run commands * removed test_query_count option for scc * Remove scc24 in the main docs * Remove scc24 in the main docs * Fix docs: indendation issue on the submission page * generalised code for skipping test query count * Fixes for SCC24 docs * Fix scenario text in main.py * Fix links for scc24 * Fix links for scc24 * Improve the general docs * Fix links for scc24 * Use float16 in scc24 doc * Improve scc24 docs * Improve scc24 docs * Use float16 in scc24 doc * fixed command bug * Fix typo in docs * Fix typo in docs * Remove unnecessary indendation in docs * initial commit for tip - native run CUDA * Updated tip * added docker_cm_repo_branch to more run option - docker * Update docs for IndySCC24 * Support custom repo branch and owner for final report generation * enabled amd implementation for llama2 * updations for amd - docs * Fix scenarios in docs page * formatted the files to pass the gh action * scenarios -> fixed_scenarios in docs * [Automated Commit] Format 
Codebase * Update indyscc24-bert.md * Update scc24.md * updated tip for reference implementation (#1912) * [Automated Commit] Format Codebase * fix for run suffix (#1913) * [Automated Commit] Format Codebase * Updation for adding submission flow diagram * Added submission flow diagram * Update scc24.md * changes in submission documentation (#1946) * update results category (#1947) * changes for adding rgat to docs (#1965) * Update index.md | Added R-GAT details (WIP) * Update index.md * Create system_requirements.yml * Update system_requirements.yml * Update system_requirements.yml * Update system_requirements.yml --------- Co-authored-by: anandhu-eng Co-authored-by: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Co-authored-by: Michael Goin Co-authored-by: arjunsuresh Co-authored-by: Pablo Gonzalez Co-authored-by: Mitchelle Rasquinha <80070689+mrasquinha-g@users.noreply.github.com> Co-authored-by: Miro --- docs/benchmarks/graph/get-rgat-data.md | 39 ++++++ docs/benchmarks/graph/rgat.md | 13 ++ docs/index.md | 19 ++- docs/submission/index.md | 160 +++++++++++++------------ docs/system_requirements.yml | 50 ++++++++ main.py | 9 +- mkdocs.yml | 2 + 7 files changed, 211 insertions(+), 81 deletions(-) create mode 100644 docs/benchmarks/graph/get-rgat-data.md create mode 100644 docs/benchmarks/graph/rgat.md create mode 100644 docs/system_requirements.yml diff --git a/docs/benchmarks/graph/get-rgat-data.md b/docs/benchmarks/graph/get-rgat-data.md new file mode 100644 index 000000000..189c25b87 --- /dev/null +++ b/docs/benchmarks/graph/get-rgat-data.md @@ -0,0 +1,39 @@ +--- +hide: + - toc +--- + +# Graph Neural Network using R-GAT + +## Dataset + +The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands. + +=== "Full Dataset" + R-GAT validation run uses the IGBH dataset consisting of 547,306,935 nodes and 5,812,005,639 edges. + + ### Get Full Dataset + ``` + cm run script --tags=get,dataset,igbh,_full -j + ``` + +=== "Debug Dataset" + R-GAT debug run uses the IGBH debug dataset(tiny). + + ### Get Full Dataset + ``` + cm run script --tags=get,dataset,igbh,_debug -j + ``` + +## Model +The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands. + +Get the Official MLPerf R-GAT Model + +=== "PyTorch" + + ### PyTorch + ``` + cm run script --tags=get,ml-model,rgat -j + ``` + diff --git a/docs/benchmarks/graph/rgat.md b/docs/benchmarks/graph/rgat.md new file mode 100644 index 000000000..ffff467a4 --- /dev/null +++ b/docs/benchmarks/graph/rgat.md @@ -0,0 +1,13 @@ +--- +hide: + - toc +--- + + +# Graph Neural Network using R-GAT + + +=== "MLCommons-Python" + ## MLPerf Reference Implementation in Python + +{{ mlperf_inference_implementation_readme (4, "rgat", "reference", devices = ["CPU", "CUDA"]) }} \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index 11f2a52c2..b46d4c274 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,7 +1,7 @@ # MLPerf Inference Benchmarks ## Overview -The currently valid [MLPerf Inference Benchmarks](index_gh.md) as of MLPerf inference v4.0 round are listed below, categorized by tasks. Under each model you can find its details like the dataset used, reference accuracy, server latency constraints etc. 
+The currently valid [MLPerf Inference Benchmarks](index_gh.md) as of MLPerf inference v5.0 round are listed below, categorized by tasks. Under each model you can find its details like the dataset used, reference accuracy, server latency constraints etc.

---

@@ -80,7 +80,7 @@ The currently valid [MLPerf Inference Benchmarks](index_gh.md) as of MLPerf infe
- **Server Scenario Latency Constraint**: 130ms
- **Equal Issue mode**: False
- **High accuracy variant**: yes
-- **Submission Category**: Datacenter, Edge
+- **Submission Category**: Edge

#### [LLAMA2-70B](benchmarks/language/llama2-70b.md)
- **Dataset**: OpenORCA (GPT-4 split, max_seq_len=1024)
@@ -157,11 +157,22 @@ The currently valid [MLPerf Inference Benchmarks](index_gh.md) as of MLPerf infe
- **High accuracy variant**: Yes
- **Submission Category**: Datacenter

+## Graph Neural Networks
+### [R-GAT](benchmarks/graph/rgat.md)
+- **Dataset**: Illinois Graph Benchmark Heterogeneous validation dataset
+  - **Dataset Size**: 788,379
+  - **QSL Size**: 788,379
+- **Number of Parameters**:
+- **Reference Model Accuracy**: ACC = ?
+- **Server Scenario Latency Constraint**: N/A
+- **Equal Issue mode**: True
+- **High accuracy variant**: No
+- **Submission Category**: Datacenter

---

## Submission Categories
-- **Datacenter Category**: All the current inference benchmarks are applicable to the datacenter category.
-- **Edge Category**: All benchmarks except DLRMv2, LLAMA2-70B, and Mixtral-8x7B are applicable to the edge category.
+- **Datacenter Category**: All benchmarks except bert are applicable to the datacenter category for inference v5.0.
+- **Edge Category**: All benchmarks except DLRMv2, LLAMA2-70B, Mixtral-8x7B, and R-GAT are applicable to the edge category for v5.0.

## High Accuracy Variants
- **Benchmarks**: `bert`, `llama2-70b`, `gpt-j`, `dlrm_v2`, and `3d-unet` have a normal accuracy variant as well as a high accuracy variant.
diff --git a/docs/submission/index.md b/docs/submission/index.md
index c99802420..1050f5fb0 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -13,13 +13,15 @@ hide:

Click [here](https://youtu.be/eI1Hoecc3ho) to view the recording of the workshop: Streamlining your MLPerf Inference results using CM.

-=== "CM based benchmark"
+Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the proposal slides for Common Automation for MLPerf Inference Submission Generation through CM.
+
+=== "CM based results"
    If you have followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `cm cache` folder. The following command could be used to browse the structure of inference results folder generated by CM.
    ### Get results folder structure
    ```bash
    cm find cache --tags=get,mlperf,inference,results,dir | xargs tree
    ```
-=== "Non CM based benchmark"
+=== "Non CM based results"
    If you have not followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, please make sure that the result directory is structured in the following way.
``` └── System description ID(SUT Name) @@ -35,18 +37,20 @@ Click [here](https://youtu.be/eI1Hoecc3ho) to view the recording of the workshop | ├── mlperf_log_detail.txt | ├── mlperf_log_accuracy.json | └── accuracy.txt - └── Compliance_Test_ID - ├── Performance - | └── run_x/#1 run for all scenarios - | ├── mlperf_log_summary.txt - | └── mlperf_log_detail.txt - ├── Accuracy - | ├── baseline_accuracy.txt - | ├── compliance_accuracy.txt - | ├── mlperf_log_accuracy.json - | └── accuracy.txt - ├── verify_performance.txt - └── verify_accuracy.txt #for TEST01 only + |── Compliance_Test_ID + | ├── Performance + | | └── run_x/#1 run for all scenarios + | | ├── mlperf_log_summary.txt + | | └── mlperf_log_detail.txt + | ├── Accuracy + | | ├── baseline_accuracy.txt + | | ├── compliance_accuracy.txt + | | ├── mlperf_log_accuracy.json + | | └── accuracy.txt + | ├── verify_performance.txt + | └── verify_accuracy.txt #for TEST01 only + |── user.conf + └── measurements.json ```
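Before handing a hand-built tree like this to the submission checker, a quick structural sanity check can save a round trip. The helper below is a small illustrative sketch, not part of this patch; the directory and file names simply mirror the layout shown above:

```python
import os

PERF_FILES = ["mlperf_log_summary.txt", "mlperf_log_detail.txt"]
ACC_FILES = PERF_FILES + ["mlperf_log_accuracy.json", "accuracy.txt"]

def missing_result_files(scenario_dir):
    """Return the expected result files that are absent under one scenario."""
    missing = []
    for name in PERF_FILES:
        path = os.path.join(scenario_dir, "performance", "run_1", name)
        if not os.path.isfile(path):
            missing.append(path)
    for name in ACC_FILES:
        path = os.path.join(scenario_dir, "accuracy", name)
        if not os.path.isfile(path):
            missing.append(path)
    return missing
```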
@@ -67,67 +71,69 @@ Once all the results across all the models are ready you can use the following c ## Generate actual submission tree -=== "Closed Edge" - ### Closed Edge Submission - ```bash - cm run script --tags=generate,inference,submission \ - --clean \ - --preprocess_submission=yes \ - --run-checker \ - --submitter=MLCommons \ - --tar=yes \ - --env.CM_TAR_OUTFILE=submission.tar.gz \ - --division=closed \ - --category=edge \ - --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ - --quiet - ``` - -=== "Closed Datacenter" - ### Closed Datacenter Submission - ```bash - cm run script --tags=generate,inference,submission \ - --clean \ - --preprocess_submission=yes \ - --run-checker \ - --submitter=MLCommons \ - --tar=yes \ - --env.CM_TAR_OUTFILE=submission.tar.gz \ - --division=closed \ - --category=datacenter \ - --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ - --quiet - ``` -=== "Open Edge" - ### Open Edge Submission - ```bash - cm run script --tags=generate,inference,submission \ - --clean \ - --preprocess_submission=yes \ - --run-checker \ - --submitter=MLCommons \ - --tar=yes \ - --env.CM_TAR_OUTFILE=submission.tar.gz \ - --division=open \ - --category=edge \ - --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ - --quiet - ``` -=== "Open Datacenter" - ### Closed Datacenter Submission - ```bash - cm run script --tags=generate,inference,submission \ - --clean \ - --preprocess_submission=yes \ - --run-checker \ - --submitter=MLCommons \ - --tar=yes \ - --env.CM_TAR_OUTFILE=submission.tar.gz \ - --division=open \ - --category=datacenter \ - --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ - --quiet - ``` +=== "Docker run" + ### Docker run + === "Closed" + ### Closed Submission + ```bash + cm docker script --tags=generate,inference,submission \ + --clean \ + --preprocess_submission=yes \ + --run-checker \ + --submitter=MLCommons \ + --tar=yes \ + --env.CM_TAR_OUTFILE=submission.tar.gz \ + --division=closed \ + --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ + --quiet + ``` + + === "Open" + ### Open Submission + ```bash + cm docker script --tags=generate,inference,submission \ + --clean \ + --preprocess_submission=yes \ + --run-checker \ + --submitter=MLCommons \ + --tar=yes \ + --env.CM_TAR_OUTFILE=submission.tar.gz \ + --division=open \ + --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ + --quiet + ``` + +=== "Native run" + ### Native run + === "Closed" + ### Closed Submission + ```bash + cm run script --tags=generate,inference,submission \ + --clean \ + --preprocess_submission=yes \ + --run-checker \ + --submitter=MLCommons \ + --tar=yes \ + --env.CM_TAR_OUTFILE=submission.tar.gz \ + --division=closed \ + --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ + --quiet + ``` + + === "Open" + ### Open Submission + ```bash + cm run script --tags=generate,inference,submission \ + --clean \ + --preprocess_submission=yes \ + --run-checker \ + --submitter=MLCommons \ + --tar=yes \ + --env.CM_TAR_OUTFILE=submission.tar.gz \ + --division=open \ + --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \ + --quiet + ``` * Use `--hw_name="My system name"` to give a meaningful system name. Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems) @@ -137,6 +143,10 @@ Once all the results across all the models are ready you can use the following c * Use `--results_dir` option to specify the results folder for Non CM based benchmarks +* Use `--category` option to specify the category for which submission is generated(datacenter/edge). 
By default, the category is taken from `system_meta.json` file located in the SUT root directory. + +* Use `--submission_base_dir` to specify the directory to which outputs from preprocess submission script and final submission is to be dumped. No need to provide `--submission_dir` along with this. For `docker run`, use `--submission_base_dir` instead of `--submission_dir`. + The above command should generate "submission.tar.gz" if there are no submission checker issues and you can upload it to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission). ## Aggregate Results in GitHub diff --git a/docs/system_requirements.yml b/docs/system_requirements.yml new file mode 100644 index 000000000..5dfec202a --- /dev/null +++ b/docs/system_requirements.yml @@ -0,0 +1,50 @@ +# All memory requirements in GB +resnet: + reference: + fp32: + system_memory: 8 + accelerator_memory: 4 + disk_storage: 25 + nvidia: + int8: + system_memory: 8 + accelerator_memory: 4 + disk_storage: 100 + intel: + int8: + system_memory: 8 + accelerator_memory: 0 + disk_storage: 50 + qualcomm: + int8: + system_memory: 8 + accelerator_memory: 8 + disk_storage: 50 +retinanet: + reference: + fp32: + system_memory: 8 + accelerator_memory: 8 + disk_storage: 200 + nvidia: + int8: + system_memory: 8 + accelerator_memory: 8 + disk_storage: 200 + intel: + int8: + system_memory: 8 + accelerator_memory: 0 + disk_storage: 200 + qualcomm: + int8: + system_memory: 8 + accelerator_memory: 8 + disk_storage: 200 +rgat: + reference: + fp32: + system_memory: 768 + accelerator_memory: 8 + disk_storage: 2300 + diff --git a/main.py b/main.py index c8c64b8c3..c5b22a705 100755 --- a/main.py +++ b/main.py @@ -239,7 +239,8 @@ def mlperf_inference_implementation_readme( common_info = get_common_info( spaces + 16, - implementation + implementation, + model.lower() ) if ( @@ -488,7 +489,7 @@ def get_venv_command(spaces): # contains run command information which is common to both docker and # native runs - def get_common_info(spaces, implementation): + def get_common_info(spaces, implementation, model): info = "" pre_space = "" for i in range(1, spaces): @@ -496,7 +497,11 @@ def get_common_info(spaces, implementation): pre_space += " " # pre_space = " " info += f"\n{pre_space}!!! tip\n\n" + info += f"{pre_space} - Number of threads could be adjusted using `--threads=#`, where `#` is the desired number of threads. This option works only if the implementation in use supports threading.\n\n" info += f"{pre_space} - Batch size could be adjusted using `--batch_size=#`, where `#` is the desired batch size. This option works only if the implementation in use is supporting the given batch size.\n\n" + if model == "rgat": + info += f"{pre_space} - Add `--env.CM_DATASET_IGBH_PATH=` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.\n\n" + info += f"{pre_space} - Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. 
The path will be automatically mounted when using docker run.\n\n" if implementation.lower() == "reference": info += f"{pre_space} - Add `--adr.mlperf-implementation.tags=_branch.master,_repo.` if you are modifying the official MLPerf Inference implementation in a custom fork.\n\n" info += f"{pre_space} - Add `--adr.inference-src.tags=_repo.` if you are modifying the model config accuracy script in the submission checker within a custom fork.\n\n" diff --git a/mkdocs.yml b/mkdocs.yml index 95dfb6e86..96bcfb758 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -42,6 +42,8 @@ nav: - MIXTRAL-8x7B: benchmarks/language/mixtral-8x7b.md - Recommendation: - DLRM-v2: benchmarks/recommendation/dlrm-v2.md + - Graph Neural Networks: + - R-GAT: benchmarks/graph/rgat.md - Install CM: - install/index.md - Submission: From a65114b163cb1d610af1de9e4ed0e95be7cb6223 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Wed, 18 Dec 2024 18:17:31 +0000 Subject: [PATCH 052/112] [Automated Commit] Format Codebase --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index c5b22a705..6a34587dd 100755 --- a/main.py +++ b/main.py @@ -501,7 +501,7 @@ def get_common_info(spaces, implementation, model): info += f"{pre_space} - Batch size could be adjusted using `--batch_size=#`, where `#` is the desired batch size. This option works only if the implementation in use is supporting the given batch size.\n\n" if model == "rgat": info += f"{pre_space} - Add `--env.CM_DATASET_IGBH_PATH=` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.\n\n" - info += f"{pre_space} - Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. The path will be automatically mounted when using docker run.\n\n" + info += f"{pre_space} - Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. The path will be automatically mounted when using docker run.\n\n" if implementation.lower() == "reference": info += f"{pre_space} - Add `--adr.mlperf-implementation.tags=_branch.master,_repo.` if you are modifying the official MLPerf Inference implementation in a custom fork.\n\n" info += f"{pre_space} - Add `--adr.inference-src.tags=_repo.` if you are modifying the model config accuracy script in the submission checker within a custom fork.\n\n" From 99a2015b597e2a5f031e4f0a420184bddbc55815 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Wed, 18 Dec 2024 23:48:11 +0530 Subject: [PATCH 053/112] Update automated run command section - R-GAT (#1970) * Update automated run command section * add cm commands for model and dataset downloads * Update README.md * Update cm run cmds --------- Co-authored-by: Miro --- graph/R-GAT/README.md | 53 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 52 insertions(+), 1 deletion(-) diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index 69883c0d1..fbfca4709 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -19,7 +19,7 @@ This is the reference implementation for MLPerf Inference Graph Neural Network. 
## Automated command to run the benchmark via MLCommons CM

-TODO
+Please check the official inference documentation [here](https://docs.mlcommons.org/inference/benchmarks/graph/rgat/)

## Setup
Set the following helper variables
@@ -95,6 +95,12 @@ You can then navigate in the terminal to your desired download directory and run
rclone copy mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt $MODEL_PATH -P
```

+### Download model through CM (Collective Mind)
+
+```
+cm run script --tags=get,ml-model,rgat -j
+```
+
### Download and setup dataset

#### Debug Dataset
@@ -110,6 +116,10 @@ cd $GRAPH_FOLDER
python3 tools/split_seeds.py --path igbh --dataset_size tiny
```

+**CM Command**
+```
+cm run script --tags=get,dataset,igbh,_debug -j
+```

#### Full Dataset
**Warning:** This script will download 2.2TB of data
@@ -124,6 +134,11 @@ cd $GRAPH_FOLDER
python3 tools/split_seeds.py --path igbh --dataset_size full
```

+**CM Command**
+```
+cm run script --tags=get,dataset,igbh,_full -j
+```
+

#### Calibration dataset
@@ -140,6 +155,21 @@ cd $GRAPH_FOLDER
python3 main.py --dataset igbh-dgl-tiny --dataset-path igbh/ --profile debug-dgl [--model-path ] [--in-memory] [--device ] [--dtype ] [--scenario ]
```

+##### Debug Run using CM
+```
+cm run script --tags=run-mlperf,inference,_submission,_short,_r5.0-dev \
+    --model=rgat \
+    --implementation=reference \
+    --framework=pytorch \
+    --category=edge \
+    --scenario=Offline \
+    --execution_mode=test \
+    --device=<cpu or cuda> \
+    --quiet \
+    --test_query_count=10 \
+    --docker
+```
+
#### Local run
```bash
# Go to the benchmark folder
cd $GRAPH_FOLDER

# Run the benchmark DGL
python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full [--model-path ] [--in-memory] [--device ] [--dtype ] [--scenario ]
```
+
+##### Local Run using CM
+```
+cm run script --tags=run-mlperf,inference,_submission,_full,_r5.0-dev \
+    --model=rgat \
+    --implementation=reference \
+    --framework=pytorch \
+    --category=edge \
+    --scenario=Offline \
+    --execution_mode=test \
+    --device=<cpu or cuda> \
+    --quiet \
+    --test_query_count=10 \
+    --docker
+```
+
+- Number of threads could be adjusted using `--threads=#`, where # is the desired number of threads. This option works only if the implementation in use supports threading.
+- Batch size could be adjusted using `--batch_size=#`, where # is the desired batch size. This option works only if the implementation in use supports the given batch size.
+- Add `--env.CM_DATASET_IGBH_PATH=` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.
+- Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. The path will be automatically mounted when using docker run.
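As an aside for readers new to the workload, the sketch below shows an illustrative, heavily simplified relational GAT layer in DGL-style code; the layer sizes, the two attention heads, and the `paper` output node type are assumptions made for the example, not the reference topology. With the full IGBH graph this would be stacked into multiple layers and fed neighbor-sampled blocks, much as the removed training script earlier in this series did.

```python
# Illustrative only: one relational graph attention layer over a
# heterogeneous graph, in the spirit of the model this benchmark measures.
import torch.nn as nn
import dgl.nn as dglnn

class TinyRGAT(nn.Module):
    def __init__(self, etypes, in_feats, hidden, n_classes, heads=2):
        super().__init__()
        # One GAT module per edge type, aggregated across relations.
        self.conv = dglnn.HeteroGraphConv(
            {etype: dglnn.GATConv(in_feats, hidden, heads) for etype in etypes})
        self.classify = nn.Linear(hidden * heads, n_classes)

    def forward(self, graph, feats):
        h = self.conv(graph, feats)               # node type -> (N, heads, hidden)
        h = {k: v.flatten(1) for k, v in h.items()}
        return self.classify(h["paper"])          # logits for paper nodes
```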
+ #### Run using docker Not implemented yet From aeb415eceb29f18157bd9cc09ad4f3f4b5d0d4d8 Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Thu, 19 Dec 2024 01:36:42 -0500 Subject: [PATCH 054/112] Unify llama3 names to llama3.1-405b (#1982) * Unify llama3 names to llama3.1-405b * Set mlperf.conf name to llama3_1-405b --- .../{llama3-405b => llama3.1-405b}/Dockerfile | 2 +- .../{llama3-405b => llama3.1-405b}/README.md | 20 +++++------ .../SUT_VLLM.py | 0 .../{llama3-405b => llama3.1-405b}/build.sh | 0 .../{llama3-405b => llama3.1-405b}/dataset.py | 0 .../evaluate-accuracy.py | 2 +- .../launch_docker.sh | 0 .../{llama3-405b => llama3.1-405b}/main.py | 4 +-- .../requirements.txt | 0 .../run_accuracy.sh | 0 .../run_offline.sh | 0 .../run_server.sh | 0 .../{llama3-405b => llama3.1-405b}/user.conf | 2 +- .../with_the_same_user | 0 loadgen/mlperf.conf | 16 ++++----- tools/submission/generate_final_report.py | 2 +- tools/submission/submission_checker.py | 35 ++++++++++--------- 17 files changed, 42 insertions(+), 41 deletions(-) rename language/{llama3-405b => llama3.1-405b}/Dockerfile (97%) rename language/{llama3-405b => llama3.1-405b}/README.md (87%) rename language/{llama3-405b => llama3.1-405b}/SUT_VLLM.py (100%) rename language/{llama3-405b => llama3.1-405b}/build.sh (100%) rename language/{llama3-405b => llama3.1-405b}/dataset.py (100%) rename language/{llama3-405b => llama3.1-405b}/evaluate-accuracy.py (98%) rename language/{llama3-405b => llama3.1-405b}/launch_docker.sh (100%) rename language/{llama3-405b => llama3.1-405b}/main.py (97%) rename language/{llama3-405b => llama3.1-405b}/requirements.txt (100%) rename language/{llama3-405b => llama3.1-405b}/run_accuracy.sh (100%) rename language/{llama3-405b => llama3.1-405b}/run_offline.sh (100%) rename language/{llama3-405b => llama3.1-405b}/run_server.sh (100%) rename language/{llama3-405b => llama3.1-405b}/user.conf (87%) rename language/{llama3-405b => llama3.1-405b}/with_the_same_user (100%) diff --git a/language/llama3-405b/Dockerfile b/language/llama3.1-405b/Dockerfile similarity index 97% rename from language/llama3-405b/Dockerfile rename to language/llama3.1-405b/Dockerfile index 67edcc46b..14d0a202d 100644 --- a/language/llama3-405b/Dockerfile +++ b/language/llama3.1-405b/Dockerfile @@ -44,7 +44,7 @@ WORKDIR /tmp RUN wget https://repo.anaconda.com/miniconda/Miniconda3-py310_23.5.2-0-Linux-x86_64.sh \ && bash Miniconda3-* -b -p /opt/miniconda3 ENV PATH="$PATH:/opt/miniconda3/bin" -RUN conda create -n llama3-405b python=3.10 +RUN conda create -n llama3.1-405b python=3.10 RUN chmod -R 777 /opt/miniconda3 # Set the env variable for vLLM diff --git a/language/llama3-405b/README.md b/language/llama3.1-405b/README.md similarity index 87% rename from language/llama3-405b/README.md rename to language/llama3.1-405b/README.md index 8df2a81f1..d1dd5ad4f 100644 --- a/language/llama3-405b/README.md +++ b/language/llama3.1-405b/README.md @@ -1,13 +1,13 @@ -# Reference Implementation for llama3-405b +# Reference Implementation for llama3.1-405b -**Basic implementation for llama3-405b. Few noteworthy items:** +**Basic implementation for llama3.1-405b. Few noteworthy items:** + Streamer for communicating with loadgen has quite some overhead. This is only meant to provide functional implementation + For custom/optimized implementations of this benchmark it is important to include the : - For server scenario, it is necessary to call `lg.FirstTokenComplete(response)` for each query. 
This way the first token will be reported and its latency will be measured.
  - For all scenarios, when calling `lg.QuerySamplesComplete(response)`, it is necessary that each of the elements in response is a `lg.QuerySampleResponse` that contains the number of tokens (can be created this way: `lg.QuerySampleResponse(qitem.id, bi[0], bi[1], n_tokens)`). The number of tokens reported should match the number of tokens in your answer, and this will be checked in [TEST06](../../compliance/nvidia/TEST06/)

-Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3-405b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker.
+Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3.1-405b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker.

## Prepare environment

@@ -33,9 +33,9 @@ rm ~/miniconda3/miniconda.sh
- Set the following helper variables
```bash
export ROOT=$PWD/inference
-export LLAMA_FOLDER=$PWD/inference/language/llama3-405b
+export LLAMA_FOLDER=$PWD/inference/language/llama3.1-405b
export LOADGEN_FOLDER=$PWD/inference/loadgen
-export DATASET_FOLDER=$PWD/inference/language/llama3-405b/dataset
+export DATASET_FOLDER=$PWD/inference/language/llama3.1-405b/dataset
```

- Clone the inference repository:
@@ -46,8 +46,8 @@ git clone --recurse-submodules https://github.com/mlcommons/inference.git \

- Create a conda environment:
```bash
-conda create -y -n llama3-405b python=3.10
-conda activate llama3-405b
+conda create -y -n llama3.1-405b python=3.10
+conda activate llama3.1-405b
conda install -y -c conda-forge libstdcxx-ng=12
```

@@ -100,7 +100,7 @@ TODO: Host model and grant access to submitters

### External Download
-+ First go to [llama3-request-link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and make a request, sign in to HuggingFace (if you don't have account, you'll need to create one). **Please note your authentication credentials** as you may be required to provide them when cloning below.
++ First go to [llama3.1-request-link](https://ai.meta.com/resources/models-and-libraries/llama-downloads/) and make a request, sign in to HuggingFace (if you don't have an account, you'll need to create one). **Please note your authentication credentials** as you may be required to provide them when cloning below.
+ Requires Git Large Files Storage ``` export CHECKPOINT_PATH=Meta-Llama-3.1-405B-Instruct @@ -127,13 +127,13 @@ rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5ee You can then navigate in the terminal to your desired download directory and run the following command to download the dataset: ``` -rclone copy mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl ./ -P +rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl ./ -P ``` You can also download the calibration dataset from the Cloudflare R2 bucket by running the following command: ``` -rclone copy mlc-inference:mlcommons-inference-wg-public/llama3_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl ./ -P +rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl ./ -P ``` ## Run Performance Benchmarks diff --git a/language/llama3-405b/SUT_VLLM.py b/language/llama3.1-405b/SUT_VLLM.py similarity index 100% rename from language/llama3-405b/SUT_VLLM.py rename to language/llama3.1-405b/SUT_VLLM.py diff --git a/language/llama3-405b/build.sh b/language/llama3.1-405b/build.sh similarity index 100% rename from language/llama3-405b/build.sh rename to language/llama3.1-405b/build.sh diff --git a/language/llama3-405b/dataset.py b/language/llama3.1-405b/dataset.py similarity index 100% rename from language/llama3-405b/dataset.py rename to language/llama3.1-405b/dataset.py diff --git a/language/llama3-405b/evaluate-accuracy.py b/language/llama3.1-405b/evaluate-accuracy.py similarity index 98% rename from language/llama3-405b/evaluate-accuracy.py rename to language/llama3.1-405b/evaluate-accuracy.py index f5677820e..7c803e1ca 100644 --- a/language/llama3-405b/evaluate-accuracy.py +++ b/language/llama3.1-405b/evaluate-accuracy.py @@ -15,7 +15,7 @@ def get_args(): parser.add_argument( "--checkpoint-path", default="meta-llama/Meta-Llama-3-8B", - help="Path to Llama3-405b-hf-chat checkpoint" + help="Path to Llama3.1-405b-hf-chat checkpoint" ) parser.add_argument( "--mlperf-accuracy-file", required=True, help="path to mlperf_log_accuracy.json" diff --git a/language/llama3-405b/launch_docker.sh b/language/llama3.1-405b/launch_docker.sh similarity index 100% rename from language/llama3-405b/launch_docker.sh rename to language/llama3.1-405b/launch_docker.sh diff --git a/language/llama3-405b/main.py b/language/llama3.1-405b/main.py similarity index 97% rename from language/llama3-405b/main.py rename to language/llama3.1-405b/main.py index f7802687e..32f80060c 100644 --- a/language/llama3-405b/main.py +++ b/language/llama3.1-405b/main.py @@ -136,8 +136,8 @@ def main(): settings = lg.TestSettings() settings.scenario = scenario_map[args.scenario.lower()] # mlperf.conf is automatically loaded by the loadgen - # settings.FromConfig(args.mlperf_conf, "llama3-405b", args.scenario) - settings.FromConfig(args.user_conf, "llama3-405b", args.scenario) + # settings.FromConfig(args.mlperf_conf, "llama3_1-405b", args.scenario) + settings.FromConfig(args.user_conf, "llama3_1-405b", args.scenario) if args.accuracy: settings.mode = lg.TestMode.AccuracyOnly diff --git a/language/llama3-405b/requirements.txt b/language/llama3.1-405b/requirements.txt similarity index 100% rename from language/llama3-405b/requirements.txt rename to language/llama3.1-405b/requirements.txt diff --git 
a/language/llama3-405b/run_accuracy.sh b/language/llama3.1-405b/run_accuracy.sh similarity index 100% rename from language/llama3-405b/run_accuracy.sh rename to language/llama3.1-405b/run_accuracy.sh diff --git a/language/llama3-405b/run_offline.sh b/language/llama3.1-405b/run_offline.sh similarity index 100% rename from language/llama3-405b/run_offline.sh rename to language/llama3.1-405b/run_offline.sh diff --git a/language/llama3-405b/run_server.sh b/language/llama3.1-405b/run_server.sh similarity index 100% rename from language/llama3-405b/run_server.sh rename to language/llama3.1-405b/run_server.sh diff --git a/language/llama3-405b/user.conf b/language/llama3.1-405b/user.conf similarity index 87% rename from language/llama3-405b/user.conf rename to language/llama3.1-405b/user.conf index 9f4eb5f9a..30681302c 100644 --- a/language/llama3-405b/user.conf +++ b/language/llama3.1-405b/user.conf @@ -10,4 +10,4 @@ *.Server.min_duration = 120000 *.Server.min_query_count = 100 -llama3-405b.Server.sample_concatenate_permutation = 1 \ No newline at end of file +llama3_1-405b.Server.sample_concatenate_permutation = 1 \ No newline at end of file diff --git a/language/llama3-405b/with_the_same_user b/language/llama3.1-405b/with_the_same_user similarity index 100% rename from language/llama3-405b/with_the_same_user rename to language/llama3.1-405b/with_the_same_user diff --git a/loadgen/mlperf.conf b/loadgen/mlperf.conf index 95cc08351..1d036f4b4 100644 --- a/loadgen/mlperf.conf +++ b/loadgen/mlperf.conf @@ -14,7 +14,7 @@ dlrm-v2.*.performance_sample_count_override = 204800 rnnt.*.performance_sample_count_override = 2513 gptj.*.performance_sample_count_override = 13368 llama2-70b.*.performance_sample_count_override = 24576 -llama3-405b.*.performance_sample_count_override = 8313 +llama3_1-405b.*.performance_sample_count_override = 8313 stable-diffusion-xl.*.performance_sample_count_override = 5000 rgat.*.performance_sample_count_override = 788379 # set to 0 to let entire sample set to be performance sample @@ -49,7 +49,7 @@ rgat.*.sample_concatenate_permutation = 1 gptj.*.sample_concatenate_permutation = 1 llama2-70b.*.sample_concatenate_permutation = 1 mixtral-8x7b.*.sample_concatenate_permutation = 1 -llama3-405b.*.sample_concatenate_permutation = 1 +llama3_1-405b.*.sample_concatenate_permutation = 1 *.Server.target_latency = 10 *.Server.target_latency_percentile = 99 @@ -66,11 +66,11 @@ stable-diffusion-xl.Server.target_latency = 20000 # Benchmarks that measure token latencies llama2-70b.*.use_token_latencies = 1 mixtral-8x7b.*.use_token_latencies = 1 -llama3-405b.*.use_token_latencies = 1 +llama3_1-405b.*.use_token_latencies = 1 # gptj benchmark infers token latencies gptj.*.infer_token_latencies = 1 gptj.*.token_latency_scaling_factor = 69 -# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7B & llama3-405b benchmark therefore target_latency = 0 +# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7B & llama3_1-405b benchmark therefore target_latency = 0 llama2-70b.Server.target_latency = 0 llama2-70b.Server.ttft_latency = 2000 llama2-70b.Server.tpot_latency = 200 @@ -79,9 +79,9 @@ mixtral-8x7b.Server.target_latency = 0 mixtral-8x7b.Server.ttft_latency = 2000 mixtral-8x7b.Server.tpot_latency = 200 -llama3-405b.Server.target_latency = 0 -llama3-405b.Server.ttft_latency = 6000 -llama3-405b.Server.tpot_latency = 175 +llama3_1-405b.Server.target_latency = 0 +llama3_1-405b.Server.ttft_latency = 6000 +llama3_1-405b.Server.tpot_latency = 175 *.Offline.target_latency_percentile = 90 
*.Offline.min_duration = 600000 @@ -100,7 +100,7 @@ rnnt.Offline.min_query_count = 2513 3d-unet.Offline.min_query_count = 43 stable-diffusion-xl.Offline.min_query_count = 5000 llama2-70b.Offline.min_query_count = 24576 -llama3-405b.Offline.min_query_count = 8313 +llama3_1-405b.Offline.min_query_count = 8313 mixtral-8x7b.Offline.min_query_count = 15000 rgat.Offline.min_query_count = 788379 diff --git a/tools/submission/generate_final_report.py b/tools/submission/generate_final_report.py index 34ae82fb1..aa5b36983 100644 --- a/tools/submission/generate_final_report.py +++ b/tools/submission/generate_final_report.py @@ -211,7 +211,7 @@ def main(): "llama2-70b-99.9": ["Server", "Offline"], "mixtral-8x7b": ["Server", "Offline"], "rgat": ["Offline"], - "llama3-405b": ["Offline", "Server"] + "llama3.1-405b": ["Offline", "Server"] }, "edge": { "resnet": ["SingleStream", "MultiStream", "Offline"], diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index dcdad1180..26d5212f9 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -194,6 +194,7 @@ "ssd-resnet34": "retinanet", "mobilenet": "resnet", "resnet50": "resnet", + "llama3_1-405b": "llama3.1-405b" }, "seeds": { "qsl_rng_seed": 3066443479025735752, @@ -267,7 +268,7 @@ "llama2-70b-99.9", "stable-diffusion-xl", "mixtral-8x7b", - "llama3-405b", + "llama3.1-405b", "rgat", # TODO: add automotive? ], @@ -284,7 +285,7 @@ "llama2-70b-99.9": ["Server", "Offline"], "stable-diffusion-xl": ["Server", "Offline"], "mixtral-8x7b": ["Server", "Offline"], - "llama3-405b": ["Server", "Offline"], + "llama3.1-405b": ["Server", "Offline"], "rgat": ["Offline"], }, "optional-scenarios-datacenter": {}, @@ -315,7 +316,7 @@ "llama2-70b-99.9": ["Server", "Offline"], "stable-diffusion-xl": ["SingleStream", "Offline", "Server"], "mixtral-8x7b": ["Server", "Offline"], - "llama3-405b": ["Server", "Offline"], + "llama3.1-405b": ["Server", "Offline"], "rgat": ["Offline"], }, "optional-scenarios-datacenter-edge": {}, @@ -389,7 +390,7 @@ "mbxp_accuracy", 60.12 * 0.99, ), - "llama3-405b": ( + "llama3.1-405b": ( "ROUGEL", 21.6666 * 0.99, "exact_match", @@ -409,7 +410,7 @@ "llama2-70b-99": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), "llama2-70b-99.9": ("TOKENS_PER_SAMPLE", 294.45 * 1.1), "mixtral-8x7b": ("TOKENS_PER_SAMPLE", 145.9 * 1.1), - "llama3-405b": ("TOKENS_PER_SAMPLE", 684.68 * 1.1), + "llama3.1-405b": ("TOKENS_PER_SAMPLE", 684.68 * 1.1), }, "accuracy-delta-perc": { "stable-diffusion-xl": {"CLIP_SCORE": 1, "FID_SCORE": 2} @@ -429,7 +430,7 @@ "llama2-70b-99.9": 24576, "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, - "llama3-405b": 8313, + "llama3.1-405b": 8313, "rgat": 788379 }, @@ -459,7 +460,7 @@ "llama2-70b-99": {"Server": 20000000000}, "llama2-70b-99.9": {"Server": 20000000000}, "mixtral-8x7b": {"Server": 20000000000}, - "llama3-405b": {"Server": 60000000000} + "llama3.1-405b": {"Server": 60000000000} }, "min-queries": { "resnet": { @@ -490,7 +491,7 @@ "Offline": 1, }, "mixtral-8x7b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, - "llama3-405b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, + "llama3.1-405b": {"SingleStream": 1024, "Server": 270336, "Offline": 1}, "rgat": {"SingleStream": 1024, "Offline": 1} }, }, @@ -579,7 +580,7 @@ "llama2-70b-99.9": 24576, "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, - "llama3-405b": 8313, + "llama3.1-405b": 8313, "rgat": 788379, } @@ -656,7 +657,7 @@ "Offline": "result_tokens_per_second", "Server": 
"result_completed_tokens_per_second", }, - "llama3-405b": { + "llama3.1-405b": { "Offline": "result_tokens_per_second", "Server": "result_completed_tokens_per_second", }, @@ -671,7 +672,7 @@ "conversational": {"ttft": 2000 * 1000000, "tpot": 200 * 1000000} }, "mixtral-8x7b": {"conversational": {"ttft": 2000 * 1000000, "tpot": 200 * 1000000}}, - "llama3-405b": { + "llama3.1-405b": { "conversational": {"ttft": 6000 * 1000000, "tpot": 175 * 1000000} }, } @@ -956,7 +957,7 @@ def requires_equal_issue(self, model, division): "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b", - "llama3-405b", + "llama3.1-405b", "rgat", ] and self.version not in ["v4.0", "v4.1"] @@ -1325,7 +1326,7 @@ def check_performance_dir( ) if model in ["llama2-70b-99", "llama2-70b-99.9", - "mixtral-8x7b", "llama3-405b"]: + "mixtral-8x7b", "llama3.1-405b"]: llama_constraint, is_valid = extra_check_llm( mlperf_log, scenario_fixed, model) @@ -1865,7 +1866,7 @@ def log_result( "Offline": "Tokens/s", "Server": "Tokens/s", }, - "llama3-405b": { + "llama3.1-405b": { "SingleStream": "Latency (ms)", "MultiStream": "Latency (ms)", "Offline": "Tokens/s", @@ -2950,7 +2951,7 @@ def check_compliance_dir( "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b", - "llama3-405b", + "llama3.1-405b", "rgat", ]: test_list.remove("TEST04") @@ -2971,7 +2972,7 @@ def check_compliance_dir( "llama2-70b-99", "llama2-70b-99.9", "mixtral-8x7b", - "llama3-405b", + "llama3.1-405b", ]: test_list.remove("TEST01") @@ -2980,7 +2981,7 @@ def check_compliance_dir( test_list.remove("TEST04") if model in ["llama2-70b-99", "llama2-70b-99.9", - "mixtral-8x7b", "llama3-405b"]: + "mixtral-8x7b", "llama3.1-405b"]: test_list.append("TEST06") if test_list and not os.path.exists(compliance_dir): From 8a319f07a84175700bc118da490e762a9209b152 Mon Sep 17 00:00:00 2001 From: mrmhodak Date: Thu, 19 Dec 2024 06:36:54 +0000 Subject: [PATCH 055/112] Increment version to 5.0.4 --- loadgen/VERSION.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt index 50e2274e6..2d6c0bcf1 100644 --- a/loadgen/VERSION.txt +++ b/loadgen/VERSION.txt @@ -1 +1 @@ -5.0.3 +5.0.4 From 2d4360f9c707b2cf43a40b5cba943d3b7a50cefa Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 06:37:49 +0000 Subject: [PATCH 056/112] Create test-rgat.yml (#1984) * Create test-rgat.yml * Update test-rgat.yml * Update test-rgat.yml --------- Co-authored-by: Miro --- .github/workflows/test-rgat.yml | 38 +++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/test-rgat.yml diff --git a/.github/workflows/test-rgat.yml b/.github/workflows/test-rgat.yml new file mode 100644 index 000000000..990c58a66 --- /dev/null +++ b/.github/workflows/test-rgat.yml @@ -0,0 +1,38 @@ +name: Test for MLPerf inference rgat submission generation using CM script automation + +on: + pull_request: + branches: [ "master", "dev" ] + paths: + - graph/R-GAT/** + - loadgen/** + - tools/submission/** + - .github/workflows/test-rgat.yml + - '!**.md' + +env: + PR_HEAD_REF: ${{ github.event.pull_request.head.ref }} + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.11" ] + backend: [ "pytorch" ] + loadgen-flag: [ "" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install cm4mlops + - name: Test 
R-GAT and end to end submission generation + run: | + cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --quiet --submitter="MLCommons" --category=datacenter --hw_name=default --model=rgat --implementation=reference --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=500 --adr.compiler.tags=gcc --adr.inference-src.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.tags=_branch.$PR_HEAD_REF,_repo.${{ github.event.pull_request.head.repo.html_url }} --adr.inference-src-loadgen.version=custom --adr.loadgen.version=custom ${{ matrix.loadgen-flag }} From 1a51a95c468b9ed0519ae722ef1cd6a3f6101eeb Mon Sep 17 00:00:00 2001 From: Pablo Gonzalez Date: Thu, 19 Dec 2024 01:39:06 -0500 Subject: [PATCH 057/112] Update compliance test table (#1987) Co-authored-by: Miro --- compliance/nvidia/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compliance/nvidia/README.md b/compliance/nvidia/README.md index 91c8d8df7..bcf050a99 100755 --- a/compliance/nvidia/README.md +++ b/compliance/nvidia/README.md @@ -38,4 +38,6 @@ The `run_verification.py` found in each test directory will copy the test files | gpt-j | - | | stable-diffusion-xl | [TEST01](./TEST01/), [TEST04](./TEST04/) | | Llama2-70b | [TEST06](./TEST06/) | +| Llama3.1-405b | [TEST06](./TEST06/) | | mixtral-8x7b | [TEST06](./TEST06/) | +| R-GAT | [TEST01](./TEST01/) | From 9309ef73e0473c31289f86349f09aac26d67c4c5 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Thu, 19 Dec 2024 06:43:29 +0000 Subject: [PATCH 058/112] Create benchmark-checklist.md for r-gat (#1985) * Create benchmark-checklist.md for r-gat * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md * Update benchmark-checklist.md --------- Co-authored-by: Miro --- graph/R-GAT/benchmark-checklist.md | 86 ++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 graph/R-GAT/benchmark-checklist.md diff --git a/graph/R-GAT/benchmark-checklist.md b/graph/R-GAT/benchmark-checklist.md new file mode 100644 index 000000000..2e76acb99 --- /dev/null +++ b/graph/R-GAT/benchmark-checklist.md @@ -0,0 +1,86 @@ + +#### **1. Applicable Categories** +- Datacenter + +--- + +#### **2. Applicable Scenarios for Each Category** +- Offline + +--- + +#### **3. Applicable Compliance Tests** +- TEST01 + +--- + +#### **4. Latency Threshold for Server Scenarios** +- Not applicable + +--- + +#### **5. Validation Dataset: Unique Samples** +Number of **unique samples** in the validation dataset and the QSL size specified in +- [ ] [inference policies benchmark section](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#41-benchmarks) +- [X] [mlperf.conf](https://github.com/mlcommons/inference/blob/master/loadgen/mlperf.conf) +- [X] [Inference benchmark docs](https://github.com/mlcommons/inference/blob/docs/docs/index.md) + *(Ensure QSL size overflows the system cache if possible.)* + +--- + +#### **6. 
Equal Issue Mode Applicability** +Documented whether **Equal Issue Mode** is applicable in +- [X] [mlperf.conf](https://github.com/mlcommons/inference/blob/master/loadgen/mlperf.conf#L42) +- [X] [Inference benchmark docs](https://github.com/mlcommons/inference/blob/docs/docs/index.md) + *(Relevant if sample processing times are inconsistent across inputs.)* + +--- + +#### **7. Expected Accuracy and `accuracy.txt` Contents** +- [ ] Detailed expected accuracy and the required contents of the `accuracy.txt` file. + +--- + +#### **8. Reference Model Details** +- [ ] Reference model details updated in [Inference benchmark docs](https://github.com/mlcommons/inference/blob/docs/docs/index.md) + +--- + +#### **9. Reference Implementation Dataset Coverage** +- [ ] Reference implementation successfully processes the entire validation dataset during: + - [ ] Performance runs + - [ ] Accuracy runs + - [ ] Compliance runs +- [ ] Valid log files passing the submission checker are generated for all runs. + +--- + +#### **10. Test Runs with Smaller Input Sets** +- [X] Verified the reference implementation can perform test runs with a smaller subset of inputs for: + - [X] Performance runs + - [X] Accuracy runs + +--- + +#### **11. Dataset and Reference Model Instructions** +- [X] Clear instructions provided for: + - [X] Downloading the dataset and reference model. + - [X] Using the dataset and model for the benchmark. + +--- + +#### **12. Documentation of Recommended System Requirements to run the reference implementation** +- [X] Added [here](https://github.com/mlcommons/inference/blob/docs/docs/system_requirements.yml#L44) + +--- + +#### **13. Submission Checker Modifications** +- [X] All necessary changes made to the **submission checker** to validate the benchmark. + +--- + +#### **14. Sample Log Files** +- [ ] Include sample logs for all the applicable scenario runs: + - [ ] `mlperf_log_summary.txt` + - [ ] `mlperf_log_detail.txt` +- [ ] Ensure sample logs successfully pass the submission checker and applicable compliance runs. 
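Checklist items 13 and 14 are ultimately exercised by running the submission checker itself. The snippet below is a minimal invocation sketch; the flag names follow common usage of `tools/submission/submission_checker.py` and should be verified against your checkout:

```python
# Hypothetical smoke test: run the submission checker over a candidate tree.
import subprocess
import sys

result = subprocess.run(
    [sys.executable, "tools/submission/submission_checker.py",
     "--input", "my_submission_tree",   # assumed local path
     "--submitter", "MLCommons",
     "--version", "v5.0"],              # flag spellings may differ per release
    capture_output=True, text=True,
)
print(result.stdout[-2000:])  # tail of the checker report
if result.returncode != 0:
    sys.exit("submission checker reported failures")
```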
From d9f1b6fbce8918ec2d4dff44d6c22ed2a634ce07 Mon Sep 17 00:00:00 2001 From: arjunsuresh Date: Fri, 20 Dec 2024 19:10:16 +0000 Subject: [PATCH 059/112] Increment version to 5.0.5 --- loadgen/VERSION.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/loadgen/VERSION.txt b/loadgen/VERSION.txt index 2d6c0bcf1..ab0fa336d 100644 --- a/loadgen/VERSION.txt +++ b/loadgen/VERSION.txt @@ -1 +1 @@ -5.0.4 +5.0.5 From 81983e528886a173416e1d1bf0198d8ccb5f803a Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 19:53:31 +0000 Subject: [PATCH 060/112] Added python3.12, 3.13 to loadgen test --- .github/workflows/test-loadgen.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-loadgen.yml b/.github/workflows/test-loadgen.yml index 8e42f625e..d73d5913b 100755 --- a/.github/workflows/test-loadgen.yml +++ b/.github/workflows/test-loadgen.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.7", "3.8", "3.9", "3.10", "3.11"] + python-version: ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v3 From ff2e54b21f6f305f5391a0ef6f95ca4dcd759927 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 20:04:50 +0000 Subject: [PATCH 061/112] Update format.yml | Don't format power_checker being synced from power-dev repo --- .github/workflows/format.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml index 45ebb521b..375008f75 100644 --- a/.github/workflows/format.yml +++ b/.github/workflows/format.yml @@ -29,7 +29,7 @@ jobs: for FILE in $(git diff --name-only $filter | grep -E '.*\.py$') do # Check if the file still exists in the working tree - if [ -f "$FILE" ]; then + if [ -f "$FILE" ] && [ "$FILE" != "tools/submission/power/power_checker.py" ]; then autopep8 --in-place -a "$FILE" git add "$FILE" fi From e9354d8602c3bcf568e5efb413bd087ff05fe36b Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 20:27:18 +0000 Subject: [PATCH 062/112] Update index.md | Update accuracy for r-gat --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index b46d4c274..db9e3e440 100644 --- a/docs/index.md +++ b/docs/index.md @@ -163,7 +163,7 @@ The currently valid [MLPerf Inference Benchmarks](index_gh.md) as of MLPerf infe - **Dataset Size**: 788,379 - **QSL Size**: 788,379 - **Number of Parameters**: -- **Reference Model Accuracy**: ACC = ? +- **Reference Model Accuracy**: ACC = 72.86% - **Server Scenario Latency Constraint**: N/A - **Equal Issue mode**: True - **High accuracy variant**: No From 76047ff0f16b3f95d191b78710d483c03d2b2dd9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 20:27:41 +0000 Subject: [PATCH 063/112] Update benchmark-checklist.md for r-gat --- graph/R-GAT/benchmark-checklist.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/graph/R-GAT/benchmark-checklist.md b/graph/R-GAT/benchmark-checklist.md index 2e76acb99..f83c816cb 100644 --- a/graph/R-GAT/benchmark-checklist.md +++ b/graph/R-GAT/benchmark-checklist.md @@ -21,7 +21,7 @@ #### **5. 
Validation Dataset: Unique Samples** Number of **unique samples** in the validation dataset and the QSL size specified in -- [ ] [inference policies benchmark section](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#41-benchmarks) +- [X] [inference policies benchmark section](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#41-benchmarks) - [X] [mlperf.conf](https://github.com/mlcommons/inference/blob/master/loadgen/mlperf.conf) - [X] [Inference benchmark docs](https://github.com/mlcommons/inference/blob/docs/docs/index.md) *(Ensure QSL size overflows the system cache if possible.)* @@ -37,12 +37,13 @@ Documented whether **Equal Issue Mode** is applicable in --- #### **7. Expected Accuracy and `accuracy.txt` Contents** -- [ ] Detailed expected accuracy and the required contents of the `accuracy.txt` file. +- [X] Expected accuracy updated in the [inference policies](https://github.com/mlcommons/inference_policies/blob/master/inference_rules.adoc#41-benchmarks) +- [X] `accuracy.txt` file generated by the reference accuracy script from the MLPerf accuracy log and is validated by the submission checker. --- #### **8. Reference Model Details** -- [ ] Reference model details updated in [Inference benchmark docs](https://github.com/mlcommons/inference/blob/docs/docs/index.md) +- [X] Reference model details updated in [Inference benchmark docs](https://github.com/mlcommons/inference/blob/docs/docs/index.md) --- From 45bb01b0509e3b305bba8f80ea30d1fe891e6d39 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 21:19:11 +0000 Subject: [PATCH 064/112] Update CM commands in R-GAT README.md --- graph/R-GAT/README.md | 72 +++++++++++++------------------------------ 1 file changed, 21 insertions(+), 51 deletions(-) diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index 4c31b42aa..fb8a1c03b 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -19,7 +19,7 @@ This is the reference implementation for MLPerf Inference Graph Neural Network. ## Automated command to run the benchmark via MLCommons CM -Please check the official inference documentation [here](https://docs.mlcommons.org/inference/benchmarks/graph/rgat/) +Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/graph/rgat/) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. ## Setup Set the following helper variables @@ -33,10 +33,7 @@ export MODEL_PATH=$PWD/inference/graph/R-GAT/model/ ```bash git clone --recurse-submodules https://github.com/mlcommons/inference.git --depth 1 ``` -Finally copy the `mlperf.conf` file to the stable diffusion folder -```bash -cp $ROOT_INFERENCE/mlperf.conf $GRAPH_FOLDER -``` + ### Install pytorch **For NVIDIA GPU based runs:** @@ -77,6 +74,14 @@ pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/cu121/repo.html pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/repo.html ``` + +### Download model through CM (Collective Minds) + +``` +pip install cm4mlops +cm run script --tags=get,ml-model,rgat --outdirname= +``` + ### Download model using Rclone To run Rclone on Windows, you can download the executable [here](https://rclone.org/install/#windows). 
@@ -95,15 +100,16 @@ You can then navigate in the terminal to your desired download directory and run rclone copy mlc-inference:mlcommons-inference-wg-public/R-GAT/RGAT.pt $MODEL_PATH -P ``` -### Download model through CM (Collective Minds) -``` -cm run script --tags=get,ml-model,rgat -j -``` ### Download and setup dataset #### Debug Dataset +**CM Command** +``` +cm run script --tags=get,dataset,igbh,_debug --outdirname= +``` + **Download Dataset** ```bash cd $GRAPH_FOLDER @@ -116,13 +122,16 @@ cd $GRAPH_FOLDER python3 tools/split_seeds.py --path igbh --dataset_size tiny ``` + + +#### Full Dataset +**Warning:** This script will download 2.2TB of data + **CM Command** ``` -cm run script --tags=get,dataset,igbh,_debug -j +cm run script --tags=get,dataset,igbh,_full --outdirname= ``` -#### Full Dataset -**Warning:** This script will download 2.2TB of data ```bash cd $GRAPH_FOLDER ./tools/download_igbh_full.sh igbh/ @@ -134,11 +143,6 @@ cd $GRAPH_FOLDER python3 tools/split_seeds.py --path igbh --dataset_size full ``` -**CM Command** -``` -cm run script --tags=get,dataset,igbh,_full -j -``` - #### Calibration dataset @@ -155,20 +159,6 @@ cd $GRAPH_FOLDER python3 main.py --dataset igbh-dgl-tiny --dataset-path igbh/ --profile debug-dgl [--model-path ] [--in-memory] [--device ] [--dtype ] [--scenario ] ``` -##### Debug Run using CM -``` -cm run script --tags=run-mlperf,inference,_submission,_short,_r5.0-dev \ - --model=rgat \ - --implementation=reference \ - --framework=pytorch \ - --category=edge \ - --scenario=Offline \ - --execution_mode=test \ - --device= \ - --quiet \ - --test_query_count=10 \ - --docker -``` #### Local run ```bash @@ -179,26 +169,6 @@ cd $GRAPH_FOLDER python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full [--model-path ] [--in-memory] [--device ] [--dtype ] [--scenario ] ``` -##### Local Run using CM -``` -cm run script --tags=run-mlperf,inference,_submission,_full,_r5.0-dev \ - --model=rgat \ - --implementation=reference \ - --framework=pytorch \ - --category=edge \ - --scenario=Offline \ - --execution_mode=test \ - --device=<>cpu or cuda> \ - --quiet \ - --test_query_count=10 \ - --docker -``` - -- Number of threads could be adjusted using `--threads=#`, where # is the desired number of threads. This option works only if the implementation in use supports threading. -- Batch size could be adjusted using `--batch_size=#`, where # is the desired batch size. This option works only if the implementation in use is supporting the given batch size. -- Add `--env.CM_DATASET_IGBH_PATH=` if you have already downloaded the dataset. The path will be automatically mounted when using docker run. -- Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=` if you have already downloaded the model. The path will be automatically mounted when using docker run. - #### Run using docker Not implemented yet From 293d4ea982fe98c76370404f314f69492d32f9cd Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Fri, 20 Dec 2024 21:21:21 +0000 Subject: [PATCH 065/112] Update README.md --- graph/R-GAT/README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index fb8a1c03b..be8f46e87 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -20,6 +20,8 @@ This is the reference implementation for MLPerf Inference Graph Neural Network. 
## Automated command to run the benchmark via MLCommons CM Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/graph/rgat/) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. + +You can also do `pip install cm4mlops` and then use `cm` commands for downloading the model and datasets using the commands given in the later sections. ## Setup Set the following helper variables @@ -78,7 +80,6 @@ pip install dgl -f https://data.dgl.ai/wheels/torch-2.1/repo.html ### Download model through CM (Collective Minds) ``` -pip install cm4mlops cm run script --tags=get,ml-model,rgat --outdirname= ``` From 1416bbd4ed24d2b7a7cb0ce4a70aee47c349e0ea Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sat, 21 Dec 2024 10:18:29 +0000 Subject: [PATCH 066/112] Create reset-branch.yml --- .github/workflows/reset-branch.yml | 42 ++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 .github/workflows/reset-branch.yml diff --git a/.github/workflows/reset-branch.yml b/.github/workflows/reset-branch.yml new file mode 100644 index 000000000..76cf0b97e --- /dev/null +++ b/.github/workflows/reset-branch.yml @@ -0,0 +1,42 @@ +name: Reset Current Branch to Upstream After Squash Merge + +on: + workflow_dispatch: + inputs: + branch: + description: 'Branch to reset (leave blank for current branch)' + required: false + default: 'dev' + +jobs: + reset-branch: + runs-on: ubuntu-latest + + steps: + - name: Checkout Repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Detect Current Branch + if: ${{ inputs.branch == '' }} + run: echo "branch=$(git rev-parse --abbrev-ref HEAD)" >> $GITHUB_ENV + + - name: Use Input Branch + if: ${{ inputs.branch != '' }} + run: echo "branch=${{ inputs.branch }}" >> $GITHUB_ENV + + - name: Add Upstream Remote + run: | + git remote add upstream https://github.com/mlcommons/inference.git + git fetch upstream + - name: Reset Branch to Upstream + run: | + git checkout ${{ env.branch }} + git reset --hard upstream/${{ env.branch }} + if: success() + + - name: Force Push to Origin + run: | + git push origin ${{ env.branch }} --force-with-lease + if: success() From b2ce7c8092e7ed68d1cbaad038fb8e9d8fc88a35 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Sun, 22 Dec 2024 13:28:11 +0000 Subject: [PATCH 067/112] Create auto-update-dev.yml --- .github/workflows/auto-update-dev.yml | 34 +++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/auto-update-dev.yml diff --git a/.github/workflows/auto-update-dev.yml b/.github/workflows/auto-update-dev.yml new file mode 100644 index 000000000..69cfdb281 --- /dev/null +++ b/.github/workflows/auto-update-dev.yml @@ -0,0 +1,34 @@ +name: Auto-Update Dev Branch from Master + +on: + push: + branches: + - master # Trigger workflow on commits to 'dev' branch + +jobs: + update-main: + runs-on: ubuntu-latest + permissions: + contents: write # Required to push to protected branches + + steps: + - name: Checkout Main Branch + uses: actions/checkout@v4 + with: + ref: dev + fetch-depth: 0 + ssh-key: ${{ secrets.DEPLOY_KEY }} + + - name: Configure Git User + run: | + git config user.name "github-actions" + git config user.email "github-actions@github.com" + + - name: Merge auto-update into dev + run: | + git fetch origin master:master + git merge --no-ff master -m "Auto-merge updates from master branch" + + - name: Push Changes to Main + run: | + git push origin dev From 
1162c25a88aa6f1b5a1bf405df4eb156d1727a00 Mon Sep 17 00:00:00 2001
From: sahilavaran <139779393+sahilavaran@users.noreply.github.com>
Date: Mon, 23 Dec 2024 18:24:57 +0000
Subject: [PATCH 068/112] Tested and fixed SDXL README (#1997)

* Update SDXL README.md, improved CM commands
* Update README.md | Fix SDXL model download path
* Update README.md | Added cm command for downloading coco2014 size.50
* Update README.md | Fix SDXL calibration download command
* Update SDXL README.md
* Update README.md

---
 text_to_image/README.md | 60 ++++++++++++++++++++++++++---------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/text_to_image/README.md b/text_to_image/README.md
index 57c4343b1..84c8c7245 100644
--- a/text_to_image/README.md
+++ b/text_to_image/README.md
@@ -1,9 +1,11 @@
 # MLPerf™ Inference Benchmarks for Text to Image

-This is the reference implementation for MLPerf Inference text to image.
+## Automated command to run the benchmark via MLCommons CM

 Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/text_to_image/sdxl) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker.

+You can also do `pip install cm4mlops` and then use `cm` commands for downloading the model and datasets using the commands given in the later sections.
+
 ## Supported Models

 | model | accuracy | dataset | model source | precision | notes |
@@ -53,10 +55,10 @@ We host two checkpoints (fp32 and fp16) that are a snapshot of the [Hugging Face
 The following MLCommons CM commands can be used to programmatically download the model checkpoints.

 ```
-pip install cmind
-cm pull repo mlcommons@ck
-cm run script --tags=get,ml-model,sdxl,_fp16,_rclone -j
-cm run script --tags=get,ml-model,sdxl,_fp32,_rclone -j
+cm run script --tags=get,ml-model,sdxl,_fp16,_rclone --outdirname=$MODEL_PATH
+```
+```
+cm run script --tags=get,ml-model,sdxl,_fp32,_rclone --outdirname=$MODEL_PATH
 ```

 #### Manual method
@@ -72,30 +74,35 @@ Once Rclone is installed, run the following command to authenticate with the buc
 rclone config create mlc-inference s3 provider=Cloudflare access_key_id=f65ba5eef400db161ea49967de89f47b secret_access_key=fbea333914c292b854f14d3fe232bad6c5407bf0ab1bebf78833c2b359bdfd2b endpoint=https://c2686074cb2caf5cbaf6d134bdba8b47.r2.cloudflarestorage.com
 ```
 You can then navigate in the terminal to your desired download directory and run the following commands to download the checkpoints:
+```
+cd $MODEL_PATH
+```

 **`fp32`**
 ```
-rclone copy mlc-inference:mlcommons-inference-wg-public/stable_diffusion_fp32 ./stable_diffusion_fp32 -P
+rclone copy mlc-inference:mlcommons-inference-wg-public/stable_diffusion_fp32 $MODEL_PATH -P
 ```
 **`fp16`**
 ```
-rclone copy mlc-inference:mlcommons-inference-wg-public/stable_diffusion_fp16 ./stable_diffusion_fp16 -P
+rclone copy mlc-inference:mlcommons-inference-wg-public/stable_diffusion_fp16 $MODEL_PATH -P
 ```

-#### Move to model path
+### Download validation dataset

-```bash
-mkdir $MODEL_PATH
-cd $MODEL_PATH
-# For fp32
-mv /stable_diffusion_fp32.zip .
-unzip stable_diffusion_fp32.zip
-# For fp16
-mv /stable_diffusion_fp16.zip .
-unzip stable_diffusion_fp16.zip
+#### CM METHOD
+The following MLCommons CM commands can be used to programmatically download the validation dataset.
+ +``` +cm run script --tags=get,dataset,coco2014,_validation,_full --outdirname=coco2014 +``` + +For debugging you can download only a part of all the images in the dataset +``` +cm run script --tags=get,dataset,coco2014,_validation,_size.50 --outdirname=coco2014 ``` -### Download dataset + +#### MANUAL METHOD ```bash cd $SD_FOLDER/tools ./download-coco-2014.sh -n @@ -107,14 +114,25 @@ cd $SD_FOLDER/tools ``` If the file [captions.tsv](coco2014/captions/captions.tsv) can be found in the script, it will be used to download the target dataset subset, otherwise it will be generated. We recommend you to have this file for consistency. -#### Calibration dataset +### Download Calibration dataset (only if you are doing quantization) + +#### CM METHOD +The following MLCommons CM commands can be used to programmatically download the calibration dataset. + +``` +cm run script --tags=get,dataset,coco2014,_calibration --outdirname=coco2014 +``` + + +#### MANUAL METHOD We provide a script to download the calibration captions and images. To download only the captions: ```bash cd $SD_FOLDER/tools -./download-coco-2014-calibration.sh +./download-coco-2014-calibration.sh -n ``` -To download only the captions and images: + +To download both the captions and images: ```bash cd $SD_FOLDER/tools ./download-coco-2014-calibration.sh -i -n From b91d6f2eeb5fe2b13d52caec4d31e3fae1ca12f8 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 23 Dec 2024 21:18:01 +0000 Subject: [PATCH 069/112] Update preprocess_submission.py --- tools/submission/preprocess_submission.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py index 977af4d47..1abb42189 100644 --- a/tools/submission/preprocess_submission.py +++ b/tools/submission/preprocess_submission.py @@ -51,7 +51,7 @@ def get_args(): parser.add_argument( "--version", - default="v4.1", + default="v5.0", choices=list(checker.MODEL_CONFIG.keys()), help="mlperf version", ) From d6faf23a01dd03721c47239a0a52f95d260a4e3b Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Wed, 25 Dec 2024 13:45:51 +0000 Subject: [PATCH 070/112] Update README.md --- language/gpt-j/README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/language/gpt-j/README.md b/language/gpt-j/README.md index 9dc024a8e..2e4c34b5e 100644 --- a/language/gpt-j/README.md +++ b/language/gpt-j/README.md @@ -2,6 +2,9 @@ Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/gpt-j) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. +You can also do pip install cm4mlops and then use cm commands for downloading the model and datasets using the commands given in the later sections. 
+ + ### Setup Instructions ```bash From 5c448911bb453ec62ddc117c8eed96c618bf45b0 Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:10:49 +0000 Subject: [PATCH 071/112] Update README.md | added the outdirname in the CM command --- language/gpt-j/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/gpt-j/README.md b/language/gpt-j/README.md index 2e4c34b5e..fb75ecd51 100644 --- a/language/gpt-j/README.md +++ b/language/gpt-j/README.md @@ -79,7 +79,7 @@ The following MLCommons CM commands can be used to programmatically download the ``` pip install cm4mlops -cm run script --tags=get,ml-model,gptj,_pytorch,_rclone -j +cm run script --tags=get,ml-model,gptj,_pytorch,_rclone -j --outdirname=$MODEL_PATH ``` #### Manual method From d679f1460b82f666015fffecf0d765b6f77cc2f4 Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Wed, 25 Dec 2024 14:19:40 +0000 Subject: [PATCH 072/112] Update README.md | added the outdirname in the CM Command --- language/gpt-j/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/gpt-j/README.md b/language/gpt-j/README.md index fb75ecd51..b465b7845 100644 --- a/language/gpt-j/README.md +++ b/language/gpt-j/README.md @@ -79,7 +79,7 @@ The following MLCommons CM commands can be used to programmatically download the ``` pip install cm4mlops -cm run script --tags=get,ml-model,gptj,_pytorch,_rclone -j --outdirname=$MODEL_PATH +cm run script --tags=get,ml-model,gptj,_pytorch,_rclone -j --outdirname= ``` #### Manual method From 390d934e089d0cd9594ef7a8ef49c07c6a55e107 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:32:38 +0530 Subject: [PATCH 073/112] include cm commands - accuracy and calibration --- graph/R-GAT/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index be8f46e87..f3291f37f 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -149,6 +149,10 @@ python3 tools/split_seeds.py --path igbh --dataset_size full The calibration dataset contains 5000 nodes from the training paper nodes of the IGBH dataset. We provide the [Node ids](../../calibration/IGBH/calibration.txt) and the [script](tools/split_seeds.py) to generate them (using the `--calibration` flag). +**CM Command** +``` +cm run script --tags=get,dataset,igbh,_full,_calibration --outdirname= +``` ### Run the benchmark #### Debug Run @@ -170,6 +174,13 @@ cd $GRAPH_FOLDER python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full [--model-path ] [--in-memory] [--device ] [--dtype ] [--scenario ] ``` +### Evaluate the accuracy +```bash +cm run script --tags=process,mlperf,accuracy,_igbh --result_dir= +``` + +Please click [here](https://github.com/mlcommons/inference/blob/dev/graph/R-GAT/tools/accuracy_igbh.py) to view the python script for evaluating accuracy for the igbh dataset. 
+ #### Run using docker Not implemented yet From 9340b793b3690c9649d17f5ef43da24e19e78c48 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Mon, 23 Dec 2024 17:40:48 +0530 Subject: [PATCH 074/112] Update README.md --- graph/R-GAT/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/graph/R-GAT/README.md b/graph/R-GAT/README.md index f3291f37f..561a65e6f 100644 --- a/graph/R-GAT/README.md +++ b/graph/R-GAT/README.md @@ -176,10 +176,10 @@ python3 main.py --dataset igbh-dgl --dataset-path igbh/ --profile rgat-dgl-full ### Evaluate the accuracy ```bash -cm run script --tags=process,mlperf,accuracy,_igbh --result_dir= +cm run script --tags=process,mlperf,accuracy,_igbh --result_dir= ``` -Please click [here](https://github.com/mlcommons/inference/blob/dev/graph/R-GAT/tools/accuracy_igbh.py) to view the python script for evaluating accuracy for the igbh dataset. +Please click [here](https://github.com/mlcommons/inference/blob/dev/graph/R-GAT/tools/accuracy_igbh.py) to view the Python script for evaluating accuracy for the IGBH dataset. #### Run using docker From 5da0abed59492e1be9fdf522bb4ae40770330828 Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Wed, 25 Dec 2024 18:46:28 +0000 Subject: [PATCH 075/112] Update README.md | added the outdirname in the CM command --- language/gpt-j/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/language/gpt-j/README.md b/language/gpt-j/README.md index b465b7845..66f07de3a 100644 --- a/language/gpt-j/README.md +++ b/language/gpt-j/README.md @@ -2,7 +2,8 @@ Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/gpt-j) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. -You can also do pip install cm4mlops and then use cm commands for downloading the model and datasets using the commands given in the later sections. +Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/gpt-j/) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker. + ### Setup Instructions @@ -78,8 +79,7 @@ Please download the fine-tuned GPT-J checkpoint using the instructions below. Th The following MLCommons CM commands can be used to programmatically download the model checkpoint. ``` -pip install cm4mlops -cm run script --tags=get,ml-model,gptj,_pytorch,_rclone -j --outdirname= +cm run script --tags=get,ml-model,gptj,_pytorch,_rclone --outdirname= ``` #### Manual method From ff6d245cd6d4802089d7ffe57660dbff8195dc87 Mon Sep 17 00:00:00 2001 From: sahilavaran <139779393+sahilavaran@users.noreply.github.com> Date: Mon, 30 Dec 2024 11:02:13 +0000 Subject: [PATCH 076/112] Update README.md| added outdirname in the CM command --- language/gpt-j/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/language/gpt-j/README.md b/language/gpt-j/README.md index 66f07de3a..765317635 100644 --- a/language/gpt-j/README.md +++ b/language/gpt-j/README.md @@ -79,7 +79,7 @@ Please download the fine-tuned GPT-J checkpoint using the instructions below. Th The following MLCommons CM commands can be used to programmatically download the model checkpoint. 
```
-cm run script --tags=get,ml-model,gptj,_pytorch,_rclone --outdirname=
+cm run script --tags=get,ml-model,gptj,_pytorch,_rclone --outdirname=./model -P
```

 #### Manual method

From 69ff4000da7d6e73f0186ad1011f0137d0d92f96 Mon Sep 17 00:00:00 2001
From: Arjun
Date: Mon, 30 Dec 2024 16:05:52 +0000
Subject: [PATCH 077/112] Support audit.conf with static mlperf.conf

---
 loadgen/bindings/python_api.cc    |  6 +++---
 loadgen/loadgen.cc                |  2 +-
 loadgen/test_settings.h           |  2 +-
 loadgen/test_settings_internal.cc | 32 ++++++++++++++++---------------
 4 files changed, 22 insertions(+), 20 deletions(-)

diff --git a/loadgen/bindings/python_api.cc b/loadgen/bindings/python_api.cc
index 7f50f5f56..e758772bd 100644
--- a/loadgen/bindings/python_api.cc
+++ b/loadgen/bindings/python_api.cc
@@ -348,10 +348,10 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
            &TestSettings::token_latency_scaling_factor)
       .def("FromConfig", &TestSettings::FromConfig, pybind11::arg("path"),
            pybind11::arg("model"), pybind11::arg("scenario"),
-           pybind11::arg("is_mlperf_conf") = false,
+           pybind11::arg("conf_type") = 1,
            "This function configures settings from the given user "
-           "configuration file, model, and scenario. The is_mlperf_conf flag "
-           "should be set to false or else only the default mlperf_conf file "
+           "configuration file, model, and scenario. The conf_type flag "
+           "should be set to 1 for loading user.conf or else only the default mlperf_conf file "
            "will be loaded by the loadgen.");

   pybind11::enum_(m, "LoggingMode")
diff --git a/loadgen/loadgen.cc b/loadgen/loadgen.cc
index beda3a6c4..c731f1a8d 100644
--- a/loadgen/loadgen.cc
+++ b/loadgen/loadgen.cc
@@ -1228,7 +1228,7 @@ void StartTest(SystemUnderTest* sut, QuerySampleLibrary* qsl,
     RemoveValue(&audit_scenario, ' ');
     const std::string generic_model = "*";
     test_settings.FromConfig(audit_config_filename, generic_model,
-                             audit_scenario);
+                             audit_scenario, 2);
   }
   if (test_settings.test05) {
     // If the configuration indicates we are running test05,
diff --git a/loadgen/test_settings.h b/loadgen/test_settings.h
index 739b2947f..584d073bb 100644
--- a/loadgen/test_settings.h
+++ b/loadgen/test_settings.h
@@ -237,7 +237,7 @@ struct TestSettings {

   /// \brief Load mlperf parameter config from file.
   int FromConfig(const std::string &path, const std::string &model,
-                 const std::string &scenario, bool is_mlperf_conf = false);
+                 const std::string &scenario, int conf_type = 1);

   /**@}*/

   // ==================================
diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc
index 1a7387f59..cd58fcf01 100644
--- a/loadgen/test_settings_internal.cc
+++ b/loadgen/test_settings_internal.cc
@@ -16,7 +16,6 @@ limitations under the License.
#include #include #include - #include "logging.h" #include "mlperf_conf.h" #include "utils.h" @@ -520,14 +519,14 @@ void TestSettingsInternal::LogSummary(AsyncSummary &summary) const { } // namespace loadgen int TestSettings::FromConfig(const std::string &path, const std::string &model, - const std::string &scenario, bool is_mlperf_conf) { + const std::string &scenario, int conf_type) { std::map kv; static int configCount = 0; - if (!is_mlperf_conf) { - if (configCount == 0) { + if (conf_type == 0) { + if (configCount == 0 || ((configCount==1) && (conf_type == 2))) { // Only allow userConf as the single configFile and loadgen loads the - // mlperfConf automatically + // mlperfConf automatically for perf and accuracy runs FromConfig("", model, scenario, true); } @@ -586,7 +585,7 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, std::unique_ptr fss; std::string line; - if (!is_mlperf_conf) { + if (conf_type=0) { // dirt simple config parser fss = std::make_unique(path); if (!static_cast(fss.get())->is_open()) { @@ -691,20 +690,16 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, break; } } - if (is_mlperf_conf) { + if (conf_type=0) { lookupkv(model, scenario, "qsl_rng_seed", &qsl_rng_seed, nullptr); lookupkv(model, scenario, "sample_index_rng_seed", &sample_index_rng_seed, nullptr); lookupkv(model, scenario, "schedule_rng_seed", &schedule_rng_seed, nullptr); - lookupkv(model, scenario, "accuracy_log_rng_seed", &accuracy_log_rng_seed, - nullptr); - lookupkv(model, scenario, "accuracy_log_probability", nullptr, - &accuracy_log_probability, 0.01); - lookupkv(model, scenario, "accuracy_log_sampling_target", - &accuracy_log_sampling_target, nullptr); if (lookupkv(model, scenario, "sample_concatenate_permutation", &val, nullptr)) sample_concatenate_permutation = (val == 1) ? true : false; + lookupkv(model, scenario, "accuracy_log_probability", nullptr, + &accuracy_log_probability, 0.01); if (lookupkv(model, scenario, "test05", &val, nullptr)) test05 = (val == 1) ? true : false; lookupkv(model, scenario, "test05_qsl_rng_seed", &test05_qsl_rng_seed, @@ -715,8 +710,10 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, &test05_schedule_rng_seed, nullptr); } - // keys that can be overriden in user.conf but will make the results eligibale - // only for open submission keys to measure token metrics + // keys that can be overriden in user.conf but will make the results eligible + // only for open submissions + + // keys to measure token metrics if (lookupkv(model, scenario, "use_token_latencies", &val, nullptr)) { use_token_latencies = (val == 1) ? true : false; } @@ -781,6 +778,11 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, if (lookupkv(model, scenario, "print_timestamps", &val, nullptr)) print_timestamps = (val == 0) ? 
false : true;

+  //keys that are used in audit.conf
+  lookupkv(model, scenario, "accuracy_log_rng_seed", &accuracy_log_rng_seed,
+           nullptr);
+  lookupkv(model, scenario, "accuracy_log_sampling_target",
+           &accuracy_log_sampling_target, nullptr);

   return 0;
 }

From cb18921d1e619bd27cc57c1893ab69b9fe98d4bf Mon Sep 17 00:00:00 2001
From: mlcommons-bot
Date: Mon, 30 Dec 2024 16:17:09 +0000
Subject: [PATCH 079/112] [Automated Commit] Format Codebase

---
 loadgen/bindings/python_api.cc    |  3 ++-
 loadgen/test_settings_internal.cc | 13 +++++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/loadgen/bindings/python_api.cc b/loadgen/bindings/python_api.cc
index e758772bd..96396dab9 100644
--- a/loadgen/bindings/python_api.cc
+++ b/loadgen/bindings/python_api.cc
@@ -351,7 +351,8 @@ PYBIND11_MODULE(mlperf_loadgen, m) {
            pybind11::arg("conf_type") = 1,
            "This function configures settings from the given user "
            "configuration file, model, and scenario. The conf_type flag "
-           "should be set to 1 for loading user.conf or else only the default mlperf_conf file "
+           "should be set to 1 for loading user.conf or else only the default "
+           "mlperf_conf file "
            "will be loaded by the loadgen.");

   pybind11::enum_(m, "LoggingMode")
diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc
index cd58fcf01..af735edee 100644
--- a/loadgen/test_settings_internal.cc
+++ b/loadgen/test_settings_internal.cc
@@ -16,6 +16,7 @@ limitations under the License.
 #include
 #include
 #include
+
 #include "logging.h"
 #include "mlperf_conf.h"
 #include "utils.h"
@@ -524,7 +525,7 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
   static int configCount = 0;

   if (conf_type == 0) {
-    if (configCount == 0 || ((configCount==1) && (conf_type == 2))) {
+    if (configCount == 0 || ((configCount == 1) && (conf_type == 2))) {
       // Only allow userConf as the single configFile and loadgen loads the
       // mlperfConf automatically for perf and accuracy runs
       FromConfig("", model, scenario, true);
@@ -585,7 +586,7 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
   std::unique_ptr fss;
   std::string line;

-  if (conf_type=0) {
+  if (conf_type = 0) {
     // dirt simple config parser
     fss = std::make_unique(path);
     if (!static_cast(fss.get())->is_open()) {
@@ -690,7 +691,7 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
       break;
     }
   }
-  if (conf_type=0) {
+  if (conf_type = 0) {
     lookupkv(model, scenario, "qsl_rng_seed", &qsl_rng_seed, nullptr);
     lookupkv(model, scenario, "sample_index_rng_seed", &sample_index_rng_seed,
              nullptr);
@@ -778,11 +779,11 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model,
   if (lookupkv(model, scenario, "print_timestamps", &val, nullptr))
     print_timestamps = (val == 0) ?
false : true; - //keys that are used in audit.conf + // keys that are used in audit.conf lookupkv(model, scenario, "accuracy_log_rng_seed", &accuracy_log_rng_seed, - nullptr); + nullptr); lookupkv(model, scenario, "accuracy_log_sampling_target", - &accuracy_log_sampling_target, nullptr); + &accuracy_log_sampling_target, nullptr); return 0; } From a5c1552212f6af5f57b24f64d97abb5aae79c9af Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 30 Dec 2024 22:38:45 +0000 Subject: [PATCH 080/112] Update test_settings_internal.cc | Fix conf_type usage --- loadgen/test_settings_internal.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc index af735edee..6401b0483 100644 --- a/loadgen/test_settings_internal.cc +++ b/loadgen/test_settings_internal.cc @@ -524,11 +524,11 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, std::map kv; static int configCount = 0; - if (conf_type == 0) { - if (configCount == 0 || ((configCount == 1) && (conf_type == 2))) { + if (conf_type == 1) { + if (configCount == 0) { // Only allow userConf as the single configFile and loadgen loads the // mlperfConf automatically for perf and accuracy runs - FromConfig("", model, scenario, true); + FromConfig("", model, scenario, 0); } else { From d4f3f2d2308da1435bfd575729dfeb15db5dbef5 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 30 Dec 2024 23:02:48 +0000 Subject: [PATCH 081/112] Update test_settings_internal.cc --- loadgen/test_settings_internal.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc index 6401b0483..65260bfeb 100644 --- a/loadgen/test_settings_internal.cc +++ b/loadgen/test_settings_internal.cc @@ -586,7 +586,7 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, std::unique_ptr fss; std::string line; - if (conf_type = 0) { + if (conf_type != 0) { // dirt simple config parser fss = std::make_unique(path); if (!static_cast(fss.get())->is_open()) { @@ -691,7 +691,7 @@ int TestSettings::FromConfig(const std::string &path, const std::string &model, break; } } - if (conf_type = 0) { + if (conf_type == 0) { lookupkv(model, scenario, "qsl_rng_seed", &qsl_rng_seed, nullptr); lookupkv(model, scenario, "sample_index_rng_seed", &sample_index_rng_seed, nullptr); From ea2ba9cd85bb1c03553562803f8537edb36d0649 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 30 Dec 2024 23:09:52 +0000 Subject: [PATCH 082/112] Fixes to submission checker --- tools/submission/submission_checker.py | 57 ++++++++++++++--------- tools/submission/truncate_accuracy_log.py | 2 +- 2 files changed, 36 insertions(+), 23 deletions(-) diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 26d5212f9..49604de41 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -431,7 +431,7 @@ "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, "llama3.1-405b": 8313, - "rgat": 788379 + "rgat": 10000 }, # model_mapping.json is expected in the root directory of the @@ -1132,22 +1132,18 @@ def find_error_in_detail_log(config, fname): is_valid = False return is_valid - -def check_accuracy_dir(config, model, path, verbose): - is_valid = False - all_accuracy_valid = True - acc = None - result_acc = {} - hash_val = None - target = config.get_accuracy_target(model) - acc_upper_limit = config.get_accuracy_upper_limit(model) +def 
get_accuracy_values(config, model): + patterns = [] acc_targets = [] acc_types = [] + acc_limits = [] + up_patterns = [] + acc_limit_check = False + + target = config.get_accuracy_target(model) + acc_upper_limit = config.get_accuracy_upper_limit(model) if acc_upper_limit is not None: - acc_limits = [] - up_patterns = [] - acc_limit_check = True for i in range(0, len(acc_upper_limit), 2): acc_type, acc_target = acc_upper_limit[i: i + 2] acc_limits.append(acc_target) @@ -1158,6 +1154,21 @@ def check_accuracy_dir(config, model, path, verbose): patterns.append(ACC_PATTERN[acc_type]) acc_targets.append(acc_target) acc_types.append(acc_type) + + return patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit + + +def check_accuracy_dir(config, model, path, verbose): + is_valid = False + all_accuracy_valid = True + acc = None + result_acc = {} + hash_val = None + target = config.get_accuracy_target(model) + #acc_upper_limit = config.get_accuracy_upper_limit(model) + patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit = get_accuracy_values(config, model) + acc_limit_check = True + acc_seen = [False for _ in acc_targets] with open(os.path.join(path, "accuracy.txt"), "r", encoding="utf-8") as f: @@ -1185,6 +1196,7 @@ def check_accuracy_dir(config, model, path, verbose): if acc: result_acc[acc_type] = acc acc = None + if acc_upper_limit is not None: for i, (pattern, acc_limit) in enumerate( zip(up_patterns, acc_limits)): @@ -1341,7 +1353,7 @@ def check_performance_dir( samples_per_query = mlperf_log["effective_samples_per_query"] min_duration = mlperf_log["effective_min_duration_ms"] equal_issue_used_check = ( - mlperf_log["effective_sample_concatenate_permutation"] == "true" + mlperf_log["effective_sample_concatenate_permutation"] == True ) if not config.requires_equal_issue(model, division): equal_issue_used_check = True @@ -2800,7 +2812,7 @@ def check_compliance_perf_dir(test_dir): test_perf_path, diff) is_valid = False - + return is_valid @@ -2849,13 +2861,10 @@ def check_compliance_acc_dir(test_dir, model, config): is_valid = False elif not acc_passed: target = config.get_accuracy_target(model) - patterns = [] - acc_types = [] - for i in range(0, len(target), 2): - acc_type = target[i: i + 2] - acc_types.append(acc_type) - patterns.append(ACC_PATTERN[acc_type[0]]) - acc_seen = [False for _ in acc_type] + patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit = get_accuracy_values(config, model) + acc_limit_check = True + + acc_seen = [False for _ in acc_targets] acc_baseline = {acc_type: 0 for acc_type in acc_types} acc_compliance = {acc_type: 0 for acc_type in acc_types} with open( @@ -2898,6 +2907,10 @@ def check_compliance_acc_dir(test_dir, model, config): if delta_perc <= required_delta_perc: is_valid = True else: + log.error( + "Compliance test accuracy check (non-deterministic mode) in %s failed", + test_dir, + ) is_valid = False break elif "TEST06" in test_dir: diff --git a/tools/submission/truncate_accuracy_log.py b/tools/submission/truncate_accuracy_log.py index b7a9509ae..e0e1973ec 100755 --- a/tools/submission/truncate_accuracy_log.py +++ b/tools/submission/truncate_accuracy_log.py @@ -233,7 +233,7 @@ def truncate_results_dir(filter_submitter, backup, scenarios_to_skip): # get to work hash_val = get_hash(acc_log) with open(acc_txt, "a", encoding="utf-8") as f: - f.write("hash={0}\n".format(hash_val)) + f.write("\nhash={0}\n".format(hash_val)) truncate_file(acc_log) log.info("%s truncated", acc_log) From 
3d5688ac031264a5ca256be55f03134d27d67e92 Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Mon, 30 Dec 2024 23:11:04 +0000 Subject: [PATCH 083/112] [Automated Commit] Format Codebase --- tools/submission/submission_checker.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 49604de41..6f2332aea 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -1132,8 +1132,9 @@ def find_error_in_detail_log(config, fname): is_valid = False return is_valid + def get_accuracy_values(config, model): - + patterns = [] acc_targets = [] acc_types = [] @@ -1165,8 +1166,9 @@ def check_accuracy_dir(config, model, path, verbose): result_acc = {} hash_val = None target = config.get_accuracy_target(model) - #acc_upper_limit = config.get_accuracy_upper_limit(model) - patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit = get_accuracy_values(config, model) + # acc_upper_limit = config.get_accuracy_upper_limit(model) + patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit = get_accuracy_values( + config, model) acc_limit_check = True acc_seen = [False for _ in acc_targets] @@ -1637,7 +1639,7 @@ def get_power_metric(config, scenario_fixed, log_path, is_valid, res): samples_per_query = 8 if (scenario_fixed in ["MultiStream"] - ) and scenario in ["SingleStream"]: + ) and scenario in ["SingleStream"]: power_metric = ( avg_power * power_duration * samples_per_query * 1000 / num_queries ) @@ -2812,7 +2814,7 @@ def check_compliance_perf_dir(test_dir): test_perf_path, diff) is_valid = False - + return is_valid @@ -2861,7 +2863,8 @@ def check_compliance_acc_dir(test_dir, model, config): is_valid = False elif not acc_passed: target = config.get_accuracy_target(model) - patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit = get_accuracy_values(config, model) + patterns, acc_targets, acc_types, acc_limits, up_patterns, acc_upper_limit = get_accuracy_values( + config, model) acc_limit_check = True acc_seen = [False for _ in acc_targets] From 16019e65228a49eb890a28794ae08a8b9855b09e Mon Sep 17 00:00:00 2001 From: mlcommons-bot Date: Mon, 30 Dec 2024 23:18:13 +0000 Subject: [PATCH 084/112] [Automated Commit] Format Codebase --- loadgen/test_settings_internal.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/loadgen/test_settings_internal.cc b/loadgen/test_settings_internal.cc index 708fb0a18..f654948f3 100644 --- a/loadgen/test_settings_internal.cc +++ b/loadgen/test_settings_internal.cc @@ -16,6 +16,7 @@ limitations under the License. 
#include #include #include + #include "logging.h" #include "mlperf_conf.h" #include "utils.h" From 308147a873452548b711ee1dd069681d24b35fc6 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 31 Dec 2024 14:06:37 +0000 Subject: [PATCH 085/112] Update submission_checker.py | Fix rgat performance_sample_count --- tools/submission/submission_checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/submission/submission_checker.py b/tools/submission/submission_checker.py index 6f2332aea..fb1f1bd49 100755 --- a/tools/submission/submission_checker.py +++ b/tools/submission/submission_checker.py @@ -431,7 +431,7 @@ "stable-diffusion-xl": 5000, "mixtral-8x7b": 15000, "llama3.1-405b": 8313, - "rgat": 10000 + "rgat": 788379 }, # model_mapping.json is expected in the root directory of the From 5d5da3c2ec2764d5ca643dcfb288611d82edce30 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Wed, 1 Jan 2025 22:07:21 +0000 Subject: [PATCH 086/112] Update evaluate-accuracy.py | Fixes #2008 --- language/mixtral-8x7b/evaluate-accuracy.py | 1 + 1 file changed, 1 insertion(+) diff --git a/language/mixtral-8x7b/evaluate-accuracy.py b/language/mixtral-8x7b/evaluate-accuracy.py index 3ea79cea8..74485d569 100644 --- a/language/mixtral-8x7b/evaluate-accuracy.py +++ b/language/mixtral-8x7b/evaluate-accuracy.py @@ -121,6 +121,7 @@ def main(): checkpoint_path = args.checkpoint_path metric = evaluate.load("rouge") nltk.download("punkt") + nltk.download("punkt_tab") tokenizer = AutoTokenizer.from_pretrained( checkpoint_path, From 4ba40e2b7b6169fed5c569ea2e4888621b3276ad Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 6 Jan 2025 12:22:34 +0000 Subject: [PATCH 087/112] Update index.md --- docs/submission/index.md | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/docs/submission/index.md b/docs/submission/index.md index 1050f5fb0..78cd2f732 100644 --- a/docs/submission/index.md +++ b/docs/submission/index.md @@ -3,13 +3,22 @@ hide: - toc --- -

-[figure: "Submission Generation Flow" image, captioned "Figure: MLPerf Inference Submission Generation Flow"]

+```mermaid
+flowchart LR
+    classDef hidden fill:none,stroke:none;
+    subgraph Generation [Submission Generation]
+    direction TB
+    A[populate system details] --> B[generate submission structure]
+    B --> C[truncate-accuracy-logs]
+    C --> D{Infer low talency results and/or filter out invalid results}
+    D --> yes --> E[preprocess-mlperf-inference-submission]
+    D --> no --> F[run-mlperf-inference-submission-checker]
+    E --> F
+    end
+    Input((MLPerf Inference Results folder)) --> Generation
+    Generation -- Submission TAR file --> H[Upload to Submission Server]
+    H --> Output((Receive validation email))
+```

 Click [here](https://youtu.be/eI1Hoecc3ho) to view the recording of the workshop: Streamlining your MLPerf Inference results using CM.

From a8931662e27b70a77e1a758246f047c7cf12d27d Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 12:41:25 +0000
Subject: [PATCH 088/112] Update index.md

---
 docs/submission/index.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 78cd2f732..0b0cc908c 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -11,8 +11,8 @@ flowchart LR
     A[populate system details] --> B[generate submission structure]
     B --> C[truncate-accuracy-logs]
     C --> D{Infer low talency results and/or filter out invalid results}
-    D --> yes --> E[preprocess-mlperf-inference-submission]
-    D --> no --> F[run-mlperf-inference-submission-checker]
+    D -- yes --> E[preprocess-mlperf-inference-submission]
+    D -- no --> F[run-mlperf-inference-submission-checker]
     E --> F
     end
     Input((MLPerf Inference Results folder)) --> Generation

From 194aeda8d6652a01a030e3a3848f9e918b523f45 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 12:42:52 +0000
Subject: [PATCH 089/112] Update index.md

---
 docs/submission/index.md | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 0b0cc908c..d631970bc 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -3,22 +3,7 @@ hide:
 - toc
 ---

-
@@ -80,6 +65,23 @@ Once all the results across all the models are ready you can use the following c ## Generate actual submission tree +```mermaid +flowchart LR + classDef hidden fill:none,stroke:none; + subgraph Generation [Submission Generation] + direction TB + A[populate system details] --> B[generate submission structure] + B --> C[truncate-accuracy-logs] + C --> D{Infer low talency results and/or filter out invalid results} + D -- yes --> E[preprocess-mlperf-inference-submission] + D -- no --> F[run-mlperf-inference-submission-checker] + E --> F + end + Input((MLPerf Inference Results folder)) --> Generation + Generation -- Submission TAR file --> H[Upload to Submission Server] + H --> Output((Receive validation email)) +``` + === "Docker run" ### Docker run === "Closed" From bab97ffa95fa74bbfc8e96fb2edf9189e21db1e9 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Mon, 6 Jan 2025 13:26:43 +0000 Subject: [PATCH 090/112] Update submission generation steps (WIP) --- docs/submission/index.md | 39 ++++++++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/docs/submission/index.md b/docs/submission/index.md index d631970bc..d5a86f10a 100644 --- a/docs/submission/index.md +++ b/docs/submission/index.md @@ -5,17 +5,10 @@ hide: -Click [here](https://youtu.be/eI1Hoecc3ho) to view the recording of the workshop: Streamlining your MLPerf Inference results using CM. Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the prposal slide for Common Automation for MLPerf Inference Submission Generation through CM. -=== "CM based results" - If you have followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `cm cache` folder. The following command could be used to browse the structure of inference results folder generated by CM. - ### Get results folder structure - ```bash - cm find cache --tags=get,mlperf,inference,results,dir | xargs tree - ``` -=== "Non CM based results" +=== "Custom automation based MLPerf results" If you have not followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, please make sure that the result directory is structured in the following way. ``` └── System description ID(SUT Name) @@ -23,7 +16,7 @@ Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD └── Benchmark └── Scenario ├── Performance - | └── run_x/#1 run for all scenarios + | └── run_1 run for all scenarios | ├── mlperf_log_summary.txt | └── mlperf_log_detail.txt ├── Accuracy @@ -36,13 +29,13 @@ Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD | | └── run_x/#1 run for all scenarios | | ├── mlperf_log_summary.txt | | └── mlperf_log_detail.txt - | ├── Accuracy - | | ├── baseline_accuracy.txt - | | ├── compliance_accuracy.txt + | ├── Accuracy # for TEST01 only + | | ├── baseline_accuracy.txt (if test fails in deterministic mode) + | | ├── compliance_accuracy.txt (if test fails in deterministic mode) | | ├── mlperf_log_accuracy.json | | └── accuracy.txt | ├── verify_performance.txt - | └── verify_accuracy.txt #for TEST01 only + | └── verify_accuracy.txt # for TEST01 only |── user.conf └── measurements.json ``` @@ -61,13 +54,27 @@ Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD ```
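One way to sanity-check a results folder against the layout above, before handing it to the submission checker, is a small structural lint. This is a rough sketch under stated assumptions: the `REQUIRED` list is distilled from the tree shown here, covers only a single performance run, skips the compliance subtree, and is far from the checker's full rule set.

```python
import os

# Files every <SUT>/<model>/<scenario> directory should contain,
# distilled from the tree above. One performance run only; the
# Compliance subtree is deliberately skipped for brevity.
REQUIRED = [
    os.path.join("Performance", "run_1", "mlperf_log_summary.txt"),
    os.path.join("Performance", "run_1", "mlperf_log_detail.txt"),
    os.path.join("Accuracy", "mlperf_log_accuracy.json"),
    os.path.join("Accuracy", "mlperf_log_detail.txt"),
    os.path.join("Accuracy", "mlperf_log_summary.txt"),
    os.path.join("Accuracy", "accuracy.txt"),
    "user.conf",
    "measurements.json",
]


def lint_results(results_root):
    """Return the expected files that are missing under results_root."""
    missing = []
    for sut in sorted(os.listdir(results_root)):
        sut_dir = os.path.join(results_root, sut)
        if not os.path.isdir(sut_dir):
            continue
        for model in sorted(os.listdir(sut_dir)):
            model_dir = os.path.join(sut_dir, model)
            if not os.path.isdir(model_dir):
                continue
            for scenario in sorted(os.listdir(model_dir)):
                scenario_dir = os.path.join(model_dir, scenario)
                for rel in REQUIRED:
                    path = os.path.join(scenario_dir, rel)
                    if not os.path.exists(path):
                        missing.append(path)
    return missing


if __name__ == "__main__":
    import sys
    for path in lint_results(sys.argv[1]):
        print("missing:", path)
```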
+=== "MLPerf Automation based results" + If you have followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, all the valid results will get aggregated to the `cm cache` folder. The following command could be used to browse the structure of inference results folder generated by CM. + ### Get results folder structure + ```bash + cm find cache --tags=get,mlperf,inference,results,dir | xargs tree + ``` + + Once all the results across all the models are ready you can use the following command to generate a valid submission tree compliant with the [MLPerf requirements](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1). ## Generate actual submission tree +=== "Multi-SUT submission" + + === "Using Local Folder Sync" + === "Using a Github repo" + +=== "Single SUT submission" + ```mermaid flowchart LR - classDef hidden fill:none,stroke:none; subgraph Generation [Submission Generation] direction TB A[populate system details] --> B[generate submission structure] @@ -168,9 +175,11 @@ Run the following command after **replacing `--repo_url` with your GitHub reposi ```bash cm run script --tags=push,github,mlperf,inference,submission \ - --repo_url=https://github.com/GATEOverflow/mlperf_inference_submissions_v4.1 \ + --repo_url=https://github.com/mlcommons/mlperf_inference_submissions_v4.1 \ --commit_message="Results on added by " \ --quiet ``` At the end, you can download the github repo and upload to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission). + +Click [here](https://youtu.be/eI1Hoecc3ho) to view the recording of the workshop: Streamlining your MLPerf Inference results using CM. From 318110cfb1339aa8bc0d832e101b4f839f1dd350 Mon Sep 17 00:00:00 2001 From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com> Date: Tue, 7 Jan 2025 01:53:46 +0530 Subject: [PATCH 091/112] add submission generation graphs for local sync and through github repo (#2016) * add graphs for local sync and through github repo --- docs/submission/index.md | 101 ++++++++++++++++++++++++++++++++------- 1 file changed, 83 insertions(+), 18 deletions(-) diff --git a/docs/submission/index.md b/docs/submission/index.md index d5a86f10a..cc9b07c67 100644 --- a/docs/submission/index.md +++ b/docs/submission/index.md @@ -69,26 +69,91 @@ Once all the results across all the models are ready you can use the following c === "Multi-SUT submission" === "Using Local Folder Sync" - === "Using a Github repo" -=== "Single SUT submission" - -```mermaid -flowchart LR - subgraph Generation [Submission Generation] - direction TB - A[populate system details] --> B[generate submission structure] - B --> C[truncate-accuracy-logs] - C --> D{Infer low talency results and/or filter out invalid results} - D -- yes --> E[preprocess-mlperf-inference-submission] - D -- no --> F[run-mlperf-inference-submission-checker] - E --> F - end - Input((MLPerf Inference Results folder)) --> Generation - Generation -- Submission TAR file --> H[Upload to Submission Server] - H --> Output((Receive validation email)) -``` + ```mermaid + flowchart LR + classDef hidden fill:none,stroke:none; + subgraph Generation1 [Submission Generation SUT-1] + direction TB + A3[populate system details] --> B3[generate submission structure] + B3 --> C3[truncate-accuracy-logs] + C3 --> D3{Infer low talency results and/or filter out invalid results} + D3 -- yes --> E3[preprocess-mlperf-inference-submission] + D3 -- no --> F3[run-mlperf-inference-submission-checker] + E3 --> 
From 318110cfb1339aa8bc0d832e101b4f839f1dd350 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Tue, 7 Jan 2025 01:53:46 +0530
Subject: [PATCH 091/112] add submission generation graphs for local sync and
 through github repo (#2016)

* add graphs for local sync and through github repo

---
 docs/submission/index.md | 101 ++++++++++++++++++++++++++++++++------
 1 file changed, 83 insertions(+), 18 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index d5a86f10a..cc9b07c67 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -69,26 +69,91 @@ Once all the results across all the models are ready you can use the following c
 === "Multi-SUT submission"
 
     === "Using Local Folder Sync"
-    === "Using a Github repo"
 
-=== "Single SUT submission"
-
-```mermaid
-flowchart LR
-    classDef hidden fill:none,stroke:none;
-    subgraph Generation [Submission Generation]
-        direction TB
-        A[populate system details] --> B[generate submission structure]
-        B --> C[truncate-accuracy-logs]
-        C --> D{Infer low latency results and/or filter out invalid results}
-        D -- yes --> E[preprocess-mlperf-inference-submission]
-        D -- no --> F[run-mlperf-inference-submission-checker]
-        E --> F
-    end
-    Input((MLPerf Inference Results folder)) --> Generation
-    Generation -- Submission TAR file --> H[Upload to Submission Server]
-    H --> Output((Receive validation email))
-```
+        ```mermaid
+        flowchart LR
+        classDef hidden fill:none,stroke:none;
+        subgraph Generation1 [Submission Generation SUT-1]
+            direction TB
+            A3[populate system details] --> B3[generate submission structure]
+            B3 --> C3[truncate-accuracy-logs]
+            C3 --> D3{Infer low latency results and/or filter out invalid results}
+            D3 -- yes --> E3[preprocess-mlperf-inference-submission]
+            D3 -- no --> F3[run-mlperf-inference-submission-checker]
+            E3 --> F3
+        end
+
+        subgraph Generation2 [Submission Generation SUT-2]
+            direction TB
+        end
+
+        subgraph GenerationN [Submission Generation SUT-N]
+            direction TB
+        end
+
+        Input1((MLPerf Inference Results folder SUT1)) --> Generation1 --> T1[Submission Tree 1]
+        Input2((MLPerf Inference Results folder SUT2)) --> Generation2 --> T2[Submission Tree 2]
+        Input3((MLPerf Inference Results folder SUTN)) --> GenerationN --> TN[Submission Tree N]
+
+        subgraph LargeCircle [ ]
+            direction TB
+            Generation1
+            Generation2
+            GenerationN
+        end
+
+        T1 --> Sync((Sync locally with rsync on SUT-1))
+        T2 --> Sync
+        TN --> Sync
+
+        Sync --> finalsubcheck[run-mlperf-inference-submission-checker]
+
+        finalsubcheck --> tar[Submission Tar File] --> upload[Upload result to submission server] --> output((Receive validation email))
+        ```
+    === "Using a Github repo"
+
+        ```mermaid
+        flowchart LR
+        classDef hidden fill:none,stroke:none;
+
+        subgraph Generation1 [Submission Generation SUT-1]
+            direction TB
+            A3[populate system details] --> B3[generate submission structure]
+            B3 --> C3[truncate-accuracy-logs]
+            C3 --> D3{Infer low latency results and/or filter out invalid results}
+            D3 -- yes --> E3[preprocess-mlperf-inference-submission]
+            D3 -- no --> F3[run-mlperf-inference-submission-checker]
+            E3 --> F3
+        end
+
+        subgraph Generation2 [Submission Generation SUT-2]
+            direction TB
+        end
+
+        subgraph GenerationN [Submission Generation SUT-N]
+            direction TB
+        end
+
+        Input1((MLPerf Inference Results folder SUT1)) --> Generation1 --> T1[Submission Tree 1]
+        Input2((MLPerf Inference Results folder SUT2)) --> Generation2 --> T2[Submission Tree 2]
+        Input3((MLPerf Inference Results folder SUTN)) --> GenerationN --> TN[Submission Tree N]
+
+        subgraph LargeCircle [ ]
+            direction TB
+            Generation1
+            Generation2
+            GenerationN
+        end
+
+        T1 --> Sync((Upload the submission tree to GitHub repo))
+        T2 --> Sync
+        TN --> Sync
+
+        Sync --> clone[Clone the repo to SUT1] --> tar[Submission Tar File] --> upload[Upload result to submission server] --> output((Receive validation email))
+        ```
+
+## Command to generate actual submission tree
 === "Docker run"
     ### Docker run
     === "Closed"
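Both graphs in this patch reduce to the same two steps: produce a submission tree on each SUT, then gather the trees in one place before running the checker. A hedged sketch of the local-sync variant (a Python wrapper around `rsync`; the host names and paths are placeholders, not values from this repo):

```python
import subprocess

# Placeholder inventory of (ssh host, remote submission tree) per SUT.
SUT_TREES = [
    ("user@sut2", "/data/submission_tree/"),
    ("user@sut3", "/data/submission_tree/"),
]
LOCAL_TREE = "/data/submission_tree/"  # consolidated copy kept on SUT-1


def pull_submission_trees():
    """Copy every per-SUT submission tree onto the machine running the checker."""
    for host, remote_dir in SUT_TREES:
        # -avz matches the rsync flags documented later in this series.
        subprocess.run(["rsync", "-avz", f"{host}:{remote_dir}", LOCAL_TREE],
                       check=True)


if __name__ == "__main__":
    pull_submission_trees()
```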
From a8cdcaf9b123adde60164606f8ec509266bb228a Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 22:53:10 +0000
Subject: [PATCH 092/112] Update index.md

---
 docs/submission/index.md | 34 ++++++++++++++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index cc9b07c67..28a8b2b20 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -66,9 +66,39 @@ Once all the results across all the models are ready you can use the following c
 
 ## Generate actual submission tree
 
+The submission generation flow is explained in the below diagram
+
+```mermaid
+flowchart LR
+    subgraph Generation [Submission Generation SUT1]
+        direction TB
+        A[populate system details] --> B[generate submission structure]
+        B --> C[truncate-accuracy-logs]
+        C --> D{Infer low latency results and/or filter out invalid results}
+        D --> yes --> E[preprocess-mlperf-inference-submission]
+        D --> no --> F[run-mlperf-inference-submission-checker]
+        E --> F
+    end
+    Input((MLPerf Inference Results SUT1)) --> Generation
+    Generation --> Output((Submission Folder SUT1))
+```
+## Command to generate actual submission folder
+```bash
+ cm docker script --tags=generate,inference,submission \
+    --clean \
+    --preprocess_submission=yes \
+    --run-checker \
+    --submitter=MLCommons \
+    --division=closed \
+    --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
+    --quiet
+ ```
+
=== "Multi-SUT submission"

    If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder

    === "Using Local Folder Sync"

From a8cdcaf9b123adde60164606f8ec509266bb228a Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 22:53:49 +0000
Subject: [PATCH 093/112] Update index.md

---
 docs/submission/index.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 28a8b2b20..99e1d83ce 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -82,6 +82,7 @@ flowchart LR
   Input((MLPerf Inference Results SUT1)) --> Generation
   Generation --> Output((Submission Folder SUT1))
 ```
+
 ## Command to generate actual submission folder
 ```bash
     cm docker script --tags=generate,inference,submission \
@@ -92,7 +93,7 @@ flowchart LR
     --division=closed \
     --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
     --quiet
- ```
+```

From 6eda9f13bbc7086956e6bd09f20ff3b790b625a2 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 22:54:37 +0000
Subject: [PATCH 094/112] Update index.md

---
 docs/submission/index.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 99e1d83ce..b85276de9 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -83,9 +83,10 @@ flowchart LR
     Generation --> Output((Submission Folder SUT1))
 ```
 
-## Command to generate actual submission folder
+## Command to generate actual submission folder
+
 ```bash
- cm docker script --tags=generate,inference,submission \
+cm docker script --tags=generate,inference,submission \
     --clean \
     --preprocess_submission=yes \
     --run-checker \
@@ -97,7 +98,7 @@ flowchart LR
 
 === "Multi-SUT submission"
 
-    If there are multiple SUTs, the same process needs to be repeated on each of them. One we have Submission folders on all the SUTs, we need to sync them to make a single submission folder
+    If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder
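When this generation step has to run unattended on several SUTs, the `cm docker script` command introduced above can be wrapped in a few lines. A minimal sketch (Python; it assumes the `cm` CLI from `cm4mlops` is on `PATH` and simply shells out to the documented flags):

```python
import subprocess


def generate_submission(division="closed", submitter="MLCommons"):
    """Shell out to the documented submission-generation command."""
    subprocess.run(
        [
            "cm", "docker", "script",
            "--tags=generate,inference,submission",
            "--clean",
            "--preprocess_submission=yes",
            "--run-checker",
            f"--submitter={submitter}",
            f"--division={division}",
            "--env.CM_DETERMINE_MEMORY_CONFIGURATION=yes",
            "--quiet",
        ],
        check=True,
    )


if __name__ == "__main__":
    generate_submission()
```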
From 36db7cc8190987ad09369c46ba29c56782d450a1 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 22:56:20 +0000
Subject: [PATCH 095/112] Update index.md

---
 docs/submission/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index b85276de9..1d62f3ab6 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -98,7 +98,7 @@ cm docker script --tags=generate,inference,submission \
 
 === "Multi-SUT submission"
 
-    If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder
+    If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder

From 6430b79385c45e7a2c1f24efbb58777ac156382a Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 22:58:28 +0000
Subject: [PATCH 096/112] Update index.md

---
 docs/submission/index.md | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 1d62f3ab6..f77b1894b 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -96,13 +96,12 @@ cm docker script --tags=generate,inference,submission \
     --quiet
 ```
 
-=== "Multi-SUT submission"
 
-    If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder
+If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder
 
-    === "Sync Locally"
+=== "Sync Locally"
 
-    ```mermaid
+    ```mermaid
    flowchart LR
    classDef hidden fill:none,stroke:none;
    subgraph Generation1 [Submission Generation SUT-1]

From 8425da6cba0eb87b5787b871c7a6783884454555 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 23:10:50 +0000
Subject: [PATCH 097/112] Update index.md

---
 docs/submission/index.md | 87 +++++++++++++++------------------------
 1 file changed, 33 insertions(+), 54 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index f77b1894b..6fe0ccb4e 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -83,63 +83,53 @@ flowchart LR
     Generation --> Output((Submission Folder SUT1))
 ```
 
-## Command to generate actual submission folder
+### Command to generate actual submission folder
 
 ```bash
-cm docker script --tags=generate,inference,submission \
-    --clean \
-    --preprocess_submission=yes \
-    --run-checker \
-    --submitter=MLCommons \
-    --division=closed \
-    --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
-    --quiet
+cm run script --tags=generate,inference,submission \
+    --clean \
+    --preprocess_submission=yes \
+    --run-checker=yes \
+    --submitter=MLCommons \
+    --division=closed \
+    --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
+    --quiet
 ```
-* Use `--hw_name="My system name"` to give a meaningful system name. Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems)
+!!! tip
+    * Use `--hw_name="My system name"` to give a meaningful system name. Examples can be seen [here](https://github.com/mlcommons/inference_results_v3.0/tree/main/open/cTuning/systems)
 
-* Use `--submitter=<Your name>` if your organization is an official MLCommons member and would like to submit under your organization
+    * Use `--submitter=<Your name>` if your organization is an official MLCommons member and would like to submit under your organization
 
-* Use `--hw_notes_extra` option to add additional notes like `--hw_notes_extra="Result taken by NAME" `
+    * Use `--hw_notes_extra` option to add additional notes like `--hw_notes_extra="Result taken by NAME" `
 
-* Use `--results_dir` option to specify the results folder. It is automatically taken from CM cache for MLPerf automation based runs
+    * Use `--results_dir` option to specify the results folder. It is automatically taken from CM cache for MLPerf automation based runs
 
-* Use `--submission_dir` option to specify the submission folder.
+    * Use `--submission_dir` option to specify the submission folder.
 
-* Use `--division=open` for open division submission
+    * Use `--division=open` for open division submission
 
-* Use `--category` option to specify the category for which submission is generated (datacenter/edge). By default, the category is taken from `system_meta.json` file located in the SUT root directory.
+    * Use `--category` option to specify the category for which submission is generated (datacenter/edge). By default, the category is taken from `system_meta.json` file located in the SUT root directory.
 
-* Use `--submission_base_dir` to specify the directory to which the outputs from preprocess submission script and final submission is added. No need to provide `--submission_dir` along with this. For `docker run`, use `--submission_base_dir` instead of `--submission_dir`.
+    * Use `--submission_base_dir` to specify the directory to which the outputs from preprocess submission script and final submission is added. No need to provide `--submission_dir` along with this. For `docker run`, use `--submission_base_dir` instead of `--submission_dir`.
 
 
-If there are multiple SUTs, the same process needs to be repeated on each of them. Once we have Submission folders on all the SUTs, we need to sync them to make a single submission folder
+If there are multiple systems where MLPerf results are collected, the same process needs to be repeated on each of them. Once we have submission folders on all the SUTs, we need to sync them to make a single submission folder
 
 === "Sync Locally"
+    If you are having results in multiple systems, you need to merge them to one system. You can use `rsync` for this. For example, the below command will sync the submission folder from SUT2 to the one in SUT1.
+    ```
+    rsync -avz username@host1:/<path_to_submission_folder2>/ <path_to_submission_folder1>/
+    ```
+    Same needs to be repeated for all other SUTs so that we have the full submissions in SUT1.
 
     ```mermaid
    flowchart LR
    subgraph SUT1 [Submission Generation SUT1]
        A[Submission Folder SUT1]
    end
    subgraph SUT2 [Submission Generation SUT2]
        B[Submission Folder SUT2]
    end
    subgraph SUT3 [Submission Generation SUT3]
        C[Submission Folder SUT3]
    end
+    subgraph SUTN [Submission Generation SUTN]
+        D[Submission Folder SUTN]
+    end
    SUT2 --> SUT1
    SUT3 --> SUT1
+    SUTN --> SUT1
    ```

=== "Sync via a Github repo"
+    If you are collecting results across multiple systems you can generate different submissions and aggregate all of them to a GitHub repository (can be private) and use it to generate a single tar ball which can be uploaded to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
+    Run the following command after **replacing `--repo_url` with your GitHub repository URL**.
+
+    ```bash
+    cm run script --tags=push,github,mlperf,inference,submission \
+        --repo_url=https://github.com/mlcommons/mlperf_inference_submissions_v5.0 \
+        --commit_message="Results on <HW name> added by <Name>" \
+        --quiet
+    ```
+
    ```mermaid
    flowchart LR
    subgraph SUT1 [Submission Generation SUT1]
        A[Submission Folder SUT1]
    end
    subgraph SUT2 [Submission Generation SUT2]
        B[Submission Folder SUT2]
    end
    subgraph SUT3 [Submission Generation SUT3]
        C[Submission Folder SUT3]
    end
    subgraph SUTN [Submission Generation SUTN]
        D[Submission Folder SUTN]
    end
    SUT2 -- git sync and push --> G[Github Repo]
    SUT3 -- git sync and push --> G[Github Repo]
    SUTN -- git sync and push --> G[Github Repo]
    SUT1 -- git sync and push --> G[Github Repo]
    ```
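The `push,github` automation documented above is plain git underneath, so teams that cannot use it can reproduce the sync step directly. A rough, hand-rolled equivalent (Python; the repository path and commit message are placeholders, and this is not what the CM script literally executes):

```python
import subprocess


def push_submission(repo_dir, message="Results added by <user>"):
    """Commit and push a generated submission tree to the shared repo."""
    for cmd in (
        ["git", "-C", repo_dir, "add", "-A"],
        ["git", "-C", repo_dir, "commit", "-m", message],
        ["git", "-C", repo_dir, "push"],
    ):
        subprocess.run(cmd, check=True)


if __name__ == "__main__":
    push_submission("/data/mlperf_inference_submissions_v5.0")
```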
From 6b24ac0cefc8bc5b1e5cd75a7e108dff37ba4e36 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 23:52:31 +0000
Subject: [PATCH 098/112] Fixes to submission generation docs

---
 docs/submission/index.md | 201 ++++++++++++++-----------------------
 1 file changed, 71 insertions(+), 130 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 6fe0ccb4e..f7784426f 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -3,10 +3,7 @@ hide:
   - toc
 ---
 
-
-
-Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the prposal slide for Common Automation for MLPerf Inference Submission Generation through CM.
+Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the proposal slide for Common Automation for MLPerf Inference Submission Generation through CM.
 
-## Command to generate actual submission tree
-=== "Docker run"
-    ### Docker run
-    === "Closed"
-        ### Closed Submission
-        ```bash
-        cm docker script --tags=generate,inference,submission \
-            --clean \
-            --preprocess_submission=yes \
-            --run-checker \
-            --submitter=MLCommons \
-            --tar=yes \
-            --env.CM_TAR_OUTFILE=submission.tar.gz \
-            --division=closed \
-            --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
-            --quiet
-        ```
-
-    === "Open"
-        ### Open Submission
-        ```bash
-        cm docker script --tags=generate,inference,submission \
-            --clean \
-            --preprocess_submission=yes \
-            --run-checker \
-            --submitter=MLCommons \
-            --tar=yes \
-            --env.CM_TAR_OUTFILE=submission.tar.gz \
-            --division=open \
-            --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
-            --quiet
-        ```
-
-=== "Native run"
-    ### Native run
-    === "Closed"
-        ### Closed Submission
-        ```bash
-        cm run script --tags=generate,inference,submission \
-            --clean \
-            --preprocess_submission=yes \
-            --run-checker \
-            --submitter=MLCommons \
-            --tar=yes \
-            --env.CM_TAR_OUTFILE=submission.tar.gz \
-            --division=closed \
-            --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
-            --quiet
-        ```
-
-    === "Open"
-        ### Open Submission
-        ```bash
-        cm run script --tags=generate,inference,submission \
-            --clean \
-            --preprocess_submission=yes \
-            --run-checker \
-            --submitter=MLCommons \
-            --tar=yes \
-            --env.CM_TAR_OUTFILE=submission.tar.gz \
-            --division=open \
-            --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes \
-            --quiet
-        ```
-
-
-The above command should generate "submission.tar.gz" if there are no submission checker issues and you can upload it to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
-
-## Aggregate Results in GitHub
-
-If you are collecting results across multiple systems you can generate different submissions and aggregate all of them to a GitHub repository (can be private) and use it to generate a single tar ball which can be uploaded to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
-
-Run the following command after **replacing `--repo_url` with your GitHub repository URL**.
-
-```bash
-cm run script --tags=push,github,mlperf,inference,submission \
-   --repo_url=https://github.com/mlcommons/mlperf_inference_submissions_v4.1 \
-   --commit_message="Results on <HW name> added by <Name>" \
-   --quiet
-```
-
-At the end, you can download the github repo and upload to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
-
-Click [here](https://youtu.be/eI1Hoecc3ho) to view the recording of the workshop: Streamlining your MLPerf Inference results using CM.
+## Upload the final submission
+
+
+=== "via CLI"
+    You can do the following command which will run the submission checker and upload the results to the MLCommons submission server
+    ```
+    cm run script --tags=run,submission,checker --submitter_id=<>
+    ```
+=== "via Browser"
+    You can do the following command to generate the final submission tar file and then upload to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
+    ```
+    cm run script --tags=run,submission,checker --submitter_id=<> --tar=yes --submission_tar_file=mysubmission.tar.gz
+    ```
+
+```mermaid
+    flowchart LR
+    subgraph SUT [Combined Submissions]
+        A[Combined Submission Folder in SUT1]
+    end
+    SUT --> B[Run submission checker]
+    B --> C[Upload to MLC Submission server]
+    C --> D[Receive validation email]
+```

From 1120df8642bf31f5fe8a6ea32903bdd9501bc30c Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Mon, 6 Jan 2025 23:56:37 +0000
Subject: [PATCH 099/112] Fixes to submission generation docs

---
 docs/submission/index.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index f7784426f..58131f090 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -59,9 +59,9 @@ Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD
     ```
 
-Once all the results across all the models are ready you can use the following command to generate a valid submission tree compliant with the [MLPerf requirements](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1).
+Once all the results across all the models are ready you can use the below section to generate a valid submission tree compliant with the [MLPerf requirements](https://github.com/mlcommons/policies/blob/master/submission_rules.adoc#inference-1).
 
-## Generate actual submission tree
+## Generate submission folder
 
 The submission generation flow is explained in the below diagram
 
@@ -80,7 +80,7 @@ flowchart LR
     Input((Results SUT1)) --> Generation
     Generation --> Output((Submission Folder SUT1))
 ```
 
-### Command to generate actual submission folder
+### Command to generate submission folder

From e3ea5efe4328912e7726c9496f8361ccce1b39b0 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 00:02:21 +0000
Subject: [PATCH 100/112] Added link to the expected results folder structure

---
 docs/submission/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index 58131f090..fb25d64fd 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -6,7 +6,7 @@ hide:
 Click [here](https://docs.google.com/presentation/d/1cmbpZUpVr78EIrhzyMBnnWnjJrD-mZ2vmSb-yETkTA8/edit?usp=sharing) to view the proposal slide for Common Automation for MLPerf Inference Submission Generation through CM.
 
 === "Custom automation based MLPerf results"
-    If you have not followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, please make sure that the result directory is structured in the following way.
+    If you have not followed the `cm run` commands under the individual model pages in the [benchmarks](../index.md) directory, please make sure that the result directory is structured in the following way. You can see the real examples for the expected folder structure [here](https://github.com/mlcommons/inference/tree/submission-generation-examples).
 ```
 └── System description ID(SUT Name)
 ├── system_meta.json
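The browser upload path documented above expects one tarball of the consolidated tree; `--tar=yes` produces it, and the packaging itself is a one-liner with the standard library. A minimal sketch (Python; paths are placeholders):

```python
import tarfile


def make_submission_tarball(submission_dir, out_file="mysubmission.tar.gz"):
    """Package the consolidated submission folder for the upload UI."""
    with tarfile.open(out_file, "w:gz") as tar:
        # arcname roots the archive at a folder name instead of the
        # absolute path of submission_dir.
        tar.add(submission_dir, arcname="submission")
    return out_file


if __name__ == "__main__":
    print(make_submission_tarball("/data/submissions"))
```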
From 2c67b24caec7a7868234487b13ab10db2ab83d96 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Tue, 7 Jan 2025 16:02:11 +0530
Subject: [PATCH 101/112] add docs for llama3 + inference version upgrade
 (#2020)

* add docs for llama3 + inference version upgrade

* add output path and hf token

---
 .../language/get-llama3_1-405b-data.md        | 41 +++++++++++++++++++
 docs/benchmarks/language/llama3_1-405b.md     | 13 ++++++
 main.py                                       | 10 ++++-
 mkdocs.yml                                    |  1 +
 4 files changed, 63 insertions(+), 2 deletions(-)
 create mode 100644 docs/benchmarks/language/get-llama3_1-405b-data.md
 create mode 100644 docs/benchmarks/language/llama3_1-405b.md

diff --git a/docs/benchmarks/language/get-llama3_1-405b-data.md b/docs/benchmarks/language/get-llama3_1-405b-data.md
new file mode 100644
index 000000000..7333be64d
--- /dev/null
+++ b/docs/benchmarks/language/get-llama3_1-405b-data.md
@@ -0,0 +1,41 @@
+---
+hide:
+  - toc
+---
+
+# Text Summarization using LLAMA3.1-405b
+
+## Dataset
+
+The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.
+
+=== "Validation"
+
+    ### Get Validation Dataset
+    ```
+    cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname=<path to download> -j
+    ```
+
+=== "Calibration"
+
+    ### Get Calibration Dataset
+    ```
+    cm run script --tags=get,dataset,mlperf,inference,llama3,_calibration --outdirname=<path to download> -j
+    ```
+
+## Model
+The benchmark implementation run command will automatically download the required model and do the necessary conversions. In case you want to only download the official model, you can use the below commands.
+
+Get the Official MLPerf LLAMA3.1-405b Model
+
+=== "Pytorch"
+
+    ### Pytorch
+    ```
+    cm run script --tags=get,ml-model,llama3 --outdirname=<path to download> --hf_token=<huggingface access token> -j
+    ```
+
+!!! tip
+
+    Downloading llama3.1-405B model from Hugging Face will require an [**access token**](https://huggingface.co/settings/tokens) which could be generated for your account. Additionally, ensure that your account has access to the [llama3.1-405B](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct) model.
+
diff --git a/docs/benchmarks/language/llama3_1-405b.md b/docs/benchmarks/language/llama3_1-405b.md
new file mode 100644
index 000000000..8163bb1e8
--- /dev/null
+++ b/docs/benchmarks/language/llama3_1-405b.md
@@ -0,0 +1,13 @@
+---
+hide:
+  - toc
+---
+
+# Text Summarization using LLAMA3_1-405b
+
+=== "MLCommons-Python"
+    ## MLPerf Reference Implementation in Python
+
+{{ mlperf_inference_implementation_readme (4, "llama3_1-405b-99", "reference", devices=["CPU","CUDA"]) }}
+
+{{ mlperf_inference_implementation_readme (4, "llama3_1-405b-99.9", "reference", devices=["CPU","CUDA"]) }}
\ No newline at end of file
diff --git a/main.py b/main.py
index 6a34587dd..1e561175f 100755
--- a/main.py
+++ b/main.py
@@ -28,7 +28,7 @@ def mlperf_inference_implementation_readme(
     content = ""
 
     execution_envs = ["Docker", "Native"]
-    code_version = "r4.1-dev"
+    code_version = "r5.0-dev"
     implementation_run_options = []
 
     if model == "rnnt":
@@ -50,6 +50,8 @@ def mlperf_inference_implementation_readme(
             frameworks = ["Onnxruntime", "Pytorch"]
         elif "bert" in model.lower():
             frameworks = ["Pytorch", "Deepsparse"]
+        elif "llama3" in model.lower():
+            frameworks = ["Pytorch"]
         else:
             frameworks = ["Pytorch"]
 
@@ -127,6 +129,7 @@ def mlperf_inference_implementation_readme(
             "dlrm" in model.lower()
             or "llama2" in model.lower()
             or "mixtral" in model.lower()
+            or "llama3" in model.lower()
         ):
             categories = ["Datacenter"]
         else:
@@ -499,6 +502,7 @@ def get_common_info(spaces, implementation, model):
     info += f"\n{pre_space}!!! tip\n\n"
     info += f"{pre_space}    - Number of threads could be adjusted using `--threads=#`, where `#` is the desired number of threads. This option works only if the implementation in use supports threading.\n\n"
     info += f"{pre_space}    - Batch size could be adjusted using `--batch_size=#`, where `#` is the desired batch size. This option works only if the implementation in use is supporting the given batch size.\n\n"
+    info += f"{pre_space}    - `_r4.1-dev` could also be given instead of `_r5.0-dev` if you want to run the benchmark with the MLPerf version being 4.1.\n\n"
     if model == "rgat":
         info += f"{pre_space}    - Add `--env.CM_DATASET_IGBH_PATH=<path to IGBH dataset>` if you have already downloaded the dataset. The path will be automatically mounted when using docker run.\n\n"
         info += f"{pre_space}    - Add `--env.CM_ML_MODEL_RGAT_CHECKPOINT_PATH=<path to checkpoint.pt>` if you have already downloaded the model. The path will be automatically mounted when using docker run.\n\n"
@@ -522,7 +526,9 @@ def get_docker_info(spaces, model, implementation,
     if model == "sdxl":
         info += f"{pre_space}    - `--env.CM_MLPERF_MODEL_SDXL_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container launches. \n\n"
-
+    elif "llama3" in model.lower():
+        info += f"{pre_space}    - `--env.CM_MLPERF_MODEL_LLAMA3_DOWNLOAD_TO_HOST=yes` option can be used to download the model on the host so that it can be reused across different container launches. \n\n"
+        info += f"{pre_space}    - `--env.CM_MLPERF_DATASET_LLAMA3_DOWNLOAD_TO_HOST=yes` option can be used to download the dataset on the host so that it can be reused across different container launches. \n\n"
     if implementation.lower() == "nvidia":
         info += f"{pre_space}    - Default batch size is assigned based on [GPU memory](https://github.com/mlcommons/cm4mlops/blob/dd0c35856969c68945524d5c80414c615f5fe42c/script/app-mlperf-inference-nvidia/_cm.yaml#L1129) or the [specified GPU](https://github.com/mlcommons/cm4mlops/blob/dd0c35856969c68945524d5c80414c615f5fe42c/script/app-mlperf-inference-nvidia/_cm.yaml#L1370). Please click more option for *docker launch* or *run command* to see how to specify the GPU name.\n\n"
         info += f"{pre_space}    - When run with `--all_models=yes`, all the benchmark models of NVIDIA implementation can be executed within the same container.\n\n"
diff --git a/mkdocs.yml b/mkdocs.yml
index 96bcfb758..9178191a3 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -39,6 +39,7 @@ nav:
       - IndySCC24: benchmarks/language/reproducibility/indyscc24-bert.md
     - GPT-J: benchmarks/language/gpt-j.md
     - LLAMA2-70B: benchmarks/language/llama2-70b.md
+    - LLAMA3-405B: benchmarks/language/llama3_1-405b.md
     - MIXTRAL-8x7B: benchmarks/language/mixtral-8x7b.md
   - Recommendation:
     - DLRM-v2: benchmarks/recommendation/dlrm-v2.md

From c0ed0a83fa3052f8ca2d10cf993b899a67b3f486 Mon Sep 17 00:00:00 2001
From: ANANDHU S <71482562+anandhu-eng@users.noreply.github.com>
Date: Tue, 7 Jan 2025 17:35:19 +0530
Subject: [PATCH 102/112] Update CM run commands for llama3_1-405b (#2019)

* Update CM run commands for llama3_1-405b

* Update cm commands for llama3

* add information about hf tokens

---
 language/llama3.1-405b/README.md | 37 +++++++++++++++++++++++++++++---
 1 file changed, 34 insertions(+), 3 deletions(-)

diff --git a/language/llama3.1-405b/README.md b/language/llama3.1-405b/README.md
index d1dd5ad4f..499a5f9d9 100644
--- a/language/llama3.1-405b/README.md
+++ b/language/llama3.1-405b/README.md
@@ -9,6 +9,11 @@
 
 Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3.1-405b) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker.
 
+## Automated command to run the benchmark via MLCommons CM
+
+Please see the [new docs site](https://docs.mlcommons.org/inference/benchmarks/language/llama3_1-405b/) for an automated way to run this benchmark across different available implementations and do an end-to-end submission with or without docker.
+
+You can also do pip install cm4mlops and then use cm commands for downloading the model and datasets using the commands given in the later sections.
 
 ## Prepare environment
 
@@ -109,6 +114,15 @@ git clone https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct ${CHECKPOINT
 cd ${CHECKPOINT_PATH} && git checkout be673f326cab4cd22ccfef76109faf68e41aa5f1
 ```
 
+### Download model through CM (Collective Minds)
+
+```
+cm run script --tags=get,ml-model,llama3 --outdirname=<path_to_download> --hf_token=<huggingface access token> -j
+```
+
+**Note:**
+Downloading llama3.1-405B model from Hugging Face will require an [**access token**](https://huggingface.co/settings/tokens) which could be generated for your account. Additionally, ensure that your account has access to the [llama3.1-405B](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct) model.
+
 ## Get Dataset
 
 ### Preprocessed
@@ -136,6 +150,19 @@ You can also download the calibration dataset from the Cloudflare R2 bucket by r
 rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl ./ -P
 ```
 
+**CM Command**
+
+Validation Dataset:
+```
+cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname=<path to download> -j
+```
+
+Calibration Dataset:
+```
+cm run script --tags=get,dataset,mlperf,inference,llama3,_calibration --outdirname=<path to download> -j
+```
+
+
 ## Run Performance Benchmarks
 
 ### Offline
@@ -169,7 +196,6 @@ python -u main.py --scenario Server \
 
 The ServerSUT was not tested for GPU runs.
 
-
 ## Run Accuracy Benchmarks
 
 ### Offline
@@ -201,7 +227,6 @@ fi
 
 For the GPU run - The above steps have been automated in `run_accuracy.sh`. You can also modify this script to use `--device cpu` to adapt it to a CPU-only run.
 
-
 ### Server
 ```
 OUTPUT_LOG_DIR=server-accuracy-logs
@@ -218,7 +243,6 @@ python -u main.py --scenario Server \
                 --tensor-parallel-size ${GPU_COUNT} \
                 --vllm
 
-
 ACCURACY_LOG_FILE=${OUTPUT_LOG_DIR}/mlperf_log_accuracy.json
 if [ -e ${ACCURACY_LOG_FILE} ]; then
         python evaluate-accuracy.py --checkpoint-path ${CHECKPOINT_PATH} \
@@ -228,6 +252,13 @@ fi
 
 The ServerSUT was not tested for GPU runs.
 
+### Evaluate the accuracy
+
+```
+cm run script --tags=process,mlperf,accuracy,_dataset_llama3 --result_dir=<Path to directory where files are generated after the benchmark run>
+```
+
+Please click [here](https://github.com/anandhu-eng/inference/blob/patch-14/language/llama3.1-405b/evaluate-accuracy.py) to view the Python script for evaluating accuracy for the Llama3 dataset.
 
 ## Accuracy Target
 Running the GPU implementation in FP16 precision resulted in the following FP16 accuracy targets:
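The gated checkpoint download documented above can also be driven from Python through `huggingface_hub` instead of `git clone`. A sketch (the revision pin is the commit hash used in this README; the token and local path are placeholders, and the account still needs access to the gated repository):

```python
from huggingface_hub import snapshot_download

# Requires an access token with permission for the gated
# meta-llama/Llama-3.1-405B-Instruct repository.
snapshot_download(
    repo_id="meta-llama/Llama-3.1-405B-Instruct",
    revision="be673f326cab4cd22ccfef76109faf68e41aa5f1",  # pin from this README
    local_dir="/data/Llama-3.1-405B-Instruct",
    token="<huggingface_access_token>",
)
```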
From 17960a50c8dc89d5f0eb5db6cfed4d8035a00d03 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 12:47:26 +0000
Subject: [PATCH 103/112] Fixes the submission README

---
 docs/submission/index.md | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index fb25d64fd..a3412839e 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -101,7 +101,7 @@ cm run script --tags=generate,inference,submission \
 
     * Use `--results_dir` option to specify the results folder. It is automatically taken from CM cache for MLPerf automation based runs
 
-    * Use `--submission_dir` option to specify the submission folder.
+    * Use `--submission_dir` option to specify the submission folder. (You can avoid this if you're pushing to github or only running a single SUT and CM will use its cache folder)
 
     * Use `--division=open` for open division submission
 
@@ -174,16 +174,25 @@ If there are multiple systems where MLPerf results are collected, the same proce
 
 ## Upload the final submission
 
+!!! warning
+    If you are using GitHub for consolidating your results, make sure that you have run the [`push-to-github` command](#__tabbed_2_2) on the same system to ensure results are synced as is on the GitHub repository.
+
+Once you have all the results on the system, you can upload them to the MLCommons submission server as follows:
 
 === "via CLI"
     You can do the following command which will run the submission checker and upload the results to the MLCommons submission server
     ```
-    cm run script --tags=run,submission,checker --submitter_id=<>
+    cm run script --tags=run,submission,checker \
+        --submitter_id=<> \
+        --submission_dir=<path_to_submission_folder>
     ```
 === "via Browser"
     You can do the following command to generate the final submission tar file and then upload to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
     ```
-    cm run script --tags=run,submission,checker --submitter_id=<> --tar=yes --submission_tar_file=mysubmission.tar.gz
+    cm run script --tags=run,submission,checker \
+        --submitter_id=<> \
+        --tar=yes \
+        --submission_tar_file=mysubmission.tar.gz
     ```

From 51af49200e8e96f0760ab0584e64a0b71b9698b3 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 13:08:00 +0000
Subject: [PATCH 104/112] Update README.md

---
 tools/submission/README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/submission/README.md b/tools/submission/README.md
index 2459ab363..5a31a304a 100644
--- a/tools/submission/README.md
+++ b/tools/submission/README.md
@@ -1,5 +1,7 @@
 # Tools to check Submissions
 
+Please follow the [official submission automation page](https://docs.mlcommons.org/inference/submission/) for doing a submission. It wraps all the submission related files listed below.
+
 ## `truncate_accuracy_log.py` (Mandatory)
 
 ### Inputs

From cd25f8ecf3f7b05d4e6a64af4f24d0ff28dd45ba Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 14:05:58 +0000
Subject: [PATCH 105/112] Create test-submission-generation.yml

---
 .../workflows/test-submission-generation.yml  | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 .github/workflows/test-submission-generation.yml

diff --git a/.github/workflows/test-submission-generation.yml b/.github/workflows/test-submission-generation.yml
new file mode 100644
index 000000000..3a64fed0e
--- /dev/null
+++ b/.github/workflows/test-submission-generation.yml
@@ -0,0 +1,54 @@
+# This workflow will test the submission generation using MLPerf Automation 
+
+name: CM based Submission Generation
+
+on:
+  pull_request:
+    branches: [ "master", "dev" ]
+    paths:
+      - '.github/workflows/test-submission-generation.yml'
+      - '**'
+      - '!**.md'
+jobs:
+  submission_generation:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: [ "3.12" ]
+        division: ["closed", "open", "closed-open"]
+        category: ["datacenter", "edge"]
+        case: ["closed", "closed-power"]
+        action: ["run", "docker"]
+        exclude:
+          - os: macos-latest
+          - os: windows-latest
+          - category: "edge"
+
+    steps:
+    - uses: actions/checkout@v4
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v3
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install dependencies
+      run: |
+        pip install cm4mlops
+    - name: Pull repo where test cases are uploaded
+      run: |
+        git clone -b submission-generation-examples https://github.com/mlcommons/inference.git submission_generation_examples
+    - name: Run Submission Generation - ${{ matrix.case }} ${{ matrix.action }} ${{ matrix.category }} ${{ matrix.division }}
+      continue-on-error: true
+      run: |
+        if [ "${{ matrix.case }}" == "closed" ]; then
+          extra_run_args=" --env.CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS="--skip-extra-accuracy-files-check""
+          description="Test submission - contains closed edge and datacenter"
+        elif [ "${{ matrix.case }}" == "closed-power" ]; then
+          extra_run_args=" --env.CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS="--skip-extra-accuracy-files-check""
+          description="Test submission - contains closed-power edge and datacenter results"
+        fi
+        # Dynamically set the log group to simulate a dynamic step name
+        echo "::group::$description"
+        cm ${{ matrix.action }} script --tags=generate,inference,submission --version=v5.0 --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args
+

From 9bcdbee9ee7d60828e62cc281190ed70f9708890 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 14:18:32 +0000
Subject: [PATCH 106/112] Update test-submission-generation.yml

---
 .github/workflows/test-submission-generation.yml | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/test-submission-generation.yml b/.github/workflows/test-submission-generation.yml
index 3a64fed0e..7424b9fef 100644
--- a/.github/workflows/test-submission-generation.yml
+++ b/.github/workflows/test-submission-generation.yml
@@ -42,13 +42,11 @@ jobs:
       continue-on-error: true
       run: |
         if [ "${{ matrix.case }}" == "closed" ]; then
-          extra_run_args=" --env.CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS="--skip-extra-accuracy-files-check""
           description="Test submission - contains closed edge and datacenter"
         elif [ "${{ matrix.case }}" == "closed-power" ]; then
-          extra_run_args=" --env.CM_MLPERF_SUBMISSION_CHECKER_EXTRA_ARGS="--skip-extra-accuracy-files-check""
           description="Test submission - contains closed-power edge and datacenter results"
         fi
         # Dynamically set the log group to simulate a dynamic step name
        echo "::group::$description"
-        cm ${{ matrix.action }} script --tags=generate,inference,submission --version=v5.0 --clean --preprocess_submission=yes --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --env.CM_TAR_OUTFILE=submission.tar.gz --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args
-
+        cm ${{ matrix.action }} script --tags=generate,inference,submission --adr.compiler.tags=gcc --version=v5.0 --clean --preprocess_submission=yes --submission_base_dir=mysubmissions --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args
+        cm ${{ matrix.action }} script --tags=run,submission,checker --submitter_id_off=mysubmitter_id --tar=yes --submission_dir=mysubmissions/submissions --submission_tar_file=mysubmission.tar.gz

From 961efb7b39d3dd49cffb633dfe6ca947a0fdf0fa Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 15:47:10 +0000
Subject: [PATCH 107/112] Clean invalid model results in preprocess_submission
 script

---
 tools/submission/preprocess_submission.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py
index 1abb42189..c98ba4f40 100644
--- a/tools/submission/preprocess_submission.py
+++ b/tools/submission/preprocess_submission.py
@@ -405,12 +405,12 @@ def infer_scenario_results(args, config):
                     continue
 
                 if mlperf_model not in config.required:
-                    log.error("Division %s, submitter %s, system %s has invalid "
-                              "MLPerf model (%s) corresponding to given model (%s). "
-                              "Valid ones for MLPerf inference version (%s) in (%s) "
-                              "category are [%s]", division, submitter, system_id_json,
-                              mlperf_model, model, config.version, system_type,
-                              config.required.keys())
+                    log.warning(f"""Division {division}, submitter {submitter}, system {system_id_json} has invalid """
+                                f"""MLPerf model ({mlperf_model}) corresponding to given model ({model}). """
+                                f"""Valid ones for MLPerf inference version ({config.version}) in ({system_type}) """
+                                f"""category are [{config.required.keys()}]. Removing...""")
+                    clean_model_dir(os.path.join(
+                        log_path, system_desc, model))
                     continue
 
                 required_scenarios = config.get_required(mlperf_model)

From ab2fc442d6d53e4d529494a1b4aa4b81c01044f4 Mon Sep 17 00:00:00 2001
From: mlcommons-bot <mlcommons-bot@users.noreply.github.com>
Date: Tue, 7 Jan 2025 15:47:42 +0000
Subject: [PATCH 108/112] [Automated Commit] Format Codebase

---
 tools/submission/preprocess_submission.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/submission/preprocess_submission.py b/tools/submission/preprocess_submission.py
index c98ba4f40..7eaa7f8f7 100644
--- a/tools/submission/preprocess_submission.py
+++ b/tools/submission/preprocess_submission.py
@@ -406,11 +406,11 @@ def infer_scenario_results(args, config):
 
                 if mlperf_model not in config.required:
                     log.warning(f"""Division {division}, submitter {submitter}, system {system_id_json} has invalid """
-                                f"""MLPerf model ({mlperf_model}) corresponding to given model ({model}). """
-                                f"""Valid ones for MLPerf inference version ({config.version}) in ({system_type}) """
-                                f"""category are [{config.required.keys()}]. Removing...""")
+                                f"""MLPerf model ({mlperf_model}) corresponding to given model ({model}). """
+                                f"""Valid ones for MLPerf inference version ({config.version}) in ({system_type}) """
+                                f"""category are [{config.required.keys()}]. Removing...""")
                     clean_model_dir(os.path.join(
-                        log_path, system_desc, model))
+                        log_path, system_desc, model))
                     continue
 
                 required_scenarios = config.get_required(mlperf_model)
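The cleanup path added in the patch above removes a model's result directories once they are found invalid and prunes the system folder when nothing remains. The same prune-empty-parents pattern is useful outside the checker; a condensed, generic sketch (Python; this is an illustration, not the repo's own `clean_model_dir`):

```python
import os
import shutil


def remove_model_results(model_results_dir):
    """Drop one model's results and prune the SUT folder when it empties."""
    shutil.rmtree(model_results_dir, ignore_errors=True)
    sut_dir = os.path.dirname(model_results_dir)
    # If no benchmark results remain for this SUT, drop its folder too so
    # the submission tree does not keep empty system directories.
    if os.path.isdir(sut_dir) and not os.listdir(sut_dir):
        shutil.rmtree(sut_dir)
```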
From d98eb37b7cadb115bb05dc4b837babc29b08eefe Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 15:51:43 +0000
Subject: [PATCH 109/112] Fixes the submission README

---
 docs/submission/index.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/submission/index.md b/docs/submission/index.md
index a3412839e..64ef5afa7 100644
--- a/docs/submission/index.md
+++ b/docs/submission/index.md
@@ -190,7 +190,7 @@ Once you have all the results on the system, you can upload them to the MLCommon
     You can do the following command to generate the final submission tar file and then upload to the [MLCommons Submission UI](https://submissions-ui.mlcommons.org/submission).
     ```
     cm run script --tags=run,submission,checker \
-        --submitter_id=<> \
+        --submission_dir=<path_to_submission_folder> \
         --tar=yes \
         --submission_tar_file=mysubmission.tar.gz
     ```

From 37e94c6ded51fe963ccff7eeb441e1162a3a9580 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 16:02:01 +0000
Subject: [PATCH 110/112] Update README.md

---
 language/llama3.1-405b/README.md | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/language/llama3.1-405b/README.md b/language/llama3.1-405b/README.md
index 499a5f9d9..a9729a329 100644
--- a/language/llama3.1-405b/README.md
+++ b/language/llama3.1-405b/README.md
@@ -143,21 +143,19 @@ You can then navigate in the terminal to your desired download directory and run
 ```
 rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_dataset_8313_processed_fp16_eval.pkl ./ -P
 ```
-
-You can also download the calibration dataset from the Cloudflare R2 bucket by running the following command:
+**CM Command**
 
 ```
-rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl ./ -P
+cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname=<path to download> -j
 ```
 
-**CM Command**
+You can also download the calibration dataset from the Cloudflare R2 bucket by running the following command:
 
-Validation Dataset:
 ```
-cm run script --tags=get,dataset,mlperf,inference,llama3,_validation --outdirname=<path to download> -j
+rclone copy mlc-inference:mlcommons-inference-wg-public/llama3.1_405b/mlperf_llama3.1_405b_calibration_dataset_512_processed_fp16_eval.pkl ./ -P
 ```
 
-Calibration Dataset:
+**CM Command**
 ```
 cm run script --tags=get,dataset,mlperf,inference,llama3,_calibration --outdirname=<path to download> -j
 ```
@@ -252,14 +250,12 @@ fi
 
 The ServerSUT was not tested for GPU runs.
 
-### Evaluate the accuracy
-
+### Evaluate the accuracy using CM
+You can also evaluate the accuracy from the generated accuracy log by using the following CM command
 ```
-cm run script --tags=process,mlperf,accuracy,_dataset_llama3 --result_dir=<Path to directory where files are generated after the benchmark run>
+cm run script --tags=process,mlperf,accuracy,_dataset_llama3 --result_dir=<Path to directory where files are generated after the benchmark run>
 ```
 
-Please click [here](https://github.com/anandhu-eng/inference/blob/patch-14/language/llama3.1-405b/evaluate-accuracy.py) to view the Python script for evaluating accuracy for the Llama3 dataset.
-
 ## Accuracy Target
 Running the GPU implementation in FP16 precision resulted in the following FP16 accuracy targets:
 ```

From 27827dc92de7062e1e2900accf96185cb6412164 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 16:03:27 +0000
Subject: [PATCH 111/112] Update README.md

---
 language/llama3.1-405b/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/language/llama3.1-405b/README.md b/language/llama3.1-405b/README.md
index a9729a329..ea358bb98 100644
--- a/language/llama3.1-405b/README.md
+++ b/language/llama3.1-405b/README.md
@@ -114,10 +114,10 @@ git clone https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct ${CHECKPOINT
 cd ${CHECKPOINT_PATH} && git checkout be673f326cab4cd22ccfef76109faf68e41aa5f1
 ```
 
-### Download model through CM (Collective Minds)
+### Download model through CM (Collective Mind)
 
 ```
-cm run script --tags=get,ml-model,llama3 --outdirname=<path_to_download> --hf_token=<huggingface access token> -j
+cm run script --tags=get,ml-model,llama3 --outdirname=${CHECKPOINT_PATH} --hf_token=<huggingface access token> -j
 ```

From a56a2517ecf47045aabc46be7d96d91843df36f9 Mon Sep 17 00:00:00 2001
From: Arjun Suresh
Date: Tue, 7 Jan 2025 16:35:21 +0000
Subject: [PATCH 112/112] Update test-submission-generation.yml

---
 .github/workflows/test-submission-generation.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/test-submission-generation.yml b/.github/workflows/test-submission-generation.yml
index 7424b9fef..97afc58cd 100644
--- a/.github/workflows/test-submission-generation.yml
+++ b/.github/workflows/test-submission-generation.yml
@@ -19,7 +19,7 @@ jobs:
         python-version: [ "3.12" ]
         division: ["closed", "open", "closed-open"]
         category: ["datacenter", "edge"]
-        case: ["closed", "closed-power"]
+        case: ["closed"]
         action: ["run", "docker"]
         exclude:
           - os: macos-latest
@@ -48,5 +48,5 @@ jobs:
         fi
         # Dynamically set the log group to simulate a dynamic step name
         echo "::group::$description"
-        cm ${{ matrix.action }} script --tags=generate,inference,submission --adr.compiler.tags=gcc --version=v5.0 --clean --preprocess_submission=yes --submission_base_dir=mysubmissions --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet $extra_run_args
+        cm ${{ matrix.action }} script --tags=generate,inference,submission --adr.compiler.tags=gcc --version=v5.0 --clean --preprocess_submission=yes --submission_base_dir=mysubmissions --results_dir=$PWD/submission_generation_tests/${{ matrix.case }}/ --run-checker --submitter=MLCommons --tar=yes --division=${{ matrix.division }} --env.CM_DETERMINE_MEMORY_CONFIGURATION=yes --quiet
         cm ${{ matrix.action }} script --tags=run,submission,checker --submitter_id_off=mysubmitter_id --tar=yes --submission_dir=mysubmissions/submissions --submission_tar_file=mysubmission.tar.gz