From cbbc8ec32858923523be19f08592b9aff5fe63e3 Mon Sep 17 00:00:00 2001
From: Henry Le Berre <hberre3@gatech.edu>
Date: Sat, 24 Aug 2024 20:19:45 -0400
Subject: [PATCH] Benchmarks report grind time (#589)

---
 benchmarks/5eq_rk3_weno3_hllc/case.py         |  4 +--
 benchmarks/hypo_hll/case.py                   |  4 +--
 benchmarks/ibm/case.py                        |  2 +-
 benchmarks/viscous_weno5_sgb_acoustic/case.py |  8 ++---
 src/simulation/m_start_up.fpp                 | 24 ++++++++------
 toolchain/bootstrap/lint.sh                   |  2 +-
 toolchain/mfc/bench.py                        | 32 +++++++++++++++----
 toolchain/mfc/run/run.py                      |  3 +-
 toolchain/templates/include/helpers.mako      | 12 ++++---
 9 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/benchmarks/5eq_rk3_weno3_hllc/case.py b/benchmarks/5eq_rk3_weno3_hllc/case.py
index 19bd281a3..df3bf3b85 100644
--- a/benchmarks/5eq_rk3_weno3_hllc/case.py
+++ b/benchmarks/5eq_rk3_weno3_hllc/case.py
@@ -176,7 +176,7 @@
 # Configuring case dictionary
 print(json.dumps({
     # Logistics ================================================
-    'run_time_info'                : 'T',
+    'run_time_info'                : 'F',
     # ==========================================================
 
     # Computational Domain Parameters ==========================
@@ -193,7 +193,7 @@
     'dt'                           : dt,
     't_step_start'                 : 0,
     't_step_stop'                  : int(60*(95*size + 5)),
-    't_step_save'                  : int(12*(95*size + 5)),
+    't_step_save'                  : int(60*(95*size + 5)),
     # ==========================================================
 
     # Simulation Algorithm Parameters ==========================
diff --git a/benchmarks/hypo_hll/case.py b/benchmarks/hypo_hll/case.py
index 780a9b6b6..e55575791 100644
--- a/benchmarks/hypo_hll/case.py
+++ b/benchmarks/hypo_hll/case.py
@@ -30,7 +30,7 @@
 # Configuring case dictionary
 print(json.dumps({
                     # Logistics ================================================
-                    'run_time_info'                : 'T',
+                    'run_time_info'                : 'F',
                     # ==========================================================
 
                     # Computational Domain Parameters ==========================
@@ -46,7 +46,7 @@
                     'dt'                           : 1e-8,
                     't_step_start'                 : 0,
                     't_step_stop'                  : int(60*(95*size + 5)),
-                    't_step_save'                  : int(12*(95*size + 5)),
+                    't_step_save'                  : int(60*(95*size + 5)),
 		    # ==========================================================
 
                     # Simulation Algorithm Parameters ==========================
diff --git a/benchmarks/ibm/case.py b/benchmarks/ibm/case.py
index bf0347138..5dfc88fd1 100644
--- a/benchmarks/ibm/case.py
+++ b/benchmarks/ibm/case.py
@@ -50,7 +50,7 @@
             'dt'                           : mydt,                      
             't_step_start'                 : 0,                         
             't_step_stop'                  : int(40*(95*size + 5)),
-            't_step_save'                  : int(8*(95*size + 5)),
+            't_step_save'                  : int(40*(95*size + 5)),
 		    # ==========================================================
                                                                                 
             # Simulation Algorithm Parameters ==========================
diff --git a/benchmarks/viscous_weno5_sgb_acoustic/case.py b/benchmarks/viscous_weno5_sgb_acoustic/case.py
index 6515cc1bc..928942abf 100644
--- a/benchmarks/viscous_weno5_sgb_acoustic/case.py
+++ b/benchmarks/viscous_weno5_sgb_acoustic/case.py
@@ -93,8 +93,8 @@
 
 # Configuring case dictionary
 print(json.dumps({
-        # Logistics ================================================
-    'run_time_info'                : 'T',
+    # Logistics ================================================
+    'run_time_info'                : 'F',
     # ==========================================================
     
     # Computational Domain Parameters ==========================
@@ -112,9 +112,9 @@
     'dt'                           : dt,
     't_step_start'                 : 0,
     't_step_stop'                  : int(30*(25*size + 5)),
-    't_step_save'                  : int(6*(25*size + 5)),
+    't_step_save'                  : int(30*(25*size + 5)),
     # ==========================================================
-    
+
     # Simulation Algorithm Parameters ==========================
     'num_patches'                  : 2,
     'model_eqns'                   : 2,
diff --git a/src/simulation/m_start_up.fpp b/src/simulation/m_start_up.fpp
index 5e1c6f6e8..15fdcbe29 100644
--- a/src/simulation/m_start_up.fpp
+++ b/src/simulation/m_start_up.fpp
@@ -1134,6 +1134,8 @@ contains
         real(kind(0d0)), intent(inout) :: start, finish
         integer, intent(inout) :: nt
 
+        real(kind(0d0)) :: grind_time
+
         call s_mpi_barrier()
 
         if (num_procs > 1) then
@@ -1152,29 +1154,33 @@ contains
                 time_final = maxval(proc_time)
                 io_time_final = maxval(io_proc_time)
             end if
-            print *, "Performance: ", time_final*1.0d9/(sys_size*maxval((/1,m_glb/))*maxval((/1,n_glb/))*maxval((/1,p_glb/))), " ns/gp/eq/rhs"
+
+            grind_time = time_final*1.0d9/(sys_size*maxval((/1,m_glb/))*maxval((/1,n_glb/))*maxval((/1,p_glb/)))
+
+            print *, "Performance:", grind_time, "ns/gp/eq/rhs"
             inquire (FILE='time_data.dat', EXIST=file_exists)
             if (file_exists) then
                 open (1, file='time_data.dat', position='append', status='old')
-                write (1, *) num_procs, time_final
-                close (1)
             else
                 open (1, file='time_data.dat', status='new')
-                write (1, *) num_procs, time_final
-                close (1)
+                write (1, '(A10, A15, A15)') "Ranks", "s/step", "ns/gp/eq/rhs"
             end if
 
+            write (1, '(I10, 2(F15.8))') num_procs, time_final, grind_time
+
+            close (1)
+
             inquire (FILE='io_time_data.dat', EXIST=file_exists)
             if (file_exists) then
                 open (1, file='io_time_data.dat', position='append', status='old')
-                write (1, *) num_procs, io_time_final
-                close (1)
             else
                 open (1, file='io_time_data.dat', status='new')
-                write (1, *) num_procs, io_time_final
-                close (1)
+                write (1, '(A10, A15)') "Ranks", "s/step"
             end if
 
+            write (1, '(I10, F15.8)') num_procs, io_time_final
+            close (1)
+
         end if
 
     end subroutine s_save_performance_metrics
diff --git a/toolchain/bootstrap/lint.sh b/toolchain/bootstrap/lint.sh
index ea8934b3b..1c9826224 100644
--- a/toolchain/bootstrap/lint.sh
+++ b/toolchain/bootstrap/lint.sh
@@ -2,6 +2,6 @@
 
 log "(venv) Running$MAGENTA pylint$COLOR_RESET on$MAGENTA MFC$COLOR_RESET's $MAGENTA""toolchain$COLOR_RESET."
 
-pylint -d R1722,W0718,C0301,C0116,C0115,C0114,C0410,W0622,W0640,C0103,W1309,C0411,W1514,R0401 "$(pwd)/toolchain/"
+pylint -d R1722,W0718,C0301,C0116,C0115,C0114,C0410,W0622,W0640,C0103,W1309,C0411,W1514,R0401,W0511 "$(pwd)/toolchain/"
 
 exit $?
diff --git a/toolchain/mfc/bench.py b/toolchain/mfc/bench.py
index 52792f269..08a5a1d56 100644
--- a/toolchain/mfc/bench.py
+++ b/toolchain/mfc/bench.py
@@ -4,7 +4,7 @@
 
 from .printer import cons
 from .state   import ARG, CFG
-from .build   import get_targets, DEFAULT_TARGETS
+from .build   import get_targets, DEFAULT_TARGETS, SIMULATION
 from .common  import system, MFC_BENCH_FILEPATH, MFC_SUBDIR, format_list_to_string
 from .common  import file_load_yaml, file_dump_yaml, create_directory
 from .common  import MFCException
@@ -77,6 +77,9 @@ def bench(targets = None):
     cons.unindent()
 
 
+# TODO: Refactor diff(): it is too long and has too many branches (hence the
+#       pylint suppression below).
+# pylint: disable=too-many-branches
 def diff():
     lhs, rhs = file_load_yaml(ARG("lhs")), file_load_yaml(ARG("rhs"))
 
@@ -132,15 +135,32 @@ def _lock_to_str(lock):
 
                 continue
 
-            if (float(f"{lhs_summary[target.name]}") <= 0.0) or math.isnan(float(f"{lhs_summary[target.name]}")):
+            if not math.isfinite(lhs_summary[target.name]["exec"]) or not math.isfinite(rhs_summary[target.name]["exec"]):
                 err = 1
-                cons.print(f"lhs_summary reports non-positive or NaN runtime for {target.name} - Case: {slug}")
+                cons.print(f"lhs_summary or rhs_summary reports non-real exec time for {target.name} - Case: {slug}")
 
-            if (float(f"{rhs_summary[target.name]}") <= 0.0) or math.isnan(float(f"{rhs_summary[target.name]}")):
+            exec_time_speedup = "N/A"
+            try:
+                exec_time_speedup = f'{lhs_summary[target.name]["exec"] / rhs_summary[target.name]["exec"]:.2f}'
+            except Exception as _:
                 err = 1
-                cons.print(f"rhs_summary reports non-positive or NaN runtime for {target.name} - Case: {slug}")
+                cons.print(f"lhs_summary or rhs_summary reports non-real exec time for {target.name} - Case: {slug}")
 
-            speedups[i] = f"{lhs_summary[target.name] / rhs_summary[target.name]:.2f}x"
+            speedups[i] = f"Exec: {exec_time_speedup}"
+
+            if target == SIMULATION:
+                grind_time_speedup = "N/A"
+                if not math.isfinite(lhs_summary[target.name]["grind"]) or not math.isfinite(rhs_summary[target.name]["grind"]):
+                    err = 1
+                    cons.print(f"lhs_summary or rhs_summary reports non-real grind time for {target.name} - Case: {slug}")
+
+                try:
+                    grind_time_speedup = f'{lhs_summary[target.name]["grind"] / rhs_summary[target.name]["grind"]:.2f}'
+                except Exception as _:
+                    err = 1
+                    cons.print(f"lhs_summary or rhs_summary reports non-real grind time for {target.name} - Case: {slug}")
+
+                speedups[i] += f" & Grind: {grind_time_speedup}"
 
         table.add_row(f"[magenta]{slug}[/magenta]", *speedups)
 
diff --git a/toolchain/mfc/run/run.py b/toolchain/mfc/run/run.py
index 162648a05..c1453e43d 100644
--- a/toolchain/mfc/run/run.py
+++ b/toolchain/mfc/run/run.py
@@ -5,7 +5,7 @@
 from mako.lookup   import TemplateLookup
 from mako.template import Template
 
-from ..build   import get_targets, build, REQUIRED_TARGETS
+from ..build   import get_targets, build, REQUIRED_TARGETS, SIMULATION
 from ..printer import cons
 from ..state   import ARG, ARGS, CFG
 from ..common  import MFCException, isspace, file_read, does_command_exist
@@ -101,6 +101,7 @@ def __generate_job_script(targets, case: input.MFCInputFile):
         env=env,
         case=case,
         MFC_ROOTDIR=MFC_ROOTDIR,
+        SIMULATION=SIMULATION,
         qsystem=queues.get_system(),
         profiler=shlex.join(__profiler_prepend())
     )
diff --git a/toolchain/templates/include/helpers.mako b/toolchain/templates/include/helpers.mako
index abd64b076..a7e6f7ec3 100644
--- a/toolchain/templates/include/helpers.mako
+++ b/toolchain/templates/include/helpers.mako
@@ -74,7 +74,7 @@ END
         export CRAY_ACC_MODULE='${target.get_staging_dirpath(case)}/simulation-wg256.lld.exe'
     fi
 
-    cd "${os.path.dirname(input)}"
+    cd '${os.path.dirname(input)}'
 
     t_${target.name}_start=$(python3 -c 'import time; print(time.time())')
 % else:
@@ -102,10 +102,14 @@ END
 
     % if output_summary:
 
-        cd "${MFC_ROOTDIR}"
+        cd '${MFC_ROOTDIR}'
 
-        cat >>"${output_summary}" <<EOL
-${target.name}: $(echo "$t_${target.name}_stop - $t_${target.name}_start" | bc -l)
+        cat >>'${output_summary}' <<EOL
+${target.name}:
+    exec:  $(echo "$t_${target.name}_stop - $t_${target.name}_start" | bc -l)
+% if target == SIMULATION:
+    grind: $(cat '${os.path.join(os.path.dirname(input), 'time_data.dat')}' | tail -n 1 | awk '{print $NF}')
+% endif
 EOL
 
         cd - > /dev/null