diff --git a/.github/workflows/frontier/test.sh b/.github/workflows/frontier/test.sh index 9f26a2a03..5725cf4e7 100644 --- a/.github/workflows/frontier/test.sh +++ b/.github/workflows/frontier/test.sh @@ -3,4 +3,5 @@ gpus=`rocm-smi --showid | awk '{print $1}' | grep -Eo '[0-9]+' | uniq | tr '\n' ' '` ngpus=`echo "$gpus" | tr -d '[:space:]' | wc -c` -./mfc.sh test -j $ngpus --sys-hdf5 --sys-fftw -- -c frontier +./mfc.sh test --max-attempts 3 -j $ngpus --sys-hdf5 --sys-fftw -- -c frontier + diff --git a/.github/workflows/phoenix/test.sh b/.github/workflows/phoenix/test.sh index 2f4e3bd19..5cdc57e78 100644 --- a/.github/workflows/phoenix/test.sh +++ b/.github/workflows/phoenix/test.sh @@ -16,5 +16,6 @@ if [ "$job_device" == "gpu" ]; then n_test_threads=`expr $gpu_count \* 2` fi -./mfc.sh test -a -j $n_test_threads $device_opts -- -c phoenix +./mfc.sh test --max-attempts 3 -a -j $n_test_threads $device_opts -- -c phoenix + diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 088e016fd..65515d3a5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -94,7 +94,7 @@ jobs: - name: Test run: | if [ '${{ matrix.intel }}' == 'true' ]; then source /opt/intel/oneapi/setvars.sh; fi - /bin/bash mfc.sh test -j $(nproc) $OPT1 $OPT2 + /bin/bash mfc.sh test --max-attempts 3 -j $(nproc) $OPT1 $OPT2 env: OPT1: ${{ matrix.mpi == 'mpi' && '--test-all' || '' }} OPT2: ${{ matrix.debug == 'debug' && '-% 20' || '' }} @@ -109,7 +109,7 @@ jobs: uses: actions/checkout@v4 - name: Test - run: sudo ./mfc.sh docker ./mfc.sh test -j $(nproc) -a + run: sudo ./mfc.sh docker ./mfc.sh test --max-attempts 3 -j $(nproc) -a self: name: Self Hosted diff --git a/toolchain/mfc/args.py b/toolchain/mfc/args.py index 6ad9cf399..a9c07fd59 100644 --- a/toolchain/mfc/args.py +++ b/toolchain/mfc/args.py @@ -81,13 +81,11 @@ def add_common_arguments(p, mask = None): test.add_argument("-f", "--from", default=test_cases[0].get_uuid(), type=str, help="First test UUID to run.") test.add_argument("-t", "--to", default=test_cases[-1].get_uuid(), type=str, help="Last test UUID to run.") test.add_argument("-o", "--only", nargs="+", type=str, default=[], metavar="L", help="Only run tests with specified properties.") - test.add_argument("-r", "--relentless", action="store_true", default=False, help="Run all tests, even if multiple fail.") test.add_argument("-a", "--test-all", action="store_true", default=False, help="Run the Post Process Tests too.") test.add_argument("-%", "--percent", type=int, default=100, help="Percentage of tests to run.") - test.add_argument("-m", "--max-attempts", type=int, default=3, help="Maximum number of attempts to run a test.") + test.add_argument("-m", "--max-attempts", type=int, default=1, help="Maximum number of attempts to run a test.") test.add_argument( "--no-build", action="store_true", default=False, help="(Testing) Do not rebuild MFC.") test.add_argument("--case-optimization", action="store_true", default=False, help="(GPU Optimization) Compile MFC targets with some case parameters hard-coded.") - test_meg = test.add_mutually_exclusive_group() test_meg.add_argument("--generate", action="store_true", default=False, help="(Test Generation) Generate golden files.") test_meg.add_argument("--add-new-variables", action="store_true", default=False, help="(Test Generation) If new variables are found in D/ when running tests, add them to the golden files.") diff --git a/toolchain/mfc/test/test.py b/toolchain/mfc/test/test.py index 0d300e58b..4e77867ba 100644 --- a/toolchain/mfc/test/test.py +++ b/toolchain/mfc/test/test.py @@ -18,6 +18,8 @@ nFAIL = 0 +nPASS = 0 +nSKIP = 0 def __filter(cases_) -> typing.List[TestCase]: cases = cases_[:] @@ -60,7 +62,7 @@ def __filter(cases_) -> typing.List[TestCase]: def test(): # pylint: disable=global-statement, global-variable-not-assigned - global nFAIL + global nFAIL, nPASS, nSKIP cases = [ _.to_case() for _ in list_cases() ] @@ -123,12 +125,9 @@ def test(): ARG("jobs"), ARG("gpus")) cons.print() - if nFAIL == 0: - cons.print("Tested Simulation [bold green]✓[/bold green]") - else: - raise MFCException(f"Testing: Encountered [bold red]{nFAIL}[/bold red] failure(s).") - cons.unindent() + cons.print(f"\nTest Summary: [bold green]{nPASS}[/bold green] passed, [bold red]{nFAIL}[/bold red] failed, [bold yellow]{nSKIP}[/bold yellow] skipped.") + exit(nFAIL) # pylint: disable=too-many-locals, too-many-branches, too-many-statements @@ -217,8 +216,8 @@ def _handle_case(case: TestCase, devices: typing.Set[int]): def handle_case(case: TestCase, devices: typing.Set[int]): - # pylint: disable=global-statement - global nFAIL + # pylint: disable=global-statement, global-variable-not-assigned + global nFAIL, nPASS, nSKIP nAttempts = 0 @@ -227,18 +226,13 @@ def handle_case(case: TestCase, devices: typing.Set[int]): try: _handle_case(case, devices) + nPASS += 1 except Exception as exc: if nAttempts < ARG("max_attempts"): cons.print(f"[bold yellow] Attempt {nAttempts}: Failed test {case.get_uuid()}. Retrying...[/bold yellow]") continue - nFAIL += 1 - cons.print(f"[bold red]Failed test {case} after {nAttempts} attempt(s).[/bold red]") - - if ARG("relentless"): - cons.print(f"{exc}") - else: - raise exc + cons.print(f"{exc}") return