From 3aac2c3c2aa31ae741b31bf9c00051f9b4380577 Mon Sep 17 00:00:00 2001 From: Henry Le Berre Date: Fri, 23 Aug 2024 09:03:22 -0400 Subject: [PATCH] Two-stage IPO for NVHPC (#581) Co-authored-by: Cameron --- .github/workflows/bench.yml | 4 +- CMakeLists.txt | 239 ++++++++++++---------- src/common/include/inline_conversions.fpp | 57 ------ src/common/m_helper_basic.f90 | 1 + src/common/m_variables_conversion.fpp | 61 +++++- src/post_process/m_derived_variables.fpp | 4 - src/pre_process/m_data_output.fpp | 2 - src/simulation/m_cbc.fpp | 3 - src/simulation/m_data_output.fpp | 3 - src/simulation/m_riemann_solvers.fpp | 5 +- toolchain/modules | 1 + 11 files changed, 195 insertions(+), 185 deletions(-) delete mode 100644 src/common/include/inline_conversions.fpp diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index 334d6e3eb..56ed3c009 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -52,8 +52,8 @@ jobs: - name: Generate & Post Comment run: | - . ./mfc.sh load -c p -m g - ./mfc.sh bench_diff master/bench-${{ matrix.device }}.yaml pr/bench-${{ matrix.device }}.yaml + (cd pr && . ./mfc.sh load -c p -m g) + (cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml) - name: Archive Logs uses: actions/upload-artifact@v3 diff --git a/CMakeLists.txt b/CMakeLists.txt index 5b72a285d..409fdad20 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -172,16 +172,13 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_ add_compile_options( $<$:-Mfreeform> $<$:-cpp> - -Minfo=accel + $<$:-Minfo=inline> + $<$:-Minfo=accel> ) - if (CMAKE_BUILD_TYPE STREQUAL "Release") - add_compile_options( - $<$ - ) - elseif (CMAKE_BUILD_TYPE STREQUAL "Debug") + if (CMAKE_BUILD_TYPE STREQUAL "Debug") add_compile_options( - $<$ + $<$:-O0> ) endif() @@ -208,13 +205,22 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release") endif() # Enable LTO/IPO if supported - CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR) - if (SUPPORTS_IPO) - message(STATUS "Enabled IPO / LTO") - set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") + if (MFC_Unified) + message(STATUS "IPO is not available with NVHPC using Unified Memory") + else() + message(STATUS "Performing IPO using -Mextract followed by -Minline") + set(NVHPC_USE_TWO_PASS_IPO TRUE) + endif() else() - message(STATUS "IPO / LTO is NOT available") - endif() + CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR) + if (SUPPORTS_IPO) + message(STATUS "Enabled IPO / LTO") + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(STATUS "IPO / LTO is NOT available") + endif() + endif() endif() if (CMAKE_BUILD_TYPE STREQUAL "Debug") @@ -365,124 +371,139 @@ function(MFC_SETUP_TARGET) cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW" "TARGET" "SOURCES" ${ARGN}) add_executable(${ARGS_TARGET} ${ARGS_SOURCES}) - - set_target_properties(${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON) - - target_include_directories(${ARGS_TARGET} PRIVATE - "${CMAKE_SOURCE_DIR}/src/common" - "${CMAKE_SOURCE_DIR}/src/common/include" - "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") - - if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") - target_include_directories(${ARGS_TARGET} PRIVATE - "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") + set(IPO_TARGETS ${ARGS_TARGET}) + # Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler. + # A little hacky, but it *is* an edge-case for *one* compiler. + if (NVHPC_USE_TWO_PASS_IPO) + add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES}) + target_compile_options(${ARGS_TARGET}_lib PRIVATE + $<$:-Mextract=lib:${ARGS_TARGET}_lib> + $<$:-Minline> + ) + add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib) + target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib) + list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib) endif() - string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER) - target_compile_definitions( - ${ARGS_TARGET} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID} - MFC_${${ARGS_TARGET}_UPPER} - ) + foreach (a_target ${IPO_TARGETS}) + set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON) - if (MFC_MPI AND ARGS_MPI) - find_package(MPI COMPONENTS Fortran REQUIRED) + target_include_directories(${a_target} PRIVATE + "${CMAKE_SOURCE_DIR}/src/common" + "${CMAKE_SOURCE_DIR}/src/common/include" + "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}") - target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_MPI) - target_link_libraries (${ARGS_TARGET} PRIVATE MPI::MPI_Fortran) - endif() + if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") + target_include_directories(${a_target} PRIVATE + "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include") + endif() - if (ARGS_SILO) - find_package(SILO REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE SILO::SILO) - endif() + string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER) + target_compile_definitions( + ${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID} + MFC_${${ARGS_TARGET}_UPPER} + ) - if (ARGS_HDF5) - find_package(HDF5 REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE HDF5::HDF5) - endif() + if (MFC_MPI AND ARGS_MPI) + find_package(MPI COMPONENTS Fortran REQUIRED) - if (ARGS_FFTW) - if (MFC_OpenACC AND ARGS_OpenACC) - if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - find_package(CUDAToolkit REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft) - else() - find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hipfft) - endif() - else() - find_package(FFTW REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE FFTW::FFTW) + target_compile_definitions(${a_target} PRIVATE MFC_MPI) + target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran) endif() - endif() - if (MFC_OpenACC AND ARGS_OpenACC) - find_package(OpenACC) + if (ARGS_SILO) + find_package(SILO REQUIRED) + target_link_libraries(${a_target} PRIVATE SILO::SILO) + endif() - # This should be equivalent to if (NOT OpenACC_FC_FOUND) - if (NOT TARGET OpenACC::OpenACC_Fortran) - message(FATAL_ERROR "OpenACC + Fortran is unsupported.") + if (ARGS_HDF5) + find_package(HDF5 REQUIRED) + target_link_libraries(${a_target} PRIVATE HDF5::HDF5) endif() - target_link_libraries(${ARGS_TARGET} PRIVATE OpenACC::OpenACC_Fortran) - target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_OpenACC) - - if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") - # FIXME: This should work with other cards than gfx90a ones. - target_compile_options(${ARGS_TARGET} PRIVATE - "-foffload=amdgcn-amdhsa='-march=gfx90a'" - "-foffload-options=-lgfortran\ -lm" - "-fno-exceptions") - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - find_package(cuTENSOR) - if (NOT cuTENSOR_FOUND) - message(WARNING - "Failed to locate the NVIDIA cuTENSOR library. MFC will be " - "built without support for it, disallowing the use of " - "cu_tensor=T. This can result in degraded performance.") + if (ARGS_FFTW) + if (MFC_OpenACC AND ARGS_OpenACC) + if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + find_package(CUDAToolkit REQUIRED) + target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft) + else() + find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hipfft) + endif() else() - target_link_libraries (${ARGS_TARGET} PRIVATE cuTENSOR::cuTENSOR) - target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_cuTENSOR) + find_package(FFTW REQUIRED) + target_link_libraries(${a_target} PRIVATE FFTW::FFTW) endif() + endif() - foreach (cc ${MFC_CUDA_CC}) - target_compile_options(${ARGS_TARGET} - PRIVATE -gpu=cc${cc} - ) - endforeach() - - target_compile_options(${ARGS_TARGET} - PRIVATE -gpu=keep,ptxinfo,lineinfo - ) + if (MFC_OpenACC AND ARGS_OpenACC) + find_package(OpenACC) - # GH-200 Unified Memory Support - if (MFC_Unified) - target_compile_options(${ARGS_TARGET} - PRIVATE -gpu=unified - ) - # "This option must appear in both the compile and link lines" -- NVHPC Docs - target_link_options(${ARGS_TARGET} - PRIVATE -gpu=unified - ) + # This should be equivalent to if (NOT OpenACC_FC_FOUND) + if (NOT TARGET OpenACC::OpenACC_Fortran) + message(FATAL_ERROR "OpenACC + Fortran is unsupported.") endif() - if (CMAKE_BUILD_TYPE STREQUAL "Debug") - target_compile_options(${ARGS_TARGET} - PRIVATE -gpu=autocompare,debug + target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran) + target_compile_definitions(${a_target} PRIVATE MFC_OpenACC) + + if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU") + # FIXME: This should work with other cards than gfx90a ones. + target_compile_options(${a_target} PRIVATE + "-foffload=amdgcn-amdhsa='-march=gfx90a'" + "-foffload-options=-lgfortran\ -lm" + "-fno-exceptions") + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + find_package(cuTENSOR) + if (NOT cuTENSOR_FOUND) + message(WARNING + "Failed to locate the NVIDIA cuTENSOR library. MFC will be " + "built without support for it, disallowing the use of " + "cu_tensor=T. This can result in degraded performance.") + else() + target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR) + target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR) + endif() + + foreach (cc ${MFC_CUDA_CC}) + target_compile_options(${a_target} + PRIVATE -gpu=cc${cc} + ) + endforeach() + + target_compile_options(${a_target} + PRIVATE -gpu=keep,ptxinfo,lineinfo ) + + # GH-200 Unified Memory Support + if (MFC_Unified) + target_compile_options(${ARGS_TARGET} + PRIVATE -gpu=unified + ) + # "This option must appear in both the compile and link lines" -- NVHPC Docs + target_link_options(${ARGS_TARGET} + PRIVATE -gpu=unified + ) + endif() + + if (CMAKE_BUILD_TYPE STREQUAL "Debug") + target_compile_options(${a_target} + PRIVATE -gpu=autocompare,debug + ) + endif() + elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + find_package(hipfort COMPONENTS hip CONFIG REQUIRED) + target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn) endif() - elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - find_package(hipfort COMPONENTS hip CONFIG REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn) + elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") + target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") endif() - elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray") - target_compile_options(${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc") - endif() - if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") - find_package(CUDAToolkit REQUIRED) - target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::nvToolsExt) - endif() + if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI") + find_package(CUDAToolkit REQUIRED) + target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt) + endif() + endforeach() install(TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin) endfunction() diff --git a/src/common/include/inline_conversions.fpp b/src/common/include/inline_conversions.fpp deleted file mode 100644 index a63af61e1..000000000 --- a/src/common/include/inline_conversions.fpp +++ /dev/null @@ -1,57 +0,0 @@ -#:def s_compute_speed_of_sound() - subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c) -#ifdef CRAY_ACC_WAR - !DIR$ INLINEALWAYS s_compute_speed_of_sound -#else - !$acc routine seq -#endif - real(kind(0d0)), intent(in) :: pres - real(kind(0d0)), intent(in) :: rho, gamma, pi_inf - real(kind(0d0)), intent(in) :: H - real(kind(0d0)), dimension(num_fluids), intent(in) :: adv - real(kind(0d0)), intent(in) :: vel_sum - real(kind(0d0)), intent(out) :: c - - real(kind(0d0)) :: blkmod1, blkmod2 - - integer :: q - - if (alt_soundspeed) then - blkmod1 = ((gammas(1) + 1d0)*pres + & - pi_infs(1))/gammas(1) - blkmod2 = ((gammas(2) + 1d0)*pres + & - pi_infs(2))/gammas(2) - c = (1d0/(rho*(adv(1)/blkmod1 + adv(2)/blkmod2))) - elseif (model_eqns == 3) then - c = 0d0 - !$acc loop seq - do q = 1, num_fluids - c = c + adv(q)*(1d0/gammas(q) + 1d0)* & - (pres + pi_infs(q)/(gammas(q) + 1d0)) - end do - c = c/rho - - elseif (((model_eqns == 4) .or. (model_eqns == 2 .and. bubbles))) then - ! Sound speed for bubble mmixture to order O(\alpha) - - if (mpp_lim .and. (num_fluids > 1)) then - c = (1d0/gamma + 1d0)* & - (pres + pi_inf/(gamma + 1d0))/rho - else - c = & - (1d0/gamma + 1d0)* & - (pres + pi_inf/(gamma + 1d0))/ & - (rho*(1d0 - adv(num_fluids))) - end if - else - c = ((H - 5d-1*vel_sum)/gamma) - end if - - if (mixture_err .and. c < 0d0) then - c = 100.d0*sgm_eps - else - c = sqrt(c) - end if - end subroutine s_compute_speed_of_sound -#:enddef - diff --git a/src/common/m_helper_basic.f90 b/src/common/m_helper_basic.f90 index 9f4c6523b..0611ff86f 100644 --- a/src/common/m_helper_basic.f90 +++ b/src/common/m_helper_basic.f90 @@ -25,6 +25,7 @@ module m_helper_basic !! @param tol_input Relative error (default = 1d-6). !! @return Result of the comparison. logical function f_approx_equal(a, b, tol_input) result(res) + !$acc routine seq ! Reference: https://floating-point-gui.de/errors/comparison/ real(kind(0d0)), intent(in) :: a, b diff --git a/src/common/m_variables_conversion.fpp b/src/common/m_variables_conversion.fpp index 46436cb8b..180abefd6 100644 --- a/src/common/m_variables_conversion.fpp +++ b/src/common/m_variables_conversion.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_variables_conversion #:include 'macros.fpp' -#:include 'inline_conversions.fpp' #:include 'case.fpp' !> @brief This module consists of subroutines used in the conversion of the @@ -40,6 +39,9 @@ module m_variables_conversion s_convert_primitive_to_conservative_variables, & s_convert_primitive_to_flux_variables, & s_compute_pressure, & +#ifndef MFC_PRE_PROCESS + s_compute_speed_of_sound, & +#endif s_finalize_variables_conversion_module !> Abstract interface to two subroutines designed for the transfer/conversion @@ -1339,4 +1341,61 @@ contains end subroutine s_finalize_variables_conversion_module +#ifndef MFC_PRE_PROCESS + subroutine s_compute_speed_of_sound(pres, rho, gamma, pi_inf, H, adv, vel_sum, c) +#ifdef CRAY_ACC_WAR + !DIR$ INLINEALWAYS s_compute_speed_of_sound +#else + !$acc routine seq +#endif + real(kind(0d0)), intent(in) :: pres + real(kind(0d0)), intent(in) :: rho, gamma, pi_inf + real(kind(0d0)), intent(in) :: H + real(kind(0d0)), dimension(num_fluids), intent(in) :: adv + real(kind(0d0)), intent(in) :: vel_sum + real(kind(0d0)), intent(out) :: c + + real(kind(0d0)) :: blkmod1, blkmod2 + + integer :: q + + if (alt_soundspeed) then + blkmod1 = ((gammas(1) + 1d0)*pres + & + pi_infs(1))/gammas(1) + blkmod2 = ((gammas(2) + 1d0)*pres + & + pi_infs(2))/gammas(2) + c = (1d0/(rho*(adv(1)/blkmod1 + adv(2)/blkmod2))) + elseif (model_eqns == 3) then + c = 0d0 + !$acc loop seq + do q = 1, num_fluids + c = c + adv(q)*(1d0/gammas(q) + 1d0)* & + (pres + pi_infs(q)/(gammas(q) + 1d0)) + end do + c = c/rho + + elseif (((model_eqns == 4) .or. (model_eqns == 2 .and. bubbles))) then + ! Sound speed for bubble mmixture to order O(\alpha) + + if (mpp_lim .and. (num_fluids > 1)) then + c = (1d0/gamma + 1d0)* & + (pres + pi_inf/(gamma + 1d0))/rho + else + c = & + (1d0/gamma + 1d0)* & + (pres + pi_inf/(gamma + 1d0))/ & + (rho*(1d0 - adv(num_fluids))) + end if + else + c = ((H - 5d-1*vel_sum)/gamma) + end if + + if (mixture_err .and. c < 0d0) then + c = 100.d0*sgm_eps + else + c = sqrt(c) + end if + end subroutine s_compute_speed_of_sound +#endif + end module m_variables_conversion diff --git a/src/post_process/m_derived_variables.fpp b/src/post_process/m_derived_variables.fpp index ba6afbdcb..e08973bd2 100644 --- a/src/post_process/m_derived_variables.fpp +++ b/src/post_process/m_derived_variables.fpp @@ -8,8 +8,6 @@ !! volume fraction, specific heat ratio, liquid stiffness, speed of !! sound, vorticity and the numerical Schlieren function. -#:include 'inline_conversions.fpp' - module m_derived_variables ! Dependencies ============================================================= @@ -561,8 +559,6 @@ contains end subroutine s_derive_qm - @:s_compute_speed_of_sound() - !> This subroutine gets as inputs the conservative variables !! and density. From those inputs, it proceeds to calculate !! the values of the numerical Schlieren function, which are diff --git a/src/pre_process/m_data_output.fpp b/src/pre_process/m_data_output.fpp index 94d5d8fb6..b650bc98e 100644 --- a/src/pre_process/m_data_output.fpp +++ b/src/pre_process/m_data_output.fpp @@ -2,8 +2,6 @@ !! @file m_data_output.f90 !! @brief Contains module m_data_output -#:include 'inline_conversions.fpp' - !> @brief This module takes care of writing the grid and initial condition !! data files into the "0" time-step directory located in the folder !! associated with the rank of the local processor, which is a sub- diff --git a/src/simulation/m_cbc.fpp b/src/simulation/m_cbc.fpp index 793d7555d..5b95b6d92 100644 --- a/src/simulation/m_cbc.fpp +++ b/src/simulation/m_cbc.fpp @@ -19,7 +19,6 @@ !! Please refer to Thompson (1987, 1990) for detailed descriptions. #:include 'macros.fpp' -#:include 'inline_conversions.fpp' module m_cbc @@ -144,8 +143,6 @@ module m_cbc contains - @:s_compute_speed_of_sound() - !> The computation of parameters, the allocation of memory, !! the association of pointers and/or the execution of any !! other procedures that are necessary to setup the module. diff --git a/src/simulation/m_data_output.fpp b/src/simulation/m_data_output.fpp index 47970831d..a28d6b0f3 100644 --- a/src/simulation/m_data_output.fpp +++ b/src/simulation/m_data_output.fpp @@ -3,7 +3,6 @@ !! @brief Contains module m_data_output #:include 'macros.fpp' -#:include 'inline_conversions.fpp' !> @brief The primary purpose of this module is to output the grid and the !! conservative variables data at the chosen time-step interval. In @@ -101,8 +100,6 @@ module m_data_output contains - @:s_compute_speed_of_sound() - !> The purpose of this subroutine is to open a new or pre- !! existing run-time information file and append to it the !! basic header information relevant to current simulation. diff --git a/src/simulation/m_riemann_solvers.fpp b/src/simulation/m_riemann_solvers.fpp index bfca71428..929476b5f 100644 --- a/src/simulation/m_riemann_solvers.fpp +++ b/src/simulation/m_riemann_solvers.fpp @@ -20,7 +20,6 @@ #:include 'macros.fpp' #:include 'inline_riemann.fpp' -#:include 'inline_conversions.fpp' module m_riemann_solvers @@ -264,9 +263,7 @@ module m_riemann_solvers contains - @:s_compute_speed_of_sound() - - subroutine s_hll_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & + subroutine s_hll_riemann_solver(qL_prim_rsx_vf, qL_prim_rsy_vf, qL_prim_rsz_vf, dqL_prim_dx_vf, & ! ------- dqL_prim_dy_vf, & dqL_prim_dz_vf, & qL_prim_vf, & diff --git a/toolchain/modules b/toolchain/modules index 15be3b4f7..0dc9576c8 100644 --- a/toolchain/modules +++ b/toolchain/modules @@ -42,6 +42,7 @@ e-gpu gpu/0.15.4 cuda/11.0.2 nvhpc/22.2 openmpi/4.0.5 cmake/3.19.8 e-gpu CC=nvc CXX=nvc++ FC=nvfortran p GT Phoenix +p-all python/3.10.10 p-cpu gcc/12.3.0 openmpi/4.1.5 p-gpu nvhpc/24.5 hpcx/2.19-cuda cuda/12.1.1