Skip to content

Commit

Permalink
Two-stage IPO for NVHPC (#581)
Browse files (browse the repository at this point in the history)
Co-authored-by: Cameron <airedaledev@protonmail.com>
  • Loading branch information
henryleberre and AiredaleDev authored Aug 23, 2024
1 parent 7bdf4e3 commit 3aac2c3
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 185 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ jobs:
- name: Generate & Post Comment
run: |
. ./mfc.sh load -c p -m g
./mfc.sh bench_diff master/bench-${{ matrix.device }}.yaml pr/bench-${{ matrix.device }}.yaml
(cd pr && . ./mfc.sh load -c p -m g)
(cd pr && ./mfc.sh bench_diff ../master/bench-${{ matrix.device }}.yaml ../pr/bench-${{ matrix.device }}.yaml)
- name: Archive Logs
uses: actions/upload-artifact@v3
Expand Down
239 changes: 130 additions & 109 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -172,16 +172,13 @@ elseif ((CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC") OR (CMAKE_Fortran_COMPILER_
add_compile_options(
$<$<COMPILE_LANGUAGE:Fortran>:-Mfreeform>
$<$<COMPILE_LANGUAGE:Fortran>:-cpp>
-Minfo=accel
$<$<COMPILE_LANGUAGE:Fortran>:-Minfo=inline>
$<$<COMPILE_LANGUAGE:Fortran>:-Minfo=accel>
)

if (CMAKE_BUILD_TYPE STREQUAL "Release")
add_compile_options(
$<$<COMPILE_LANGUAGE:Fortran:-minline>
)
elseif (CMAKE_BUILD_TYPE STREQUAL "Debug")
if (CMAKE_BUILD_TYPE STREQUAL "Debug")
add_compile_options(
$<$<COMPILE_LANGUAGE:Fortran:-O0>
$<$<COMPILE_LANGUAGE:Fortran>:-O0>
)
endif()

Expand All @@ -208,13 +205,22 @@ if (CMAKE_BUILD_TYPE STREQUAL "Release")
endif()

# Enable LTO/IPO if supported
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
if (SUPPORTS_IPO)
message(STATUS "Enabled IPO / LTO")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC")
if (MFC_Unified)
message(STATUS "IPO is not available with NVHPC using Unified Memory")
else()
message(STATUS "Performing IPO using -Mextract followed by -Minline")
set(NVHPC_USE_TWO_PASS_IPO TRUE)
endif()
else()
message(STATUS "IPO / LTO is NOT available")
endif()
CHECK_IPO_SUPPORTED(RESULT SUPPORTS_IPO OUTPUT IPO_ERROR)
if (SUPPORTS_IPO)
message(STATUS "Enabled IPO / LTO")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
else()
message(STATUS "IPO / LTO is NOT available")
endif()
endif()
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
Expand Down Expand Up @@ -365,124 +371,139 @@ function(MFC_SETUP_TARGET)
cmake_parse_arguments(ARGS "OpenACC;MPI;SILO;HDF5;FFTW" "TARGET" "SOURCES" ${ARGN})

add_executable(${ARGS_TARGET} ${ARGS_SOURCES})

set_target_properties(${ARGS_TARGET} PROPERTIES Fortran_PREPROCESS ON)

target_include_directories(${ARGS_TARGET} PRIVATE
"${CMAKE_SOURCE_DIR}/src/common"
"${CMAKE_SOURCE_DIR}/src/common/include"
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")

if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
target_include_directories(${ARGS_TARGET} PRIVATE
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
set(IPO_TARGETS ${ARGS_TARGET})
# Here we need to split into "library" and "executable" to perform IPO on the NVIDIA compiler.
# A little hacky, but it *is* an edge-case for *one* compiler.
if (NVHPC_USE_TWO_PASS_IPO)
add_library(${ARGS_TARGET}_lib OBJECT ${ARGS_SOURCES})
target_compile_options(${ARGS_TARGET}_lib PRIVATE
$<$<COMPILE_LANGUAGE:Fortran>:-Mextract=lib:${ARGS_TARGET}_lib>
$<$<COMPILE_LANGUAGE:Fortran>:-Minline>
)
add_dependencies(${ARGS_TARGET} ${ARGS_TARGET}_lib)
target_compile_options(${ARGS_TARGET} PRIVATE -Minline=lib:${ARGS_TARGET}_lib)
list(PREPEND IPO_TARGETS ${ARGS_TARGET}_lib)
endif()

string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER)
target_compile_definitions(
${ARGS_TARGET} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
MFC_${${ARGS_TARGET}_UPPER}
)
foreach (a_target ${IPO_TARGETS})
set_target_properties(${a_target} PROPERTIES Fortran_PREPROCESS ON)

if (MFC_MPI AND ARGS_MPI)
find_package(MPI COMPONENTS Fortran REQUIRED)
target_include_directories(${a_target} PRIVATE
"${CMAKE_SOURCE_DIR}/src/common"
"${CMAKE_SOURCE_DIR}/src/common/include"
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}")

target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_MPI)
target_link_libraries (${ARGS_TARGET} PRIVATE MPI::MPI_Fortran)
endif()
if (EXISTS "${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
target_include_directories(${a_target} PRIVATE
"${CMAKE_SOURCE_DIR}/src/${ARGS_TARGET}/include")
endif()

if (ARGS_SILO)
find_package(SILO REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE SILO::SILO)
endif()
string(TOUPPER "${ARGS_TARGET}" ${ARGS_TARGET}_UPPER)
target_compile_definitions(
${a_target} PRIVATE MFC_${CMAKE_Fortran_COMPILER_ID}
MFC_${${ARGS_TARGET}_UPPER}
)

if (ARGS_HDF5)
find_package(HDF5 REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE HDF5::HDF5)
endif()
if (MFC_MPI AND ARGS_MPI)
find_package(MPI COMPONENTS Fortran REQUIRED)

if (ARGS_FFTW)
if (MFC_OpenACC AND ARGS_OpenACC)
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::cudart CUDA::cufft)
else()
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hipfft)
endif()
else()
find_package(FFTW REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE FFTW::FFTW)
target_compile_definitions(${a_target} PRIVATE MFC_MPI)
target_link_libraries (${a_target} PRIVATE MPI::MPI_Fortran)
endif()
endif()

if (MFC_OpenACC AND ARGS_OpenACC)
find_package(OpenACC)
if (ARGS_SILO)
find_package(SILO REQUIRED)
target_link_libraries(${a_target} PRIVATE SILO::SILO)
endif()

# This should be equivalent to if (NOT OpenACC_FC_FOUND)
if (NOT TARGET OpenACC::OpenACC_Fortran)
message(FATAL_ERROR "OpenACC + Fortran is unsupported.")
if (ARGS_HDF5)
find_package(HDF5 REQUIRED)
target_link_libraries(${a_target} PRIVATE HDF5::HDF5)
endif()

target_link_libraries(${ARGS_TARGET} PRIVATE OpenACC::OpenACC_Fortran)
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_OpenACC)

if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
# FIXME: This should work with other cards than gfx90a ones.
target_compile_options(${ARGS_TARGET} PRIVATE
"-foffload=amdgcn-amdhsa='-march=gfx90a'"
"-foffload-options=-lgfortran\ -lm"
"-fno-exceptions")
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(cuTENSOR)
if (NOT cuTENSOR_FOUND)
message(WARNING
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
"built without support for it, disallowing the use of "
"cu_tensor=T. This can result in degraded performance.")
if (ARGS_FFTW)
if (MFC_OpenACC AND ARGS_OpenACC)
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${a_target} PRIVATE CUDA::cudart CUDA::cufft)
else()
find_package(hipfort COMPONENTS hipfft CONFIG REQUIRED)
target_link_libraries(${a_target} PRIVATE hipfort::hipfft)
endif()
else()
target_link_libraries (${ARGS_TARGET} PRIVATE cuTENSOR::cuTENSOR)
target_compile_definitions(${ARGS_TARGET} PRIVATE MFC_cuTENSOR)
find_package(FFTW REQUIRED)
target_link_libraries(${a_target} PRIVATE FFTW::FFTW)
endif()
endif()

foreach (cc ${MFC_CUDA_CC})
target_compile_options(${ARGS_TARGET}
PRIVATE -gpu=cc${cc}
)
endforeach()

target_compile_options(${ARGS_TARGET}
PRIVATE -gpu=keep,ptxinfo,lineinfo
)
if (MFC_OpenACC AND ARGS_OpenACC)
find_package(OpenACC)

# GH-200 Unified Memory Support
if (MFC_Unified)
target_compile_options(${ARGS_TARGET}
PRIVATE -gpu=unified
)
# "This option must appear in both the compile and link lines" -- NVHPC Docs
target_link_options(${ARGS_TARGET}
PRIVATE -gpu=unified
)
# This should be equivalent to if (NOT OpenACC_FC_FOUND)
if (NOT TARGET OpenACC::OpenACC_Fortran)
message(FATAL_ERROR "OpenACC + Fortran is unsupported.")
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(${ARGS_TARGET}
PRIVATE -gpu=autocompare,debug
target_link_libraries(${a_target} PRIVATE OpenACC::OpenACC_Fortran)
target_compile_definitions(${a_target} PRIVATE MFC_OpenACC)

if (CMAKE_Fortran_COMPILER_ID STREQUAL "GNU")
# FIXME: This should work with other cards than gfx90a ones.
target_compile_options(${a_target} PRIVATE
"-foffload=amdgcn-amdhsa='-march=gfx90a'"
"-foffload-options=-lgfortran\ -lm"
"-fno-exceptions")
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(cuTENSOR)
if (NOT cuTENSOR_FOUND)
message(WARNING
"Failed to locate the NVIDIA cuTENSOR library. MFC will be "
"built without support for it, disallowing the use of "
"cu_tensor=T. This can result in degraded performance.")
else()
target_link_libraries (${a_target} PRIVATE cuTENSOR::cuTENSOR)
target_compile_definitions(${a_target} PRIVATE MFC_cuTENSOR)
endif()

foreach (cc ${MFC_CUDA_CC})
target_compile_options(${a_target}
PRIVATE -gpu=cc${cc}
)
endforeach()

target_compile_options(${a_target}
PRIVATE -gpu=keep,ptxinfo,lineinfo
)

# GH-200 Unified Memory Support
if (MFC_Unified)
target_compile_options(${ARGS_TARGET}
PRIVATE -gpu=unified
)
# "This option must appear in both the compile and link lines" -- NVHPC Docs
target_link_options(${ARGS_TARGET}
PRIVATE -gpu=unified
)
endif()

if (CMAKE_BUILD_TYPE STREQUAL "Debug")
target_compile_options(${a_target}
PRIVATE -gpu=autocompare,debug
)
endif()
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
target_link_libraries(${a_target} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
endif()
elseif(CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
find_package(hipfort COMPONENTS hip CONFIG REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE hipfort::hip hipfort::hipfort-amdgcn)
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(${a_target} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
endif()
elseif (CMAKE_Fortran_COMPILER_ID STREQUAL "Cray")
target_compile_options(${ARGS_TARGET} PRIVATE "SHELL:-h noacc" "SHELL:-x acc")
endif()

if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${ARGS_TARGET} PRIVATE CUDA::nvToolsExt)
endif()
if (CMAKE_Fortran_COMPILER_ID STREQUAL "NVHPC" OR CMAKE_Fortran_COMPILER_ID STREQUAL "PGI")
find_package(CUDAToolkit REQUIRED)
target_link_libraries(${a_target} PRIVATE CUDA::nvToolsExt)
endif()
endforeach()

install(TARGETS ${ARGS_TARGET} RUNTIME DESTINATION bin)
endfunction()
Expand Down
57 changes: 0 additions & 57 deletions src/common/include/inline_conversions.fpp

This file was deleted.

1 change: 1 addition & 0 deletions src/common/m_helper_basic.f90
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ module m_helper_basic
!! @param tol_input Relative error (default = 1d-6).
!! @return Result of the comparison.
logical function f_approx_equal(a, b, tol_input) result(res)
!$acc routine seq
! Reference: https://floating-point-gui.de/errors/comparison/

real(kind(0d0)), intent(in) :: a, b
Expand Down
Loading

0 comments on commit 3aac2c3

Please sign in to comment.