Add ROCm support based on configure option.
ROCm provides an interface similar to CUDA for working with AMD GPUs.
Provide a compile-time option to build with ROCm instead of CUDA.

1. Add a --with-rocm= flag to ./configure (see the sketch below).
2. Rename the CUDA calls to generic "gpu" calls that are independent of
   the underlying framework.
3. Switch between _rocm and _cuda files at compile time to make the
   appropriate calls.
4. When building for RCCL (AMD's NCCL), name the plugin librccl-net.so
   for binary compatibility.
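
A minimal usage sketch for the new option (install paths are illustrative
assumptions, not taken from this change):

   # Configure against a ROCm install instead of CUDA (path is an example)
   ./configure --with-rocm=/opt/rocm
   make
   # libtool keeps the RCCL-compatible plugin in .libs until "make install"
   ls src/.libs/librccl-net.so*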

Tested on:

1. HPE Cray EX with EX235a (Bard Peak) AMD GPU nodes + 200 Gb Slingshot
   adapters.
2. HPE Cray EX with NVIDIA A100 SXM4 80GB GPUs + 200 Gb Slingshot
   adapters.

Signed-off-by: Ryan Hankins <ryan.hankins@hpe.com>
ryanhankins committed Aug 15, 2024
1 parent 8e836e5 commit b1a22d5
Showing 16 changed files with 240 additions and 40 deletions.
26 changes: 23 additions & 3 deletions configure.ac
@@ -93,10 +93,30 @@ CHECK_PKG_NEURON([AS_IF([test -n "${want_cuda}"],
[AC_MSG_ERROR([Cannot enable both CUDA and neuron.])],
[want_cuda=no])
have_device_interface=neuron])
CHECK_PKG_CUDA([have_device_interface=cuda])

# Select CUDA if Neuron wasn't specified and --with-rocm was not used.
CHECK_PKG_CUDA(AS_IF([test "${have_device_interface}" = "no"],
AS_IF([test -z "$with_rocm"], [have_device_interface=cuda])))
# If neither CUDA nor Neuron is being used, select ROCm
CHECK_PKG_ROCM(AS_IF([test "${have_device_interface}" = "no"], [have_device_interface=rocm]))
AS_IF([test "${have_device_interface}" = "no"],
[AC_MSG_ERROR([NCCL OFI Plugin requires either CUDA or Neuron runtime.])])
[AC_MSG_ERROR([NCCL OFI Plugin requires either CUDA, ROCm or Neuron runtime.])])

do_cuda=0
do_rocm=0
AS_IF([test -n "$with_rocm"],
[AS_IF([test "$have_device_interface" = "rocm"],
[enable_tests="no"
do_rocm=1
])],
[AS_IF([test "$have_device_interface" = "cuda"], [do_cuda=1])])

AC_DEFINE_UNQUOTED([HAVE_CUDA], [${do_cuda}], [Defined to 1 if CUDA is available])
AM_CONDITIONAL([HAVE_CUDA], [test ${do_cuda} = 1])

AC_DEFINE_UNQUOTED([HAVE_ROCM], [${do_rocm}], [Defined to 1 if ROCm is available])
AM_CONDITIONAL([HAVE_ROCM], [test ${do_rocm} = 1])
AS_IF([test ${do_rocm} = 1],
      [AC_DEFINE_UNQUOTED([__HIP_PLATFORM_AMD__], [1], [Select AMD/ROCm HIP APIs])])

CHECK_PKG_HWLOC([],
[AC_MSG_ERROR([Could not find the hwloc library. Use --with-hwloc to provide the path to non-standard hwloc installation.])])
2 changes: 1 addition & 1 deletion include/nccl-headers/error.h
@@ -5,7 +5,7 @@
#ifndef NCCL_HEADERS_ERROR_H
#define NCCL_HEADERS_ERROR_H

#if HAVE_CUDA
#if HAVE_CUDA || HAVE_ROCM
#include "nccl-headers/nvidia/err.h"
#elif HAVE_NEURON
#include "nccl-headers/neuron/error.h"
2 changes: 1 addition & 1 deletion include/nccl-headers/net.h
@@ -5,7 +5,7 @@
#ifndef NCCL_HEADERS_NET_H
#define NCCL_HEADERS_NET_H

#if HAVE_CUDA
#if HAVE_CUDA || HAVE_ROCM
#include "nccl-headers/nvidia/net.h"
#elif HAVE_NEURON
#include "nccl-headers/neuron/net.h"
2 changes: 2 additions & 0 deletions include/nccl_ofi_cuda.h
@@ -41,8 +41,10 @@ extern int nccl_net_ofi_gpuDeviceGetCount(int* count);

#if CUDA_VERSION >= 11030
extern int nccl_net_ofi_gpuFlushGPUDirectRDMAWrites();
#define HAVE_FLUSH_GPU_DIRECT_RDMA_WRITE 1
#else
extern void *nccl_net_ofi_gpuFlushGPUDirectRDMAWrites;
#define HAVE_FLUSH_GPU_DIRECT_RDMA_WRITE 0
#endif

#ifdef _cplusplus
49 changes: 49 additions & 0 deletions include/nccl_ofi_rocm.h
@@ -0,0 +1,49 @@
/*
* Copyright (c) 2024 Hewlett Packard Enterprise Development LP
* Copyright (c) 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
*/

#ifndef NCCL_OFI_ROCM_H_
#define NCCL_OFI_ROCM_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <hip/hip_runtime_api.h>

/*
* Error checking is currently just success or failure.
*/
enum {
GPU_SUCCESS = 0,
GPU_ERROR = 999 /* Match hipErrorUnknown */
};

int nccl_net_ofi_gpu_init(void);

/*
* @brief Gets the GPU device associated with the buffer
*
* @param data
* Pointer to GPU buffer.
*
* @param dev_id
*        Returned GPU device ID associated with the buffer.
*
* @return 0 on success
*         non-zero on error
*/
int nccl_net_ofi_get_cuda_device(void *data, int *dev_id);
int nccl_net_ofi_gpuDriverGetVersion(int *driverVersion);
int nccl_net_ofi_gpuCtxGetDevice(int *device);
int nccl_net_ofi_gpuDeviceGetCount(int* count);

extern void *nccl_net_ofi_gpuFlushGPUDirectRDMAWrites;
#define HAVE_FLUSH_GPU_DIRECT_RDMA_WRITE 0

#ifdef __cplusplus
} // End extern "C"
#endif

#endif // End NCCL_OFI_ROCM_H_
3 changes: 0 additions & 3 deletions m4/check_pkg_cuda.m4
@@ -49,9 +49,6 @@ AC_DEFUN([CHECK_PKG_CUDA], [
CPPFLAGS="${check_pkg_CPPFLAGS_save}"
$2])
AC_DEFINE_UNQUOTED([HAVE_CUDA], [${check_pkg_define}], [Defined to 1 if CUDA is available])
AM_CONDITIONAL([HAVE_CUDA], [test "${check_pkg_found}" = "yes"])
AC_SUBST([CUDA_LDFLAGS])
AC_SUBST([CUDA_LIBS])
52 changes: 52 additions & 0 deletions m4/check_pkg_rocm.m4
@@ -0,0 +1,52 @@
# -*- autoconf -*-
#
# Copyright (c) 2024 Hewlett Packard Enterprise Development LP
# Copyright (c) 2023 Amazon.com, Inc. or its affiliates. All rights reserved.
#
# See LICENSE.txt for license information
#

AC_DEFUN([CHECK_PKG_ROCM], [
check_pkg_found="yes"
check_pkg_CPPFLAGS_save="${CPPFLAGS}"
check_pkg_LDFLAGS_save="${LDFLAGS}"
check_pkg_LIBS_save="${LIBS}"
AC_ARG_WITH([rocm],
[AS_HELP_STRING([--with-rocm=PATH], [Path to non-standard ROCm installation])])
AS_IF([test -z "${with-rocm}" -o "{with_rocm}" = "yes"],
[],
[test "${with_rocm}" = "no"],
[check_pkg_found=no],
[AS_IF([test -d ${with_rocm}/lib64], [check_pkg_libdir="lib64"], [check_pkg_libdir="lib"])
CPPFLAGS="-I${with_rocm}/include ${CPPFLAGS}"
LDFLAGS="-L${with_rocm}/${check_pkg_libdir} ${LDFLAGS}"])
AS_IF([test "${check_pkg_found}" = "yes"],
[AC_CHECK_LIB([amdhip64], [hipMemAllocHost], [], [check_pkg_found=no])])
AS_IF([test "${check_pkg_found}" = "yes"],
[AC_CHECK_HEADERS([hip/hip_runtime_api.h], [], [check_pkg_found=no], [#define __HIP_PLATFORM_AMD__])])
AS_IF([test "${check_pkg_found}" = "yes"],
[check_pkg_define="yes"],
[check_pkg_define="no"
CPPFLAGS="${check_pkg_CPPFLAGS_save}"
LDFLAGS="${check_pkg_LDFLAGS_save}"
LIBS="${check_pkg_LIBS_save}"
])
AS_IF([test -n "${with_rocm}"],
[AS_IF([test "${check_pkg_define}" = "yes"],
[$1], [$2] )
], [$2]
)
AS_UNSET([check_pkg_found])
AS_UNSET([check_pkg_define])
AS_UNSET([check_pkg_CPPFLAGS_save])
AS_UNSET([check_pkg_LDFLAGS_save])
AS_UNSET([check_pkg_LIBS_save])
])
54 changes: 30 additions & 24 deletions src/Makefile.am
@@ -26,52 +26,58 @@ sources = \
nccl_ofi_ep_addr_list.c \
tracepoint.c

tuner_sources = \
tuner/nccl_ofi_regions.c \
tuner/nccl_ofi_tuner.c

if WANT_PLATFORM_AWS
sources += platform-aws.c
endif

if ENABLE_NEURON
sources += nccl_ofi_interface_neuron.c
else
sources += nccl_ofi_cuda.c \
nccl_ofi_interface_nvidia.c
endif

# Build an internal-only library that can be used by unit tests as
# well as the actual nccl_net.so / nccom_net.so libraries. This saves
# us writing dlopen() handlers for simple unit tests.
noinst_LTLIBRARIES = libinternal_net_plugin.la
libinternal_net_plugin_la_SOURCES = $(sources)
libinternal_net_plugin_la_LDFLAGS = -avoid-version

if ENABLE_NEURON
lib_LTLIBRARIES = libnccom-net.la
libnccom_net_la_SOURCES =
libnccom_net_la_LIBADD = libinternal_net_plugin.la
libnccom_net_la_LDFLAGS = -module -avoid-version
endif

if HAVE_CUDA
sources += nccl_ofi_cuda.c nccl_ofi_interface_nvidia.c
if WANT_PLATFORM_AWS
# NCCL tuner plugin
lib_LTLIBRARIES = libnccl-net.la libnccl-ofi-tuner.la
libnccl_ofi_tuner_la_SOURCES = $(tuner_sources)
libnccl_ofi_tuner_la_LDFLAGS = -module -avoid-version
else
lib_LTLIBRARIES = libnccl-net.la
endif

libnccl_net_la_SOURCES =
libnccl_net_la_LIBADD = libinternal_net_plugin.la
libnccl_net_la_LDFLAGS = -module -avoid-version
endif

if HAVE_ROCM
sources += nccl_ofi_rocm.c nccl_ofi_interface_nvidia.c

lib_LTLIBRARIES = librccl-net.la
librccl_net_la_SOURCES =
librccl_net_la_LIBADD = libinternal_net_plugin.la
librccl_net_la_LDFLAGS = -module -avoid-version
endif

# Build an internal-only library that can be used by unit tests as
# well as the actual nccl_net.so / nccom_net.so libraries. This saves
# us writing dlopen() handlers for simple unit tests.
noinst_LTLIBRARIES = libinternal_net_plugin.la
libinternal_net_plugin_la_SOURCES = $(sources)
libinternal_net_plugin_la_LDFLAGS = -avoid-version

#
# Tuner
#
noinst_LTLIBRARIES += libinternal_tuner_plugin.la
tuner_sources = \
tuner/nccl_ofi_regions.c \
tuner/nccl_ofi_tuner.c
libinternal_tuner_plugin_la_SOURCES = $(tuner_sources)
libinternal_tuner_plugin_la_LDFLAGS = -avoid-version

if HAVE_CUDA
if WANT_PLATFORM_AWS
# NCCL tuner plugin
lib_LTLIBRARIES += libnccl-ofi-tuner.la
libnccl_ofi_tuner_la_SOURCES = $(tuner_sources)
libnccl_ofi_tuner_la_LDFLAGS = -module -avoid-version
endif
endif
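
A hedged summary of the build artifacts implied by the Makefile fragments
above (paths are illustrative; libtool keeps intermediates under .libs until
"make install"):

   # CUDA build: NCCL plugin, plus the tuner plugin on AWS platforms
   ls src/.libs/libnccl-net.so* src/.libs/libnccl-ofi-tuner.so*
   # ROCm build: RCCL-compatible plugin; configure also disables the unit tests
   ls src/.libs/librccl-net.so*
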
2 changes: 1 addition & 1 deletion src/nccl_ofi_api.c
@@ -297,7 +297,7 @@ ncclResult_t nccl_net_ofi_regMr(void *comm, void *data, size_t size, int type,
/* Validate type of buffer */
bool valid_buffer_type = false;
if (type == NCCL_PTR_HOST) valid_buffer_type = true;
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_ROCM
if (type == NCCL_PTR_CUDA) valid_buffer_type = true;
#endif
#if HAVE_NEURON
6 changes: 4 additions & 2 deletions src/nccl_ofi_net.c
@@ -19,6 +19,8 @@
#include "nccl_ofi_tracepoint.h"
#if HAVE_CUDA
#include "nccl_ofi_cuda.h"
#elif HAVE_ROCM
#include "nccl_ofi_rocm.h"
#endif
#include "nccl_ofi_sendrecv.h"
#include "nccl_ofi_rdma.h"
@@ -151,7 +153,7 @@ int nccl_net_ofi_create_plugin(nccl_net_ofi_plugin_t **plugin_p)
assert(NCCL_OFI_IS_POWER_OF_TWO(system_page_size));
assert(system_page_size > 0);

#if HAVE_CUDA
#if HAVE_CUDA || HAVE_ROCM
ret = nccl_net_ofi_gpu_init();
if (ret != 0) {
NCCL_OFI_WARN("CUDA initialization failed.");
@@ -167,7 +169,7 @@ int nccl_net_ofi_create_plugin(nccl_net_ofi_plugin_t **plugin_p)

NCCL_OFI_INFO(NCCL_INIT | NCCL_NET, "Using CUDA driver version %d", cuda_version);
if (ofi_nccl_cuda_flush_enable()) {
if (nccl_net_ofi_gpuFlushGPUDirectRDMAWrites == NULL) {
if (!HAVE_FLUSH_GPU_DIRECT_RDMA_WRITE) {
NCCL_OFI_WARN("CUDA flush requested, but cuFlushGPUDirectRDMAWrites not found.");
cuda_flush = false;
} else {
4 changes: 3 additions & 1 deletion src/nccl_ofi_ofiutils.c
@@ -19,6 +19,8 @@
#include "nccl_ofi_tracepoint.h"
#if HAVE_CUDA
#include "nccl_ofi_cuda.h"
#elif HAVE_ROCM
#include "nccl_ofi_rocm.h"
#endif
#include "nccl_ofi_math.h"
#include "nccl_ofi_ofiutils.h"
@@ -342,7 +344,7 @@ int nccl_ofi_ofiutils_init_connection(int api_version, struct fi_info *info, str
* using the Libfabric 1.18 API with HMEM support.
*/
if (api_version == FI_VERSION(1,18) && support_gdr != GDR_UNSUPPORTED) {
#if (HAVE_CUDA && HAVE_DECL_FI_OPT_CUDA_API_PERMITTED)
#if ((HAVE_CUDA || HAVE_ROCM) && HAVE_DECL_FI_OPT_CUDA_API_PERMITTED)
bool optval = false;
ret = fi_setopt(&(*ep)->fid, FI_OPT_ENDPOINT,
FI_OPT_CUDA_API_PERMITTED, &optval,
4 changes: 3 additions & 1 deletion src/nccl_ofi_rdma.c
@@ -14,6 +14,8 @@
#include "nccl_ofi.h"
#if HAVE_CUDA
#include "nccl_ofi_cuda.h"
#elif HAVE_ROCM
#include "nccl_ofi_rocm.h"
#endif
#include "nccl_ofi_ep_addr_list.h"
#include "nccl_ofi_param.h"
@@ -385,7 +387,7 @@ static int set_mr_req_attr(nccl_ofi_idpool_t *key_pool, int dev_id,
mr_attr->access |= FI_READ;
mr_attr->iface = FI_HMEM_SYSTEM;
break;
#if HAVE_CUDA
#if HAVE_CUDA || HAVE_ROCM
case NCCL_PTR_CUDA:
mr_attr->access |= FI_REMOTE_READ;
mr_attr->iface = FI_HMEM_CUDA;
63 changes: 63 additions & 0 deletions src/nccl_ofi_rocm.c
@@ -0,0 +1,63 @@
/*
* Copyright (c) 2024 Hewlett Packard Enterprise Development LP
* Copyright (c) 2018-2023 Amazon.com, Inc. or its affiliates. All rights reserved.
* Copyright (c) 2015-2018, NVIDIA CORPORATION. All rights reserved.
*/

#include "config.h"

#include <dlfcn.h>

#include "nccl_ofi.h"
#include "nccl_ofi_rocm.h"

int nccl_net_ofi_gpuDriverGetVersion(int *driverVersion) {
return hipDriverGetVersion(driverVersion) == hipSuccess ? GPU_SUCCESS : GPU_ERROR;
}

int nccl_net_ofi_gpuCtxGetDevice(int *device) {
return hipGetDevice(device) == hipSuccess ? GPU_SUCCESS : GPU_ERROR;
}

int nccl_net_ofi_gpuDeviceGetCount(int *count) {
return hipGetDeviceCount(count) == hipSuccess ? GPU_SUCCESS : GPU_ERROR;
}

void *nccl_net_ofi_gpuFlushGPUDirectRDMAWrites = NULL;

int
nccl_net_ofi_gpu_init(void)
{
return 0;
}

int nccl_net_ofi_get_cuda_device(void *data, int *dev_id)
{
int ret = 0;
int cuda_device = -1;
unsigned int mem_type;
unsigned int device_ordinal;
hipError_t cuda_ret_mem = hipPointerGetAttribute(&mem_type,
HIP_POINTER_ATTRIBUTE_MEMORY_TYPE,
(hipDeviceptr_t) data);
hipError_t cuda_ret_dev = hipPointerGetAttribute(&device_ordinal,
HIP_POINTER_ATTRIBUTE_DEVICE_ORDINAL,
(hipDeviceptr_t) data);

if (cuda_ret_mem != hipSuccess || cuda_ret_dev != hipSuccess) {
ret = -ENOTSUP;
NCCL_OFI_WARN("Invalid buffer pointer provided");
goto exit;
}

if (mem_type == hipMemoryTypeDevice) {
cuda_device = device_ordinal;
} else {
ret = -EINVAL;
NCCL_OFI_WARN("Invalid type of buffer provided. Only device memory is expected for NCCL_PTR_CUDA type");
}

exit:
*dev_id = cuda_device;
return ret;
}
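
At run time the ROCm build is consumed the same way as the CUDA build; a
minimal sketch, assuming an install prefix of /opt/aws-ofi-nccl (the prefix
and RCCL's plugin search behavior are assumptions, not part of this commit):

   # Make librccl-net.so visible to RCCL's plugin loader
   export LD_LIBRARY_PATH=/opt/aws-ofi-nccl/lib:$LD_LIBRARY_PATH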