Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tree: cleanup "gdr_support" variable #711

Draft
wants to merge 1 commit into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 1 addition & 5 deletions include/nccl_ofi.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,15 +81,11 @@ extern "C" {
/* Initial number of entries in the MR cache of a device */
#define NCCL_OFI_MR_CACHE_INIT_SIZE 128

/* Indicates if GPUDirect is supported by libfabric provider */
enum gdr_support_level_t {GDR_UNKNOWN, GDR_SUPPORTED, GDR_UNSUPPORTED};
extern enum gdr_support_level_t support_gdr;


/* Indicates if the cudaDeviceFlushGPUDirectRDMAWrites function should be used
* to flush data to the GPU. Note, CUDA flush is not supported on all
* platforms and should be disabled by default */
extern bool cuda_flush;
extern bool gdr_flush_disabled;

/* number of duplicate providers to create for each discovered
* provider, including renaming to cause NCCL to create additional
Expand Down
17 changes: 17 additions & 0 deletions include/nccl_ofi_cuda.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,23 @@ bool nccl_net_ofi_cuda_have_dma_buf_attr(void);
*/
bool nccl_net_ofi_cuda_have_gdr_support_attr(void);

/*
* @brief query CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS
*
* @return true if the attribute is fetched successfully and reports the
* CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST flush option.
* false otherwise
*/
bool nccl_net_ofi_cuda_have_gdr_flush_support_attr(void);

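/*
* @brief test whether gdrcopy can possibly be supported, depending on the
* libfabric version the plugin was built against, the DISABLE_GDRCOPY
* parameter, and the properties exposed by cuda.
*
* @return true if libfabric is 1.18 or newer, gdrcopy has not been
* disabled by the user, and cuda reports
* CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED.
* false otherwise
*/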
bool nccl_net_ofi_cuda_gdr_viable(void);

#ifdef __cplusplus
} // End extern "C"
#endif
Expand Down
3 changes: 3 additions & 0 deletions include/nccl_ofi_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,9 @@ OFI_NCCL_PARAM_INT(disable_gdr_required_check, "DISABLE_GDR_REQUIRED_CHECK", 0);
*/
OFI_NCCL_PARAM_INT(disable_dmabuf, "DISABLE_DMABUF", 0);

/* Largely exists for parity with DISABLE_DMABUF, but usage of this is discouraged. */
OFI_NCCL_PARAM_INT(disable_gdrcopy, "DISABLE_GDRCOPY", 0);

/*
* Messages sized larger than this threshold will be striped across multiple rails
*/
Expand Down
10 changes: 5 additions & 5 deletions m4/check_pkg_cuda.m4
Original file line number Diff line number Diff line change
Expand Up @@ -53,15 +53,15 @@ AC_DEFUN([CHECK_PKG_CUDA], [
[check_pkg_found=no],
[-ldl -lrt])])

check_cuda_gdr_flush_define=0
check_cuda_gdr_define=0
AS_IF([test "${check_pkg_found}" = "yes"],
[
AC_MSG_CHECKING([if CUDA 11.3+ is available for GDR Write Flush support])
AC_MSG_CHECKING([if CUDA 11.3+ is available for GDR + GDR Write Flush support])
AC_COMPILE_IFELSE([AC_LANG_PROGRAM([
#include <cuda.h>
_Static_assert(CUDA_VERSION >= 11030, "cudart>=11030 required for cuFlushGPUDirectRDMAWrites");
])],[ check_cuda_gdr_flush_define=1 chk_result=yes ],
[ check_cuda_gdr_flush_define=0 chk_result=no ])
])],[ check_cuda_gdr_define=1 chk_result=yes ],
[ check_cuda_gdr_define=0 chk_result=no ])
AC_MSG_RESULT(${chk_result})
])

Expand All @@ -85,7 +85,7 @@ AC_DEFUN([CHECK_PKG_CUDA], [

AC_DEFINE_UNQUOTED([HAVE_CUDA], [${check_pkg_define}], [Defined to 1 if CUDA is available])
AC_DEFINE_UNQUOTED([HAVE_CUDA_DMABUF_SUPPORT], [${check_cuda_dmabuf_define}], [Defined to 1 if CUDA DMA-BUF support is available])
AC_DEFINE_UNQUOTED([HAVE_CUDA_GDRFLUSH_SUPPORT], [${check_cuda_gdr_flush_define}], [Defined to 1 if CUDA cuFlushGPUDirectRDMAWrites support is available])
AC_DEFINE_UNQUOTED([HAVE_CUDA_GDR_SUPPORT], [${check_cuda_gdr_define}], [Defined to 1 if CUDA cuFlushGPUDirectRDMAWrites support is available])
AM_CONDITIONAL([HAVE_CUDA], [test "${check_pkg_found}" = "yes"])

AC_SUBST([CUDA_LDFLAGS])
Expand Down
66 changes: 55 additions & 11 deletions src/nccl_ofi_cuda.c
Original file line number Diff line number Diff line change
Expand Up @@ -69,19 +69,23 @@ int nccl_net_ofi_cuda_init(void)
RESOLVE_CUDA_FUNCTION(cuCtxGetDevice);
RESOLVE_CUDA_FUNCTION(cuDeviceGetAttribute);

if (HAVE_CUDA_GDRFLUSH_SUPPORT && nccl_net_ofi_cuda_have_gdr_support_attr() && ofi_nccl_cuda_flush_enable()) {
NCCL_OFI_WARN("CUDA flush enabled");
cuda_flush = true;
} else {
cuda_flush = ofi_nccl_cuda_flush_enable();
gdr_flush_disabled = ofi_nccl_gdr_flush_disable();

#if HAVE_CUDA_GDR_SUPPORT
if (!(nccl_net_ofi_cuda_gdr_viable() &&
nccl_net_ofi_cuda_have_gdr_flush_support_attr())) {
gdr_flush_disabled = true;
cuda_flush = false;
}
#endif

return 0;
}

int nccl_net_ofi_cuda_flush_gpudirect_rdma_writes(void)
{
#if HAVE_CUDA_GDRFLUSH_SUPPORT
#if HAVE_CUDA_GDR_SUPPORT
static_assert(CUDA_VERSION >= 11030, "Requires cudart>=11.3");
cudaError_t ret = cudaDeviceFlushGPUDirectRDMAWrites(cudaFlushGPUDirectRDMAWritesTargetCurrentDevice,
cudaFlushGPUDirectRDMAWritesToOwner);
Expand Down Expand Up @@ -129,9 +133,9 @@ int nccl_net_ofi_get_cuda_device_for_addr(void *data, int *dev_id)
};
}

bool nccl_net_ofi_cuda_have_gdr_support_attr(void)
bool nccl_net_ofi_cuda_have_gdr_flush_support_attr(void)
{
#if HAVE_CUDA_GDRFLUSH_SUPPORT
#if HAVE_CUDA_GDR_SUPPORT
if (pfn_cuCtxGetDevice == NULL || pfn_cuDeviceGetAttribute == NULL) {
return false;
}
Expand All @@ -143,13 +147,29 @@ bool nccl_net_ofi_cuda_have_gdr_support_attr(void)
}

int supported;
result = pfn_cuDeviceGetAttribute(&supported, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, dev);
if (result != CUDA_SUCCESS || !((bool)supported)) {
result = pfn_cuDeviceGetAttribute(&supported, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, dev);
return result == CUDA_SUCCESS && ((supported & CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) != 0);
#else
return false;
#endif
}

bool nccl_net_ofi_cuda_have_gdr_support_attr(void)
{
#if HAVE_CUDA_GDR_SUPPORT
if (pfn_cuCtxGetDevice == NULL || pfn_cuDeviceGetAttribute == NULL) {
return false;
}

result = pfn_cuDeviceGetAttribute(&supported, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_FLUSH_WRITES_OPTIONS, dev);
return result == CUDA_SUCCESS && ((supported & CU_FLUSH_GPU_DIRECT_RDMA_WRITES_OPTION_HOST) != 0);
CUdevice dev;
CUresult result = pfn_cuCtxGetDevice(&dev);
if (result != CUDA_SUCCESS) {
return false;
}

int supported;
result = pfn_cuDeviceGetAttribute(&supported, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED, dev);
return result == CUDA_SUCCESS && (bool)supported;
#else
return false;
#endif
Expand Down Expand Up @@ -179,3 +199,27 @@ bool nccl_net_ofi_cuda_have_dma_buf_attr(void)
return false;
#endif
}

/*
 * Decide whether GDR can be used at all in this process.
 *
 * Three conditions are checked in order, and the first one that fails is
 * traced and ends the evaluation:
 *   1. the libfabric headers used at build time are version 1.18 or newer;
 *   2. the user has not set the disable_gdrcopy parameter;
 *   3. CUDA reports CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED.
 *
 * Returns true only when all three conditions hold, false otherwise.
 */
bool nccl_net_ofi_cuda_gdr_viable(void)
{
	bool viable = false;

	if (FI_VERSION_LT(FI_VERSION(FI_MAJOR_VERSION, FI_MINOR_VERSION), FI_VERSION(1, 18))) {
		/* Built against a libfabric that is too old. */
		NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET, "Will not use GDR, requires Libfabric 1.18 or greater.");
	} else if (ofi_nccl_disable_gdrcopy()) {
		/* The user explicitly opted out. */
		NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET, "Will not attempt to use GDRCopy, explicitly disabled by user.");
	} else if (!nccl_net_ofi_cuda_have_gdr_support_attr()) {
		/* The CUDA device attribute does not report GDR support. */
		NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET,
			       "Will not attempt to use GDRCopy, CU_DEVICE_ATTRIBUTE_GPU_DIRECT_RDMA_SUPPORTED was false.");
	} else {
		viable = true;
	}

	return viable;
}
94 changes: 25 additions & 69 deletions src/nccl_ofi_net.c
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,11 @@
#include "nccl_ofi_ofiutils.h"
#include "nccl_ofi_system.h"

/* Indicates if GPUDirect is supported by libfabric provider */
enum gdr_support_level_t support_gdr = GDR_UNKNOWN;

/* Indicates if the cudaDeviceFlushGPUDirectRDMAWrites function should be used
* to flush data to the GPU. Note, CUDA flush is not supported on all
* platforms and should be disabled by default */
bool cuda_flush = false;
bool gdr_flush_disabled = true;

/* number of duplicate providers to create for each discovered
* provider, including renaming to cause NCCL to create additional
Expand Down Expand Up @@ -136,9 +134,6 @@ int nccl_net_ofi_create_plugin(nccl_net_ofi_plugin_t **plugin_p)
int ret = 0;
const char *provider_filter = NULL;
nccl_net_ofi_plugin_t *plugin;
nccl_net_ofi_ep_t *base_ep = NULL;
nccl_net_ofi_device_t *device = NULL;
nccl_ofi_properties_t properties;

NCCL_OFI_INFO(NCCL_INIT | NCCL_NET, "Initializing " PACKAGE_STRING);

Expand All @@ -162,6 +157,11 @@ int nccl_net_ofi_create_plugin(nccl_net_ofi_plugin_t **plugin_p)
*/
mr_cache_alignment = NCCL_OFI_MIN(system_page_size, NCCL_OFI_CACHE_PAGE_SIZE);

/* configuration parameters */
nic_dup_conns = ofi_nccl_nic_dup_conns();
net_latency = (float)ofi_nccl_net_latency();
cq_read_count = ofi_nccl_cq_read_count();

#if HAVE_CUDA
ret = nccl_net_ofi_cuda_init();
if (ret != 0) {
Expand All @@ -170,17 +170,22 @@ int nccl_net_ofi_create_plugin(nccl_net_ofi_plugin_t **plugin_p)
}
#endif

/* configuration parameters */
nic_dup_conns = ofi_nccl_nic_dup_conns();
net_latency = (float)ofi_nccl_net_latency();
cq_read_count = ofi_nccl_cq_read_count();

if (platform_init) {
ret = platform_init(&provider_filter);
if (ret != 0)
goto exit;
}

#if HAVE_CUDA
if (nic_dup_conns > 0 && nccl_net_ofi_cuda_have_gdr_support_attr()) {
NCCL_OFI_WARN(
"NCCL_OFI_NIC_DUP_CONNS set on platform that supports GPUDirect RDMA. This configuration is not "
"supported.");
ret = -ENOTSUP;
goto exit;
}
#endif

/* This is ugly, but here's the basic protocol selection
* logic:
* 1. if the user set NCCL_OFI_PROTOCOL, use that.
Expand Down Expand Up @@ -285,55 +290,6 @@ int nccl_net_ofi_create_plugin(nccl_net_ofi_plugin_t **plugin_p)
goto exit;
}

/* In order to set endpoint options and potentially NCCL configuration
* options (such as NCCL_PROTO) during the plugin initialization
* process, we need to create an endpoint and call the platform hook
* "platform_config_endpoint" using "get_ep". This code makes the
* assumption that the thread calling "nccl_net_ofi_init" will make
* communication calls. As well, since without this code the endpoint
* would be created the first time "get_ep" in called during a listen or
* connect call, creating the endpoint earlier would not be a waste of
* resources. This initialization happens once per process, and thus it
* does not matter which device is used to create the endpoint.
*/
device = plugin->get_device(plugin, 0);

ret = device->get_ep(device, &base_ep);
if (ret != 0) {
goto exit;
}
ret = device->get_properties(device, &properties);
if (ret != 0) {
goto exit;
}
NCCL_OFI_INFO(NCCL_NET | NCCL_INIT, "Support for global registrations: %s",
(properties.regIsGlobal == 0) ? "false" : "true");
NCCL_OFI_INFO(NCCL_NET | NCCL_INIT, "Support for DMA-BUF registrations: %s",
(properties.dmabuf_support == 0) ? "false" : "true");
/* Cause release to not actually free the resources, to speed
* up initialization, since the very same resources will be
* recreated by NCCL soon after initialization to do real
* communication.
*/
base_ep->ref_cnt++;
ret = base_ep->release_ep(base_ep);
base_ep->ref_cnt--;
if (ret != 0) {
goto exit;
}

assert(support_gdr != GDR_UNKNOWN);

/* we don't actually know if GDR is supported until we've
* created the first endpoint, so this check needs to be way
* down here
*/
if (nic_dup_conns > 0 && support_gdr != GDR_UNSUPPORTED) {
NCCL_OFI_WARN("NCCL_OFI_NIC_DUP_CONNS set on platform that supports GPUDirect RDMA. This configuration is not supported.");
ret = -ENOTSUP;
goto exit;
}

*plugin_p = plugin;

exit:
Expand Down Expand Up @@ -416,12 +372,7 @@ static int set_nic_props_default(int dev_id, struct fi_info *nic_prov,
*/
props->max_group_receives = NCCL_OFI_MAX_RECVS;

if (support_gdr == GDR_SUPPORTED) {
props->hmem_support = true;
} else {
props->hmem_support = false;
}

props->hmem_support = false;
props->dmabuf_support = false;

/* Should be successful for ptrSupport invocation */
Expand Down Expand Up @@ -580,14 +531,19 @@ int nccl_net_ofi_info_properties(nccl_net_ofi_plugin_t *plugin, struct fi_info *

props->max_mr_key_size = nic_prov->domain_attr->mr_key_size;

props->hmem_support = ((nic_prov->caps & FI_HMEM) != 0) &&
FI_VERSION_GE(nic_prov->fabric_attr->api_version, FI_VERSION(1, 18)) &&
(HAVE_NEURON || nccl_net_ofi_cuda_have_gdr_support_attr());

props->dmabuf_support = ((nic_prov->caps & FI_HMEM) != 0) &&
FI_VERSION_GE(nic_prov->fabric_attr->api_version, FI_VERSION(1, 20)) &&
nccl_ofi_dmabuf_viable()
;
if (props->dmabuf_support) {
NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET, "DMA-BUF support is advertised in properties.");
}

NCCL_OFI_TRACE(NCCL_INIT | NCCL_NET,
"NCCL properties: dmabuf=%s hmem=%s",
props->dmabuf_support ? "yes" : "no",
props->hmem_support ? "yes" : "no");

goto exit;
error:
Expand Down
Loading
Loading