Skip to content

Commit

Permalink
rdma: split parameters for posted ctrl and eager buffers
Browse files Browse the repository at this point in the history
Make posted buffer counts for control and eager bounce recv buffers
separately configurable, and set a higher count for ctrl buffers.

Signed-off-by: Eric Raut <eraut@amazon.com>
  • Loading branch information
rauteric committed Oct 4, 2024
1 parent bf176e6 commit 5f22617
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 9 deletions.
17 changes: 15 additions & 2 deletions include/nccl_ofi_param.h
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,27 @@ OFI_NCCL_PARAM_INT(disable_dmabuf, "DISABLE_DMABUF", 0);
OFI_NCCL_PARAM_UINT(round_robin_threshold, "ROUND_ROBIN_THRESHOLD", (256 * 1024));

/*
* Minimum bounce buffers posted per rail. The plugin will attempt to post
* Minimum ctrl recv buffers posted per rail. The plugin will attempt to post
* more buffers if we dip below this threshold, allocating new buffers if needed.
*/
OFI_NCCL_PARAM_INT(rdma_min_posted_ctrl_recv_buffers, "RDMA_MIN_POSTED_CTRL_RECV_BUFFERS", 64);

/*
* Maximum ctrl recv buffers posted per rail. The plugin will not attempt to
* post more buffers if we reach this threshold, returning available buffers to
* the free list if needed.
*/
OFI_NCCL_PARAM_INT(rdma_max_posted_ctrl_recv_buffers, "RDMA_MAX_POSTED_CTRL_RECV_BUFFERS", 128);

/*
* Minimum (eager) bounce buffers posted per rail. The plugin will attempt to post
* more bounce buffers if we dip below this threshold, allocating new bounce
* buffers if needed.
*/
OFI_NCCL_PARAM_INT(rdma_min_posted_bounce_buffers, "RDMA_MIN_POSTED_BOUNCE_BUFFERS", 16);

/*
* Maximum bounce buffers posted per rail. The plugin will not attempt to
* Maximum (eager) bounce buffers posted per rail. The plugin will not attempt to
* post more bounce buffers if we reach this threshold, returning available
* buffers to the free list if needed.
*/
Expand Down
21 changes: 14 additions & 7 deletions src/nccl_ofi_rdma.c
Original file line number Diff line number Diff line change
Expand Up @@ -5720,12 +5720,13 @@ static inline nccl_net_ofi_rdma_send_comm_t *calloc_rdma_send_comm(int num_rails
* non-zero, on error
*/
static inline int init_bounce_buffers_rail(nccl_net_ofi_ep_rail_t *ep_rail, nccl_net_ofi_rdma_ep_t *ep,
size_t buff_size, size_t entry_alignment)
size_t buff_size, size_t entry_alignment,
size_t min_posted_count, size_t max_posted_count)
{
int ret = 0;

ret = nccl_ofi_freelist_init(sizeof(nccl_net_ofi_rdma_req_t),
ofi_nccl_rdma_min_posted_bounce_buffers(), 16, 0,
max_posted_count, 16, 0,
&ep_rail->bounce_buff_reqs_fl);
if (ret != 0) {
NCCL_OFI_WARN("Failed to init bounce_buff_reqs_fl");
Expand All @@ -5734,18 +5735,20 @@ static inline int init_bounce_buffers_rail(nccl_net_ofi_ep_rail_t *ep_rail, nccl

ep_rail->buff_size = buff_size;
ret = nccl_ofi_freelist_init_mr(buff_size,
ofi_nccl_rdma_min_posted_bounce_buffers(), 16, 0,
max_posted_count, 16, 0,
freelist_regmr_host_fn, freelist_deregmr_host_fn,
ep, 0, entry_alignment, &ep_rail->bounce_buff_fl);
if (ret != 0) {
NCCL_OFI_WARN("Failed to init bounce_buff_fl");
goto error;
}

ep_rail->min_bounce_posted = ofi_nccl_rdma_min_posted_bounce_buffers();
ep_rail->max_bounce_posted = ofi_nccl_rdma_max_posted_bounce_buffers();
ep_rail->min_bounce_posted = min_posted_count;
ep_rail->max_bounce_posted = max_posted_count;
ep_rail->num_bounce_posted = 0;

assert(ep_rail->max_bounce_posted >= ep_rail->min_bounce_posted);

ret = nccl_net_ofi_mutex_init(&ep_rail->bounce_mutex, NULL);
if (ret != 0) {
goto error;
Expand Down Expand Up @@ -5803,7 +5806,9 @@ static inline int init_bounce_buffers(nccl_net_ofi_rdma_ep_t *ep)
sizeof(nccl_ofi_rdma_connection_info_t)),
sizeof(nccl_net_ofi_rdma_close_msg_t));
ret = init_bounce_buffers_rail(&ep->control_rail, ep,
buff_size, BOUNCE_BUFFER_ALIGNMENT);
buff_size, BOUNCE_BUFFER_ALIGNMENT,
ofi_nccl_rdma_min_posted_ctrl_recv_buffers(),
ofi_nccl_rdma_max_posted_ctrl_recv_buffers());
if (ret != 0) {
return ret;
}
Expand All @@ -5817,7 +5822,9 @@ static inline int init_bounce_buffers(nccl_net_ofi_rdma_ep_t *ep)
nccl_net_ofi_ep_rail_t *rail = get_rail(ep, rail_id);

ret = init_bounce_buffers_rail(rail, ep, buff_size,
BOUNCE_BUFFER_ALIGNMENT);
BOUNCE_BUFFER_ALIGNMENT,
ofi_nccl_rdma_min_posted_bounce_buffers(),
ofi_nccl_rdma_max_posted_bounce_buffers());
if (ret != 0) {

/* Cleanup previously-established rails */
Expand Down

0 comments on commit 5f22617

Please sign in to comment.