Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYNC] 2.22.3-1 #1426

Open
wants to merge 27 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
529ee69
Add description for regIsGlobal in the NET API documentation
sjeaugey Jun 14, 2024
178b6b7
2.22.3-1
sjeaugey Jun 11, 2024
1b972cd
Merge remote-tracking branch 'nccl/master' into develop
BertanDogancay Oct 4, 2024
cef4621
modify loadWorkBatchToShmem for WARP_SIZE of 64
BertanDogancay Oct 6, 2024
aed4ced
fix send/recv merge
BertanDogancay Oct 7, 2024
568eae6
fix multistream kernel launch
BertanDogancay Oct 15, 2024
e842e08
Merge remote-tracking branch 'rccl/develop' into 2.22-sync
BertanDogancay Oct 23, 2024
26128c8
msccl needs teardown before freeing comm
BertanDogancay Oct 24, 2024
874f7fc
missing chunksize optimizations
BertanDogancay Oct 28, 2024
6be8ad4
npkit fix
BertanDogancay Oct 28, 2024
50433c1
Maintain extra version info
BertanDogancay Oct 30, 2024
5ec2c59
fix colltrace
BertanDogancay Nov 4, 2024
8a2f939
use local tid when loading work batch to shmem
BertanDogancay Nov 8, 2024
315124b
Merge remote-tracking branch 'rccl/develop' into 2.22-sync
BertanDogancay Nov 8, 2024
98578c1
update notices.txt
BertanDogancay Nov 8, 2024
7695ff9
Merge remote-tracking branch 'rccl/develop' into 2.22-sync
BertanDogancay Nov 15, 2024
3b082ca
support up to 128 channels
BertanDogancay Nov 18, 2024
2cbbee4
Merge remote-tracking branch 'rccl/develop' into nccl-2.22-sync
BertanDogancay Nov 18, 2024
c6d742c
Allow max 2 work/batch
BertanDogancay Nov 25, 2024
f042066
Map channels in sequential order
BertanDogancay Dec 18, 2024
875a2b4
Fix partidx based on channel in device side
BertanDogancay Dec 18, 2024
164d91a
Merge remote-tracking branch 'rccl/develop' into nccl-2.22-sync
BertanDogancay Dec 18, 2024
7a65e8f
Convert to int8 in case of AlltoAllPivot
BertanDogancay Dec 19, 2024
1d91c67
Make adjustments for warp size 32
BertanDogancay Dec 21, 2024
6e4ea12
Temporarily disable alltoall pivot kernel
BertanDogancay Dec 21, 2024
0e160eb
Set NCCL_MAX_DEV_WORK_BATCH_BYTES to 128
BertanDogancay Jan 14, 2025
6c010be
Merge remote-tracking branch 'rccl/develop' into nccl-2.22-sync
BertanDogancay Jan 15, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 23 additions & 11 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,13 @@ set(SRC_FILES
src/graph/tuning.cc
src/graph/xml.cc
src/graph/xml.h
src/include/align.h
src/include/alloc.h
src/include/alt_rsmi.h
src/include/archinfo.h
src/include/api_trace.h
src/include/argcheck.h
src/include/BfdBacktrace.hpp
src/include/bitops.h
src/include/bootstrap.h
src/include/channel.h
src/include/checks.h
Expand Down Expand Up @@ -489,30 +490,40 @@ set(SRC_FILES
src/include/npkit/npkit.h
src/include/npkit/npkit_event.h
src/include/npkit/npkit_struct.h
src/include/nvtx3/nvToolsExt.h
src/include/nvtx3/nvToolsExtCounters.h
src/include/nvtx3/nvToolsExtCuda.h
src/include/nvtx3/nvToolsExtCudaRt.h
src/include/nvtx3/nvToolsExt.h
src/include/nvtx3/nvToolsExtMem.h
src/include/nvtx3/nvToolsExtMemCudaRt.h
src/include/nvtx3/nvToolsExtOpenCL.h
src/include/nvtx3/nvToolsExtPayload.h
src/include/nvtx3/nvToolsExtPayloadHelper.h
src/include/nvtx3/nvToolsExtSemanticsCounters.h
src/include/nvtx3/nvToolsExtSemanticsScope.h
src/include/nvtx3/nvToolsExtSync.h
src/include/nvtx3/nvtx3.hpp
src/include/nvtx3/nvtxDetail/nvtxExtHelperMacros.h
src/include/nvtx3/nvtxDetail/nvtxExtImpl.h
src/include/nvtx3/nvtxDetail/nvtxExtImplCounters_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtImplMem_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtImplMemCudaRt_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtImplPayload_v1.h
src/include/nvtx3/nvtxDetail/nvtxExtInit.h
src/include/nvtx3/nvtxDetail/nvtxExtPayloadHelperInternal.h
src/include/nvtx3/nvtxDetail/nvtxExtPayloadTypeInfo.h
src/include/nvtx3/nvtxDetail/nvtxExtTypes.h
src/include/nvtx3/nvtxDetail/nvtxImpl.h
src/include/nvtx3/nvtxDetail/nvtxImplCore.h
src/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h
src/include/nvtx3/nvtxDetail/nvtxImplCuda_v3.h
src/include/nvtx3/nvtxDetail/nvtxImpl.h
src/include/nvtx3/nvtxDetail/nvtxImplCudaRt_v3.h
src/include/nvtx3/nvtxDetail/nvtxImplOpenCL_v3.h
src/include/nvtx3/nvtxDetail/nvtxImplSync_v3.h
src/include/nvtx3/nvtxDetail/nvtxInit.h
src/include/nvtx3/nvtxDetail/nvtxInitDecls.h
src/include/nvtx3/nvtxDetail/nvtxInitDefs.h
src/include/nvtx3/nvtxDetail/nvtxInit.h
src/include/nvtx3/nvtxDetail/nvtxLinkOnce.h
src/include/nvtx3/nvtxDetail/nvtxTypes.h
src/include/nvtx3/nvtxExtDetail/nvtxExtImpl.h
src/include/nvtx3/nvtxExtDetail/nvtxExtImplPayload_v1.h
src/include/nvtx3/nvtxExtDetail/nvtxExtInit.h
src/include/nvtx3/nvtxExtDetail/nvtxExtPayloadTypeInfo.h
src/include/nvtx3/nvtxExtDetail/nvtxExtTypes.h
src/include/alt_rsmi.h
src/misc/alt_rsmi.cc
src/misc/archinfo.cc
src/misc/argcheck.cc
Expand Down Expand Up @@ -542,6 +553,7 @@ set(SRC_FILES
src/misc/msccl/msccl_setup.cc
src/misc/msccl/msccl_status.cc
src/transport/coll_net.cc
src/transport/generic.cc
src/transport/net.cc
src/transport/net_ib.cc
src/transport/net_socket.cc
Expand Down
4 changes: 2 additions & 2 deletions NOTICES.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
Notices and Licenses file
_______________________________________________________________

Dependencies on nvidia-nccl v2.21.5-1 (BSD3)
Dependencies on nvidia-nccl v2.22.3-1 (BSD3)

Copyright (c) 2015-2020, NVIDIA CORPORATION. All rights reserved.
Modifications Copyright (c) 2019-2023 Advanced Micro Devices, Inc. All rights reserved.
Modifications Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
Modifications Copyright (c) Microsoft Corporation. Licensed under the MIT License.

Redistribution and use in source and binary forms, with or without
Expand Down
4 changes: 2 additions & 2 deletions cmake/scripts/add_unroll.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,8 @@ if [[ "$HIP_FILE" =~ .*/src/device/.*\.h ]]; then
sed -i "s/\\(runRing<T[^>]*\\)>*/\\1, COLL_UNROLL>/" "$HIP_FILE"
sed -i "s/runTreeUpDown<T, RedOp, ProtoSimple<1, 1, COLL_UNROLL>>/runTreeUpDown<T, RedOp, ProtoSimple<1, 1, COLL_UNROLL>, COLL_UNROLL>/" "$HIP_FILE"
sed -i "s/\\(runTreeSplit<T[^>]*\\)>*/\\1, COLL_UNROLL>/" "$HIP_FILE"
sed -i "s/\\(struct RunWorkElement<ncclFunc[^>]*\\)>*/\\1, COLL_UNROLL>/" "$HIP_FILE"
sed -i "s/\\(struct RunWork<ncclFunc[^>]*\\)>*/\\1, COLL_UNROLL>/" "$HIP_FILE"
sed -i "s/\\(struct RunWorkColl<ncclFunc[^>]*\\)>*/\\1, COLL_UNROLL>/" "$HIP_FILE"
sed -i "s/\\(struct RunWorkBatch<ncclFunc[^>]*\\)>*/\\1, COLL_UNROLL>/" "$HIP_FILE"

echo "Added COLL_UNROLL template argument to $HIP_FILE"
fi
10 changes: 10 additions & 0 deletions ext-net/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,16 @@ set to `NCCL_PTR_HOST|NCCL_PTR_CUDA`, otherwise it should be set to `NCCL_PTR_HO
supports `dmabuf`, it should set `ptrSupport` to `NCCL_PTR_HOST|NCCL_PTR_CUDA|NCCL_PTR_DMABUF` and
provide a `regMrDmaBuf` function.

The `regIsGlobal` field allows NCCL to register buffers in advance, e.g. using a loopback connection,
and to expect that any later registration of a buffer contained within a previously registered one
will be nearly immediate, because the buffer is already known to the network adapter. A typical
implementation would maintain a registration cache: the call to `ncclCommRegister` creates the
initial cache entry by calling `regMr()` on a loopback connection. Any later NCCL operation then
calls `regMr()` again on the real connection with the real buffer (which may be at a different
offset within the original buffer, have a smaller size, etc.), followed by `deregMr()` right after.
The call to `ncclCommDeregister` should issue the final `deregMr()` and effectively remove the
mapping on the network adapter.

The `speed` field indicates the speed of the network port in Mbps (10^6 bits per second). This is
important to ensure proper optimization of flows within the node.

Expand Down
15 changes: 15 additions & 0 deletions ext-net/example/nccl/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*************************************************************************
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/

#ifndef COMMON_H_
#define COMMON_H_

/* Log levels; this is the type of the logger callback's first argument. */
typedef enum {
  NCCL_LOG_NONE    = 0,
  NCCL_LOG_VERSION = 1,
  NCCL_LOG_WARN    = 2,
  NCCL_LOG_INFO    = 3,
  NCCL_LOG_ABORT   = 4,
  NCCL_LOG_TRACE   = 5
} ncclDebugLogLevel;

/* Subsystem tags. Every named entry is a distinct single bit; NCCL_ALL has all bits set.
 * NOTE(review): presumably these are OR-ed into the logger's `flags` argument -- confirm. */
typedef enum {
  NCCL_INIT      = 1 << 0,
  NCCL_COLL      = 1 << 1,
  NCCL_P2P       = 1 << 2,
  NCCL_SHM       = 1 << 3,
  NCCL_NET       = 1 << 4,
  NCCL_GRAPH     = 1 << 5,
  NCCL_TUNING    = 1 << 6,
  NCCL_ENV       = 1 << 7,
  NCCL_ALLOC     = 1 << 8,
  NCCL_CALL      = 1 << 9,
  NCCL_PROXY     = 1 << 10,
  NCCL_NVLS      = 1 << 11,
  NCCL_BOOTSTRAP = 1 << 12,
  NCCL_REG       = 1 << 13,
  NCCL_ALL       = ~0
} ncclDebugLogSubSys;

/* Logger callback: level, flags, source file and line, then a format string followed by
 * variadic arguments (presumably printf-style -- confirm against the NCCL core). */
typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level,
                                  unsigned long flags,
                                  const char *file,
                                  int line,
                                  const char *fmt, ...);

#endif
1 change: 1 addition & 0 deletions ext-net/example/nccl/err.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ typedef enum { ncclSuccess = 0,
ncclSystemError = 2,
ncclInternalError = 3,
ncclInvalidArgument = 4,
ncclInvalidUsage = 5,
ncclRemoteError = 6 } ncclResult_t;

#endif
6 changes: 1 addition & 5 deletions ext-net/example/nccl/net.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include <stdint.h>
#include <stdlib.h>

#include "common.h"
#include "err.h"

#define NCCL_NET_HANDLE_MAXSIZE 128
Expand All @@ -19,11 +20,6 @@
// Maximum number of requests per comm object
#define NCCL_NET_MAX_REQUESTS 32

typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALLOC=256, NCCL_CALL=512, NCCL_ALL=~0} ncclDebugLogSubSys;

typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);

#include "net_v8.h"
#include "net_v7.h"
#include "net_v6.h"
Expand Down
4 changes: 2 additions & 2 deletions ext-net/example/nccl/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
*/

#ifndef NCCL_ERR_H_
#define NCCL_ERR_H_
#ifndef NCCL_TYPES_H_
#define NCCL_TYPES_H_

/* Data types */
typedef enum { ncclInt8 = 0, ncclChar = 0,
Expand Down
15 changes: 15 additions & 0 deletions ext-tuner/example/nccl/common.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/*************************************************************************
* Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
*
* See LICENSE.txt for license information
************************************************************************/

#ifndef COMMON_H_
#define COMMON_H_

/* Log levels; this is the type of the logger callback's first argument. */
typedef enum {
  NCCL_LOG_NONE    = 0,
  NCCL_LOG_VERSION = 1,
  NCCL_LOG_WARN    = 2,
  NCCL_LOG_INFO    = 3,
  NCCL_LOG_ABORT   = 4,
  NCCL_LOG_TRACE   = 5
} ncclDebugLogLevel;

/* Subsystem tags. Every named entry is a distinct single bit; NCCL_ALL has all bits set.
 * NOTE(review): presumably these are OR-ed into the logger's `flags` argument -- confirm. */
typedef enum {
  NCCL_INIT      = 1 << 0,
  NCCL_COLL      = 1 << 1,
  NCCL_P2P       = 1 << 2,
  NCCL_SHM       = 1 << 3,
  NCCL_NET       = 1 << 4,
  NCCL_GRAPH     = 1 << 5,
  NCCL_TUNING    = 1 << 6,
  NCCL_ENV       = 1 << 7,
  NCCL_ALLOC     = 1 << 8,
  NCCL_CALL      = 1 << 9,
  NCCL_PROXY     = 1 << 10,
  NCCL_NVLS      = 1 << 11,
  NCCL_BOOTSTRAP = 1 << 12,
  NCCL_REG       = 1 << 13,
  NCCL_ALL       = ~0
} ncclDebugLogSubSys;

/* Logger callback: level, flags, source file and line, then a format string followed by
 * variadic arguments (presumably printf-style -- confirm against the NCCL core). */
typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level,
                                  unsigned long flags,
                                  const char *file,
                                  int line,
                                  const char *fmt, ...);

#endif
17 changes: 17 additions & 0 deletions ext-tuner/example/nccl/err.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
* Copyright (c) 2017-2022, NVIDIA CORPORATION. All rights reserved.
*/

#ifndef NCCL_ERR_H_
#define NCCL_ERR_H_

/* Result/error codes used by plugins; ncclSuccess is 0, every other value is non-zero. */
typedef enum {
  ncclSuccess            = 0,
  ncclUnhandledCudaError = 1,
  ncclSystemError        = 2,
  ncclInternalError      = 3,
  ncclInvalidArgument    = 4,
  ncclInvalidUsage       = 5,
  ncclRemoteError        = 6
} ncclResult_t;

#endif
43 changes: 28 additions & 15 deletions ext-tuner/example/nccl/tuner.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,24 @@
#ifndef NCCL_TUNER_H_
#define NCCL_TUNER_H_

#include "nccl.h"
#include <stdint.h>
#include <stdlib.h>

typedef enum {NCCL_LOG_NONE=0, NCCL_LOG_VERSION=1, NCCL_LOG_WARN=2, NCCL_LOG_INFO=3, NCCL_LOG_ABORT=4, NCCL_LOG_TRACE=5} ncclDebugLogLevel;
typedef enum {NCCL_INIT=1, NCCL_COLL=2, NCCL_P2P=4, NCCL_SHM=8, NCCL_NET=16, NCCL_GRAPH=32, NCCL_TUNING=64, NCCL_ENV=128, NCCL_ALLOC=256, NCCL_CALL=512, NCCL_PROXY=1024, NCCL_NVLS=2048, NCCL_ALL=~0} ncclDebugLogSubSys;

typedef void (*ncclDebugLogger_t)(ncclDebugLogLevel level, unsigned long flags, const char *file, int line, const char *fmt, ...);
#include "common.h"
#include "err.h"

#define NCCL_NUM_FUNCTIONS 5 // Send/Recv not included for now
typedef enum { ncclFuncBroadcast, ncclFuncReduce, ncclFuncAllGather, ncclFuncReduceScatter, ncclFuncAllReduce, ncclFuncSendRecv, ncclFuncSend, ncclFuncRecv, ncclNumFuncs} ncclFunc_t;
/* Function identifiers, numbered explicitly.
 * Values 0..4 are the collectives counted by NCCL_NUM_FUNCTIONS; the send/recv
 * entries follow, and ncclNumFuncs is the total number of entries. */
typedef enum {
  ncclFuncBroadcast     = 0,
  ncclFuncReduce        = 1,
  ncclFuncAllGather     = 2,
  ncclFuncReduceScatter = 3,
  ncclFuncAllReduce     = 4,
  ncclFuncSendRecv      = 5,
  ncclFuncSend          = 6,
  ncclFuncRecv          = 7,
  ncclNumFuncs          = 8
} ncclFunc_t;

#define NCCL_NUM_ALGORITHMS 6 // Tree/Ring/CollNet*
#define NCCL_ALGO_UNDEF -1
Expand All @@ -33,6 +42,8 @@ typedef enum { ncclFuncBroadcast, ncclFuncReduce, ncclFuncAllGather, ncclFuncRed
#define NCCL_PROTO_LL128 1
#define NCCL_PROTO_SIMPLE 2

#define NCCL_ALGO_PROTO_IGNORE -1.0

// API to be implemented by external tuner
typedef struct {
// Name of the tuner
Expand All @@ -52,31 +63,33 @@ typedef struct {
// - context: tuner context object
// - collType: collective type , e.g., allreduce, allgather…
// - nBytes: collective size in bytes
// - collNetSupport: whether collnet supports this type
// - nvlsSupport: whether nvlink sharp supports this type
// - numPipeOps: number of operations in the group
// - numAlgo: number of algorithms in collCostTable
// - numProto: number of protocols in collCostTable
//
// Outputs:
// - algorithm: selected algorithm to be used for the given collective
// - protocol: selected protocol to be used for the given collective
// - nChannels: number of channels (hence SMs) to be used.
//
// InOut:
// - collCostTable: collective cost table, generated by NCCL core, containing algo|proto|time entries for collType.
// NCCL core sets ignored algo/proto cost table entries to -1.0 (NCCL_ALGO_PROTO_IGNORE).
//
// If getCollInfo() does not return ncclSuccess, NCCL will fall back to the
// default tuning for the given collective.
// Also, the plugin is allowed to not set any output, or set only the
// algorithm and protocol, but not only the algorithm or only the protocol.
// Unset fields will be set automatically by NCCL.
ncclResult_t (*getCollInfo)(void* context, ncclFunc_t collType, size_t nBytes,
int collNetSupport, int nvlsSupport, int numPipeOps,
int *algorithm, int *protocol, int* nChannels);
int numPipeOps, float** collCostTable, int numAlgo, int numProto,
int* nChannels);

// Terminates the plugin and cleans up any resources that the plugin allocated.
// context: tuner context object
ncclResult_t (*destroy)(void* context);
} ncclTuner_v2_t;
} ncclTuner_v3_t;

typedef ncclTuner_v2_t ncclTuner_t;
typedef ncclTuner_v3_t ncclTuner_t;

#define NCCL_TUNER_PLUGIN_SYMBOL "ncclTunerPlugin_v2"
#define NCCL_TUNER_PLUGIN_SYMBOL "ncclTunerPlugin_v3"

#endif
4 changes: 2 additions & 2 deletions ext-tuner/example/plugin.c
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ ncclResult_t ncclTopoGetAlgoTime_Tuner(ncclFunc_t collType, int algorithm, int p
return ncclSuccess;
}

__hidden ncclResult_t pluginInit(size_t nRanks, size_t nNodes, ncclDebugLogger_t logFunction, void **context) {
__hidden ncclResult_t pluginInit(size_t nRanks, size_t nNodes, ncclDebugLogger_t logFunction) {
if (nRanks <= 1) return ncclSuccess;
int compCapIndex = HOPPER_COMPCAP_IDX;
int index2 = nNodes <= 2 ? nNodes-1 : 2;
Expand Down Expand Up @@ -226,7 +226,7 @@ __hidden ncclResult_t pluginDestroy(void* context) { return ncclSuccess; }

#define PLUGIN_NAME "Example"

const ncclTuner_v2_t ncclTunerPlugin_v2 = {
const ncclTuner_v3_t ncclTunerPlugin_v3 = {
.name = PLUGIN_NAME,
.init = pluginInit,
.getCollInfo = pluginGetCollInfo,
Expand Down
4 changes: 2 additions & 2 deletions makefiles/version.mk
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
##### version
NCCL_MAJOR := 2
NCCL_MINOR := 21
NCCL_PATCH := 5
NCCL_MINOR := 22
NCCL_PATCH := 3
NCCL_SUFFIX :=
PKG_REVISION := 1
5 changes: 3 additions & 2 deletions src/bootstrap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,6 @@ ncclResult_t bootstrapCreateRoot(struct ncclBootstrapHandle* handle, bool idFrom

ncclResult_t bootstrapGetUniqueId(struct ncclBootstrapHandle* handle) {
memset(handle, 0, sizeof(ncclBootstrapHandle));
NCCLCHECK(getRandomData(&handle->magic, sizeof(handle->magic)));

const char* env = ncclGetEnv("NCCL_COMM_ID");
if (env) {
Expand All @@ -211,7 +210,9 @@ ncclResult_t bootstrapGetUniqueId(struct ncclBootstrapHandle* handle) {
WARN("Invalid NCCL_COMM_ID, please use format: <ipv4>:<port> or [<ipv6>]:<port> or <hostname>:<port>");
return ncclInvalidArgument;
}
handle->magic = NCCL_MAGIC;
} else {
NCCLCHECK(getRandomData(&handle->magic, sizeof(handle->magic)));
memcpy(&handle->addr, &bootstrapNetIfAddr, sizeof(union ncclSocketAddress));
NCCLCHECK(bootstrapCreateRoot(handle, false));
}
Expand Down Expand Up @@ -631,7 +632,7 @@ ncclResult_t bootstrapClose(void* commState) {
struct bootstrapState* state = (struct bootstrapState*)commState;
if (state->unexpectedConnections != NULL) {
unexpectedFree(state);
if (__atomic_load_n(state->abortFlag, __ATOMIC_RELAXED) == 0) {
if (__atomic_load_n(state->abortFlag, __ATOMIC_ACQUIRE) == 0) {
WARN("Unexpected connections are not empty");
return ncclInternalError;
}
Expand Down
8 changes: 5 additions & 3 deletions src/channel.cc
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,17 @@
#include "channel.h"
#include "param.h"
#include "gdrwrap.h"
#include "transport.h"

ncclResult_t initChannel(struct ncclComm* comm, int channelId) {
struct ncclChannel* channel = &comm->channels[channelId];
if (channel->id != -1) return ncclSuccess;

int nRanks = comm->nRanks;
int nvlsRanks = comm->MNNVL ? comm->clique.size : comm->localRanks;
int nvlsRanks = comm->localRanks;
int nPeers = nRanks + 1 /* Collnet */ + nvlsRanks /* NVLS */;
channel->id = channelId;
channel->workFifoSent = 0;
channel->workFifoProduced = 0;

struct ncclSharedResources* sharedRes = comm->sharedRes;

Expand Down Expand Up @@ -74,7 +75,8 @@ ncclResult_t initNvlsChannel(struct ncclComm* comm, int channelId, struct ncclCo

NCCLCHECK(ncclStrongStreamAcquireUncaptured(&sharedRes->deviceStream));

int nvlsRanks = comm->MNNVL ? comm->clique.size : comm->localRanks;
int nvlsRanks = comm->localRanks;

if (share) {
channel->nvlsPeers = parent->channels[channelId].nvlsPeers;
channel->nvlsDevPeers = parent->channels[channelId].nvlsDevPeers;
Expand Down
Loading