Skip to content

Commit

Permalink
Copy jbaczek/mcore_parallel_state_api_change branch leaving out changes to nemo/export/quantize/quantizer.py
Browse files Browse the repository at this point in the history

Signed-off-by: Jan Baczek <[email protected]>
  • Loading branch information
jbaczek authored and vasunvidia committed Mar 26, 2024
1 parent 3f7823c commit b4f736e
Show file tree
Hide file tree
Showing 14 changed files with 16 additions and 16 deletions.
2 changes: 1 addition & 1 deletion examples/nlp/language_modeling/megatron_lm_ckpt_to_nemo.py
Original file line number Diff line number Diff line change
Expand Up @@ -529,7 +529,7 @@ def convert(local_rank, rank, world_size, args):

if args.nemo_file_path:
if args.model_type == 'gpt':
- if mcore_output and parallel_state.is_unitialized():
+ if mcore_output and not parallel_state.is_initialized():
parallel_state.initialize_model_parallel(
tensor_model_parallel_size=args.tensor_model_parallel_size,
pipeline_model_parallel_size=args.pipeline_model_parallel_size,
Expand Down
2 changes: 1 addition & 1 deletion examples/nlp/language_modeling/megatron_retro_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def main(cfg) -> None:
}

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/multimodal/data/common/webdataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ def run(self, src):
epoch = self.epoch
rng = random.Random()
# This seed to be deterministic AND the same across all nodes/workers in each epoch
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():
seed = self.seed + epoch
else:
seed = self.seed + epoch + (100 * parallel_state.get_data_parallel_rank())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,7 @@ def model_provider_func(self, pre_process, post_process):
media_end_id = self.tokenizer.token_to_id(DEFAULT_IM_END_TOKEN)

if self.mcore_gpt:
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down Expand Up @@ -1001,7 +1001,7 @@ def generate(
) -> OutputType:

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1424,7 +1424,7 @@ def generate(
) -> OutputType:

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -733,7 +733,7 @@ def generate(
):

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1012,7 +1012,7 @@ def encode(self, tokens_enc, enc_mask, encoder_input=None, batch_data=None, reco
Format is not defined and should match the expected format of the used hiddens modules.
"""
# Check whether the DDP is initialized. This is needed when running inference outside of training loop.
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ def generate(
) -> OutputType:

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
2 changes: 1 addition & 1 deletion nemo/collections/nlp/models/nlp_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,7 +385,7 @@ def load_from_checkpoint(
sharded_state_dict = model.sharded_state_dict()
checkpoint['state_dict'] = sharded_state_dict
# dist checkpointing needs torch.distributed to load the checkpoint
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
4 changes: 2 additions & 2 deletions nemo/collections/nlp/parts/nlp_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ def save_to(self, model, save_path: str):

sharded_state_dict = model.sharded_state_dict()
# dist checkpoint needs torch.distributed to save the checkpoint
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down Expand Up @@ -1074,7 +1074,7 @@ def restore_from(
# if we're using dist checkpointing then state_dict will be None
if state_dict is None:
# dist checkpointing needs torch.distributed to load the checkpoint
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
2 changes: 1 addition & 1 deletion nemo/core/optim/distributed_adam.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def __init__(
):

# Initialize process groups
- if 'process_group' not in kwargs and not parallel_state.is_unitialized():
+ if 'process_group' not in kwargs and parallel_state.is_initialized():
kwargs['process_group'] = parallel_state.get_data_parallel_group(with_context_parallel=True)
if disable_distributed_parameters:
world_size = torch.distributed.get_world_size()
Expand Down
2 changes: 1 addition & 1 deletion nemo/utils/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def gather_objects(partial_results_list, main_rank=None):
pickle.dump(predictions, open(output_fname, "wb"))
"""
# do not fail when DDP is not initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():
return partial_results_list

rank = parallel_state.get_data_parallel_rank()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ def main(cfg) -> None:
raise ValueError("need at least a nemo file or checkpoint dir")

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def main(cfg) -> None:
)

# check whether the DDP is initialized
- if parallel_state.is_unitialized():
+ if not parallel_state.is_initialized():

def dummy():
return
Expand Down

0 comments on commit b4f736e

Please sign in to comment.