huggingface · pacman100 · Aug 17, 2023 · Jul 26, 2023 · Jul 26, 2023 · Jul 26, 2023
diff --git a/docs/source/en/internal/trainer_utils.md b/docs/source/en/internal/trainer_utils.md
@@ -32,6 +32,8 @@ Most of those are only useful if you are studying the code of the Trainer in the
 
 [[autodoc]] torch_distributed_zero_first
 
+[[autodoc]] load_pretrained_model_only_on_rank0
+
 ## Callbacks internals
 
 [[autodoc]] trainer_callback.CallbackHandler

diff --git a/src/transformers/__init__.py b/src/transformers/__init__.py
@@ -3059,7 +3059,10 @@
     _import_structure["sagemaker"] = []
     _import_structure["time_series_utils"] = []
     _import_structure["trainer"] = ["Trainer"]
-    _import_structure["trainer_pt_utils"] = ["torch_distributed_zero_first"]
+    _import_structure["trainer_pt_utils"] = [
+        "load_pretrained_model_only_on_rank0",
+        "torch_distributed_zero_first",
+    ]
     _import_structure["trainer_seq2seq"] = ["Seq2SeqTrainer"]
 
 # TensorFlow-backed objects
@@ -6598,7 +6601,7 @@
 
         # Trainer
         from .trainer import Trainer
-        from .trainer_pt_utils import torch_distributed_zero_first
+        from .trainer_pt_utils import load_pretrained_model_only_on_rank0, torch_distributed_zero_first
         from .trainer_seq2seq import Seq2SeqTrainer
 
     # TensorFlow

diff --git a/src/transformers/trainer_pt_utils.py b/src/transformers/trainer_pt_utils.py
@@ -1125,3 +1125,16 @@ def smp_nested_concat(tensor):
         # It doesn't seem possible to check here if `tensor` is a StepOutput because StepOutput lives in `smp.step`
         # which is also the name of the decorator so Python is confused.
         return tensor.concat().detach().cpu()
+
+
+def load_pretrained_model_only_on_rank0(model_cls, config_cls, model_name_or_path):
+    """Load pretrained weights on the main process only; all other ranks build the model on the meta device."""
+    from accelerate.state import PartialState
+
+    # Only the main process calls `model_cls.from_pretrained`.
+    if PartialState().is_main_process:
+        return model_cls.from_pretrained(model_name_or_path, return_dict=True)
+    # Every other process instantiates the same model class from its config under the "meta" device context.
+    with torch.device("meta"):
+        meta_config = config_cls.from_pretrained(model_name_or_path)
+        return model_cls.from_config(meta_config)
diff --git a/src/transformers/utils/dummy_pt_objects.py b/src/transformers/utils/dummy_pt_objects.py
@@ -8493,6 +8493,10 @@ def __init__(self, *args, **kwargs):
         requires_backends(self, ["torch"])
 
 
+def load_pretrained_model_only_on_rank0(*args, **kwargs):
+    requires_backends(load_pretrained_model_only_on_rank0, ["torch"])  # NOTE(review): torch-missing placeholder; presumably raises here, confirm
+
+
 def torch_distributed_zero_first(*args, **kwargs):
     requires_backends(torch_distributed_zero_first, ["torch"])