diff --git a/examples/flax/image-captioning/run_image_captioning_flax.py b/examples/flax/image-captioning/run_image_captioning_flax.py index 23f8a4d5ad6ee8..149d3abff5e300 100644 --- a/examples/flax/image-captioning/run_image_captioning_flax.py +++ b/examples/flax/image-captioning/run_image_captioning_flax.py @@ -52,7 +52,7 @@ HfArgumentParser, is_tensorboard_available, ) -from transformers.utils import get_full_repo_name, is_offline_mode +from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry logger = logging.getLogger(__name__) @@ -388,6 +388,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_image_captioning", model_args, data_args, framework="flax") + if ( os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir) diff --git a/examples/flax/language-modeling/run_clm_flax.py b/examples/flax/language-modeling/run_clm_flax.py index ef16e6e8866050..1bf088df29c495 100755 --- a/examples/flax/language-modeling/run_clm_flax.py +++ b/examples/flax/language-modeling/run_clm_flax.py @@ -58,7 +58,7 @@ set_seed, ) from transformers.testing_utils import CaptureLogger -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry logger = logging.getLogger(__name__) @@ -328,6 +328,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_clm", model_args, data_args, framework="flax") + if ( os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir) diff --git a/examples/flax/language-modeling/run_mlm_flax.py b/examples/flax/language-modeling/run_mlm_flax.py index 04796e83fa9c9f..3538ba268334a3 100755 --- a/examples/flax/language-modeling/run_mlm_flax.py +++ b/examples/flax/language-modeling/run_mlm_flax.py @@ -58,7 +58,7 @@ is_tensorboard_available, set_seed, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys()) @@ -365,6 +365,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_mlm", model_args, data_args, framework="flax") + if ( os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir) diff --git a/examples/flax/language-modeling/run_t5_mlm_flax.py b/examples/flax/language-modeling/run_t5_mlm_flax.py index e5ed47a8a56138..48a58b60c0a821 100755 --- a/examples/flax/language-modeling/run_t5_mlm_flax.py +++ b/examples/flax/language-modeling/run_t5_mlm_flax.py @@ -57,7 +57,7 @@ set_seed, ) from transformers.models.t5.modeling_flax_t5 import shift_tokens_right -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry MODEL_CONFIG_CLASSES = list(FLAX_MODEL_FOR_MASKED_LM_MAPPING.keys()) @@ -498,6 +498,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. 
The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_t5_mlm", model_args, data_args, framework="flax") + if ( os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir) diff --git a/examples/flax/question-answering/run_qa.py b/examples/flax/question-answering/run_qa.py index eafbe876e178a2..5c4fe750a993e5 100644 --- a/examples/flax/question-answering/run_qa.py +++ b/examples/flax/question-answering/run_qa.py @@ -53,7 +53,7 @@ PreTrainedTokenizerFast, is_tensorboard_available, ) -from transformers.utils import check_min_version, get_full_repo_name +from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry from utils_qa import postprocess_qa_predictions @@ -424,6 +424,10 @@ def main(): model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_qa", model_args, data_args, framework="flax") # endregion # region Logging diff --git a/examples/flax/summarization/run_summarization_flax.py b/examples/flax/summarization/run_summarization_flax.py index 9730daa163062d..0de02fe950f901 100644 --- a/examples/flax/summarization/run_summarization_flax.py +++ b/examples/flax/summarization/run_summarization_flax.py @@ -54,7 +54,7 @@ HfArgumentParser, is_tensorboard_available, ) -from transformers.utils import get_full_repo_name, is_offline_mode +from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry logger = logging.getLogger(__name__) @@ -399,6 +399,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_summarization", model_args, data_args, framework="flax") + if ( os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir) diff --git a/examples/flax/text-classification/run_flax_glue.py b/examples/flax/text-classification/run_flax_glue.py index 59e28397d121e1..d32f70a4c165dc 100755 --- a/examples/flax/text-classification/run_flax_glue.py +++ b/examples/flax/text-classification/run_flax_glue.py @@ -48,7 +48,7 @@ TrainingArguments, is_tensorboard_available, ) -from transformers.utils import check_min_version, get_full_repo_name +from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry logger = logging.getLogger(__name__) @@ -308,6 +308,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_glue", model_args, data_args, framework="flax") + # Make one log on every process with the configuration for debugging. logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/flax/token-classification/run_flax_ner.py b/examples/flax/token-classification/run_flax_ner.py index 79a1e85fb26103..2d6f37f9350ec4 100644 --- a/examples/flax/token-classification/run_flax_ner.py +++ b/examples/flax/token-classification/run_flax_ner.py @@ -47,7 +47,7 @@ HfArgumentParser, is_tensorboard_available, ) -from transformers.utils import check_min_version, get_full_repo_name +from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -366,6 +366,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_ner", model_args, data_args, framework="flax") + # Make one log on every process with the configuration for debugging. logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/flax/vision/run_image_classification.py b/examples/flax/vision/run_image_classification.py index d00f99399a3795..d8ddd13cefcd38 100644 --- a/examples/flax/vision/run_image_classification.py +++ b/examples/flax/vision/run_image_classification.py @@ -53,7 +53,7 @@ is_tensorboard_available, set_seed, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry logger = logging.getLogger(__name__) @@ -256,6 +256,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_image_classification", model_args, data_args, framework="flax") + if ( os.path.exists(training_args.output_dir) and os.listdir(training_args.output_dir) diff --git a/examples/pytorch/audio-classification/run_audio_classification.py b/examples/pytorch/audio-classification/run_audio_classification.py index c6dd2e6342ec64..88ca51af3f6be5 100644 --- a/examples/pytorch/audio-classification/run_audio_classification.py +++ b/examples/pytorch/audio-classification/run_audio_classification.py @@ -37,7 +37,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -197,6 +197,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_audio_classification", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/contrastive-image-text/run_clip.py b/examples/pytorch/contrastive-image-text/run_clip.py index ff8944c705217a..4ed5123ae0edef 100644 --- a/examples/pytorch/contrastive-image-text/run_clip.py +++ b/examples/pytorch/contrastive-image-text/run_clip.py @@ -47,7 +47,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -233,6 +233,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_clip", model_args, data_args) + # 2. Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/image-classification/run_image_classification.py b/examples/pytorch/image-classification/run_image_classification.py index a1a4fd079893c4..ac6cf0238cb71b 100644 --- a/examples/pytorch/image-classification/run_image_classification.py +++ b/examples/pytorch/image-classification/run_image_classification.py @@ -45,7 +45,7 @@ TrainingArguments, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -175,6 +175,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_image_classification", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/image-classification/run_image_classification_no_trainer.py b/examples/pytorch/image-classification/run_image_classification_no_trainer.py index 27965612fb0c3b..76b2059a1b0eee 100644 --- a/examples/pytorch/image-classification/run_image_classification_no_trainer.py +++ b/examples/pytorch/image-classification/run_image_classification_no_trainer.py @@ -47,7 +47,7 @@ SchedulerType, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -201,6 +201,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_image_classification_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. 
# If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/image-pretraining/run_mae.py b/examples/pytorch/image-pretraining/run_mae.py index 4765ed26f3b842..9d90f2665b1828 100644 --- a/examples/pytorch/image-pretraining/run_mae.py +++ b/examples/pytorch/image-pretraining/run_mae.py @@ -34,7 +34,7 @@ ViTMAEForPreTraining, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -175,6 +175,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_mae", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/image-pretraining/run_mim.py b/examples/pytorch/image-pretraining/run_mim.py index 429c726bca6f0f..8ad1dadae99555 100644 --- a/examples/pytorch/image-pretraining/run_mim.py +++ b/examples/pytorch/image-pretraining/run_mim.py @@ -37,7 +37,7 @@ TrainingArguments, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -239,6 +239,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_mim", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py index 9661ed217d618e..2cd8092b7fb8df 100755 --- a/examples/pytorch/language-modeling/run_clm.py +++ b/examples/pytorch/language-modeling/run_clm.py @@ -48,7 +48,7 @@ ) from transformers.testing_utils import CaptureLogger from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -214,6 +214,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_clm", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/language-modeling/run_clm_no_trainer.py b/examples/pytorch/language-modeling/run_clm_no_trainer.py index 06c56d501d7dc2..73d1ae086371fe 100755 --- a/examples/pytorch/language-modeling/run_clm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_clm_no_trainer.py @@ -52,7 +52,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -239,6 +239,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. 
The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_clm_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/language-modeling/run_mlm.py b/examples/pytorch/language-modeling/run_mlm.py index 4b7b7e1b696b26..0322ac30972766 100755 --- a/examples/pytorch/language-modeling/run_mlm.py +++ b/examples/pytorch/language-modeling/run_mlm.py @@ -47,7 +47,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -224,6 +224,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_mlm", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/language-modeling/run_mlm_no_trainer.py b/examples/pytorch/language-modeling/run_mlm_no_trainer.py index e128889c233458..32d42412e3deff 100755 --- a/examples/pytorch/language-modeling/run_mlm_no_trainer.py +++ b/examples/pytorch/language-modeling/run_mlm_no_trainer.py @@ -52,7 +52,7 @@ SchedulerType, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -248,6 +248,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_mlm_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/language-modeling/run_plm.py b/examples/pytorch/language-modeling/run_plm.py index 8808953d9758fe..78fc89f8305958 100755 --- a/examples/pytorch/language-modeling/run_plm.py +++ b/examples/pytorch/language-modeling/run_plm.py @@ -42,7 +42,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -220,6 +220,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_plm", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/multiple-choice/run_swag.py b/examples/pytorch/multiple-choice/run_swag.py index 1421bb4be638e2..32e31a7ff35c32 100755 --- a/examples/pytorch/multiple-choice/run_swag.py +++ b/examples/pytorch/multiple-choice/run_swag.py @@ -43,7 +43,7 @@ ) from transformers.tokenization_utils_base import PreTrainedTokenizerBase from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import PaddingStrategy, check_min_version +from transformers.utils import PaddingStrategy, check_min_version, send_example_telemetry # Will error if the minimal version of Transformers is not installed. Remove at your own risks. @@ -225,6 +225,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_swag", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/multiple-choice/run_swag_no_trainer.py b/examples/pytorch/multiple-choice/run_swag_no_trainer.py index 5ce2aade6cf316..6e948a315bf08b 100755 --- a/examples/pytorch/multiple-choice/run_swag_no_trainer.py +++ b/examples/pytorch/multiple-choice/run_swag_no_trainer.py @@ -51,7 +51,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import PaddingStrategy, get_full_repo_name +from transformers.utils import PaddingStrategy, get_full_repo_name, send_example_telemetry logger = get_logger(__name__) @@ -273,6 +273,10 @@ def __call__(self, features): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_swag_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py index 9b85625ec88f84..50c9557141c017 100755 --- a/examples/pytorch/question-answering/run_qa.py +++ b/examples/pytorch/question-answering/run_qa.py @@ -42,7 +42,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions @@ -226,6 +226,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_qa", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/question-answering/run_qa_beam_search.py b/examples/pytorch/question-answering/run_qa_beam_search.py index b7430405a63935..b73de15b452c9c 100755 --- a/examples/pytorch/question-answering/run_qa_beam_search.py +++ b/examples/pytorch/question-answering/run_qa_beam_search.py @@ -41,7 +41,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions_with_beam_search @@ -225,6 +225,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_qa_beam_search", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py index 8da97157d58754..d1547a49231f6c 100644 --- a/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_beam_search_no_trainer.py @@ -49,7 +49,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name +from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions_with_beam_search @@ -291,6 +291,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_qa_beam_search_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. 
# If we're using tracking, we also need to initialize it here and it will pick up all supported trackers in the environment accelerator = Accelerator(log_with="all", logging_dir=args.output_dir) if args.with_tracking else Accelerator() diff --git a/examples/pytorch/question-answering/run_qa_no_trainer.py b/examples/pytorch/question-answering/run_qa_no_trainer.py index 2701db085bdf54..8f6045386ae83e 100755 --- a/examples/pytorch/question-answering/run_qa_no_trainer.py +++ b/examples/pytorch/question-answering/run_qa_no_trainer.py @@ -50,7 +50,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import check_min_version, get_full_repo_name +from transformers.utils import check_min_version, get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version from utils_qa import postprocess_qa_predictions @@ -329,6 +329,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_qa_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. 
# If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/question-answering/run_seq2seq_qa.py b/examples/pytorch/question-answering/run_seq2seq_qa.py index 66a52b93f74da8..bd806cc033e810 100644 --- a/examples/pytorch/question-answering/run_seq2seq_qa.py +++ b/examples/pytorch/question-answering/run_seq2seq_qa.py @@ -39,7 +39,7 @@ set_seed, ) from transformers.trainer_utils import EvalLoopOutput, EvalPrediction, get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -271,6 +271,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_seq2seq_qa", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py index a86589ccae24f0..20e9b93a48c03f 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation.py @@ -42,7 +42,7 @@ default_data_collator, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -266,6 +266,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_semantic_segmentation", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py index 9b5ea272052349..37df263f5be4d4 100644 --- a/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py +++ b/examples/pytorch/semantic-segmentation/run_semantic_segmentation_no_trainer.py @@ -44,7 +44,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -315,6 +315,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_semantic_segmentation_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. 
# If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py index b5f06391ddd237..1f6125390da2db 100755 --- a/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py +++ b/examples/pytorch/speech-pretraining/run_wav2vec2_pretraining_no_trainer.py @@ -43,7 +43,7 @@ set_seed, ) from transformers.models.wav2vec2.modeling_wav2vec2 import _compute_mask_indices, _sample_negative_indices -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry logger = get_logger(__name__) @@ -363,6 +363,10 @@ def main(): # We now keep distinct sets of args, for a cleaner separation of concerns. args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_wav2vec2_pretraining_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. 
accelerator = Accelerator() logger.info(accelerator.state, main_process_only=False) diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py index 70ab4c10592dc5..ad2425d9fbb87a 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_ctc.py @@ -44,7 +44,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -376,6 +376,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_speech_recognition_ctc", model_args, data_args) + # Detecting last checkpoint. 
last_checkpoint = None if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: diff --git a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py index 9b05b7a3f8d512..fce6b55be17856 100755 --- a/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py +++ b/examples/pytorch/speech-recognition/run_speech_recognition_seq2seq.py @@ -42,7 +42,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -239,6 +239,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_speech_recognition_seq2seq", model_args, data_args) + # 2. Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/summarization/run_summarization.py b/examples/pytorch/summarization/run_summarization.py index ec80bb6dd6674e..95be07e7185dad 100755 --- a/examples/pytorch/summarization/run_summarization.py +++ b/examples/pytorch/summarization/run_summarization.py @@ -46,7 +46,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, is_offline_mode +from transformers.utils import check_min_version, is_offline_mode, send_example_telemetry from transformers.utils.versions import require_version @@ -302,6 +302,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_summarization", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/summarization/run_summarization_no_trainer.py b/examples/pytorch/summarization/run_summarization_no_trainer.py index 172858581eeaa4..98c7f09bd4f01f 100644 --- a/examples/pytorch/summarization/run_summarization_no_trainer.py +++ b/examples/pytorch/summarization/run_summarization_no_trainer.py @@ -50,7 +50,7 @@ SchedulerType, get_scheduler, ) -from transformers.utils import get_full_repo_name, is_offline_mode +from transformers.utils import get_full_repo_name, is_offline_mode, send_example_telemetry from transformers.utils.versions import require_version @@ -319,6 +319,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_summarization_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. 
# If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py index e69e65a9884277..22f5497399aa0d 100755 --- a/examples/pytorch/text-classification/run_glue.py +++ b/examples/pytorch/text-classification/run_glue.py @@ -42,7 +42,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -215,6 +215,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_glue", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/text-classification/run_glue_no_trainer.py b/examples/pytorch/text-classification/run_glue_no_trainer.py index 0abc804c6b22d7..4e73a10e9a3302 100644 --- a/examples/pytorch/text-classification/run_glue_no_trainer.py +++ b/examples/pytorch/text-classification/run_glue_no_trainer.py @@ -42,7 +42,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -205,6 +205,9 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_glue_no_trainer", args) # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py index 6a518aff982437..d0a449c3521c3a 100755 --- a/examples/pytorch/text-classification/run_xnli.py +++ b/examples/pytorch/text-classification/run_xnli.py @@ -42,7 +42,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -112,8 +112,6 @@ class DataTrainingArguments: ) }, ) - server_ip: Optional[str] = field(default=None, metadata={"help": "For distant debugging."}) - server_port: Optional[str] = field(default=None, metadata={"help": "For distant debugging."}) @dataclass @@ -176,14 +174,9 @@ def main(): parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments)) model_args, data_args, training_args = parser.parse_args_into_dataclasses() - # Setup distant debugging if needed - if data_args.server_ip and data_args.server_port: - # Distant debugging - see https://code.visualstudio.com/docs/python/debugging#_attach-to-a-local-script - import ptvsd - - print("Waiting for debugger attach") - ptvsd.enable_attach(address=(data_args.server_ip, data_args.server_port), redirect_output=True) - ptvsd.wait_for_attach() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_xnli", model_args) # Setup logging logging.basicConfig( diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py index fbbfe3a38b5926..bffc4395fd21da 100755 --- a/examples/pytorch/token-classification/run_ner.py +++ b/examples/pytorch/token-classification/run_ner.py @@ -43,7 +43,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -216,6 +216,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_ner", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/token-classification/run_ner_no_trainer.py b/examples/pytorch/token-classification/run_ner_no_trainer.py index 18ca225637304d..4910b30e04d608 100755 --- a/examples/pytorch/token-classification/run_ner_no_trainer.py +++ b/examples/pytorch/token-classification/run_ner_no_trainer.py @@ -49,7 +49,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -259,6 +259,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_ner_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/pytorch/translation/run_translation.py b/examples/pytorch/translation/run_translation.py index 758fde82df26f3..1cd55a6f4a2dcd 100755 --- a/examples/pytorch/translation/run_translation.py +++ b/examples/pytorch/translation/run_translation.py @@ -46,7 +46,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -260,6 +260,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_translation", model_args, data_args) + # Setup logging logging.basicConfig( format="%(asctime)s - %(levelname)s - %(name)s - %(message)s", diff --git a/examples/pytorch/translation/run_translation_no_trainer.py b/examples/pytorch/translation/run_translation_no_trainer.py index e65a567df8e57f..acc49ffdfcd218 100644 --- a/examples/pytorch/translation/run_translation_no_trainer.py +++ b/examples/pytorch/translation/run_translation_no_trainer.py @@ -51,7 +51,7 @@ default_data_collator, get_scheduler, ) -from transformers.utils import get_full_repo_name +from transformers.utils import get_full_repo_name, send_example_telemetry from transformers.utils.versions import require_version @@ -305,6 +305,10 @@ def main(): # Parse the arguments args = parse_args() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_translation_no_trainer", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. # If we're using tracking, we also need to initialize it here and it will by default pick up all supported trackers # in the environment diff --git a/examples/tensorflow/language-modeling/run_clm.py b/examples/tensorflow/language-modeling/run_clm.py index 5469f0c3f7318e..46c8d339d970c3 100755 --- a/examples/tensorflow/language-modeling/run_clm.py +++ b/examples/tensorflow/language-modeling/run_clm.py @@ -53,6 +53,7 @@ create_optimizer, set_seed, ) +from transformers.utils import send_example_telemetry from transformers.utils.versions import require_version @@ -232,6 +233,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_clm", model_args, data_args, framework="tensorflow") + # Sanity checks if data_args.dataset_name is None and data_args.train_file is None and data_args.validation_file is None: raise ValueError("Need either a dataset name or a training/validation file.") diff --git a/examples/tensorflow/language-modeling/run_mlm.py b/examples/tensorflow/language-modeling/run_mlm.py index 5c0b124d459661..46b27dab662519 100755 --- a/examples/tensorflow/language-modeling/run_mlm.py +++ b/examples/tensorflow/language-modeling/run_mlm.py @@ -55,6 +55,7 @@ create_optimizer, set_seed, ) +from transformers.utils import send_example_telemetry from transformers.utils.versions import require_version @@ -242,6 +243,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_mlm", model_args, data_args, framework="tensorflow") + # Sanity checks if data_args.dataset_name is None and data_args.train_file is None and data_args.validation_file is None: raise ValueError("Need either a dataset name or a training/validation file.") diff --git a/examples/tensorflow/multiple-choice/run_swag.py b/examples/tensorflow/multiple-choice/run_swag.py index c06ac5a19b7452..1c88f0db51b0df 100644 --- a/examples/tensorflow/multiple-choice/run_swag.py +++ b/examples/tensorflow/multiple-choice/run_swag.py @@ -44,7 +44,7 @@ set_seed, ) from transformers.tokenization_utils_base import PreTrainedTokenizerBase -from transformers.utils import PaddingStrategy, check_min_version +from transformers.utils import PaddingStrategy, check_min_version, send_example_telemetry # Will error if the minimal version of Transformers is not installed. Remove at your own risks. 
@@ -246,6 +246,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_swag", model_args, data_args, framework="tensorflow") + output_dir = Path(training_args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) # endregion diff --git a/examples/tensorflow/question-answering/run_qa.py b/examples/tensorflow/question-answering/run_qa.py index eb91ec25389142..55465f345adfc9 100755 --- a/examples/tensorflow/question-answering/run_qa.py +++ b/examples/tensorflow/question-answering/run_qa.py @@ -41,7 +41,7 @@ TFTrainingArguments, set_seed, ) -from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, check_min_version +from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, check_min_version, send_example_telemetry from utils_qa import postprocess_qa_predictions @@ -242,6 +242,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_qa", model_args, data_args, framework="tensorflow") + output_dir = Path(training_args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) # endregion diff --git a/examples/tensorflow/summarization/run_summarization.py b/examples/tensorflow/summarization/run_summarization.py index aa750837abc75f..e67dc9b2cc607f 100644 --- a/examples/tensorflow/summarization/run_summarization.py +++ b/examples/tensorflow/summarization/run_summarization.py @@ -44,7 +44,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version, is_offline_mode +from transformers.utils import check_min_version, is_offline_mode, send_example_telemetry from transformers.utils.versions import require_version @@ -348,6 +348,10 @@ def main(): model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_summarization", model_args, data_args, framework="tensorflow") # endregion # region Logging diff --git a/examples/tensorflow/text-classification/run_glue.py b/examples/tensorflow/text-classification/run_glue.py index 9d11e787a53ee7..9268d755e03f8a 100644 --- a/examples/tensorflow/text-classification/run_glue.py +++ b/examples/tensorflow/text-classification/run_glue.py @@ -39,7 +39,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint, is_main_process -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry # region Helper functions @@ -206,6 +206,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_glue", model_args, data_args, framework="tensorflow") + if not (training_args.do_train or training_args.do_eval or training_args.do_predict): exit("Must specify at least one of --do_train, --do_eval or --do_predict!") # endregion diff --git a/examples/tensorflow/text-classification/run_text_classification.py b/examples/tensorflow/text-classification/run_text_classification.py index b2948324f5a13a..210a30344dbc0e 100644 --- a/examples/tensorflow/text-classification/run_text_classification.py +++ b/examples/tensorflow/text-classification/run_text_classification.py @@ -37,7 +37,7 @@ TFTrainingArguments, set_seed, ) -from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME +from transformers.utils import CONFIG_NAME, TF2_WEIGHTS_NAME, send_example_telemetry os.environ["TF_CPP_MIN_LOG_LEVEL"] = "1" # Reduce the amount of console output from TF @@ -196,6 +196,11 @@ def main(): model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. 
+ send_example_telemetry("run_text_classification", model_args, data_args, framework="tensorflow") + output_dir = Path(training_args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) # endregion diff --git a/examples/tensorflow/token-classification/run_ner.py b/examples/tensorflow/token-classification/run_ner.py index d9feb413d08302..7eecf240cacd7a 100644 --- a/examples/tensorflow/token-classification/run_ner.py +++ b/examples/tensorflow/token-classification/run_ner.py @@ -41,6 +41,7 @@ create_optimizer, set_seed, ) +from transformers.utils import send_example_telemetry from transformers.utils.versions import require_version @@ -252,6 +253,10 @@ def main(): # region Argument Parsing parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TFTrainingArguments)) model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_ner", model_args, data_args, framework="tensorflow") # endregion # region Setup logging diff --git a/examples/tensorflow/translation/run_translation.py b/examples/tensorflow/translation/run_translation.py index 4ed7c621e5ee87..abce256ac9a76d 100644 --- a/examples/tensorflow/translation/run_translation.py +++ b/examples/tensorflow/translation/run_translation.py @@ -47,7 +47,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint -from transformers.utils import check_min_version +from transformers.utils import check_min_version, send_example_telemetry from transformers.utils.versions import require_version @@ -318,6 +318,10 @@ def main(): model_args, data_args, training_args = parser.parse_json_file(json_file=os.path.abspath(sys.argv[1])) else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + + # Sending telemetry. 
Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_translation", model_args, data_args, framework="tensorflow") # endregion # region Logging diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py index 36e2fa43300e61..87bdfeebb888eb 100644 --- a/src/transformers/utils/__init__.py +++ b/src/transformers/utils/__init__.py @@ -74,6 +74,7 @@ is_local_clone, is_offline_mode, is_remote_url, + send_example_telemetry, url_to_filename, ) from .import_utils import ( diff --git a/src/transformers/utils/hub.py b/src/transformers/utils/hub.py index 7f2c8c56623c76..927801ab1c204a 100644 --- a/src/transformers/utils/hub.py +++ b/src/transformers/utils/hub.py @@ -109,6 +109,7 @@ def is_offline_mode(): HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HUGGINGFACE_CO_RESOLVE_ENDPOINT", None) HUGGINGFACE_CO_RESOLVE_ENDPOINT = os.environ.get("HF_ENDPOINT", HUGGINGFACE_CO_RESOLVE_ENDPOINT) HUGGINGFACE_CO_PREFIX = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/{model_id}/resolve/{revision}/{filename}" +HUGGINGFACE_CO_EXAMPLES_TELEMETRY = HUGGINGFACE_CO_RESOLVE_ENDPOINT + "/telemetry/examples" def is_remote_url(url_or_filename): @@ -1028,3 +1029,41 @@ def get_full_repo_name(model_id: str, organization: Optional[str] = None, token: return f"{username}/{model_id}" else: return f"{organization}/{model_id}" + + +def send_example_telemetry(example_name, *example_args, framework="pytorch"): + """ + Sends telemetry that helps tracking the examples use. + + Args: + example_name (`str`): The name of the example. + *example_args (dataclasses or `argparse.ArgumentParser`): The arguments to the script. This function will only + try to extract the model and dataset name from those. Nothing else is tracked. + framework (`str`, *optional*, defaults to `"pytorch"`): The framework for the example. 
+ """ + if is_offline_mode(): + return + + data = {"example": example_name, "framework": framework} + for args in example_args: + args_as_dict = {k: v for k, v in args.__dict__.items() if not k.startswith("_") and v is not None} + if "model_name_or_path" in args_as_dict: + model_name = args_as_dict["model_name_or_path"] + # Filter out local paths + if not os.path.isdir(model_name): + data["model_name"] = args_as_dict["model_name_or_path"] + if "dataset_name" in args_as_dict: + data["dataset_name"] = args_as_dict["dataset_name"] + elif "task_name" in args_as_dict: + # Extract script name from the example_name + script_name = example_name.replace("tf_", "").replace("flax_", "").replace("run_", "") + script_name = script_name.replace("_no_trainer", "") + data["dataset_name"] = f"{script_name}-{args_as_dict['task_name']}" + + headers = {"user-agent": http_user_agent(data)} + try: + r = requests.head(HUGGINGFACE_CO_EXAMPLES_TELEMETRY, headers=headers) + r.raise_for_status() + except Exception: + # We don't want to error in case of connection errors of any kind. + pass diff --git a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py index 5a641f85f2ef22..f07029ec242caa 100755 --- a/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py +++ b/templates/adding_a_new_example_script/{{cookiecutter.directory_name}}/run_{{cookiecutter.example_shortcut}}.py @@ -46,6 +46,7 @@ set_seed, ) from transformers.trainer_utils import get_last_checkpoint +from transformers.utils import send_example_telemetry logger = logging.getLogger(__name__) @@ -207,6 +208,10 @@ def main(): else: model_args, data_args, training_args = parser.parse_args_into_dataclasses() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. 
The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_{{cookiecutter.example_shortcut}}", model_args, data_args) + # Detecting last checkpoint. last_checkpoint = None if os.path.isdir(training_args.output_dir) and training_args.do_train and not training_args.overwrite_output_dir: @@ -519,6 +524,7 @@ def _mp_fn(index): get_scheduler, set_seed, ) +from transformers.utils import send_example_telemetry logger = logging.getLogger(__name__) @@ -662,6 +668,10 @@ def parse_args(): def main(): args = parse_args() + # Sending telemetry. Tracking the example usage helps us better allocate resources to maintain them. The + # information sent is the one passed as arguments along with your Python/PyTorch versions. + send_example_telemetry("run_{{cookiecutter.example_shortcut}}", args) + # Initialize the accelerator. We will let the accelerator handle device placement for us in this example. accelerator = Accelerator() # Make one log on every process with the configuration for debugging.