From 2e90c3df8f965ec616faa08e3fb2a1857a1e64b6 Mon Sep 17 00:00:00 2001 From: Sylvain Gugger <35901082+sgugger@users.noreply.github.com> Date: Wed, 6 Jul 2022 12:10:06 -0400 Subject: [PATCH] Doc to dataset (#18037) * Link to the Datasets doc * Remove unwanted file --- docs/source/en/perf_train_gpu_one.mdx | 2 +- docs/source/en/preprocessing.mdx | 2 +- docs/source/en/tasks/asr.mdx | 2 +- docs/source/en/tasks/audio_classification.mdx | 2 +- docs/source/en/tasks/image_classification.mdx | 2 +- docs/source/en/tasks/language_modeling.mdx | 6 +++--- docs/source/en/tasks/multiple_choice.mdx | 4 ++-- docs/source/en/tasks/question_answering.mdx | 4 ++-- docs/source/en/tasks/sequence_classification.mdx | 4 ++-- docs/source/en/tasks/summarization.mdx | 4 ++-- docs/source/en/tasks/token_classification.mdx | 4 ++-- docs/source/en/tasks/translation.mdx | 4 ++-- docs/source/en/training.mdx | 2 +- src/transformers/modeling_tf_utils.py | 4 ++-- src/transformers/trainer.py | 16 ++++++++-------- src/transformers/trainer_seq2seq.py | 6 +++--- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/docs/source/en/perf_train_gpu_one.mdx b/docs/source/en/perf_train_gpu_one.mdx index 682ba663c8e9ce..5e825beb7d10d8 100644 --- a/docs/source/en/perf_train_gpu_one.mdx +++ b/docs/source/en/perf_train_gpu_one.mdx @@ -33,7 +33,7 @@ pip install transformers datasets accelerate nvidia-ml-py3 The `nvidia-ml-py3` library allows us to monitor the memory usage of the models from within Python. You might be familiar with the `nvidia-smi` command in the terminal - this library allows to access the same information in Python directly. -Then we create some dummy data. We create random token IDs between 100 and 30000 and binary labels for a classifier. In total we get 512 sequences each with length 512 and store them in a [`Dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html?highlight=dataset#datasets.Dataset) with PyTorch format. +Then we create some dummy data. We create random token IDs between 100 and 30000 and binary labels for a classifier. In total we get 512 sequences each with length 512 and store them in a [`~datasets.Dataset`] with PyTorch format. ```py diff --git a/docs/source/en/preprocessing.mdx b/docs/source/en/preprocessing.mdx index 6e2e9bd752174e..f9bdae3603af82 100644 --- a/docs/source/en/preprocessing.mdx +++ b/docs/source/en/preprocessing.mdx @@ -244,7 +244,7 @@ For example, the [MInDS-14](https://huggingface.co/datasets/PolyAI/minds14) data 'sampling_rate': 8000} ``` -1. Use 🤗 Datasets' [`cast_column`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.cast_column) method to upsample the sampling rate to 16kHz: +1. Use 🤗 Datasets' [`~datasets.Dataset.cast_column`] method to upsample the sampling rate to 16kHz: ```py >>> dataset = dataset.cast_column("audio", Audio(sampling_rate=16_000)) diff --git a/docs/source/en/tasks/asr.mdx b/docs/source/en/tasks/asr.mdx index dac5015bf8158b..8ceea824f4eeb4 100644 --- a/docs/source/en/tasks/asr.mdx +++ b/docs/source/en/tasks/asr.mdx @@ -117,7 +117,7 @@ The preprocessing function needs to: ... return batch ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the map function by increasing the number of processes with `num_proc`. 
Remove the columns you don't need: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the map function by increasing the number of processes with `num_proc`. Remove the columns you don't need: ```py >>> encoded_minds = minds.map(prepare_dataset, remove_columns=minds.column_names["train"], num_proc=4) diff --git a/docs/source/en/tasks/audio_classification.mdx b/docs/source/en/tasks/audio_classification.mdx index 6dee7a19dd5dc1..33a469ac5a79cf 100644 --- a/docs/source/en/tasks/audio_classification.mdx +++ b/docs/source/en/tasks/audio_classification.mdx @@ -129,7 +129,7 @@ The preprocessing function needs to: ... return inputs ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once. Remove the columns you don't need, and rename `intent_class` to `label` because that is what the model expects: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once. Remove the columns you don't need, and rename `intent_class` to `label` because that is what the model expects: ```py >>> encoded_minds = minds.map(preprocess_function, remove_columns="audio", batched=True) diff --git a/docs/source/en/tasks/image_classification.mdx b/docs/source/en/tasks/image_classification.mdx index 0ca317e79d6e97..a6ca2ea5ddc17d 100644 --- a/docs/source/en/tasks/image_classification.mdx +++ b/docs/source/en/tasks/image_classification.mdx @@ -95,7 +95,7 @@ Create a preprocessing function that will apply the transforms and return the `p ... return examples ``` -Use 🤗 Dataset's [`with_transform`](https://huggingface.co/docs/datasets/package_reference/main_classes.html?#datasets.Dataset.with_transform) method to apply the transforms over the entire dataset. The transforms are applied on-the-fly when you load an element of the dataset: +Use 🤗 Dataset's [`~datasets.Dataset.with_transform`] method to apply the transforms over the entire dataset. The transforms are applied on-the-fly when you load an element of the dataset: ```py >>> food = food.with_transform(transforms) diff --git a/docs/source/en/tasks/language_modeling.mdx b/docs/source/en/tasks/language_modeling.mdx index 4aa368fe0bb53e..f410bd5a557291 100644 --- a/docs/source/en/tasks/language_modeling.mdx +++ b/docs/source/en/tasks/language_modeling.mdx @@ -118,7 +118,7 @@ Here is how you can create a preprocessing function to convert the list to a str ... return tokenizer([" ".join(x) for x in examples["answers.text"]], truncation=True) ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once and increasing the number of processes with `num_proc`. Remove the columns you don't need: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. 
You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once and increasing the number of processes with `num_proc`. Remove the columns you don't need: ```py >>> tokenized_eli5 = eli5.map( @@ -245,7 +245,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = lm_dataset["train"].to_tf_dataset( @@ -352,7 +352,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = lm_dataset["train"].to_tf_dataset( diff --git a/docs/source/en/tasks/multiple_choice.mdx b/docs/source/en/tasks/multiple_choice.mdx index 2ec7019a153252..b8eb528497036d 100644 --- a/docs/source/en/tasks/multiple_choice.mdx +++ b/docs/source/en/tasks/multiple_choice.mdx @@ -79,7 +79,7 @@ The preprocessing function needs to do: ... return {k: [v[i : i + 4] for i in range(0, len(v), 4)] for k, v in tokenized_examples.items()} ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: ```py tokenized_swag = swag.map(preprocess_function, batched=True) @@ -224,7 +224,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). Specify inputs in `columns`, targets in `label_cols`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. 
Specify inputs in `columns`, targets in `label_cols`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> data_collator = DataCollatorForMultipleChoice(tokenizer=tokenizer) diff --git a/docs/source/en/tasks/question_answering.mdx b/docs/source/en/tasks/question_answering.mdx index d5df758db3acb8..2cb54760e8796e 100644 --- a/docs/source/en/tasks/question_answering.mdx +++ b/docs/source/en/tasks/question_answering.mdx @@ -126,7 +126,7 @@ Here is how you can create a function to truncate and map the start and end toke ... return inputs ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once. Remove the columns you don't need: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once. Remove the columns you don't need: ```py >>> tokenized_squad = squad.map(preprocess_function, batched=True, remove_columns=squad["train"].column_names) @@ -199,7 +199,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). Specify inputs and the start and end positions of an answer in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and the start and end positions of an answer in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = tokenized_squad["train"].to_tf_dataset( diff --git a/docs/source/en/tasks/sequence_classification.mdx b/docs/source/en/tasks/sequence_classification.mdx index 97c98bb88821ff..44729dc28f4e85 100644 --- a/docs/source/en/tasks/sequence_classification.mdx +++ b/docs/source/en/tasks/sequence_classification.mdx @@ -66,7 +66,7 @@ Create a preprocessing function to tokenize `text` and truncate sequences to be ... return tokenizer(examples["text"], truncation=True) ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: ```py tokenized_imdb = imdb.map(preprocess_function, batched=True) @@ -144,7 +144,7 @@ At this point, only three steps remain: -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). 
Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = tokenized_imdb["train"].to_tf_dataset( diff --git a/docs/source/en/tasks/summarization.mdx b/docs/source/en/tasks/summarization.mdx index c750e4732829ae..1c73c7396e6403 100644 --- a/docs/source/en/tasks/summarization.mdx +++ b/docs/source/en/tasks/summarization.mdx @@ -85,7 +85,7 @@ The preprocessing function needs to: ... return model_inputs ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: ```py >>> tokenized_billsum = billsum.map(preprocess_function, batched=True) @@ -160,7 +160,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = tokenized_billsum["train"].to_tf_dataset( diff --git a/docs/source/en/tasks/token_classification.mdx b/docs/source/en/tasks/token_classification.mdx index 03cd304898b543..aa5739534f9fb8 100644 --- a/docs/source/en/tasks/token_classification.mdx +++ b/docs/source/en/tasks/token_classification.mdx @@ -126,7 +126,7 @@ Here is how you can create a function to realign the tokens and labels, and trun ... return tokenized_inputs ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to tokenize and align the labels over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to tokenize and align the labels over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: ```py >>> tokenized_wnut = wnut.map(tokenize_and_align_labels, batched=True) @@ -199,7 +199,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). 
Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = tokenized_wnut["train"].to_tf_dataset( diff --git a/docs/source/en/tasks/translation.mdx b/docs/source/en/tasks/translation.mdx index b3ecec6e1ce7e3..0cb28736f0fcfb 100644 --- a/docs/source/en/tasks/translation.mdx +++ b/docs/source/en/tasks/translation.mdx @@ -87,7 +87,7 @@ The preprocessing function needs to: ... return model_inputs ``` -Use 🤗 Datasets [`map`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.map) function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: +Use 🤗 Datasets [`~datasets.Dataset.map`] function to apply the preprocessing function over the entire dataset. You can speed up the `map` function by setting `batched=True` to process multiple elements of the dataset at once: ```py >>> tokenized_books = books.map(preprocess_function, batched=True) @@ -162,7 +162,7 @@ At this point, only three steps remain: ``` -To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset). Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: +To fine-tune a model in TensorFlow, start by converting your datasets to the `tf.data.Dataset` format with [`~datasets.Dataset.to_tf_dataset`]. Specify inputs and labels in `columns`, whether to shuffle the dataset order, batch size, and the data collator: ```py >>> tf_train_set = tokenized_books["train"].to_tf_dataset( diff --git a/docs/source/en/training.mdx b/docs/source/en/training.mdx index 0d3648a9255145..6b0ec8a4081050 100644 --- a/docs/source/en/training.mdx +++ b/docs/source/en/training.mdx @@ -169,7 +169,7 @@ The [`DefaultDataCollator`] assembles tensors into a batch for the model to trai -Next, convert the tokenized datasets to TensorFlow datasets with the [`to_tf_dataset`](https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.to_tf_dataset) method. Specify your inputs in `columns`, and your label in `label_cols`: +Next, convert the tokenized datasets to TensorFlow datasets with the [`~datasets.Dataset.to_tf_dataset`] method. Specify your inputs in `columns`, and your label in `label_cols`: ```py >>> tf_train_dataset = small_train_dataset.to_tf_dataset( diff --git a/src/transformers/modeling_tf_utils.py b/src/transformers/modeling_tf_utils.py index c79c7d3d5453e5..16956657ca1093 100644 --- a/src/transformers/modeling_tf_utils.py +++ b/src/transformers/modeling_tf_utils.py @@ -1189,7 +1189,7 @@ def prepare_tf_dataset( prefetch: bool = True, ): """ - Wraps a HuggingFace `datasets.Dataset` as a `tf.data.Dataset` with collation and batching. This method is + Wraps a HuggingFace [`~datasets.Dataset`] as a `tf.data.Dataset` with collation and batching. This method is designed to create a "ready-to-use" dataset that can be passed directly to Keras methods like `fit()` without further modification. 
The method will drop columns from the dataset if they don't match input names for the model. If you want to specify the column names to return rather than using the names that match this model, we @@ -1197,7 +1197,7 @@ def prepare_tf_dataset( Args: dataset (`Any`): - A `datasets.Dataset` to be wrapped as a `tf.data.Dataset`. + A [`~datasets.Dataset`] to be wrapped as a `tf.data.Dataset`. batch_size (`int`, defaults to 8): The size of batches to return. shuffle (`bool`, defaults to `True`): diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py index 35342c0415b6c6..5f698b86ddddad 100755 --- a/src/transformers/trainer.py +++ b/src/transformers/trainer.py @@ -232,7 +232,7 @@ class Trainer: default to [`default_data_collator`] if no `tokenizer` is provided, an instance of [`DataCollatorWithPadding`] otherwise. train_dataset (`torch.utils.data.Dataset` or `torch.utils.data.IterableDataset`, *optional*): - The dataset to use for training. If it is an `datasets.Dataset`, columns not accepted by the + The dataset to use for training. If it is a [`~datasets.Dataset`], columns not accepted by the `model.forward()` method are automatically removed. Note that if it's a `torch.utils.data.IterableDataset` with some randomization and you are training in a @@ -241,7 +241,7 @@ class Trainer: manually set the seed of this `generator` at each epoch) or have a `set_epoch()` method that internally sets the seed of the RNGs used. eval_dataset (`torch.utils.data.Dataset`, *optional*): - The dataset to use for evaluation. If it is an `datasets.Dataset`, columns not accepted by the + The dataset to use for evaluation. If it is a [`~datasets.Dataset`], columns not accepted by the `model.forward()` method are automatically removed. tokenizer ([`PreTrainedTokenizerBase`], *optional*): The tokenizer used to preprocess the data. If provided, will be used to automatically pad the inputs the @@ -854,8 +854,8 @@ def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoa Args: eval_dataset (`torch.utils.data.Dataset`, *optional*): - If provided, will override `self.eval_dataset`. If it is an `datasets.Dataset`, columns not accepted by - the `model.forward()` method are automatically removed. It must implement `__len__`. + If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted + by the `model.forward()` method are automatically removed. It must implement `__len__`. """ if eval_dataset is None and self.eval_dataset is None: raise ValueError("Trainer: evaluation requires an eval_dataset.") @@ -904,8 +904,8 @@ def get_test_dataloader(self, test_dataset: Dataset) -> DataLoader: Args: test_dataset (`torch.utils.data.Dataset`, *optional*): - The test dataset to use. If it is an `datasets.Dataset`, columns not accepted by the `model.forward()` - method are automatically removed. It must implement `__len__`. + The test dataset to use. If it is a [`~datasets.Dataset`], columns not accepted by the + `model.forward()` method are automatically removed. It must implement `__len__`. """ data_collator = self.data_collator @@ -2605,8 +2605,8 @@ def evaluate( Args: eval_dataset (`Dataset`, *optional*): - Pass a dataset if you wish to override `self.eval_dataset`. If it is an `datasets.Dataset`, columns not - accepted by the `model.forward()` method are automatically removed. It must implement the `__len__` + Pass a dataset if you wish to override `self.eval_dataset`.
If it is a [`~datasets.Dataset`], columns + not accepted by the `model.forward()` method are automatically removed. It must implement the `__len__` method. ignore_keys (`List[str]`, *optional*): A list of keys in the output of your model (if it is a dictionary) that should be ignored when diff --git a/src/transformers/trainer_seq2seq.py b/src/transformers/trainer_seq2seq.py index 6dcb387ce7a4f0..02ce3d393b9e75 100644 --- a/src/transformers/trainer_seq2seq.py +++ b/src/transformers/trainer_seq2seq.py @@ -45,8 +45,8 @@ def evaluate( Args: eval_dataset (`Dataset`, *optional*): - Pass a dataset if you wish to override `self.eval_dataset`. If it is an `datasets.Dataset`, columns not - accepted by the `model.forward()` method are automatically removed. It must implement the `__len__` + Pass a dataset if you wish to override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns + not accepted by the `model.forward()` method are automatically removed. It must implement the `__len__` method. ignore_keys (`List[str]`, *optional*): A list of keys in the output of your model (if it is a dictionary) that should be ignored when @@ -93,7 +93,7 @@ def predict( Args: test_dataset (`Dataset`): - Dataset to run the predictions on. If it is an `datasets.Dataset`, columns not accepted by the + Dataset to run the predictions on. If it is a [`~datasets.Dataset`], columns not accepted by the `model.forward()` method are automatically removed. Has to implement the method `__len__` ignore_keys (`List[str]`, *optional*): A list of keys in the output of your model (if it is a dictionary) that should be ignored when
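
For reference, the [`~datasets.Dataset.map`] and [`~datasets.Dataset.to_tf_dataset`] calls that the updated task guides now cross-reference are used roughly as in the sketch below. This is an illustrative sketch only, not text from the affected pages: the checkpoint name, column names, and the tiny in-memory dataset are assumptions chosen so the snippet stays self-contained.

```py
# Illustrative sketch (not part of this patch) of the 🤗 Datasets calls the docs
# cross-reference. Checkpoint and column names are assumptions for the example.
from datasets import Dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# Tiny in-memory dataset so the sketch runs without downloading anything large.
raw = Dataset.from_dict({"text": ["hello world", "goodbye world"], "label": [0, 1]})

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")


def preprocess(examples):
    # Tokenize a batch of texts; truncation keeps sequences within the model's limit.
    return tokenizer(examples["text"], truncation=True)


# `batched=True` processes many rows per call; `num_proc` parallelizes across processes.
tokenized = raw.map(preprocess, batched=True, num_proc=2, remove_columns=["text"])
tokenized = tokenized.rename_column("label", "labels")

# Convert to a `tf.data.Dataset` for Keras `fit()`; the collator pads each batch on the fly.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, return_tensors="tf")
tf_train_set = tokenized.to_tf_dataset(
    columns=["attention_mask", "input_ids"],
    label_cols=["labels"],
    shuffle=True,
    batch_size=2,
    collate_fn=data_collator,
)
```

The same pattern runs through the task guides touched by this patch: model inputs go in `columns`, targets in `label_cols`, and a data collator handles batching and padding.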
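
Likewise, the `Trainer` and `Seq2SeqTrainer` docstrings above describe the behavior sketched below: when `eval_dataset` is a [`~datasets.Dataset`], columns not accepted by `model.forward()` are removed automatically before evaluation. Again an illustrative sketch rather than part of the patch; the checkpoint, column names, and output directory are assumptions.

```py
# Illustrative sketch (not part of this patch) of the docstring behavior: the raw
# `text` column is left in place and the Trainer drops it because `forward()` has
# no `text` argument. Checkpoint, columns, and output dir are assumptions.
from datasets import Dataset
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased")

eval_ds = Dataset.from_dict({"text": ["great movie", "terrible movie"], "label": [1, 0]})
# Keep the `text` column on purpose: it is removed automatically, as the docstrings state.
eval_ds = eval_ds.map(lambda batch: tokenizer(batch["text"], truncation=True), batched=True)

trainer = Trainer(
    model=model,
    args=TrainingArguments(output_dir="tmp_eval_output", per_device_eval_batch_size=2),
    eval_dataset=eval_ds,
    tokenizer=tokenizer,  # enables automatic padding via DataCollatorWithPadding
)
metrics = trainer.evaluate()  # returns e.g. `eval_loss`, `eval_runtime`, ...
print(metrics)
```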