From 36a570f4167a39f7ba03a86d3a4d3bdb4d1f4bab Mon Sep 17 00:00:00 2001 From: Steven Liu Date: Fri, 22 Dec 2023 15:55:37 -0600 Subject: [PATCH 1/4] soft prompt guides --- docs/source/_toctree.yml | 14 +- docs/source/task_guides/clm-prompt-tuning.md | 293 ----------------- .../task_guides/prompt_based_methods.md | 303 ++++++++++++++++++ .../task_guides/ptuning-seq-classification.md | 236 -------------- .../task_guides/seq2seq-prefix-tuning.md | 256 --------------- 5 files changed, 308 insertions(+), 794 deletions(-) delete mode 100644 docs/source/task_guides/clm-prompt-tuning.md create mode 100644 docs/source/task_guides/prompt_based_methods.md delete mode 100644 docs/source/task_guides/ptuning-seq-classification.md delete mode 100644 docs/source/task_guides/seq2seq-prefix-tuning.md diff --git a/docs/source/_toctree.yml b/docs/source/_toctree.yml index 962356fef7..9bb2492692 100644 --- a/docs/source/_toctree.yml +++ b/docs/source/_toctree.yml @@ -10,18 +10,14 @@ - title: Tutorial sections: - local: tutorial/peft_model_config - title: PEFT configurations and models + title: Configurations and models - local: tutorial/peft_integrations - title: PEFT integrations + title: Integrations -- title: Task guides +- title: PEFT method guides sections: - - local: task_guides/seq2seq-prefix-tuning - title: Prefix tuning for conditional generation - - local: task_guides/clm-prompt-tuning - title: Prompt tuning for causal language modeling - - local: task_guides/ptuning-seq-classification - title: P-tuning for sequence classification + - local: task_guides/prompt_based_methods + title: Prompt-based methods - title: LoRA sections: - local: task_guides/image_classification_lora diff --git a/docs/source/task_guides/clm-prompt-tuning.md b/docs/source/task_guides/clm-prompt-tuning.md deleted file mode 100644 index 835893c139..0000000000 --- a/docs/source/task_guides/clm-prompt-tuning.md +++ /dev/null @@ -1,293 +0,0 @@ - - -# Prompt tuning for causal language modeling - -[[open-in-colab]] - -Prompting helps guide language model behavior by adding some input text specific to a task. Prompt tuning is an additive method for only training and updating the newly added prompt tokens to a pretrained model. This way, you can use one pretrained model whose weights are frozen, and train and update a smaller set of prompt parameters for each downstream task instead of fully finetuning a separate model. As models grow larger and larger, prompt tuning can be more efficient, and results are even better as model parameters scale. - - - -💡 Read [The Power of Scale for Parameter-Efficient Prompt Tuning](https://arxiv.org/abs/2104.08691) to learn more about prompt tuning. - - - -This guide will show you how to apply prompt tuning to train a [`bloomz-560m`](https://huggingface.co/bigscience/bloomz-560m) model on the `twitter_complaints` subset of the [RAFT](https://huggingface.co/datasets/ought/raft) dataset. - -Before you begin, make sure you have all the necessary libraries installed: - -```bash -!pip install -q peft transformers datasets -``` - -## Setup - -Start by defining the model and tokenizer, the dataset and the dataset columns to train on, some training hyperparameters, and the [`PromptTuningConfig`]. 
The [`PromptTuningConfig`] contains information about the task type, the text to initialize the prompt embedding, the number of virtual tokens, and the tokenizer to use: - -```py -from transformers import AutoModelForCausalLM, AutoTokenizer, default_data_collator, get_linear_schedule_with_warmup -from peft import get_peft_config, get_peft_model, PromptTuningInit, PromptTuningConfig, TaskType, PeftType -import torch -from datasets import load_dataset -import os -from torch.utils.data import DataLoader -from tqdm import tqdm - -device = "cuda" -model_name_or_path = "bigscience/bloomz-560m" -tokenizer_name_or_path = "bigscience/bloomz-560m" -peft_config = PromptTuningConfig( - task_type=TaskType.CAUSAL_LM, - prompt_tuning_init=PromptTuningInit.TEXT, - num_virtual_tokens=8, - prompt_tuning_init_text="Classify if the tweet is a complaint or not:", - tokenizer_name_or_path=model_name_or_path, -) - -dataset_name = "twitter_complaints" -checkpoint_name = f"{dataset_name}_{model_name_or_path}_{peft_config.peft_type}_{peft_config.task_type}_v1.pt".replace( - "/", "_" -) -text_column = "Tweet text" -label_column = "text_label" -max_length = 64 -lr = 3e-2 -num_epochs = 50 -batch_size = 8 -``` - -## Load dataset - -For this guide, you'll load the `twitter_complaints` subset of the [RAFT](https://huggingface.co/datasets/ought/raft) dataset. This subset contains tweets that are labeled either `complaint` or `no complaint`: - -```py -dataset = load_dataset("ought/raft", dataset_name) -dataset["train"][0] -{"Tweet text": "@HMRCcustomers No this is my first job", "ID": 0, "Label": 2} -``` - -To make the `Label` column more readable, replace the `Label` value with the corresponding label text and store them in a `text_label` column. You can use the [`~datasets.Dataset.map`] function to apply this change over the entire dataset in one step: - -```py -classes = [k.replace("_", " ") for k in dataset["train"].features["Label"].names] -dataset = dataset.map( - lambda x: {"text_label": [classes[label] for label in x["Label"]]}, - batched=True, - num_proc=1, -) -dataset["train"][0] -{"Tweet text": "@HMRCcustomers No this is my first job", "ID": 0, "Label": 2, "text_label": "no complaint"} -``` - -## Preprocess dataset - -Next, you'll setup a tokenizer; configure the appropriate padding token to use for padding sequences, and determine the maximum length of the tokenized labels: - -```py -tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) -if tokenizer.pad_token_id is None: - tokenizer.pad_token_id = tokenizer.eos_token_id -target_max_length = max([len(tokenizer(class_label)["input_ids"]) for class_label in classes]) -print(target_max_length) -3 -``` - -Create a `preprocess_function` to: - -1. Tokenize the input text and labels. -2. For each example in a batch, pad the labels with the tokenizers `pad_token_id`. -3. Concatenate the input text and labels into the `model_inputs`. -4. Create a separate attention mask for `labels` and `model_inputs`. -5. Loop through each example in the batch again to pad the input ids, labels, and attention mask to the `max_length` and convert them to PyTorch tensors. 
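Put differently, each processed example ends up as the prompt text followed by the label tokens, with the prompt positions masked out of the loss. Here is a purely illustrative sketch of that layout (the ids below are made up and are not real BLOOM token ids):

```py
# illustrative only: the real ids come from the BLOOM tokenizer
prompt_ids = [210, 4577, 915, 88]                 # "Tweet text : ... Label : "
label_ids = [654, 3]                              # "no complaint" + pad token
input_ids = prompt_ids + label_ids
labels = [-100] * len(prompt_ids) + label_ids     # loss is only computed on the label tokens
attention_mask = [1] * len(input_ids)
```

The `preprocess_function` below builds exactly this structure and then left-pads everything to `max_length`: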
- -```py -def preprocess_function(examples): - batch_size = len(examples[text_column]) - inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]] - targets = [str(x) for x in examples[label_column]] - model_inputs = tokenizer(inputs) - labels = tokenizer(targets) - for i in range(batch_size): - sample_input_ids = model_inputs["input_ids"][i] - label_input_ids = labels["input_ids"][i] + [tokenizer.pad_token_id] - # print(i, sample_input_ids, label_input_ids) - model_inputs["input_ids"][i] = sample_input_ids + label_input_ids - labels["input_ids"][i] = [-100] * len(sample_input_ids) + label_input_ids - model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i]) - # print(model_inputs) - for i in range(batch_size): - sample_input_ids = model_inputs["input_ids"][i] - label_input_ids = labels["input_ids"][i] - model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * ( - max_length - len(sample_input_ids) - ) + sample_input_ids - model_inputs["attention_mask"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs[ - "attention_mask" - ][i] - labels["input_ids"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids - model_inputs["input_ids"][i] = torch.tensor(model_inputs["input_ids"][i][:max_length]) - model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length]) - labels["input_ids"][i] = torch.tensor(labels["input_ids"][i][:max_length]) - model_inputs["labels"] = labels["input_ids"] - return model_inputs -``` - -Use the [`~datasets.Dataset.map`] function to apply the `preprocess_function` to the entire dataset. You can remove the unprocessed columns since the model won't need them: - -```py -processed_datasets = dataset.map( - preprocess_function, - batched=True, - num_proc=1, - remove_columns=dataset["train"].column_names, - load_from_cache_file=False, - desc="Running tokenizer on dataset", -) -``` - -Create a [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader) from the `train` and `eval` datasets. Set `pin_memory=True` to speed up the data transfer to the GPU during training if the samples in your dataset are on a CPU. - -```py -train_dataset = processed_datasets["train"] -eval_dataset = processed_datasets["test"] - - -train_dataloader = DataLoader( - train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True -) -eval_dataloader = DataLoader(eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True) -``` - -## Train - -You're almost ready to setup your model and start training! - -Initialize a base model from [`~transformers.AutoModelForCausalLM`], and pass it and `peft_config` to the [`get_peft_model`] function to create a [`PeftModel`]. You can print the new [`PeftModel`]'s trainable parameters to see how much more efficient it is than training the full parameters of the original model! - -```py -model = AutoModelForCausalLM.from_pretrained(model_name_or_path) -model = get_peft_model(model, peft_config) -print(model.print_trainable_parameters()) -"trainable params: 8192 || all params: 559222784 || trainable%: 0.0014648902430985358" -``` - -Setup an optimizer and learning rate scheduler: - -```py -optimizer = torch.optim.AdamW(model.parameters(), lr=lr) -lr_scheduler = get_linear_schedule_with_warmup( - optimizer=optimizer, - num_warmup_steps=0, - num_training_steps=(len(train_dataloader) * num_epochs), -) -``` - -Move the model to the GPU, then write a training loop to start training! 
- -```py -model = model.to(device) - -for epoch in range(num_epochs): - model.train() - total_loss = 0 - for step, batch in enumerate(tqdm(train_dataloader)): - batch = {k: v.to(device) for k, v in batch.items()} - outputs = model(**batch) - loss = outputs.loss - total_loss += loss.detach().float() - loss.backward() - optimizer.step() - lr_scheduler.step() - optimizer.zero_grad() - - model.eval() - eval_loss = 0 - eval_preds = [] - for step, batch in enumerate(tqdm(eval_dataloader)): - batch = {k: v.to(device) for k, v in batch.items()} - with torch.no_grad(): - outputs = model(**batch) - loss = outputs.loss - eval_loss += loss.detach().float() - eval_preds.extend( - tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True) - ) - - eval_epoch_loss = eval_loss / len(eval_dataloader) - eval_ppl = torch.exp(eval_epoch_loss) - train_epoch_loss = total_loss / len(train_dataloader) - train_ppl = torch.exp(train_epoch_loss) - print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}") -``` - -## Share model - -You can store and share your model on the Hub if you'd like. Log in to your Hugging Face account and enter your token when prompted: - -```py -from huggingface_hub import notebook_login - -notebook_login() -``` - -Use the [`~transformers.PreTrainedModel.push_to_hub`] function to upload your model to a model repository on the Hub: - -```py -peft_model_id = "your-name/bloomz-560m_PROMPT_TUNING_CAUSAL_LM" -model.push_to_hub("your-name/bloomz-560m_PROMPT_TUNING_CAUSAL_LM", use_auth_token=True) -``` - -Once the model is uploaded, you'll see the model file size is only 33.5kB! 🤏 - -## Inference - -Let's try the model on a sample input for inference. If you look at the repository you uploaded the model to, you'll see a `adapter_config.json` file. Load this file into [`PeftConfig`] to specify the `peft_type` and `task_type`. Then you can load the prompt tuned model weights, and the configuration into [`~PeftModel.from_pretrained`] to create the [`PeftModel`]: - -```py -from peft import PeftModel, PeftConfig - -peft_model_id = "stevhliu/bloomz-560m_PROMPT_TUNING_CAUSAL_LM" - -config = PeftConfig.from_pretrained(peft_model_id) -model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path) -model = PeftModel.from_pretrained(model, peft_model_id) -``` - -Grab a tweet and tokenize it: - -```py -inputs = tokenizer( - f'{text_column} : {"@nationalgridus I have no water and the bill is current and paid. Can you do something about this?"} Label : ', - return_tensors="pt", -) -``` - -Put the model on a GPU and *generate* the predicted label: - -```py -model.to(device) - -with torch.no_grad(): - inputs = {k: v.to(device) for k, v in inputs.items()} - outputs = model.generate( - input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"], max_new_tokens=10, eos_token_id=3 - ) - print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)) -[ - "Tweet text : @nationalgridus I have no water and the bill is current and paid. Can you do something about this? Label : complaint" -] -``` diff --git a/docs/source/task_guides/prompt_based_methods.md b/docs/source/task_guides/prompt_based_methods.md new file mode 100644 index 0000000000..0b65dbd491 --- /dev/null +++ b/docs/source/task_guides/prompt_based_methods.md @@ -0,0 +1,303 @@ + + +# Prompt-based methods + +A prompt usually describes a task or provides an example of the task you want the model to learn. 
Instead of manually creating these prompts, soft prompting methods add learnable parameters to the input embeddings that can be optimized for a specific task while keeping the pretrained model's parameters frozen. This makes it both faster and easier to finetune large language models (LLMs) for new downstream tasks. + +The PEFT library supports several types of prompting methods (p-tuning, prefix tuning, prompt tuning, multitask prompt tuning) and you can learn more about how these methods work conceptually in the [Soft prompts](../conceptual_guides/prompting) guide. If you're interested in applying these methods to other tasks, take a look at our collection of [notebooks](https://huggingface.co/spaces/PEFT/soft-prompting)! + +This guide will show you how to train a causal language model - by prompting it - to *generate a classification* for whether a tweet is a complaint or not. + + + +This guide focuses on how to apply soft-prompt PEFT methods to train a causal language model. It assumes you're already familiar with the general process of training a causal language model so it won't spend too much time discussing those specifics. If you're new, we recommend taking a look at the [Causal language modeling](https://huggingface.co/docs/transformers/tasks/language_modeling) guide first from the Transformers documentation. When you're ready, come back and see how easy it is to drop PEFT in your training! + + + +Before you begin, make sure you have all the necessary libraries installed. + +```bash +pip install -q peft transformers datasets +``` + +## Dataset + +For this guide, you'll use the `twitter_complaints` subset of the [RAFT](https://huggingface.co/datasets/ought/raft) dataset. The `twitter_complaints` subset contains tweets labeled as `complaint` and `no complaint`, and you can check out the [dataset viewer](https://huggingface.co/datasets/ought/raft/viewer/twitter_complaints) for a better idea of what the data looks like. + +Use the [`~datasets.load_dataset`] function to load the dataset and create a new `text_label` column so it is easier to understand what the `Label` values `1` and `2` mean. + +```py +from datasets import load_dataset + +ds = load_dataset("ought/raft", "twitter_complaints") + +classes = [k.replace("_", " ") for k in ds["train"].features["Label"].names] +ds = ds.map( + lambda x: {"text_label": [classes[label] for label in x["Label"]]}, + batched=True, + num_proc=1, +) +ds["train"][0] +{"Tweet text": "@HMRCcustomers No this is my first job", "ID": 0, "Label": 2, "text_label": "no complaint"} +``` + +Load a tokenizer, define the padding token to use, and determine the maximum length of the tokenized label. + +```py +from transformers import AutoTokenizer + +tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-560m") +if tokenizer.pad_token_id is None: + tokenizer.pad_token_id = tokenizer.eos_token_id +target_max_length = max([len(tokenizer(class_label)["input_ids"]) for class_label in classes]) +print(target_max_length) +``` + +Create a preprocessing function that tokenizes the tweet text and labels, pad each label in a batch, concatenate the input text and labels, and create an attention mask. Then you'll loop through each example in a batch to pad the input ids, labels, and attention mask to a predefined maximum length before converting them to PyTorch tensors. 
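One convention worth spelling out: label positions filled with `-100` are skipped by the loss, because `-100` is the default `ignore_index` of PyTorch's cross-entropy. A tiny standalone sketch of that behavior (the tensors below are made-up values and aren't part of the guide's pipeline):

```py
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)                # 4 token positions, vocabulary of size 10
labels = torch.tensor([-100, -100, 3, 7])  # prompt positions are masked, label tokens are kept
loss = F.cross_entropy(logits, labels)     # positions equal to ignore_index (-100) are skipped
```

With that in mind, here is the full preprocessing function: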
+ +```py +import torch + +max_length = 64 + +def preprocess_function(examples): + batch_size = len(examples[text_column]) + inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]] + targets = [str(x) for x in examples[label_column]] + model_inputs = tokenizer(inputs) + labels = tokenizer(targets) + for i in range(batch_size): + sample_input_ids = model_inputs["input_ids"][i] + label_input_ids = labels["input_ids"][i] + [tokenizer.pad_token_id] + # print(i, sample_input_ids, label_input_ids) + model_inputs["input_ids"][i] = sample_input_ids + label_input_ids + labels["input_ids"][i] = [-100] * len(sample_input_ids) + label_input_ids + model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i]) + # print(model_inputs) + for i in range(batch_size): + sample_input_ids = model_inputs["input_ids"][i] + label_input_ids = labels["input_ids"][i] + model_inputs["input_ids"][i] = [tokenizer.pad_token_id] * ( + max_length - len(sample_input_ids) + ) + sample_input_ids + model_inputs["attention_mask"][i] = [0] * (max_length - len(sample_input_ids)) + model_inputs[ + "attention_mask" + ][i] + labels["input_ids"][i] = [-100] * (max_length - len(sample_input_ids)) + label_input_ids + model_inputs["input_ids"][i] = torch.tensor(model_inputs["input_ids"][i][:max_length]) + model_inputs["attention_mask"][i] = torch.tensor(model_inputs["attention_mask"][i][:max_length]) + labels["input_ids"][i] = torch.tensor(labels["input_ids"][i][:max_length]) + model_inputs["labels"] = labels["input_ids"] + return model_inputs +``` + +Apply the preprocessing function to the entire dataset with the [`~datasets.Dataset.map`] function, and remove the unprocessed columns because the model won't need them. + +```py +processed_ds = ds.map( + preprocess_function, + batched=True, + num_proc=1, + remove_columns=ds["train"].column_names, + load_from_cache_file=False, + desc="Running tokenizer on dataset", +) +``` + +Finally, create a training and evaluation [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). Set `pin_memory=True` to speed up the data transfer to the GPU during training if the samples in your dataset are on a CPU. + +```py +from torch.utils.data import DataLoader +from transformers import default_data_collator + +train_ds = processed_ds["train"] +eval_ds = processed_ds["test"] + +batch_size = 16 + +train_dataloader = DataLoader(train_ds, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True) +eval_dataloader = DataLoader(eval_ds, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True) +``` + +## Model + +Now let's load a pretrained model to use as the base model for the soft-prompt method. This guide uses the [bigscience/bloomz-560m](https://huggingface.co/bigscience/bloomz-560m) model, but you can use any causal language model you want. + +```py +from transformers import AutoModelForCausalLM + +model = AutoModelForCausalLM.from_pretrained("bigscience/bloomz-560m") +``` + +### PEFT configuration and model + +For any PEFT method, you'll need to create a configuration which contains all the parameters that specify how the PEFT method should be applied. Once the configuration is setup, pass it to the [`~peft.get_peft_model`] function along with the base model to create a trainable PEFT model. + + + +Call the [`~PeftModel.print_trainable_parameters`] method to compare the number of parameters you're training with PEFT versus the total number of parameters in the base model! 
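Whichever configuration you choose below, the wrapping step itself follows the same pattern: build the config, then pass it together with the base model to [`get_peft_model`] (remember to import it from `peft`). A minimal sketch of that shared pattern, where `model` is the base model loaded above and `peft_config` stands for whichever configuration you create below:

```py
from peft import get_peft_model

# `peft_config` is whichever configuration object you create in the options below
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()
```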
+ + + + + + +[P-tuning](../conceptual_guides/prompting#p-tuning) adds a trainable embedding tensor where the prompt tokens can be added anywhere in the input sequence. Create a [`PromptEncoderConfig`] with the task type, the number of virtual tokens to add and learn, and the hidden size of the encoder for learning the prompt parameters. + +```py +from peft import PromptEncoderConfig + +peft_config = PromptEncoderConfig(task_type="CAUSAL_LM", num_virtual_tokens=20, encoder_hidden_size=128) +model = get_peft_model(model, peft_config) +model.print_trainable_parameters() +"trainable params: 300,288 || all params: 559,514,880 || trainable%: 0.05366935013417338" +``` + + + + +[Prefix tuning](../conceptual_guides/prompting#prefix-tuning) adds task-specific parameters in all of the model layers which are optimized by a separate feedforward network. Create a [`PrefixTuningConfig`] with the task type and number of virtual tokens to add and learn. + +```py +from peft import PrefixTuningConfig + +peft_config = PrefixTuningConfig(task_type="CAUSAL_LM", num_virtual_tokens=20) +model = get_peft_model(model, peft_config) +model.print_trainable_parameters() +"trainable params: 983,040 || all params: 560,197,632 || trainable%: 0.1754809274167014" +``` + + + + +[Prompt tuning](../conceptual_guides/prompting#prompt-tuning) formulates all tasks as a *generation* task, and it adds a task-specific prompt to the input which is updated independently. Create a [`PromptTuningConfig`] with the task type, the initial prompt tuning text to train the model with (in this case, it is classifying whether tweets are complaints or not), the number of virtual tokens to add and learn, and a tokenizer. + +```py +from peft import PromptTuningConfig, PromptTuningInit + +peft_config = PromptTuningConfig( + task_type="CAUSAL_LM", + prompt_tuning_init=PromptTuningInit.TEXT, + num_virtual_tokens=8, + prompt_tuning_init_text="Classify if the tweet is a complaint or not:", + tokenizer_name_or_path="bigscience/bloomz-560m", +) +model = get_peft_model(model, peft_config) +model.print_trainable_parameters() +"trainable params: 8,192 || all params: 559,222,784 || trainable%: 0.0014648902430985358" +``` + + + + +### Training + +Set up an optimizer and learning rate scheduler. + +```py +from transformers import get_linear_schedule_with_warmup + +lr = 3e-2 +num_epochs = 50 + +optimizer = torch.optim.AdamW(model.parameters(), lr=lr) +lr_scheduler = get_linear_schedule_with_warmup( + optimizer=optimizer, + num_warmup_steps=0, + num_training_steps=(len(train_dataloader) * num_epochs), +) +``` + +Move the model to the GPU and create a training loop that reports the loss and perplexity for each epoch. 
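The loop below sends every batch to a `device` variable, so define it first. A small sketch that falls back to the CPU when CUDA isn't available (the code that follows moves the model to `"cuda"` directly, which assumes a GPU is present):

```py
import torch

# reuse this `device` variable in the training and evaluation loops below
device = "cuda" if torch.cuda.is_available() else "cpu"
```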
+ +```py +from tqdm import tqdm + +model = model.to("cuda") + +for epoch in range(num_epochs): + model.train() + total_loss = 0 + for step, batch in enumerate(tqdm(train_dataloader)): + batch = {k: v.to(device) for k, v in batch.items()} + outputs = model(**batch) + loss = outputs.loss + total_loss += loss.detach().float() + loss.backward() + optimizer.step() + lr_scheduler.step() + optimizer.zero_grad() + + model.eval() + eval_loss = 0 + eval_preds = [] + for step, batch in enumerate(tqdm(eval_dataloader)): + batch = {k: v.to(device) for k, v in batch.items()} + with torch.no_grad(): + outputs = model(**batch) + loss = outputs.loss + eval_loss += loss.detach().float() + eval_preds.extend( + tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True) + ) + + eval_epoch_loss = eval_loss / len(eval_dataloader) + eval_ppl = torch.exp(eval_epoch_loss) + train_epoch_loss = total_loss / len(train_dataloader) + train_ppl = torch.exp(train_epoch_loss) + print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}") +``` + +## Share your model + +Once training is complete, you can upload your model to the Hub with the [`~transformers.PreTrainedModel.push_to_hub`] method. You'll need to login to your Hugging Face account first and enter your token when prompted. + +```py +from huggingface_hub import notebook_login + +notebook_login() +peft_model_id = "your-name/bloomz-560-m-peft-method" +model.push_to_hub("your-name/bloomz-560-m-peft-method") +``` + +If you check the model file size in the repository, you’ll see that it is only a few hundred bytes! + +## Inference + +Load the model from the Hub for inference and let's test it out on a tweet. + +```py +from peft import AutoPeftModelForCausalLM + +model = AutoPeftModelForCausalLM.from_pretrained("stevhliu/bloomz-560m-p-tuning").to("cuda") +tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-560m") + +i = 15 +inputs = tokenizer(f'{text_column} : {dataset["test"][i]["Tweet text"]} Label : ', return_tensors="pt") +print(dataset["test"][i]["Tweet text"]) +"@NYTsupport i have complained a dozen times & yet my papers are still thrown FAR from my door. Why is this so hard to resolve?" +``` + +Call the [`~transformers.GenerationMixin.generate`] method to generate the predicted classification label for the tweet. + +```py +with torch.no_grad(): + inputs = {k: v.to(device) for k, v in inputs.items()} + outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10) + print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)) +"['Tweet text : @NYTsupport i have complained a dozen times & yet my papers are still thrown FAR from my door. Why is this so hard to resolve? Label : complaint']" +``` diff --git a/docs/source/task_guides/ptuning-seq-classification.md b/docs/source/task_guides/ptuning-seq-classification.md deleted file mode 100644 index 13fb69f2e4..0000000000 --- a/docs/source/task_guides/ptuning-seq-classification.md +++ /dev/null @@ -1,236 +0,0 @@ - - -# P-tuning for sequence classification - -It is challenging to finetune large language models for downstream tasks because they have so many parameters. To work around this, you can use *prompts* to steer the model toward a particular downstream task without fully finetuning a model. Typically, these prompts are handcrafted, which may be impractical because you need very large validation sets to find the best prompts. 
*P-tuning* is a method for automatically searching and optimizing for better prompts in a continuous space. - - - -💡 Read [GPT Understands, Too](https://arxiv.org/abs/2103.10385) to learn more about p-tuning. - - - -This guide will show you how to train a [`roberta-large`](https://huggingface.co/roberta-large) model (but you can also use any of the GPT, OPT, or BLOOM models) with p-tuning on the `mrpc` configuration of the [GLUE](https://huggingface.co/datasets/glue) benchmark. - -Before you begin, make sure you have all the necessary libraries installed: - -```bash -!pip install -q peft transformers datasets evaluate -``` - -## Setup - -To get started, import 🤗 Transformers to create the base model, 🤗 Datasets to load a dataset, 🤗 Evaluate to load an evaluation metric, and 🤗 PEFT to create a [`PeftModel`] and setup the configuration for p-tuning. - -Define the model, dataset, and some basic training hyperparameters: - -```py -from transformers import ( - AutoModelForSequenceClassification, - AutoTokenizer, - DataCollatorWithPadding, - TrainingArguments, - Trainer, -) -from peft import ( - get_peft_config, - get_peft_model, - get_peft_model_state_dict, - set_peft_model_state_dict, - PeftType, - PromptEncoderConfig, -) -from datasets import load_dataset -import evaluate -import torch - -model_name_or_path = "roberta-large" -task = "mrpc" -num_epochs = 20 -lr = 1e-3 -batch_size = 32 -``` - -## Load dataset and metric - -Next, load the `mrpc` configuration - a corpus of sentence pairs labeled according to whether they're semantically equivalent or not - from the [GLUE](https://huggingface.co/datasets/glue) benchmark: - -```py -dataset = load_dataset("glue", task) -dataset["train"][0] -{ - "sentence1": 'Amrozi accused his brother , whom he called " the witness " , of deliberately distorting his evidence .', - "sentence2": 'Referring to him as only " the witness " , Amrozi accused his brother of deliberately distorting his evidence .', - "label": 1, - "idx": 0, -} -``` - -From 🤗 Evaluate, load a metric for evaluating the model's performance. The evaluation module returns the accuracy and F1 scores associated with this specific task. - -```py -metric = evaluate.load("glue", task) -``` - -Now you can use the `metric` to write a function that computes the accuracy and F1 scores. The `compute_metric` function calculates the scores from the model predictions and labels: - -```py -import numpy as np - - -def compute_metrics(eval_pred): - predictions, labels = eval_pred - predictions = np.argmax(predictions, axis=1) - return metric.compute(predictions=predictions, references=labels) -``` - -## Preprocess dataset - -Initialize the tokenizer and configure the padding token to use. If you're using a GPT, OPT, or BLOOM model, you should set the `padding_side` to the left; otherwise it'll be set to the right. Tokenize the sentence pairs and truncate them to the maximum length. 
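Left padding matters for decoder-only models because they generate from the last position of the sequence, so pad tokens shouldn't end up there. A quick illustration, using the small `gpt2` tokenizer purely for demonstration (the guide itself uses `roberta-large`, which pads on the right):

```py
from transformers import AutoTokenizer

demo_tokenizer = AutoTokenizer.from_pretrained("gpt2", padding_side="left")
demo_tokenizer.pad_token = demo_tokenizer.eos_token  # GPT-2 has no pad token by default
batch = demo_tokenizer(["hi", "a much longer example sentence"], padding=True)
print(batch["input_ids"])  # the short sequence is padded on the left, so its final token is a real one
```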
- -```py -if any(k in model_name_or_path for k in ("gpt", "opt", "bloom")): - padding_side = "left" -else: - padding_side = "right" - -tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, padding_side=padding_side) -if getattr(tokenizer, "pad_token_id") is None: - tokenizer.pad_token_id = tokenizer.eos_token_id - - -def tokenize_function(examples): - # max_length=None => use the model max length (it's actually the default) - outputs = tokenizer(examples["sentence1"], examples["sentence2"], truncation=True, max_length=None) - return outputs -``` - -Use [`~datasets.Dataset.map`] to apply the `tokenize_function` to the dataset, and remove the unprocessed columns because the model won't need those. You should also rename the `label` column to `labels` because that is the expected name for the labels by models in the 🤗 Transformers library. - -```py -tokenized_datasets = dataset.map( - tokenize_function, - batched=True, - remove_columns=["idx", "sentence1", "sentence2"], -) - -tokenized_datasets = tokenized_datasets.rename_column("label", "labels") -``` - -Create a collator function with [`~transformers.DataCollatorWithPadding`] to pad the examples in the batches to the `longest` sequence in the batch: - -```py -data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest") -``` - -## Train - -P-tuning uses a prompt encoder to optimize the prompt parameters, so you'll need to initialize the [`PromptEncoderConfig`] with several arguments: - -- `task_type`: the type of task you're training on, in this case it is sequence classification or `SEQ_CLS` -- `num_virtual_tokens`: the number of virtual tokens to use, or in other words, the prompt -- `encoder_hidden_size`: the hidden size of the encoder used to optimize the prompt parameters - -```py -peft_config = PromptEncoderConfig(task_type="SEQ_CLS", num_virtual_tokens=20, encoder_hidden_size=128) -``` - -Create the base `roberta-large` model from [`~transformers.AutoModelForSequenceClassification`], and then wrap the base model and `peft_config` with [`get_peft_model`] to create a [`PeftModel`]. If you're curious to see how many parameters you're actually training compared to training on all the model parameters, you can print it out with [`~peft.PeftModel.print_trainable_parameters`]: - -```py -model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, return_dict=True) -model = get_peft_model(model, peft_config) -model.print_trainable_parameters() -"trainable params: 1351938 || all params: 355662082 || trainable%: 0.38011867680626127" -``` - -From the 🤗 Transformers library, set up the [`~transformers.TrainingArguments`] class with where you want to save the model to, the training hyperparameters, how to evaluate the model, and when to save the checkpoints: - -```py -training_args = TrainingArguments( - output_dir="your-name/roberta-large-peft-p-tuning", - learning_rate=1e-3, - per_device_train_batch_size=32, - per_device_eval_batch_size=32, - num_train_epochs=2, - weight_decay=0.01, - evaluation_strategy="epoch", - save_strategy="epoch", - load_best_model_at_end=True, -) -``` - -Then pass the model, `TrainingArguments`, datasets, tokenizer, data collator, and evaluation function to the [`~transformers.Trainer`] class, which'll handle the entire training loop for you. Once you're ready, call [`~transformers.Trainer.train`] to start training! 
- -```py -trainer = Trainer( - model=model, - args=training_args, - train_dataset=tokenized_datasets["train"], - eval_dataset=tokenized_datasets["test"], - tokenizer=tokenizer, - data_collator=data_collator, - compute_metrics=compute_metrics, -) - -trainer.train() -``` - -## Share model - -You can store and share your model on the Hub if you'd like. Log in to your Hugging Face account and enter your token when prompted: - -```py -from huggingface_hub import notebook_login - -notebook_login() -``` - -Upload the model to a specifc model repository on the Hub with the [`~transformers.PreTrainedModel.push_to_hub`] function: - -```py -model.push_to_hub("your-name/roberta-large-peft-p-tuning", use_auth_token=True) -``` - -## Inference - -Once the model has been uploaded to the Hub, anyone can easily use it for inference. Load the configuration and model: - -```py -import torch -from peft import PeftModel, PeftConfig -from transformers import AutoModelForSequenceClassification, AutoTokenizer - -peft_model_id = "smangrul/roberta-large-peft-p-tuning" -config = PeftConfig.from_pretrained(peft_model_id) -inference_model = AutoModelForSequenceClassification.from_pretrained(config.base_model_name_or_path) -tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path) -model = PeftModel.from_pretrained(inference_model, peft_model_id) -``` - -Get some text and tokenize it: - -```py -classes = ["not equivalent", "equivalent"] - -sentence1 = "Coast redwood trees are the tallest trees on the planet and can grow over 300 feet tall." -sentence2 = "The coast redwood trees, which can attain a height of over 300 feet, are the tallest trees on earth." - -inputs = tokenizer(sentence1, sentence2, truncation=True, padding="longest", return_tensors="pt") -``` - -Pass the inputs to the model to classify the sentences: - -```py -with torch.no_grad(): - outputs = model(**inputs).logits - print(outputs) - -paraphrased_text = torch.softmax(outputs, dim=1).tolist()[0] -for i in range(len(classes)): - print(f"{classes[i]}: {int(round(paraphrased_text[i] * 100))}%") -"not equivalent: 4%" -"equivalent: 96%" -``` \ No newline at end of file diff --git a/docs/source/task_guides/seq2seq-prefix-tuning.md b/docs/source/task_guides/seq2seq-prefix-tuning.md deleted file mode 100644 index 1eea24bc62..0000000000 --- a/docs/source/task_guides/seq2seq-prefix-tuning.md +++ /dev/null @@ -1,256 +0,0 @@ - - -# Prefix tuning for conditional generation - -[[open-in-colab]] - -Prefix tuning is an additive method where only a sequence of continuous task-specific vectors is attached to the beginning of the input, or *prefix*. Only the prefix parameters are optimized and added to the hidden states in every layer of the model. The tokens of the input sequence can still attend to the prefix as *virtual tokens*. As a result, prefix tuning stores 1000x fewer parameters than a fully finetuned model, which means you can use one large language model for many tasks. - - - -💡 Read [Prefix-Tuning: Optimizing Continuous Prompts for Generation](https://arxiv.org/abs/2101.00190) to learn more about prefix tuning. - - - -This guide will show you how to apply prefix tuning to train a [`t5-large`](https://huggingface.co/t5-large) model on the `sentences_allagree` subset of the [financial_phrasebank](https://huggingface.co/datasets/financial_phrasebank) dataset. 
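At its core, the method only requires wrapping the base model with a [`PrefixTuningConfig`]; everything else is ordinary training. A minimal sketch of that wrapping step (the full guide below walks through data preparation, training, and inference):

```py
from transformers import AutoModelForSeq2SeqLM
from peft import PrefixTuningConfig, TaskType, get_peft_model

base_model = AutoModelForSeq2SeqLM.from_pretrained("t5-large")
peft_config = PrefixTuningConfig(task_type=TaskType.SEQ_2_SEQ_LM, num_virtual_tokens=20)
model = get_peft_model(base_model, peft_config)
model.print_trainable_parameters()  # only the prefix parameters are trainable
```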
- -Before you begin, make sure you have all the necessary libraries installed: - -```bash -!pip install -q peft transformers datasets -``` - -## Setup - -Start by defining the model and tokenizer, text and label columns, and some hyperparameters so it'll be easier to start training faster later. Set the environment variable `TOKENIZERS_PARALLELSIM` to `false` to disable the fast Rust-based tokenizer which processes data in parallel by default so you can use multiprocessing in Python. - -```py -from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, default_data_collator, get_linear_schedule_with_warmup -from peft import get_peft_config, get_peft_model, get_peft_model_state_dict, PrefixTuningConfig, TaskType -from datasets import load_dataset -from torch.utils.data import DataLoader -from tqdm import tqdm -import torch -import os - -os.environ["TOKENIZERS_PARALLELISM"] = "false" -os.environ["CUDA_VISIBLE_DEVICES"] = "3" - -device = "cuda" -model_name_or_path = "t5-large" -tokenizer_name_or_path = "t5-large" - -text_column = "sentence" -label_column = "text_label" -max_length = 128 -lr = 1e-2 -num_epochs = 5 -batch_size = 8 -``` - -## Load dataset - -For this guide, you'll train on the `sentences_allagree` subset of the [`financial_phrasebank`](https://huggingface.co/datasets/financial_phrasebank) dataset. This dataset contains financial news categorized by sentiment. - -Use 🤗 [Datasets](https://huggingface.co/docs/datasets/index) [`~datasets.Dataset.train_test_split`] function to create a training and validation split and convert the `label` value to the more readable `text_label`. All of the changes can be applied with the [`~datasets.Dataset.map`] function: - -```py -from datasets import load_dataset - -dataset = load_dataset("financial_phrasebank", "sentences_allagree") -dataset = dataset["train"].train_test_split(test_size=0.1) -dataset["validation"] = dataset["test"] -del dataset["test"] - -classes = dataset["train"].features["label"].names -dataset = dataset.map( - lambda x: {"text_label": [classes[label] for label in x["label"]]}, - batched=True, - num_proc=1, -) - -dataset["train"][0] -{"sentence": "Profit before taxes was EUR 4.0 mn , down from EUR 4.9 mn .", "label": 0, "text_label": "negative"} -``` - -## Preprocess dataset - -Initialize a tokenizer, and create a function to pad and truncate the `model_inputs` and `labels`: - -```py -tokenizer = AutoTokenizer.from_pretrained(model_name_or_path) - - -def preprocess_function(examples): - inputs = examples[text_column] - targets = examples[label_column] - model_inputs = tokenizer(inputs, max_length=max_length, padding="max_length", truncation=True, return_tensors="pt") - labels = tokenizer(targets, max_length=2, padding="max_length", truncation=True, return_tensors="pt") - labels = labels["input_ids"] - labels[labels == tokenizer.pad_token_id] = -100 - model_inputs["labels"] = labels - return model_inputs -``` - -Use the [`~datasets.Dataset.map`] function to apply the `preprocess_function` to the dataset. You can remove the unprocessed columns since the model doesn't need them anymore: - -```py -processed_datasets = dataset.map( - preprocess_function, - batched=True, - num_proc=1, - remove_columns=dataset["train"].column_names, - load_from_cache_file=False, - desc="Running tokenizer on dataset", -) -``` - -Create a [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader) from the `train` and `eval` datasets. 
Set `pin_memory=True` to speed up the data transfer to the GPU during training if the samples in your dataset are on a CPU. - -```py -train_dataset = processed_datasets["train"] -eval_dataset = processed_datasets["validation"] - -train_dataloader = DataLoader( - train_dataset, shuffle=True, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True -) -eval_dataloader = DataLoader(eval_dataset, collate_fn=default_data_collator, batch_size=batch_size, pin_memory=True) -``` - -## Train model - -Now you can setup your model and make sure it is ready for training. Specify the task in [`PrefixTuningConfig`], create the base `t5-large` model from [`~transformers.AutoModelForSeq2SeqLM`], and then wrap the model and configuration in a [`PeftModel`]. Feel free to print the [`PeftModel`]'s parameters and compare it to fully training all the model parameters to see how much more efficient it is! - -```py -peft_config = PrefixTuningConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, num_virtual_tokens=20) - -model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path) -model = get_peft_model(model, peft_config) -model.print_trainable_parameters() -"trainable params: 983040 || all params: 738651136 || trainable%: 0.13308583065659835" -``` - -Setup the optimizer and learning rate scheduler: - -```py -optimizer = torch.optim.AdamW(model.parameters(), lr=lr) -lr_scheduler = get_linear_schedule_with_warmup( - optimizer=optimizer, - num_warmup_steps=0, - num_training_steps=(len(train_dataloader) * num_epochs), -) -``` - -Move the model to the GPU, and then write a training loop to begin! - -```py -model = model.to(device) - -for epoch in range(num_epochs): - model.train() - total_loss = 0 - for step, batch in enumerate(tqdm(train_dataloader)): - batch = {k: v.to(device) for k, v in batch.items()} - outputs = model(**batch) - loss = outputs.loss - total_loss += loss.detach().float() - loss.backward() - optimizer.step() - lr_scheduler.step() - optimizer.zero_grad() - - model.eval() - eval_loss = 0 - eval_preds = [] - for step, batch in enumerate(tqdm(eval_dataloader)): - batch = {k: v.to(device) for k, v in batch.items()} - with torch.no_grad(): - outputs = model(**batch) - loss = outputs.loss - eval_loss += loss.detach().float() - eval_preds.extend( - tokenizer.batch_decode(torch.argmax(outputs.logits, -1).detach().cpu().numpy(), skip_special_tokens=True) - ) - - eval_epoch_loss = eval_loss / len(eval_dataloader) - eval_ppl = torch.exp(eval_epoch_loss) - train_epoch_loss = total_loss / len(train_dataloader) - train_ppl = torch.exp(train_epoch_loss) - print(f"{epoch=}: {train_ppl=} {train_epoch_loss=} {eval_ppl=} {eval_epoch_loss=}") -``` - -Let's see how well the model performs on the validation set: - -```py -correct = 0 -total = 0 -for pred, true in zip(eval_preds, dataset["validation"]["text_label"]): - if pred.strip() == true.strip(): - correct += 1 - total += 1 -accuracy = correct / total * 100 -print(f"{accuracy=} % on the evaluation dataset") -print(f"{eval_preds[:10]=}") -print(f"{dataset['validation']['text_label'][:10]=}") -"accuracy=97.3568281938326 % on the evaluation dataset" -"eval_preds[:10]=['neutral', 'positive', 'neutral', 'positive', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral']" -"dataset['validation']['text_label'][:10]=['neutral', 'positive', 'neutral', 'positive', 'neutral', 'negative', 'negative', 'neutral', 'neutral', 'neutral']" -``` - -97% accuracy in just a few minutes; pretty good! 
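If you prefer, the same check can be done with the 🤗 Evaluate library instead of the manual loop (a sketch that assumes `evaluate` is installed; `classes`, `eval_preds`, and `dataset` come from earlier in this guide):

```py
import evaluate

accuracy_metric = evaluate.load("accuracy")
label2id = {label: i for i, label in enumerate(classes)}
results = accuracy_metric.compute(
    predictions=[label2id.get(pred.strip(), -1) for pred in eval_preds],
    references=[label2id[true.strip()] for true in dataset["validation"]["text_label"]],
)
print(results)  # e.g. {'accuracy': 0.97...}
```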
- -## Share model - -You can store and share your model on the Hub if you'd like. Login to your Hugging Face account and enter your token when prompted: - -```py -from huggingface_hub import notebook_login - -notebook_login() -``` - -Upload the model to a specifc model repository on the Hub with the [`~transformers.PreTrainedModel.push_to_hub`] function: - -```py -peft_model_id = "your-name/t5-large_PREFIX_TUNING_SEQ2SEQ" -model.push_to_hub("your-name/t5-large_PREFIX_TUNING_SEQ2SEQ", use_auth_token=True) -``` - -If you check the model file size in the repository, you'll see that it is only 3.93MB! 🤏 - -## Inference - -Once the model has been uploaded to the Hub, anyone can easily use it for inference. Load the configuration and model: - -```py -from peft import PeftModel, PeftConfig - -peft_model_id = "stevhliu/t5-large_PREFIX_TUNING_SEQ2SEQ" - -config = PeftConfig.from_pretrained(peft_model_id) -model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path) -model = PeftModel.from_pretrained(model, peft_model_id) -``` - -Get and tokenize some text about financial news: - -```py -inputs = tokenizer( - "The Lithuanian beer market made up 14.41 million liters in January , a rise of 0.8 percent from the year-earlier figure , the Lithuanian Brewers ' Association reporting citing the results from its members .", - return_tensors="pt", -) -``` - -Put the model on a GPU and *generate* the predicted text sentiment: - -```py -model.to(device) - -with torch.no_grad(): - inputs = {k: v.to(device) for k, v in inputs.items()} - outputs = model.generate(input_ids=inputs["input_ids"], max_new_tokens=10) - print(tokenizer.batch_decode(outputs.detach().cpu().numpy(), skip_special_tokens=True)) -["positive"] -``` \ No newline at end of file From 717c4559f7582e820bf6886f55b5de55aadf052e Mon Sep 17 00:00:00 2001 From: Steven Liu Date: Mon, 8 Jan 2024 17:11:06 -0800 Subject: [PATCH 2/4] small edits --- docs/source/task_guides/prompt_based_methods.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/source/task_guides/prompt_based_methods.md b/docs/source/task_guides/prompt_based_methods.md index 0b65dbd491..9257d7328b 100644 --- a/docs/source/task_guides/prompt_based_methods.md +++ b/docs/source/task_guides/prompt_based_methods.md @@ -16,7 +16,7 @@ rendered properly in your Markdown viewer. # Prompt-based methods -A prompt usually describes a task or provides an example of the task you want the model to learn. Instead of manually creating these prompts, soft prompting methods add learnable parameters to the input embeddings that can be optimized for a specific task while keeping the pretrained model's parameters frozen. This makes it both faster and easier to finetune large language models (LLMs) for new downstream tasks. +A prompt can describe a task or provide an example of the task you want the model to learn. Instead of manually creating these prompts, soft prompting methods add learnable parameters to the input embeddings that can be optimized for a specific task while keeping the pretrained model's parameters frozen. This makes it both faster and easier to finetune large language models (LLMs) for new downstream tasks. The PEFT library supports several types of prompting methods (p-tuning, prefix tuning, prompt tuning, multitask prompt tuning) and you can learn more about how these methods work conceptually in the [Soft prompts](../conceptual_guides/prompting) guide. 
If you're interested in applying these methods to other tasks, take a look at our collection of [notebooks](https://huggingface.co/spaces/PEFT/soft-prompting)! @@ -38,7 +38,7 @@ pip install -q peft transformers datasets For this guide, you'll use the `twitter_complaints` subset of the [RAFT](https://huggingface.co/datasets/ought/raft) dataset. The `twitter_complaints` subset contains tweets labeled as `complaint` and `no complaint`, and you can check out the [dataset viewer](https://huggingface.co/datasets/ought/raft/viewer/twitter_complaints) for a better idea of what the data looks like. -Use the [`~datasets.load_dataset`] function to load the dataset and create a new `text_label` column so it is easier to understand what the `Label` values `1` and `2` mean. +Use the [`~datasets.load_dataset`] function to load the dataset and create a new `text_label` column so it is easier to understand what the `Label` values, `1` and `2` mean. ```py from datasets import load_dataset @@ -118,7 +118,7 @@ processed_ds = ds.map( ) ``` -Finally, create a training and evaluation [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). Set `pin_memory=True` to speed up the data transfer to the GPU during training if the samples in your dataset are on a CPU. +Finally, create a training and evaluation [`DataLoader`](https://pytorch.org/docs/stable/data.html#torch.utils.data.DataLoader). You can set `pin_memory=True` to speed up the data transfer to the GPU during training if the samples in your dataset are on a CPU. ```py from torch.utils.data import DataLoader @@ -170,7 +170,7 @@ model.print_trainable_parameters() -[Prefix tuning](../conceptual_guides/prompting#prefix-tuning) adds task-specific parameters in all of the model layers which are optimized by a separate feedforward network. Create a [`PrefixTuningConfig`] with the task type and number of virtual tokens to add and learn. +[Prefix tuning](../conceptual_guides/prompting#prefix-tuning) adds task-specific parameters in all of the model layers which are optimized by a separate feed-forward network. Create a [`PrefixTuningConfig`] with the task type and number of virtual tokens to add and learn. 
```py from peft import PrefixTuningConfig From b0a2a8520331003792b8cbc64bd17e7780839ecc Mon Sep 17 00:00:00 2001 From: Steven Liu Date: Tue, 9 Jan 2024 10:10:08 -0800 Subject: [PATCH 3/4] feedback --- .../task_guides/prompt_based_methods.md | 34 ++++++++----------- 1 file changed, 14 insertions(+), 20 deletions(-) diff --git a/docs/source/task_guides/prompt_based_methods.md b/docs/source/task_guides/prompt_based_methods.md index 9257d7328b..6c5e2c5b2d 100644 --- a/docs/source/task_guides/prompt_based_methods.md +++ b/docs/source/task_guides/prompt_based_methods.md @@ -74,20 +74,13 @@ import torch max_length = 64 -def preprocess_function(examples): +def preprocess_function(examples, text_column="Tweet text", label_column="text_label"): batch_size = len(examples[text_column]) inputs = [f"{text_column} : {x} Label : " for x in examples[text_column]] targets = [str(x) for x in examples[label_column]] model_inputs = tokenizer(inputs) labels = tokenizer(targets) - for i in range(batch_size): - sample_input_ids = model_inputs["input_ids"][i] - label_input_ids = labels["input_ids"][i] + [tokenizer.pad_token_id] - # print(i, sample_input_ids, label_input_ids) - model_inputs["input_ids"][i] = sample_input_ids + label_input_ids - labels["input_ids"][i] = [-100] * len(sample_input_ids) + label_input_ids - model_inputs["attention_mask"][i] = [1] * len(model_inputs["input_ids"][i]) - # print(model_inputs) + classes = [k.replace("_", " ") for k in ds["train"].features["Label"].names] for i in range(batch_size): sample_input_ids = model_inputs["input_ids"][i] label_input_ids = labels["input_ids"][i] @@ -159,7 +152,7 @@ Call the [`~PeftModel.print_trainable_parameters`] method to compare the number [P-tuning](../conceptual_guides/prompting#p-tuning) adds a trainable embedding tensor where the prompt tokens can be added anywhere in the input sequence. Create a [`PromptEncoderConfig`] with the task type, the number of virtual tokens to add and learn, and the hidden size of the encoder for learning the prompt parameters. ```py -from peft import PromptEncoderConfig +from peft import PromptEncoderConfig, get_peft_model peft_config = PromptEncoderConfig(task_type="CAUSAL_LM", num_virtual_tokens=20, encoder_hidden_size=128) model = get_peft_model(model, peft_config) @@ -170,10 +163,10 @@ model.print_trainable_parameters() -[Prefix tuning](../conceptual_guides/prompting#prefix-tuning) adds task-specific parameters in all of the model layers which are optimized by a separate feed-forward network. Create a [`PrefixTuningConfig`] with the task type and number of virtual tokens to add and learn. +[Prefix tuning](../conceptual_guides/prompting#prefix-tuning) adds task-specific parameters in all of the model layers, which are optimized by a separate feed-forward network. Create a [`PrefixTuningConfig`] with the task type and number of virtual tokens to add and learn. ```py -from peft import PrefixTuningConfig +from peft import PrefixTuningConfig, get_peft_model peft_config = PrefixTuningConfig(task_type="CAUSAL_LM", num_virtual_tokens=20) model = get_peft_model(model, peft_config) @@ -187,7 +180,7 @@ model.print_trainable_parameters() [Prompt tuning](../conceptual_guides/prompting#prompt-tuning) formulates all tasks as a *generation* task, and it adds a task-specific prompt to the input which is updated independently. 
Create a [`PromptTuningConfig`] with the task type, the initial prompt tuning text to train the model with (in this case, it is classifying whether tweets are complaints or not), the number of virtual tokens to add and learn, and a tokenizer. ```py -from peft import PromptTuningConfig, PromptTuningInit +from peft import PromptTuningConfig, PromptTuningInit, get_peft_model peft_config = PromptTuningConfig( task_type="CAUSAL_LM", @@ -227,7 +220,8 @@ Move the model to the GPU and create a training loop that reports the loss and p ```py from tqdm import tqdm -model = model.to("cuda") +device = "cuda" +model = model.to(device) for epoch in range(num_epochs): model.train() @@ -269,9 +263,9 @@ Once training is complete, you can upload your model to the Hub with the [`~tran ```py from huggingface_hub import notebook_login -notebook_login() -peft_model_id = "your-name/bloomz-560-m-peft-method" -model.push_to_hub("your-name/bloomz-560-m-peft-method") +account = +peft_model_id = f"{account}/bloomz-560-m-peft-method" +model.push_to_hub(peft_model_id) ``` If you check the model file size in the repository, you’ll see that it is only a few hundred bytes! @@ -283,12 +277,12 @@ Load the model from the Hub for inference and let's test it out on a tweet. ```py from peft import AutoPeftModelForCausalLM -model = AutoPeftModelForCausalLM.from_pretrained("stevhliu/bloomz-560m-p-tuning").to("cuda") +model = AutoPeftModelForCausalLM.from_pretrained("peft_model_id").to("cuda") tokenizer = AutoTokenizer.from_pretrained("bigscience/bloomz-560m") i = 15 -inputs = tokenizer(f'{text_column} : {dataset["test"][i]["Tweet text"]} Label : ', return_tensors="pt") -print(dataset["test"][i]["Tweet text"]) +inputs = tokenizer(f'{text_column} : {ds["test"][i]["Tweet text"]} Label : ', return_tensors="pt") +print(ds["test"][i]["Tweet text"]) "@NYTsupport i have complained a dozen times & yet my papers are still thrown FAR from my door. Why is this so hard to resolve?" ``` From f955b265b02e71fae6bbca27c13029468ba6bde6 Mon Sep 17 00:00:00 2001 From: Steven Liu Date: Thu, 18 Jan 2024 11:46:39 -0800 Subject: [PATCH 4/4] feedback --- .../task_guides/prompt_based_methods.md | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/docs/source/task_guides/prompt_based_methods.md b/docs/source/task_guides/prompt_based_methods.md index 6c5e2c5b2d..5b74b9328d 100644 --- a/docs/source/task_guides/prompt_based_methods.md +++ b/docs/source/task_guides/prompt_based_methods.md @@ -1,4 +1,4 @@ -