Merge remote-tracking branch 'upstream/main' into main
DTennant committed Apr 29, 2024
2 parents da4dbe2 + 7a22b7d commit 3a091cd
Showing 20 changed files with 511 additions and 34 deletions.
61 changes: 61 additions & 0 deletions .github/workflows/build_docker_images.yml
@@ -46,6 +46,7 @@ jobs:
tags: huggingface/peft-cpu

- name: Post to a Slack channel
if: always()
id: slack
#uses: slackapi/[email protected]
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
@@ -102,6 +103,7 @@ jobs:
tags: huggingface/peft-gpu

- name: Post to a Slack channel
if: always()
id: slack
#uses: slackapi/[email protected]
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
@@ -159,6 +161,7 @@ jobs:


- name: Post to a Slack channel
if: always()
id: slack
#uses: slackapi/[email protected]
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
@@ -216,6 +219,64 @@ jobs:
tags: huggingface/peft-gpu-bnb-latest

- name: Post to a Slack channel
if: always()
id: slack
#uses: slackapi/[email protected]
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
with:
# Slack channel id, channel name, or user id to post message.
# See also: https://api.slack.com/methods/chat.postMessage#channels
channel-id: ${{ env.CI_SLACK_CHANNEL }}
# For posting a rich message using Block Kit
payload: |
{
"text": "peft-gpu + bnb-source (latest) Docker Image build result: ${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}",
"blocks": [
{
"type": "section",
"text": {
"type": "mrkdwn",
"text": "peft-gpu + bnb-source (latest) Docker Image build result: ${{ job.status }}\n${{ github.event.pull_request.html_url || github.event.head_commit.url }}"
}
}
]
}
env:
SLACK_BOT_TOKEN: ${{ secrets.SLACK_CIFEEDBACK_BOT_TOKEN }}

latest-cuda-bnb-source-multi:
name: "Latest Peft GPU + bnb (multi-backend) source [accelerate / peft / transformers source]"
runs-on: ubuntu-latest
steps:
- name: Cleanup disk
run: |
sudo ls -l /usr/local/lib/
sudo ls -l /usr/share/
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
sudo rm -rf /usr/local/lib/android
sudo rm -rf /usr/share/dotnet
sudo du -sh /usr/local/lib/
sudo du -sh /usr/share/
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Check out code
uses: actions/checkout@v3
- name: Login to DockerHub
uses: docker/login-action@v1
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_PASSWORD }}

- name: Build and Push GPU
uses: docker/build-push-action@v4
with:
context: ./docker/peft-gpu-bnb-multi-source
push: true
tags: huggingface/peft-gpu-bnb-multi-source

- name: Post to a Slack channel
if: always()
id: slack
#uses: slackapi/[email protected]
uses: slackapi/slack-github-action@6c661ce58804a1a20f6dc5fbee7f0381b469e001
4 changes: 2 additions & 2 deletions .github/workflows/nightly-bnb.yml
@@ -18,7 +18,7 @@ jobs:
strategy:
fail-fast: false
matrix:
docker-image-name: ["huggingface/peft-gpu-bnb-source:latest", "huggingface/peft-gpu-bnb-latest:latest"]
docker-image-name: ["huggingface/peft-gpu-bnb-source:latest", "huggingface/peft-gpu-bnb-latest:latest", "huggingface/peft-gpu-bnb-multi-source:latest"]
runs-on: [self-hosted, single-gpu, nvidia-gpu, t4, ci]
env:
CUDA_VISIBLE_DEVICES: "0"
@@ -74,7 +74,7 @@ jobs:
strategy:
fail-fast: false
matrix:
docker-image-name: ["huggingface/peft-gpu-bnb-source:latest", "huggingface/peft-gpu-bnb-latest:latest"]
docker-image-name: ["huggingface/peft-gpu-bnb-source:latest", "huggingface/peft-gpu-bnb-latest:latest", "huggingface/peft-gpu-bnb-multi-source:latest"]
runs-on: [self-hosted, multi-gpu, nvidia-gpu, t4, ci]
env:
CUDA_VISIBLE_DEVICES: "0,1"
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
@@ -33,7 +33,7 @@ jobs:
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
os: ["ubuntu-latest", "macos-latest", "windows-latest"]
os: ["ubuntu-latest", "macos-12", "windows-latest"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
11 changes: 11 additions & 0 deletions docker/README.md
@@ -0,0 +1,11 @@
# PEFT Docker images

Here we store all PEFT Docker images used in our testing infrastructure. We use Python 3.8 for now on all our images.

- `peft-cpu`: PEFT compiled on CPU with all other HF libraries installed from the main branch
- `peft-gpu`: PEFT compiled for NVIDIA GPUs with all other HF libraries installed from the main branch
- `peft-gpu-bnb-source`: PEFT compiled for NVIDIA GPUs with `bitsandbytes` and all other HF libraries installed from the main branch
- `peft-gpu-bnb-latest`: PEFT compiled for NVIDIA GPUs with `bitsandbytes` compiled from main and all other HF libraries installed from the latest PyPI
- `peft-gpu-bnb-multi-source`: PEFT compiled for NVIDIA GPUs with `bitsandbytes` compiled from the `multi-backend-refactor` branch and all other HF libraries installed from the main branch

`peft-gpu-bnb-source` and `peft-gpu-bnb-multi-source` are essentially the same, the only difference being that `bitsandbytes` is compiled from a different branch. Make sure to propagate any changes you apply to one file to the other!
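
A quick way to check, inside any of these images, that the HF libraries were indeed installed from their main branches (a minimal sketch, not part of the build — installs from a main branch typically carry a `.dev0`-style version suffix):

```py
import accelerate
import peft
import transformers

# Installs from the main branch usually report a ".dev0" development version.
for lib in (peft, transformers, accelerate):
    print(lib.__name__, lib.__version__)
```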
68 changes: 68 additions & 0 deletions docker/peft-gpu-bnb-multi-source/Dockerfile
@@ -0,0 +1,68 @@
# Builds GPU docker image of PyTorch
# Uses multi-staged approach to reduce size
# Stage 1
# Use base conda image to reduce time
FROM continuumio/miniconda3:latest AS compile-image
# Specify py version
ENV PYTHON_VERSION=3.8
# Install apt libs - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
RUN apt-get update && \
apt-get install -y curl git wget software-properties-common git-lfs && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

# Install audio-related libraries
RUN apt-get update && \
apt install -y ffmpeg

RUN apt install -y libsndfile1-dev
RUN git lfs install

# Create our conda env - copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
RUN conda create --name peft python=${PYTHON_VERSION} ipython jupyter pip
RUN python3 -m pip install --no-cache-dir --upgrade pip

# Below is copied from https://github.com/huggingface/accelerate/blob/main/docker/accelerate-gpu/Dockerfile
# We don't install pytorch here yet since CUDA isn't available
# instead we use the direct torch wheel
ENV PATH /opt/conda/envs/peft/bin:$PATH
# Activate our bash shell
RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]

# Stage 2
FROM nvidia/cuda:12.1.0-devel-ubuntu22.04 AS build-image
COPY --from=compile-image /opt/conda /opt/conda
ENV PATH /opt/conda/bin:$PATH

RUN chsh -s /bin/bash
SHELL ["/bin/bash", "-c"]

# Install apt libs
RUN apt-get update && \
apt-get install -y curl git wget cmake && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

# Activate the conda env and install transformers + accelerate from source
# Also clone BNB and build it from source.
RUN source activate peft && \
python3 -m pip install -U --no-cache-dir \
librosa \
"soundfile>=0.12.1" \
scipy \
git+https://github.com/huggingface/transformers \
git+https://github.com/huggingface/accelerate \
peft[test]@git+https://github.com/huggingface/peft \
optimum \
auto-gptq && \
git clone https://github.com/TimDettmers/bitsandbytes && cd bitsandbytes && git checkout multi-backend-refactor && \
cmake -B . -DCOMPUTE_BACKEND=cuda -S . && \
cmake --build . && \
pip install -e . && \
pip freeze | grep bitsandbytes

RUN echo "source activate peft" >> ~/.profile

# Activate the virtualenv
CMD ["/bin/bash"]
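
Once the image is built, a quick interpreter check (a sketch, not part of the Dockerfile) can confirm that the `peft` env picked up the `bitsandbytes` build from the `multi-backend-refactor` branch and whether CUDA is visible:

```py
import bitsandbytes as bnb
import torch

print(bnb.__version__)            # should report the source build installed above
print(torch.cuda.is_available())  # True on a GPU runner, False on the CPU build host
```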
8 changes: 6 additions & 2 deletions docker/peft-gpu/Dockerfile
@@ -42,16 +42,20 @@ RUN source activate peft && \

# Add autoawq for quantization testing
RUN source activate peft && \
python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.1/autoawq-0.2.1-cp38-cp38-linux_x86_64.whl
python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ/releases/download/v0.2.4/autoawq-0.2.4-cp38-cp38-linux_x86_64.whl
RUN source activate peft && \
python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.0.4/autoawq_kernels-0.0.4-cp38-cp38-linux_x86_64.whl
python3 -m pip install --no-cache-dir https://github.com/casper-hansen/AutoAWQ_kernels/releases/download/v0.0.6/autoawq_kernels-0.0.6-cp38-cp38-linux_x86_64.whl

# Install apt libs
RUN apt-get update && \
apt-get install -y curl git wget && \
apt-get clean && \
rm -rf /var/lib/apt/lists*

# Add eetq for quantization testing
RUN source activate peft && \
python3 -m pip install git+https://github.com/NetEase-FuXi/EETQ.git

# Activate the conda env and install transformers + accelerate from source
RUN source activate peft && \
python3 -m pip install -U --no-cache-dir \
2 changes: 0 additions & 2 deletions docs/source/accelerate/deepspeed.md
@@ -22,8 +22,6 @@ For DeepSpeed Stage 3 + QLoRA, please refer to the section [Use PEFT QLoRA and D

To confirm these observations, we ran the SFT (Supervised Fine-tuning) [official example scripts](https://github.com/huggingface/trl/tree/main/examples) of the [Transformers Reinforcement Learning (TRL) library](https://github.com/huggingface/trl) using QLoRA + PEFT and the accelerate configs available [here](https://github.com/huggingface/trl/tree/main/examples/accelerate_configs). We ran these experiments on 2x NVIDIA T4 GPUs.

Note DeepSpeed-Zero3 and `bitsandbytes` are currently **not** compatible.

# Use PEFT and DeepSpeed with ZeRO3 for finetuning large models on multiple devices and multiple nodes

This section of the guide will help you learn how to use our DeepSpeed [training script](https://github.com/huggingface/peft/blob/main/examples/sft/train.py) for performing SFT. You'll configure the script to do SFT (supervised fine-tuning) of the Llama-70B model with LoRA and ZeRO-3 on 8x H100 80GB GPUs on a single machine. You can configure it to scale to multiple machines by changing the accelerate config.
36 changes: 36 additions & 0 deletions docs/source/developer_guides/quantization.md
@@ -128,6 +128,42 @@ quantized_model = get_peft_model(quantized_model, peft_config)

You can refer to the [Google Colab](https://colab.research.google.com/drive/12GTp1FCj5_0SnnNQH18h_2XFh9vS_guX?usp=sharing) example for an overview of AQLM+LoRA finetuning.

## EETQ quantization

You can also perform LoRA fine-tuning on EETQ-quantized models. The [EETQ](https://github.com/NetEase-FuXi/EETQ) package offers a simple and efficient way to perform 8-bit quantization, which is claimed to be faster than the `LLM.int8()` algorithm. First, make sure that you have a transformers version that is compatible with EETQ (e.g. by installing it from the latest PyPI or from source).

```py
import torch
from transformers import EetqConfig

config = EetqConfig("int8")
```

Pass the `config` to the [`~transformers.AutoModelForCausalLM.from_pretrained`] method.

```py
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1", quantization_config=config)
```

Then create a `LoraConfig` and pass it to `get_peft_model`:

```py
from peft import LoraConfig, get_peft_model

config = LoraConfig(
r=16,
lora_alpha=8,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
lora_dropout=0.05,
bias="none",
task_type="CAUSAL_LM"
)

model = get_peft_model(model, config)
```
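
As a quick sanity check after wrapping the model, you can confirm that only the LoRA adapter weights are trainable and save the adapter as usual. A minimal sketch, assuming the `model` from the snippets above (the output directory name is illustrative):

```py
# Only the LoRA adapter weights should be trainable on top of the
# frozen EETQ-quantized base model.
model.print_trainable_parameters()

# Saving works like for any other PEFT model; only the adapter weights are written.
model.save_pretrained("mistral-7b-v0.1-eetq-lora")
```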

## Next steps

If you're interested in learning more about quantization, the following may be helpful:
5 changes: 5 additions & 0 deletions src/peft/import_utils.py
@@ -77,3 +77,8 @@ def is_aqlm_available():
@lru_cache
def is_auto_awq_available():
return importlib.util.find_spec("awq") is not None


@lru_cache
def is_eetq_available():
return importlib.util.find_spec("eetq") is not None
21 changes: 12 additions & 9 deletions src/peft/tuners/ia3/layer.py
@@ -111,6 +111,7 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[List[str]] = N
if active_adapter in self.ia3_l.keys():
base_layer = self.get_base_layer()
ia3_l = transpose(self.ia3_l[active_adapter].data, self.fan_in_fan_out)
orig_dtype = base_layer.weight.data.dtype
if safe_merge:
orig_weights = base_layer.weight.data
orig_weights = torch.mul(orig_weights, ia3_l)
@@ -119,13 +120,14 @@ def merge(self, safe_merge: bool = False, adapter_names: Optional[List[str]] = N
raise ValueError(
f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
)
base_layer.weight.data = orig_weights
base_layer.weight.data = orig_weights.to(orig_dtype)
else:
base_layer.weight.data = torch.mul(base_layer.weight.data, ia3_l)
base_layer.weight.data = torch.mul(base_layer.weight.data, ia3_l).to(orig_dtype)

if not self.is_feedforward and (base_layer.bias is not None):
scaling = self.ia3_l[active_adapter].reshape(base_layer.bias.shape)
base_layer.bias.data = torch.mul(base_layer.bias.data, scaling.data)
orig_dtype = base_layer.bias.data.dtype
base_layer.bias.data = torch.mul(base_layer.bias.data, scaling.data).to(orig_dtype)

self.merged_adapters.append(active_adapter)

@@ -144,15 +146,16 @@ def unmerge(self) -> None:
base_layer = self.get_base_layer()
# Add tolerance to avoid division by zero
ia3_l = transpose(self.ia3_l[active_adapter].data, self.fan_in_fan_out) + 1e-8
base_layer.weight.data = torch.div(base_layer.weight.data, ia3_l)
orig_dtype = base_layer.weight.data.dtype
base_layer.weight.data = torch.div(base_layer.weight.data, ia3_l).to(orig_dtype)

if not self.is_feedforward and (base_layer.bias is not None):
scaling = self.ia3_l[active_adapter].reshape(base_layer.bias.shape)
base_layer.bias.data = torch.div(base_layer.bias.data, scaling.data + 1e-8)
orig_dtype = base_layer.bias.data.dtype
base_layer.bias.data = torch.div(base_layer.bias.data, scaling.data + 1e-8).to(orig_dtype)

def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
dtype = previous_dtype = x.dtype

if self.disable_adapters:
if self.merged:
self.unmerge()
@@ -171,13 +174,13 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
x = x.to(dtype)
# TODO: weight.dtype can be != self.ia3_l[self.active_adapters].dtype
# e.g. bf16 vs fp32. Is that okay?
interm = (x * ia3_scaling).to(self.get_base_layer().weight.dtype)
interm = (x * ia3_scaling).to(previous_dtype)
result = self.base_layer(interm, *args, **kwargs)
else:
result = self.base_layer(x, *args, **kwargs)
result = result.to(dtype) * ia3_scaling
result_dtype = result.dtype
result = (result * ia3_scaling).to(result_dtype)

result = result.to(previous_dtype)
return result


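For context on the `orig_dtype` casts added above: an elementwise multiplication between a half-precision base weight and an fp32 IA³ scaling vector follows PyTorch type promotion and silently yields an fp32 result, so merging would change the base layer's dtype. A minimal sketch of the effect (illustrative shapes, not the PEFT implementation):

```py
import torch

weight = torch.randn(4, 4, dtype=torch.bfloat16)  # base layer weight
ia3_l = torch.rand(4, dtype=torch.float32)         # IA3 scaling vector

# Plain multiplication promotes the result to fp32.
merged = torch.mul(weight, ia3_l)
print(merged.dtype)  # torch.float32

# Casting back, as the patch does, keeps the base layer in its original dtype.
orig_dtype = weight.dtype
merged = torch.mul(weight, ia3_l).to(orig_dtype)
print(merged.dtype)  # torch.bfloat16
```
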
7 changes: 6 additions & 1 deletion src/peft/tuners/lora/__init__.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from peft.import_utils import is_bnb_4bit_available, is_bnb_available
from peft.import_utils import is_bnb_4bit_available, is_bnb_available, is_eetq_available

from .config import LoftQConfig, LoraConfig
from .gptq import QuantLinear
@@ -34,4 +34,9 @@ def __getattr__(name):

return Linear4bit

if (name == "EetqLoraLinear") and is_eetq_available():
from .eetq import EetqLoraLinear

return EetqLoraLinear

raise AttributeError(f"module {__name__} has no attribute {name}")
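
The `__getattr__` hook above uses PEP 562 module-level lazy attribute access: `EetqLoraLinear` is only imported when it is first requested and the optional `eetq` package is available. A hedged sketch of how a caller might consume it:

```py
from peft.import_utils import is_eetq_available

if is_eetq_available():
    # Resolved through the module-level __getattr__ above, so the optional
    # eetq dependency is only imported when it is actually installed.
    from peft.tuners.lora import EetqLoraLinear
```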