diff --git a/README.md b/README.md index a7d33e508c9..4cd8b8bab68 100644 --- a/README.md +++ b/README.md @@ -355,13 +355,6 @@ NNCF may be straightforwardly integrated into training/evaluation pipelines of t NNCF is used as a compression backend within the renowned `transformers` repository in HuggingFace Optimum Intel. -### Git patches for third-party repository - -See [third_party_integration](./third_party_integration) for examples of code modifications (Git patches and base commit IDs are provided) that are necessary to integrate NNCF into the following repositories: - -- [huggingface-transformers](third_party_integration/huggingface_transformers/README.md) -**NOTE**: this patch is deprecated and will be removed from NNCF repository in future releases. - ## Installation Guide For detailed installation instructions please refer to the [Installation](./docs/Installation.md) page. @@ -386,8 +379,6 @@ NNCF is also available via [conda](https://anaconda.org/conda-forge/nncf): conda install -c conda-forge nncf ``` -You may also use one of the Dockerfiles in the [docker](./docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](#demos-tutorials-and-samples). - ### System requirements - Ubuntu\* 18.04 or later (64-bit) diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 7a1f4d37465..00000000000 --- a/docker/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Using docker - -## Step 1. Install docker - -Review the instructions for installing Docker [here](https://docs.docker.com/engine/install/ubuntu/) and configure Docker -to use a proxy server as described [here](https://docs.docker.com/network/proxy/#configure-the-docker-client). - -## Step 2. Install nvidia-docker - -*Skip this step if you don't have a GPU.* - -Review the instructions for installing nvidia-docker [here](https://github.com/NVIDIA/nvidia-docker). - -## Step 3. Build image - -In the project folder, run in a terminal: - -```bash -sudo docker image build --network=host -``` - -Use `--network` to duplicate the network settings of your localhost in the build context. - -## Step 4. Run container - -Run in a terminal: - -```bash -sudo docker run \ --it \ ---name=<container_name> \ ---runtime=nvidia \ ---network=host \ ---shm-size=1g \ ---ulimit memlock=-1 \ ---mount type=bind,source=<path_to_dataset>,target=<path_to_dataset_in_container> \ ---mount type=bind,source=<path_to_nncf_repo>,target=/home/nncf \ --<image_name> - ``` - -You should not use `--runtime=nvidia` if you want to use `--cpu-only` mode. - -Use `--shm-size` to increase the size of the shared memory directory. - -Now you have a working container and you can run the examples. 
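The sections removed above pointed users at Docker images and Git patches as ways to obtain a ready-to-run NNCF environment; with those gone, a plain `pip install nncf` (or the conda package) is the supported path. As a quick sanity check of such an installation, the sketch below runs post-training quantization on a toy PyTorch model. It is not taken from the repository and assumes a recent NNCF release in which `nncf.quantize` accepts PyTorch models; the model and the random calibration data are stand-ins.

```python
# Hypothetical smoke test for a pip-installed NNCF environment (not repository code).
import torch
import nncf  # pip install nncf

# Toy model and random calibration data standing in for a real workload.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
calibration_items = [torch.randn(1, 3, 32, 32) for _ in range(10)]
calibration_dataset = nncf.Dataset(calibration_items)

# Post-training INT8 quantization; subset_size is capped to the data we actually have.
quantized_model = nncf.quantize(model, calibration_dataset, subset_size=10)
print(quantized_model(calibration_items[0]).shape)
```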
diff --git a/docker/onnx/openvinoep/Dockerfile b/docker/onnx/openvinoep/Dockerfile deleted file mode 100644 index fd5a5d6a3e6..00000000000 --- a/docker/onnx/openvinoep/Dockerfile +++ /dev/null @@ -1,57 +0,0 @@ -FROM ubuntu:20.04 - -ARG PIP_EXTRA_INDEX_URL -ARG PIP_TRUSTED_HOST -ARG http_proxy -ARG https_proxy -ARG no_proxy - -RUN echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" && \ - echo "PIP_TRUSTED_HOST=${PIP_TRUSTED_HOST}" && \ - echo "http_proxy=${http_proxy}" && \ - echo "https_proxy=${https_proxy}" && \ - echo "no_proxy=${no_proxy}" && \ - -ARG DEBIAN_FRONTEND=noninteractive -ARG TZ=Etc/UTC - -RUN apt-get update && apt-get -y install --no-install-recommends \ - git=2.25 \ - build-essential=12.8 \ - python3.8-dev=3.8 \ - python3.8-venv=3.8 \ - python3-opencv=4.2 \ - && rm -rf /var/lib/apt/lists/* - -# Add user -ARG BUILD_UID=1001 -ARG BUILD_USER=onnxruntimedev - -RUN adduser --uid $BUILD_UID $BUILD_USER && \ - usermod -a -G video,users ${BUILD_USER} -ENV WORKDIR_PATH /home/${BUILD_USER} - -# Copy nncf -WORKDIR ${WORKDIR_PATH}/nncf -COPY nncf nncf -COPY examples examples -COPY tests tests -COPY setup.py ./ -COPY README.md ./ -COPY Makefile ./ - -WORKDIR ${WORKDIR_PATH} -RUN chown -R ${BUILD_USER}:${BUILD_USER} nncf - -USER ${BUILD_USER} - -# Create & activate venv -ENV VIRTUAL_ENV=${WORKDIR_PATH}/venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - -WORKDIR ${WORKDIR_PATH}/nncf -ENV PYTHONPATH=$PYTHONPATH:${WORKDIR_PATH}/nncf -RUN make install-onnx-dev - -WORKDIR ${WORKDIR_PATH} diff --git a/docker/onnx/openvinoep/build.sh b/docker/onnx/openvinoep/build.sh deleted file mode 100755 index b49c484f9a3..00000000000 --- a/docker/onnx/openvinoep/build.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_DIR=$(dirname "$0") -WORK_DIR="${SCRIPT_DIR}/../../../" - -cd $WORK_DIR && echo "WORK_DIR=$PWD" - -docker build -t onnx_ptq_experimental:dev \ - --build-arg http_proxy=$http_proxy \ - --build-arg https_proxy=$https_proxy \ - --build-arg no_proxy=$no_proxy \ - --build-arg PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \ - --build-arg PIP_TRUSTED_HOST=$PIP_TRUSTED_HOST \ - -f docker/onnx/openvinoep/Dockerfile \ - . 
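The deleted `docker/onnx/openvinoep` image (built by the script above and tagged `onnx_ptq_experimental:dev`) existed to run ONNX post-training quantization experiments via `make install-onnx-dev`. As a rough illustration of that flow without the image, here is a hedged sketch that quantizes an ONNX model through the public `nncf.quantize` API; the exported toy model, the file names, and the `input` tensor name are placeholders introduced for this example only.

```python
# Hypothetical ONNX post-training quantization sketch (not repository code).
import numpy as np
import onnx
import torch
import nncf

# Export a tiny torch model to ONNX so the example is self-contained.
torch.onnx.export(torch.nn.Linear(16, 4), torch.randn(1, 16), "tiny.onnx",
                  input_names=["input"], output_names=["output"])
onnx_model = onnx.load("tiny.onnx")

# For the ONNX backend, each calibration item is a feed dict: {input name: numpy array}.
calibration_items = [{"input": np.random.rand(1, 16).astype(np.float32)} for _ in range(10)]
quantized_model = nncf.quantize(onnx_model, nncf.Dataset(calibration_items), subset_size=10)
onnx.save(quantized_model, "tiny_int8.onnx")
```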
diff --git a/docker/tensorflow/gpu/Dockerfile b/docker/tensorflow/gpu/Dockerfile deleted file mode 100644 index 8f44ab59b0c..00000000000 --- a/docker/tensorflow/gpu/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04 - -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - apt-transport-https=2.0 \ - git=2.25 && \ - rm -rf /var/lib/apt/lists/* - -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - build-essential=12.8 \ - libgl1-mesa-glx=21.2 \ - libglib2.0-dev=2.64 \ - wget=1.20 \ - curl=7.68 \ - zip=3.0 \ - unzip=6.0 \ - nano=4.8 \ - openssh-server=1:8.2 \ - openssh-client=1:8.2 \ - sudo=1.8 \ - python3=3.8 \ - python3-dev=3.8 \ - python3-pip=20.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/bin -RUN ln -s python3.8 python - -RUN pip3 install --no-cache-dir --upgrade pip==23.3 \ - && pip3 install --no-cache-dir setuptools==69.0 - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility - -ENTRYPOINT ["cd /home/nncf \ - && python setup.py install --tf \ - && pip3 install -r examples/tensorflow/requirements.txt \ - && bash"] diff --git a/docker/torch/cpu/Dockerfile b/docker/torch/cpu/Dockerfile deleted file mode 100755 index eea028ef8df..00000000000 --- a/docker/torch/cpu/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM ubuntu:20.04 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - apt-transport-https=2.0 \ - git=2.25 && \ - rm -rf /var/lib/apt/lists/* - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive \ - apt-get install -y --no-install-recommends \ - wget=1.20 \ - curl=7.68 \ - zip=3.0 \ - unzip=6.0 \ - nano=4.8 \ - openssh-server=1:8.2 \ - openssh-client=1:8.2 \ - sudo=1.8 \ - python3=3.8 \ - python3-dev=3.8 \ - python3-pip=20.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -ENTRYPOINT ["cd /home/nncf \ - && python setup.py install --torch \ - && pip3 install -r examples/torch/requirements.txt \ - && bash"] diff --git a/docker/torch/gpu/Dockerfile b/docker/torch/gpu/Dockerfile deleted file mode 100755 index 26e902f3c89..00000000000 --- a/docker/torch/gpu/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - apt-transport-https=2.0 \ - git=2.25 && \ - rm -rf /var/lib/apt/lists/* - -# Required for nvidia-docker v1 -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive \ - apt-get install -y --no-install-recommends \ - wget=1.20 \ - curl=7.68 \ - zip=3.0 \ - unzip=6.0 \ - nano=4.8 \ - openssh-server=1:8.2 \ - openssh-client=1:8.2 \ - sudo=1.8 \ - python3=3.8 \ - python3-dev=3.8 \ - python3-pip=20.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/bin -RUN ln -s python3.8 python - - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility - -ENTRYPOINT ["cd /home/nncf \ - && python setup.py install --torch \ - && pip3 install -r examples/torch/requirements.txt \ - && bash"] diff --git 
a/docs/Installation.md b/docs/Installation.md index e1a9624ce4f..1a42be33a6a 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -58,10 +58,6 @@ pip install git+https://github.com/openvinotoolkit/nncf@bd189e2#egg=nncf Note that in order for this to work for pip versions >= 21.3, your Git version must be at least 2.22. -## As a Docker image - -Use one of the Dockerfiles in the [docker](../docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](../README.md#model-compression-samples). - ## Corresponding versions The following table lists the recommended corresponding versions of backend packages diff --git a/tests/torch/sparsity/movement/test_training_with_third_party.py b/tests/torch/sparsity/movement/test_training_with_third_party.py deleted file mode 100644 index f353ac6bf33..00000000000 --- a/tests/torch/sparsity/movement/test_training_with_third_party.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from pathlib import Path - -import pytest - -from tests.torch.helpers import Command -from tests.torch.sparsity.movement.helpers import MRPC_CONFIG_FILE_NAME -from tests.torch.sparsity.movement.helpers import TRAINING_SCRIPTS_PATH -from tests.torch.test_sanity_third_party import TransformersVirtualEnvInstaller -from tests.torch.test_sanity_third_party import create_command_line - - -@pytest.fixture(scope="class") -def temp_folder(tmp_path_factory): - root_folder = tmp_path_factory.mktemp("movement_third_party") - folders = {"models": root_folder / "models", "venv": root_folder / "venv", "repo": root_folder / "repo"} - for folder in folders.values(): - Path(folder).mkdir(exist_ok=True, parents=True) - return folders - - -@pytest.mark.usefixtures("temp_folder") -class TestMovementWithTransformers: - @pytest.fixture(autouse=True) - def setup(self, temp_folder): - self.temp_folder = temp_folder - self.env = TransformersVirtualEnvInstaller(temp_folder["venv"], temp_folder["repo"]) - - @pytest.mark.dependency(name="install_transformers") - def test_install_transformers_env(self, third_party, pip_cache_dir): - if not third_party: - pytest.skip( - "Skip tests of movement sparsity with patched transformers package " - "since `--third-party-sanity` is False." 
- ) - self.env.install_env(pip_cache_dir) - - @pytest.mark.dependency(depends=["install_transformers"], name="glue_movement_train") - def test_movement_glue_train(self): - nncf_config = TRAINING_SCRIPTS_PATH / MRPC_CONFIG_FILE_NAME - output_dir = Path(self.temp_folder["models"], nncf_config.stem) - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path " - "google/bert_uncased_L-2_H-128_A-2 --task_name mrpc --do_train " - " --per_gpu_train_batch_size 4 --learning_rate 1e-4 --num_train_epochs 4 --max_seq_length 128 " - " --max_train_samples 8 --output_dir {output_dir} --save_steps 200 --nncf_config {nncf_config} ".format( - output_dir=output_dir, nncf_config=nncf_config - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert Path(output_dir, "pytorch_model.bin").is_file() - - @pytest.mark.dependency(depends=["install_transformers", "glue_movement_train"]) - def test_movement_glue_eval(self): - nncf_config = TRAINING_SCRIPTS_PATH / MRPC_CONFIG_FILE_NAME - model_dir = Path(self.temp_folder["models"], nncf_config.stem) - output_dir = Path(self.temp_folder["models"], nncf_config.stem + "_eval") - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path {model_dir}" - " --task_name mrpc --do_eval " - " --learning_rate 2e-5" - " --max_seq_length 128 --output_dir {output_dir}" - " --max_eval_samples 10" - " --nncf_config {nncf_config}".format(model_dir=model_dir, output_dir=output_dir, nncf_config=nncf_config) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() diff --git a/tests/torch/test_sanity_third_party.py b/tests/torch/test_sanity_third_party.py deleted file mode 100644 index 85da106eeb6..00000000000 --- a/tests/torch/test_sanity_third_party.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import subprocess -import sys - -import pytest - -from nncf.torch import BKC_TORCH_VERSION -from tests.shared.paths import PROJECT_ROOT -from tests.torch.helpers import Command - -TRANSFORMERS_COMMIT = "bd469c40659ce76c81f69c7726759d249b4aef49" -INSTALL_PATH = PROJECT_ROOT.parent -DATASET_PATH = os.path.join(PROJECT_ROOT, "tests", "torch", "data", "mock_datasets") - - -def create_command_line(args, venv_activate, python=sys.executable, cuda_string=""): - line = "{venv_activate} && {cuda} {python_exe} {args}".format( - venv_activate=venv_activate, cuda=cuda_string, args=args, python_exe=python - ) - return line - - -@pytest.fixture(autouse=True, scope="session") -def skip_tests(third_party): - if not third_party: - pytest.skip() - - -@pytest.fixture(scope="session") -def temp_folder(tmp_path_factory): - return { - "models": str(tmp_path_factory.mktemp("models", False)), - "venv": str(tmp_path_factory.mktemp("venv", False)), - "repo": str(tmp_path_factory.mktemp("repo", False)), - } - - -class CachedPipRunner: - def __init__(self, venv_activation_script_path: str, cache_dir: str = None): - self.venv_activate = venv_activation_script_path - self.cache_dir = cache_dir - - def run_pip(self, pip_command: str, cwd: str = None, use_cache: bool = True): - if not use_cache: - cache_dir_entry = "--no-cache-dir" - elif self.cache_dir is not None: - cache_dir_entry = "--cache-dir {}".format(self.cache_dir) - else: - cache_dir_entry = "" - subprocess.run(f"{self.venv_activate} && pip {cache_dir_entry} {pip_command}", check=True, shell=True, cwd=cwd) - - -class TransformersVirtualEnvInstaller: - def __init__(self, venv_path, repo_path): - self.VENV_PATH = str(venv_path) - self.VENV_ACTIVATE = str(". {}/bin/activate".format(self.VENV_PATH)) - self.PYTHON_EXECUTABLE = str("{}/bin/python".format(self.VENV_PATH)) - self.TRANSFORMERS_REPO_PATH = str(os.path.join(repo_path, "transformers")) - self.CUDA_VISIBLE_STRING = "export CUDA_VISIBLE_DEVICES=0;" - self.PATH_TO_PATCH = str( - os.path.join( - PROJECT_ROOT, - "third_party_integration", - "huggingface_transformers", - "0001-Modifications-for-NNCF-usage.patch", - ) - ) - - def install_env(self, pip_cache_dir): - version_string = "{}.{}".format(sys.version_info[0], sys.version_info[1]) - subprocess.call("virtualenv -ppython{} {}".format(version_string, self.VENV_PATH), shell=True) - pip_runner = CachedPipRunner(self.VENV_ACTIVATE, pip_cache_dir) - pip_runner.run_pip("install --upgrade pip") # cache options are available with pip > 20.2 - pip_runner.run_pip("uninstall setuptools -y") - pip_runner.run_pip("install setuptools") - pip_runner.run_pip("install onnx") - torch_install_cmd = "install torch=={}".format(BKC_TORCH_VERSION) - pip_runner.run_pip(torch_install_cmd) - subprocess.run( - "git clone https://github.com/huggingface/transformers {}".format(self.TRANSFORMERS_REPO_PATH), - check=True, - shell=True, - ) - subprocess.run( - "git checkout {}".format(TRANSFORMERS_COMMIT), check=True, shell=True, cwd=self.TRANSFORMERS_REPO_PATH - ) - subprocess.run("cp {} .".format(self.PATH_TO_PATCH), check=True, shell=True, cwd=self.TRANSFORMERS_REPO_PATH) - subprocess.run( - "git apply 0001-Modifications-for-NNCF-usage.patch", check=True, shell=True, cwd=self.TRANSFORMERS_REPO_PATH - ) - pip_runner.run_pip("install .", cwd=self.TRANSFORMERS_REPO_PATH) - pip_runner.run_pip('install -e ".[testing]"', cwd=self.TRANSFORMERS_REPO_PATH) - for sample_folder in ["question-answering", "text-classification", "language-modeling", "token-classification"]: - 
pip_runner.run_pip( - f"install -r examples/pytorch/{sample_folder}/requirements.txt", cwd=self.TRANSFORMERS_REPO_PATH - ) - pip_runner.run_pip("install boto3", cwd=self.TRANSFORMERS_REPO_PATH) - # WA for deleted CONLL2003 in datasets==1.11.0 (https://github.com/huggingface/datasets/issues/3582) - pip_runner.run_pip("install -U datasets", cwd=self.TRANSFORMERS_REPO_PATH) - pip_runner.run_pip("install -e .", cwd=PROJECT_ROOT) - - -class TestTransformers: - @pytest.fixture(autouse=True) - def setup(self, temp_folder): - self.env = TransformersVirtualEnvInstaller(temp_folder["venv"], temp_folder["repo"]) - - @pytest.mark.dependency(name="install_trans") - def test_install_trans_(self, pip_cache_dir): - self.env.install_env(pip_cache_dir) - - @pytest.mark.dependency(depends=["install_trans"], name="xnli_train") - def test_xnli_train(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert-base-chinese" - " --language zh --train_language zh --do_train --per_gpu_train_batch_size 24" - " --learning_rate 5e-5 --num_train_epochs 0.0001 --max_seq_length 128 --output_dir {}" - " --save_steps 200 --nncf_config nncf_bert_config_xnli.json".format( - os.path.join(temp_folder["models"], "xnli") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "xnli", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "xnli_train"]) - def test_xnli_eval(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_xnli.py --model_name_or_path {output}" - " --language zh --do_eval --learning_rate 5e-5 --max_seq_length 128 --output_dir" - " {output} --nncf_config nncf_bert_config_xnli.json --per_gpu_eval_batch_size 24" - " --max_eval_samples 10".format(output=os.path.join(temp_folder["models"], "xnli")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="squad_train") - def test_squad_train(self, temp_folder): - com_line = ( - "examples/pytorch/question-answering/run_qa.py --model_name_or_path " - "bert-large-uncased-whole-word-masking-finetuned-squad --dataset_name squad --do_train " - " --learning_rate 3e-5 --num_train_epochs 0.0001 --max_seq_length 384 --doc_stride 128 " - " --output_dir {} --per_gpu_train_batch_size=1 --save_steps=200 --nncf_config" - " nncf_bert_config_squad.json".format(os.path.join(temp_folder["models"], "squad")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "squad", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "squad_train"]) - def test_squad_eval(self, temp_folder): - com_line = ( - "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output}" - " --do_eval --dataset_name squad --learning_rate 3e-5" - " --max_seq_length 384 --doc_stride 128 --per_gpu_eval_batch_size=4 --output_dir {output} " - " --max_eval_samples 10" - " --nncf_config nncf_bert_config_squad.json".format(output=os.path.join(temp_folder["models"], "squad")) - ) - 
runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="glue_roberta_train") - def test_glue_train(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path" - " roberta-large-mnli --task_name mnli --do_train " - " --per_gpu_train_batch_size 4 --learning_rate 2e-5 --num_train_epochs 0.001 --max_seq_length 128 " - " --output_dir {} --save_steps 200 --nncf_config" - " nncf_roberta_config_mnli.json".format(os.path.join(temp_folder["models"], "roberta_mnli")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "roberta_mnli", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "glue_roberta_train"]) - def test_glue_eval(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}" - " --task_name mnli --do_eval --validation_file {}/glue/glue_data/MNLI/dev_matched.tsv " - " --learning_rate 2e-5" - " --max_seq_length 128 --output_dir {output}" - " --max_eval_samples 10" - " --nncf_config nncf_roberta_config_mnli.json".format( - DATASET_PATH, output=os.path.join(temp_folder["models"], "roberta_mnli") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="glue_distilbert_train") - def test_glue_distilbert_train(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path" - " distilbert-base-uncased --train_file {}/glue/glue_data/SST-2/train.tsv" - " --task_name sst2 --do_train --max_seq_length 128 --per_gpu_train_batch_size 8" - " --learning_rate 5e-5 --num_train_epochs 0.001" - " --output_dir {} --save_steps 200 --nncf_config" - " nncf_distilbert_config_sst2.json".format( - DATASET_PATH, os.path.join(temp_folder["models"], "distilbert_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "distilbert_output", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "glue_distilbert_train"]) - def test_glue_distilbert_eval(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}" - " --task_name sst2 --do_eval --max_seq_length 128" - " --output_dir {output} --validation_file {}/glue/glue_data/SST-2/test.tsv" - " --max_eval_samples 10" - " --nncf_config nncf_distilbert_config_sst2.json".format( - DATASET_PATH, output=os.path.join(temp_folder["models"], "distilbert_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="lm_train") - def test_lm_train(self, temp_folder): - # GPT2 is loaded via torch.frombuffer which is 
not available in torch==1.9.1 yet - com_line = ( - "examples/pytorch/language-modeling/run_clm.py --model_name_or_path distilgpt2" - " --do_train --per_gpu_train_batch_size 1" - " --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 " - " --num_train_epochs 0.001" - " --output_dir {} --nncf_config" - " nncf_gpt2_config_wikitext_hw_config.json".format(os.path.join(temp_folder["models"], "lm_output")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "lm_output", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "lm_train"]) - def test_lm_eval(self, temp_folder): - # GPT2 is loaded via torch.frombuffer which is not available in torch==1.9.1 yet - com_line = ( - "examples/pytorch/language-modeling/run_clm.py " - " --model_name_or_path {output} --do_eval " - " --output_dir {output} --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1" - " --max_eval_samples 10" - " --nncf_config nncf_gpt2_config_wikitext_hw_config.json".format( - output=os.path.join(temp_folder["models"], "lm_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="ner_train") - def test_ner_train(self, temp_folder): - com_line = ( - "examples/pytorch/token-classification/run_ner.py --model_name_or_path bert-base-uncased" - " --do_train --per_gpu_train_batch_size 1" - " --dataset_name conll2003 " - " --max_train_samples 10" - " --output_dir {} " - " --nncf_config nncf_bert_config_conll.json".format(os.path.join(temp_folder["models"], "ner_output")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "ner_output", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "ner_train"]) - def test_ner_eval(self, temp_folder): - com_line = ( - "examples/pytorch/token-classification/run_ner.py " - " --model_name_or_path {output} --do_eval " - " --output_dir {output} --dataset_name conll2003" - " --max_eval_samples 10" - " --nncf_config nncf_bert_config_conll.json".format( - output=os.path.join(temp_folder["models"], "ner_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"]) - def test_convert_to_onnx(self, temp_folder): - com_line = ( - "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output} " - " --do_eval" - " --dataset_name squad " - " --max_eval_samples 10" - " --output_dir {output}" - " --to_onnx {output}/model.onnx" - " --nncf_config nncf_bert_config_squad.json".format(output=os.path.join(temp_folder["models"], "squad")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "squad", "model.onnx")) diff --git 
a/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch b/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch deleted file mode 100644 index 9e177d36b8f..00000000000 --- a/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch +++ /dev/null @@ -1,1390 +0,0 @@ -From 76c14db1e3501d03f548d9e9ebc58661443d64e1 Mon Sep 17 00:00:00 2001 -From: Alexander Dokuchaev -Date: Mon, 25 Dec 2023 21:36:16 +0200 -Subject: [PATCH] Modifications for NNCF usage - ---- - examples/pytorch/language-modeling/run_clm.py | 77 +++++++++--- - examples/pytorch/question-answering/run_qa.py | 63 ++++++++-- - .../pytorch/text-classification/run_glue.py | 112 +++++++++++++++--- - .../pytorch/text-classification/run_xnli.py | 71 +++++++++-- - .../pytorch/token-classification/run_ner.py | 106 ++++++++++++++--- - nncf_bert_config_conll.json | 44 +++++++ - nncf_bert_config_mrpc.json | 42 +++++++ - nncf_bert_config_squad.json | 44 +++++++ - ...config_squad_magnitude_sparsity_cubic.json | 31 +++++ - nncf_bert_config_xnli.json | 38 ++++++ - nncf_distilbert_config_sst2.json | 33 ++++++ - nncf_gpt2_config_wikitext_hw_config.json | 49 ++++++++ - nncf_mobilebert_config_squad_int8.json | 49 ++++++++ - nncf_roberta_config_mnli.json | 29 +++++ - src/transformers/modeling_utils.py | 24 ++++ - src/transformers/pytorch_utils.py | 3 +- - src/transformers/trainer.py | 51 +++++++- - src/transformers/training_args.py | 6 + - src/transformers/utils/__init__.py | 1 + - 19 files changed, 805 insertions(+), 68 deletions(-) - create mode 100644 nncf_bert_config_conll.json - create mode 100644 nncf_bert_config_mrpc.json - create mode 100644 nncf_bert_config_squad.json - create mode 100644 nncf_bert_config_squad_magnitude_sparsity_cubic.json - create mode 100644 nncf_bert_config_xnli.json - create mode 100644 nncf_distilbert_config_sst2.json - create mode 100644 nncf_gpt2_config_wikitext_hw_config.json - create mode 100644 nncf_mobilebert_config_squad_int8.json - create mode 100644 nncf_roberta_config_mnli.json - -diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py -index fe03cde7c..3f8cd6d37 100755 ---- a/examples/pytorch/language-modeling/run_clm.py -+++ b/examples/pytorch/language-modeling/run_clm.py -@@ -30,6 +30,8 @@ from itertools import chain - from typing import Optional - - import datasets -+import onnx -+import torch - from datasets import load_dataset - - import evaluate -@@ -51,7 +53,12 @@ from transformers.testing_utils import CaptureLogger - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -+from transformers.trainer import get_train_dataloader_for_init - -+from nncf import NNCFConfig -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.torch.initialization import PTInitializingDataLoader - - # Will error if the minimal version of Transformers is not installed. Remove at your own risks. - check_min_version("4.23.0") -@@ -373,22 +380,6 @@ def main(): - "You can do it from another script, save it, and load it from here, using --tokenizer_name." 
- ) - -- if model_args.model_name_or_path: -- model = AutoModelForCausalLM.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ) -- else: -- model = AutoModelForCausalLM.from_config(config) -- n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) -- logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") -- -- model.resize_token_embeddings(len(tokenizer)) -- - # Preprocessing the datasets. - # First we tokenize all the texts. - if training_args.do_train: -@@ -503,6 +494,59 @@ def main(): - preds = preds[:, :-1].reshape(-1) - return metric.compute(predictions=preds, references=labels) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, train_dataset, -+ default_data_collator) -+ -+ class WikitextInitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), dataloader_output -+ -+ nncf_config.register_extra_structs([ -+ QuantizationRangeInitArgs(WikitextInitializingDataLoader(train_dataloader)), -+ BNAdaptationInitArgs(WikitextInitializingDataLoader(train_dataloader)), -+ ]) -+ -+ if model_args.model_name_or_path: -+ retval = AutoModelForCausalLM.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ else: -+ retval = AutoModelForCausalLM.from_config(config) -+ n_params = sum(dict((p.data_ptr(), p.numel()) for p in retval.parameters()).values()) -+ logger.info(f"Training new model from scratch - Total size={n_params / 2 ** 20:.2f}M params") -+ -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ model.resize_token_embeddings(len(tokenizer)) -+ -+ if training_args.to_onnx: -+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, config.n_positions], dtype=torch.long) -+ onnx.export(model, dummy_tensor, training_args.to_onnx) -+ - # Initialize our Trainer - trainer = Trainer( - model=model, -@@ -516,6 +560,7 @@ def main(): - preprocess_logits_for_metrics=preprocess_logits_for_metrics - if training_args.do_eval and not is_torch_tpu_available() - else None, -+ compression_ctrl=compression_ctrl - ) - - # Training -diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py -index 1240623b5..b6136d6e3 100755 ---- a/examples/pytorch/question-answering/run_qa.py -+++ b/examples/pytorch/question-answering/run_qa.py -@@ -25,6 +25,7 @@ from dataclasses import dataclass, field - from typing import Optional - - import datasets -+import torch - 
from datasets import load_dataset - - import evaluate -@@ -42,11 +43,19 @@ from transformers import ( - default_data_collator, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version - from utils_qa import postprocess_qa_predictions - -+from torch import onnx -+ -+from nncf import NNCFConfig -+from nncf.torch.initialization import PTInitializingDataLoader -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.common.utils.tensorboard import prepare_for_tensorboard - - # Will error if the minimal version of Transformers is not installed. Remove at your own risks. - check_min_version("4.23.0") -@@ -327,14 +336,6 @@ def main(): - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) -- model = AutoModelForQuestionAnswering.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ) - - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): -@@ -599,6 +600,51 @@ def main(): - def compute_metrics(p: EvalPrediction): - return metric.compute(predictions=p.predictions, references=p.label_ids) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, train_dataset, data_collator) -+ class SquadInitializingDataloader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), dataloader_output -+ -+ nncf_config.register_extra_structs([ -+ QuantizationRangeInitArgs(SquadInitializingDataloader(train_dataloader)), -+ BNAdaptationInitArgs(SquadInitializingDataloader(train_dataloader)), -+ ]) -+ -+ retval = AutoModelForQuestionAnswering.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) 
-+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, 384], dtype=torch.long) -+ onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), training_args.to_onnx) -+ - # Initialize our Trainer - trainer = QuestionAnsweringTrainer( - model=model, -@@ -610,6 +656,7 @@ def main(): - data_collator=data_collator, - post_process_function=post_processing_function, - compute_metrics=compute_metrics, -+ compression_ctrl=compression_ctrl - ) - - # Training -diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py -index 3eb423f08..f675e7ab4 100755 ---- a/examples/pytorch/text-classification/run_glue.py -+++ b/examples/pytorch/text-classification/run_glue.py -@@ -29,6 +29,10 @@ from datasets import load_dataset - - import evaluate - import transformers -+from nncf import NNCFConfig -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.torch.initialization import PTInitializingDataLoader - from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, -@@ -42,6 +46,7 @@ from transformers import ( - default_data_collator, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -@@ -366,15 +371,6 @@ def main(): - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) -- model = AutoModelForSequenceClassification.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, -- ) - - # Preprocessing the raw_datasets - if data_args.task_name is not None: -@@ -400,12 +396,12 @@ def main(): - # Some models have set the order of the labels to use, so let's make sure we do use it. - label_to_id = None - if ( -- model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id -+ config.label2id != PretrainedConfig(num_labels=num_labels).label2id - and data_args.task_name is not None - and not is_regression - ): - # Some have all caps in their config, some don't. 
-- label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} -+ label_name_to_id = {k.lower(): v for k, v in config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): - label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} - else: -@@ -418,11 +414,11 @@ def main(): - label_to_id = {v: i for i, v in enumerate(label_list)} - - if label_to_id is not None: -- model.config.label2id = label_to_id -- model.config.id2label = {id: label for label, id in config.label2id.items()} -+ config.label2id = label_to_id -+ config.id2label = {id: label for label, id in config.label2id.items()} - elif data_args.task_name is not None and not is_regression: -- model.config.label2id = {l: i for i, l in enumerate(label_list)} -- model.config.id2label = {id: label for label, id in config.label2id.items()} -+ config.label2id = {l: i for i, l in enumerate(label_list)} -+ config.id2label = {id: label for label, id in config.label2id.items()} - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( -@@ -458,6 +454,87 @@ def main(): - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, -+ train_dataset, -+ data_collator=default_data_collator) -+ -+ class SST2InitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "labels": dataloader_output["labels"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "input_ids": dataloader_output["input_ids"] -+ } -+ -+ class MRPCInitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "labels": dataloader_output["labels"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "input_ids": dataloader_output["input_ids"], -+ "token_type_ids": dataloader_output["token_type_ids"] -+ } -+ -+ class MNLIInitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "labels": dataloader_output["labels"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "input_ids": dataloader_output["input_ids"] -+ } -+ -+ if data_args.task_name == "sst2": -+ initializing_data_loader_cls = SST2InitializingDataLoader -+ elif data_args.task_name == "mrpc": -+ initializing_data_loader_cls = MRPCInitializingDataLoader -+ elif data_args.task_name == "mnli": -+ initializing_data_loader_cls = MNLIInitializingDataLoader -+ initializing_data_loader = initializing_data_loader_cls(train_dataloader) -+ nncf_config.register_extra_structs([QuantizationRangeInitArgs(initializing_data_loader), -+ BNAdaptationInitArgs(initializing_data_loader)]) -+ -+ -+ retval = AutoModelForSequenceClassification.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ 
nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) -+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ import torch -+ from torch import onnx -+ dummy_tensor = torch.ones([1, 128], dtype=torch.long) -+ onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), -+ training_args.to_onnx, opset_version=10) -+ - if training_args.do_eval: - if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") -@@ -518,8 +595,13 @@ def main(): - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, -+ compression_ctrl=compression_ctrl - ) - -+ if nncf_config is not None: -+ if not (training_args.local_rank == -1 or training_args.no_cuda): -+ compression_ctrl.distributed() -+ - # Training - if training_args.do_train: - checkpoint = None -diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py -index 55523edfc..68a3ebe41 100755 ---- a/examples/pytorch/text-classification/run_xnli.py -+++ b/examples/pytorch/text-classification/run_xnli.py -@@ -26,10 +26,16 @@ from typing import Optional - - import datasets - import numpy as np -+import torch - from datasets import load_dataset - - import evaluate - import transformers -+from nncf import NNCFConfig -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.torch import register_default_init_args -+from nncf.torch.initialization import PTInitializingDataLoader -+ - from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, -@@ -42,6 +48,7 @@ from transformers import ( - default_data_collator, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -@@ -282,15 +289,6 @@ def main(): - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) -- model = AutoModelForSequenceClassification.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, -- ) - - # Preprocessing the datasets - # Padding strategy -@@ -367,6 +365,56 @@ def main(): - else: - data_collator = None - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, -+ train_dataset, -+ data_collator=data_collator) -+ -+ class KwargBasedInitializingDataloader(PTInitializingDataLoader): -+ def get_inputs(self, 
dataloader_output): -+ return (), dataloader_output -+ -+ initializing_data_loader = KwargBasedInitializingDataloader(train_dataloader) -+ nncf_config = register_default_init_args(nncf_config, initializing_data_loader) -+ -+ -+ retval = AutoModelForSequenceClassification.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) -+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, training_args.max_seq_length], dtype=torch.long) -+ torch.onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), training_args.to_onnx) -+ - # Initialize our Trainer - trainer = Trainer( - model=model, -@@ -376,8 +424,13 @@ def main(): - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, -+ compression_ctrl=compression_ctrl - ) - -+ if nncf_config is not None: -+ if not (training_args.local_rank == -1 or training_args.no_cuda): -+ compression_ctrl.distributed() -+ - # Training - if training_args.do_train: - checkpoint = None -diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py -index 52cbbb87b..30f150580 100755 ---- a/examples/pytorch/token-classification/run_ner.py -+++ b/examples/pytorch/token-classification/run_ner.py -@@ -22,15 +22,24 @@ Fine-tuning the library models for token classification. 
- import logging - import os - import sys -+from copy import deepcopy - from dataclasses import dataclass, field - from typing import Optional -+from typing import List - - import datasets - import numpy as np - from datasets import ClassLabel, load_dataset - - import evaluate -+import torch - import transformers -+from nncf import NNCFConfig -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.torch.initialization import PTInitializingDataLoader -+from packaging import version -+from torch import onnx - from transformers import ( - AutoConfig, - AutoModelForTokenClassification, -@@ -43,6 +52,7 @@ from transformers import ( - TrainingArguments, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -@@ -204,6 +214,16 @@ class DataTrainingArguments: - self.task_name = self.task_name.lower() - - -+def filter_columns(dataset, keep_columns: List[str], remove_columns: List[str]): -+ if version.parse(datasets.__version__) < version.parse("1.4.0"): -+ dataset.set_format( -+ type=dataset.format["type"], columns=keep_columns, format_kwargs=dataset.format["format_kwargs"] -+ ) -+ return dataset -+ else: -+ return dataset.remove_columns(remove_columns) -+ -+ - def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. -@@ -366,16 +386,6 @@ def main(): - use_auth_token=True if model_args.use_auth_token else None, - ) - -- model = AutoModelForTokenClassification.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, -- ) -- - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( -@@ -385,25 +395,25 @@ def main(): - ) - - # Model has labels -> use them. -- if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id: -- if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)): -+ if config.label2id != PretrainedConfig(num_labels=num_labels).label2id: -+ if list(sorted(config.label2id.keys())) == list(sorted(label_list)): - # Reorganize `label_list` to match the ordering of the model. 
- if labels_are_int: -- label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)} -- label_list = [model.config.id2label[i] for i in range(num_labels)] -+ label_to_id = {i: int(config.label2id[l]) for i, l in enumerate(label_list)} -+ label_list = [config.id2label[i] for i in range(num_labels)] - else: -- label_list = [model.config.id2label[i] for i in range(num_labels)] -+ label_list = [config.id2label[i] for i in range(num_labels)] - label_to_id = {l: i for i, l in enumerate(label_list)} - else: - logger.warning( - "Your model seems to have been trained with labels, but they don't match the dataset: ", -- f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:" -+ f"model labels: {list(sorted(config.label2id.keys()))}, dataset labels:" - f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", - ) - - # Set the correspondences label/ID inside the model config -- model.config.label2id = {l: i for i, l in enumerate(label_list)} -- model.config.id2label = {i: l for i, l in enumerate(label_list)} -+ config.label2id = {l: i for i, l in enumerate(label_list)} -+ config.id2label = {i: l for i, l in enumerate(label_list)} - - # Map that sends B-Xxx label to its I-Xxx counterpart - b_to_i_label = [] -@@ -504,6 +514,65 @@ def main(): - # Data collator - data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ if training_args.do_train: -+ train_dataset_for_init = deepcopy(train_dataset) -+ -+ train_dataset_for_init = filter_columns(train_dataset_for_init, -+ keep_columns=['labels', 'input_ids', 'attention_mask', -+ 'token_type_ids'], -+ remove_columns=['ner_tags', 'pos_tags', 'tokens', 'id', -+ 'chunk_tags']) -+ train_dataloader = get_train_dataloader_for_init(training_args, train_dataset_for_init, data_collator) -+ -+ class ConllInitializingDataloader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "input_ids": dataloader_output["input_ids"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "token_type_ids": dataloader_output["token_type_ids"], -+ } -+ -+ nncf_config.register_extra_structs([ -+ QuantizationRangeInitArgs(ConllInitializingDataloader(train_dataloader)), -+ BNAdaptationInitArgs(ConllInitializingDataloader(train_dataloader)), -+ ]) -+ -+ retval = AutoModelForTokenClassification.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) 
-+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, 128], dtype=torch.long) -+ onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), training_args.to_onnx, -+ opset_version=10) -+ - # Metrics - metric = evaluate.load("seqeval") - -@@ -549,6 +618,7 @@ def main(): - tokenizer=tokenizer, - data_collator=data_collator, - compute_metrics=compute_metrics, -+ compression_ctrl=compression_ctrl - ) - - # Training -diff --git a/nncf_bert_config_conll.json b/nncf_bert_config_conll.json -new file mode 100644 -index 000000000..bf7c88ebb ---- /dev/null -+++ b/nncf_bert_config_conll.json -@@ -0,0 +1,44 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 32, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true, -+ "per_channel": false -+ } -+ } -+} -diff --git a/nncf_bert_config_mrpc.json b/nncf_bert_config_mrpc.json -new file mode 100644 -index 000000000..425d89d76 ---- /dev/null -+++ b/nncf_bert_config_mrpc.json -@@ -0,0 +1,42 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 64, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "per_channel": false -+ } -+ } -+} -diff --git a/nncf_bert_config_squad.json b/nncf_bert_config_squad.json -new file mode 100644 -index 000000000..2a055de17 ---- /dev/null -+++ b/nncf_bert_config_squad.json -@@ -0,0 +1,44 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 32, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true, -+ "per_channel": false -+ } -+ } -+} -diff --git a/nncf_bert_config_squad_magnitude_sparsity_cubic.json b/nncf_bert_config_squad_magnitude_sparsity_cubic.json -new file mode 100644 -index 000000000..b4452e8d4 ---- /dev/null -+++ b/nncf_bert_config_squad_magnitude_sparsity_cubic.json -@@ -0,0 +1,31 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "magnitude_sparsity", -+ "params": { -+ 
"schedule": "polynomial", -+ "power": 3, -+ "sparsity_init": 0.0, -+ "sparsity_target": 0.8, -+ "sparsity_target_epoch": 40, -+ "sparsity_freeze_epoch": 60, -+ "update_per_optimizer_step": true, -+ "steps_per_epoch": 1109, -+ "weight_importance": "abs" -+ }, -+ "ignored_scopes": ["{re}.*NNCFEmbedding"] -+ } -+} -diff --git a/nncf_bert_config_xnli.json b/nncf_bert_config_xnli.json -new file mode 100644 -index 000000000..92b95db1c ---- /dev/null -+++ b/nncf_bert_config_xnli.json -@@ -0,0 +1,38 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 96 -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "ignored_scopes": ["{re}BertSelfAttention\\[self\\]/__add___0", -+ "{re}BertIntermediate\\[intermediate\\]/NNCFLinear\\[dense\\]/linear_0" -+ ], -+ "activations": -+ { -+ "mode": "asymmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric" -+ } -+ } -+} -diff --git a/nncf_distilbert_config_sst2.json b/nncf_distilbert_config_sst2.json -new file mode 100644 -index 000000000..dc140ab39 ---- /dev/null -+++ b/nncf_distilbert_config_sst2.json -@@ -0,0 +1,33 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 32, -+ "type": "mean_percentile" -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true -+ } -+ } -+} -diff --git a/nncf_gpt2_config_wikitext_hw_config.json b/nncf_gpt2_config_wikitext_hw_config.json -new file mode 100644 -index 000000000..55173b25b ---- /dev/null -+++ b/nncf_gpt2_config_wikitext_hw_config.json -@@ -0,0 +1,49 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 1024], -+ "type": "long" -+ } -+ ], -+ "hw_config_type": "cpu", -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 16, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "ignored_scopes": [ -+ // Intermediate embedding sum results -+ "GPT2LMHeadModel/GPT2Model[transformer]/__add___0", -+ -+ // Scaling in attention -+ "{re}.*Attention\\[attn\\]/__truediv___0", -+ -+ // Pre-LayerNorm additions -+ "{re}.*Block\\[[0-9]*\\]/__add___0", -+ "{re}.*Block\\[[0-9]*\\]/__add___1", -+ -+ // LM head -+ "GPT2LMHeadModel/NNCFLinear[lm_head]/linear_0" -+ ], -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true -+ } -+ } -+} -diff --git a/nncf_mobilebert_config_squad_int8.json b/nncf_mobilebert_config_squad_int8.json -new file mode 100644 -index 000000000..4d0e84edf ---- /dev/null -+++ b/nncf_mobilebert_config_squad_int8.json -@@ -0,0 +1,49 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 64, -+ "type": "percentile", -+ 
"params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "ignored_scopes": ["{re}MobileBertSelfAttention\\[self\\]/__add___0", -+ "{re}MobileBertIntermediate\\[intermediate\\]/NNCFLinear\\[dense\\]/linear_0"], -+ "activations": -+ { -+ "mode": "symmetric", -+ "ignored_scopes": [ -+ "{re}MobileBertForQuestionAnswering/MobileBertModel\\[mobilebert\\]/MobileBertEmbeddings\\[embeddings\\]/__add___0", -+ "{re}MobileBertForQuestionAnswering/MobileBertModel\\[mobilebert\\]/MobileBertEmbeddings\\[embeddings\\]/__add___1", -+ "{re}MobileBertOutput\\[output\\]/__add___0", -+ "{re}NoNorm\\[LayerNorm\\]/__mul___0"] -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true -+ } -+ } -+} -diff --git a/nncf_roberta_config_mnli.json b/nncf_roberta_config_mnli.json -new file mode 100644 -index 000000000..46f819bca ---- /dev/null -+++ b/nncf_roberta_config_mnli.json -@@ -0,0 +1,29 @@ -+{ -+ "input_info": [ -+ { -+ "keyword": "input_ids", -+ "sample_size": [1, 128], -+ "type": "long", -+ "filler": "ones" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 24 -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "asymmetric" -+ }, -+ "weights": -+ { -+ "mode": "asymmetric" -+ } -+ } -+} -diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py -index 5f4fccd33..7f4cdb3d8 100644 ---- a/src/transformers/modeling_utils.py -+++ b/src/transformers/modeling_utils.py -@@ -27,10 +27,12 @@ from functools import partial - from typing import Any, Callable, Dict, List, Optional, Tuple, Union - - import torch -+from nncf.torch import create_compressed_model - from packaging import version - from torch import Tensor, device, nn - from torch.nn import CrossEntropyLoss - -+from transformers.utils import NNCF_PT_STATE_NAME - from transformers.utils.hub import convert_file_size_to_int, get_checkpoint_shard_files - from transformers.utils.import_utils import is_sagemaker_mp_enabled - -@@ -1497,6 +1499,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - push_to_hub: bool = False, - max_shard_size: Union[int, str] = "10GB", - safe_serialization: bool = False, -+ nncf_compression_state: Dict = None, - **kwargs, - ): - """ -@@ -1620,6 +1623,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - else: - save_function(shard, os.path.join(save_directory, shard_file)) - -+ if nncf_compression_state is not None: -+ nncf_state_output_file = os.path.join(save_directory, NNCF_PT_STATE_NAME) -+ save_function(nncf_compression_state, nncf_state_output_file) -+ - if index is None: - logger.info(f"Model weights saved in {os.path.join(save_directory, WEIGHTS_NAME)}") - else: -@@ -1901,6 +1908,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - load_in_8bit_skip_modules = kwargs.pop("load_in_8bit_skip_modules", None) - subfolder = kwargs.pop("subfolder", "") - commit_hash = kwargs.pop("_commit_hash", None) -+ nncf_config = kwargs.pop("nncf_config", None) -+ nncf_eval = kwargs.pop("nncf_eval", False) - - if trust_remote_code is True: - logger.warning( -@@ -2321,6 +2330,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - if dtype_orig is not None: - torch.set_default_dtype(dtype_orig) - -+ if nncf_config is not None and nncf_eval: -+ 
compression_algo_controller, model = create_compressed_model(model, nncf_config, -+ compression_state=state_dict) -+ return compression_algo_controller, model -+ - model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model( - model, - state_dict, -@@ -2344,6 +2358,16 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - # Set model in evaluation mode to deactivate DropOut modules by default - model.eval() - -+ if nncf_config is not None: -+ compression_state = None -+ compression_state_file = os.path.join(pretrained_model_name_or_path, NNCF_PT_STATE_NAME) -+ if os.path.isfile(compression_state_file): -+ compression_state = torch.load(compression_state_file) -+ -+ compression_algo_controller, model = create_compressed_model(model, nncf_config, -+ compression_state=compression_state) -+ return compression_algo_controller, model -+ - # Dispatch model with hooks on all devices if necessary - if device_map is not None: - dispatch_model(model, device_map=device_map, offload_dir=offload_folder) -diff --git a/src/transformers/pytorch_utils.py b/src/transformers/pytorch_utils.py -index d94e049b5..09c99d4dd 100644 ---- a/src/transformers/pytorch_utils.py -+++ b/src/transformers/pytorch_utils.py -@@ -87,7 +87,8 @@ def prune_linear_layer(layer: nn.Linear, index: torch.LongTensor, dim: int = 0) - new_layer.bias.requires_grad = True - return new_layer - -- -+import nncf -+@nncf.torch.register_module() - class Conv1D(nn.Module): - """ - 1D-convolutional layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2). -diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py -index 214e7a978..12a5787cd 100755 ---- a/src/transformers/trainer.py -+++ b/src/transformers/trainer.py -@@ -33,7 +33,7 @@ from pathlib import Path - from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union - - from tqdm.auto import tqdm -- -+from nncf.torch.nncf_network import NNCFNetwork - - # Integrations must be imported before ML frameworks: - from .integrations import ( # isort: split -@@ -55,6 +55,8 @@ import numpy as np - import torch - import torch.distributed as dist - from packaging import version -+from nncf.torch.compression_method_api import PTCompressionAlgorithmController -+from nncf.common.utils.tensorboard import prepare_for_tensorboard - from torch import nn - from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler - from torch.utils.data.distributed import DistributedSampler -@@ -206,6 +208,30 @@ SCHEDULER_NAME = "scheduler.pt" - SCALER_NAME = "scaler.pt" - - -+def get_train_dataloader_for_init(args, train_dataset, data_collator=None): -+ from torch.utils.data import RandomSampler -+ from torch.utils.data import DistributedSampler -+ train_sampler = ( -+ RandomSampler(train_dataset) -+ if args.local_rank == -1 -+ else DistributedSampler(train_dataset) -+ ) -+ -+ if data_collator is None: -+ from transformers.data.data_collator import default_data_collator -+ data_collator = default_data_collator -+ -+ from torch.utils.data import DataLoader -+ data_loader = DataLoader( -+ train_dataset, -+ batch_size=args.train_batch_size, -+ sampler=train_sampler, -+ collate_fn=data_collator, -+ drop_last=args.dataloader_drop_last, -+ ) -+ return data_loader -+ -+ - class Trainer: - """ - Trainer is a simple but feature-complete training and eval loop for PyTorch, optimized for 🤗 Transformers. 
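
The patch above wires NNCF's quantizer range initialization and BatchNorm adaptation to an ordinary training `DataLoader` through a `PTInitializingDataLoader` subclass (see `ConllInitializingDataloader` and the `get_train_dataloader_for_init` helper). A minimal standalone sketch of that pattern follows; the class and helper names here are illustrative, and the import paths are taken from recent NNCF releases, so they may need adjusting:

```python
# Minimal sketch of the initialization-dataloader pattern used in the patch.
# The class/function names are illustrative; the import paths follow recent
# NNCF releases and may differ slightly from those assumed by the patch.
from nncf import NNCFConfig
from nncf.config.structures import BNAdaptationInitArgs, QuantizationRangeInitArgs
from nncf.torch.initialization import PTInitializingDataLoader


class TokenClassificationInitDataLoader(PTInitializingDataLoader):
    """Tells NNCF how to turn one dataloader batch into model inputs during
    quantizer range initialization and BatchNorm adaptation."""

    def get_inputs(self, dataloader_output):
        # No positional arguments; everything is passed as keyword arguments,
        # mirroring the HF model forward signature.
        return (), {
            "input_ids": dataloader_output["input_ids"],
            "attention_mask": dataloader_output["attention_mask"],
            "token_type_ids": dataloader_output["token_type_ids"],
        }


def attach_init_dataloaders(nncf_config: NNCFConfig, train_dataloader) -> NNCFConfig:
    init_loader = TokenClassificationInitDataLoader(train_dataloader)
    nncf_config.register_extra_structs(
        [
            QuantizationRangeInitArgs(init_loader),
            BNAdaptationInitArgs(init_loader),
        ]
    )
    return nncf_config
```
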
-@@ -304,12 +330,15 @@ class Trainer: - callbacks: Optional[List[TrainerCallback]] = None, - optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), - preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None, -+ compression_ctrl: PTCompressionAlgorithmController = None - ): - if args is None: - output_dir = "tmp_trainer" - logger.info(f"No `TrainingArguments` passed, using `output_dir={output_dir}`.") - args = TrainingArguments(output_dir=output_dir) - self.args = args -+ -+ self.compression_ctrl = compression_ctrl - # Seed must be set before instantiating the model when using model - enable_full_determinism(self.args.seed) if self.args.full_determinism else set_seed(self.args.seed) - self.hp_name = None -@@ -1409,6 +1438,8 @@ class Trainer: - - if self.args.ddp_bucket_cap_mb is not None: - kwargs["bucket_cap_mb"] = self.args.ddp_bucket_cap_mb -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.distributed() - model = nn.parallel.DistributedDataParallel( - model, - device_ids=[self.args.local_rank] if self.args._n_gpu != 0 else None, -@@ -1687,6 +1718,9 @@ class Trainer: - _ = list(train_dataloader.sampler) - - for epoch in range(epochs_trained, num_train_epochs): -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.scheduler.epoch_step() -+ print(self.compression_ctrl.statistics().to_str()) - if isinstance(train_dataloader, DataLoader) and isinstance(train_dataloader.sampler, DistributedSampler): - train_dataloader.sampler.set_epoch(epoch) - elif hasattr(train_dataloader, "dataset") and isinstance(train_dataloader.dataset, IterableDatasetShard): -@@ -1790,6 +1824,8 @@ class Trainer: - ) - - # Optimizer step -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.scheduler.step() - optimizer_was_run = True - if self.deepspeed: - pass # called outside the loop -@@ -1814,6 +1850,7 @@ class Trainer: - model.zero_grad() - self.state.global_step += 1 - self.state.epoch = epoch + (step + 1) / steps_in_epoch -+ self.state.curr_loss = tr_loss_step.cpu().detach().item() - self.control = self.callback_handler.on_step_end(args, self.state, self.control) - - self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval) -@@ -2033,6 +2070,14 @@ class Trainer: - logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) - logs["learning_rate"] = self._get_learning_rate() - -+ if self.compression_ctrl is not None: -+ logs["compression_loss"] = self.compression_ctrl.loss().item() -+ compression_stats = self.compression_ctrl.statistics() -+ for key, value in prepare_for_tensorboard(compression_stats).items(): -+ logs["compression/statistics/{0}".format(key)] = value -+ print(compression_stats.to_str()) -+ -+ - self._total_loss_scalar += tr_loss_scalar - self._globalstep_last_logged = self.state.global_step - self.store_flos() -@@ -2492,6 +2537,10 @@ class Trainer: - # deepspeed handles loss scaling by gradient_accumulation_steps in its `backward` - loss = loss / self.args.gradient_accumulation_steps - -+ if self.compression_ctrl is not None: -+ compression_loss = self.compression_ctrl.loss() -+ loss += compression_loss -+ - if self.do_grad_scaling: - self.scaler.scale(loss).backward() - elif self.use_apex: -diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py -index 170315fe2..daa497c02 100644 ---- a/src/transformers/training_args.py -+++ b/src/transformers/training_args.py -@@ -993,6 +993,12 @@ class 
TrainingArguments: - }, - ) - -+ nncf_config: str = field(default=None, -+ metadata={"help": "NNCF configuration .json file for compression-enabled training"}) -+ -+ to_onnx: str = field(default=None, -+ metadata={"help": "Name of the ONNX model file to export the model to."}) -+ - def __post_init__(self): - # Handle --use_env option in torch.distributed.launch (local_rank not passed as an arg then). - # This needs to happen before any call to self.device or self.n_gpu. -diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py -index 2269f2254..2f3082293 100644 ---- a/src/transformers/utils/__init__.py -+++ b/src/transformers/utils/__init__.py -@@ -154,6 +154,7 @@ from .import_utils import ( - - - WEIGHTS_NAME = "pytorch_model.bin" -+NNCF_PT_STATE_NAME = "nncf_state.bin" - WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json" - TF2_WEIGHTS_NAME = "tf_model.h5" - TF2_WEIGHTS_INDEX_NAME = "tf_model.h5.index.json" --- -2.34.1 diff --git a/third_party_integration/huggingface_transformers/README.md b/third_party_integration/huggingface_transformers/README.md deleted file mode 100644 index 1ae5b35b0f3..00000000000 --- a/third_party_integration/huggingface_transformers/README.md +++ /dev/null @@ -1,145 +0,0 @@ -# Integrating NNCF into Transformers - -https://github.com/huggingface/transformers - -This folder contains a git patch to enable NNCF-based quantization for XNLI, SQuAD and GLUE training pipelines of the huggingface transformers repository. - -**NOTE**: this patch is deprecated and will be removed from NNCF repository in future releases. - -Instructions: - -1. Apply the `0001-Modifications-for-NNCF-usage.patch` file to the huggingface transformers repository checked out at commit id: `bd469c40659ce76c81f69c7726759d249b4aef49` - -2. Install the `transformers` library and the example scripts from the patched repository as described in the documentation for the huggingface transformers repository. - -3. To start quantization-aware fine-tuning of NLP models using NNCF, use the regular scripts and command line parameters for XNLI and SQuAD training, but with additional `--nncf_config ` parameter. -The NNCF configs to be used in this way are also provided in the same patch on a per-model, per-compression algorithm basis. -Distributed multiprocessing is also supported, simply use the corresponding version of the command line in the huggingface transformers repository with the same additional `--nncf_config` parameter. - -4. While running with the `--nncf_config` option, the training scripts will output NNCF-wrapped model checkpoints instead of the regular ones. You may evaluate these checkpoints using the same command lines for training above, but with the`--do_train` key omitted. In order to export these checkpoints into ONNX format, further add `--to_onnx ` to your evaluation command line parameters. -See exact command lines for each case in the model notes below. -Note that in all cases the training hyperparameters might have to be adjusted to accommodate the hardware you have available. - -## Current best results - -All models use as their baselines the checkpoints obtained with the scripts and command line parameters from the corresponding sections in the original repository documentation. While fine-tuning the quantized model, the hyperparameters were left unchanged, i.e. the difference in the training script invocation was limited to adding `--nncf_config` option and specifying the pre-trained baseline model as the starting point for quantization fine-tuning. 
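
Step 4 above relies on the patched `from_pretrained`, which returns a (compression controller, model) pair whenever an NNCF config is supplied. A hedged sketch of what the evaluation/export path amounts to for an already fine-tuned INT8 checkpoint (the checkpoint directory and config file names follow the CoNLL-2003 command lines below; this only works against the patched `transformers`):

```python
# Hedged sketch: evaluating/exporting an already fine-tuned NNCF checkpoint
# with the patched `transformers`. The checkpoint directory and config file
# names come from the CoNLL-2003 example further below.
from nncf import NNCFConfig
from transformers import AutoModelForTokenClassification

nncf_config = NNCFConfig.from_json("nncf_bert_config_conll.json")

# With the patch applied, `from_pretrained` accepts `nncf_config`/`nncf_eval`
# and returns a (compression controller, model) pair instead of a bare model.
compression_ctrl, model = AutoModelForTokenClassification.from_pretrained(
    "bert_base_cased_conll_int8",  # NNCF-wrapped checkpoint directory
    nncf_config=nncf_config,
    nncf_eval=True,                # evaluation-only: no re-initialization
)

# Equivalent of passing `--to_onnx bert_base_cased_conll_int8.onnx`.
compression_ctrl.export_model("bert_base_cased_conll_int8.onnx")
```
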
For RoBERTa-MNLI, no baseline model finetuning was necessary since the `roberta-large-mnli` model pretrained on MNLI was already available for download. - -Make sure that you are running evaluation on a single GPU, since the repository evaluation scripts give inconsistent results when running multi-GPU evaluation. - -### BERT-XNLI - -_Full-precision FP32 baseline model_ - bert-base-chinese, trained on the Chinese portion of XNLI - 77.68% accuracy when evaluated on the Chinese portion of XNLI test set. - -_INT8 model (symmetric weights, asymmetric activations quantization)_ - 77.22% accuracy in the same evaluation conditions. - -**INT8 model quantization-aware training command line:** - -`python examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert-base-chinese --language zh --train_language zh --do_train --do_eval --per_gpu_train_batch_size 48 --per_gpu_eval_batch_size 1 --learning_rate 5e-5 --num_train_epochs 4.0 --max_seq_length 128 --output_dir bert_xnli_int8 --save_steps 200 --nncf_config nncf_bert_config_xnli.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert_xnli_int8 --language zh --train_language zh --do_eval --per_gpu_eval_batch_size 1 --max_seq_length 128 --output_dir bert_xnli_int8 --nncf_config nncf_bert_config_xnli.json --to_onnx bert_xnli_int8.onnx` - -### BERT-SQuAD v1.1 - -_Full-precision FP32 baseline model_ - bert-large-uncased-whole-word-masking model, trained on SQuAD v1.1 - 93.21% F1, 87.2% EM on the dev set, - -_INT8 model (symmetric quantization)_ - 92.55% F1, 86.1% EM on the dev set. - -**INT8 model quantization-aware training command line (trained on 4x Tesla V100):** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=10 --save_steps=400 --nncf_config nncf_bert_config_squad.json` - -_INT8 model (symmetric quantization) + Knowledge Distillation_ - 92.89% F1, 86.68% EM on the dev set. 
- -**INT8 model quantization-aware training + Knowledge Distillation command line (trained on 4x Tesla V100):** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=10 --save_steps=400 --nncf_config nncf_bert_config_squad_kd.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert_squad_int8 --do_eval --dataset_name squad --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --nncf_config nncf_bert_config_squad.json --to_onnx bert_squad_int8.onnx` - -### BERT-CoNLL2003 - -_Full-precision FP32 baseline model_ - bert-base-cased model, trained on CoNLL2003 - 99.17% acc, 95.03% F1 - -_INT8 model (symmetric quantization)_ - 99.18% acc, 95.31% F1 - -**INT8 model quantization-aware training command line (trained on 4x Tesla V100):** - -`python examples/pytorch/token-classification/run_ner.py --model_name_or_path *path_to_fp32_finetuned_model* --dataset_name conll2003 --output_dir bert_base_cased_conll_int8 --do_train --do_eval --save_strategy epoch --evaluation_strategy epoch --nncf_config nncf_bert_config_conll.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/token-classification/run_ner.py --model_name_or_path bert_base_cased_conll_int8 --dataset_name conll2003 --output_dir bert_base_cased_conll_int8 --do_eval --nncf_config nncf_bert_config_squad.json --to_onnx bert_base_cased_conll_int8.onnx` - -### BERT-MRPC - -_Full-precision FP32 baseline model_ - bert-base-cased-finetuned-mrpc, 84.56% acc - -_INT8 model (symmetric quantization)_ - 84.8% acc - -**INT8 model quantization-aware training command line (trained on 1x RTX 2080):** - -`python examples/pytorch/token-classification/run_glue.py --model_name_or_path bert-base-cased-finetuned-mrpc --task_name mrpc --do_train --do_eval --num_train_epochs 5.0 --per_device_eval_batch_size 1 --output_dir bert_cased_mrpc_int8 --evaluation_strategy epoch --save_strategy epoch --nncf_config nncf_bert_config_mrpc.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/token-classification/run_ner.py --model_name_or_path bert_cased_mrpc_int8 --task_name mrpc --do_eval --per_gpu_eval_batch_size 1 --output_dir bert_cased_mrpc_int8 --nncf_config nncf_bert_config_mrpc.json --to_onnx bert_base_cased_mrpc_int8.onnx` - -### RoBERTA-MNLI - -_Full-precision FP32 baseline model_ - roberta-large-mnli, pre-trained on MNLI - 90.6% accuracy (matched), 90.1% accuracy (mismatched) - -_INT8 model (asymmetrically quantized)_ - 89.25% accuracy (matched), 88.9% accuracy (mismatched) - -**INT8 model quantization-aware training command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path roberta-large-mnli --task_name mnli --do_train --do_eval --per_gpu_train_batch_size 24 --per_gpu_eval_batch_size 1 --learning_rate 2e-5 --num_train_epochs 3.0 --max_seq_length 128 --output_dir roberta_mnli_int8 --save_steps 400 --nncf_config nncf_roberta_config_mnli.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path roberta_mnli_int8 --task_name mnli 
--do_eval --learning_rate 2e-5 --num_train_epochs 3.0 --max_seq_length 128 --per_gpu_eval_batch_size 1 --output_dir roberta_mnli_int8 --save_steps 400 --nncf_config nncf_roberta_config_mnli.json --to_onnx roberta_mnli_int8.onnx` - -### DistilBERT-SST-2 - -_Full-precision FP32 baseline model_ - distilbert-base-uncased-finetuned-sst-2-english, pre-trained on SST-2 - 91.1% accuracy - -_INT8 model (symmetrically quantized)_ - 90.94% accuracy - -**INT8 model quantization-aware training command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english --task_name sst2 --do_train --do_eval --per_gpu_train_batch_size 16 --per_gpu_eval_batch_size 1 --learning_rate 5e-5 --num_train_epochs 3.0 --max_seq_length 128 --output_dir distilbert_sst2_int8 --save_steps 100000 --nncf_config nncf_distilbert_config_sst2.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path distilbert_sst2_int8 --task_name sst2 --do_eval --per_gpu_eval_batch_size 1 --max_seq_length 128 --output_dir distilbert_sst2_int8 --save_steps 100000 --nncf_config nncf_distilbert_config_sst2.json --to_onnx distilbert_sst2_int8.onnx` - -### MobileBERT-SQuAD v1.1 - -_Full-precision FP32 baseline model_ - google/mobilebert-uncased, trained on SQuAD v1.1 - 89.98% F1, 82.61% EM on the dev set, - -_INT8 model (symmetric quantization)_ - 89.4% F1, 82.05% EM on the dev set. - -**INT8 model quantization-aware training command line (trained on 3x Tesla V100):** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 5 --max_seq_length 384 --doc_stride 128 --output_dir mobilebert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=6 --save_steps=400 --nncf_config nncf_mobilebert_config_squad_int8.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path mobilebert_squad_int8 --do_eval --dataset_name squad --max_seq_length 384 --doc_stride 128 --output_dir mobilebert_squad_int8 --per_gpu_eval_batch_size=1 --nncf_config nncf_mobilebert_config_squad_int8.json --to_onnx mobilebert_squad_int8.onnx` - -### GPT-2-WikiText 2 (raw) language modeling - -_Full-precision FP32 baseline model_ - 19.73 perplexity on the test set - -_INT8 model (symmetric quantization)_ - 20.9 perplexity on the test set - -**INT8 model quantization-aware training command line (trained on 1x Tesla V100):** - -`python examples/pytorch/language-modeling/run_clm.py --model_name_or_path --do_train --do_eval --dataset_name wikitext --num_train_epochs 3 --output_dir gpt2_wikitext2_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=4 --save_steps=591 --nncf_config nncf_gpt2_config_wikitext_hw_config.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/language-modeling/run_clm.py --model_name_or_path gpt2_wikitext2_int8 --do_eval --dataset_name wikitext --output_dir gpt2_wikitext2_int8 --per_gpu_eval_batch_size=1 --nncf_config nncf_gpt2_config_wikitext_hw_config.json --to_onnx gpt2_wikitext2_int8.onnx`
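
To summarize how the pieces of the patch fit together, here is a consolidated, hedged sketch of the quantization-aware fine-tuning flow that the `Trainer` changes implement, written as a plain PyTorch loop. Everything except the `compression_ctrl`/`nncf` calls is a placeholder, and the NNCF config is assumed to already carry the initialization dataloaders registered as shown in the patch:

```python
# Consolidated, hedged sketch of the quantization-aware fine-tuning flow that
# the Trainer changes in the patch implement, written as a plain PyTorch loop.
# `model`, `train_loader`, `optimizer`, `loss_fn` and the file names are
# placeholders; `nncf_config` is assumed to already carry the range-init and
# BatchNorm-adaptation dataloaders registered as shown in the patch.
from nncf.torch import create_compressed_model


def quantization_aware_finetune(model, nncf_config, train_loader, optimizer, loss_fn, num_epochs):
    compression_ctrl, model = create_compressed_model(model, nncf_config)

    for epoch in range(num_epochs):
        compression_ctrl.scheduler.epoch_step()      # once per epoch
        for batch in train_loader:
            outputs = model(**batch)
            # Add the compression loss term to the task loss, as the patched
            # Trainer.training_step does.
            loss = loss_fn(outputs, batch["labels"]) + compression_ctrl.loss()
            loss.backward()
            compression_ctrl.scheduler.step()        # once per optimizer step
            optimizer.step()
            optimizer.zero_grad()
        print(compression_ctrl.statistics().to_str())

    # Export to ONNX, which is what `--to_onnx` triggers in the patched scripts.
    compression_ctrl.export_model("model_int8.onnx")
    return compression_ctrl, model
```
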