diff --git a/README.md b/README.md index a7d33e508c9..4cd8b8bab68 100644 --- a/README.md +++ b/README.md @@ -355,13 +355,6 @@ NNCF may be straightforwardly integrated into training/evaluation pipelines of t NNCF is used as a compression backend within the renowned `transformers` repository in HuggingFace Optimum Intel. -### Git patches for third-party repository - -See [third_party_integration](./third_party_integration) for examples of code modifications (Git patches and base commit IDs are provided) that are necessary to integrate NNCF into the following repositories: - -- [huggingface-transformers](third_party_integration/huggingface_transformers/README.md) -**NOTE**: this patch is deprecated and will be removed from NNCF repository in future releases. - ## Installation Guide For detailed installation instructions please refer to the [Installation](./docs/Installation.md) page. @@ -386,8 +379,6 @@ NNCF is also available via [conda](https://anaconda.org/conda-forge/nncf): conda install -c conda-forge nncf ``` -You may also use one of the Dockerfiles in the [docker](./docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](#demos-tutorials-and-samples). - ### System requirements - Ubuntu\* 18.04 or later (64-bit) diff --git a/docker/README.md b/docker/README.md deleted file mode 100644 index 7a1f4d37465..00000000000 --- a/docker/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Using docker - -## Step 1. Install docker - -Review the instructions for installing Docker [here](https://docs.docker.com/engine/install/ubuntu/) and configure Docker -to use a proxy server as described [here](https://docs.docker.com/network/proxy/#configure-the-docker-client). - -## Step 2. Install nvidia-docker - -*Skip this step if you don't have a GPU.* - -Review the instructions for installing nvidia-docker [here](https://github.com/NVIDIA/nvidia-docker). - -## Step 3. Build image - -In the project folder, run in a terminal: - -```bash -sudo docker image build --network=host -``` - -Use `--network` to duplicate the network settings of your localhost in the build context. - -## Step 4. Run container - -Run in a terminal: - -```bash -sudo docker run \ --it \ ---name=<container_name> \ ---runtime=nvidia \ ---network=host \ ---shm-size=1g \ ---ulimit memlock=-1 \ ---mount type=bind,source=<path_to_dataset>,target=<path_to_dataset_in_container> \ ---mount type=bind,source=<path_to_nncf_repo>,target=/home/nncf \ --<image_name> - ``` - -You should not use `--runtime=nvidia` if you want to use `--cpu-only` mode. - -Use `--shm-size` to increase the size of the shared memory directory. - -Now you have a working container and you can run the examples. 
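The sections removed above pointed users at Docker images and Git patches as ways to obtain a ready-to-run NNCF environment; with those gone, a plain `pip install nncf` (or the conda package) is the supported path. As a quick sanity check of such an installation, the sketch below runs post-training quantization on a toy PyTorch model. It is not taken from the repository and assumes a recent NNCF release in which `nncf.quantize` accepts PyTorch models; the model and the random calibration data are stand-ins.

```python
# Hypothetical smoke test for a pip-installed NNCF environment (not repository code).
import torch
import nncf  # pip install nncf

# Toy model and random calibration data standing in for a real workload.
model = torch.nn.Sequential(torch.nn.Conv2d(3, 8, 3), torch.nn.ReLU())
calibration_items = [torch.randn(1, 3, 32, 32) for _ in range(10)]
calibration_dataset = nncf.Dataset(calibration_items)

# Post-training INT8 quantization; subset_size is capped to the data we actually have.
quantized_model = nncf.quantize(model, calibration_dataset, subset_size=10)
print(quantized_model(calibration_items[0]).shape)
```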
diff --git a/docker/onnx/openvinoep/Dockerfile b/docker/onnx/openvinoep/Dockerfile deleted file mode 100644 index fd5a5d6a3e6..00000000000 --- a/docker/onnx/openvinoep/Dockerfile +++ /dev/null @@ -1,57 +0,0 @@ -FROM ubuntu:20.04 - -ARG PIP_EXTRA_INDEX_URL -ARG PIP_TRUSTED_HOST -ARG http_proxy -ARG https_proxy -ARG no_proxy - -RUN echo "PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL}" && \ - echo "PIP_TRUSTED_HOST=${PIP_TRUSTED_HOST}" && \ - echo "http_proxy=${http_proxy}" && \ - echo "https_proxy=${https_proxy}" && \ - echo "no_proxy=${no_proxy}" && \ - -ARG DEBIAN_FRONTEND=noninteractive -ARG TZ=Etc/UTC - -RUN apt-get update && apt-get -y install --no-install-recommends \ - git=2.25 \ - build-essential=12.8 \ - python3.8-dev=3.8 \ - python3.8-venv=3.8 \ - python3-opencv=4.2 \ - && rm -rf /var/lib/apt/lists/* - -# Add user -ARG BUILD_UID=1001 -ARG BUILD_USER=onnxruntimedev - -RUN adduser --uid $BUILD_UID $BUILD_USER && \ - usermod -a -G video,users ${BUILD_USER} -ENV WORKDIR_PATH /home/${BUILD_USER} - -# Copy nncf -WORKDIR ${WORKDIR_PATH}/nncf -COPY nncf nncf -COPY examples examples -COPY tests tests -COPY setup.py ./ -COPY README.md ./ -COPY Makefile ./ - -WORKDIR ${WORKDIR_PATH} -RUN chown -R ${BUILD_USER}:${BUILD_USER} nncf - -USER ${BUILD_USER} - -# Create & activate venv -ENV VIRTUAL_ENV=${WORKDIR_PATH}/venv -RUN python3 -m venv $VIRTUAL_ENV -ENV PATH="$VIRTUAL_ENV/bin:$PATH" - -WORKDIR ${WORKDIR_PATH}/nncf -ENV PYTHONPATH=$PYTHONPATH:${WORKDIR_PATH}/nncf -RUN make install-onnx-dev - -WORKDIR ${WORKDIR_PATH} diff --git a/docker/onnx/openvinoep/build.sh b/docker/onnx/openvinoep/build.sh deleted file mode 100755 index b49c484f9a3..00000000000 --- a/docker/onnx/openvinoep/build.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -SCRIPT_DIR=$(dirname "$0") -WORK_DIR="${SCRIPT_DIR}/../../../" - -cd $WORK_DIR && echo "WORK_DIR=$PWD" - -docker build -t onnx_ptq_experimental:dev \ - --build-arg http_proxy=$http_proxy \ - --build-arg https_proxy=$https_proxy \ - --build-arg no_proxy=$no_proxy \ - --build-arg PIP_EXTRA_INDEX_URL=$PIP_EXTRA_INDEX_URL \ - --build-arg PIP_TRUSTED_HOST=$PIP_TRUSTED_HOST \ - -f docker/onnx/openvinoep/Dockerfile \ - . 
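The deleted `docker/onnx/openvinoep` image (built by the script above and tagged `onnx_ptq_experimental:dev`) existed to run ONNX post-training quantization experiments via `make install-onnx-dev`. As a rough illustration of that flow without the image, here is a hedged sketch that quantizes an ONNX model through the public `nncf.quantize` API; the exported toy model, the file names, and the `input` tensor name are placeholders introduced for this example only.

```python
# Hypothetical ONNX post-training quantization sketch (not repository code).
import numpy as np
import onnx
import torch
import nncf

# Export a tiny torch model to ONNX so the example is self-contained.
torch.onnx.export(torch.nn.Linear(16, 4), torch.randn(1, 16), "tiny.onnx",
                  input_names=["input"], output_names=["output"])
onnx_model = onnx.load("tiny.onnx")

# For the ONNX backend, each calibration item is a feed dict: {input name: numpy array}.
calibration_items = [{"input": np.random.rand(1, 16).astype(np.float32)} for _ in range(10)]
quantized_model = nncf.quantize(onnx_model, nncf.Dataset(calibration_items), subset_size=10)
onnx.save(quantized_model, "tiny_int8.onnx")
```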
diff --git a/docker/tensorflow/gpu/Dockerfile b/docker/tensorflow/gpu/Dockerfile deleted file mode 100644 index 8f44ab59b0c..00000000000 --- a/docker/tensorflow/gpu/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM nvidia/cuda:11.0.3-cudnn8-runtime-ubuntu20.04 - -RUN apt-get update \ - && apt-get install -y --no-install-recommends \ - apt-transport-https=2.0 \ - git=2.25 && \ - rm -rf /var/lib/apt/lists/* - -RUN apt-get update \ - && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \ - build-essential=12.8 \ - libgl1-mesa-glx=21.2 \ - libglib2.0-dev=2.64 \ - wget=1.20 \ - curl=7.68 \ - zip=3.0 \ - unzip=6.0 \ - nano=4.8 \ - openssh-server=1:8.2 \ - openssh-client=1:8.2 \ - sudo=1.8 \ - python3=3.8 \ - python3-dev=3.8 \ - python3-pip=20.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/bin -RUN ln -s python3.8 python - -RUN pip3 install --no-cache-dir --upgrade pip==23.3 \ - && pip3 install --no-cache-dir setuptools==69.0 - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility - -ENTRYPOINT ["cd /home/nncf \ - && python setup.py install --tf \ - && pip3 install -r examples/tensorflow/requirements.txt \ - && bash"] diff --git a/docker/torch/cpu/Dockerfile b/docker/torch/cpu/Dockerfile deleted file mode 100755 index eea028ef8df..00000000000 --- a/docker/torch/cpu/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -FROM ubuntu:20.04 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - apt-transport-https=2.0 \ - git=2.25 && \ - rm -rf /var/lib/apt/lists/* - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive \ - apt-get install -y --no-install-recommends \ - wget=1.20 \ - curl=7.68 \ - zip=3.0 \ - unzip=6.0 \ - nano=4.8 \ - openssh-server=1:8.2 \ - openssh-client=1:8.2 \ - sudo=1.8 \ - python3=3.8 \ - python3-dev=3.8 \ - python3-pip=20.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -ENTRYPOINT ["cd /home/nncf \ - && python setup.py install --torch \ - && pip3 install -r examples/torch/requirements.txt \ - && bash"] diff --git a/docker/torch/gpu/Dockerfile b/docker/torch/gpu/Dockerfile deleted file mode 100755 index 26e902f3c89..00000000000 --- a/docker/torch/gpu/Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -FROM nvidia/cuda:11.1.1-cudnn8-devel-ubuntu20.04 - -RUN apt-get update && apt-get install -y --no-install-recommends \ - apt-transport-https=2.0 \ - git=2.25 && \ - rm -rf /var/lib/apt/lists/* - -# Required for nvidia-docker v1 -RUN echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf && \ - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf - - -RUN apt-get update && \ - DEBIAN_FRONTEND=noninteractive \ - apt-get install -y --no-install-recommends \ - wget=1.20 \ - curl=7.68 \ - zip=3.0 \ - unzip=6.0 \ - nano=4.8 \ - openssh-server=1:8.2 \ - openssh-client=1:8.2 \ - sudo=1.8 \ - python3=3.8 \ - python3-dev=3.8 \ - python3-pip=20.0 \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -WORKDIR /usr/bin -RUN ln -s python3.8 python - - -ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:${PATH} -ENV LD_LIBRARY_PATH /usr/local/nvidia/lib:/usr/local/nvidia/lib64 - -# nvidia-container-runtime -ENV NVIDIA_VISIBLE_DEVICES all -ENV NVIDIA_DRIVER_CAPABILITIES compute,utility - -ENTRYPOINT ["cd /home/nncf \ - && python setup.py install --torch \ - && pip3 install -r examples/torch/requirements.txt \ - && bash"] diff --git 
a/docs/Installation.md b/docs/Installation.md index e1a9624ce4f..1a42be33a6a 100644 --- a/docs/Installation.md +++ b/docs/Installation.md @@ -58,10 +58,6 @@ pip install git+https://github.com/openvinotoolkit/nncf@bd189e2#egg=nncf Note that in order for this to work for pip versions >= 21.3, your Git version must be at least 2.22. -## As a Docker image - -Use one of the Dockerfiles in the [docker](../docker) directory to build an image with an environment already set up and ready for running NNCF [sample scripts](../README.md#model-compression-samples). - ## Corresponding versions The following table lists the recommended corresponding versions of backend packages diff --git a/tests/torch/sparsity/movement/test_training_with_third_party.py b/tests/torch/sparsity/movement/test_training_with_third_party.py deleted file mode 100644 index f353ac6bf33..00000000000 --- a/tests/torch/sparsity/movement/test_training_with_third_party.py +++ /dev/null @@ -1,87 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -from pathlib import Path - -import pytest - -from tests.torch.helpers import Command -from tests.torch.sparsity.movement.helpers import MRPC_CONFIG_FILE_NAME -from tests.torch.sparsity.movement.helpers import TRAINING_SCRIPTS_PATH -from tests.torch.test_sanity_third_party import TransformersVirtualEnvInstaller -from tests.torch.test_sanity_third_party import create_command_line - - -@pytest.fixture(scope="class") -def temp_folder(tmp_path_factory): - root_folder = tmp_path_factory.mktemp("movement_third_party") - folders = {"models": root_folder / "models", "venv": root_folder / "venv", "repo": root_folder / "repo"} - for folder in folders.values(): - Path(folder).mkdir(exist_ok=True, parents=True) - return folders - - -@pytest.mark.usefixtures("temp_folder") -class TestMovementWithTransformers: - @pytest.fixture(autouse=True) - def setup(self, temp_folder): - self.temp_folder = temp_folder - self.env = TransformersVirtualEnvInstaller(temp_folder["venv"], temp_folder["repo"]) - - @pytest.mark.dependency(name="install_transformers") - def test_install_transformers_env(self, third_party, pip_cache_dir): - if not third_party: - pytest.skip( - "Skip tests of movement sparsity with patched transformers package " - "since `--third-party-sanity` is False." 
- ) - self.env.install_env(pip_cache_dir) - - @pytest.mark.dependency(depends=["install_transformers"], name="glue_movement_train") - def test_movement_glue_train(self): - nncf_config = TRAINING_SCRIPTS_PATH / MRPC_CONFIG_FILE_NAME - output_dir = Path(self.temp_folder["models"], nncf_config.stem) - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path " - "google/bert_uncased_L-2_H-128_A-2 --task_name mrpc --do_train " - " --per_gpu_train_batch_size 4 --learning_rate 1e-4 --num_train_epochs 4 --max_seq_length 128 " - " --max_train_samples 8 --output_dir {output_dir} --save_steps 200 --nncf_config {nncf_config} ".format( - output_dir=output_dir, nncf_config=nncf_config - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert Path(output_dir, "pytorch_model.bin").is_file() - - @pytest.mark.dependency(depends=["install_transformers", "glue_movement_train"]) - def test_movement_glue_eval(self): - nncf_config = TRAINING_SCRIPTS_PATH / MRPC_CONFIG_FILE_NAME - model_dir = Path(self.temp_folder["models"], nncf_config.stem) - output_dir = Path(self.temp_folder["models"], nncf_config.stem + "_eval") - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path {model_dir}" - " --task_name mrpc --do_eval " - " --learning_rate 2e-5" - " --max_seq_length 128 --output_dir {output_dir}" - " --max_eval_samples 10" - " --nncf_config {nncf_config}".format(model_dir=model_dir, output_dir=output_dir, nncf_config=nncf_config) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() diff --git a/tests/torch/test_sanity_third_party.py b/tests/torch/test_sanity_third_party.py deleted file mode 100644 index 85da106eeb6..00000000000 --- a/tests/torch/test_sanity_third_party.py +++ /dev/null @@ -1,368 +0,0 @@ -# Copyright (c) 2023 Intel Corporation -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -import os -import subprocess -import sys - -import pytest - -from nncf.torch import BKC_TORCH_VERSION -from tests.shared.paths import PROJECT_ROOT -from tests.torch.helpers import Command - -TRANSFORMERS_COMMIT = "bd469c40659ce76c81f69c7726759d249b4aef49" -INSTALL_PATH = PROJECT_ROOT.parent -DATASET_PATH = os.path.join(PROJECT_ROOT, "tests", "torch", "data", "mock_datasets") - - -def create_command_line(args, venv_activate, python=sys.executable, cuda_string=""): - line = "{venv_activate} && {cuda} {python_exe} {args}".format( - venv_activate=venv_activate, cuda=cuda_string, args=args, python_exe=python - ) - return line - - -@pytest.fixture(autouse=True, scope="session") -def skip_tests(third_party): - if not third_party: - pytest.skip() - - -@pytest.fixture(scope="session") -def temp_folder(tmp_path_factory): - return { - "models": str(tmp_path_factory.mktemp("models", False)), - "venv": str(tmp_path_factory.mktemp("venv", False)), - "repo": str(tmp_path_factory.mktemp("repo", False)), - } - - -class CachedPipRunner: - def __init__(self, venv_activation_script_path: str, cache_dir: str = None): - self.venv_activate = venv_activation_script_path - self.cache_dir = cache_dir - - def run_pip(self, pip_command: str, cwd: str = None, use_cache: bool = True): - if not use_cache: - cache_dir_entry = "--no-cache-dir" - elif self.cache_dir is not None: - cache_dir_entry = "--cache-dir {}".format(self.cache_dir) - else: - cache_dir_entry = "" - subprocess.run(f"{self.venv_activate} && pip {cache_dir_entry} {pip_command}", check=True, shell=True, cwd=cwd) - - -class TransformersVirtualEnvInstaller: - def __init__(self, venv_path, repo_path): - self.VENV_PATH = str(venv_path) - self.VENV_ACTIVATE = str(". {}/bin/activate".format(self.VENV_PATH)) - self.PYTHON_EXECUTABLE = str("{}/bin/python".format(self.VENV_PATH)) - self.TRANSFORMERS_REPO_PATH = str(os.path.join(repo_path, "transformers")) - self.CUDA_VISIBLE_STRING = "export CUDA_VISIBLE_DEVICES=0;" - self.PATH_TO_PATCH = str( - os.path.join( - PROJECT_ROOT, - "third_party_integration", - "huggingface_transformers", - "0001-Modifications-for-NNCF-usage.patch", - ) - ) - - def install_env(self, pip_cache_dir): - version_string = "{}.{}".format(sys.version_info[0], sys.version_info[1]) - subprocess.call("virtualenv -ppython{} {}".format(version_string, self.VENV_PATH), shell=True) - pip_runner = CachedPipRunner(self.VENV_ACTIVATE, pip_cache_dir) - pip_runner.run_pip("install --upgrade pip") # cache options are available with pip > 20.2 - pip_runner.run_pip("uninstall setuptools -y") - pip_runner.run_pip("install setuptools") - pip_runner.run_pip("install onnx") - torch_install_cmd = "install torch=={}".format(BKC_TORCH_VERSION) - pip_runner.run_pip(torch_install_cmd) - subprocess.run( - "git clone https://github.com/huggingface/transformers {}".format(self.TRANSFORMERS_REPO_PATH), - check=True, - shell=True, - ) - subprocess.run( - "git checkout {}".format(TRANSFORMERS_COMMIT), check=True, shell=True, cwd=self.TRANSFORMERS_REPO_PATH - ) - subprocess.run("cp {} .".format(self.PATH_TO_PATCH), check=True, shell=True, cwd=self.TRANSFORMERS_REPO_PATH) - subprocess.run( - "git apply 0001-Modifications-for-NNCF-usage.patch", check=True, shell=True, cwd=self.TRANSFORMERS_REPO_PATH - ) - pip_runner.run_pip("install .", cwd=self.TRANSFORMERS_REPO_PATH) - pip_runner.run_pip('install -e ".[testing]"', cwd=self.TRANSFORMERS_REPO_PATH) - for sample_folder in ["question-answering", "text-classification", "language-modeling", "token-classification"]: - 
pip_runner.run_pip( - f"install -r examples/pytorch/{sample_folder}/requirements.txt", cwd=self.TRANSFORMERS_REPO_PATH - ) - pip_runner.run_pip("install boto3", cwd=self.TRANSFORMERS_REPO_PATH) - # WA for deleted CONLL2003 in datasets==1.11.0 (https://github.com/huggingface/datasets/issues/3582) - pip_runner.run_pip("install -U datasets", cwd=self.TRANSFORMERS_REPO_PATH) - pip_runner.run_pip("install -e .", cwd=PROJECT_ROOT) - - -class TestTransformers: - @pytest.fixture(autouse=True) - def setup(self, temp_folder): - self.env = TransformersVirtualEnvInstaller(temp_folder["venv"], temp_folder["repo"]) - - @pytest.mark.dependency(name="install_trans") - def test_install_trans_(self, pip_cache_dir): - self.env.install_env(pip_cache_dir) - - @pytest.mark.dependency(depends=["install_trans"], name="xnli_train") - def test_xnli_train(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert-base-chinese" - " --language zh --train_language zh --do_train --per_gpu_train_batch_size 24" - " --learning_rate 5e-5 --num_train_epochs 0.0001 --max_seq_length 128 --output_dir {}" - " --save_steps 200 --nncf_config nncf_bert_config_xnli.json".format( - os.path.join(temp_folder["models"], "xnli") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "xnli", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "xnli_train"]) - def test_xnli_eval(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_xnli.py --model_name_or_path {output}" - " --language zh --do_eval --learning_rate 5e-5 --max_seq_length 128 --output_dir" - " {output} --nncf_config nncf_bert_config_xnli.json --per_gpu_eval_batch_size 24" - " --max_eval_samples 10".format(output=os.path.join(temp_folder["models"], "xnli")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="squad_train") - def test_squad_train(self, temp_folder): - com_line = ( - "examples/pytorch/question-answering/run_qa.py --model_name_or_path " - "bert-large-uncased-whole-word-masking-finetuned-squad --dataset_name squad --do_train " - " --learning_rate 3e-5 --num_train_epochs 0.0001 --max_seq_length 384 --doc_stride 128 " - " --output_dir {} --per_gpu_train_batch_size=1 --save_steps=200 --nncf_config" - " nncf_bert_config_squad.json".format(os.path.join(temp_folder["models"], "squad")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "squad", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "squad_train"]) - def test_squad_eval(self, temp_folder): - com_line = ( - "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output}" - " --do_eval --dataset_name squad --learning_rate 3e-5" - " --max_seq_length 384 --doc_stride 128 --per_gpu_eval_batch_size=4 --output_dir {output} " - " --max_eval_samples 10" - " --nncf_config nncf_bert_config_squad.json".format(output=os.path.join(temp_folder["models"], "squad")) - ) - 
runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="glue_roberta_train") - def test_glue_train(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path" - " roberta-large-mnli --task_name mnli --do_train " - " --per_gpu_train_batch_size 4 --learning_rate 2e-5 --num_train_epochs 0.001 --max_seq_length 128 " - " --output_dir {} --save_steps 200 --nncf_config" - " nncf_roberta_config_mnli.json".format(os.path.join(temp_folder["models"], "roberta_mnli")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "roberta_mnli", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "glue_roberta_train"]) - def test_glue_eval(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}" - " --task_name mnli --do_eval --validation_file {}/glue/glue_data/MNLI/dev_matched.tsv " - " --learning_rate 2e-5" - " --max_seq_length 128 --output_dir {output}" - " --max_eval_samples 10" - " --nncf_config nncf_roberta_config_mnli.json".format( - DATASET_PATH, output=os.path.join(temp_folder["models"], "roberta_mnli") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="glue_distilbert_train") - def test_glue_distilbert_train(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path" - " distilbert-base-uncased --train_file {}/glue/glue_data/SST-2/train.tsv" - " --task_name sst2 --do_train --max_seq_length 128 --per_gpu_train_batch_size 8" - " --learning_rate 5e-5 --num_train_epochs 0.001" - " --output_dir {} --save_steps 200 --nncf_config" - " nncf_distilbert_config_sst2.json".format( - DATASET_PATH, os.path.join(temp_folder["models"], "distilbert_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "distilbert_output", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "glue_distilbert_train"]) - def test_glue_distilbert_eval(self, temp_folder): - com_line = ( - "examples/pytorch/text-classification/run_glue.py --model_name_or_path {output}" - " --task_name sst2 --do_eval --max_seq_length 128" - " --output_dir {output} --validation_file {}/glue/glue_data/SST-2/test.tsv" - " --max_eval_samples 10" - " --nncf_config nncf_distilbert_config_sst2.json".format( - DATASET_PATH, output=os.path.join(temp_folder["models"], "distilbert_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="lm_train") - def test_lm_train(self, temp_folder): - # GPT2 is loaded via torch.frombuffer which is 
not available in torch==1.9.1 yet - com_line = ( - "examples/pytorch/language-modeling/run_clm.py --model_name_or_path distilgpt2" - " --do_train --per_gpu_train_batch_size 1" - " --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1 " - " --num_train_epochs 0.001" - " --output_dir {} --nncf_config" - " nncf_gpt2_config_wikitext_hw_config.json".format(os.path.join(temp_folder["models"], "lm_output")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "lm_output", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "lm_train"]) - def test_lm_eval(self, temp_folder): - # GPT2 is loaded via torch.frombuffer which is not available in torch==1.9.1 yet - com_line = ( - "examples/pytorch/language-modeling/run_clm.py " - " --model_name_or_path {output} --do_eval " - " --output_dir {output} --dataset_name wikitext --dataset_config_name wikitext-2-raw-v1" - " --max_eval_samples 10" - " --nncf_config nncf_gpt2_config_wikitext_hw_config.json".format( - output=os.path.join(temp_folder["models"], "lm_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"], name="ner_train") - def test_ner_train(self, temp_folder): - com_line = ( - "examples/pytorch/token-classification/run_ner.py --model_name_or_path bert-base-uncased" - " --do_train --per_gpu_train_batch_size 1" - " --dataset_name conll2003 " - " --max_train_samples 10" - " --output_dir {} " - " --nncf_config nncf_bert_config_conll.json".format(os.path.join(temp_folder["models"], "ner_output")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "ner_output", "pytorch_model.bin")) - - @pytest.mark.dependency(depends=["install_trans", "ner_train"]) - def test_ner_eval(self, temp_folder): - com_line = ( - "examples/pytorch/token-classification/run_ner.py " - " --model_name_or_path {output} --do_eval " - " --output_dir {output} --dataset_name conll2003" - " --max_eval_samples 10" - " --nncf_config nncf_bert_config_conll.json".format( - output=os.path.join(temp_folder["models"], "ner_output") - ) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - - @pytest.mark.dependency(depends=["install_trans"]) - def test_convert_to_onnx(self, temp_folder): - com_line = ( - "examples/pytorch/question-answering/run_qa.py --model_name_or_path {output} " - " --do_eval" - " --dataset_name squad " - " --max_eval_samples 10" - " --output_dir {output}" - " --to_onnx {output}/model.onnx" - " --nncf_config nncf_bert_config_squad.json".format(output=os.path.join(temp_folder["models"], "squad")) - ) - runner = Command( - create_command_line( - com_line, self.env.VENV_ACTIVATE, self.env.PYTHON_EXECUTABLE, self.env.CUDA_VISIBLE_STRING - ), - self.env.TRANSFORMERS_REPO_PATH, - ) - runner.run() - assert os.path.exists(os.path.join(temp_folder["models"], "squad", "model.onnx")) diff --git 
a/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch b/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch deleted file mode 100644 index 9e177d36b8f..00000000000 --- a/third_party_integration/huggingface_transformers/0001-Modifications-for-NNCF-usage.patch +++ /dev/null @@ -1,1390 +0,0 @@ -From 76c14db1e3501d03f548d9e9ebc58661443d64e1 Mon Sep 17 00:00:00 2001 -From: Alexander Dokuchaev -Date: Mon, 25 Dec 2023 21:36:16 +0200 -Subject: [PATCH] Modifications for NNCF usage - ---- - examples/pytorch/language-modeling/run_clm.py | 77 +++++++++--- - examples/pytorch/question-answering/run_qa.py | 63 ++++++++-- - .../pytorch/text-classification/run_glue.py | 112 +++++++++++++++--- - .../pytorch/text-classification/run_xnli.py | 71 +++++++++-- - .../pytorch/token-classification/run_ner.py | 106 ++++++++++++++--- - nncf_bert_config_conll.json | 44 +++++++ - nncf_bert_config_mrpc.json | 42 +++++++ - nncf_bert_config_squad.json | 44 +++++++ - ...config_squad_magnitude_sparsity_cubic.json | 31 +++++ - nncf_bert_config_xnli.json | 38 ++++++ - nncf_distilbert_config_sst2.json | 33 ++++++ - nncf_gpt2_config_wikitext_hw_config.json | 49 ++++++++ - nncf_mobilebert_config_squad_int8.json | 49 ++++++++ - nncf_roberta_config_mnli.json | 29 +++++ - src/transformers/modeling_utils.py | 24 ++++ - src/transformers/pytorch_utils.py | 3 +- - src/transformers/trainer.py | 51 +++++++- - src/transformers/training_args.py | 6 + - src/transformers/utils/__init__.py | 1 + - 19 files changed, 805 insertions(+), 68 deletions(-) - create mode 100644 nncf_bert_config_conll.json - create mode 100644 nncf_bert_config_mrpc.json - create mode 100644 nncf_bert_config_squad.json - create mode 100644 nncf_bert_config_squad_magnitude_sparsity_cubic.json - create mode 100644 nncf_bert_config_xnli.json - create mode 100644 nncf_distilbert_config_sst2.json - create mode 100644 nncf_gpt2_config_wikitext_hw_config.json - create mode 100644 nncf_mobilebert_config_squad_int8.json - create mode 100644 nncf_roberta_config_mnli.json - -diff --git a/examples/pytorch/language-modeling/run_clm.py b/examples/pytorch/language-modeling/run_clm.py -index fe03cde7c..3f8cd6d37 100755 ---- a/examples/pytorch/language-modeling/run_clm.py -+++ b/examples/pytorch/language-modeling/run_clm.py -@@ -30,6 +30,8 @@ from itertools import chain - from typing import Optional - - import datasets -+import onnx -+import torch - from datasets import load_dataset - - import evaluate -@@ -51,7 +53,12 @@ from transformers.testing_utils import CaptureLogger - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -+from transformers.trainer import get_train_dataloader_for_init - -+from nncf import NNCFConfig -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.torch.initialization import PTInitializingDataLoader - - # Will error if the minimal version of Transformers is not installed. Remove at your own risks. - check_min_version("4.23.0") -@@ -373,22 +380,6 @@ def main(): - "You can do it from another script, save it, and load it from here, using --tokenizer_name." 
- ) - -- if model_args.model_name_or_path: -- model = AutoModelForCausalLM.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ) -- else: -- model = AutoModelForCausalLM.from_config(config) -- n_params = sum(dict((p.data_ptr(), p.numel()) for p in model.parameters()).values()) -- logger.info(f"Training new model from scratch - Total size={n_params/2**20:.2f}M params") -- -- model.resize_token_embeddings(len(tokenizer)) -- - # Preprocessing the datasets. - # First we tokenize all the texts. - if training_args.do_train: -@@ -503,6 +494,59 @@ def main(): - preds = preds[:, :-1].reshape(-1) - return metric.compute(predictions=preds, references=labels) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, train_dataset, -+ default_data_collator) -+ -+ class WikitextInitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), dataloader_output -+ -+ nncf_config.register_extra_structs([ -+ QuantizationRangeInitArgs(WikitextInitializingDataLoader(train_dataloader)), -+ BNAdaptationInitArgs(WikitextInitializingDataLoader(train_dataloader)), -+ ]) -+ -+ if model_args.model_name_or_path: -+ retval = AutoModelForCausalLM.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ else: -+ retval = AutoModelForCausalLM.from_config(config) -+ n_params = sum(dict((p.data_ptr(), p.numel()) for p in retval.parameters()).values()) -+ logger.info(f"Training new model from scratch - Total size={n_params / 2 ** 20:.2f}M params") -+ -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ model.resize_token_embeddings(len(tokenizer)) -+ -+ if training_args.to_onnx: -+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, config.n_positions], dtype=torch.long) -+ onnx.export(model, dummy_tensor, training_args.to_onnx) -+ - # Initialize our Trainer - trainer = Trainer( - model=model, -@@ -516,6 +560,7 @@ def main(): - preprocess_logits_for_metrics=preprocess_logits_for_metrics - if training_args.do_eval and not is_torch_tpu_available() - else None, -+ compression_ctrl=compression_ctrl - ) - - # Training -diff --git a/examples/pytorch/question-answering/run_qa.py b/examples/pytorch/question-answering/run_qa.py -index 1240623b5..b6136d6e3 100755 ---- a/examples/pytorch/question-answering/run_qa.py -+++ b/examples/pytorch/question-answering/run_qa.py -@@ -25,6 +25,7 @@ from dataclasses import dataclass, field - from typing import Optional - - import datasets -+import torch - 
from datasets import load_dataset - - import evaluate -@@ -42,11 +43,19 @@ from transformers import ( - default_data_collator, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version - from utils_qa import postprocess_qa_predictions - -+from torch import onnx -+ -+from nncf import NNCFConfig -+from nncf.torch.initialization import PTInitializingDataLoader -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.common.utils.tensorboard import prepare_for_tensorboard - - # Will error if the minimal version of Transformers is not installed. Remove at your own risks. - check_min_version("4.23.0") -@@ -327,14 +336,6 @@ def main(): - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) -- model = AutoModelForQuestionAnswering.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ) - - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): -@@ -599,6 +600,51 @@ def main(): - def compute_metrics(p: EvalPrediction): - return metric.compute(predictions=p.predictions, references=p.label_ids) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, train_dataset, data_collator) -+ class SquadInitializingDataloader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), dataloader_output -+ -+ nncf_config.register_extra_structs([ -+ QuantizationRangeInitArgs(SquadInitializingDataloader(train_dataloader)), -+ BNAdaptationInitArgs(SquadInitializingDataloader(train_dataloader)), -+ ]) -+ -+ retval = AutoModelForQuestionAnswering.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) 
-+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, 384], dtype=torch.long) -+ onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), training_args.to_onnx) -+ - # Initialize our Trainer - trainer = QuestionAnsweringTrainer( - model=model, -@@ -610,6 +656,7 @@ def main(): - data_collator=data_collator, - post_process_function=post_processing_function, - compute_metrics=compute_metrics, -+ compression_ctrl=compression_ctrl - ) - - # Training -diff --git a/examples/pytorch/text-classification/run_glue.py b/examples/pytorch/text-classification/run_glue.py -index 3eb423f08..f675e7ab4 100755 ---- a/examples/pytorch/text-classification/run_glue.py -+++ b/examples/pytorch/text-classification/run_glue.py -@@ -29,6 +29,10 @@ from datasets import load_dataset - - import evaluate - import transformers -+from nncf import NNCFConfig -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.torch.initialization import PTInitializingDataLoader - from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, -@@ -42,6 +46,7 @@ from transformers import ( - default_data_collator, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -@@ -366,15 +371,6 @@ def main(): - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) -- model = AutoModelForSequenceClassification.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, -- ) - - # Preprocessing the raw_datasets - if data_args.task_name is not None: -@@ -400,12 +396,12 @@ def main(): - # Some models have set the order of the labels to use, so let's make sure we do use it. - label_to_id = None - if ( -- model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id -+ config.label2id != PretrainedConfig(num_labels=num_labels).label2id - and data_args.task_name is not None - and not is_regression - ): - # Some have all caps in their config, some don't. 
-- label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()} -+ label_name_to_id = {k.lower(): v for k, v in config.label2id.items()} - if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)): - label_to_id = {i: int(label_name_to_id[label_list[i]]) for i in range(num_labels)} - else: -@@ -418,11 +414,11 @@ def main(): - label_to_id = {v: i for i, v in enumerate(label_list)} - - if label_to_id is not None: -- model.config.label2id = label_to_id -- model.config.id2label = {id: label for label, id in config.label2id.items()} -+ config.label2id = label_to_id -+ config.id2label = {id: label for label, id in config.label2id.items()} - elif data_args.task_name is not None and not is_regression: -- model.config.label2id = {l: i for i, l in enumerate(label_list)} -- model.config.id2label = {id: label for label, id in config.label2id.items()} -+ config.label2id = {l: i for i, l in enumerate(label_list)} -+ config.id2label = {id: label for label, id in config.label2id.items()} - - if data_args.max_seq_length > tokenizer.model_max_length: - logger.warning( -@@ -458,6 +454,87 @@ def main(): - max_train_samples = min(len(train_dataset), data_args.max_train_samples) - train_dataset = train_dataset.select(range(max_train_samples)) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, -+ train_dataset, -+ data_collator=default_data_collator) -+ -+ class SST2InitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "labels": dataloader_output["labels"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "input_ids": dataloader_output["input_ids"] -+ } -+ -+ class MRPCInitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "labels": dataloader_output["labels"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "input_ids": dataloader_output["input_ids"], -+ "token_type_ids": dataloader_output["token_type_ids"] -+ } -+ -+ class MNLIInitializingDataLoader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "labels": dataloader_output["labels"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "input_ids": dataloader_output["input_ids"] -+ } -+ -+ if data_args.task_name == "sst2": -+ initializing_data_loader_cls = SST2InitializingDataLoader -+ elif data_args.task_name == "mrpc": -+ initializing_data_loader_cls = MRPCInitializingDataLoader -+ elif data_args.task_name == "mnli": -+ initializing_data_loader_cls = MNLIInitializingDataLoader -+ initializing_data_loader = initializing_data_loader_cls(train_dataloader) -+ nncf_config.register_extra_structs([QuantizationRangeInitArgs(initializing_data_loader), -+ BNAdaptationInitArgs(initializing_data_loader)]) -+ -+ -+ retval = AutoModelForSequenceClassification.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ 
nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) -+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ import torch -+ from torch import onnx -+ dummy_tensor = torch.ones([1, 128], dtype=torch.long) -+ onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), -+ training_args.to_onnx, opset_version=10) -+ - if training_args.do_eval: - if "validation" not in raw_datasets and "validation_matched" not in raw_datasets: - raise ValueError("--do_eval requires a validation dataset") -@@ -518,8 +595,13 @@ def main(): - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, -+ compression_ctrl=compression_ctrl - ) - -+ if nncf_config is not None: -+ if not (training_args.local_rank == -1 or training_args.no_cuda): -+ compression_ctrl.distributed() -+ - # Training - if training_args.do_train: - checkpoint = None -diff --git a/examples/pytorch/text-classification/run_xnli.py b/examples/pytorch/text-classification/run_xnli.py -index 55523edfc..68a3ebe41 100755 ---- a/examples/pytorch/text-classification/run_xnli.py -+++ b/examples/pytorch/text-classification/run_xnli.py -@@ -26,10 +26,16 @@ from typing import Optional - - import datasets - import numpy as np -+import torch - from datasets import load_dataset - - import evaluate - import transformers -+from nncf import NNCFConfig -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.torch import register_default_init_args -+from nncf.torch.initialization import PTInitializingDataLoader -+ - from transformers import ( - AutoConfig, - AutoModelForSequenceClassification, -@@ -42,6 +48,7 @@ from transformers import ( - default_data_collator, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -@@ -282,15 +289,6 @@ def main(): - revision=model_args.model_revision, - use_auth_token=True if model_args.use_auth_token else None, - ) -- model = AutoModelForSequenceClassification.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, -- ) - - # Preprocessing the datasets - # Padding strategy -@@ -367,6 +365,56 @@ def main(): - else: - data_collator = None - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ -+ if training_args.do_train: -+ train_dataloader = get_train_dataloader_for_init(training_args, -+ train_dataset, -+ data_collator=data_collator) -+ -+ class KwargBasedInitializingDataloader(PTInitializingDataLoader): -+ def get_inputs(self, 
dataloader_output): -+ return (), dataloader_output -+ -+ initializing_data_loader = KwargBasedInitializingDataloader(train_dataloader) -+ nncf_config = register_default_init_args(nncf_config, initializing_data_loader) -+ -+ -+ retval = AutoModelForSequenceClassification.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) -+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, training_args.max_seq_length], dtype=torch.long) -+ torch.onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), training_args.to_onnx) -+ - # Initialize our Trainer - trainer = Trainer( - model=model, -@@ -376,8 +424,13 @@ def main(): - compute_metrics=compute_metrics, - tokenizer=tokenizer, - data_collator=data_collator, -+ compression_ctrl=compression_ctrl - ) - -+ if nncf_config is not None: -+ if not (training_args.local_rank == -1 or training_args.no_cuda): -+ compression_ctrl.distributed() -+ - # Training - if training_args.do_train: - checkpoint = None -diff --git a/examples/pytorch/token-classification/run_ner.py b/examples/pytorch/token-classification/run_ner.py -index 52cbbb87b..30f150580 100755 ---- a/examples/pytorch/token-classification/run_ner.py -+++ b/examples/pytorch/token-classification/run_ner.py -@@ -22,15 +22,24 @@ Fine-tuning the library models for token classification. 
- import logging - import os - import sys -+from copy import deepcopy - from dataclasses import dataclass, field - from typing import Optional -+from typing import List - - import datasets - import numpy as np - from datasets import ClassLabel, load_dataset - - import evaluate -+import torch - import transformers -+from nncf import NNCFConfig -+from nncf.config.structures import BNAdaptationInitArgs -+from nncf.config.structures import QuantizationRangeInitArgs -+from nncf.torch.initialization import PTInitializingDataLoader -+from packaging import version -+from torch import onnx - from transformers import ( - AutoConfig, - AutoModelForTokenClassification, -@@ -43,6 +52,7 @@ from transformers import ( - TrainingArguments, - set_seed, - ) -+from transformers.trainer import get_train_dataloader_for_init - from transformers.trainer_utils import get_last_checkpoint - from transformers.utils import check_min_version, send_example_telemetry - from transformers.utils.versions import require_version -@@ -204,6 +214,16 @@ class DataTrainingArguments: - self.task_name = self.task_name.lower() - - -+def filter_columns(dataset, keep_columns: List[str], remove_columns: List[str]): -+ if version.parse(datasets.__version__) < version.parse("1.4.0"): -+ dataset.set_format( -+ type=dataset.format["type"], columns=keep_columns, format_kwargs=dataset.format["format_kwargs"] -+ ) -+ return dataset -+ else: -+ return dataset.remove_columns(remove_columns) -+ -+ - def main(): - # See all possible arguments in src/transformers/training_args.py - # or by passing the --help flag to this script. -@@ -366,16 +386,6 @@ def main(): - use_auth_token=True if model_args.use_auth_token else None, - ) - -- model = AutoModelForTokenClassification.from_pretrained( -- model_args.model_name_or_path, -- from_tf=bool(".ckpt" in model_args.model_name_or_path), -- config=config, -- cache_dir=model_args.cache_dir, -- revision=model_args.model_revision, -- use_auth_token=True if model_args.use_auth_token else None, -- ignore_mismatched_sizes=model_args.ignore_mismatched_sizes, -- ) -- - # Tokenizer check: this script requires a fast tokenizer. - if not isinstance(tokenizer, PreTrainedTokenizerFast): - raise ValueError( -@@ -385,25 +395,25 @@ def main(): - ) - - # Model has labels -> use them. -- if model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id: -- if list(sorted(model.config.label2id.keys())) == list(sorted(label_list)): -+ if config.label2id != PretrainedConfig(num_labels=num_labels).label2id: -+ if list(sorted(config.label2id.keys())) == list(sorted(label_list)): - # Reorganize `label_list` to match the ordering of the model. 
- if labels_are_int: -- label_to_id = {i: int(model.config.label2id[l]) for i, l in enumerate(label_list)} -- label_list = [model.config.id2label[i] for i in range(num_labels)] -+ label_to_id = {i: int(config.label2id[l]) for i, l in enumerate(label_list)} -+ label_list = [config.id2label[i] for i in range(num_labels)] - else: -- label_list = [model.config.id2label[i] for i in range(num_labels)] -+ label_list = [config.id2label[i] for i in range(num_labels)] - label_to_id = {l: i for i, l in enumerate(label_list)} - else: - logger.warning( - "Your model seems to have been trained with labels, but they don't match the dataset: ", -- f"model labels: {list(sorted(model.config.label2id.keys()))}, dataset labels:" -+ f"model labels: {list(sorted(config.label2id.keys()))}, dataset labels:" - f" {list(sorted(label_list))}.\nIgnoring the model labels as a result.", - ) - - # Set the correspondences label/ID inside the model config -- model.config.label2id = {l: i for i, l in enumerate(label_list)} -- model.config.id2label = {i: l for i, l in enumerate(label_list)} -+ config.label2id = {l: i for i, l in enumerate(label_list)} -+ config.id2label = {i: l for i, l in enumerate(label_list)} - - # Map that sends B-Xxx label to its I-Xxx counterpart - b_to_i_label = [] -@@ -504,6 +514,65 @@ def main(): - # Data collator - data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8 if training_args.fp16 else None) - -+ nncf_config = None -+ if training_args.nncf_config is not None: -+ nncf_config = NNCFConfig.from_json(training_args.nncf_config) -+ if nncf_config.get("log_dir") is None: -+ nncf_config["log_dir"] = training_args.output_dir -+ if not os.path.exists(training_args.output_dir) and training_args.local_rank in [-1, 0]: -+ os.makedirs(nncf_config["log_dir"]) -+ if training_args.do_train: -+ train_dataset_for_init = deepcopy(train_dataset) -+ -+ train_dataset_for_init = filter_columns(train_dataset_for_init, -+ keep_columns=['labels', 'input_ids', 'attention_mask', -+ 'token_type_ids'], -+ remove_columns=['ner_tags', 'pos_tags', 'tokens', 'id', -+ 'chunk_tags']) -+ train_dataloader = get_train_dataloader_for_init(training_args, train_dataset_for_init, data_collator) -+ -+ class ConllInitializingDataloader(PTInitializingDataLoader): -+ def get_inputs(self, dataloader_output): -+ return (), { -+ "input_ids": dataloader_output["input_ids"], -+ "attention_mask": dataloader_output["attention_mask"], -+ "token_type_ids": dataloader_output["token_type_ids"], -+ } -+ -+ nncf_config.register_extra_structs([ -+ QuantizationRangeInitArgs(ConllInitializingDataloader(train_dataloader)), -+ BNAdaptationInitArgs(ConllInitializingDataloader(train_dataloader)), -+ ]) -+ -+ retval = AutoModelForTokenClassification.from_pretrained( -+ model_args.model_name_or_path, -+ from_tf=bool(".ckpt" in model_args.model_name_or_path), -+ config=config, -+ cache_dir=model_args.cache_dir, -+ revision=model_args.model_revision, -+ use_auth_token=True if model_args.use_auth_token else None, -+ nncf_config=nncf_config, -+ nncf_eval=nncf_config is not None and training_args.do_eval and not training_args.do_train -+ ) -+ -+ if nncf_config is None: -+ model = retval -+ compression_ctrl = None -+ else: -+ compression_ctrl, model = retval -+ -+ -+ if training_args.to_onnx: -+ # Expecting the following forward signature: -+ # (input_ids, attention_mask, token_type_ids, ...) 
-+ if nncf_config is not None: -+ compression_ctrl.export_model(training_args.to_onnx) -+ else: -+ model.to('cpu') -+ dummy_tensor = torch.ones([1, 128], dtype=torch.long) -+ onnx.export(model, (dummy_tensor, dummy_tensor, dummy_tensor), training_args.to_onnx, -+ opset_version=10) -+ - # Metrics - metric = evaluate.load("seqeval") - -@@ -549,6 +618,7 @@ def main(): - tokenizer=tokenizer, - data_collator=data_collator, - compute_metrics=compute_metrics, -+ compression_ctrl=compression_ctrl - ) - - # Training -diff --git a/nncf_bert_config_conll.json b/nncf_bert_config_conll.json -new file mode 100644 -index 000000000..bf7c88ebb ---- /dev/null -+++ b/nncf_bert_config_conll.json -@@ -0,0 +1,44 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 32, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true, -+ "per_channel": false -+ } -+ } -+} -diff --git a/nncf_bert_config_mrpc.json b/nncf_bert_config_mrpc.json -new file mode 100644 -index 000000000..425d89d76 ---- /dev/null -+++ b/nncf_bert_config_mrpc.json -@@ -0,0 +1,42 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 64, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "per_channel": false -+ } -+ } -+} -diff --git a/nncf_bert_config_squad.json b/nncf_bert_config_squad.json -new file mode 100644 -index 000000000..2a055de17 ---- /dev/null -+++ b/nncf_bert_config_squad.json -@@ -0,0 +1,44 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 32, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true, -+ "per_channel": false -+ } -+ } -+} -diff --git a/nncf_bert_config_squad_magnitude_sparsity_cubic.json b/nncf_bert_config_squad_magnitude_sparsity_cubic.json -new file mode 100644 -index 000000000..b4452e8d4 ---- /dev/null -+++ b/nncf_bert_config_squad_magnitude_sparsity_cubic.json -@@ -0,0 +1,31 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "magnitude_sparsity", -+ "params": { -+ 
"schedule": "polynomial", -+ "power": 3, -+ "sparsity_init": 0.0, -+ "sparsity_target": 0.8, -+ "sparsity_target_epoch": 40, -+ "sparsity_freeze_epoch": 60, -+ "update_per_optimizer_step": true, -+ "steps_per_epoch": 1109, -+ "weight_importance": "abs" -+ }, -+ "ignored_scopes": ["{re}.*NNCFEmbedding"] -+ } -+} -diff --git a/nncf_bert_config_xnli.json b/nncf_bert_config_xnli.json -new file mode 100644 -index 000000000..92b95db1c ---- /dev/null -+++ b/nncf_bert_config_xnli.json -@@ -0,0 +1,38 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 96 -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "ignored_scopes": ["{re}BertSelfAttention\\[self\\]/__add___0", -+ "{re}BertIntermediate\\[intermediate\\]/NNCFLinear\\[dense\\]/linear_0" -+ ], -+ "activations": -+ { -+ "mode": "asymmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric" -+ } -+ } -+} -diff --git a/nncf_distilbert_config_sst2.json b/nncf_distilbert_config_sst2.json -new file mode 100644 -index 000000000..dc140ab39 ---- /dev/null -+++ b/nncf_distilbert_config_sst2.json -@@ -0,0 +1,33 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 128], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 32, -+ "type": "mean_percentile" -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true -+ } -+ } -+} -diff --git a/nncf_gpt2_config_wikitext_hw_config.json b/nncf_gpt2_config_wikitext_hw_config.json -new file mode 100644 -index 000000000..55173b25b ---- /dev/null -+++ b/nncf_gpt2_config_wikitext_hw_config.json -@@ -0,0 +1,49 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 1024], -+ "type": "long" -+ } -+ ], -+ "hw_config_type": "cpu", -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 16, -+ "type": "percentile", -+ "params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "ignored_scopes": [ -+ // Intermediate embedding sum results -+ "GPT2LMHeadModel/GPT2Model[transformer]/__add___0", -+ -+ // Scaling in attention -+ "{re}.*Attention\\[attn\\]/__truediv___0", -+ -+ // Pre-LayerNorm additions -+ "{re}.*Block\\[[0-9]*\\]/__add___0", -+ "{re}.*Block\\[[0-9]*\\]/__add___1", -+ -+ // LM head -+ "GPT2LMHeadModel/NNCFLinear[lm_head]/linear_0" -+ ], -+ "activations": -+ { -+ "mode": "symmetric" -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true -+ } -+ } -+} -diff --git a/nncf_mobilebert_config_squad_int8.json b/nncf_mobilebert_config_squad_int8.json -new file mode 100644 -index 000000000..4d0e84edf ---- /dev/null -+++ b/nncf_mobilebert_config_squad_int8.json -@@ -0,0 +1,49 @@ -+{ -+ "input_info": [ -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ }, -+ { -+ "sample_size": [1, 384], -+ "type": "long" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 64, -+ "type": "percentile", -+ 
"params": -+ { -+ "min_percentile": 0.01, -+ "max_percentile": 99.99 -+ } -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "ignored_scopes": ["{re}MobileBertSelfAttention\\[self\\]/__add___0", -+ "{re}MobileBertIntermediate\\[intermediate\\]/NNCFLinear\\[dense\\]/linear_0"], -+ "activations": -+ { -+ "mode": "symmetric", -+ "ignored_scopes": [ -+ "{re}MobileBertForQuestionAnswering/MobileBertModel\\[mobilebert\\]/MobileBertEmbeddings\\[embeddings\\]/__add___0", -+ "{re}MobileBertForQuestionAnswering/MobileBertModel\\[mobilebert\\]/MobileBertEmbeddings\\[embeddings\\]/__add___1", -+ "{re}MobileBertOutput\\[output\\]/__add___0", -+ "{re}NoNorm\\[LayerNorm\\]/__mul___0"] -+ }, -+ "weights": -+ { -+ "mode": "symmetric", -+ "signed": true -+ } -+ } -+} -diff --git a/nncf_roberta_config_mnli.json b/nncf_roberta_config_mnli.json -new file mode 100644 -index 000000000..46f819bca ---- /dev/null -+++ b/nncf_roberta_config_mnli.json -@@ -0,0 +1,29 @@ -+{ -+ "input_info": [ -+ { -+ "keyword": "input_ids", -+ "sample_size": [1, 128], -+ "type": "long", -+ "filler": "ones" -+ } -+ ], -+ "compression": { -+ "algorithm": "quantization", -+ "initializer": { -+ "range": { -+ "num_init_samples": 24 -+ }, -+ "batchnorm_adaptation": { -+ "num_bn_adaptation_samples": 0 -+ } -+ }, -+ "activations": -+ { -+ "mode": "asymmetric" -+ }, -+ "weights": -+ { -+ "mode": "asymmetric" -+ } -+ } -+} -diff --git a/src/transformers/modeling_utils.py b/src/transformers/modeling_utils.py -index 5f4fccd33..7f4cdb3d8 100644 ---- a/src/transformers/modeling_utils.py -+++ b/src/transformers/modeling_utils.py -@@ -27,10 +27,12 @@ from functools import partial - from typing import Any, Callable, Dict, List, Optional, Tuple, Union - - import torch -+from nncf.torch import create_compressed_model - from packaging import version - from torch import Tensor, device, nn - from torch.nn import CrossEntropyLoss - -+from transformers.utils import NNCF_PT_STATE_NAME - from transformers.utils.hub import convert_file_size_to_int, get_checkpoint_shard_files - from transformers.utils.import_utils import is_sagemaker_mp_enabled - -@@ -1497,6 +1499,7 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - push_to_hub: bool = False, - max_shard_size: Union[int, str] = "10GB", - safe_serialization: bool = False, -+ nncf_compression_state: Dict = None, - **kwargs, - ): - """ -@@ -1620,6 +1623,10 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - else: - save_function(shard, os.path.join(save_directory, shard_file)) - -+ if nncf_compression_state is not None: -+ nncf_state_output_file = os.path.join(save_directory, NNCF_PT_STATE_NAME) -+ save_function(nncf_compression_state, nncf_state_output_file) -+ - if index is None: - logger.info(f"Model weights saved in {os.path.join(save_directory, WEIGHTS_NAME)}") - else: -@@ -1901,6 +1908,8 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - load_in_8bit_skip_modules = kwargs.pop("load_in_8bit_skip_modules", None) - subfolder = kwargs.pop("subfolder", "") - commit_hash = kwargs.pop("_commit_hash", None) -+ nncf_config = kwargs.pop("nncf_config", None) -+ nncf_eval = kwargs.pop("nncf_eval", False) - - if trust_remote_code is True: - logger.warning( -@@ -2321,6 +2330,11 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - if dtype_orig is not None: - torch.set_default_dtype(dtype_orig) - -+ if nncf_config is not None and nncf_eval: -+ 
compression_algo_controller, model = create_compressed_model(model, nncf_config, -+ compression_state=state_dict) -+ return compression_algo_controller, model -+ - model, missing_keys, unexpected_keys, mismatched_keys, error_msgs = cls._load_pretrained_model( - model, - state_dict, -@@ -2344,6 +2358,16 @@ class PreTrainedModel(nn.Module, ModuleUtilsMixin, GenerationMixin, PushToHubMix - # Set model in evaluation mode to deactivate DropOut modules by default - model.eval() - -+ if nncf_config is not None: -+ compression_state = None -+ compression_state_file = os.path.join(pretrained_model_name_or_path, NNCF_PT_STATE_NAME) -+ if os.path.isfile(compression_state_file): -+ compression_state = torch.load(compression_state_file) -+ -+ compression_algo_controller, model = create_compressed_model(model, nncf_config, -+ compression_state=compression_state) -+ return compression_algo_controller, model -+ - # Dispatch model with hooks on all devices if necessary - if device_map is not None: - dispatch_model(model, device_map=device_map, offload_dir=offload_folder) -diff --git a/src/transformers/pytorch_utils.py b/src/transformers/pytorch_utils.py -index d94e049b5..09c99d4dd 100644 ---- a/src/transformers/pytorch_utils.py -+++ b/src/transformers/pytorch_utils.py -@@ -87,7 +87,8 @@ def prune_linear_layer(layer: nn.Linear, index: torch.LongTensor, dim: int = 0) - new_layer.bias.requires_grad = True - return new_layer - -- -+import nncf -+@nncf.torch.register_module() - class Conv1D(nn.Module): - """ - 1D-convolutional layer as defined by Radford et al. for OpenAI GPT (and also used in GPT-2). -diff --git a/src/transformers/trainer.py b/src/transformers/trainer.py -index 214e7a978..12a5787cd 100755 ---- a/src/transformers/trainer.py -+++ b/src/transformers/trainer.py -@@ -33,7 +33,7 @@ from pathlib import Path - from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union - - from tqdm.auto import tqdm -- -+from nncf.torch.nncf_network import NNCFNetwork - - # Integrations must be imported before ML frameworks: - from .integrations import ( # isort: split -@@ -55,6 +55,8 @@ import numpy as np - import torch - import torch.distributed as dist - from packaging import version -+from nncf.torch.compression_method_api import PTCompressionAlgorithmController -+from nncf.common.utils.tensorboard import prepare_for_tensorboard - from torch import nn - from torch.utils.data import DataLoader, Dataset, RandomSampler, SequentialSampler - from torch.utils.data.distributed import DistributedSampler -@@ -206,6 +208,30 @@ SCHEDULER_NAME = "scheduler.pt" - SCALER_NAME = "scaler.pt" - - -+def get_train_dataloader_for_init(args, train_dataset, data_collator=None): -+ from torch.utils.data import RandomSampler -+ from torch.utils.data import DistributedSampler -+ train_sampler = ( -+ RandomSampler(train_dataset) -+ if args.local_rank == -1 -+ else DistributedSampler(train_dataset) -+ ) -+ -+ if data_collator is None: -+ from transformers.data.data_collator import default_data_collator -+ data_collator = default_data_collator -+ -+ from torch.utils.data import DataLoader -+ data_loader = DataLoader( -+ train_dataset, -+ batch_size=args.train_batch_size, -+ sampler=train_sampler, -+ collate_fn=data_collator, -+ drop_last=args.dataloader_drop_last, -+ ) -+ return data_loader -+ -+ - class Trainer: - """ - Trainer is a simple but feature-complete training and eval loop for PyTorch, optimized for 🤗 Transformers. 
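
The patch above wires NNCF's quantizer range initialization and BatchNorm adaptation to an ordinary training `DataLoader` through a `PTInitializingDataLoader` subclass (see `ConllInitializingDataloader` and the `get_train_dataloader_for_init` helper). A minimal standalone sketch of that pattern follows; the class and helper names here are illustrative, and the import paths are taken from recent NNCF releases, so they may need adjusting:

```python
# Minimal sketch of the initialization-dataloader pattern used in the patch.
# The class/function names are illustrative; the import paths follow recent
# NNCF releases and may differ slightly from those assumed by the patch.
from nncf import NNCFConfig
from nncf.config.structures import BNAdaptationInitArgs, QuantizationRangeInitArgs
from nncf.torch.initialization import PTInitializingDataLoader


class TokenClassificationInitDataLoader(PTInitializingDataLoader):
    """Tells NNCF how to turn one dataloader batch into model inputs during
    quantizer range initialization and BatchNorm adaptation."""

    def get_inputs(self, dataloader_output):
        # No positional arguments; everything is passed as keyword arguments,
        # mirroring the HF model forward signature.
        return (), {
            "input_ids": dataloader_output["input_ids"],
            "attention_mask": dataloader_output["attention_mask"],
            "token_type_ids": dataloader_output["token_type_ids"],
        }


def attach_init_dataloaders(nncf_config: NNCFConfig, train_dataloader) -> NNCFConfig:
    init_loader = TokenClassificationInitDataLoader(train_dataloader)
    nncf_config.register_extra_structs(
        [
            QuantizationRangeInitArgs(init_loader),
            BNAdaptationInitArgs(init_loader),
        ]
    )
    return nncf_config
```
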
-@@ -304,12 +330,15 @@ class Trainer: - callbacks: Optional[List[TrainerCallback]] = None, - optimizers: Tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), - preprocess_logits_for_metrics: Callable[[torch.Tensor, torch.Tensor], torch.Tensor] = None, -+ compression_ctrl: PTCompressionAlgorithmController = None - ): - if args is None: - output_dir = "tmp_trainer" - logger.info(f"No `TrainingArguments` passed, using `output_dir={output_dir}`.") - args = TrainingArguments(output_dir=output_dir) - self.args = args -+ -+ self.compression_ctrl = compression_ctrl - # Seed must be set before instantiating the model when using model - enable_full_determinism(self.args.seed) if self.args.full_determinism else set_seed(self.args.seed) - self.hp_name = None -@@ -1409,6 +1438,8 @@ class Trainer: - - if self.args.ddp_bucket_cap_mb is not None: - kwargs["bucket_cap_mb"] = self.args.ddp_bucket_cap_mb -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.distributed() - model = nn.parallel.DistributedDataParallel( - model, - device_ids=[self.args.local_rank] if self.args._n_gpu != 0 else None, -@@ -1687,6 +1718,9 @@ class Trainer: - _ = list(train_dataloader.sampler) - - for epoch in range(epochs_trained, num_train_epochs): -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.scheduler.epoch_step() -+ print(self.compression_ctrl.statistics().to_str()) - if isinstance(train_dataloader, DataLoader) and isinstance(train_dataloader.sampler, DistributedSampler): - train_dataloader.sampler.set_epoch(epoch) - elif hasattr(train_dataloader, "dataset") and isinstance(train_dataloader.dataset, IterableDatasetShard): -@@ -1790,6 +1824,8 @@ class Trainer: - ) - - # Optimizer step -+ if self.compression_ctrl is not None: -+ self.compression_ctrl.scheduler.step() - optimizer_was_run = True - if self.deepspeed: - pass # called outside the loop -@@ -1814,6 +1850,7 @@ class Trainer: - model.zero_grad() - self.state.global_step += 1 - self.state.epoch = epoch + (step + 1) / steps_in_epoch -+ self.state.curr_loss = tr_loss_step.cpu().detach().item() - self.control = self.callback_handler.on_step_end(args, self.state, self.control) - - self._maybe_log_save_evaluate(tr_loss, model, trial, epoch, ignore_keys_for_eval) -@@ -2033,6 +2070,14 @@ class Trainer: - logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) - logs["learning_rate"] = self._get_learning_rate() - -+ if self.compression_ctrl is not None: -+ logs["compression_loss"] = self.compression_ctrl.loss().item() -+ compression_stats = self.compression_ctrl.statistics() -+ for key, value in prepare_for_tensorboard(compression_stats).items(): -+ logs["compression/statistics/{0}".format(key)] = value -+ print(compression_stats.to_str()) -+ -+ - self._total_loss_scalar += tr_loss_scalar - self._globalstep_last_logged = self.state.global_step - self.store_flos() -@@ -2492,6 +2537,10 @@ class Trainer: - # deepspeed handles loss scaling by gradient_accumulation_steps in its `backward` - loss = loss / self.args.gradient_accumulation_steps - -+ if self.compression_ctrl is not None: -+ compression_loss = self.compression_ctrl.loss() -+ loss += compression_loss -+ - if self.do_grad_scaling: - self.scaler.scale(loss).backward() - elif self.use_apex: -diff --git a/src/transformers/training_args.py b/src/transformers/training_args.py -index 170315fe2..daa497c02 100644 ---- a/src/transformers/training_args.py -+++ b/src/transformers/training_args.py -@@ -993,6 +993,12 @@ class 
TrainingArguments: - }, - ) - -+ nncf_config: str = field(default=None, -+ metadata={"help": "NNCF configuration .json file for compression-enabled training"}) -+ -+ to_onnx: str = field(default=None, -+ metadata={"help": "Name of the ONNX model file to export the model to."}) -+ - def __post_init__(self): - # Handle --use_env option in torch.distributed.launch (local_rank not passed as an arg then). - # This needs to happen before any call to self.device or self.n_gpu. -diff --git a/src/transformers/utils/__init__.py b/src/transformers/utils/__init__.py -index 2269f2254..2f3082293 100644 ---- a/src/transformers/utils/__init__.py -+++ b/src/transformers/utils/__init__.py -@@ -154,6 +154,7 @@ from .import_utils import ( - - - WEIGHTS_NAME = "pytorch_model.bin" -+NNCF_PT_STATE_NAME = "nncf_state.bin" - WEIGHTS_INDEX_NAME = "pytorch_model.bin.index.json" - TF2_WEIGHTS_NAME = "tf_model.h5" - TF2_WEIGHTS_INDEX_NAME = "tf_model.h5.index.json" --- -2.34.1 diff --git a/third_party_integration/huggingface_transformers/README.md b/third_party_integration/huggingface_transformers/README.md deleted file mode 100644 index 1ae5b35b0f3..00000000000 --- a/third_party_integration/huggingface_transformers/README.md +++ /dev/null @@ -1,145 +0,0 @@ -# Integrating NNCF into Transformers - -https://github.com/huggingface/transformers - -This folder contains a git patch to enable NNCF-based quantization for XNLI, SQuAD and GLUE training pipelines of the huggingface transformers repository. - -**NOTE**: this patch is deprecated and will be removed from NNCF repository in future releases. - -Instructions: - -1. Apply the `0001-Modifications-for-NNCF-usage.patch` file to the huggingface transformers repository checked out at commit id: `bd469c40659ce76c81f69c7726759d249b4aef49` - -2. Install the `transformers` library and the example scripts from the patched repository as described in the documentation for the huggingface transformers repository. - -3. To start quantization-aware fine-tuning of NLP models using NNCF, use the regular scripts and command line parameters for XNLI and SQuAD training, but with additional `--nncf_config ` parameter. -The NNCF configs to be used in this way are also provided in the same patch on a per-model, per-compression algorithm basis. -Distributed multiprocessing is also supported, simply use the corresponding version of the command line in the huggingface transformers repository with the same additional `--nncf_config` parameter. - -4. While running with the `--nncf_config` option, the training scripts will output NNCF-wrapped model checkpoints instead of the regular ones. You may evaluate these checkpoints using the same command lines for training above, but with the`--do_train` key omitted. In order to export these checkpoints into ONNX format, further add `--to_onnx ` to your evaluation command line parameters. -See exact command lines for each case in the model notes below. -Note that in all cases the training hyperparameters might have to be adjusted to accommodate the hardware you have available. - -## Current best results - -All models use as their baselines the checkpoints obtained with the scripts and command line parameters from the corresponding sections in the original repository documentation. While fine-tuning the quantized model, the hyperparameters were left unchanged, i.e. the difference in the training script invocation was limited to adding `--nncf_config` option and specifying the pre-trained baseline model as the starting point for quantization fine-tuning. 
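
Step 4 above relies on the patched `from_pretrained`, which returns a (compression controller, model) pair whenever an NNCF config is supplied. A hedged sketch of what the evaluation/export path amounts to for an already fine-tuned INT8 checkpoint (the checkpoint directory and config file names follow the CoNLL-2003 command lines below; this only works against the patched `transformers`):

```python
# Hedged sketch: evaluating/exporting an already fine-tuned NNCF checkpoint
# with the patched `transformers`. The checkpoint directory and config file
# names come from the CoNLL-2003 example further below.
from nncf import NNCFConfig
from transformers import AutoModelForTokenClassification

nncf_config = NNCFConfig.from_json("nncf_bert_config_conll.json")

# With the patch applied, `from_pretrained` accepts `nncf_config`/`nncf_eval`
# and returns a (compression controller, model) pair instead of a bare model.
compression_ctrl, model = AutoModelForTokenClassification.from_pretrained(
    "bert_base_cased_conll_int8",  # NNCF-wrapped checkpoint directory
    nncf_config=nncf_config,
    nncf_eval=True,                # evaluation-only: no re-initialization
)

# Equivalent of passing `--to_onnx bert_base_cased_conll_int8.onnx`.
compression_ctrl.export_model("bert_base_cased_conll_int8.onnx")
```
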
For RoBERTa-MNLI, no baseline model finetuning was necessary since the `roberta-large-mnli` model pretrained on MNLI was already available for download. - -Make sure that you are running evaluation on a single GPU, since the repository evaluation scripts give inconsistent results when running multi-GPU evaluation. - -### BERT-XNLI - -_Full-precision FP32 baseline model_ - bert-base-chinese, trained on the Chinese portion of XNLI - 77.68% accuracy when evaluated on the Chinese portion of XNLI test set. - -_INT8 model (symmetric weights, asymmetric activations quantization)_ - 77.22% accuracy in the same evaluation conditions. - -**INT8 model quantization-aware training command line:** - -`python examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert-base-chinese --language zh --train_language zh --do_train --do_eval --per_gpu_train_batch_size 48 --per_gpu_eval_batch_size 1 --learning_rate 5e-5 --num_train_epochs 4.0 --max_seq_length 128 --output_dir bert_xnli_int8 --save_steps 200 --nncf_config nncf_bert_config_xnli.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/text-classification/run_xnli.py --model_name_or_path bert_xnli_int8 --language zh --train_language zh --do_eval --per_gpu_eval_batch_size 1 --max_seq_length 128 --output_dir bert_xnli_int8 --nncf_config nncf_bert_config_xnli.json --to_onnx bert_xnli_int8.onnx` - -### BERT-SQuAD v1.1 - -_Full-precision FP32 baseline model_ - bert-large-uncased-whole-word-masking model, trained on SQuAD v1.1 - 93.21% F1, 87.2% EM on the dev set, - -_INT8 model (symmetric quantization)_ - 92.55% F1, 86.1% EM on the dev set. - -**INT8 model quantization-aware training command line (trained on 4x Tesla V100):** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=10 --save_steps=400 --nncf_config nncf_bert_config_squad.json` - -_INT8 model (symmetric quantization) + Knowledge Distillation_ - 92.89% F1, 86.68% EM on the dev set. 
- -**INT8 model quantization-aware training + Knowledge Distillation command line (trained on 4x Tesla V100):** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert-large-uncased-whole-word-masking --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 2 --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=10 --save_steps=400 --nncf_config nncf_bert_config_squad_kd.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path bert_squad_int8 --do_eval --dataset_name squad --max_seq_length 384 --doc_stride 128 --output_dir bert_squad_int8 --per_gpu_eval_batch_size=1 --nncf_config nncf_bert_config_squad.json --to_onnx bert_squad_int8.onnx` - -### BERT-CoNLL2003 - -_Full-precision FP32 baseline model_ - bert-base-cased model, trained on CoNLL2003 - 99.17% acc, 95.03% F1 - -_INT8 model (symmetric quantization)_ - 99.18% acc, 95.31% F1 - -**INT8 model quantization-aware training command line (trained on 4x Tesla V100):** - -`python examples/pytorch/token-classification/run_ner.py --model_name_or_path *path_to_fp32_finetuned_model* --dataset_name conll2003 --output_dir bert_base_cased_conll_int8 --do_train --do_eval --save_strategy epoch --evaluation_strategy epoch --nncf_config nncf_bert_config_conll.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/token-classification/run_ner.py --model_name_or_path bert_base_cased_conll_int8 --dataset_name conll2003 --output_dir bert_base_cased_conll_int8 --do_eval --nncf_config nncf_bert_config_squad.json --to_onnx bert_base_cased_conll_int8.onnx` - -### BERT-MRPC - -_Full-precision FP32 baseline model_ - bert-base-cased-finetuned-mrpc, 84.56% acc - -_INT8 model (symmetric quantization)_ - 84.8% acc - -**INT8 model quantization-aware training command line (trained on 1x RTX 2080):** - -`python examples/pytorch/token-classification/run_glue.py --model_name_or_path bert-base-cased-finetuned-mrpc --task_name mrpc --do_train --do_eval --num_train_epochs 5.0 --per_device_eval_batch_size 1 --output_dir bert_cased_mrpc_int8 --evaluation_strategy epoch --save_strategy epoch --nncf_config nncf_bert_config_mrpc.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/token-classification/run_ner.py --model_name_or_path bert_cased_mrpc_int8 --task_name mrpc --do_eval --per_gpu_eval_batch_size 1 --output_dir bert_cased_mrpc_int8 --nncf_config nncf_bert_config_mrpc.json --to_onnx bert_base_cased_mrpc_int8.onnx` - -### RoBERTA-MNLI - -_Full-precision FP32 baseline model_ - roberta-large-mnli, pre-trained on MNLI - 90.6% accuracy (matched), 90.1% accuracy (mismatched) - -_INT8 model (asymmetrically quantized)_ - 89.25% accuracy (matched), 88.9% accuracy (mismatched) - -**INT8 model quantization-aware training command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path roberta-large-mnli --task_name mnli --do_train --do_eval --per_gpu_train_batch_size 24 --per_gpu_eval_batch_size 1 --learning_rate 2e-5 --num_train_epochs 3.0 --max_seq_length 128 --output_dir roberta_mnli_int8 --save_steps 400 --nncf_config nncf_roberta_config_mnli.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path roberta_mnli_int8 --task_name mnli 
--do_eval --learning_rate 2e-5 --num_train_epochs 3.0 --max_seq_length 128 --per_gpu_eval_batch_size 1 --output_dir roberta_mnli_int8 --save_steps 400 --nncf_config nncf_roberta_config_mnli.json --to_onnx roberta_mnli_int8.onnx` - -### DistilBERT-SST-2 - -_Full-precision FP32 baseline model_ - distilbert-base-uncased-finetuned-sst-2-english, pre-trained on SST-2 - 91.1% accuracy - -_INT8 model (symmetrically quantized)_ - 90.94% accuracy - -**INT8 model quantization-aware training command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path distilbert-base-uncased-finetuned-sst-2-english --task_name sst2 --do_train --do_eval --per_gpu_train_batch_size 16 --per_gpu_eval_batch_size 1 --learning_rate 5e-5 --num_train_epochs 3.0 --max_seq_length 128 --output_dir distilbert_sst2_int8 --save_steps 100000 --nncf_config nncf_distilbert_config_sst2.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/text-classification/run_glue.py --model_name_or_path distilbert_sst2_int8 --task_name sst2 --do_eval --per_gpu_eval_batch_size 1 --max_seq_length 128 --output_dir distilbert_sst2_int8 --save_steps 100000 --nncf_config nncf_distilbert_config_sst2.json --to_onnx distilbert_sst2_int8.onnx` - -### MobileBERT-SQuAD v1.1 - -_Full-precision FP32 baseline model_ - google/mobilebert-uncased, trained on SQuAD v1.1 - 89.98% F1, 82.61% EM on the dev set, - -_INT8 model (symmetric quantization)_ - 89.4% F1, 82.05% EM on the dev set. - -**INT8 model quantization-aware training command line (trained on 3x Tesla V100):** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path --do_train --do_eval --dataset_name squad --learning_rate 3e-5 --num_train_epochs 5 --max_seq_length 384 --doc_stride 128 --output_dir mobilebert_squad_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=6 --save_steps=400 --nncf_config nncf_mobilebert_config_squad_int8.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/question-answering/run_qa.py --model_name_or_path mobilebert_squad_int8 --do_eval --dataset_name squad --max_seq_length 384 --doc_stride 128 --output_dir mobilebert_squad_int8 --per_gpu_eval_batch_size=1 --nncf_config nncf_mobilebert_config_squad_int8.json --to_onnx mobilebert_squad_int8.onnx` - -### GPT-2-WikiText 2 (raw) language modeling - -_Full-precision FP32 baseline model_ - 19.73 perplexity on the test set - -_INT8 model (symmetric quantization)_ - 20.9 perplexity on the test set - -**INT8 model quantization-aware training command line (trained on 1x Tesla V100):** - -`python examples/pytorch/language-modeling/run_clm.py --model_name_or_path --do_train --do_eval --dataset_name wikitext --num_train_epochs 3 --output_dir gpt2_wikitext2_int8 --per_gpu_eval_batch_size=1 --per_gpu_train_batch_size=4 --save_steps=591 --nncf_config nncf_gpt2_config_wikitext_hw_config.json` - -**Fine-tuned INT8 model evaluation and ONNX export command line:** - -`python examples/pytorch/language-modeling/run_clm.py --model_name_or_path gpt2_wikitext2_int8 --do_eval --dataset_name wikitext --output_dir gpt2_wikitext2_int8 --per_gpu_eval_batch_size=1 --nncf_config nncf_gpt2_config_wikitext_hw_config.json --to_onnx gpt2_wikitext2_int8.onnx`
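
To summarize how the pieces of the patch fit together, here is a consolidated, hedged sketch of the quantization-aware fine-tuning flow that the `Trainer` changes implement, written as a plain PyTorch loop. Everything except the `compression_ctrl`/`nncf` calls is a placeholder, and the NNCF config is assumed to already carry the initialization dataloaders registered as shown in the patch:

```python
# Consolidated, hedged sketch of the quantization-aware fine-tuning flow that
# the Trainer changes in the patch implement, written as a plain PyTorch loop.
# `model`, `train_loader`, `optimizer`, `loss_fn` and the file names are
# placeholders; `nncf_config` is assumed to already carry the range-init and
# BatchNorm-adaptation dataloaders registered as shown in the patch.
from nncf.torch import create_compressed_model


def quantization_aware_finetune(model, nncf_config, train_loader, optimizer, loss_fn, num_epochs):
    compression_ctrl, model = create_compressed_model(model, nncf_config)

    for epoch in range(num_epochs):
        compression_ctrl.scheduler.epoch_step()      # once per epoch
        for batch in train_loader:
            outputs = model(**batch)
            # Add the compression loss term to the task loss, as the patched
            # Trainer.training_step does.
            loss = loss_fn(outputs, batch["labels"]) + compression_ctrl.loss()
            loss.backward()
            compression_ctrl.scheduler.step()        # once per optimizer step
            optimizer.step()
            optimizer.zero_grad()
        print(compression_ctrl.statistics().to_str())

    # Export to ONNX, which is what `--to_onnx` triggers in the patched scripts.
    compression_ctrl.export_model("model_int8.onnx")
    return compression_ctrl, model
```
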