[PT] bump torch to 2.5.0 (#3037)

### Changes 1. Change the Torch version from 2.4.0 to 2.5.0. 2. Update `nncf/experimental/torch/fx/transformations.py` so that merging bias and linear/conv nodes eliminates dead subgraphs and nodes. Add `_get_connected_nodes()` to get the list of nodes directly or indirectly connected to the output node. 3. Import `torchvision` in NNCF patching for jit. 4. Update `MAP_BACKEND_PACKAGES` in `tests/cross_fw/shared/helpers.py` to include `torch` and `torchvision`. ### Reason for changes 1. Migration. 2. The dead subgraphs and nodes were not being removed from the model after merging bias and conv/linear nodes, so only nodes which are directly or indirectly connected to the output node are kept. 3. The import order with torchvision also affected the operators. 4. After change 3, this is required for `test_force_cuda_build` in `tests/torch/test_extensions_build.py` to pass. ### Related tickets #3036 ### Tests `test_get_connected_nodes()` is added in `tests/torch/fx/test_model_transformer.py` to test the `_get_connected_nodes()` function. ### Constraints Torch FX tests for Windows are skipped (CVS-156781) since `torch._export.capture_pre_autograd_graph` does not support Windows in torch 2.5. --------- Co-authored-by: Alexander Dokuchaev <[email protected]>
openvinotoolkit · Nov 8, 2024 · 058dce6 · 058dce6
1 parent c17b6ed
commit 058dce6
Show file tree

Hide file tree

Showing 18 changed files with 1,001 additions and 937 deletions.
diff --git a/.github/workflows/precommit.yml b/.github/workflows/precommit.yml
@@ -121,13 +121,13 @@ jobs:
           sudo apt-get --assume-yes install build-essential ninja-build libgl1-mesa-dev libglib2.0-0 wget make
       - name: Download CUDA
         run: |
-          wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
-          sudo sh cuda_12.1.1_530.30.02_linux.run --toolkit --silent
+          wget -q https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_550.54.14_linux.run
+          sudo sh cuda_12.4.0_550.54.14_linux.run --toolkit --silent
       - name: Runner info
         continue-on-error: true
         run: |
-          export PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}}
-          export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+          export PATH=/usr/local/cuda-12.4/bin${PATH:+:${PATH}}
+          export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
           nvidia-smi
           cat /proc/cpuinfo
           nvcc --version
@@ -147,8 +147,8 @@ jobs:
           python -c "import torch; print(torch.cuda.is_available())"
       - name: Run PyTorch precommit test scope
         run: |
-          export PATH=/usr/local/cuda-12.1/bin${PATH:+:${PATH}}
-          export LD_LIBRARY_PATH=/usr/local/cuda-12.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
+          export PATH=/usr/local/cuda-12.4/bin${PATH:+:${PATH}}
+          export LD_LIBRARY_PATH=/usr/local/cuda-12.4/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
           make test-torch-cuda
 
   tensorflow:

diff --git a/README.md b/README.md
@@ -439,12 +439,12 @@ conda install -c conda-forge nncf
 - Ubuntu\* 18.04 or later (64-bit)
 - Python\* 3.9 or later
 - Supported frameworks:
-  - PyTorch\* >=2.3, <2.5
+  - PyTorch\* >=2.4, <2.6
   - TensorFlow\* >=2.8.4, <=2.15.1
   - ONNX\* ==1.17.0
   - OpenVINO\* >=2022.3.0
 
-This repository is tested on Python* 3.10.14, PyTorch* 2.4.0 (NVidia CUDA\* Toolkit 12.1) and TensorFlow* 2.12.1 (NVidia CUDA\* Toolkit 11.8).
+This repository is tested on Python* 3.10.14, PyTorch* 2.5.1 (NVidia CUDA\* Toolkit 12.4) and TensorFlow* 2.12.1 (NVidia CUDA\* Toolkit 11.8).
 
 ## NNCF Compressed NNCF Model Zoo
 

diff --git a/constraints.txt b/constraints.txt
@@ -2,8 +2,8 @@
 openvino==2024.4.0
 
 # Pytorch
-torch==2.4.0
-torchvision==0.19.0
+torch==2.5.1
+torchvision==0.20.1
 
 # ONNX
 onnx==1.17.0

diff --git a/docs/Installation.md b/docs/Installation.md
@@ -43,7 +43,7 @@ as well as the supported versions of Python:
 
 | NNCF      | OpenVINO   | PyTorch  | ONNX     | TensorFlow | Python |
 |-----------|------------|----------|----------|------------|--------|
-| `develop` | `2024.4.0` | `2.4.0`  | `1.17.0` | `2.15.1`   | `3.10` |
+| `develop` | `2024.4.0` | `2.5.1`  | `1.17.0` | `2.15.1`   | `3.10` |
 | `2.13.0`  | `2024.4.0` | `2.4.0`  | `1.16.0` | `2.15.1`   | `3.8`* |
 | `2.12.0`  | `2024.3.0` | `2.3.0`  | `1.16.0` | `2.15.1`   | `3.8`* |
 | `2.11.0`  | `2024.2.0` | `2.3.0`  | `1.16.0` | `2.12.0`   | `3.8`  |

diff --git a/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt b/examples/llm_compression/openvino/tiny_llama_synthetic_data/requirements.txt
@@ -1,4 +1,4 @@
-torch==2.4.0
+torch==2.5.1
 datasets==3.0.1
 numpy>=1.23.5
 openvino==2024.4

diff --git a/examples/post_training_quantization/torch/mobilenet_v2/requirements.txt b/examples/post_training_quantization/torch/mobilenet_v2/requirements.txt
@@ -1,6 +1,6 @@
 fastdownload==0.0.7
 openvino==2024.4
 scikit-learn
-torch==2.4.0
-torchvision==0.19.0
+torch==2.5.1
+torchvision==0.20.1
 setuptools<=72.1.0
diff --git a/examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt b/examples/post_training_quantization/torch/ssd300_vgg16/requirements.txt
@@ -2,8 +2,8 @@ fastdownload==0.0.7
 onnx==1.17.0
 openvino==2024.4
 pycocotools==2.0.7
-torch==2.4.0
+torch==2.5.1
 torchmetrics==1.0.1
-torchvision==0.19.0
+torchvision==0.20.1
 numpy<2
 setuptools<=72.1.0
diff --git a/examples/post_training_quantization/torch_fx/resnet18/main.py b/examples/post_training_quantization/torch_fx/resnet18/main.py
@@ -14,6 +14,8 @@
 from time import time
 from typing import Tuple
 
+# We need to import openvino.torch for torch.compile() with openvino backend to work.
+import openvino.torch  # noqa
 import torch
 import torch.nn as nn
 import torch.nn.parallel

diff --git a/examples/post_training_quantization/torch_fx/resnet18/requirements.txt b/examples/post_training_quantization/torch_fx/resnet18/requirements.txt
@@ -1,4 +1,4 @@
 fastdownload==0.0.7
 openvino==2024.4
-torch==2.4.0
-torchvision==0.19.0
+torch==2.5.1
+torchvision==0.20.1
diff --git a/examples/quantization_aware_training/torch/resnet18/requirements.txt b/examples/quantization_aware_training/torch/resnet18/requirements.txt
@@ -1,5 +1,5 @@
 fastdownload==0.0.7
 openvino==2024.4
-torch==2.4.0
-torchvision==0.19.0
+torch==2.5.1
+torchvision==0.20.1
 setuptools<=72.1.0
diff --git a/nncf/experimental/torch/fx/transformations.py b/nncf/experimental/torch/fx/transformations.py
@@ -967,6 +967,27 @@ def merge_linear_and_bias(model: torch.fx.GraphModule):
     _merge_node_and_bias(model, _is_linear)
 
 
+def _get_connected_nodes(graph: torch.fx.Graph) -> List[torch.fx.Node]:
+    """
+    Returns the nodes which are directly or indirectly connected to the
+    output node (the output node included), in the order of `graph.nodes`.
+
+    :param graph: The torch FX graph to get nodes from.
+    :return: Connected nodes, listed in the order they appear in the graph.
+    """
+    output_nodes = [node for node in graph.nodes if node.op == "output"]
+    assert len(output_nodes) == 1
+    connected_nodes = set()  # Every node is unique in the graph
+    nodes_to_visit = [output_nodes[0]]
+    while nodes_to_visit:
+        current_node = nodes_to_visit.pop()
+        if current_node not in connected_nodes:
+            connected_nodes.add(current_node)
+            nodes_to_visit.extend(current_node.all_input_nodes)
+    # A set has no stable iteration order, so filter `graph.nodes` to keep the result deterministic.
+    return [node for node in graph.nodes if node in connected_nodes]
+
+
 def _merge_node_and_bias(model: torch.fx.GraphModule, is_target_node: Callable[[torch.fx.Node], bool]):
     """
     Merges two separate node and bias node to a one node: node+bias.
@@ -1000,5 +1021,13 @@ def _merge_node_and_bias(model: torch.fx.GraphModule, is_target_node: Callable[[
         for user in list(bias_node.users):
             user.replace_input_with(bias_node, conv_node)
 
+    # Remove nodes which are not connected to output. This removes dead nodes and dead subgraphs in the model graph.
+    nodes_connected_to_output = _get_connected_nodes(model.graph)
+    is_impure = lambda node: node in nodes_connected_to_output
+
+    for node in reversed(model.graph.nodes):
+        if not is_impure(node) and len(node.users) == 0:
+            model.graph.erase_node(node)
+
     model.graph.eliminate_dead_code()
     model.recompile()
diff --git a/nncf/experimental/torch2/function_hook/hook_executor_mode.py b/nncf/experimental/torch2/function_hook/hook_executor_mode.py
@@ -108,7 +108,7 @@ def __init__(self, model: nn.Module, hook_storage: HookStorage) -> None:
         :param model: The PyTorch model to which the hooks will be applied.
         :param hook_storage: Storage for hooks to be executed.
         """
-        super().__init__()  # type: ignore
+        super().__init__()
         self.hook_storage: HookStorage = hook_storage
         self.model: nn.Module = model
         self.module_call_stack: List[nn.Module] = []

diff --git a/nncf/torch/__init__.py b/nncf/torch/__init__.py
@@ -68,4 +68,12 @@
 
 from nncf.torch.extensions import force_build_cpu_extensions, force_build_cuda_extensions
 
+# Load the torch._dynamo polyfills loader before NNCF patches the torch operators.
+# NOTE(review): per the original author, torchvision changes a dictionary inside
+# PyTorch that maps ops to their role in the torch FX graph, and an op wrapped by
+# NNCF shows up in that mapping as a different custom operation - hence the import
+# has to happen before the wrapping. Only torch >= 2.5.0 takes this branch.
+if torch.__version__ >= "2.5.0":
+    from torch._dynamo.polyfills import loader
+
 patch_torch_operators()
diff --git a/nncf/version.py b/nncf/version.py
@@ -12,6 +12,6 @@
 __version__ = "2.14.0"
 
 
-BKC_TORCH_SPEC = "==2.4.*"
+BKC_TORCH_SPEC = "==2.5.*"
 BKC_TF_SPEC = "==2.15.*"
 STRICT_TF_SPEC = ">=2.9.3,<2.16.0"
diff --git a/tests/post_training/data/ptq_reference_data.yaml b/tests/post_training/data/ptq_reference_data.yaml
@@ -59,7 +59,7 @@ torchvision/swin_v2_s_backend_OV:
 torchvision/swin_v2_s_backend_FX_TORCH:
   metric_value: 0.8360
 timm/crossvit_9_240_backend_CUDA_TORCH:
-  metric_value: 0.689
+  metric_value: 0.7275
 timm/crossvit_9_240_backend_FP32:
   metric_value: 0.73982
 timm/crossvit_9_240_backend_ONNX:
@@ -79,7 +79,7 @@ timm/darknet53_backend_OV:
 timm/darknet53_backend_TORCH:
   metric_value: 0.79094
 timm/deit3_small_patch16_224_backend_CUDA_TORCH:
-  metric_value: 0.76816
+  metric_value: 0.81246
 timm/deit3_small_patch16_224_backend_FP32:
   metric_value: 0.81358
 timm/deit3_small_patch16_224_backend_ONNX:
@@ -89,7 +89,7 @@ timm/deit3_small_patch16_224_backend_OV:
 timm/deit3_small_patch16_224_backend_TORCH:
   metric_value: 0.81274
 timm/dla34_backend_CUDA_TORCH:
-  metric_value: 0.73978
+  metric_value: 0.74272
 timm/dla34_backend_FP32:
   metric_value: 0.74628
 timm/dla34_backend_ONNX:
@@ -99,7 +99,7 @@ timm/dla34_backend_OV:
 timm/dla34_backend_TORCH:
   metric_value: 0.74256
 timm/dpn68_backend_CUDA_TORCH:
-  metric_value: 0.75492
+  metric_value: 0.75786
 timm/dpn68_backend_FP32:
   metric_value: 0.76342
 timm/dpn68_backend_ONNX:
@@ -115,7 +115,7 @@ timm/efficientnet_b0_BC_backend_ONNX:
 timm/efficientnet_b0_BC_backend_OV:
   metric_value: 0.77166
 timm/efficientnet_b0_backend_CUDA_TORCH:
-  metric_value: 0.768
+  metric_value: 0.77124
 timm/efficientnet_b0_backend_FP32:
   metric_value: 0.77698
 timm/efficientnet_b0_backend_ONNX:
@@ -125,7 +125,7 @@ timm/efficientnet_b0_backend_OV:
 timm/efficientnet_b0_backend_TORCH:
   metric_value: 0.77042
 timm/efficientnet_lite0_backend_CUDA_TORCH:
-  metric_value: 0.74686
+  metric_value: 0.75162
 timm/efficientnet_lite0_backend_FP32:
   metric_value: 0.75496
 timm/efficientnet_lite0_backend_ONNX:
@@ -135,7 +135,7 @@ timm/efficientnet_lite0_backend_OV:
 timm/efficientnet_lite0_backend_TORCH:
   metric_value: 0.7517
 timm/hrnet_w18_backend_CUDA_TORCH:
-  metric_value: 0.76712
+  metric_value: 0.77178
 timm/hrnet_w18_backend_FP32:
   metric_value: 0.78124
 timm/hrnet_w18_backend_ONNX:
@@ -145,7 +145,7 @@ timm/hrnet_w18_backend_OV:
 timm/hrnet_w18_backend_TORCH:
   metric_value: 0.7722
 timm/inception_resnet_v2_backend_CUDA_TORCH:
-  metric_value: 0.80024
+  metric_value: 0.80334
 timm/inception_resnet_v2_backend_FP32:
   metric_value: 0.80448
 timm/inception_resnet_v2_backend_ONNX:
@@ -155,7 +155,7 @@ timm/inception_resnet_v2_backend_OV:
 timm/inception_resnet_v2_backend_TORCH:
   metric_value: 0.80334
 timm/levit_128_backend_CUDA_TORCH:
-  metric_value: 0.7324
+  metric_value: 0.77812
 timm/levit_128_backend_FP32:
   metric_value: 0.78474
 timm/levit_128_backend_ONNX:
@@ -171,7 +171,7 @@ timm/mobilenetv2_050_BC_backend_ONNX:
 timm/mobilenetv2_050_BC_backend_OV:
   metric_value: 0.65332
 timm/mobilenetv2_050_backend_CUDA_TORCH:
-  metric_value: 0.64278
+  metric_value: 0.6534
 timm/mobilenetv2_050_backend_FP32:
   metric_value: 0.6594
 timm/mobilenetv2_050_backend_ONNX:
@@ -181,7 +181,7 @@ timm/mobilenetv2_050_backend_OV:
 timm/mobilenetv2_050_backend_TORCH:
   metric_value: 0.65334
 timm/mobilenetv3_small_050_backend_CUDA_TORCH:
-  metric_value: 0.41888
+  metric_value: 0.4267
 timm/mobilenetv3_small_050_backend_FP32:
   metric_value: 0.57906
 timm/mobilenetv3_small_050_backend_ONNX:
@@ -197,7 +197,7 @@ timm/mobilenetv3_small_050_BC_backend_ONNX:
 timm/mobilenetv3_small_050_BC_backend_OV:
   metric_value: 0.5655
 timm/regnetx_002_backend_CUDA_TORCH:
-  metric_value: 0.67452
+  metric_value: 0.68596
 timm/regnetx_002_backend_FP32:
   metric_value: 0.68756
 timm/regnetx_002_backend_ONNX:
@@ -207,7 +207,7 @@ timm/regnetx_002_backend_OV:
 timm/regnetx_002_backend_TORCH:
   metric_value: 0.68576
 timm/resnest14d_backend_CUDA_TORCH:
-  metric_value: 0.74176
+  metric_value: 0.74898
 timm/resnest14d_backend_FP32:
   metric_value: 0.75516
 timm/resnest14d_backend_ONNX:
@@ -239,7 +239,7 @@ timm/tf_inception_v3_backend_OV:
 timm/tf_inception_v3_backend_TORCH:
   metric_value: 0.77586
 timm/vgg11_backend_CUDA_TORCH:
-  metric_value: 0.6809
+  metric_value: 0.688
 timm/vgg11_backend_FP32:
   metric_value: 0.6904
 timm/vgg11_backend_ONNX:
@@ -249,7 +249,7 @@ timm/vgg11_backend_OV:
 timm/vgg11_backend_TORCH:
   metric_value: 0.6879
 timm/visformer_small_backend_CUDA_TORCH:
-  metric_value: 0.77728
+  metric_value: 0.81612
 timm/visformer_small_backend_FP32:
   metric_value: 0.82098
 timm/visformer_small_backend_ONNX: