From def73de9419ec2a7806e28359acead96319bf44d Mon Sep 17 00:00:00 2001
From: takipipo
Date: Thu, 19 Dec 2024 21:35:10 +0700
Subject: [PATCH 1/2] refactor: remove code duplication while still keeping neuron_*.py

---
 ultralytics/engine/exporter.py | 34 ++++++++++++++++++++++++++++++++++
 ultralytics/nn/autobackend.py  | 23 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index ba329372d54..5227470a3c9 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -19,6 +19,8 @@
 MNN                     | `mnn`                     | yolo11n.mnn
 NCNN                    | `ncnn`                    | yolo11n_ncnn_model/
 IMX                     | `imx`                     | yolo11n_imx_model/
+AWS NeuronX             | `neuronx`                 | yolo11n.neuronx
+AWS Neuron              | `neuron`                  | yolo11n.neuron
 
 Requirements:
     $ pip install "ultralytics[export]"
@@ -116,6 +118,8 @@ def export_formats():
         ["MNN", "mnn", ".mnn", True, True],
         ["NCNN", "ncnn", "_ncnn_model", True, True],
         ["IMX", "imx", "_imx_model", True, True],
+        ["AWS NeuronX", "neuronx", ".neuronx", True, True],
+        ["AWS Neuron", "neuron", ".neuron", True, True],
     ]
     return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x)))
 
@@ -210,6 +214,8 @@ def __call__(self, model=None) -> str:
             mnn,
             ncnn,
             imx,
+            neuronx,
+            neuron,
         ) = flags  # export booleans
         is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs))
 
@@ -382,6 +388,10 @@ def __call__(self, model=None) -> str:
             f[12], _ = self.export_ncnn()
         if imx:
             f[13], _ = self.export_imx()
+        if neuronx:  # NeuronX
+            f[14], _ = self.export_neuronx()
+        if neuron:  # Neuron
+            f[15], _ = self.export_neuron()
 
         # Finish
         f = [str(x) for x in f if x]  # filter out '' and None
@@ -1232,6 +1242,30 @@ def forward(self, images):
             file.writelines([f"{name}\n" for _, name in self.model.names.items()])
         return f, None
 
+    @try_export
+    def export_neuronx(self, prefix=colorstr("AWS NeuronX:")):
+        """YOLOv8 NeuronX model export."""
+        import torch_neuronx  # local import: torch-neuronx is only required for this export path
+
+        LOGGER.info(f"\n{prefix} starting export with torch {torch_neuronx.__version__}...")
+        f = self.file.with_suffix(".neuronx")
+        ts = torch_neuronx.trace(self.model, self.im, strict=False)
+        extra_files = {"config.txt": json.dumps(self.metadata)}  # torch._C.ExtraFilesMap()
+        ts.save(str(f), _extra_files=extra_files)
+        return f, None
+
+    @try_export
+    def export_neuron(self, prefix=colorstr("AWS Neuron:")):
+        """YOLOv8 Neuron model export."""
+        import torch_neuron  # local import: torch-neuron is only required for this export path
+
+        LOGGER.info(f"\n{prefix} starting export with torch {torch_neuron.__version__}...")
+        f = self.file.with_suffix(".neuron")
+        ts = torch_neuron.trace(self.model, self.im, strict=False)
+        extra_files = {"config.txt": json.dumps(self.metadata)}
+        ts.save(str(f), _extra_files=extra_files)
+        return f, None
 
     def _add_tflite_metadata(self, file):
         """Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index b6df3753ec3..6297b4a5721 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -75,6 +75,8 @@ class AutoBackend(nn.Module):
             | PaddlePaddle          | *_paddle_model/  |
             | MNN                   | *.mnn            |
             | NCNN                  | *_ncnn_model/    |
+            | AWS NeuronX           | *.neuronx        |
+            | AWS Neuron            | *.neuron         |
 
     This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
     models across various platforms.
@@ -125,6 +127,8 @@ def __init__(
             ncnn,
             imx,
             triton,
+            neuronx,
+            neuron,
         ) = self._model_type(w)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
         nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
@@ -176,7 +180,26 @@ def __init__(
             model.half() if fp16 else model.float()
             if extra_files["config.txt"]:  # load metadata dict
                 metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
+        # NeuronX
+        elif neuronx:
+            import torch_neuronx
+            LOGGER.info(f"Loading {w} for NeuronX version {torch_neuronx.__version__} inference...")
+            extra_files = {"config.txt": ""}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
+            model.half() if fp16 else model.float()
+            if extra_files["config.txt"]:  # load metadata dict
+                metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
+        # Neuron
+        elif neuron:
+            import torch_neuron
+
+            LOGGER.info(f"Loading {w} for Neuron version {torch_neuron.__version__} inference...")
+            extra_files = {"config.txt": ""}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
+            model.half() if fp16 else model.float()
+            if extra_files["config.txt"]:  # load metadata dict
+                metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
 
         # ONNX OpenCV DNN
         elif dnn:
             LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")

From a7de722a62e4921bddeca2ecf4477944fa851b9d Mon Sep 17 00:00:00 2001
From: "nirattisai.t"
Date: Fri, 3 Jan 2025 11:11:47 +0700
Subject: [PATCH 2/2] refactor: remove neuron-related files and imports

---
 ultralytics/__init__.py                    |   3 +-
 ultralytics/engine/neuron_exporter.py      | 368 ----------
 ultralytics/engine/neuron_model.py         |  55 --
 ultralytics/engine/neuron_predictor.py     |  25 -
 ultralytics/models/__init__.py             |   4 +-
 ultralytics/models/yolo/__init__.py        |   3 +-
 ultralytics/models/yolo/detect/__init__.py |   2 -
 .../models/yolo/detect/neuron_predict.py   |  22 -
 ultralytics/models/yolo/neuron_model.py    |  16 -
 ultralytics/nn/neuron_autobackend.py       | 689 ------------------
 10 files changed, 4 insertions(+), 1183 deletions(-)
 delete mode 100644 ultralytics/engine/neuron_exporter.py
 delete mode 100644 ultralytics/engine/neuron_model.py
 delete mode 100644 ultralytics/engine/neuron_predictor.py
 delete mode 100644 ultralytics/models/yolo/detect/neuron_predict.py
 delete mode 100644 ultralytics/models/yolo/neuron_model.py
 delete mode 100644 ultralytics/nn/neuron_autobackend.py

diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index ee0aeb12bdc..7d863cf84ff 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -8,7 +8,7 @@
 if not os.environ.get("OMP_NUM_THREADS"):
     os.environ["OMP_NUM_THREADS"] = "1"  # default for reduced CPU utilization during training
 
-from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, NeuronYOLO, YOLOWorld
+from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld
 from ultralytics.utils import ASSETS, SETTINGS
 from ultralytics.utils.checks import check_yolo as checks
 from ultralytics.utils.downloads import download
@@ -27,5 +27,4 @@
     "download",
     "settings",
     "Explorer",
-    "NeuronYOLO",
 )
diff --git a/ultralytics/engine/neuron_exporter.py b/ultralytics/engine/neuron_exporter.py
deleted file mode 100644
index c7208fa2fc4..00000000000
--- a/ultralytics/engine/neuron_exporter.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Export a YOLOv8 PyTorch model to other formats.
TensorFlow exports authored by https://github.com/zldrobit. - -Format | `format=argument` | Model ---- | --- | --- -PyTorch | - | yolov8n.pt -TorchScript | `torchscript` | yolov8n.torchscript -AWS NeuronX | `neuronx` | yolov8n.neuronx -ONNX | `onnx` | yolov8n.onnx -OpenVINO | `openvino` | yolov8n_openvino_model/ -TensorRT | `engine` | yolov8n.engine -CoreML | `coreml` | yolov8n.mlpackage -TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ -TensorFlow GraphDef | `pb` | yolov8n.pb -TensorFlow Lite | `tflite` | yolov8n.tflite -TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite -TensorFlow.js | `tfjs` | yolov8n_web_model/ -PaddlePaddle | `paddle` | yolov8n_paddle_model/ -NCNN | `ncnn` | yolov8n_ncnn_model/ - -Requirements: - $ pip install "ultralytics[export]" - -Python: - from ultralytics import YOLO - model = YOLO('yolov8n.pt') - results = model.export(format='onnx') - -CLI: - $ yolo mode=export model=yolov8n.pt format=onnx - -Inference: - $ yolo predict model=yolov8n.pt # PyTorch - yolov8n.torchscript # TorchScript - yolov8.neuronx # AWS NeuronX - yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True - yolov8n_openvino_model # OpenVINO - yolov8n.engine # TensorRT - yolov8n.mlpackage # CoreML (macOS-only) - yolov8n_saved_model # TensorFlow SavedModel - yolov8n.pb # TensorFlow GraphDef - yolov8n.tflite # TensorFlow Lite - yolov8n_edgetpu.tflite # TensorFlow Edge TPU - yolov8n_paddle_model # PaddlePaddle - yolov8n_ncnn_model # NCNN - -TensorFlow.js: - $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example - $ npm install - $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model - $ npm start -""" - -import json -import time -import warnings -from copy import deepcopy -from datetime import datetime -from pathlib import Path - -import torch - -from ultralytics.cfg import TASK2DATA -from ultralytics.engine.exporter import Exporter -from ultralytics.nn.autobackend import check_class_names, default_class_names -from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder -from ultralytics.nn.tasks import WorldModel -from ultralytics.utils import ( - DEFAULT_CFG, - LINUX, - LOGGER, - __version__, - colorstr, - get_default_args, -) -from ultralytics.utils.checks import check_imgsz -from ultralytics.utils.files import file_size -from ultralytics.utils.ops import Profile -from ultralytics.utils.torch_utils import select_device, smart_inference_mode - - -def export_formats(): - """YOLOv8 export formats.""" - import pandas # scope for faster 'import ultralytics' - - x = [ - ["PyTorch", "-", ".pt", True, True], - ["TorchScript", "torchscript", ".torchscript", True, True], - ["AWS NeuronX", "neuronx", ".neuronx", True, True], - ["AWS Neuron", "neuron", ".neuron", True, True], - ["ONNX", "onnx", ".onnx", True, True], - ["OpenVINO", "openvino", "_openvino_model", True, False], - ["TensorRT", "engine", ".engine", False, True], - ["CoreML", "coreml", ".mlpackage", True, False], - ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True], - ["TensorFlow GraphDef", "pb", ".pb", True, True], - ["TensorFlow Lite", "tflite", ".tflite", True, False], - ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False], - ["TensorFlow.js", "tfjs", "_web_model", True, False], - ["PaddlePaddle", "paddle", "_paddle_model", True, True], - ["NCNN", "ncnn", "_ncnn_model", True, True], - ] - return pandas.DataFrame(x, columns=["Format", "Argument", "Suffix", "CPU", "GPU"]) - - -def gd_outputs(gd): - """TensorFlow GraphDef model output node 
names.""" - name_list, input_list = [], [] - for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef - name_list.append(node.name) - input_list.extend(node.input) - return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp")) - - -def try_export(inner_func): - """YOLOv8 export decorator, i.e. @try_export.""" - inner_args = get_default_args(inner_func) - - def outer_func(*args, **kwargs): - """Export a model.""" - prefix = inner_args["prefix"] - try: - with Profile() as dt: - f, model = inner_func(*args, **kwargs) - LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)") - return f, model - except Exception as e: - LOGGER.info(f"{prefix} export failure ❌ {dt.t:.1f}s: {e}") - raise e - - return outer_func - - -class NeuronExporter(Exporter): - """ - A class for exporting a model. - - Attributes: - args (SimpleNamespace): Configuration for the exporter. - callbacks (list, optional): List of callback functions. Defaults to None. - """ - - def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): - super().__init__(cfg, overrides, _callbacks) - - @smart_inference_mode() - def __call__(self, model=None) -> str: - """Returns list of exported files/dirs after running callbacks.""" - self.run_callbacks("on_export_start") - t = time.time() - fmt = self.args.format.lower() # to lowercase - if fmt in {"tensorrt", "trt"}: # 'engine' aliases - fmt = "engine" - if fmt in {"mlmodel", "mlpackage", "mlprogram", "apple", "ios", "coreml"}: # 'coreml' aliases - fmt = "coreml" - fmts = tuple(export_formats()["Argument"][1:]) # available export formats - flags = [x == fmt for x in fmts] - if sum(flags) != 1: - raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}") - ( - jit, - neuronx, - neuron, - onnx, - xml, - engine, - coreml, - saved_model, - pb, - tflite, - edgetpu, - tfjs, - paddle, - ncnn, - ) = flags # export booleans - is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs)) - - # Device - if fmt == "engine" and self.args.device is None: - LOGGER.warning("WARNING ⚠️ TensorRT requires GPU export, automatically assigning device=0") - self.args.device = "0" - self.device = select_device("cpu" if self.args.device is None else self.args.device) - - # Checks - if not hasattr(model, "names"): - model.names = default_class_names() - model.names = check_class_names(model.names) - if self.args.half and self.args.int8: - LOGGER.warning("WARNING ⚠️ half=True and int8=True are mutually exclusive, setting half=False.") - self.args.half = False - if self.args.half and onnx and self.device.type == "cpu": - LOGGER.warning("WARNING ⚠️ half=True only compatible with GPU export, i.e. use device=0") - self.args.half = False - assert not self.args.dynamic, "half=True not compatible with dynamic=True, i.e. use only one." - self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size - if self.args.int8 and (engine or xml): - self.args.dynamic = True # enforce dynamic to export TensorRT INT8; ensures ONNX is dynamic - if self.args.optimize: - assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False" - assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'" - if edgetpu: - if not LINUX: - raise SystemError("Edge TPU export only supported on Linux. 
See https://coral.ai/docs/edgetpu/compiler") - elif self.args.batch != 1: # see github.com/ultralytics/ultralytics/pull/13420 - LOGGER.warning("WARNING ⚠️ Edge TPU export requires batch size 1, setting batch=1.") - self.args.batch = 1 - if isinstance(model, WorldModel): - LOGGER.warning( - "WARNING ⚠️ YOLOWorld (original version) export is not supported to any format.\n" - "WARNING ⚠️ YOLOWorldv2 models (i.e. 'yolov8s-worldv2.pt') only support export to " - "(torchscript, onnx, openvino, engine, coreml) formats. " - "See https://docs.ultralytics.com/models/yolo-world for details." - ) - if self.args.int8 and not self.args.data: - self.args.data = DEFAULT_CFG.data or TASK2DATA[getattr(model, "task", "detect")] # assign default data - LOGGER.warning( - "WARNING ⚠️ INT8 export requires a missing 'data' arg for calibration. " - f"Using default 'data={self.args.data}'." - ) - # Input - im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device) - file = Path( - getattr(model, "pt_path", None) or getattr(model, "yaml_file", None) or model.yaml.get("yaml_file", "") - ) - if file.suffix in {".yaml", ".yml"}: - file = Path(file.name) - - # Update model - model = deepcopy(model).to(self.device) - for p in model.parameters(): - p.requires_grad = False - model.eval() - model.float() - model = model.fuse() - for m in model.modules(): - if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB - m.dynamic = self.args.dynamic - m.export = True - m.format = self.args.format - elif isinstance(m, C2f) and not is_tf_format: - # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph - m.forward = m.forward_split - - y = None - for _ in range(2): - y = model(im) # dry runs - if self.args.half and onnx and self.device.type != "cpu": - im, model = im.half(), model.half() # to FP16 - - # Filter warnings - warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning - warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning - warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning - - # Assign - self.im = im - self.model = model - self.file = file - self.output_shape = ( - tuple(y.shape) - if isinstance(y, torch.Tensor) - else tuple(tuple(x.shape if isinstance(x, torch.Tensor) else []) for x in y) - ) - self.pretty_name = Path(self.model.yaml.get("yaml_file", self.file)).stem.replace("yolo", "YOLO") - data = model.args["data"] if hasattr(model, "args") and isinstance(model.args, dict) else "" - description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}' - self.metadata = { - "description": description, - "author": "Ultralytics", - "date": datetime.now().isoformat(), - "version": __version__, - "license": "AGPL-3.0 License (https://ultralytics.com/license)", - "docs": "https://docs.ultralytics.com", - "stride": int(max(model.stride)), - "task": model.task, - "batch": self.args.batch, - "imgsz": self.imgsz, - "names": model.names, - } # model metadata - if model.task == "pose": - self.metadata["kpt_shape"] = model.model[-1].kpt_shape - - LOGGER.info( - f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and " - f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)' - ) - - # Exports - f = [""] * len(fmts) # exported filenames - if jit or ncnn: # TorchScript - f[0], _ = self.export_torchscript() - if engine: # TensorRT required before 
ONNX - f[1], _ = self.export_engine() - if onnx: # ONNX - f[2], _ = self.export_onnx() - if xml: # OpenVINO - f[3], _ = self.export_openvino() - if coreml: # CoreML - f[4], _ = self.export_coreml() - if is_tf_format: # TensorFlow formats - self.args.int8 |= edgetpu - f[5], keras_model = self.export_saved_model() - if pb or tfjs: # pb prerequisite to tfjs - f[6], _ = self.export_pb(keras_model=keras_model) - if tflite: - f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms) - if edgetpu: - f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite") - if tfjs: - f[9], _ = self.export_tfjs() - if paddle: # PaddlePaddle - f[10], _ = self.export_paddle() - if ncnn: # NCNN - f[11], _ = self.export_ncnn() - if neuronx: # NeuronX - f[12], _ = self.export_neuronx() - if neuron: # Neuron - f[13], _ = self.export_neuron() - - # Finish - f = [str(x) for x in f if x] # filter out '' and None - if any(f): - f = str(Path(f[-1])) - square = self.imgsz[0] == self.imgsz[1] - s = ( - "" - if square - else f"WARNING ⚠️ non-PyTorch val requires square images, 'imgsz={self.imgsz}' will not " - f"work. Use export 'imgsz={max(self.imgsz)}' if val is required." - ) - imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(" ", "") - predict_data = f"data={data}" if model.task == "segment" and fmt == "pb" else "" - q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization - LOGGER.info( - f'\nExport complete ({time.time() - t:.1f}s)' - f"\nResults saved to {colorstr('bold', file.parent.resolve())}" - f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}' - f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}' - f'\nVisualize: https://netron.app' - ) - - self.run_callbacks("on_export_end") - return f # return list of exported files/dirs - - @try_export - def export_neuronx(self, prefix=colorstr("AWS NeuronX:")): - import torch_neuronx - - """YOLOv8 NeuronX model export.""" - LOGGER.info(f"\n{prefix} starting export with torch {torch_neuronx.__version__}...") - f = self.file.with_suffix(".neuronx") - ts = torch_neuronx.trace(self.model, self.im, strict=False) - extra_files = {"config.txt": json.dumps(self.metadata)} # torch._C.ExtraFilesMap() - ts.save(str(f), _extra_files=extra_files) - return f, None - - @try_export - def export_neuron(self, prefix=colorstr("AWS Neuron:")): - import torch_neuron - - """YOLOv8 Neuron model export.""" - LOGGER.info(f"\n{prefix} starting export with torch {torch_neuron.__version__}...") - f = self.file.with_suffix(".neuron") - ts = torch_neuron.trace(self.model, self.im, strict=False) - extra_files = {"config.txt": json.dumps(self.metadata)} - ts.save(str(f), _extra_files=extra_files) - return f, None diff --git a/ultralytics/engine/neuron_model.py b/ultralytics/engine/neuron_model.py deleted file mode 100644 index 4506c401dfb..00000000000 --- a/ultralytics/engine/neuron_model.py +++ /dev/null @@ -1,55 +0,0 @@ -from pathlib import Path -from typing import Union - -from ultralytics.engine.model import Model - - -class NeuronModel(Model): - def __init__( - self, - model: Union[str, Path] = "yolov8n.pt", - task: str = None, - verbose: bool = False, - ) -> None: - super().__init__(model, task, verbose) - - def export( - self, - **kwargs, - ) -> str: - """ - Exports the model to a different format suitable for deployment. 
- - This method facilitates the export of the model to various formats (e.g., ONNX, TorchScript) for deployment - purposes. It uses the 'Exporter' class for the export process, combining model-specific overrides, method - defaults, and any additional arguments provided. The combined arguments are used to configure export settings. - - The method supports a wide range of arguments to customize the export process. For a comprehensive list of all - possible arguments, refer to the 'configuration' section in the documentation. - - Args: - **kwargs (any): Arbitrary keyword arguments to customize the export process. These are combined with the - model's overrides and method defaults. - - Returns: - (str): The exported model filename in the specified format, or an object related to the export process. - - Raises: - AssertionError: If the model is not a PyTorch model. - """ - self._check_is_pytorch_model() - from .neuron_exporter import NeuronExporter - - custom = { - "imgsz": self.model.args["imgsz"], - "batch": 1, - "data": None, - "verbose": False, - } # method defaults - args = { - **self.overrides, - **custom, - **kwargs, - "mode": "export", - } # highest priority args on the right - return NeuronExporter(overrides=args, _callbacks=self.callbacks)(model=self.model) diff --git a/ultralytics/engine/neuron_predictor.py b/ultralytics/engine/neuron_predictor.py deleted file mode 100644 index eb28beb4f12..00000000000 --- a/ultralytics/engine/neuron_predictor.py +++ /dev/null @@ -1,25 +0,0 @@ -from ultralytics.engine.predictor import BasePredictor -from ultralytics.nn.neuron_autobackend import NeuronAutoBackend -from ultralytics.utils.torch_utils import select_device - - -class NeuronPredictor(BasePredictor): - def __init__(self, cfg=..., overrides=None, _callbacks=None): - super().__init__(cfg, overrides, _callbacks) - - def setup_model(self, model, verbose=True): - """Initialize YOLO model with given parameters and set it to evaluation mode.""" - self.model = NeuronAutoBackend( - weights=model or self.args.model, - device=select_device(self.args.device, verbose=verbose), - dnn=self.args.dnn, - data=self.args.data, - fp16=self.args.half, - batch=self.args.batch, - fuse=True, - verbose=verbose, - ) - - self.device = self.model.device # update device - self.args.half = self.model.fp16 # update half - self.model.eval() diff --git a/ultralytics/models/__init__.py b/ultralytics/models/__init__.py index 2a98f3cea9f..32bb0fca725 100644 --- a/ultralytics/models/__init__.py +++ b/ultralytics/models/__init__.py @@ -4,7 +4,7 @@ from .nas import NAS from .rtdetr import RTDETR from .sam import SAM -from .yolo import YOLO, NeuronYOLO, YOLOWorld +from .yolo import YOLO, YOLOWorld __all__ = ( "YOLO", @@ -12,5 +12,5 @@ "SAM", "FastSAM", "NAS", - "YOLOWorld, NeuronYOLO", + "YOLOWorld", ) # allow simpler import diff --git a/ultralytics/models/yolo/__init__.py b/ultralytics/models/yolo/__init__.py index 29a96fcfd32..8d9aedfecb8 100644 --- a/ultralytics/models/yolo/__init__.py +++ b/ultralytics/models/yolo/__init__.py @@ -3,6 +3,5 @@ from ultralytics.models.yolo import classify, detect, obb, pose, segment, world from .model import YOLO, YOLOWorld -from .neuron_model import NeuronYOLO -__all__ = "classify", "segment", "detect", "pose", "obb", "world", "YOLO", "YOLOWorld", "NeuronYOLO" +__all__ = "classify", "segment", "detect", "pose", "obb", "world", "YOLO", "YOLOWorld" diff --git a/ultralytics/models/yolo/detect/__init__.py b/ultralytics/models/yolo/detect/__init__.py index bde72324e38..e499d6a7321 100644 --- 
a/ultralytics/models/yolo/detect/__init__.py +++ b/ultralytics/models/yolo/detect/__init__.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -from .neuron_predict import NeuronDetectionPredictor from .predict import DetectionPredictor from .train import DetectionTrainer from .val import DetectionValidator @@ -9,5 +8,4 @@ "DetectionPredictor", "DetectionTrainer", "DetectionValidator", - "NeuronDetectionPredictor", ) diff --git a/ultralytics/models/yolo/detect/neuron_predict.py b/ultralytics/models/yolo/detect/neuron_predict.py deleted file mode 100644 index 9ee6c8093d8..00000000000 --- a/ultralytics/models/yolo/detect/neuron_predict.py +++ /dev/null @@ -1,22 +0,0 @@ -from ultralytics.models.yolo.detect.predict import DetectionPredictor -from ultralytics.nn.neuron_autobackend import NeuronAutoBackend -from ultralytics.utils.torch_utils import select_device - - -class NeuronDetectionPredictor(DetectionPredictor): - def setup_model(self, model, verbose=True): - """Initialize YOLO model with given parameters and set it to evaluation mode.""" - self.model = NeuronAutoBackend( - weights=model or self.args.model, - device=select_device(self.args.device, verbose=verbose), - dnn=self.args.dnn, - data=self.args.data, - fp16=self.args.half, - batch=self.args.batch, - fuse=True, - verbose=verbose, - ) - - self.device = self.model.device # update device - self.args.half = self.model.fp16 # update half - self.model.eval() diff --git a/ultralytics/models/yolo/neuron_model.py b/ultralytics/models/yolo/neuron_model.py deleted file mode 100644 index 4a67e085e4e..00000000000 --- a/ultralytics/models/yolo/neuron_model.py +++ /dev/null @@ -1,16 +0,0 @@ -from ultralytics.engine.neuron_model import NeuronModel -from ultralytics.models import yolo - - -class NeuronYOLO(NeuronModel): - def __init__(self, model="yolov8n.pt", task=None, verbose=False): - super().__init__(model, task, verbose) - - @property - def task_map(self): - """Map head to model, trainer, validator, and predictor classes.""" - return { - "detect": { - "predictor": yolo.detect.NeuronDetectionPredictor, - }, - } diff --git a/ultralytics/nn/neuron_autobackend.py b/ultralytics/nn/neuron_autobackend.py deleted file mode 100644 index 719160a25bb..00000000000 --- a/ultralytics/nn/neuron_autobackend.py +++ /dev/null @@ -1,689 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -import ast -import contextlib -import json -import platform -import zipfile -from collections import OrderedDict, namedtuple -from pathlib import Path - -import cv2 -import numpy as np -import torch -import torch.nn as nn -from PIL import Image - -from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, ROOT, yaml_load -from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml -from ultralytics.utils.downloads import attempt_download_asset, is_url - - -def check_class_names(names): - """ - Check class names. - - Map imagenet class codes to human-readable names if required. Convert lists to dicts. - """ - if isinstance(names, list): # names is a list - names = dict(enumerate(names)) # convert to dict - if isinstance(names, dict): - # Convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True' - names = {int(k): str(v) for k, v in names.items()} - n = len(names) - if max(names.keys()) >= n: - raise KeyError( - f"{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices " - f"{min(names.keys())}-{max(names.keys())} defined in your dataset YAML." 
- ) - if isinstance(names[0], str) and names[0].startswith("n0"): # imagenet class codes, i.e. 'n01440764' - names_map = yaml_load(ROOT / "cfg/datasets/ImageNet.yaml")["map"] # human-readable names - names = {k: names_map[v] for k, v in names.items()} - return names - - -def default_class_names(data=None): - """Applies default class names to an input YAML file or returns numerical class names.""" - if data: - with contextlib.suppress(Exception): - return yaml_load(check_yaml(data))["names"] - return {i: f"class{i}" for i in range(999)} # return default if above errors - - -class NeuronAutoBackend(nn.Module): - """ - Handles dynamic backend selection for running inference using Ultralytics YOLO models. - - The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide - range of formats, each with specific naming conventions as outlined below: - - Supported Formats and Naming Conventions: - | Format | File Suffix | - |-----------------------|------------------| - | PyTorch | *.pt | - | TorchScript | *.torchscript | - | ONNX Runtime | *.onnx | - | ONNX OpenCV DNN | *.onnx (dnn=True)| - | OpenVINO | *openvino_model/ | - | CoreML | *.mlpackage | - | TensorRT | *.engine | - | TensorFlow SavedModel | *_saved_model | - | TensorFlow GraphDef | *.pb | - | TensorFlow Lite | *.tflite | - | TensorFlow Edge TPU | *_edgetpu.tflite | - | PaddlePaddle | *_paddle_model | - | NCNN | *_ncnn_model | - - This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy - models across various platforms. - """ - - @torch.no_grad() - def __init__( - self, - weights="yolov8n.pt", - device=torch.device("cpu"), - dnn=False, - data=None, - fp16=False, - batch=1, - fuse=True, - verbose=True, - ): - """ - Initialize the AutoBackend for inference. - - Args: - weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. - device (torch.device): Device to run the model on. Defaults to CPU. - dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. - data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. - fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False. - batch (int): Batch-size to assume for inference. - fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True. - verbose (bool): Enable verbose logging. Defaults to True. 
- """ - super().__init__() - w = str(weights[0] if isinstance(weights, list) else weights) - nn_module = isinstance(weights, torch.nn.Module) - ( - pt, - jit, - neuronx, - neuron, - onnx, - xml, - engine, - coreml, - saved_model, - pb, - tflite, - edgetpu, - tfjs, - paddle, - ncnn, - triton, - ) = self._model_type(w) - fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16 - nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) - stride = 32 # default stride - model, metadata = None, None - - # Set device - cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA - if cuda and not any([nn_module, pt, jit, engine, onnx]): # GPU dataloader formats - device = torch.device("cpu") - cuda = False - - # Download if not local - if not (pt or triton or nn_module): - w = attempt_download_asset(w) - - # In-memory PyTorch model - if nn_module: - model = weights.to(device) - if fuse: - model = model.fuse(verbose=verbose) - if hasattr(model, "kpt_shape"): - kpt_shape = model.kpt_shape # pose-only - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, "module") else model.names # get class names - model.half() if fp16 else model.float() - self.model = model # explicitly assign for to(), cpu(), cuda(), half() - pt = True - - # PyTorch - elif pt: - from ultralytics.nn.tasks import attempt_load_weights - - model = attempt_load_weights( - weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse - ) - if hasattr(model, "kpt_shape"): - kpt_shape = model.kpt_shape # pose-only - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, "module") else model.names # get class names - model.half() if fp16 else model.float() - self.model = model # explicitly assign for to(), cpu(), cuda(), half() - - # TorchScript - elif jit: - LOGGER.info(f"Loading {w} for TorchScript inference...") - extra_files = {"config.txt": ""} # model metadata - model = torch.jit.load(w, _extra_files=extra_files, map_location=device) - model.half() if fp16 else model.float() - if extra_files["config.txt"]: # load metadata dict - metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items())) - # NeuronX - elif neuronx: - import torch_neuronx - - LOGGER.info(f"Loading {w} for NeuronX inference... version {torch_neuronx.__version__}") - extra_files = {"config.txt": ""} # model metadata - model = torch.jit.load(w, _extra_files=extra_files, map_location=device) - model.half() if fp16 else model.float() - if extra_files["config.txt"]: # load metadata dict - metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items())) - ## Neuron - elif neuron: - import torch_neuron - - LOGGER.info(f"Loading {w} for Neuron inference... 
version {torch_neuron.__version__}") - extra_files = {"config.txt": ""} # model metadata - model = torch.jit.load(w, _extra_files=extra_files, map_location=device) - model.half() if fp16 else model.float() - if extra_files["config.txt"]: # load metadata dict - metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items())) - - # ONNX OpenCV DNN - elif dnn: - LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...") - check_requirements("opencv-python>=4.5.4") - net = cv2.dnn.readNetFromONNX(w) - - # ONNX Runtime - elif onnx: - LOGGER.info(f"Loading {w} for ONNX Runtime inference...") - check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime")) - if IS_RASPBERRYPI or IS_JETSON: - # Fix 'numpy.linalg._umath_linalg' has no attribute '_ilp64' for TF SavedModel on RPi and Jetson - check_requirements("numpy==1.23.5") - import onnxruntime - - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"] - session = onnxruntime.InferenceSession(w, providers=providers) - output_names = [x.name for x in session.get_outputs()] - metadata = session.get_modelmeta().custom_metadata_map - - # OpenVINO - elif xml: - LOGGER.info(f"Loading {w} for OpenVINO inference...") - check_requirements("openvino>=2024.0.0") - import openvino as ov - - core = ov.Core() - w = Path(w) - if not w.is_file(): # if not *.xml - w = next(w.glob("*.xml")) # get *.xml file from *_openvino_model dir - ov_model = core.read_model(model=str(w), weights=w.with_suffix(".bin")) - if ov_model.get_parameters()[0].get_layout().empty: - ov_model.get_parameters()[0].set_layout(ov.Layout("NCHW")) - - # OpenVINO inference modes are 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT' - inference_mode = "CUMULATIVE_THROUGHPUT" if batch > 1 else "LATENCY" - LOGGER.info(f"Using OpenVINO {inference_mode} mode for batch={batch} inference...") - ov_compiled_model = core.compile_model( - ov_model, - device_name="AUTO", # AUTO selects best available device, do not modify - config={"PERFORMANCE_HINT": inference_mode}, - ) - input_name = ov_compiled_model.input().get_any_name() - metadata = w.parent / "metadata.yaml" - - # TensorRT - elif engine: - LOGGER.info(f"Loading {w} for TensorRT inference...") - try: - import tensorrt as trt # noqa https://developer.nvidia.com/nvidia-tensorrt-download - except ImportError: - if LINUX: - check_requirements("tensorrt", cmds="-U") - import tensorrt as trt # noqa - check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0 - if device.type == "cpu": - device = torch.device("cuda:0") - Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr")) - logger = trt.Logger(trt.Logger.INFO) - # Read file - with open(w, "rb") as f, trt.Runtime(logger) as runtime: - try: - meta_len = int.from_bytes(f.read(4), byteorder="little") # read metadata length - metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata - except UnicodeDecodeError: - f.seek(0) # engine file may lack embedded Ultralytics metadata - model = runtime.deserialize_cuda_engine(f.read()) # read engine - - # Model context - try: - context = model.create_execution_context() - except Exception as e: # model is None - LOGGER.error(f"ERROR: TensorRT model exported with a different version than {trt.__version__}\n") - raise e - - bindings = OrderedDict() - output_names = [] - fp16 = False # default updated below - dynamic = False - is_trt10 = not hasattr(model, "num_bindings") - num = range(model.num_io_tensors) if is_trt10 
else range(model.num_bindings) - for i in num: - if is_trt10: - name = model.get_tensor_name(i) - dtype = trt.nptype(model.get_tensor_dtype(name)) - is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT - if is_input: - if -1 in tuple(model.get_tensor_shape(name)): - dynamic = True - context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1])) - if dtype == np.float16: - fp16 = True - else: - output_names.append(name) - shape = tuple(context.get_tensor_shape(name)) - else: # TensorRT < 10.0 - name = model.get_binding_name(i) - dtype = trt.nptype(model.get_binding_dtype(i)) - is_input = model.binding_is_input(i) - if model.binding_is_input(i): - if -1 in tuple(model.get_binding_shape(i)): # dynamic - dynamic = True - context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[1])) - if dtype == np.float16: - fp16 = True - else: - output_names.append(name) - shape = tuple(context.get_binding_shape(i)) - im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) - bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) - binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) - batch_size = bindings["images"].shape[0] # if dynamic, this is instead max batch size - - # CoreML - elif coreml: - LOGGER.info(f"Loading {w} for CoreML inference...") - import coremltools as ct - - model = ct.models.MLModel(w) - metadata = dict(model.user_defined_metadata) - - # TF SavedModel - elif saved_model: - LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...") - import tensorflow as tf - - keras = False # assume TF1 saved_model - model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) - metadata = Path(w) / "metadata.yaml" - - # TF GraphDef - elif pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt - LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...") - import tensorflow as tf - - from ultralytics.engine.exporter import gd_outputs - - def wrap_frozen_graph(gd, inputs, outputs): - """Wrap frozen graphs for deployment.""" - x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped - ge = x.graph.as_graph_element - return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) - - gd = tf.Graph().as_graph_def() # TF GraphDef - with open(w, "rb") as f: - gd.ParseFromString(f.read()) - frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd)) - with contextlib.suppress(StopIteration): # find metadata in SavedModel alongside GraphDef - metadata = next(Path(w).resolve().parent.rglob(f"{Path(w).stem}_saved_model*/metadata.yaml")) - - # TFLite or TFLite Edge TPU - elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu - from tflite_runtime.interpreter import Interpreter, load_delegate - except ImportError: - import tensorflow as tf - - Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate - if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime - LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...") - delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[ - platform.system() - ] - interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) - else: # TFLite - LOGGER.info(f"Loading {w} for TensorFlow Lite inference...") - interpreter = 
Interpreter(model_path=w) # load TFLite model - interpreter.allocate_tensors() # allocate - input_details = interpreter.get_input_details() # inputs - output_details = interpreter.get_output_details() # outputs - # Load metadata - with contextlib.suppress(zipfile.BadZipFile): - with zipfile.ZipFile(w, "r") as model: - meta_file = model.namelist()[0] - metadata = ast.literal_eval(model.read(meta_file).decode("utf-8")) - - # TF.js - elif tfjs: - raise NotImplementedError("YOLOv8 TF.js inference is not currently supported.") - - # PaddlePaddle - elif paddle: - LOGGER.info(f"Loading {w} for PaddlePaddle inference...") - check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle") - import paddle.inference as pdi # noqa - - w = Path(w) - if not w.is_file(): # if not *.pdmodel - w = next(w.rglob("*.pdmodel")) # get *.pdmodel file from *_paddle_model dir - config = pdi.Config(str(w), str(w.with_suffix(".pdiparams"))) - if cuda: - config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0) - predictor = pdi.create_predictor(config) - input_handle = predictor.get_input_handle(predictor.get_input_names()[0]) - output_names = predictor.get_output_names() - metadata = w.parents[1] / "metadata.yaml" - - # NCNN - elif ncnn: - LOGGER.info(f"Loading {w} for NCNN inference...") - check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn") # requires NCNN - import ncnn as pyncnn - - net = pyncnn.Net() - net.opt.use_vulkan_compute = cuda - w = Path(w) - if not w.is_file(): # if not *.param - w = next(w.glob("*.param")) # get *.param file from *_ncnn_model dir - net.load_param(str(w)) - net.load_model(str(w.with_suffix(".bin"))) - metadata = w.parent / "metadata.yaml" - - # NVIDIA Triton Inference Server - elif triton: - check_requirements("tritonclient[all]") - from ultralytics.utils.triton import TritonRemoteModel - - model = TritonRemoteModel(w) - - # Any other format (unsupported) - else: - from ultralytics.engine.neuron_exporter import export_formats - - raise TypeError( - f"model='{w}' is not a supported model format. " - f"See https://docs.ultralytics.com/modes/predict for help.\n\n{export_formats()}" - ) - - # Load external metadata YAML - if isinstance(metadata, (str, Path)) and Path(metadata).exists(): - metadata = yaml_load(metadata) - if metadata and isinstance(metadata, dict): - for k, v in metadata.items(): - if k in {"stride", "batch"}: - metadata[k] = int(v) - elif k in {"imgsz", "names", "kpt_shape"} and isinstance(v, str): - metadata[k] = eval(v) - stride = metadata["stride"] - task = metadata["task"] - batch = metadata["batch"] - imgsz = metadata["imgsz"] - names = metadata["names"] - kpt_shape = metadata.get("kpt_shape") - elif not (pt or triton or nn_module): - LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'") - - # Check names - if "names" not in locals(): # names missing - names = default_class_names(data) - names = check_class_names(names) - - # Disable gradients - if pt: - for p in model.parameters(): - p.requires_grad = False - - self.__dict__.update(locals()) # assign all variables to self - - def forward(self, im, augment=False, visualize=False, embed=None): - """ - Runs inference on the YOLOv8 MultiBackend model. - - Args: - im (torch.Tensor): The image tensor to perform inference on. 
- augment (bool): whether to perform data augmentation during inference, defaults to False - visualize (bool): whether to visualize the output predictions, defaults to False - embed (list, optional): A list of feature vectors/embeddings to return. - - Returns: - (tuple): Tuple containing the raw output tensor, and processed output for visualization (if visualize=True) - """ - b, ch, h, w = im.shape # batch, channel, height, width - if self.fp16 and im.dtype != torch.float16: - im = im.half() # to FP16 - if self.nhwc: - im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3) - - # PyTorch - if self.pt or self.nn_module: - y = self.model(im, augment=augment, visualize=visualize, embed=embed) - - # TorchScript - elif self.jit: - y = self.model(im) - - elif self.neuronx: - y = self.model(im) - elif self.neuron: - y = self.model(im) - # ONNX OpenCV DNN - elif self.dnn: - im = im.cpu().numpy() # torch to numpy - self.net.setInput(im) - y = self.net.forward() - - # ONNX Runtime - elif self.onnx: - im = im.cpu().numpy() # torch to numpy - y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) - - # OpenVINO - elif self.xml: - im = im.cpu().numpy() # FP32 - - if self.inference_mode in {"THROUGHPUT", "CUMULATIVE_THROUGHPUT"}: # optimized for larger batch-sizes - n = im.shape[0] # number of images in batch - results = [None] * n # preallocate list with None to match the number of images - - def callback(request, userdata): - """Places result in preallocated list using userdata index.""" - results[userdata] = request.results - - # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image - async_queue = self.ov.runtime.AsyncInferQueue(self.ov_compiled_model) - async_queue.set_callback(callback) - for i in range(n): - # Start async inference with userdata=i to specify the position in results list - async_queue.start_async(inputs={self.input_name: im[i : i + 1]}, userdata=i) # keep image as BCHW - async_queue.wait_all() # wait for all inference requests to complete - y = np.concatenate([list(r.values())[0] for r in results]) - - else: # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1 - y = list(self.ov_compiled_model(im).values()) - - # TensorRT - elif self.engine: - if self.dynamic or im.shape != self.bindings["images"].shape: - if self.is_trt10: - self.context.set_input_shape("images", im.shape) - self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape) - for name in self.output_names: - self.bindings[name].data.resize_(tuple(self.context.get_tensor_shape(name))) - else: - i = self.model.get_binding_index("images") - self.context.set_binding_shape(i, im.shape) - self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape) - for name in self.output_names: - i = self.model.get_binding_index(name) - self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i))) - - s = self.bindings["images"].shape - assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" - self.binding_addrs["images"] = int(im.data_ptr()) - self.context.execute_v2(list(self.binding_addrs.values())) - y = [self.bindings[x].data for x in sorted(self.output_names)] - - # CoreML - elif self.coreml: - im = im[0].cpu().numpy() - im_pil = Image.fromarray((im * 255).astype("uint8")) - # im = im.resize((192, 320), Image.BILINEAR) - y = self.model.predict({"image": im_pil}) # coordinates are xywh normalized - if "confidence" in y: - raise 
TypeError( - "Ultralytics only supports inference of non-pipelined CoreML models exported with " - f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export." - ) - # TODO: CoreML NMS inference handling - # from ultralytics.utils.ops import xywh2xyxy - # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels - # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32) - # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) - elif len(y) == 1: # classification model - y = list(y.values()) - elif len(y) == 2: # segmentation model - y = list(reversed(y.values())) # reversed for segmentation models (pred, proto) - - # PaddlePaddle - elif self.paddle: - im = im.cpu().numpy().astype(np.float32) - self.input_handle.copy_from_cpu(im) - self.predictor.run() - y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names] - - # NCNN - elif self.ncnn: - mat_in = self.pyncnn.Mat(im[0].cpu().numpy()) - with self.net.create_extractor() as ex: - ex.input(self.net.input_names()[0], mat_in) - # WARNING: 'output_names' sorted as a temporary fix for https://github.com/pnnx/pnnx/issues/130 - y = [np.array(ex.extract(x)[1])[None] for x in sorted(self.net.output_names())] - - # NVIDIA Triton Inference Server - elif self.triton: - im = im.cpu().numpy() # torch to numpy - y = self.model(im) - - # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) - else: - im = im.cpu().numpy() - if self.saved_model: # SavedModel - y = self.model(im, training=False) if self.keras else self.model(im) - if not isinstance(y, list): - y = [y] - elif self.pb: # GraphDef - y = self.frozen_func(x=self.tf.constant(im)) - if (self.task == "segment" or len(y) == 2) and len(self.names) == 999: # segments and names not defined - ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes - nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400) - self.names = {i: f"class{i}" for i in range(nc)} - else: # Lite or Edge TPU - details = self.input_details[0] - is_int = details["dtype"] in {np.int8, np.int16} # is TFLite quantized int8 or int16 model - if is_int: - scale, zero_point = details["quantization"] - im = (im / scale + zero_point).astype(details["dtype"]) # de-scale - self.interpreter.set_tensor(details["index"], im) - self.interpreter.invoke() - y = [] - for output in self.output_details: - x = self.interpreter.get_tensor(output["index"]) - if is_int: - scale, zero_point = output["quantization"] - x = (x.astype(np.float32) - zero_point) * scale # re-scale - if x.ndim == 3: # if task is not classification, excluding masks (ndim=4) as well - # Denormalize xywh by image size. 
See https://github.com/ultralytics/ultralytics/pull/1695 - # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models - x[:, [0, 2]] *= w - x[:, [1, 3]] *= h - y.append(x) - # TF segment fixes: export is reversed vs ONNX export and protos are transposed - if len(y) == 2: # segment with (det, proto) output order reversed - if len(y[1].shape) != 4: - y = list(reversed(y)) # should be y = (1, 116, 8400), (1, 160, 160, 32) - y[1] = np.transpose(y[1], (0, 3, 1, 2)) # should be y = (1, 116, 8400), (1, 32, 160, 160) - y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y] - - # for x in y: - # print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes - if isinstance(y, (list, tuple)): - return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] - else: - return self.from_numpy(y) - - def from_numpy(self, x): - """ - Convert a numpy array to a tensor. - - Args: - x (np.ndarray): The array to be converted. - - Returns: - (torch.Tensor): The converted tensor - """ - return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x - - def warmup(self, imgsz=(1, 3, 640, 640)): - """ - Warm up the model by running one forward pass with a dummy input. - - Args: - imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width) - """ - import torchvision # noqa (import here so torchvision import time not recorded in postprocess time) - - warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module - if any(warmup_types) and (self.device.type != "cpu" or self.triton): - im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input - for _ in range(2 if self.jit else 1): - self.forward(im) # warmup - - @staticmethod - def _model_type(p="path/to/model.pt"): - """ - This function takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml, - engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle. - - Args: - p: path to the model file. Defaults to path/to/model.pt - - Examples: - >>> model = AutoBackend(weights="path/to/model.onnx") - >>> model_type = model._model_type() # returns "onnx" - """ - from ultralytics.engine.neuron_exporter import export_formats - - sf = list(export_formats().Suffix) # export suffixes - if not is_url(p) and not isinstance(p, str): - check_suffix(p, sf) # checks - name = Path(p).name - types = [s in name for s in sf] - types[5] |= name.endswith(".mlmodel") # retain support for older Apple CoreML *.mlmodel formats - types[8] &= not types[9] # tflite &= not edgetpu - if any(types): - triton = False - else: - from urllib.parse import urlsplit - - url = urlsplit(p) - triton = bool(url.netloc) and bool(url.path) and url.scheme in {"http", "grpc"} - - return types + [triton]
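
Usage note (supplementary, not part of the patch): with the first commit applied on a host that has the AWS Neuron SDK installed (torch-neuronx for Inf2/Trn1 instances, or the legacy torch-neuron package for Inf1), the new formats plug into the standard Ultralytics workflow. A minimal sketch; the weight file and image URL below are illustrative:

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    f = model.export(format="neuronx")  # compiles via torch_neuronx.trace and saves yolo11n.neuronx
    compiled = YOLO(f)  # AutoBackend dispatches on the .neuronx suffix
    results = compiled("https://ultralytics.com/images/bus.jpg")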
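
Usage note (supplementary, not part of the patch): both export methods save a TorchScript archive with the Ultralytics metadata embedded as a "config.txt" extra file, so the artifact can also be loaded without AutoBackend, mirroring what the new loader branches do. A minimal sketch, assuming a Neuron-equipped host and an illustrative file name:

    import json

    import torch
    import torch_neuronx  # importing registers the Neuron ops needed to deserialize the compiled archive

    extra_files = {"config.txt": ""}  # populated in place by torch.jit.load
    model = torch.jit.load("yolo11n.neuronx", _extra_files=extra_files, map_location="cpu")
    metadata = json.loads(extra_files["config.txt"]) if extra_files["config.txt"] else {}
    im = torch.zeros(metadata.get("batch", 1), 3, *metadata.get("imgsz", [640, 640]))
    y = model(im)  # executes on a NeuronCore through the Neuron runtime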