From def73de9419ec2a7806e28359acead96319bf44d Mon Sep 17 00:00:00 2001
From: takipipo
Date: Thu, 19 Dec 2024 21:35:10 +0700
Subject: [PATCH 1/2] refactor: remove code duplication while still keeping neuron_*.py

---
 ultralytics/engine/exporter.py | 34 ++++++++++++++++++++++++++++++++++
 ultralytics/nn/autobackend.py  | 23 +++++++++++++++++++++++
 2 files changed, 57 insertions(+)

diff --git a/ultralytics/engine/exporter.py b/ultralytics/engine/exporter.py
index ba329372d54..5227470a3c9 100644
--- a/ultralytics/engine/exporter.py
+++ b/ultralytics/engine/exporter.py
@@ -19,6 +19,8 @@
 MNN                     | `mnn`                     | yolo11n.mnn
 NCNN                    | `ncnn`                    | yolo11n_ncnn_model/
 IMX                     | `imx`                     | yolo11n_imx_model/
+AWS NeuronX             | `neuronx`                 | yolo11n.neuronx
+AWS Neuron              | `neuron`                  | yolo11n.neuron
 
 Requirements:
     $ pip install "ultralytics[export]"
@@ -116,6 +118,8 @@ def export_formats():
         ["MNN", "mnn", ".mnn", True, True],
         ["NCNN", "ncnn", "_ncnn_model", True, True],
         ["IMX", "imx", "_imx_model", True, True],
+        ["AWS NeuronX", "neuronx", ".neuronx", True, True],
+        ["AWS Neuron", "neuron", ".neuron", True, True],
     ]
     return dict(zip(["Format", "Argument", "Suffix", "CPU", "GPU"], zip(*x)))
 
@@ -210,6 +214,8 @@ def __call__(self, model=None) -> str:
             mnn,
             ncnn,
             imx,
+            neuronx,
+            neuron,
         ) = flags  # export booleans
         is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs))
 
@@ -382,6 +388,10 @@ def __call__(self, model=None) -> str:
             f[12], _ = self.export_ncnn()
         if imx:
             f[13], _ = self.export_imx()
+        if neuronx:  # NeuronX
+            f[14], _ = self.export_neuronx()
+        if neuron:  # Neuron
+            f[15], _ = self.export_neuron()
 
         # Finish
         f = [str(x) for x in f if x]  # filter out '' and None
@@ -1232,6 +1242,30 @@ def forward(self, images):
             file.writelines([f"{name}\n" for _, name in self.model.names.items()])
         return f, None
 
+    @try_export
+    def export_neuronx(self, prefix=colorstr("AWS NeuronX:")):
+        """YOLOv8 NeuronX model export."""
+        import torch_neuronx  # local import: torch-neuronx is only required for this export path
+
+        LOGGER.info(f"\n{prefix} starting export with torch {torch_neuronx.__version__}...")
+        f = self.file.with_suffix(".neuronx")
+        ts = torch_neuronx.trace(self.model, self.im, strict=False)
+        extra_files = {"config.txt": json.dumps(self.metadata)}  # torch._C.ExtraFilesMap()
+        ts.save(str(f), _extra_files=extra_files)
+        return f, None
+
+    @try_export
+    def export_neuron(self, prefix=colorstr("AWS Neuron:")):
+        """YOLOv8 Neuron model export."""
+        import torch_neuron  # local import: torch-neuron is only required for this export path
+
+        LOGGER.info(f"\n{prefix} starting export with torch {torch_neuron.__version__}...")
+        f = self.file.with_suffix(".neuron")
+        ts = torch_neuron.trace(self.model, self.im, strict=False)
+        extra_files = {"config.txt": json.dumps(self.metadata)}
+        ts.save(str(f), _extra_files=extra_files)
+        return f, None
 
     def _add_tflite_metadata(self, file):
         """Add metadata to *.tflite models per https://www.tensorflow.org/lite/models/convert/metadata."""
diff --git a/ultralytics/nn/autobackend.py b/ultralytics/nn/autobackend.py
index b6df3753ec3..6297b4a5721 100644
--- a/ultralytics/nn/autobackend.py
+++ b/ultralytics/nn/autobackend.py
@@ -75,6 +75,8 @@ class AutoBackend(nn.Module):
             | PaddlePaddle          | *_paddle_model/  |
             | MNN                   | *.mnn            |
             | NCNN                  | *_ncnn_model/    |
+            | AWS NeuronX           | *.neuronx        |
+            | AWS Neuron            | *.neuron         |
 
     This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy
     models across various platforms.
@@ -125,6 +127,8 @@ def __init__(
             ncnn,
             imx,
             triton,
+            neuronx,
+            neuron,
         ) = self._model_type(w)
         fp16 &= pt or jit or onnx or xml or engine or nn_module or triton  # FP16
         nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
@@ -176,7 +180,26 @@ def __init__(
             model.half() if fp16 else model.float()
             if extra_files["config.txt"]:  # load metadata dict
                 metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
+        # NeuronX
+        elif neuronx:
+            import torch_neuronx
+            LOGGER.info(f"Loading {w} for NeuronX version {torch_neuronx.__version__} inference...")
+            extra_files = {"config.txt": ""}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
+            model.half() if fp16 else model.float()
+            if extra_files["config.txt"]:  # load metadata dict
+                metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
+        # Neuron
+        elif neuron:
+            import torch_neuron
+
+            LOGGER.info(f"Loading {w} for Neuron version {torch_neuron.__version__} inference...")
+            extra_files = {"config.txt": ""}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
+            model.half() if fp16 else model.float()
+            if extra_files["config.txt"]:  # load metadata dict
+                metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items()))
 
         # ONNX OpenCV DNN
         elif dnn:
             LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")

From a7de722a62e4921bddeca2ecf4477944fa851b9d Mon Sep 17 00:00:00 2001
From: "nirattisai.t"
Date: Fri, 3 Jan 2025 11:11:47 +0700
Subject: [PATCH 2/2] refactor: remove neuron-related files and imports

---
 ultralytics/__init__.py                    |   3 +-
 ultralytics/engine/neuron_exporter.py      | 368 ----------
 ultralytics/engine/neuron_model.py         |  55 --
 ultralytics/engine/neuron_predictor.py     |  25 -
 ultralytics/models/__init__.py             |   4 +-
 ultralytics/models/yolo/__init__.py        |   3 +-
 ultralytics/models/yolo/detect/__init__.py |   2 -
 .../models/yolo/detect/neuron_predict.py   |  22 -
 ultralytics/models/yolo/neuron_model.py    |  16 -
 ultralytics/nn/neuron_autobackend.py       | 689 ------------------
 10 files changed, 4 insertions(+), 1183 deletions(-)
 delete mode 100644 ultralytics/engine/neuron_exporter.py
 delete mode 100644 ultralytics/engine/neuron_model.py
 delete mode 100644 ultralytics/engine/neuron_predictor.py
 delete mode 100644 ultralytics/models/yolo/detect/neuron_predict.py
 delete mode 100644 ultralytics/models/yolo/neuron_model.py
 delete mode 100644 ultralytics/nn/neuron_autobackend.py

diff --git a/ultralytics/__init__.py b/ultralytics/__init__.py
index ee0aeb12bdc..7d863cf84ff 100644
--- a/ultralytics/__init__.py
+++ b/ultralytics/__init__.py
@@ -8,7 +8,7 @@
 if not os.environ.get("OMP_NUM_THREADS"):
     os.environ["OMP_NUM_THREADS"] = "1"  # default for reduced CPU utilization during training
 
-from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, NeuronYOLO, YOLOWorld
+from ultralytics.models import NAS, RTDETR, SAM, YOLO, FastSAM, YOLOWorld
 from ultralytics.utils import ASSETS, SETTINGS
 from ultralytics.utils.checks import check_yolo as checks
 from ultralytics.utils.downloads import download
@@ -27,5 +27,4 @@
     "download",
     "settings",
     "Explorer",
-    "NeuronYOLO",
 )
diff --git a/ultralytics/engine/neuron_exporter.py b/ultralytics/engine/neuron_exporter.py
deleted file mode 100644
index c7208fa2fc4..00000000000
--- a/ultralytics/engine/neuron_exporter.py
+++ /dev/null
@@ -1,368 +0,0 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Export a YOLOv8 PyTorch model to other formats.
TensorFlow exports authored by https://github.com/zldrobit. - -Format | `format=argument` | Model ---- | --- | --- -PyTorch | - | yolov8n.pt -TorchScript | `torchscript` | yolov8n.torchscript -AWS NeuronX | `neuronx` | yolov8n.neuronx -ONNX | `onnx` | yolov8n.onnx -OpenVINO | `openvino` | yolov8n_openvino_model/ -TensorRT | `engine` | yolov8n.engine -CoreML | `coreml` | yolov8n.mlpackage -TensorFlow SavedModel | `saved_model` | yolov8n_saved_model/ -TensorFlow GraphDef | `pb` | yolov8n.pb -TensorFlow Lite | `tflite` | yolov8n.tflite -TensorFlow Edge TPU | `edgetpu` | yolov8n_edgetpu.tflite -TensorFlow.js | `tfjs` | yolov8n_web_model/ -PaddlePaddle | `paddle` | yolov8n_paddle_model/ -NCNN | `ncnn` | yolov8n_ncnn_model/ - -Requirements: - $ pip install "ultralytics[export]" - -Python: - from ultralytics import YOLO - model = YOLO('yolov8n.pt') - results = model.export(format='onnx') - -CLI: - $ yolo mode=export model=yolov8n.pt format=onnx - -Inference: - $ yolo predict model=yolov8n.pt # PyTorch - yolov8n.torchscript # TorchScript - yolov8.neuronx # AWS NeuronX - yolov8n.onnx # ONNX Runtime or OpenCV DNN with dnn=True - yolov8n_openvino_model # OpenVINO - yolov8n.engine # TensorRT - yolov8n.mlpackage # CoreML (macOS-only) - yolov8n_saved_model # TensorFlow SavedModel - yolov8n.pb # TensorFlow GraphDef - yolov8n.tflite # TensorFlow Lite - yolov8n_edgetpu.tflite # TensorFlow Edge TPU - yolov8n_paddle_model # PaddlePaddle - yolov8n_ncnn_model # NCNN - -TensorFlow.js: - $ cd .. && git clone https://github.com/zldrobit/tfjs-yolov5-example.git && cd tfjs-yolov5-example - $ npm install - $ ln -s ../../yolov5/yolov8n_web_model public/yolov8n_web_model - $ npm start -""" - -import json -import time -import warnings -from copy import deepcopy -from datetime import datetime -from pathlib import Path - -import torch - -from ultralytics.cfg import TASK2DATA -from ultralytics.engine.exporter import Exporter -from ultralytics.nn.autobackend import check_class_names, default_class_names -from ultralytics.nn.modules import C2f, Detect, RTDETRDecoder -from ultralytics.nn.tasks import WorldModel -from ultralytics.utils import ( - DEFAULT_CFG, - LINUX, - LOGGER, - __version__, - colorstr, - get_default_args, -) -from ultralytics.utils.checks import check_imgsz -from ultralytics.utils.files import file_size -from ultralytics.utils.ops import Profile -from ultralytics.utils.torch_utils import select_device, smart_inference_mode - - -def export_formats(): - """YOLOv8 export formats.""" - import pandas # scope for faster 'import ultralytics' - - x = [ - ["PyTorch", "-", ".pt", True, True], - ["TorchScript", "torchscript", ".torchscript", True, True], - ["AWS NeuronX", "neuronx", ".neuronx", True, True], - ["AWS Neuron", "neuron", ".neuron", True, True], - ["ONNX", "onnx", ".onnx", True, True], - ["OpenVINO", "openvino", "_openvino_model", True, False], - ["TensorRT", "engine", ".engine", False, True], - ["CoreML", "coreml", ".mlpackage", True, False], - ["TensorFlow SavedModel", "saved_model", "_saved_model", True, True], - ["TensorFlow GraphDef", "pb", ".pb", True, True], - ["TensorFlow Lite", "tflite", ".tflite", True, False], - ["TensorFlow Edge TPU", "edgetpu", "_edgetpu.tflite", True, False], - ["TensorFlow.js", "tfjs", "_web_model", True, False], - ["PaddlePaddle", "paddle", "_paddle_model", True, True], - ["NCNN", "ncnn", "_ncnn_model", True, True], - ] - return pandas.DataFrame(x, columns=["Format", "Argument", "Suffix", "CPU", "GPU"]) - - -def gd_outputs(gd): - """TensorFlow GraphDef model output node 
names.""" - name_list, input_list = [], [] - for node in gd.node: # tensorflow.core.framework.node_def_pb2.NodeDef - name_list.append(node.name) - input_list.extend(node.input) - return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp")) - - -def try_export(inner_func): - """YOLOv8 export decorator, i.e. @try_export.""" - inner_args = get_default_args(inner_func) - - def outer_func(*args, **kwargs): - """Export a model.""" - prefix = inner_args["prefix"] - try: - with Profile() as dt: - f, model = inner_func(*args, **kwargs) - LOGGER.info(f"{prefix} export success ✅ {dt.t:.1f}s, saved as '{f}' ({file_size(f):.1f} MB)") - return f, model - except Exception as e: - LOGGER.info(f"{prefix} export failure ❌ {dt.t:.1f}s: {e}") - raise e - - return outer_func - - -class NeuronExporter(Exporter): - """ - A class for exporting a model. - - Attributes: - args (SimpleNamespace): Configuration for the exporter. - callbacks (list, optional): List of callback functions. Defaults to None. - """ - - def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None): - super().__init__(cfg, overrides, _callbacks) - - @smart_inference_mode() - def __call__(self, model=None) -> str: - """Returns list of exported files/dirs after running callbacks.""" - self.run_callbacks("on_export_start") - t = time.time() - fmt = self.args.format.lower() # to lowercase - if fmt in {"tensorrt", "trt"}: # 'engine' aliases - fmt = "engine" - if fmt in {"mlmodel", "mlpackage", "mlprogram", "apple", "ios", "coreml"}: # 'coreml' aliases - fmt = "coreml" - fmts = tuple(export_formats()["Argument"][1:]) # available export formats - flags = [x == fmt for x in fmts] - if sum(flags) != 1: - raise ValueError(f"Invalid export format='{fmt}'. Valid formats are {fmts}") - ( - jit, - neuronx, - neuron, - onnx, - xml, - engine, - coreml, - saved_model, - pb, - tflite, - edgetpu, - tfjs, - paddle, - ncnn, - ) = flags # export booleans - is_tf_format = any((saved_model, pb, tflite, edgetpu, tfjs)) - - # Device - if fmt == "engine" and self.args.device is None: - LOGGER.warning("WARNING ⚠️ TensorRT requires GPU export, automatically assigning device=0") - self.args.device = "0" - self.device = select_device("cpu" if self.args.device is None else self.args.device) - - # Checks - if not hasattr(model, "names"): - model.names = default_class_names() - model.names = check_class_names(model.names) - if self.args.half and self.args.int8: - LOGGER.warning("WARNING ⚠️ half=True and int8=True are mutually exclusive, setting half=False.") - self.args.half = False - if self.args.half and onnx and self.device.type == "cpu": - LOGGER.warning("WARNING ⚠️ half=True only compatible with GPU export, i.e. use device=0") - self.args.half = False - assert not self.args.dynamic, "half=True not compatible with dynamic=True, i.e. use only one." - self.imgsz = check_imgsz(self.args.imgsz, stride=model.stride, min_dim=2) # check image size - if self.args.int8 and (engine or xml): - self.args.dynamic = True # enforce dynamic to export TensorRT INT8; ensures ONNX is dynamic - if self.args.optimize: - assert not ncnn, "optimize=True not compatible with format='ncnn', i.e. use optimize=False" - assert self.device.type == "cpu", "optimize=True not compatible with cuda devices, i.e. use device='cpu'" - if edgetpu: - if not LINUX: - raise SystemError("Edge TPU export only supported on Linux. 
See https://coral.ai/docs/edgetpu/compiler") - elif self.args.batch != 1: # see github.com/ultralytics/ultralytics/pull/13420 - LOGGER.warning("WARNING ⚠️ Edge TPU export requires batch size 1, setting batch=1.") - self.args.batch = 1 - if isinstance(model, WorldModel): - LOGGER.warning( - "WARNING ⚠️ YOLOWorld (original version) export is not supported to any format.\n" - "WARNING ⚠️ YOLOWorldv2 models (i.e. 'yolov8s-worldv2.pt') only support export to " - "(torchscript, onnx, openvino, engine, coreml) formats. " - "See https://docs.ultralytics.com/models/yolo-world for details." - ) - if self.args.int8 and not self.args.data: - self.args.data = DEFAULT_CFG.data or TASK2DATA[getattr(model, "task", "detect")] # assign default data - LOGGER.warning( - "WARNING ⚠️ INT8 export requires a missing 'data' arg for calibration. " - f"Using default 'data={self.args.data}'." - ) - # Input - im = torch.zeros(self.args.batch, 3, *self.imgsz).to(self.device) - file = Path( - getattr(model, "pt_path", None) or getattr(model, "yaml_file", None) or model.yaml.get("yaml_file", "") - ) - if file.suffix in {".yaml", ".yml"}: - file = Path(file.name) - - # Update model - model = deepcopy(model).to(self.device) - for p in model.parameters(): - p.requires_grad = False - model.eval() - model.float() - model = model.fuse() - for m in model.modules(): - if isinstance(m, (Detect, RTDETRDecoder)): # includes all Detect subclasses like Segment, Pose, OBB - m.dynamic = self.args.dynamic - m.export = True - m.format = self.args.format - elif isinstance(m, C2f) and not is_tf_format: - # EdgeTPU does not support FlexSplitV while split provides cleaner ONNX graph - m.forward = m.forward_split - - y = None - for _ in range(2): - y = model(im) # dry runs - if self.args.half and onnx and self.device.type != "cpu": - im, model = im.half(), model.half() # to FP16 - - # Filter warnings - warnings.filterwarnings("ignore", category=torch.jit.TracerWarning) # suppress TracerWarning - warnings.filterwarnings("ignore", category=UserWarning) # suppress shape prim::Constant missing ONNX warning - warnings.filterwarnings("ignore", category=DeprecationWarning) # suppress CoreML np.bool deprecation warning - - # Assign - self.im = im - self.model = model - self.file = file - self.output_shape = ( - tuple(y.shape) - if isinstance(y, torch.Tensor) - else tuple(tuple(x.shape if isinstance(x, torch.Tensor) else []) for x in y) - ) - self.pretty_name = Path(self.model.yaml.get("yaml_file", self.file)).stem.replace("yolo", "YOLO") - data = model.args["data"] if hasattr(model, "args") and isinstance(model.args, dict) else "" - description = f'Ultralytics {self.pretty_name} model {f"trained on {data}" if data else ""}' - self.metadata = { - "description": description, - "author": "Ultralytics", - "date": datetime.now().isoformat(), - "version": __version__, - "license": "AGPL-3.0 License (https://ultralytics.com/license)", - "docs": "https://docs.ultralytics.com", - "stride": int(max(model.stride)), - "task": model.task, - "batch": self.args.batch, - "imgsz": self.imgsz, - "names": model.names, - } # model metadata - if model.task == "pose": - self.metadata["kpt_shape"] = model.model[-1].kpt_shape - - LOGGER.info( - f"\n{colorstr('PyTorch:')} starting from '{file}' with input shape {tuple(im.shape)} BCHW and " - f'output shape(s) {self.output_shape} ({file_size(file):.1f} MB)' - ) - - # Exports - f = [""] * len(fmts) # exported filenames - if jit or ncnn: # TorchScript - f[0], _ = self.export_torchscript() - if engine: # TensorRT required before 
ONNX - f[1], _ = self.export_engine() - if onnx: # ONNX - f[2], _ = self.export_onnx() - if xml: # OpenVINO - f[3], _ = self.export_openvino() - if coreml: # CoreML - f[4], _ = self.export_coreml() - if is_tf_format: # TensorFlow formats - self.args.int8 |= edgetpu - f[5], keras_model = self.export_saved_model() - if pb or tfjs: # pb prerequisite to tfjs - f[6], _ = self.export_pb(keras_model=keras_model) - if tflite: - f[7], _ = self.export_tflite(keras_model=keras_model, nms=False, agnostic_nms=self.args.agnostic_nms) - if edgetpu: - f[8], _ = self.export_edgetpu(tflite_model=Path(f[5]) / f"{self.file.stem}_full_integer_quant.tflite") - if tfjs: - f[9], _ = self.export_tfjs() - if paddle: # PaddlePaddle - f[10], _ = self.export_paddle() - if ncnn: # NCNN - f[11], _ = self.export_ncnn() - if neuronx: # NeuronX - f[12], _ = self.export_neuronx() - if neuron: # Neuron - f[13], _ = self.export_neuron() - - # Finish - f = [str(x) for x in f if x] # filter out '' and None - if any(f): - f = str(Path(f[-1])) - square = self.imgsz[0] == self.imgsz[1] - s = ( - "" - if square - else f"WARNING ⚠️ non-PyTorch val requires square images, 'imgsz={self.imgsz}' will not " - f"work. Use export 'imgsz={max(self.imgsz)}' if val is required." - ) - imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(" ", "") - predict_data = f"data={data}" if model.task == "segment" and fmt == "pb" else "" - q = "int8" if self.args.int8 else "half" if self.args.half else "" # quantization - LOGGER.info( - f'\nExport complete ({time.time() - t:.1f}s)' - f"\nResults saved to {colorstr('bold', file.parent.resolve())}" - f'\nPredict: yolo predict task={model.task} model={f} imgsz={imgsz} {q} {predict_data}' - f'\nValidate: yolo val task={model.task} model={f} imgsz={imgsz} data={data} {q} {s}' - f'\nVisualize: https://netron.app' - ) - - self.run_callbacks("on_export_end") - return f # return list of exported files/dirs - - @try_export - def export_neuronx(self, prefix=colorstr("AWS NeuronX:")): - import torch_neuronx - - """YOLOv8 NeuronX model export.""" - LOGGER.info(f"\n{prefix} starting export with torch {torch_neuronx.__version__}...") - f = self.file.with_suffix(".neuronx") - ts = torch_neuronx.trace(self.model, self.im, strict=False) - extra_files = {"config.txt": json.dumps(self.metadata)} # torch._C.ExtraFilesMap() - ts.save(str(f), _extra_files=extra_files) - return f, None - - @try_export - def export_neuron(self, prefix=colorstr("AWS Neuron:")): - import torch_neuron - - """YOLOv8 Neuron model export.""" - LOGGER.info(f"\n{prefix} starting export with torch {torch_neuron.__version__}...") - f = self.file.with_suffix(".neuron") - ts = torch_neuron.trace(self.model, self.im, strict=False) - extra_files = {"config.txt": json.dumps(self.metadata)} - ts.save(str(f), _extra_files=extra_files) - return f, None diff --git a/ultralytics/engine/neuron_model.py b/ultralytics/engine/neuron_model.py deleted file mode 100644 index 4506c401dfb..00000000000 --- a/ultralytics/engine/neuron_model.py +++ /dev/null @@ -1,55 +0,0 @@ -from pathlib import Path -from typing import Union - -from ultralytics.engine.model import Model - - -class NeuronModel(Model): - def __init__( - self, - model: Union[str, Path] = "yolov8n.pt", - task: str = None, - verbose: bool = False, - ) -> None: - super().__init__(model, task, verbose) - - def export( - self, - **kwargs, - ) -> str: - """ - Exports the model to a different format suitable for deployment. 
- - This method facilitates the export of the model to various formats (e.g., ONNX, TorchScript) for deployment - purposes. It uses the 'Exporter' class for the export process, combining model-specific overrides, method - defaults, and any additional arguments provided. The combined arguments are used to configure export settings. - - The method supports a wide range of arguments to customize the export process. For a comprehensive list of all - possible arguments, refer to the 'configuration' section in the documentation. - - Args: - **kwargs (any): Arbitrary keyword arguments to customize the export process. These are combined with the - model's overrides and method defaults. - - Returns: - (str): The exported model filename in the specified format, or an object related to the export process. - - Raises: - AssertionError: If the model is not a PyTorch model. - """ - self._check_is_pytorch_model() - from .neuron_exporter import NeuronExporter - - custom = { - "imgsz": self.model.args["imgsz"], - "batch": 1, - "data": None, - "verbose": False, - } # method defaults - args = { - **self.overrides, - **custom, - **kwargs, - "mode": "export", - } # highest priority args on the right - return NeuronExporter(overrides=args, _callbacks=self.callbacks)(model=self.model) diff --git a/ultralytics/engine/neuron_predictor.py b/ultralytics/engine/neuron_predictor.py deleted file mode 100644 index eb28beb4f12..00000000000 --- a/ultralytics/engine/neuron_predictor.py +++ /dev/null @@ -1,25 +0,0 @@ -from ultralytics.engine.predictor import BasePredictor -from ultralytics.nn.neuron_autobackend import NeuronAutoBackend -from ultralytics.utils.torch_utils import select_device - - -class NeuronPredictor(BasePredictor): - def __init__(self, cfg=..., overrides=None, _callbacks=None): - super().__init__(cfg, overrides, _callbacks) - - def setup_model(self, model, verbose=True): - """Initialize YOLO model with given parameters and set it to evaluation mode.""" - self.model = NeuronAutoBackend( - weights=model or self.args.model, - device=select_device(self.args.device, verbose=verbose), - dnn=self.args.dnn, - data=self.args.data, - fp16=self.args.half, - batch=self.args.batch, - fuse=True, - verbose=verbose, - ) - - self.device = self.model.device # update device - self.args.half = self.model.fp16 # update half - self.model.eval() diff --git a/ultralytics/models/__init__.py b/ultralytics/models/__init__.py index 2a98f3cea9f..32bb0fca725 100644 --- a/ultralytics/models/__init__.py +++ b/ultralytics/models/__init__.py @@ -4,7 +4,7 @@ from .nas import NAS from .rtdetr import RTDETR from .sam import SAM -from .yolo import YOLO, NeuronYOLO, YOLOWorld +from .yolo import YOLO, YOLOWorld __all__ = ( "YOLO", @@ -12,5 +12,5 @@ "SAM", "FastSAM", "NAS", - "YOLOWorld, NeuronYOLO", + "YOLOWorld", ) # allow simpler import diff --git a/ultralytics/models/yolo/__init__.py b/ultralytics/models/yolo/__init__.py index 29a96fcfd32..8d9aedfecb8 100644 --- a/ultralytics/models/yolo/__init__.py +++ b/ultralytics/models/yolo/__init__.py @@ -3,6 +3,5 @@ from ultralytics.models.yolo import classify, detect, obb, pose, segment, world from .model import YOLO, YOLOWorld -from .neuron_model import NeuronYOLO -__all__ = "classify", "segment", "detect", "pose", "obb", "world", "YOLO", "YOLOWorld", "NeuronYOLO" +__all__ = "classify", "segment", "detect", "pose", "obb", "world", "YOLO", "YOLOWorld" diff --git a/ultralytics/models/yolo/detect/__init__.py b/ultralytics/models/yolo/detect/__init__.py index bde72324e38..e499d6a7321 100644 --- 
a/ultralytics/models/yolo/detect/__init__.py +++ b/ultralytics/models/yolo/detect/__init__.py @@ -1,6 +1,5 @@ # Ultralytics YOLO 🚀, AGPL-3.0 license -from .neuron_predict import NeuronDetectionPredictor from .predict import DetectionPredictor from .train import DetectionTrainer from .val import DetectionValidator @@ -9,5 +8,4 @@ "DetectionPredictor", "DetectionTrainer", "DetectionValidator", - "NeuronDetectionPredictor", ) diff --git a/ultralytics/models/yolo/detect/neuron_predict.py b/ultralytics/models/yolo/detect/neuron_predict.py deleted file mode 100644 index 9ee6c8093d8..00000000000 --- a/ultralytics/models/yolo/detect/neuron_predict.py +++ /dev/null @@ -1,22 +0,0 @@ -from ultralytics.models.yolo.detect.predict import DetectionPredictor -from ultralytics.nn.neuron_autobackend import NeuronAutoBackend -from ultralytics.utils.torch_utils import select_device - - -class NeuronDetectionPredictor(DetectionPredictor): - def setup_model(self, model, verbose=True): - """Initialize YOLO model with given parameters and set it to evaluation mode.""" - self.model = NeuronAutoBackend( - weights=model or self.args.model, - device=select_device(self.args.device, verbose=verbose), - dnn=self.args.dnn, - data=self.args.data, - fp16=self.args.half, - batch=self.args.batch, - fuse=True, - verbose=verbose, - ) - - self.device = self.model.device # update device - self.args.half = self.model.fp16 # update half - self.model.eval() diff --git a/ultralytics/models/yolo/neuron_model.py b/ultralytics/models/yolo/neuron_model.py deleted file mode 100644 index 4a67e085e4e..00000000000 --- a/ultralytics/models/yolo/neuron_model.py +++ /dev/null @@ -1,16 +0,0 @@ -from ultralytics.engine.neuron_model import NeuronModel -from ultralytics.models import yolo - - -class NeuronYOLO(NeuronModel): - def __init__(self, model="yolov8n.pt", task=None, verbose=False): - super().__init__(model, task, verbose) - - @property - def task_map(self): - """Map head to model, trainer, validator, and predictor classes.""" - return { - "detect": { - "predictor": yolo.detect.NeuronDetectionPredictor, - }, - } diff --git a/ultralytics/nn/neuron_autobackend.py b/ultralytics/nn/neuron_autobackend.py deleted file mode 100644 index 719160a25bb..00000000000 --- a/ultralytics/nn/neuron_autobackend.py +++ /dev/null @@ -1,689 +0,0 @@ -# Ultralytics YOLO 🚀, AGPL-3.0 license - -import ast -import contextlib -import json -import platform -import zipfile -from collections import OrderedDict, namedtuple -from pathlib import Path - -import cv2 -import numpy as np -import torch -import torch.nn as nn -from PIL import Image - -from ultralytics.utils import ARM64, IS_JETSON, IS_RASPBERRYPI, LINUX, LOGGER, ROOT, yaml_load -from ultralytics.utils.checks import check_requirements, check_suffix, check_version, check_yaml -from ultralytics.utils.downloads import attempt_download_asset, is_url - - -def check_class_names(names): - """ - Check class names. - - Map imagenet class codes to human-readable names if required. Convert lists to dicts. - """ - if isinstance(names, list): # names is a list - names = dict(enumerate(names)) # convert to dict - if isinstance(names, dict): - # Convert 1) string keys to int, i.e. '0' to 0, and non-string values to strings, i.e. True to 'True' - names = {int(k): str(v) for k, v in names.items()} - n = len(names) - if max(names.keys()) >= n: - raise KeyError( - f"{n}-class dataset requires class indices 0-{n - 1}, but you have invalid class indices " - f"{min(names.keys())}-{max(names.keys())} defined in your dataset YAML." 
- ) - if isinstance(names[0], str) and names[0].startswith("n0"): # imagenet class codes, i.e. 'n01440764' - names_map = yaml_load(ROOT / "cfg/datasets/ImageNet.yaml")["map"] # human-readable names - names = {k: names_map[v] for k, v in names.items()} - return names - - -def default_class_names(data=None): - """Applies default class names to an input YAML file or returns numerical class names.""" - if data: - with contextlib.suppress(Exception): - return yaml_load(check_yaml(data))["names"] - return {i: f"class{i}" for i in range(999)} # return default if above errors - - -class NeuronAutoBackend(nn.Module): - """ - Handles dynamic backend selection for running inference using Ultralytics YOLO models. - - The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide - range of formats, each with specific naming conventions as outlined below: - - Supported Formats and Naming Conventions: - | Format | File Suffix | - |-----------------------|------------------| - | PyTorch | *.pt | - | TorchScript | *.torchscript | - | ONNX Runtime | *.onnx | - | ONNX OpenCV DNN | *.onnx (dnn=True)| - | OpenVINO | *openvino_model/ | - | CoreML | *.mlpackage | - | TensorRT | *.engine | - | TensorFlow SavedModel | *_saved_model | - | TensorFlow GraphDef | *.pb | - | TensorFlow Lite | *.tflite | - | TensorFlow Edge TPU | *_edgetpu.tflite | - | PaddlePaddle | *_paddle_model | - | NCNN | *_ncnn_model | - - This class offers dynamic backend switching capabilities based on the input model format, making it easier to deploy - models across various platforms. - """ - - @torch.no_grad() - def __init__( - self, - weights="yolov8n.pt", - device=torch.device("cpu"), - dnn=False, - data=None, - fp16=False, - batch=1, - fuse=True, - verbose=True, - ): - """ - Initialize the AutoBackend for inference. - - Args: - weights (str): Path to the model weights file. Defaults to 'yolov8n.pt'. - device (torch.device): Device to run the model on. Defaults to CPU. - dnn (bool): Use OpenCV DNN module for ONNX inference. Defaults to False. - data (str | Path | optional): Path to the additional data.yaml file containing class names. Optional. - fp16 (bool): Enable half-precision inference. Supported only on specific backends. Defaults to False. - batch (int): Batch-size to assume for inference. - fuse (bool): Fuse Conv2D + BatchNorm layers for optimization. Defaults to True. - verbose (bool): Enable verbose logging. Defaults to True. 
- """ - super().__init__() - w = str(weights[0] if isinstance(weights, list) else weights) - nn_module = isinstance(weights, torch.nn.Module) - ( - pt, - jit, - neuronx, - neuron, - onnx, - xml, - engine, - coreml, - saved_model, - pb, - tflite, - edgetpu, - tfjs, - paddle, - ncnn, - triton, - ) = self._model_type(w) - fp16 &= pt or jit or onnx or xml or engine or nn_module or triton # FP16 - nhwc = coreml or saved_model or pb or tflite or edgetpu # BHWC formats (vs torch BCWH) - stride = 32 # default stride - model, metadata = None, None - - # Set device - cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA - if cuda and not any([nn_module, pt, jit, engine, onnx]): # GPU dataloader formats - device = torch.device("cpu") - cuda = False - - # Download if not local - if not (pt or triton or nn_module): - w = attempt_download_asset(w) - - # In-memory PyTorch model - if nn_module: - model = weights.to(device) - if fuse: - model = model.fuse(verbose=verbose) - if hasattr(model, "kpt_shape"): - kpt_shape = model.kpt_shape # pose-only - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, "module") else model.names # get class names - model.half() if fp16 else model.float() - self.model = model # explicitly assign for to(), cpu(), cuda(), half() - pt = True - - # PyTorch - elif pt: - from ultralytics.nn.tasks import attempt_load_weights - - model = attempt_load_weights( - weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse - ) - if hasattr(model, "kpt_shape"): - kpt_shape = model.kpt_shape # pose-only - stride = max(int(model.stride.max()), 32) # model stride - names = model.module.names if hasattr(model, "module") else model.names # get class names - model.half() if fp16 else model.float() - self.model = model # explicitly assign for to(), cpu(), cuda(), half() - - # TorchScript - elif jit: - LOGGER.info(f"Loading {w} for TorchScript inference...") - extra_files = {"config.txt": ""} # model metadata - model = torch.jit.load(w, _extra_files=extra_files, map_location=device) - model.half() if fp16 else model.float() - if extra_files["config.txt"]: # load metadata dict - metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items())) - # NeuronX - elif neuronx: - import torch_neuronx - - LOGGER.info(f"Loading {w} for NeuronX inference... version {torch_neuronx.__version__}") - extra_files = {"config.txt": ""} # model metadata - model = torch.jit.load(w, _extra_files=extra_files, map_location=device) - model.half() if fp16 else model.float() - if extra_files["config.txt"]: # load metadata dict - metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items())) - ## Neuron - elif neuron: - import torch_neuron - - LOGGER.info(f"Loading {w} for Neuron inference... 
version {torch_neuron.__version__}") - extra_files = {"config.txt": ""} # model metadata - model = torch.jit.load(w, _extra_files=extra_files, map_location=device) - model.half() if fp16 else model.float() - if extra_files["config.txt"]: # load metadata dict - metadata = json.loads(extra_files["config.txt"], object_hook=lambda x: dict(x.items())) - - # ONNX OpenCV DNN - elif dnn: - LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...") - check_requirements("opencv-python>=4.5.4") - net = cv2.dnn.readNetFromONNX(w) - - # ONNX Runtime - elif onnx: - LOGGER.info(f"Loading {w} for ONNX Runtime inference...") - check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime")) - if IS_RASPBERRYPI or IS_JETSON: - # Fix 'numpy.linalg._umath_linalg' has no attribute '_ilp64' for TF SavedModel on RPi and Jetson - check_requirements("numpy==1.23.5") - import onnxruntime - - providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"] - session = onnxruntime.InferenceSession(w, providers=providers) - output_names = [x.name for x in session.get_outputs()] - metadata = session.get_modelmeta().custom_metadata_map - - # OpenVINO - elif xml: - LOGGER.info(f"Loading {w} for OpenVINO inference...") - check_requirements("openvino>=2024.0.0") - import openvino as ov - - core = ov.Core() - w = Path(w) - if not w.is_file(): # if not *.xml - w = next(w.glob("*.xml")) # get *.xml file from *_openvino_model dir - ov_model = core.read_model(model=str(w), weights=w.with_suffix(".bin")) - if ov_model.get_parameters()[0].get_layout().empty: - ov_model.get_parameters()[0].set_layout(ov.Layout("NCHW")) - - # OpenVINO inference modes are 'LATENCY', 'THROUGHPUT' (not recommended), or 'CUMULATIVE_THROUGHPUT' - inference_mode = "CUMULATIVE_THROUGHPUT" if batch > 1 else "LATENCY" - LOGGER.info(f"Using OpenVINO {inference_mode} mode for batch={batch} inference...") - ov_compiled_model = core.compile_model( - ov_model, - device_name="AUTO", # AUTO selects best available device, do not modify - config={"PERFORMANCE_HINT": inference_mode}, - ) - input_name = ov_compiled_model.input().get_any_name() - metadata = w.parent / "metadata.yaml" - - # TensorRT - elif engine: - LOGGER.info(f"Loading {w} for TensorRT inference...") - try: - import tensorrt as trt # noqa https://developer.nvidia.com/nvidia-tensorrt-download - except ImportError: - if LINUX: - check_requirements("tensorrt", cmds="-U") - import tensorrt as trt # noqa - check_version(trt.__version__, "7.0.0", hard=True) # require tensorrt>=7.0.0 - if device.type == "cpu": - device = torch.device("cuda:0") - Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr")) - logger = trt.Logger(trt.Logger.INFO) - # Read file - with open(w, "rb") as f, trt.Runtime(logger) as runtime: - try: - meta_len = int.from_bytes(f.read(4), byteorder="little") # read metadata length - metadata = json.loads(f.read(meta_len).decode("utf-8")) # read metadata - except UnicodeDecodeError: - f.seek(0) # engine file may lack embedded Ultralytics metadata - model = runtime.deserialize_cuda_engine(f.read()) # read engine - - # Model context - try: - context = model.create_execution_context() - except Exception as e: # model is None - LOGGER.error(f"ERROR: TensorRT model exported with a different version than {trt.__version__}\n") - raise e - - bindings = OrderedDict() - output_names = [] - fp16 = False # default updated below - dynamic = False - is_trt10 = not hasattr(model, "num_bindings") - num = range(model.num_io_tensors) if is_trt10 
else range(model.num_bindings) - for i in num: - if is_trt10: - name = model.get_tensor_name(i) - dtype = trt.nptype(model.get_tensor_dtype(name)) - is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT - if is_input: - if -1 in tuple(model.get_tensor_shape(name)): - dynamic = True - context.set_input_shape(name, tuple(model.get_tensor_profile_shape(name, 0)[1])) - if dtype == np.float16: - fp16 = True - else: - output_names.append(name) - shape = tuple(context.get_tensor_shape(name)) - else: # TensorRT < 10.0 - name = model.get_binding_name(i) - dtype = trt.nptype(model.get_binding_dtype(i)) - is_input = model.binding_is_input(i) - if model.binding_is_input(i): - if -1 in tuple(model.get_binding_shape(i)): # dynamic - dynamic = True - context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[1])) - if dtype == np.float16: - fp16 = True - else: - output_names.append(name) - shape = tuple(context.get_binding_shape(i)) - im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device) - bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr())) - binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items()) - batch_size = bindings["images"].shape[0] # if dynamic, this is instead max batch size - - # CoreML - elif coreml: - LOGGER.info(f"Loading {w} for CoreML inference...") - import coremltools as ct - - model = ct.models.MLModel(w) - metadata = dict(model.user_defined_metadata) - - # TF SavedModel - elif saved_model: - LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...") - import tensorflow as tf - - keras = False # assume TF1 saved_model - model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w) - metadata = Path(w) / "metadata.yaml" - - # TF GraphDef - elif pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt - LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...") - import tensorflow as tf - - from ultralytics.engine.exporter import gd_outputs - - def wrap_frozen_graph(gd, inputs, outputs): - """Wrap frozen graphs for deployment.""" - x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped - ge = x.graph.as_graph_element - return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs)) - - gd = tf.Graph().as_graph_def() # TF GraphDef - with open(w, "rb") as f: - gd.ParseFromString(f.read()) - frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd)) - with contextlib.suppress(StopIteration): # find metadata in SavedModel alongside GraphDef - metadata = next(Path(w).resolve().parent.rglob(f"{Path(w).stem}_saved_model*/metadata.yaml")) - - # TFLite or TFLite Edge TPU - elif tflite or edgetpu: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - try: # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu - from tflite_runtime.interpreter import Interpreter, load_delegate - except ImportError: - import tensorflow as tf - - Interpreter, load_delegate = tf.lite.Interpreter, tf.lite.experimental.load_delegate - if edgetpu: # TF Edge TPU https://coral.ai/software/#edgetpu-runtime - LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...") - delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[ - platform.system() - ] - interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)]) - else: # TFLite - LOGGER.info(f"Loading {w} for TensorFlow Lite inference...") - interpreter = 
Interpreter(model_path=w) # load TFLite model - interpreter.allocate_tensors() # allocate - input_details = interpreter.get_input_details() # inputs - output_details = interpreter.get_output_details() # outputs - # Load metadata - with contextlib.suppress(zipfile.BadZipFile): - with zipfile.ZipFile(w, "r") as model: - meta_file = model.namelist()[0] - metadata = ast.literal_eval(model.read(meta_file).decode("utf-8")) - - # TF.js - elif tfjs: - raise NotImplementedError("YOLOv8 TF.js inference is not currently supported.") - - # PaddlePaddle - elif paddle: - LOGGER.info(f"Loading {w} for PaddlePaddle inference...") - check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle") - import paddle.inference as pdi # noqa - - w = Path(w) - if not w.is_file(): # if not *.pdmodel - w = next(w.rglob("*.pdmodel")) # get *.pdmodel file from *_paddle_model dir - config = pdi.Config(str(w), str(w.with_suffix(".pdiparams"))) - if cuda: - config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0) - predictor = pdi.create_predictor(config) - input_handle = predictor.get_input_handle(predictor.get_input_names()[0]) - output_names = predictor.get_output_names() - metadata = w.parents[1] / "metadata.yaml" - - # NCNN - elif ncnn: - LOGGER.info(f"Loading {w} for NCNN inference...") - check_requirements("git+https://github.com/Tencent/ncnn.git" if ARM64 else "ncnn") # requires NCNN - import ncnn as pyncnn - - net = pyncnn.Net() - net.opt.use_vulkan_compute = cuda - w = Path(w) - if not w.is_file(): # if not *.param - w = next(w.glob("*.param")) # get *.param file from *_ncnn_model dir - net.load_param(str(w)) - net.load_model(str(w.with_suffix(".bin"))) - metadata = w.parent / "metadata.yaml" - - # NVIDIA Triton Inference Server - elif triton: - check_requirements("tritonclient[all]") - from ultralytics.utils.triton import TritonRemoteModel - - model = TritonRemoteModel(w) - - # Any other format (unsupported) - else: - from ultralytics.engine.neuron_exporter import export_formats - - raise TypeError( - f"model='{w}' is not a supported model format. " - f"See https://docs.ultralytics.com/modes/predict for help.\n\n{export_formats()}" - ) - - # Load external metadata YAML - if isinstance(metadata, (str, Path)) and Path(metadata).exists(): - metadata = yaml_load(metadata) - if metadata and isinstance(metadata, dict): - for k, v in metadata.items(): - if k in {"stride", "batch"}: - metadata[k] = int(v) - elif k in {"imgsz", "names", "kpt_shape"} and isinstance(v, str): - metadata[k] = eval(v) - stride = metadata["stride"] - task = metadata["task"] - batch = metadata["batch"] - imgsz = metadata["imgsz"] - names = metadata["names"] - kpt_shape = metadata.get("kpt_shape") - elif not (pt or triton or nn_module): - LOGGER.warning(f"WARNING ⚠️ Metadata not found for 'model={weights}'") - - # Check names - if "names" not in locals(): # names missing - names = default_class_names(data) - names = check_class_names(names) - - # Disable gradients - if pt: - for p in model.parameters(): - p.requires_grad = False - - self.__dict__.update(locals()) # assign all variables to self - - def forward(self, im, augment=False, visualize=False, embed=None): - """ - Runs inference on the YOLOv8 MultiBackend model. - - Args: - im (torch.Tensor): The image tensor to perform inference on. 
- augment (bool): whether to perform data augmentation during inference, defaults to False - visualize (bool): whether to visualize the output predictions, defaults to False - embed (list, optional): A list of feature vectors/embeddings to return. - - Returns: - (tuple): Tuple containing the raw output tensor, and processed output for visualization (if visualize=True) - """ - b, ch, h, w = im.shape # batch, channel, height, width - if self.fp16 and im.dtype != torch.float16: - im = im.half() # to FP16 - if self.nhwc: - im = im.permute(0, 2, 3, 1) # torch BCHW to numpy BHWC shape(1,320,192,3) - - # PyTorch - if self.pt or self.nn_module: - y = self.model(im, augment=augment, visualize=visualize, embed=embed) - - # TorchScript - elif self.jit: - y = self.model(im) - - elif self.neuronx: - y = self.model(im) - elif self.neuron: - y = self.model(im) - # ONNX OpenCV DNN - elif self.dnn: - im = im.cpu().numpy() # torch to numpy - self.net.setInput(im) - y = self.net.forward() - - # ONNX Runtime - elif self.onnx: - im = im.cpu().numpy() # torch to numpy - y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im}) - - # OpenVINO - elif self.xml: - im = im.cpu().numpy() # FP32 - - if self.inference_mode in {"THROUGHPUT", "CUMULATIVE_THROUGHPUT"}: # optimized for larger batch-sizes - n = im.shape[0] # number of images in batch - results = [None] * n # preallocate list with None to match the number of images - - def callback(request, userdata): - """Places result in preallocated list using userdata index.""" - results[userdata] = request.results - - # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image - async_queue = self.ov.runtime.AsyncInferQueue(self.ov_compiled_model) - async_queue.set_callback(callback) - for i in range(n): - # Start async inference with userdata=i to specify the position in results list - async_queue.start_async(inputs={self.input_name: im[i : i + 1]}, userdata=i) # keep image as BCHW - async_queue.wait_all() # wait for all inference requests to complete - y = np.concatenate([list(r.values())[0] for r in results]) - - else: # inference_mode = "LATENCY", optimized for fastest first result at batch-size 1 - y = list(self.ov_compiled_model(im).values()) - - # TensorRT - elif self.engine: - if self.dynamic or im.shape != self.bindings["images"].shape: - if self.is_trt10: - self.context.set_input_shape("images", im.shape) - self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape) - for name in self.output_names: - self.bindings[name].data.resize_(tuple(self.context.get_tensor_shape(name))) - else: - i = self.model.get_binding_index("images") - self.context.set_binding_shape(i, im.shape) - self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape) - for name in self.output_names: - i = self.model.get_binding_index(name) - self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i))) - - s = self.bindings["images"].shape - assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}" - self.binding_addrs["images"] = int(im.data_ptr()) - self.context.execute_v2(list(self.binding_addrs.values())) - y = [self.bindings[x].data for x in sorted(self.output_names)] - - # CoreML - elif self.coreml: - im = im[0].cpu().numpy() - im_pil = Image.fromarray((im * 255).astype("uint8")) - # im = im.resize((192, 320), Image.BILINEAR) - y = self.model.predict({"image": im_pil}) # coordinates are xywh normalized - if "confidence" in y: - raise 
TypeError( - "Ultralytics only supports inference of non-pipelined CoreML models exported with " - f"'nms=False', but 'model={w}' has an NMS pipeline created by an 'nms=True' export." - ) - # TODO: CoreML NMS inference handling - # from ultralytics.utils.ops import xywh2xyxy - # box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]]) # xyxy pixels - # conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float32) - # y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1) - elif len(y) == 1: # classification model - y = list(y.values()) - elif len(y) == 2: # segmentation model - y = list(reversed(y.values())) # reversed for segmentation models (pred, proto) - - # PaddlePaddle - elif self.paddle: - im = im.cpu().numpy().astype(np.float32) - self.input_handle.copy_from_cpu(im) - self.predictor.run() - y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names] - - # NCNN - elif self.ncnn: - mat_in = self.pyncnn.Mat(im[0].cpu().numpy()) - with self.net.create_extractor() as ex: - ex.input(self.net.input_names()[0], mat_in) - # WARNING: 'output_names' sorted as a temporary fix for https://github.com/pnnx/pnnx/issues/130 - y = [np.array(ex.extract(x)[1])[None] for x in sorted(self.net.output_names())] - - # NVIDIA Triton Inference Server - elif self.triton: - im = im.cpu().numpy() # torch to numpy - y = self.model(im) - - # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU) - else: - im = im.cpu().numpy() - if self.saved_model: # SavedModel - y = self.model(im, training=False) if self.keras else self.model(im) - if not isinstance(y, list): - y = [y] - elif self.pb: # GraphDef - y = self.frozen_func(x=self.tf.constant(im)) - if (self.task == "segment" or len(y) == 2) and len(self.names) == 999: # segments and names not defined - ip, ib = (0, 1) if len(y[0].shape) == 4 else (1, 0) # index of protos, boxes - nc = y[ib].shape[1] - y[ip].shape[3] - 4 # y = (1, 160, 160, 32), (1, 116, 8400) - self.names = {i: f"class{i}" for i in range(nc)} - else: # Lite or Edge TPU - details = self.input_details[0] - is_int = details["dtype"] in {np.int8, np.int16} # is TFLite quantized int8 or int16 model - if is_int: - scale, zero_point = details["quantization"] - im = (im / scale + zero_point).astype(details["dtype"]) # de-scale - self.interpreter.set_tensor(details["index"], im) - self.interpreter.invoke() - y = [] - for output in self.output_details: - x = self.interpreter.get_tensor(output["index"]) - if is_int: - scale, zero_point = output["quantization"] - x = (x.astype(np.float32) - zero_point) * scale # re-scale - if x.ndim == 3: # if task is not classification, excluding masks (ndim=4) as well - # Denormalize xywh by image size. 
See https://github.com/ultralytics/ultralytics/pull/1695 - # xywh are normalized in TFLite/EdgeTPU to mitigate quantization error of integer models - x[:, [0, 2]] *= w - x[:, [1, 3]] *= h - y.append(x) - # TF segment fixes: export is reversed vs ONNX export and protos are transposed - if len(y) == 2: # segment with (det, proto) output order reversed - if len(y[1].shape) != 4: - y = list(reversed(y)) # should be y = (1, 116, 8400), (1, 160, 160, 32) - y[1] = np.transpose(y[1], (0, 3, 1, 2)) # should be y = (1, 116, 8400), (1, 32, 160, 160) - y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y] - - # for x in y: - # print(type(x), len(x)) if isinstance(x, (list, tuple)) else print(type(x), x.shape) # debug shapes - if isinstance(y, (list, tuple)): - return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y] - else: - return self.from_numpy(y) - - def from_numpy(self, x): - """ - Convert a numpy array to a tensor. - - Args: - x (np.ndarray): The array to be converted. - - Returns: - (torch.Tensor): The converted tensor - """ - return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x - - def warmup(self, imgsz=(1, 3, 640, 640)): - """ - Warm up the model by running one forward pass with a dummy input. - - Args: - imgsz (tuple): The shape of the dummy input tensor in the format (batch_size, channels, height, width) - """ - import torchvision # noqa (import here so torchvision import time not recorded in postprocess time) - - warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton, self.nn_module - if any(warmup_types) and (self.device.type != "cpu" or self.triton): - im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device) # input - for _ in range(2 if self.jit else 1): - self.forward(im) # warmup - - @staticmethod - def _model_type(p="path/to/model.pt"): - """ - This function takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml, - engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle. - - Args: - p: path to the model file. Defaults to path/to/model.pt - - Examples: - >>> model = AutoBackend(weights="path/to/model.onnx") - >>> model_type = model._model_type() # returns "onnx" - """ - from ultralytics.engine.neuron_exporter import export_formats - - sf = list(export_formats().Suffix) # export suffixes - if not is_url(p) and not isinstance(p, str): - check_suffix(p, sf) # checks - name = Path(p).name - types = [s in name for s in sf] - types[5] |= name.endswith(".mlmodel") # retain support for older Apple CoreML *.mlmodel formats - types[8] &= not types[9] # tflite &= not edgetpu - if any(types): - triton = False - else: - from urllib.parse import urlsplit - - url = urlsplit(p) - triton = bool(url.netloc) and bool(url.path) and url.scheme in {"http", "grpc"} - - return types + [triton]
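
Usage note (supplementary, not part of the patch): with the first commit applied on a host that has the AWS Neuron SDK installed (torch-neuronx for Inf2/Trn1 instances, or the legacy torch-neuron package for Inf1), the new formats plug into the standard Ultralytics workflow. A minimal sketch; the weight file and image URL below are illustrative:

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    f = model.export(format="neuronx")  # compiles via torch_neuronx.trace and saves yolo11n.neuronx
    compiled = YOLO(f)  # AutoBackend dispatches on the .neuronx suffix
    results = compiled("https://ultralytics.com/images/bus.jpg")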
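
Usage note (supplementary, not part of the patch): both export methods save a TorchScript archive with the Ultralytics metadata embedded as a "config.txt" extra file, so the artifact can also be loaded without AutoBackend, mirroring what the new loader branches do. A minimal sketch, assuming a Neuron-equipped host and an illustrative file name:

    import json

    import torch
    import torch_neuronx  # importing registers the Neuron ops needed to deserialize the compiled archive

    extra_files = {"config.txt": ""}  # populated in place by torch.jit.load
    model = torch.jit.load("yolo11n.neuronx", _extra_files=extra_files, map_location="cpu")
    metadata = json.loads(extra_files["config.txt"]) if extra_files["config.txt"] else {}
    im = torch.zeros(metadata.get("batch", 1), 3, *metadata.get("imgsz", [640, 640]))
    y = model(im)  # executes on a NeuronCore through the Neuron runtime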