diff --git a/tests/torch/data/reference_graphs/fx/reference_metatypes/yolov8n.json b/tests/torch/data/reference_graphs/fx/reference_metatypes/yolov8n.json
index 5704dd05bdf..61afa90e897 100644
--- a/tests/torch/data/reference_graphs/fx/reference_metatypes/yolov8n.json
+++ b/tests/torch/data/reference_graphs/fx/reference_metatypes/yolov8n.json
@@ -1 +1 @@
-{"arg0_1": "input_noop", "_param_constant0": "const_noop", "conv2d": "Conv2DOp", "empty": "unknown", "_param_constant1": "const_noop", "_param_constant2": "const_noop", "_tensor_constant0": "const_noop", "_tensor_constant1": "const_noop", "_native_batch_norm_legit_no_training": "BatchNormOp", "getitem": "GatherOp", "getitem_1": "GatherOp", "getitem_2": "GatherOp", "silu": "SiluOp", "_param_constant3": "const_noop", "conv2d_1": "Conv2DOp", "empty_1": "unknown", "_param_constant4": "const_noop", "_param_constant5": "const_noop", "_tensor_constant2": "const_noop", "_tensor_constant3": "const_noop", "_native_batch_norm_legit_no_training_1": "BatchNormOp", "getitem_3": "GatherOp", "getitem_4": "GatherOp", "getitem_5": "GatherOp", "silu_1": "SiluOp", "_param_constant6": "const_noop", "conv2d_2": "Conv2DOp", "empty_2": "unknown", "_param_constant7": "const_noop", "_param_constant8": "const_noop", "_tensor_constant4": "const_noop", "_tensor_constant5": "const_noop", "_native_batch_norm_legit_no_training_2": "BatchNormOp", "getitem_6": "GatherOp", "getitem_7": "GatherOp", "getitem_8": "GatherOp", "silu_2": "SiluOp", "chunk": "SplitOp", "getitem_9": "GatherOp", "getitem_10": "GatherOp", "_param_constant9": "const_noop", "conv2d_3": "Conv2DOp", "empty_3": "unknown", "_param_constant10": "const_noop", "_param_constant11": "const_noop", "_tensor_constant6": "const_noop", "_tensor_constant7": "const_noop", "_native_batch_norm_legit_no_training_3": "BatchNormOp", "getitem_11": "GatherOp", "getitem_12": "GatherOp", "getitem_13": "GatherOp", "silu_3": "SiluOp", "_param_constant12": "const_noop", "conv2d_4": "Conv2DOp", "empty_4": "unknown", "_param_constant13": "const_noop", "_param_constant14": "const_noop", "_tensor_constant8": "const_noop", "_tensor_constant9": "const_noop", "_native_batch_norm_legit_no_training_4": "BatchNormOp", "getitem_14": "GatherOp", "getitem_15": "GatherOp", "getitem_16": "GatherOp", "silu_4": "SiluOp", "add": "AddOp", "cat": "CatOp", "_param_constant15": "const_noop", "conv2d_5": "Conv2DOp", "empty_5": "unknown", "_param_constant16": "const_noop", "_param_constant17": "const_noop", "_tensor_constant10": "const_noop", "_tensor_constant11": "const_noop", "_native_batch_norm_legit_no_training_5": "BatchNormOp", "getitem_17": "GatherOp", "getitem_18": "GatherOp", "getitem_19": "GatherOp", "silu_5": "SiluOp", "_param_constant18": "const_noop", "conv2d_6": "Conv2DOp", "empty_6": "unknown", "_param_constant19": "const_noop", "_param_constant20": "const_noop", "_tensor_constant12": "const_noop", "_tensor_constant13": "const_noop", "_native_batch_norm_legit_no_training_6": "BatchNormOp", "getitem_20": "GatherOp", "getitem_21": "GatherOp", "getitem_22": "GatherOp", "silu_6": "SiluOp", "_param_constant21": "const_noop", "conv2d_7": "Conv2DOp", "empty_7": "unknown", "_param_constant22": "const_noop", "_param_constant23": "const_noop", "_tensor_constant14": "const_noop", "_tensor_constant15": "const_noop", "_native_batch_norm_legit_no_training_7": "BatchNormOp", "getitem_23": "GatherOp", "getitem_24": "GatherOp", "getitem_25": "GatherOp", "silu_7": "SiluOp", "chunk_1": "SplitOp", "getitem_26": "GatherOp", "getitem_27": "GatherOp", "_param_constant24": "const_noop", "conv2d_8": "Conv2DOp", "empty_8": "unknown", "_param_constant25": "const_noop", "_param_constant26": "const_noop", "_tensor_constant16": "const_noop", "_tensor_constant17": "const_noop", "_native_batch_norm_legit_no_training_8": "BatchNormOp", "getitem_28": "GatherOp", "getitem_29": "GatherOp", "getitem_30": "GatherOp", "silu_8": "SiluOp", "_param_constant27": "const_noop", "conv2d_9": "Conv2DOp", "empty_9": "unknown", "_param_constant28": "const_noop", "_param_constant29": "const_noop", "_tensor_constant18": "const_noop", "_tensor_constant19": "const_noop", "_native_batch_norm_legit_no_training_9": "BatchNormOp", "getitem_31": "GatherOp", "getitem_32": "GatherOp", "getitem_33": "GatherOp", "silu_9": "SiluOp", "add_1": "AddOp", "_param_constant30": "const_noop", "conv2d_10": "Conv2DOp", "empty_10": "unknown", "_param_constant31": "const_noop", "_param_constant32": "const_noop", "_tensor_constant20": "const_noop", "_tensor_constant21": "const_noop", "_native_batch_norm_legit_no_training_10": "BatchNormOp", "getitem_34": "GatherOp", "getitem_35": "GatherOp", "getitem_36": "GatherOp", "silu_10": "SiluOp", "_param_constant33": "const_noop", "conv2d_11": "Conv2DOp", "empty_11": "unknown", "_param_constant34": "const_noop", "_param_constant35": "const_noop", "_tensor_constant22": "const_noop", "_tensor_constant23": "const_noop", "_native_batch_norm_legit_no_training_11": "BatchNormOp", "getitem_37": "GatherOp", "getitem_38": "GatherOp", "getitem_39": "GatherOp", "silu_11": "SiluOp", "add_2": "AddOp", "cat_1": "CatOp", "_param_constant36": "const_noop", "conv2d_12": "Conv2DOp", "empty_12": "unknown", "_param_constant37": "const_noop", "_param_constant38": "const_noop", "_tensor_constant24": "const_noop", "_tensor_constant25": "const_noop", "_native_batch_norm_legit_no_training_12": "BatchNormOp", "getitem_40": "GatherOp", "getitem_41": "GatherOp", "getitem_42": "GatherOp", "silu_12": "SiluOp", "_param_constant39": "const_noop", "conv2d_13": "Conv2DOp", "empty_13": "unknown", "_param_constant40": "const_noop", "_param_constant41": "const_noop", "_tensor_constant26": "const_noop", "_tensor_constant27": "const_noop", "_native_batch_norm_legit_no_training_13": "BatchNormOp", "getitem_43": "GatherOp", "getitem_44": "GatherOp", "getitem_45": "GatherOp", "silu_13": "SiluOp", "_param_constant42": "const_noop", "conv2d_14": "Conv2DOp", "empty_14": "unknown", "_param_constant43": "const_noop", "_param_constant44": "const_noop", "_tensor_constant28": "const_noop", "_tensor_constant29": "const_noop", "_native_batch_norm_legit_no_training_14": "BatchNormOp", "getitem_46": "GatherOp", "getitem_47": "GatherOp", "getitem_48": "GatherOp", "silu_14": "SiluOp", "chunk_2": "SplitOp", "getitem_49": "GatherOp", "getitem_50": "GatherOp", "_param_constant45": "const_noop", "conv2d_15": "Conv2DOp", "empty_15": "unknown", "_param_constant46": "const_noop", "_param_constant47": "const_noop", "_tensor_constant30": "const_noop", "_tensor_constant31": "const_noop", "_native_batch_norm_legit_no_training_15": "BatchNormOp", "getitem_51": "GatherOp", "getitem_52": "GatherOp", "getitem_53": "GatherOp", "silu_15": "SiluOp", "_param_constant48": "const_noop", "conv2d_16": "Conv2DOp", "empty_16": "unknown", "_param_constant49": "const_noop", "_param_constant50": "const_noop", "_tensor_constant32": "const_noop", "_tensor_constant33": "const_noop", "_native_batch_norm_legit_no_training_16": "BatchNormOp", "getitem_54": "GatherOp", "getitem_55": "GatherOp", "getitem_56": "GatherOp", "silu_16": "SiluOp", "add_3": "AddOp", "_param_constant51": "const_noop", "conv2d_17": "Conv2DOp", "empty_17": "unknown", "_param_constant52": "const_noop", "_param_constant53": "const_noop", "_tensor_constant34": "const_noop", "_tensor_constant35": "const_noop", "_native_batch_norm_legit_no_training_17": "BatchNormOp", "getitem_57": "GatherOp", "getitem_58": "GatherOp", "getitem_59": "GatherOp", "silu_17": "SiluOp", "_param_constant54": "const_noop", "conv2d_18": "Conv2DOp", "empty_18": "unknown", "_param_constant55": "const_noop", "_param_constant56": "const_noop", "_tensor_constant36": "const_noop", "_tensor_constant37": "const_noop", "_native_batch_norm_legit_no_training_18": "BatchNormOp", "getitem_60": "GatherOp", "getitem_61": "GatherOp", "getitem_62": "GatherOp", "silu_18": "SiluOp", "add_4": "AddOp", "cat_2": "CatOp", "_param_constant57": "const_noop", "conv2d_19": "Conv2DOp", "empty_19": "unknown", "_param_constant58": "const_noop", "_param_constant59": "const_noop", "_tensor_constant38": "const_noop", "_tensor_constant39": "const_noop", "_native_batch_norm_legit_no_training_19": "BatchNormOp", "getitem_63": "GatherOp", "getitem_64": "GatherOp", "getitem_65": "GatherOp", "silu_19": "SiluOp", "_param_constant60": "const_noop", "conv2d_20": "Conv2DOp", "empty_20": "unknown", "_param_constant61": "const_noop", "_param_constant62": "const_noop", "_tensor_constant40": "const_noop", "_tensor_constant41": "const_noop", "_native_batch_norm_legit_no_training_20": "BatchNormOp", "getitem_66": "GatherOp", "getitem_67": "GatherOp", "getitem_68": "GatherOp", "silu_20": "SiluOp", "_param_constant63": "const_noop", "conv2d_21": "Conv2DOp", "empty_21": "unknown", "_param_constant64": "const_noop", "_param_constant65": "const_noop", "_tensor_constant42": "const_noop", "_tensor_constant43": "const_noop", "_native_batch_norm_legit_no_training_21": "BatchNormOp", "getitem_69": "GatherOp", "getitem_70": "GatherOp", "getitem_71": "GatherOp", "silu_21": "SiluOp", "chunk_3": "SplitOp", "getitem_72": "GatherOp", "getitem_73": "GatherOp", "_param_constant66": "const_noop", "conv2d_22": "Conv2DOp", "empty_22": "unknown", "_param_constant67": "const_noop", "_param_constant68": "const_noop", "_tensor_constant44": "const_noop", "_tensor_constant45": "const_noop", "_native_batch_norm_legit_no_training_22": "BatchNormOp", "getitem_74": "GatherOp", "getitem_75": "GatherOp", "getitem_76": "GatherOp", "silu_22": "SiluOp", "_param_constant69": "const_noop", "conv2d_23": "Conv2DOp", "empty_23": "unknown", "_param_constant70": "const_noop", "_param_constant71": "const_noop", "_tensor_constant46": "const_noop", "_tensor_constant47": "const_noop", "_native_batch_norm_legit_no_training_23": "BatchNormOp", "getitem_77": "GatherOp", "getitem_78": "GatherOp", "getitem_79": "GatherOp", "silu_23": "SiluOp", "add_5": "AddOp", "cat_3": "CatOp", "_param_constant72": "const_noop", "conv2d_24": "Conv2DOp", "empty_24": "unknown", "_param_constant73": "const_noop", "_param_constant74": "const_noop", "_tensor_constant48": "const_noop", "_tensor_constant49": "const_noop", "_native_batch_norm_legit_no_training_24": "BatchNormOp", "getitem_80": "GatherOp", "getitem_81": "GatherOp", "getitem_82": "GatherOp", "silu_24": "SiluOp", "_param_constant75": "const_noop", "conv2d_25": "Conv2DOp", "empty_25": "unknown", "_param_constant76": "const_noop", "_param_constant77": "const_noop", "_tensor_constant50": "const_noop", "_tensor_constant51": "const_noop", "_native_batch_norm_legit_no_training_25": "BatchNormOp", "getitem_83": "GatherOp", "getitem_84": "GatherOp", "getitem_85": "GatherOp", "silu_25": "SiluOp", "max_pool2d": "MaxPool2DOp", "max_pool2d_1": "MaxPool2DOp", "max_pool2d_2": "MaxPool2DOp", "cat_4": "CatOp", "_param_constant78": "const_noop", "conv2d_26": "Conv2DOp", "empty_26": "unknown", "_param_constant79": "const_noop", "_param_constant80": "const_noop", "_tensor_constant52": "const_noop", "_tensor_constant53": "const_noop", "_native_batch_norm_legit_no_training_26": "BatchNormOp", "getitem_86": "GatherOp", "getitem_87": "GatherOp", "getitem_88": "GatherOp", "silu_26": "SiluOp", "upsample_nearest2d": "InterpolateOp", "cat_5": "CatOp", "_param_constant81": "const_noop", "conv2d_27": "Conv2DOp", "empty_27": "unknown", "_param_constant82": "const_noop", "_param_constant83": "const_noop", "_tensor_constant54": "const_noop", "_tensor_constant55": "const_noop", "_native_batch_norm_legit_no_training_27": "BatchNormOp", "getitem_89": "GatherOp", "getitem_90": "GatherOp", "getitem_91": "GatherOp", "silu_27": "SiluOp", "chunk_4": "SplitOp", "getitem_92": "GatherOp", "getitem_93": "GatherOp", "_param_constant84": "const_noop", "conv2d_28": "Conv2DOp", "empty_28": "unknown", "_param_constant85": "const_noop", "_param_constant86": "const_noop", "_tensor_constant56": "const_noop", "_tensor_constant57": "const_noop", "_native_batch_norm_legit_no_training_28": "BatchNormOp", "getitem_94": "GatherOp", "getitem_95": "GatherOp", "getitem_96": "GatherOp", "silu_28": "SiluOp", "_param_constant87": "const_noop", "conv2d_29": "Conv2DOp", "empty_29": "unknown", "_param_constant88": "const_noop", "_param_constant89": "const_noop", "_tensor_constant58": "const_noop", "_tensor_constant59": "const_noop", "_native_batch_norm_legit_no_training_29": "BatchNormOp", "getitem_97": "GatherOp", "getitem_98": "GatherOp", "getitem_99": "GatherOp", "silu_29": "SiluOp", "cat_6": "CatOp", "_param_constant90": "const_noop", "conv2d_30": "Conv2DOp", "empty_30": "unknown", "_param_constant91": "const_noop", "_param_constant92": "const_noop", "_tensor_constant60": "const_noop", "_tensor_constant61": "const_noop", "_native_batch_norm_legit_no_training_30": "BatchNormOp", "getitem_100": "GatherOp", "getitem_101": "GatherOp", "getitem_102": "GatherOp", "silu_30": "SiluOp", "upsample_nearest2d_1": "InterpolateOp", "cat_7": "CatOp", "_param_constant93": "const_noop", "conv2d_31": "Conv2DOp", "empty_31": "unknown", "_param_constant94": "const_noop", "_param_constant95": "const_noop", "_tensor_constant62": "const_noop", "_tensor_constant63": "const_noop", "_native_batch_norm_legit_no_training_31": "BatchNormOp", "getitem_103": "GatherOp", "getitem_104": "GatherOp", "getitem_105": "GatherOp", "silu_31": "SiluOp", "chunk_5": "SplitOp", "getitem_106": "GatherOp", "getitem_107": "GatherOp", "_param_constant96": "const_noop", "conv2d_32": "Conv2DOp", "empty_32": "unknown", "_param_constant97": "const_noop", "_param_constant98": "const_noop", "_tensor_constant64": "const_noop", "_tensor_constant65": "const_noop", "_native_batch_norm_legit_no_training_32": "BatchNormOp", "getitem_108": "GatherOp", "getitem_109": "GatherOp", "getitem_110": "GatherOp", "silu_32": "SiluOp", "_param_constant99": "const_noop", "conv2d_33": "Conv2DOp", "empty_33": "unknown", "_param_constant100": "const_noop", "_param_constant101": "const_noop", "_tensor_constant66": "const_noop", "_tensor_constant67": "const_noop", "_native_batch_norm_legit_no_training_33": "BatchNormOp", "getitem_111": "GatherOp", "getitem_112": "GatherOp", "getitem_113": "GatherOp", "silu_33": "SiluOp", "cat_8": "CatOp", "_param_constant102": "const_noop", "conv2d_34": "Conv2DOp", "empty_34": "unknown", "_param_constant103": "const_noop", "_param_constant104": "const_noop", "_tensor_constant68": "const_noop", "_tensor_constant69": "const_noop", "_native_batch_norm_legit_no_training_34": "BatchNormOp", "getitem_114": "GatherOp", "getitem_115": "GatherOp", "getitem_116": "GatherOp", "silu_34": "SiluOp", "_param_constant105": "const_noop", "conv2d_35": "Conv2DOp", "empty_35": "unknown", "_param_constant106": "const_noop", "_param_constant107": "const_noop", "_tensor_constant70": "const_noop", "_tensor_constant71": "const_noop", "_native_batch_norm_legit_no_training_35": "BatchNormOp", "getitem_117": "GatherOp", "getitem_118": "GatherOp", "getitem_119": "GatherOp", "silu_35": "SiluOp", "cat_9": "CatOp", "_param_constant108": "const_noop", "conv2d_36": "Conv2DOp", "empty_36": "unknown", "_param_constant109": "const_noop", "_param_constant110": "const_noop", "_tensor_constant72": "const_noop", "_tensor_constant73": "const_noop", "_native_batch_norm_legit_no_training_36": "BatchNormOp", "getitem_120": "GatherOp", "getitem_121": "GatherOp", "getitem_122": "GatherOp", "silu_36": "SiluOp", "chunk_6": "SplitOp", "getitem_123": "GatherOp", "getitem_124": "GatherOp", "_param_constant111": "const_noop", "conv2d_37": "Conv2DOp", "empty_37": "unknown", "_param_constant112": "const_noop", "_param_constant113": "const_noop", "_tensor_constant74": "const_noop", "_tensor_constant75": "const_noop", "_native_batch_norm_legit_no_training_37": "BatchNormOp", "getitem_125": "GatherOp", "getitem_126": "GatherOp", "getitem_127": "GatherOp", "silu_37": "SiluOp", "_param_constant114": "const_noop", "conv2d_38": "Conv2DOp", "empty_38": "unknown", "_param_constant115": "const_noop", "_param_constant116": "const_noop", "_tensor_constant76": "const_noop", "_tensor_constant77": "const_noop", "_native_batch_norm_legit_no_training_38": "BatchNormOp", "getitem_128": "GatherOp", "getitem_129": "GatherOp", "getitem_130": "GatherOp", "silu_38": "SiluOp", "cat_10": "CatOp", "_param_constant117": "const_noop", "conv2d_39": "Conv2DOp", "empty_39": "unknown", "_param_constant118": "const_noop", "_param_constant119": "const_noop", "_tensor_constant78": "const_noop", "_tensor_constant79": "const_noop", "_native_batch_norm_legit_no_training_39": "BatchNormOp", "getitem_131": "GatherOp", "getitem_132": "GatherOp", "getitem_133": "GatherOp", "silu_39": "SiluOp", "_param_constant120": "const_noop", "conv2d_40": "Conv2DOp", "empty_40": "unknown", "_param_constant121": "const_noop", "_param_constant122": "const_noop", "_tensor_constant80": "const_noop", "_tensor_constant81": "const_noop", "_native_batch_norm_legit_no_training_40": "BatchNormOp", "getitem_134": "GatherOp", "getitem_135": "GatherOp", "getitem_136": "GatherOp", "silu_40": "SiluOp", "cat_11": "CatOp", "_param_constant123": "const_noop", "conv2d_41": "Conv2DOp", "empty_41": "unknown", "_param_constant124": "const_noop", "_param_constant125": "const_noop", "_tensor_constant82": "const_noop", "_tensor_constant83": "const_noop", "_native_batch_norm_legit_no_training_41": "BatchNormOp", "getitem_137": "GatherOp", "getitem_138": "GatherOp", "getitem_139": "GatherOp", "silu_41": "SiluOp", "chunk_7": "SplitOp", "getitem_140": "GatherOp", "getitem_141": "GatherOp", "_param_constant126": "const_noop", "conv2d_42": "Conv2DOp", "empty_42": "unknown", "_param_constant127": "const_noop", "_param_constant128": "const_noop", "_tensor_constant84": "const_noop", "_tensor_constant85": "const_noop", "_native_batch_norm_legit_no_training_42": "BatchNormOp", "getitem_142": "GatherOp", "getitem_143": "GatherOp", "getitem_144": "GatherOp", "silu_42": "SiluOp", "_param_constant129": "const_noop", "conv2d_43": "Conv2DOp", "empty_43": "unknown", "_param_constant130": "const_noop", "_param_constant131": "const_noop", "_tensor_constant86": "const_noop", "_tensor_constant87": "const_noop", "_native_batch_norm_legit_no_training_43": "BatchNormOp", "getitem_145": "GatherOp", "getitem_146": "GatherOp", "getitem_147": "GatherOp", "silu_43": "SiluOp", "cat_12": "CatOp", "_param_constant132": "const_noop", "conv2d_44": "Conv2DOp", "empty_44": "unknown", "_param_constant133": "const_noop", "_param_constant134": "const_noop", "_tensor_constant88": "const_noop", "_tensor_constant89": "const_noop", "_native_batch_norm_legit_no_training_44": "BatchNormOp", "getitem_148": "GatherOp", "getitem_149": "GatherOp", "getitem_150": "GatherOp", "silu_44": "SiluOp", "_param_constant135": "const_noop", "conv2d_45": "Conv2DOp", "empty_45": "unknown", "_param_constant136": "const_noop", "_param_constant137": "const_noop", "_tensor_constant90": "const_noop", "_tensor_constant91": "const_noop", "_native_batch_norm_legit_no_training_45": "BatchNormOp", "getitem_151": "GatherOp", "getitem_152": "GatherOp", "getitem_153": "GatherOp", "silu_45": "SiluOp", "_param_constant138": "const_noop", "conv2d_46": "Conv2DOp", "empty_46": "unknown", "_param_constant139": "const_noop", "_param_constant140": "const_noop", "_tensor_constant92": "const_noop", "_tensor_constant93": "const_noop", "_native_batch_norm_legit_no_training_46": "BatchNormOp", "getitem_154": "GatherOp", "getitem_155": "GatherOp", "getitem_156": "GatherOp", "silu_46": "SiluOp", "_param_constant141": "const_noop", "_param_constant142": "const_noop", "conv2d_47": "Conv2DOp", "_param_constant143": "const_noop", "conv2d_48": "Conv2DOp", "empty_47": "unknown", "_param_constant144": "const_noop", "_param_constant145": "const_noop", "_tensor_constant94": "const_noop", "_tensor_constant95": "const_noop", "_native_batch_norm_legit_no_training_47": "BatchNormOp", "getitem_157": "GatherOp", "getitem_158": "GatherOp", "getitem_159": "GatherOp", "silu_47": "SiluOp", "_param_constant146": "const_noop", "conv2d_49": "Conv2DOp", "empty_48": "unknown", "_param_constant147": "const_noop", "_param_constant148": "const_noop", "_tensor_constant96": "const_noop", "_tensor_constant97": "const_noop", "_native_batch_norm_legit_no_training_48": "BatchNormOp", "getitem_160": "GatherOp", "getitem_161": "GatherOp", "getitem_162": "GatherOp", "silu_48": "SiluOp", "_param_constant149": "const_noop", "_param_constant150": "const_noop", "conv2d_50": "Conv2DOp", "cat_13": "CatOp", "_param_constant151": "const_noop", "conv2d_51": "Conv2DOp", "empty_49": "unknown", "_param_constant152": "const_noop", "_param_constant153": "const_noop", "_tensor_constant98": "const_noop", "_tensor_constant99": "const_noop", "_native_batch_norm_legit_no_training_49": "BatchNormOp", "getitem_163": "GatherOp", "getitem_164": "GatherOp", "getitem_165": "GatherOp", "silu_49": "SiluOp", "_param_constant154": "const_noop", "conv2d_52": "Conv2DOp", "empty_50": "unknown", "_param_constant155": "const_noop", "_param_constant156": "const_noop", "_tensor_constant100": "const_noop", "_tensor_constant101": "const_noop", "_native_batch_norm_legit_no_training_50": "BatchNormOp", "getitem_166": "GatherOp", "getitem_167": "GatherOp", "getitem_168": "GatherOp", "silu_50": "SiluOp", "_param_constant157": "const_noop", "_param_constant158": "const_noop", "conv2d_53": "Conv2DOp", "_param_constant159": "const_noop", "conv2d_54": "Conv2DOp", "empty_51": "unknown", "_param_constant160": "const_noop", "_param_constant161": "const_noop", "_tensor_constant102": "const_noop", "_tensor_constant103": "const_noop", "_native_batch_norm_legit_no_training_51": "BatchNormOp", "getitem_169": "GatherOp", "getitem_170": "GatherOp", "getitem_171": "GatherOp", "silu_51": "SiluOp", "_param_constant162": "const_noop", "conv2d_55": "Conv2DOp", "empty_52": "unknown", "_param_constant163": "const_noop", "_param_constant164": "const_noop", "_tensor_constant104": "const_noop", "_tensor_constant105": "const_noop", "_native_batch_norm_legit_no_training_52": "BatchNormOp", "getitem_172": "GatherOp", "getitem_173": "GatherOp", "getitem_174": "GatherOp", "silu_52": "SiluOp", "_param_constant165": "const_noop", "_param_constant166": "const_noop", "conv2d_56": "Conv2DOp", "cat_14": "CatOp", "_param_constant167": "const_noop", "conv2d_57": "Conv2DOp", "empty_53": "unknown", "_param_constant168": "const_noop", "_param_constant169": "const_noop", "_tensor_constant106": "const_noop", "_tensor_constant107": "const_noop", "_native_batch_norm_legit_no_training_53": "BatchNormOp", "getitem_175": "GatherOp", "getitem_176": "GatherOp", "getitem_177": "GatherOp", "silu_53": "SiluOp", "_param_constant170": "const_noop", "conv2d_58": "Conv2DOp", "empty_54": "unknown", "_param_constant171": "const_noop", "_param_constant172": "const_noop", "_tensor_constant108": "const_noop", "_tensor_constant109": "const_noop", "_native_batch_norm_legit_no_training_54": "BatchNormOp", "getitem_178": "GatherOp", "getitem_179": "GatherOp", "getitem_180": "GatherOp", "silu_54": "SiluOp", "_param_constant173": "const_noop", "_param_constant174": "const_noop", "conv2d_59": "Conv2DOp", "_param_constant175": "const_noop", "conv2d_60": "Conv2DOp", "empty_55": "unknown", "_param_constant176": "const_noop", "_param_constant177": "const_noop", "_tensor_constant110": "const_noop", "_tensor_constant111": "const_noop", "_native_batch_norm_legit_no_training_55": "BatchNormOp", "getitem_181": "GatherOp", "getitem_182": "GatherOp", "getitem_183": "GatherOp", "silu_55": "SiluOp", "_param_constant178": "const_noop", "conv2d_61": "Conv2DOp", "empty_56": "unknown", "_param_constant179": "const_noop", "_param_constant180": "const_noop", "_tensor_constant112": "const_noop", "_tensor_constant113": "const_noop", "_native_batch_norm_legit_no_training_56": "BatchNormOp", "getitem_184": "GatherOp", "getitem_185": "GatherOp", "getitem_186": "GatherOp", "silu_56": "SiluOp", "_param_constant181": "const_noop", "_param_constant182": "const_noop", "conv2d_62": "Conv2DOp", "cat_15": "CatOp", "view": "ReshapeOp", "view_1": "ReshapeOp", "view_2": "ReshapeOp", "cat_16": "CatOp", "split_with_sizes": "SplitOp", "getitem_187": "GatherOp", "getitem_188": "GatherOp", "view_3": "ReshapeOp", "transpose": "TransposeOp", "softmax": "SoftmaxOp", "_param_constant183": "const_noop", "conv2d_63": "Conv2DOp", "view_4": "ReshapeOp", "_tensor_constant114": "const_noop", "unsqueeze": "ReshapeOp", "chunk_8": "SplitOp", "getitem_189": "GatherOp", "getitem_190": "GatherOp", "sub": "SubOp", "add_6": "AddOp", "add_7": "AddOp", "div": "DivOp", "sub_1": "SubOp", "cat_17": "CatOp", "_tensor_constant115": "const_noop", "mul": "MulOp", "sigmoid": "SigmoidOp", "cat_18": "CatOp", "output": "output_noop"}
\ No newline at end of file
+{"arg0_1": "input_noop", "_param_constant0": "const_noop", "conv2d": "Conv2DOp", "empty": "unknown", "_param_constant1": "const_noop", "_param_constant2": "const_noop", "_tensor_constant0": "const_noop", "_tensor_constant1": "const_noop", "_native_batch_norm_legit_no_training": "BatchNormOp", "getitem": "GatherOp", "getitem_1": "GatherOp", "getitem_2": "GatherOp", "silu_": "SiluOp", "_param_constant3": "const_noop", "conv2d_1": "Conv2DOp", "empty_1": "unknown", "_param_constant4": "const_noop", "_param_constant5": "const_noop", "_tensor_constant2": "const_noop", "_tensor_constant3": "const_noop", "_native_batch_norm_legit_no_training_1": "BatchNormOp", "getitem_3": "GatherOp", "getitem_4": "GatherOp", "getitem_5": "GatherOp", "silu__1": "SiluOp", "_param_constant6": "const_noop", "conv2d_2": "Conv2DOp", "empty_2": "unknown", "_param_constant7": "const_noop", "_param_constant8": "const_noop", "_tensor_constant4": "const_noop", "_tensor_constant5": "const_noop", "_native_batch_norm_legit_no_training_2": "BatchNormOp", "getitem_6": "GatherOp", "getitem_7": "GatherOp", "getitem_8": "GatherOp", "silu__2": "SiluOp", "chunk": "SplitOp", "getitem_9": "GatherOp", "getitem_10": "GatherOp", "_param_constant9": "const_noop", "conv2d_3": "Conv2DOp", "empty_3": "unknown", "_param_constant10": "const_noop", "_param_constant11": "const_noop", "_tensor_constant6": "const_noop", "_tensor_constant7": "const_noop", "_native_batch_norm_legit_no_training_3": "BatchNormOp", "getitem_11": "GatherOp", "getitem_12": "GatherOp", "getitem_13": "GatherOp", "silu__3": "SiluOp", "_param_constant12": "const_noop", "conv2d_4": "Conv2DOp", "empty_4": "unknown", "_param_constant13": "const_noop", "_param_constant14": "const_noop", "_tensor_constant8": "const_noop", "_tensor_constant9": "const_noop", "_native_batch_norm_legit_no_training_4": "BatchNormOp", "getitem_14": "GatherOp", "getitem_15": "GatherOp", "getitem_16": "GatherOp", "silu__4": "SiluOp", "add": "AddOp", "cat": "CatOp", "_param_constant15": "const_noop", "conv2d_5": "Conv2DOp", "empty_5": "unknown", "_param_constant16": "const_noop", "_param_constant17": "const_noop", "_tensor_constant10": "const_noop", "_tensor_constant11": "const_noop", "_native_batch_norm_legit_no_training_5": "BatchNormOp", "getitem_17": "GatherOp", "getitem_18": "GatherOp", "getitem_19": "GatherOp", "silu__5": "SiluOp", "_param_constant18": "const_noop", "conv2d_6": "Conv2DOp", "empty_6": "unknown", "_param_constant19": "const_noop", "_param_constant20": "const_noop", "_tensor_constant12": "const_noop", "_tensor_constant13": "const_noop", "_native_batch_norm_legit_no_training_6": "BatchNormOp", "getitem_20": "GatherOp", "getitem_21": "GatherOp", "getitem_22": "GatherOp", "silu__6": "SiluOp", "_param_constant21": "const_noop", "conv2d_7": "Conv2DOp", "empty_7": "unknown", "_param_constant22": "const_noop", "_param_constant23": "const_noop", "_tensor_constant14": "const_noop", "_tensor_constant15": "const_noop", "_native_batch_norm_legit_no_training_7": "BatchNormOp", "getitem_23": "GatherOp", "getitem_24": "GatherOp", "getitem_25": "GatherOp", "silu__7": "SiluOp", "chunk_1": "SplitOp", "getitem_26": "GatherOp", "getitem_27": "GatherOp", "_param_constant24": "const_noop", "conv2d_8": "Conv2DOp", "empty_8": "unknown", "_param_constant25": "const_noop", "_param_constant26": "const_noop", "_tensor_constant16": "const_noop", "_tensor_constant17": "const_noop", "_native_batch_norm_legit_no_training_8": "BatchNormOp", "getitem_28": "GatherOp", "getitem_29": "GatherOp", "getitem_30": "GatherOp", "silu__8": "SiluOp", "_param_constant27": "const_noop", "conv2d_9": "Conv2DOp", "empty_9": "unknown", "_param_constant28": "const_noop", "_param_constant29": "const_noop", "_tensor_constant18": "const_noop", "_tensor_constant19": "const_noop", "_native_batch_norm_legit_no_training_9": "BatchNormOp", "getitem_31": "GatherOp", "getitem_32": "GatherOp", "getitem_33": "GatherOp", "silu__9": "SiluOp", "add_1": "AddOp", "_param_constant30": "const_noop", "conv2d_10": "Conv2DOp", "empty_10": "unknown", "_param_constant31": "const_noop", "_param_constant32": "const_noop", "_tensor_constant20": "const_noop", "_tensor_constant21": "const_noop", "_native_batch_norm_legit_no_training_10": "BatchNormOp", "getitem_34": "GatherOp", "getitem_35": "GatherOp", "getitem_36": "GatherOp", "silu__10": "SiluOp", "_param_constant33": "const_noop", "conv2d_11": "Conv2DOp", "empty_11": "unknown", "_param_constant34": "const_noop", "_param_constant35": "const_noop", "_tensor_constant22": "const_noop", "_tensor_constant23": "const_noop", "_native_batch_norm_legit_no_training_11": "BatchNormOp", "getitem_37": "GatherOp", "getitem_38": "GatherOp", "getitem_39": "GatherOp", "silu__11": "SiluOp", "add_2": "AddOp", "cat_1": "CatOp", "_param_constant36": "const_noop", "conv2d_12": "Conv2DOp", "empty_12": "unknown", "_param_constant37": "const_noop", "_param_constant38": "const_noop", "_tensor_constant24": "const_noop", "_tensor_constant25": "const_noop", "_native_batch_norm_legit_no_training_12": "BatchNormOp", "getitem_40": "GatherOp", "getitem_41": "GatherOp", "getitem_42": "GatherOp", "silu__12": "SiluOp", "_param_constant39": "const_noop", "conv2d_13": "Conv2DOp", "empty_13": "unknown", "_param_constant40": "const_noop", "_param_constant41": "const_noop", "_tensor_constant26": "const_noop", "_tensor_constant27": "const_noop", "_native_batch_norm_legit_no_training_13": "BatchNormOp", "getitem_43": "GatherOp", "getitem_44": "GatherOp", "getitem_45": "GatherOp", "silu__13": "SiluOp", "_param_constant42": "const_noop", "conv2d_14": "Conv2DOp", "empty_14": "unknown", "_param_constant43": "const_noop", "_param_constant44": "const_noop", "_tensor_constant28": "const_noop", "_tensor_constant29": "const_noop", "_native_batch_norm_legit_no_training_14": "BatchNormOp", "getitem_46": "GatherOp", "getitem_47": "GatherOp", "getitem_48": "GatherOp", "silu__14": "SiluOp", "chunk_2": "SplitOp", "getitem_49": "GatherOp", "getitem_50": "GatherOp", "_param_constant45": "const_noop", "conv2d_15": "Conv2DOp", "empty_15": "unknown", "_param_constant46": "const_noop", "_param_constant47": "const_noop", "_tensor_constant30": "const_noop", "_tensor_constant31": "const_noop", "_native_batch_norm_legit_no_training_15": "BatchNormOp", "getitem_51": "GatherOp", "getitem_52": "GatherOp", "getitem_53": "GatherOp", "silu__15": "SiluOp", "_param_constant48": "const_noop", "conv2d_16": "Conv2DOp", "empty_16": "unknown", "_param_constant49": "const_noop", "_param_constant50": "const_noop", "_tensor_constant32": "const_noop", "_tensor_constant33": "const_noop", "_native_batch_norm_legit_no_training_16": "BatchNormOp", "getitem_54": "GatherOp", "getitem_55": "GatherOp", "getitem_56": "GatherOp", "silu__16": "SiluOp", "add_3": "AddOp", "_param_constant51": "const_noop", "conv2d_17": "Conv2DOp", "empty_17": "unknown", "_param_constant52": "const_noop", "_param_constant53": "const_noop", "_tensor_constant34": "const_noop", "_tensor_constant35": "const_noop", "_native_batch_norm_legit_no_training_17": "BatchNormOp", "getitem_57": "GatherOp", "getitem_58": "GatherOp", "getitem_59": "GatherOp", "silu__17": "SiluOp", "_param_constant54": "const_noop", "conv2d_18": "Conv2DOp", "empty_18": "unknown", "_param_constant55": "const_noop", "_param_constant56": "const_noop", "_tensor_constant36": "const_noop", "_tensor_constant37": "const_noop", "_native_batch_norm_legit_no_training_18": "BatchNormOp", "getitem_60": "GatherOp", "getitem_61": "GatherOp", "getitem_62": "GatherOp", "silu__18": "SiluOp", "add_4": "AddOp", "cat_2": "CatOp", "_param_constant57": "const_noop", "conv2d_19": "Conv2DOp", "empty_19": "unknown", "_param_constant58": "const_noop", "_param_constant59": "const_noop", "_tensor_constant38": "const_noop", "_tensor_constant39": "const_noop", "_native_batch_norm_legit_no_training_19": "BatchNormOp", "getitem_63": "GatherOp", "getitem_64": "GatherOp", "getitem_65": "GatherOp", "silu__19": "SiluOp", "_param_constant60": "const_noop", "conv2d_20": "Conv2DOp", "empty_20": "unknown", "_param_constant61": "const_noop", "_param_constant62": "const_noop", "_tensor_constant40": "const_noop", "_tensor_constant41": "const_noop", "_native_batch_norm_legit_no_training_20": "BatchNormOp", "getitem_66": "GatherOp", "getitem_67": "GatherOp", "getitem_68": "GatherOp", "silu__20": "SiluOp", "_param_constant63": "const_noop", "conv2d_21": "Conv2DOp", "empty_21": "unknown", "_param_constant64": "const_noop", "_param_constant65": "const_noop", "_tensor_constant42": "const_noop", "_tensor_constant43": "const_noop", "_native_batch_norm_legit_no_training_21": "BatchNormOp", "getitem_69": "GatherOp", "getitem_70": "GatherOp", "getitem_71": "GatherOp", "silu__21": "SiluOp", "chunk_3": "SplitOp", "getitem_72": "GatherOp", "getitem_73": "GatherOp", "_param_constant66": "const_noop", "conv2d_22": "Conv2DOp", "empty_22": "unknown", "_param_constant67": "const_noop", "_param_constant68": "const_noop", "_tensor_constant44": "const_noop", "_tensor_constant45": "const_noop", "_native_batch_norm_legit_no_training_22": "BatchNormOp", "getitem_74": "GatherOp", "getitem_75": "GatherOp", "getitem_76": "GatherOp", "silu__22": "SiluOp", "_param_constant69": "const_noop", "conv2d_23": "Conv2DOp", "empty_23": "unknown", "_param_constant70": "const_noop", "_param_constant71": "const_noop", "_tensor_constant46": "const_noop", "_tensor_constant47": "const_noop", "_native_batch_norm_legit_no_training_23": "BatchNormOp", "getitem_77": "GatherOp", "getitem_78": "GatherOp", "getitem_79": "GatherOp", "silu__23": "SiluOp", "add_5": "AddOp", "cat_3": "CatOp", "_param_constant72": "const_noop", "conv2d_24": "Conv2DOp", "empty_24": "unknown", "_param_constant73": "const_noop", "_param_constant74": "const_noop", "_tensor_constant48": "const_noop", "_tensor_constant49": "const_noop", "_native_batch_norm_legit_no_training_24": "BatchNormOp", "getitem_80": "GatherOp", "getitem_81": "GatherOp", "getitem_82": "GatherOp", "silu__24": "SiluOp", "_param_constant75": "const_noop", "conv2d_25": "Conv2DOp", "empty_25": "unknown", "_param_constant76": "const_noop", "_param_constant77": "const_noop", "_tensor_constant50": "const_noop", "_tensor_constant51": "const_noop", "_native_batch_norm_legit_no_training_25": "BatchNormOp", "getitem_83": "GatherOp", "getitem_84": "GatherOp", "getitem_85": "GatherOp", "silu__25": "SiluOp", "max_pool2d": "MaxPool2DOp", "max_pool2d_1": "MaxPool2DOp", "max_pool2d_2": "MaxPool2DOp", "cat_4": "CatOp", "_param_constant78": "const_noop", "conv2d_26": "Conv2DOp", "empty_26": "unknown", "_param_constant79": "const_noop", "_param_constant80": "const_noop", "_tensor_constant52": "const_noop", "_tensor_constant53": "const_noop", "_native_batch_norm_legit_no_training_26": "BatchNormOp", "getitem_86": "GatherOp", "getitem_87": "GatherOp", "getitem_88": "GatherOp", "silu__26": "SiluOp", "upsample_nearest2d": "InterpolateOp", "cat_5": "CatOp", "_param_constant81": "const_noop", "conv2d_27": "Conv2DOp", "empty_27": "unknown", "_param_constant82": "const_noop", "_param_constant83": "const_noop", "_tensor_constant54": "const_noop", "_tensor_constant55": "const_noop", "_native_batch_norm_legit_no_training_27": "BatchNormOp", "getitem_89": "GatherOp", "getitem_90": "GatherOp", "getitem_91": "GatherOp", "silu__27": "SiluOp", "chunk_4": "SplitOp", "getitem_92": "GatherOp", "getitem_93": "GatherOp", "_param_constant84": "const_noop", "conv2d_28": "Conv2DOp", "empty_28": "unknown", "_param_constant85": "const_noop", "_param_constant86": "const_noop", "_tensor_constant56": "const_noop", "_tensor_constant57": "const_noop", "_native_batch_norm_legit_no_training_28": "BatchNormOp", "getitem_94": "GatherOp", "getitem_95": "GatherOp", "getitem_96": "GatherOp", "silu__28": "SiluOp", "_param_constant87": "const_noop", "conv2d_29": "Conv2DOp", "empty_29": "unknown", "_param_constant88": "const_noop", "_param_constant89": "const_noop", "_tensor_constant58": "const_noop", "_tensor_constant59": "const_noop", "_native_batch_norm_legit_no_training_29": "BatchNormOp", "getitem_97": "GatherOp", "getitem_98": "GatherOp", "getitem_99": "GatherOp", "silu__29": "SiluOp", "cat_6": "CatOp", "_param_constant90": "const_noop", "conv2d_30": "Conv2DOp", "empty_30": "unknown", "_param_constant91": "const_noop", "_param_constant92": "const_noop", "_tensor_constant60": "const_noop", "_tensor_constant61": "const_noop", "_native_batch_norm_legit_no_training_30": "BatchNormOp", "getitem_100": "GatherOp", "getitem_101": "GatherOp", "getitem_102": "GatherOp", "silu__30": "SiluOp", "upsample_nearest2d_1": "InterpolateOp", "cat_7": "CatOp", "_param_constant93": "const_noop", "conv2d_31": "Conv2DOp", "empty_31": "unknown", "_param_constant94": "const_noop", "_param_constant95": "const_noop", "_tensor_constant62": "const_noop", "_tensor_constant63": "const_noop", "_native_batch_norm_legit_no_training_31": "BatchNormOp", "getitem_103": "GatherOp", "getitem_104": "GatherOp", "getitem_105": "GatherOp", "silu__31": "SiluOp", "chunk_5": "SplitOp", "getitem_106": "GatherOp", "getitem_107": "GatherOp", "_param_constant96": "const_noop", "conv2d_32": "Conv2DOp", "empty_32": "unknown", "_param_constant97": "const_noop", "_param_constant98": "const_noop", "_tensor_constant64": "const_noop", "_tensor_constant65": "const_noop", "_native_batch_norm_legit_no_training_32": "BatchNormOp", "getitem_108": "GatherOp", "getitem_109": "GatherOp", "getitem_110": "GatherOp", "silu__32": "SiluOp", "_param_constant99": "const_noop", "conv2d_33": "Conv2DOp", "empty_33": "unknown", "_param_constant100": "const_noop", "_param_constant101": "const_noop", "_tensor_constant66": "const_noop", "_tensor_constant67": "const_noop", "_native_batch_norm_legit_no_training_33": "BatchNormOp", "getitem_111": "GatherOp", "getitem_112": "GatherOp", "getitem_113": "GatherOp", "silu__33": "SiluOp", "cat_8": "CatOp", "_param_constant102": "const_noop", "conv2d_34": "Conv2DOp", "empty_34": "unknown", "_param_constant103": "const_noop", "_param_constant104": "const_noop", "_tensor_constant68": "const_noop", "_tensor_constant69": "const_noop", "_native_batch_norm_legit_no_training_34": "BatchNormOp", "getitem_114": "GatherOp", "getitem_115": "GatherOp", "getitem_116": "GatherOp", "silu__34": "SiluOp", "_param_constant105": "const_noop", "conv2d_35": "Conv2DOp", "empty_35": "unknown", "_param_constant106": "const_noop", "_param_constant107": "const_noop", "_tensor_constant70": "const_noop", "_tensor_constant71": "const_noop", "_native_batch_norm_legit_no_training_35": "BatchNormOp", "getitem_117": "GatherOp", "getitem_118": "GatherOp", "getitem_119": "GatherOp", "silu__35": "SiluOp", "cat_9": "CatOp", "_param_constant108": "const_noop", "conv2d_36": "Conv2DOp", "empty_36": "unknown", "_param_constant109": "const_noop", "_param_constant110": "const_noop", "_tensor_constant72": "const_noop", "_tensor_constant73": "const_noop", "_native_batch_norm_legit_no_training_36": "BatchNormOp", "getitem_120": "GatherOp", "getitem_121": "GatherOp", "getitem_122": "GatherOp", "silu__36": "SiluOp", "chunk_6": "SplitOp", "getitem_123": "GatherOp", "getitem_124": "GatherOp", "_param_constant111": "const_noop", "conv2d_37": "Conv2DOp", "empty_37": "unknown", "_param_constant112": "const_noop", "_param_constant113": "const_noop", "_tensor_constant74": "const_noop", "_tensor_constant75": "const_noop", "_native_batch_norm_legit_no_training_37": "BatchNormOp", "getitem_125": "GatherOp", "getitem_126": "GatherOp", "getitem_127": "GatherOp", "silu__37": "SiluOp", "_param_constant114": "const_noop", "conv2d_38": "Conv2DOp", "empty_38": "unknown", "_param_constant115": "const_noop", "_param_constant116": "const_noop", "_tensor_constant76": "const_noop", "_tensor_constant77": "const_noop", "_native_batch_norm_legit_no_training_38": "BatchNormOp", "getitem_128": "GatherOp", "getitem_129": "GatherOp", "getitem_130": "GatherOp", "silu__38": "SiluOp", "cat_10": "CatOp", "_param_constant117": "const_noop", "conv2d_39": "Conv2DOp", "empty_39": "unknown", "_param_constant118": "const_noop", "_param_constant119": "const_noop", "_tensor_constant78": "const_noop", "_tensor_constant79": "const_noop", "_native_batch_norm_legit_no_training_39": "BatchNormOp", "getitem_131": "GatherOp", "getitem_132": "GatherOp", "getitem_133": "GatherOp", "silu__39": "SiluOp", "_param_constant120": "const_noop", "conv2d_40": "Conv2DOp", "empty_40": "unknown", "_param_constant121": "const_noop", "_param_constant122": "const_noop", "_tensor_constant80": "const_noop", "_tensor_constant81": "const_noop", "_native_batch_norm_legit_no_training_40": "BatchNormOp", "getitem_134": "GatherOp", "getitem_135": "GatherOp", "getitem_136": "GatherOp", "silu__40": "SiluOp", "cat_11": "CatOp", "_param_constant123": "const_noop", "conv2d_41": "Conv2DOp", "empty_41": "unknown", "_param_constant124": "const_noop", "_param_constant125": "const_noop", "_tensor_constant82": "const_noop", "_tensor_constant83": "const_noop", "_native_batch_norm_legit_no_training_41": "BatchNormOp", "getitem_137": "GatherOp", "getitem_138": "GatherOp", "getitem_139": "GatherOp", "silu__41": "SiluOp", "chunk_7": "SplitOp", "getitem_140": "GatherOp", "getitem_141": "GatherOp", "_param_constant126": "const_noop", "conv2d_42": "Conv2DOp", "empty_42": "unknown", "_param_constant127": "const_noop", "_param_constant128": "const_noop", "_tensor_constant84": "const_noop", "_tensor_constant85": "const_noop", "_native_batch_norm_legit_no_training_42": "BatchNormOp", "getitem_142": "GatherOp", "getitem_143": "GatherOp", "getitem_144": "GatherOp", "silu__42": "SiluOp", "_param_constant129": "const_noop", "conv2d_43": "Conv2DOp", "empty_43": "unknown", "_param_constant130": "const_noop", "_param_constant131": "const_noop", "_tensor_constant86": "const_noop", "_tensor_constant87": "const_noop", "_native_batch_norm_legit_no_training_43": "BatchNormOp", "getitem_145": "GatherOp", "getitem_146": "GatherOp", "getitem_147": "GatherOp", "silu__43": "SiluOp", "cat_12": "CatOp", "_param_constant132": "const_noop", "conv2d_44": "Conv2DOp", "empty_44": "unknown", "_param_constant133": "const_noop", "_param_constant134": "const_noop", "_tensor_constant88": "const_noop", "_tensor_constant89": "const_noop", "_native_batch_norm_legit_no_training_44": "BatchNormOp", "getitem_148": "GatherOp", "getitem_149": "GatherOp", "getitem_150": "GatherOp", "silu__44": "SiluOp", "_param_constant135": "const_noop", "conv2d_45": "Conv2DOp", "empty_45": "unknown", "_param_constant136": "const_noop", "_param_constant137": "const_noop", "_tensor_constant90": "const_noop", "_tensor_constant91": "const_noop", "_native_batch_norm_legit_no_training_45": "BatchNormOp", "getitem_151": "GatherOp", "getitem_152": "GatherOp", "getitem_153": "GatherOp", "silu__45": "SiluOp", "_param_constant138": "const_noop", "conv2d_46": "Conv2DOp", "empty_46": "unknown", "_param_constant139": "const_noop", "_param_constant140": "const_noop", "_tensor_constant92": "const_noop", "_tensor_constant93": "const_noop", "_native_batch_norm_legit_no_training_46": "BatchNormOp", "getitem_154": "GatherOp", "getitem_155": "GatherOp", "getitem_156": "GatherOp", "silu__46": "SiluOp", "_param_constant141": "const_noop", "_param_constant142": "const_noop", "conv2d_47": "Conv2DOp", "_param_constant143": "const_noop", "conv2d_48": "Conv2DOp", "empty_47": "unknown", "_param_constant144": "const_noop", "_param_constant145": "const_noop", "_tensor_constant94": "const_noop", "_tensor_constant95": "const_noop", "_native_batch_norm_legit_no_training_47": "BatchNormOp", "getitem_157": "GatherOp", "getitem_158": "GatherOp", "getitem_159": "GatherOp", "silu__47": "SiluOp", "_param_constant146": "const_noop", "conv2d_49": "Conv2DOp", "empty_48": "unknown", "_param_constant147": "const_noop", "_param_constant148": "const_noop", "_tensor_constant96": "const_noop", "_tensor_constant97": "const_noop", "_native_batch_norm_legit_no_training_48": "BatchNormOp", "getitem_160": "GatherOp", "getitem_161": "GatherOp", "getitem_162": "GatherOp", "silu__48": "SiluOp", "_param_constant149": "const_noop", "_param_constant150": "const_noop", "conv2d_50": "Conv2DOp", "cat_13": "CatOp", "_param_constant151": "const_noop", "conv2d_51": "Conv2DOp", "empty_49": "unknown", "_param_constant152": "const_noop", "_param_constant153": "const_noop", "_tensor_constant98": "const_noop", "_tensor_constant99": "const_noop", "_native_batch_norm_legit_no_training_49": "BatchNormOp", "getitem_163": "GatherOp", "getitem_164": "GatherOp", "getitem_165": "GatherOp", "silu__49": "SiluOp", "_param_constant154": "const_noop", "conv2d_52": "Conv2DOp", "empty_50": "unknown", "_param_constant155": "const_noop", "_param_constant156": "const_noop", "_tensor_constant100": "const_noop", "_tensor_constant101": "const_noop", "_native_batch_norm_legit_no_training_50": "BatchNormOp", "getitem_166": "GatherOp", "getitem_167": "GatherOp", "getitem_168": "GatherOp", "silu__50": "SiluOp", "_param_constant157": "const_noop", "_param_constant158": "const_noop", "conv2d_53": "Conv2DOp", "_param_constant159": "const_noop", "conv2d_54": "Conv2DOp", "empty_51": "unknown", "_param_constant160": "const_noop", "_param_constant161": "const_noop", "_tensor_constant102": "const_noop", "_tensor_constant103": "const_noop", "_native_batch_norm_legit_no_training_51": "BatchNormOp", "getitem_169": "GatherOp", "getitem_170": "GatherOp", "getitem_171": "GatherOp", "silu__51": "SiluOp", "_param_constant162": "const_noop", "conv2d_55": "Conv2DOp", "empty_52": "unknown", "_param_constant163": "const_noop", "_param_constant164": "const_noop", "_tensor_constant104": "const_noop", "_tensor_constant105": "const_noop", "_native_batch_norm_legit_no_training_52": "BatchNormOp", "getitem_172": "GatherOp", "getitem_173": "GatherOp", "getitem_174": "GatherOp", "silu__52": "SiluOp", "_param_constant165": "const_noop", "_param_constant166": "const_noop", "conv2d_56": "Conv2DOp", "cat_14": "CatOp", "_param_constant167": "const_noop", "conv2d_57": "Conv2DOp", "empty_53": "unknown", "_param_constant168": "const_noop", "_param_constant169": "const_noop", "_tensor_constant106": "const_noop", "_tensor_constant107": "const_noop", "_native_batch_norm_legit_no_training_53": "BatchNormOp", "getitem_175": "GatherOp", "getitem_176": "GatherOp", "getitem_177": "GatherOp", "silu__53": "SiluOp", "_param_constant170": "const_noop", "conv2d_58": "Conv2DOp", "empty_54": "unknown", "_param_constant171": "const_noop", "_param_constant172": "const_noop", "_tensor_constant108": "const_noop", "_tensor_constant109": "const_noop", "_native_batch_norm_legit_no_training_54": "BatchNormOp", "getitem_178": "GatherOp", "getitem_179": "GatherOp", "getitem_180": "GatherOp", "silu__54": "SiluOp", "_param_constant173": "const_noop", "_param_constant174": "const_noop", "conv2d_59": "Conv2DOp", "_param_constant175": "const_noop", "conv2d_60": "Conv2DOp", "empty_55": "unknown", "_param_constant176": "const_noop", "_param_constant177": "const_noop", "_tensor_constant110": "const_noop", "_tensor_constant111": "const_noop", "_native_batch_norm_legit_no_training_55": "BatchNormOp", "getitem_181": "GatherOp", "getitem_182": "GatherOp", "getitem_183": "GatherOp", "silu__55": "SiluOp", "_param_constant178": "const_noop", "conv2d_61": "Conv2DOp", "empty_56": "unknown", "_param_constant179": "const_noop", "_param_constant180": "const_noop", "_tensor_constant112": "const_noop", "_tensor_constant113": "const_noop", "_native_batch_norm_legit_no_training_56": "BatchNormOp", "getitem_184": "GatherOp", "getitem_185": "GatherOp", "getitem_186": "GatherOp", "silu__56": "SiluOp", "_param_constant181": "const_noop", "_param_constant182": "const_noop", "conv2d_62": "Conv2DOp", "cat_15": "CatOp", "view": "ReshapeOp", "view_1": "ReshapeOp", "view_2": "ReshapeOp", "cat_16": "CatOp", "split_with_sizes": "SplitOp", "getitem_187": "GatherOp", "getitem_188": "GatherOp", "view_3": "ReshapeOp", "transpose": "TransposeOp", "softmax": "SoftmaxOp", "_param_constant183": "const_noop", "conv2d_63": "Conv2DOp", "view_4": "ReshapeOp", "_tensor_constant114": "const_noop", "unsqueeze": "ReshapeOp", "chunk_8": "SplitOp", "getitem_189": "GatherOp", "getitem_190": "GatherOp", "sub": "SubOp", "add_6": "AddOp", "add_7": "AddOp", "div": "DivOp", "sub_1": "SubOp", "cat_17": "CatOp", "_tensor_constant115": "const_noop", "mul": "MulOp", "sigmoid": "SigmoidOp", "cat_18": "CatOp", "output": "output_noop"}
\ No newline at end of file
diff --git a/tests/torch/data/reference_graphs/fx/yolov8n.dot b/tests/torch/data/reference_graphs/fx/yolov8n.dot
index ab54e352f8f..a964a41be9d 100644
--- a/tests/torch/data/reference_graphs/fx/yolov8n.dot
+++ b/tests/torch/data/reference_graphs/fx/yolov8n.dot
@@ -11,7 +11,7 @@ strict digraph  {
 "9 getitem" [id=9, type=__getitem__];
 "10 getitem_1" [id=10, type=__getitem__];
 "11 getitem_2" [id=11, type=__getitem__];
-"12 silu" [id=12, type=silu];
+"12 silu_" [id=12, type=silu_];
 "13 _param_constant3" [id=13, type=get_attr];
 "14 conv2d_1" [id=14, type=conv2d];
 "15 empty_1" [id=15, type=empty];
@@ -23,7 +23,7 @@ strict digraph  {
 "21 getitem_3" [id=21, type=__getitem__];
 "22 getitem_4" [id=22, type=__getitem__];
 "23 getitem_5" [id=23, type=__getitem__];
-"24 silu_1" [id=24, type=silu];
+"24 silu__1" [id=24, type=silu_];
 "25 _param_constant6" [id=25, type=get_attr];
 "26 conv2d_2" [id=26, type=conv2d];
 "27 empty_2" [id=27, type=empty];
@@ -35,7 +35,7 @@ strict digraph  {
 "33 getitem_6" [id=33, type=__getitem__];
 "34 getitem_7" [id=34, type=__getitem__];
 "35 getitem_8" [id=35, type=__getitem__];
-"36 silu_2" [id=36, type=silu];
+"36 silu__2" [id=36, type=silu_];
 "37 chunk" [id=37, type=chunk];
 "38 getitem_9" [id=38, type=__getitem__];
 "39 getitem_10" [id=39, type=__getitem__];
@@ -50,7 +50,7 @@ strict digraph  {
 "48 getitem_11" [id=48, type=__getitem__];
 "49 getitem_12" [id=49, type=__getitem__];
 "50 getitem_13" [id=50, type=__getitem__];
-"51 silu_3" [id=51, type=silu];
+"51 silu__3" [id=51, type=silu_];
 "52 _param_constant12" [id=52, type=get_attr];
 "53 conv2d_4" [id=53, type=conv2d];
 "54 empty_4" [id=54, type=empty];
@@ -62,7 +62,7 @@ strict digraph  {
 "60 getitem_14" [id=60, type=__getitem__];
 "61 getitem_15" [id=61, type=__getitem__];
 "62 getitem_16" [id=62, type=__getitem__];
-"63 silu_4" [id=63, type=silu];
+"63 silu__4" [id=63, type=silu_];
 "64 add" [id=64, type=add];
 "65 cat" [id=65, type=cat];
 "66 _param_constant15" [id=66, type=get_attr];
@@ -76,7 +76,7 @@ strict digraph  {
 "74 getitem_17" [id=74, type=__getitem__];
 "75 getitem_18" [id=75, type=__getitem__];
 "76 getitem_19" [id=76, type=__getitem__];
-"77 silu_5" [id=77, type=silu];
+"77 silu__5" [id=77, type=silu_];
 "78 _param_constant18" [id=78, type=get_attr];
 "79 conv2d_6" [id=79, type=conv2d];
 "80 empty_6" [id=80, type=empty];
@@ -88,7 +88,7 @@ strict digraph  {
 "86 getitem_20" [id=86, type=__getitem__];
 "87 getitem_21" [id=87, type=__getitem__];
 "88 getitem_22" [id=88, type=__getitem__];
-"89 silu_6" [id=89, type=silu];
+"89 silu__6" [id=89, type=silu_];
 "90 _param_constant21" [id=90, type=get_attr];
 "91 conv2d_7" [id=91, type=conv2d];
 "92 empty_7" [id=92, type=empty];
@@ -100,7 +100,7 @@ strict digraph  {
 "98 getitem_23" [id=98, type=__getitem__];
 "99 getitem_24" [id=99, type=__getitem__];
 "100 getitem_25" [id=100, type=__getitem__];
-"101 silu_7" [id=101, type=silu];
+"101 silu__7" [id=101, type=silu_];
 "102 chunk_1" [id=102, type=chunk];
 "103 getitem_26" [id=103, type=__getitem__];
 "104 getitem_27" [id=104, type=__getitem__];
@@ -115,7 +115,7 @@ strict digraph  {
 "113 getitem_28" [id=113, type=__getitem__];
 "114 getitem_29" [id=114, type=__getitem__];
 "115 getitem_30" [id=115, type=__getitem__];
-"116 silu_8" [id=116, type=silu];
+"116 silu__8" [id=116, type=silu_];
 "117 _param_constant27" [id=117, type=get_attr];
 "118 conv2d_9" [id=118, type=conv2d];
 "119 empty_9" [id=119, type=empty];
@@ -127,7 +127,7 @@ strict digraph  {
 "125 getitem_31" [id=125, type=__getitem__];
 "126 getitem_32" [id=126, type=__getitem__];
 "127 getitem_33" [id=127, type=__getitem__];
-"128 silu_9" [id=128, type=silu];
+"128 silu__9" [id=128, type=silu_];
 "129 add_1" [id=129, type=add];
 "130 _param_constant30" [id=130, type=get_attr];
 "131 conv2d_10" [id=131, type=conv2d];
@@ -140,7 +140,7 @@ strict digraph  {
 "138 getitem_34" [id=138, type=__getitem__];
 "139 getitem_35" [id=139, type=__getitem__];
 "140 getitem_36" [id=140, type=__getitem__];
-"141 silu_10" [id=141, type=silu];
+"141 silu__10" [id=141, type=silu_];
 "142 _param_constant33" [id=142, type=get_attr];
 "143 conv2d_11" [id=143, type=conv2d];
 "144 empty_11" [id=144, type=empty];
@@ -152,7 +152,7 @@ strict digraph  {
 "150 getitem_37" [id=150, type=__getitem__];
 "151 getitem_38" [id=151, type=__getitem__];
 "152 getitem_39" [id=152, type=__getitem__];
-"153 silu_11" [id=153, type=silu];
+"153 silu__11" [id=153, type=silu_];
 "154 add_2" [id=154, type=add];
 "155 cat_1" [id=155, type=cat];
 "156 _param_constant36" [id=156, type=get_attr];
@@ -166,7 +166,7 @@ strict digraph  {
 "164 getitem_40" [id=164, type=__getitem__];
 "165 getitem_41" [id=165, type=__getitem__];
 "166 getitem_42" [id=166, type=__getitem__];
-"167 silu_12" [id=167, type=silu];
+"167 silu__12" [id=167, type=silu_];
 "168 _param_constant39" [id=168, type=get_attr];
 "169 conv2d_13" [id=169, type=conv2d];
 "170 empty_13" [id=170, type=empty];
@@ -178,7 +178,7 @@ strict digraph  {
 "176 getitem_43" [id=176, type=__getitem__];
 "177 getitem_44" [id=177, type=__getitem__];
 "178 getitem_45" [id=178, type=__getitem__];
-"179 silu_13" [id=179, type=silu];
+"179 silu__13" [id=179, type=silu_];
 "180 _param_constant42" [id=180, type=get_attr];
 "181 conv2d_14" [id=181, type=conv2d];
 "182 empty_14" [id=182, type=empty];
@@ -190,7 +190,7 @@ strict digraph  {
 "188 getitem_46" [id=188, type=__getitem__];
 "189 getitem_47" [id=189, type=__getitem__];
 "190 getitem_48" [id=190, type=__getitem__];
-"191 silu_14" [id=191, type=silu];
+"191 silu__14" [id=191, type=silu_];
 "192 chunk_2" [id=192, type=chunk];
 "193 getitem_49" [id=193, type=__getitem__];
 "194 getitem_50" [id=194, type=__getitem__];
@@ -205,7 +205,7 @@ strict digraph  {
 "203 getitem_51" [id=203, type=__getitem__];
 "204 getitem_52" [id=204, type=__getitem__];
 "205 getitem_53" [id=205, type=__getitem__];
-"206 silu_15" [id=206, type=silu];
+"206 silu__15" [id=206, type=silu_];
 "207 _param_constant48" [id=207, type=get_attr];
 "208 conv2d_16" [id=208, type=conv2d];
 "209 empty_16" [id=209, type=empty];
@@ -217,7 +217,7 @@ strict digraph  {
 "215 getitem_54" [id=215, type=__getitem__];
 "216 getitem_55" [id=216, type=__getitem__];
 "217 getitem_56" [id=217, type=__getitem__];
-"218 silu_16" [id=218, type=silu];
+"218 silu__16" [id=218, type=silu_];
 "219 add_3" [id=219, type=add];
 "220 _param_constant51" [id=220, type=get_attr];
 "221 conv2d_17" [id=221, type=conv2d];
@@ -230,7 +230,7 @@ strict digraph  {
 "228 getitem_57" [id=228, type=__getitem__];
 "229 getitem_58" [id=229, type=__getitem__];
 "230 getitem_59" [id=230, type=__getitem__];
-"231 silu_17" [id=231, type=silu];
+"231 silu__17" [id=231, type=silu_];
 "232 _param_constant54" [id=232, type=get_attr];
 "233 conv2d_18" [id=233, type=conv2d];
 "234 empty_18" [id=234, type=empty];
@@ -242,7 +242,7 @@ strict digraph  {
 "240 getitem_60" [id=240, type=__getitem__];
 "241 getitem_61" [id=241, type=__getitem__];
 "242 getitem_62" [id=242, type=__getitem__];
-"243 silu_18" [id=243, type=silu];
+"243 silu__18" [id=243, type=silu_];
 "244 add_4" [id=244, type=add];
 "245 cat_2" [id=245, type=cat];
 "246 _param_constant57" [id=246, type=get_attr];
@@ -256,7 +256,7 @@ strict digraph  {
 "254 getitem_63" [id=254, type=__getitem__];
 "255 getitem_64" [id=255, type=__getitem__];
 "256 getitem_65" [id=256, type=__getitem__];
-"257 silu_19" [id=257, type=silu];
+"257 silu__19" [id=257, type=silu_];
 "258 _param_constant60" [id=258, type=get_attr];
 "259 conv2d_20" [id=259, type=conv2d];
 "260 empty_20" [id=260, type=empty];
@@ -268,7 +268,7 @@ strict digraph  {
 "266 getitem_66" [id=266, type=__getitem__];
 "267 getitem_67" [id=267, type=__getitem__];
 "268 getitem_68" [id=268, type=__getitem__];
-"269 silu_20" [id=269, type=silu];
+"269 silu__20" [id=269, type=silu_];
 "270 _param_constant63" [id=270, type=get_attr];
 "271 conv2d_21" [id=271, type=conv2d];
 "272 empty_21" [id=272, type=empty];
@@ -280,7 +280,7 @@ strict digraph  {
 "278 getitem_69" [id=278, type=__getitem__];
 "279 getitem_70" [id=279, type=__getitem__];
 "280 getitem_71" [id=280, type=__getitem__];
-"281 silu_21" [id=281, type=silu];
+"281 silu__21" [id=281, type=silu_];
 "282 chunk_3" [id=282, type=chunk];
 "283 getitem_72" [id=283, type=__getitem__];
 "284 getitem_73" [id=284, type=__getitem__];
@@ -295,7 +295,7 @@ strict digraph  {
 "293 getitem_74" [id=293, type=__getitem__];
 "294 getitem_75" [id=294, type=__getitem__];
 "295 getitem_76" [id=295, type=__getitem__];
-"296 silu_22" [id=296, type=silu];
+"296 silu__22" [id=296, type=silu_];
 "297 _param_constant69" [id=297, type=get_attr];
 "298 conv2d_23" [id=298, type=conv2d];
 "299 empty_23" [id=299, type=empty];
@@ -307,7 +307,7 @@ strict digraph  {
 "305 getitem_77" [id=305, type=__getitem__];
 "306 getitem_78" [id=306, type=__getitem__];
 "307 getitem_79" [id=307, type=__getitem__];
-"308 silu_23" [id=308, type=silu];
+"308 silu__23" [id=308, type=silu_];
 "309 add_5" [id=309, type=add];
 "310 cat_3" [id=310, type=cat];
 "311 _param_constant72" [id=311, type=get_attr];
@@ -321,7 +321,7 @@ strict digraph  {
 "319 getitem_80" [id=319, type=__getitem__];
 "320 getitem_81" [id=320, type=__getitem__];
 "321 getitem_82" [id=321, type=__getitem__];
-"322 silu_24" [id=322, type=silu];
+"322 silu__24" [id=322, type=silu_];
 "323 _param_constant75" [id=323, type=get_attr];
 "324 conv2d_25" [id=324, type=conv2d];
 "325 empty_25" [id=325, type=empty];
@@ -333,7 +333,7 @@ strict digraph  {
 "331 getitem_83" [id=331, type=__getitem__];
 "332 getitem_84" [id=332, type=__getitem__];
 "333 getitem_85" [id=333, type=__getitem__];
-"334 silu_25" [id=334, type=silu];
+"334 silu__25" [id=334, type=silu_];
 "335 max_pool2d" [id=335, type=max_pool2d];
 "336 max_pool2d_1" [id=336, type=max_pool2d];
 "337 max_pool2d_2" [id=337, type=max_pool2d];
@@ -349,7 +349,7 @@ strict digraph  {
 "347 getitem_86" [id=347, type=__getitem__];
 "348 getitem_87" [id=348, type=__getitem__];
 "349 getitem_88" [id=349, type=__getitem__];
-"350 silu_26" [id=350, type=silu];
+"350 silu__26" [id=350, type=silu_];
 "351 upsample_nearest2d" [id=351, type=upsample_nearest2d];
 "352 cat_5" [id=352, type=cat];
 "353 _param_constant81" [id=353, type=get_attr];
@@ -363,7 +363,7 @@ strict digraph  {
 "361 getitem_89" [id=361, type=__getitem__];
 "362 getitem_90" [id=362, type=__getitem__];
 "363 getitem_91" [id=363, type=__getitem__];
-"364 silu_27" [id=364, type=silu];
+"364 silu__27" [id=364, type=silu_];
 "365 chunk_4" [id=365, type=chunk];
 "366 getitem_92" [id=366, type=__getitem__];
 "367 getitem_93" [id=367, type=__getitem__];
@@ -378,7 +378,7 @@ strict digraph  {
 "376 getitem_94" [id=376, type=__getitem__];
 "377 getitem_95" [id=377, type=__getitem__];
 "378 getitem_96" [id=378, type=__getitem__];
-"379 silu_28" [id=379, type=silu];
+"379 silu__28" [id=379, type=silu_];
 "380 _param_constant87" [id=380, type=get_attr];
 "381 conv2d_29" [id=381, type=conv2d];
 "382 empty_29" [id=382, type=empty];
@@ -390,7 +390,7 @@ strict digraph  {
 "388 getitem_97" [id=388, type=__getitem__];
 "389 getitem_98" [id=389, type=__getitem__];
 "390 getitem_99" [id=390, type=__getitem__];
-"391 silu_29" [id=391, type=silu];
+"391 silu__29" [id=391, type=silu_];
 "392 cat_6" [id=392, type=cat];
 "393 _param_constant90" [id=393, type=get_attr];
 "394 conv2d_30" [id=394, type=conv2d];
@@ -403,7 +403,7 @@ strict digraph  {
 "401 getitem_100" [id=401, type=__getitem__];
 "402 getitem_101" [id=402, type=__getitem__];
 "403 getitem_102" [id=403, type=__getitem__];
-"404 silu_30" [id=404, type=silu];
+"404 silu__30" [id=404, type=silu_];
 "405 upsample_nearest2d_1" [id=405, type=upsample_nearest2d];
 "406 cat_7" [id=406, type=cat];
 "407 _param_constant93" [id=407, type=get_attr];
@@ -417,7 +417,7 @@ strict digraph  {
 "415 getitem_103" [id=415, type=__getitem__];
 "416 getitem_104" [id=416, type=__getitem__];
 "417 getitem_105" [id=417, type=__getitem__];
-"418 silu_31" [id=418, type=silu];
+"418 silu__31" [id=418, type=silu_];
 "419 chunk_5" [id=419, type=chunk];
 "420 getitem_106" [id=420, type=__getitem__];
 "421 getitem_107" [id=421, type=__getitem__];
@@ -432,7 +432,7 @@ strict digraph  {
 "430 getitem_108" [id=430, type=__getitem__];
 "431 getitem_109" [id=431, type=__getitem__];
 "432 getitem_110" [id=432, type=__getitem__];
-"433 silu_32" [id=433, type=silu];
+"433 silu__32" [id=433, type=silu_];
 "434 _param_constant99" [id=434, type=get_attr];
 "435 conv2d_33" [id=435, type=conv2d];
 "436 empty_33" [id=436, type=empty];
@@ -444,7 +444,7 @@ strict digraph  {
 "442 getitem_111" [id=442, type=__getitem__];
 "443 getitem_112" [id=443, type=__getitem__];
 "444 getitem_113" [id=444, type=__getitem__];
-"445 silu_33" [id=445, type=silu];
+"445 silu__33" [id=445, type=silu_];
 "446 cat_8" [id=446, type=cat];
 "447 _param_constant102" [id=447, type=get_attr];
 "448 conv2d_34" [id=448, type=conv2d];
@@ -457,7 +457,7 @@ strict digraph  {
 "455 getitem_114" [id=455, type=__getitem__];
 "456 getitem_115" [id=456, type=__getitem__];
 "457 getitem_116" [id=457, type=__getitem__];
-"458 silu_34" [id=458, type=silu];
+"458 silu__34" [id=458, type=silu_];
 "459 _param_constant105" [id=459, type=get_attr];
 "460 conv2d_35" [id=460, type=conv2d];
 "461 empty_35" [id=461, type=empty];
@@ -469,7 +469,7 @@ strict digraph  {
 "467 getitem_117" [id=467, type=__getitem__];
 "468 getitem_118" [id=468, type=__getitem__];
 "469 getitem_119" [id=469, type=__getitem__];
-"470 silu_35" [id=470, type=silu];
+"470 silu__35" [id=470, type=silu_];
 "471 cat_9" [id=471, type=cat];
 "472 _param_constant108" [id=472, type=get_attr];
 "473 conv2d_36" [id=473, type=conv2d];
@@ -482,7 +482,7 @@ strict digraph  {
 "480 getitem_120" [id=480, type=__getitem__];
 "481 getitem_121" [id=481, type=__getitem__];
 "482 getitem_122" [id=482, type=__getitem__];
-"483 silu_36" [id=483, type=silu];
+"483 silu__36" [id=483, type=silu_];
 "484 chunk_6" [id=484, type=chunk];
 "485 getitem_123" [id=485, type=__getitem__];
 "486 getitem_124" [id=486, type=__getitem__];
@@ -497,7 +497,7 @@ strict digraph  {
 "495 getitem_125" [id=495, type=__getitem__];
 "496 getitem_126" [id=496, type=__getitem__];
 "497 getitem_127" [id=497, type=__getitem__];
-"498 silu_37" [id=498, type=silu];
+"498 silu__37" [id=498, type=silu_];
 "499 _param_constant114" [id=499, type=get_attr];
 "500 conv2d_38" [id=500, type=conv2d];
 "501 empty_38" [id=501, type=empty];
@@ -509,7 +509,7 @@ strict digraph  {
 "507 getitem_128" [id=507, type=__getitem__];
 "508 getitem_129" [id=508, type=__getitem__];
 "509 getitem_130" [id=509, type=__getitem__];
-"510 silu_38" [id=510, type=silu];
+"510 silu__38" [id=510, type=silu_];
 "511 cat_10" [id=511, type=cat];
 "512 _param_constant117" [id=512, type=get_attr];
 "513 conv2d_39" [id=513, type=conv2d];
@@ -522,7 +522,7 @@ strict digraph  {
 "520 getitem_131" [id=520, type=__getitem__];
 "521 getitem_132" [id=521, type=__getitem__];
 "522 getitem_133" [id=522, type=__getitem__];
-"523 silu_39" [id=523, type=silu];
+"523 silu__39" [id=523, type=silu_];
 "524 _param_constant120" [id=524, type=get_attr];
 "525 conv2d_40" [id=525, type=conv2d];
 "526 empty_40" [id=526, type=empty];
@@ -534,7 +534,7 @@ strict digraph  {
 "532 getitem_134" [id=532, type=__getitem__];
 "533 getitem_135" [id=533, type=__getitem__];
 "534 getitem_136" [id=534, type=__getitem__];
-"535 silu_40" [id=535, type=silu];
+"535 silu__40" [id=535, type=silu_];
 "536 cat_11" [id=536, type=cat];
 "537 _param_constant123" [id=537, type=get_attr];
 "538 conv2d_41" [id=538, type=conv2d];
@@ -547,7 +547,7 @@ strict digraph  {
 "545 getitem_137" [id=545, type=__getitem__];
 "546 getitem_138" [id=546, type=__getitem__];
 "547 getitem_139" [id=547, type=__getitem__];
-"548 silu_41" [id=548, type=silu];
+"548 silu__41" [id=548, type=silu_];
 "549 chunk_7" [id=549, type=chunk];
 "550 getitem_140" [id=550, type=__getitem__];
 "551 getitem_141" [id=551, type=__getitem__];
@@ -562,7 +562,7 @@ strict digraph  {
 "560 getitem_142" [id=560, type=__getitem__];
 "561 getitem_143" [id=561, type=__getitem__];
 "562 getitem_144" [id=562, type=__getitem__];
-"563 silu_42" [id=563, type=silu];
+"563 silu__42" [id=563, type=silu_];
 "564 _param_constant129" [id=564, type=get_attr];
 "565 conv2d_43" [id=565, type=conv2d];
 "566 empty_43" [id=566, type=empty];
@@ -574,7 +574,7 @@ strict digraph  {
 "572 getitem_145" [id=572, type=__getitem__];
 "573 getitem_146" [id=573, type=__getitem__];
 "574 getitem_147" [id=574, type=__getitem__];
-"575 silu_43" [id=575, type=silu];
+"575 silu__43" [id=575, type=silu_];
 "576 cat_12" [id=576, type=cat];
 "577 _param_constant132" [id=577, type=get_attr];
 "578 conv2d_44" [id=578, type=conv2d];
@@ -587,7 +587,7 @@ strict digraph  {
 "585 getitem_148" [id=585, type=__getitem__];
 "586 getitem_149" [id=586, type=__getitem__];
 "587 getitem_150" [id=587, type=__getitem__];
-"588 silu_44" [id=588, type=silu];
+"588 silu__44" [id=588, type=silu_];
 "589 _param_constant135" [id=589, type=get_attr];
 "590 conv2d_45" [id=590, type=conv2d];
 "591 empty_45" [id=591, type=empty];
@@ -599,7 +599,7 @@ strict digraph  {
 "597 getitem_151" [id=597, type=__getitem__];
 "598 getitem_152" [id=598, type=__getitem__];
 "599 getitem_153" [id=599, type=__getitem__];
-"600 silu_45" [id=600, type=silu];
+"600 silu__45" [id=600, type=silu_];
 "601 _param_constant138" [id=601, type=get_attr];
 "602 conv2d_46" [id=602, type=conv2d];
 "603 empty_46" [id=603, type=empty];
@@ -611,7 +611,7 @@ strict digraph  {
 "609 getitem_154" [id=609, type=__getitem__];
 "610 getitem_155" [id=610, type=__getitem__];
 "611 getitem_156" [id=611, type=__getitem__];
-"612 silu_46" [id=612, type=silu];
+"612 silu__46" [id=612, type=silu_];
 "613 _param_constant141" [id=613, type=get_attr];
 "614 _param_constant142" [id=614, type=get_attr];
 "615 conv2d_47" [id=615, type=conv2d];
@@ -626,7 +626,7 @@ strict digraph  {
 "624 getitem_157" [id=624, type=__getitem__];
 "625 getitem_158" [id=625, type=__getitem__];
 "626 getitem_159" [id=626, type=__getitem__];
-"627 silu_47" [id=627, type=silu];
+"627 silu__47" [id=627, type=silu_];
 "628 _param_constant146" [id=628, type=get_attr];
 "629 conv2d_49" [id=629, type=conv2d];
 "630 empty_48" [id=630, type=empty];
@@ -638,7 +638,7 @@ strict digraph  {
 "636 getitem_160" [id=636, type=__getitem__];
 "637 getitem_161" [id=637, type=__getitem__];
 "638 getitem_162" [id=638, type=__getitem__];
-"639 silu_48" [id=639, type=silu];
+"639 silu__48" [id=639, type=silu_];
 "640 _param_constant149" [id=640, type=get_attr];
 "641 _param_constant150" [id=641, type=get_attr];
 "642 conv2d_50" [id=642, type=conv2d];
@@ -654,7 +654,7 @@ strict digraph  {
 "652 getitem_163" [id=652, type=__getitem__];
 "653 getitem_164" [id=653, type=__getitem__];
 "654 getitem_165" [id=654, type=__getitem__];
-"655 silu_49" [id=655, type=silu];
+"655 silu__49" [id=655, type=silu_];
 "656 _param_constant154" [id=656, type=get_attr];
 "657 conv2d_52" [id=657, type=conv2d];
 "658 empty_50" [id=658, type=empty];
@@ -666,7 +666,7 @@ strict digraph  {
 "664 getitem_166" [id=664, type=__getitem__];
 "665 getitem_167" [id=665, type=__getitem__];
 "666 getitem_168" [id=666, type=__getitem__];
-"667 silu_50" [id=667, type=silu];
+"667 silu__50" [id=667, type=silu_];
 "668 _param_constant157" [id=668, type=get_attr];
 "669 _param_constant158" [id=669, type=get_attr];
 "670 conv2d_53" [id=670, type=conv2d];
@@ -681,7 +681,7 @@ strict digraph  {
 "679 getitem_169" [id=679, type=__getitem__];
 "680 getitem_170" [id=680, type=__getitem__];
 "681 getitem_171" [id=681, type=__getitem__];
-"682 silu_51" [id=682, type=silu];
+"682 silu__51" [id=682, type=silu_];
 "683 _param_constant162" [id=683, type=get_attr];
 "684 conv2d_55" [id=684, type=conv2d];
 "685 empty_52" [id=685, type=empty];
@@ -693,7 +693,7 @@ strict digraph  {
 "691 getitem_172" [id=691, type=__getitem__];
 "692 getitem_173" [id=692, type=__getitem__];
 "693 getitem_174" [id=693, type=__getitem__];
-"694 silu_52" [id=694, type=silu];
+"694 silu__52" [id=694, type=silu_];
 "695 _param_constant165" [id=695, type=get_attr];
 "696 _param_constant166" [id=696, type=get_attr];
 "697 conv2d_56" [id=697, type=conv2d];
@@ -709,7 +709,7 @@ strict digraph  {
 "707 getitem_175" [id=707, type=__getitem__];
 "708 getitem_176" [id=708, type=__getitem__];
 "709 getitem_177" [id=709, type=__getitem__];
-"710 silu_53" [id=710, type=silu];
+"710 silu__53" [id=710, type=silu_];
 "711 _param_constant170" [id=711, type=get_attr];
 "712 conv2d_58" [id=712, type=conv2d];
 "713 empty_54" [id=713, type=empty];
@@ -721,7 +721,7 @@ strict digraph  {
 "719 getitem_178" [id=719, type=__getitem__];
 "720 getitem_179" [id=720, type=__getitem__];
 "721 getitem_180" [id=721, type=__getitem__];
-"722 silu_54" [id=722, type=silu];
+"722 silu__54" [id=722, type=silu_];
 "723 _param_constant173" [id=723, type=get_attr];
 "724 _param_constant174" [id=724, type=get_attr];
 "725 conv2d_59" [id=725, type=conv2d];
@@ -736,7 +736,7 @@ strict digraph  {
 "734 getitem_181" [id=734, type=__getitem__];
 "735 getitem_182" [id=735, type=__getitem__];
 "736 getitem_183" [id=736, type=__getitem__];
-"737 silu_55" [id=737, type=silu];
+"737 silu__55" [id=737, type=silu_];
 "738 _param_constant178" [id=738, type=get_attr];
 "739 conv2d_61" [id=739, type=conv2d];
 "740 empty_56" [id=740, type=empty];
@@ -748,7 +748,7 @@ strict digraph  {
 "746 getitem_184" [id=746, type=__getitem__];
 "747 getitem_185" [id=747, type=__getitem__];
 "748 getitem_186" [id=748, type=__getitem__];
-"749 silu_56" [id=749, type=silu];
+"749 silu__56" [id=749, type=silu_];
 "750 _param_constant181" [id=750, type=get_attr];
 "751 _param_constant182" [id=751, type=get_attr];
 "752 conv2d_62" [id=752, type=conv2d];
@@ -792,8 +792,8 @@ strict digraph  {
 "8 _native_batch_norm_legit_no_training" -> "9 getitem";
 "8 _native_batch_norm_legit_no_training" -> "10 getitem_1";
 "8 _native_batch_norm_legit_no_training" -> "11 getitem_2";
-"9 getitem" -> "12 silu";
-"12 silu" -> "14 conv2d_1";
+"9 getitem" -> "12 silu_";
+"12 silu_" -> "14 conv2d_1";
 "13 _param_constant3" -> "14 conv2d_1";
 "14 conv2d_1" -> "20 _native_batch_norm_legit_no_training_1";
 "16 _param_constant4" -> "20 _native_batch_norm_legit_no_training_1";
@@ -803,8 +803,8 @@ strict digraph  {
 "20 _native_batch_norm_legit_no_training_1" -> "21 getitem_3";
 "20 _native_batch_norm_legit_no_training_1" -> "22 getitem_4";
 "20 _native_batch_norm_legit_no_training_1" -> "23 getitem_5";
-"21 getitem_3" -> "24 silu_1";
-"24 silu_1" -> "26 conv2d_2";
+"21 getitem_3" -> "24 silu__1";
+"24 silu__1" -> "26 conv2d_2";
 "25 _param_constant6" -> "26 conv2d_2";
 "26 conv2d_2" -> "32 _native_batch_norm_legit_no_training_2";
 "28 _param_constant7" -> "32 _native_batch_norm_legit_no_training_2";
@@ -814,8 +814,8 @@ strict digraph  {
 "32 _native_batch_norm_legit_no_training_2" -> "33 getitem_6";
 "32 _native_batch_norm_legit_no_training_2" -> "34 getitem_7";
 "32 _native_batch_norm_legit_no_training_2" -> "35 getitem_8";
-"33 getitem_6" -> "36 silu_2";
-"36 silu_2" -> "37 chunk";
+"33 getitem_6" -> "36 silu__2";
+"36 silu__2" -> "37 chunk";
 "37 chunk" -> "38 getitem_9";
 "37 chunk" -> "39 getitem_10";
 "38 getitem_9" -> "65 cat";
@@ -831,8 +831,8 @@ strict digraph  {
 "47 _native_batch_norm_legit_no_training_3" -> "48 getitem_11";
 "47 _native_batch_norm_legit_no_training_3" -> "49 getitem_12";
 "47 _native_batch_norm_legit_no_training_3" -> "50 getitem_13";
-"48 getitem_11" -> "51 silu_3";
-"51 silu_3" -> "53 conv2d_4";
+"48 getitem_11" -> "51 silu__3";
+"51 silu__3" -> "53 conv2d_4";
 "52 _param_constant12" -> "53 conv2d_4";
 "53 conv2d_4" -> "59 _native_batch_norm_legit_no_training_4";
 "55 _param_constant13" -> "59 _native_batch_norm_legit_no_training_4";
@@ -842,8 +842,8 @@ strict digraph  {
 "59 _native_batch_norm_legit_no_training_4" -> "60 getitem_14";
 "59 _native_batch_norm_legit_no_training_4" -> "61 getitem_15";
 "59 _native_batch_norm_legit_no_training_4" -> "62 getitem_16";
-"60 getitem_14" -> "63 silu_4";
-"63 silu_4" -> "64 add";
+"60 getitem_14" -> "63 silu__4";
+"63 silu__4" -> "64 add";
 "64 add" -> "65 cat";
 "65 cat" -> "67 conv2d_5";
 "66 _param_constant15" -> "67 conv2d_5";
@@ -855,8 +855,8 @@ strict digraph  {
 "73 _native_batch_norm_legit_no_training_5" -> "74 getitem_17";
 "73 _native_batch_norm_legit_no_training_5" -> "75 getitem_18";
 "73 _native_batch_norm_legit_no_training_5" -> "76 getitem_19";
-"74 getitem_17" -> "77 silu_5";
-"77 silu_5" -> "79 conv2d_6";
+"74 getitem_17" -> "77 silu__5";
+"77 silu__5" -> "79 conv2d_6";
 "78 _param_constant18" -> "79 conv2d_6";
 "79 conv2d_6" -> "85 _native_batch_norm_legit_no_training_6";
 "81 _param_constant19" -> "85 _native_batch_norm_legit_no_training_6";
@@ -866,8 +866,8 @@ strict digraph  {
 "85 _native_batch_norm_legit_no_training_6" -> "86 getitem_20";
 "85 _native_batch_norm_legit_no_training_6" -> "87 getitem_21";
 "85 _native_batch_norm_legit_no_training_6" -> "88 getitem_22";
-"86 getitem_20" -> "89 silu_6";
-"89 silu_6" -> "91 conv2d_7";
+"86 getitem_20" -> "89 silu__6";
+"89 silu__6" -> "91 conv2d_7";
 "90 _param_constant21" -> "91 conv2d_7";
 "91 conv2d_7" -> "97 _native_batch_norm_legit_no_training_7";
 "93 _param_constant22" -> "97 _native_batch_norm_legit_no_training_7";
@@ -877,8 +877,8 @@ strict digraph  {
 "97 _native_batch_norm_legit_no_training_7" -> "98 getitem_23";
 "97 _native_batch_norm_legit_no_training_7" -> "99 getitem_24";
 "97 _native_batch_norm_legit_no_training_7" -> "100 getitem_25";
-"98 getitem_23" -> "101 silu_7";
-"101 silu_7" -> "102 chunk_1";
+"98 getitem_23" -> "101 silu__7";
+"101 silu__7" -> "102 chunk_1";
 "102 chunk_1" -> "103 getitem_26";
 "102 chunk_1" -> "104 getitem_27";
 "103 getitem_26" -> "155 cat_1";
@@ -896,8 +896,8 @@ strict digraph  {
 "112 _native_batch_norm_legit_no_training_8" -> "113 getitem_28";
 "112 _native_batch_norm_legit_no_training_8" -> "114 getitem_29";
 "112 _native_batch_norm_legit_no_training_8" -> "115 getitem_30";
-"113 getitem_28" -> "116 silu_8";
-"116 silu_8" -> "118 conv2d_9";
+"113 getitem_28" -> "116 silu__8";
+"116 silu__8" -> "118 conv2d_9";
 "117 _param_constant27" -> "118 conv2d_9";
 "118 conv2d_9" -> "124 _native_batch_norm_legit_no_training_9";
 "120 _param_constant28" -> "124 _native_batch_norm_legit_no_training_9";
@@ -907,8 +907,8 @@ strict digraph  {
 "124 _native_batch_norm_legit_no_training_9" -> "125 getitem_31";
 "124 _native_batch_norm_legit_no_training_9" -> "126 getitem_32";
 "124 _native_batch_norm_legit_no_training_9" -> "127 getitem_33";
-"125 getitem_31" -> "128 silu_9";
-"128 silu_9" -> "129 add_1";
+"125 getitem_31" -> "128 silu__9";
+"128 silu__9" -> "129 add_1";
 "129 add_1" -> "155 cat_1";
 "130 _param_constant30" -> "131 conv2d_10";
 "131 conv2d_10" -> "137 _native_batch_norm_legit_no_training_10";
@@ -919,8 +919,8 @@ strict digraph  {
 "137 _native_batch_norm_legit_no_training_10" -> "138 getitem_34";
 "137 _native_batch_norm_legit_no_training_10" -> "139 getitem_35";
 "137 _native_batch_norm_legit_no_training_10" -> "140 getitem_36";
-"138 getitem_34" -> "141 silu_10";
-"141 silu_10" -> "143 conv2d_11";
+"138 getitem_34" -> "141 silu__10";
+"141 silu__10" -> "143 conv2d_11";
 "142 _param_constant33" -> "143 conv2d_11";
 "143 conv2d_11" -> "149 _native_batch_norm_legit_no_training_11";
 "145 _param_constant34" -> "149 _native_batch_norm_legit_no_training_11";
@@ -930,8 +930,8 @@ strict digraph  {
 "149 _native_batch_norm_legit_no_training_11" -> "150 getitem_37";
 "149 _native_batch_norm_legit_no_training_11" -> "151 getitem_38";
 "149 _native_batch_norm_legit_no_training_11" -> "152 getitem_39";
-"150 getitem_37" -> "153 silu_11";
-"153 silu_11" -> "154 add_2";
+"150 getitem_37" -> "153 silu__11";
+"153 silu__11" -> "154 add_2";
 "154 add_2" -> "155 cat_1";
 "155 cat_1" -> "157 conv2d_12";
 "156 _param_constant36" -> "157 conv2d_12";
@@ -943,9 +943,9 @@ strict digraph  {
 "163 _native_batch_norm_legit_no_training_12" -> "164 getitem_40";
 "163 _native_batch_norm_legit_no_training_12" -> "165 getitem_41";
 "163 _native_batch_norm_legit_no_training_12" -> "166 getitem_42";
-"164 getitem_40" -> "167 silu_12";
-"167 silu_12" -> "169 conv2d_13";
-"167 silu_12" -> "406 cat_7";
+"164 getitem_40" -> "167 silu__12";
+"167 silu__12" -> "169 conv2d_13";
+"167 silu__12" -> "406 cat_7";
 "168 _param_constant39" -> "169 conv2d_13";
 "169 conv2d_13" -> "175 _native_batch_norm_legit_no_training_13";
 "171 _param_constant40" -> "175 _native_batch_norm_legit_no_training_13";
@@ -955,8 +955,8 @@ strict digraph  {
 "175 _native_batch_norm_legit_no_training_13" -> "176 getitem_43";
 "175 _native_batch_norm_legit_no_training_13" -> "177 getitem_44";
 "175 _native_batch_norm_legit_no_training_13" -> "178 getitem_45";
-"176 getitem_43" -> "179 silu_13";
-"179 silu_13" -> "181 conv2d_14";
+"176 getitem_43" -> "179 silu__13";
+"179 silu__13" -> "181 conv2d_14";
 "180 _param_constant42" -> "181 conv2d_14";
 "181 conv2d_14" -> "187 _native_batch_norm_legit_no_training_14";
 "183 _param_constant43" -> "187 _native_batch_norm_legit_no_training_14";
@@ -966,8 +966,8 @@ strict digraph  {
 "187 _native_batch_norm_legit_no_training_14" -> "188 getitem_46";
 "187 _native_batch_norm_legit_no_training_14" -> "189 getitem_47";
 "187 _native_batch_norm_legit_no_training_14" -> "190 getitem_48";
-"188 getitem_46" -> "191 silu_14";
-"191 silu_14" -> "192 chunk_2";
+"188 getitem_46" -> "191 silu__14";
+"191 silu__14" -> "192 chunk_2";
 "192 chunk_2" -> "193 getitem_49";
 "192 chunk_2" -> "194 getitem_50";
 "193 getitem_49" -> "245 cat_2";
@@ -985,8 +985,8 @@ strict digraph  {
 "202 _native_batch_norm_legit_no_training_15" -> "203 getitem_51";
 "202 _native_batch_norm_legit_no_training_15" -> "204 getitem_52";
 "202 _native_batch_norm_legit_no_training_15" -> "205 getitem_53";
-"203 getitem_51" -> "206 silu_15";
-"206 silu_15" -> "208 conv2d_16";
+"203 getitem_51" -> "206 silu__15";
+"206 silu__15" -> "208 conv2d_16";
 "207 _param_constant48" -> "208 conv2d_16";
 "208 conv2d_16" -> "214 _native_batch_norm_legit_no_training_16";
 "210 _param_constant49" -> "214 _native_batch_norm_legit_no_training_16";
@@ -996,8 +996,8 @@ strict digraph  {
 "214 _native_batch_norm_legit_no_training_16" -> "215 getitem_54";
 "214 _native_batch_norm_legit_no_training_16" -> "216 getitem_55";
 "214 _native_batch_norm_legit_no_training_16" -> "217 getitem_56";
-"215 getitem_54" -> "218 silu_16";
-"218 silu_16" -> "219 add_3";
+"215 getitem_54" -> "218 silu__16";
+"218 silu__16" -> "219 add_3";
 "219 add_3" -> "245 cat_2";
 "220 _param_constant51" -> "221 conv2d_17";
 "221 conv2d_17" -> "227 _native_batch_norm_legit_no_training_17";
@@ -1008,8 +1008,8 @@ strict digraph  {
 "227 _native_batch_norm_legit_no_training_17" -> "228 getitem_57";
 "227 _native_batch_norm_legit_no_training_17" -> "229 getitem_58";
 "227 _native_batch_norm_legit_no_training_17" -> "230 getitem_59";
-"228 getitem_57" -> "231 silu_17";
-"231 silu_17" -> "233 conv2d_18";
+"228 getitem_57" -> "231 silu__17";
+"231 silu__17" -> "233 conv2d_18";
 "232 _param_constant54" -> "233 conv2d_18";
 "233 conv2d_18" -> "239 _native_batch_norm_legit_no_training_18";
 "235 _param_constant55" -> "239 _native_batch_norm_legit_no_training_18";
@@ -1019,8 +1019,8 @@ strict digraph  {
 "239 _native_batch_norm_legit_no_training_18" -> "240 getitem_60";
 "239 _native_batch_norm_legit_no_training_18" -> "241 getitem_61";
 "239 _native_batch_norm_legit_no_training_18" -> "242 getitem_62";
-"240 getitem_60" -> "243 silu_18";
-"243 silu_18" -> "244 add_4";
+"240 getitem_60" -> "243 silu__18";
+"243 silu__18" -> "244 add_4";
 "244 add_4" -> "245 cat_2";
 "245 cat_2" -> "247 conv2d_19";
 "246 _param_constant57" -> "247 conv2d_19";
@@ -1032,9 +1032,9 @@ strict digraph  {
 "253 _native_batch_norm_legit_no_training_19" -> "254 getitem_63";
 "253 _native_batch_norm_legit_no_training_19" -> "255 getitem_64";
 "253 _native_batch_norm_legit_no_training_19" -> "256 getitem_65";
-"254 getitem_63" -> "257 silu_19";
-"257 silu_19" -> "259 conv2d_20";
-"257 silu_19" -> "352 cat_5";
+"254 getitem_63" -> "257 silu__19";
+"257 silu__19" -> "259 conv2d_20";
+"257 silu__19" -> "352 cat_5";
 "258 _param_constant60" -> "259 conv2d_20";
 "259 conv2d_20" -> "265 _native_batch_norm_legit_no_training_20";
 "261 _param_constant61" -> "265 _native_batch_norm_legit_no_training_20";
@@ -1044,8 +1044,8 @@ strict digraph  {
 "265 _native_batch_norm_legit_no_training_20" -> "266 getitem_66";
 "265 _native_batch_norm_legit_no_training_20" -> "267 getitem_67";
 "265 _native_batch_norm_legit_no_training_20" -> "268 getitem_68";
-"266 getitem_66" -> "269 silu_20";
-"269 silu_20" -> "271 conv2d_21";
+"266 getitem_66" -> "269 silu__20";
+"269 silu__20" -> "271 conv2d_21";
 "270 _param_constant63" -> "271 conv2d_21";
 "271 conv2d_21" -> "277 _native_batch_norm_legit_no_training_21";
 "273 _param_constant64" -> "277 _native_batch_norm_legit_no_training_21";
@@ -1055,8 +1055,8 @@ strict digraph  {
 "277 _native_batch_norm_legit_no_training_21" -> "278 getitem_69";
 "277 _native_batch_norm_legit_no_training_21" -> "279 getitem_70";
 "277 _native_batch_norm_legit_no_training_21" -> "280 getitem_71";
-"278 getitem_69" -> "281 silu_21";
-"281 silu_21" -> "282 chunk_3";
+"278 getitem_69" -> "281 silu__21";
+"281 silu__21" -> "282 chunk_3";
 "282 chunk_3" -> "283 getitem_72";
 "282 chunk_3" -> "284 getitem_73";
 "283 getitem_72" -> "310 cat_3";
@@ -1072,8 +1072,8 @@ strict digraph  {
 "292 _native_batch_norm_legit_no_training_22" -> "293 getitem_74";
 "292 _native_batch_norm_legit_no_training_22" -> "294 getitem_75";
 "292 _native_batch_norm_legit_no_training_22" -> "295 getitem_76";
-"293 getitem_74" -> "296 silu_22";
-"296 silu_22" -> "298 conv2d_23";
+"293 getitem_74" -> "296 silu__22";
+"296 silu__22" -> "298 conv2d_23";
 "297 _param_constant69" -> "298 conv2d_23";
 "298 conv2d_23" -> "304 _native_batch_norm_legit_no_training_23";
 "300 _param_constant70" -> "304 _native_batch_norm_legit_no_training_23";
@@ -1083,8 +1083,8 @@ strict digraph  {
 "304 _native_batch_norm_legit_no_training_23" -> "305 getitem_77";
 "304 _native_batch_norm_legit_no_training_23" -> "306 getitem_78";
 "304 _native_batch_norm_legit_no_training_23" -> "307 getitem_79";
-"305 getitem_77" -> "308 silu_23";
-"308 silu_23" -> "309 add_5";
+"305 getitem_77" -> "308 silu__23";
+"308 silu__23" -> "309 add_5";
 "309 add_5" -> "310 cat_3";
 "310 cat_3" -> "312 conv2d_24";
 "311 _param_constant72" -> "312 conv2d_24";
@@ -1096,8 +1096,8 @@ strict digraph  {
 "318 _native_batch_norm_legit_no_training_24" -> "319 getitem_80";
 "318 _native_batch_norm_legit_no_training_24" -> "320 getitem_81";
 "318 _native_batch_norm_legit_no_training_24" -> "321 getitem_82";
-"319 getitem_80" -> "322 silu_24";
-"322 silu_24" -> "324 conv2d_25";
+"319 getitem_80" -> "322 silu__24";
+"322 silu__24" -> "324 conv2d_25";
 "323 _param_constant75" -> "324 conv2d_25";
 "324 conv2d_25" -> "330 _native_batch_norm_legit_no_training_25";
 "326 _param_constant76" -> "330 _native_batch_norm_legit_no_training_25";
@@ -1107,11 +1107,11 @@ strict digraph  {
 "330 _native_batch_norm_legit_no_training_25" -> "331 getitem_83";
 "330 _native_batch_norm_legit_no_training_25" -> "332 getitem_84";
 "330 _native_batch_norm_legit_no_training_25" -> "333 getitem_85";
-"331 getitem_83" -> "334 silu_25";
-"334 silu_25" -> "335 max_pool2d";
-"334 silu_25" -> "336 max_pool2d_1";
-"334 silu_25" -> "337 max_pool2d_2";
-"334 silu_25" -> "338 cat_4";
+"331 getitem_83" -> "334 silu__25";
+"334 silu__25" -> "335 max_pool2d";
+"334 silu__25" -> "336 max_pool2d_1";
+"334 silu__25" -> "337 max_pool2d_2";
+"334 silu__25" -> "338 cat_4";
 "335 max_pool2d" -> "338 cat_4";
 "336 max_pool2d_1" -> "338 cat_4";
 "337 max_pool2d_2" -> "338 cat_4";
@@ -1125,9 +1125,9 @@ strict digraph  {
 "346 _native_batch_norm_legit_no_training_26" -> "347 getitem_86";
 "346 _native_batch_norm_legit_no_training_26" -> "348 getitem_87";
 "346 _native_batch_norm_legit_no_training_26" -> "349 getitem_88";
-"347 getitem_86" -> "350 silu_26";
-"350 silu_26" -> "351 upsample_nearest2d";
-"350 silu_26" -> "536 cat_11";
+"347 getitem_86" -> "350 silu__26";
+"350 silu__26" -> "351 upsample_nearest2d";
+"350 silu__26" -> "536 cat_11";
 "351 upsample_nearest2d" -> "352 cat_5";
 "352 cat_5" -> "354 conv2d_27";
 "353 _param_constant81" -> "354 conv2d_27";
@@ -1139,8 +1139,8 @@ strict digraph  {
 "360 _native_batch_norm_legit_no_training_27" -> "361 getitem_89";
 "360 _native_batch_norm_legit_no_training_27" -> "362 getitem_90";
 "360 _native_batch_norm_legit_no_training_27" -> "363 getitem_91";
-"361 getitem_89" -> "364 silu_27";
-"364 silu_27" -> "365 chunk_4";
+"361 getitem_89" -> "364 silu__27";
+"364 silu__27" -> "365 chunk_4";
 "365 chunk_4" -> "366 getitem_92";
 "365 chunk_4" -> "367 getitem_93";
 "366 getitem_92" -> "392 cat_6";
@@ -1155,8 +1155,8 @@ strict digraph  {
 "375 _native_batch_norm_legit_no_training_28" -> "376 getitem_94";
 "375 _native_batch_norm_legit_no_training_28" -> "377 getitem_95";
 "375 _native_batch_norm_legit_no_training_28" -> "378 getitem_96";
-"376 getitem_94" -> "379 silu_28";
-"379 silu_28" -> "381 conv2d_29";
+"376 getitem_94" -> "379 silu__28";
+"379 silu__28" -> "381 conv2d_29";
 "380 _param_constant87" -> "381 conv2d_29";
 "381 conv2d_29" -> "387 _native_batch_norm_legit_no_training_29";
 "383 _param_constant88" -> "387 _native_batch_norm_legit_no_training_29";
@@ -1166,8 +1166,8 @@ strict digraph  {
 "387 _native_batch_norm_legit_no_training_29" -> "388 getitem_97";
 "387 _native_batch_norm_legit_no_training_29" -> "389 getitem_98";
 "387 _native_batch_norm_legit_no_training_29" -> "390 getitem_99";
-"388 getitem_97" -> "391 silu_29";
-"391 silu_29" -> "392 cat_6";
+"388 getitem_97" -> "391 silu__29";
+"391 silu__29" -> "392 cat_6";
 "392 cat_6" -> "394 conv2d_30";
 "393 _param_constant90" -> "394 conv2d_30";
 "394 conv2d_30" -> "400 _native_batch_norm_legit_no_training_30";
@@ -1178,9 +1178,9 @@ strict digraph  {
 "400 _native_batch_norm_legit_no_training_30" -> "401 getitem_100";
 "400 _native_batch_norm_legit_no_training_30" -> "402 getitem_101";
 "400 _native_batch_norm_legit_no_training_30" -> "403 getitem_102";
-"401 getitem_100" -> "404 silu_30";
-"404 silu_30" -> "405 upsample_nearest2d_1";
-"404 silu_30" -> "471 cat_9";
+"401 getitem_100" -> "404 silu__30";
+"404 silu__30" -> "405 upsample_nearest2d_1";
+"404 silu__30" -> "471 cat_9";
 "405 upsample_nearest2d_1" -> "406 cat_7";
 "406 cat_7" -> "408 conv2d_31";
 "407 _param_constant93" -> "408 conv2d_31";
@@ -1192,8 +1192,8 @@ strict digraph  {
 "414 _native_batch_norm_legit_no_training_31" -> "415 getitem_103";
 "414 _native_batch_norm_legit_no_training_31" -> "416 getitem_104";
 "414 _native_batch_norm_legit_no_training_31" -> "417 getitem_105";
-"415 getitem_103" -> "418 silu_31";
-"418 silu_31" -> "419 chunk_5";
+"415 getitem_103" -> "418 silu__31";
+"418 silu__31" -> "419 chunk_5";
 "419 chunk_5" -> "420 getitem_106";
 "419 chunk_5" -> "421 getitem_107";
 "420 getitem_106" -> "446 cat_8";
@@ -1208,8 +1208,8 @@ strict digraph  {
 "429 _native_batch_norm_legit_no_training_32" -> "430 getitem_108";
 "429 _native_batch_norm_legit_no_training_32" -> "431 getitem_109";
 "429 _native_batch_norm_legit_no_training_32" -> "432 getitem_110";
-"430 getitem_108" -> "433 silu_32";
-"433 silu_32" -> "435 conv2d_33";
+"430 getitem_108" -> "433 silu__32";
+"433 silu__32" -> "435 conv2d_33";
 "434 _param_constant99" -> "435 conv2d_33";
 "435 conv2d_33" -> "441 _native_batch_norm_legit_no_training_33";
 "437 _param_constant100" -> "441 _native_batch_norm_legit_no_training_33";
@@ -1219,8 +1219,8 @@ strict digraph  {
 "441 _native_batch_norm_legit_no_training_33" -> "442 getitem_111";
 "441 _native_batch_norm_legit_no_training_33" -> "443 getitem_112";
 "441 _native_batch_norm_legit_no_training_33" -> "444 getitem_113";
-"442 getitem_111" -> "445 silu_33";
-"445 silu_33" -> "446 cat_8";
+"442 getitem_111" -> "445 silu__33";
+"445 silu__33" -> "446 cat_8";
 "446 cat_8" -> "448 conv2d_34";
 "447 _param_constant102" -> "448 conv2d_34";
 "448 conv2d_34" -> "454 _native_batch_norm_legit_no_training_34";
@@ -1231,10 +1231,10 @@ strict digraph  {
 "454 _native_batch_norm_legit_no_training_34" -> "455 getitem_114";
 "454 _native_batch_norm_legit_no_training_34" -> "456 getitem_115";
 "454 _native_batch_norm_legit_no_training_34" -> "457 getitem_116";
-"455 getitem_114" -> "458 silu_34";
-"458 silu_34" -> "460 conv2d_35";
-"458 silu_34" -> "590 conv2d_45";
-"458 silu_34" -> "617 conv2d_48";
+"455 getitem_114" -> "458 silu__34";
+"458 silu__34" -> "460 conv2d_35";
+"458 silu__34" -> "590 conv2d_45";
+"458 silu__34" -> "617 conv2d_48";
 "459 _param_constant105" -> "460 conv2d_35";
 "460 conv2d_35" -> "466 _native_batch_norm_legit_no_training_35";
 "462 _param_constant106" -> "466 _native_batch_norm_legit_no_training_35";
@@ -1244,8 +1244,8 @@ strict digraph  {
 "466 _native_batch_norm_legit_no_training_35" -> "467 getitem_117";
 "466 _native_batch_norm_legit_no_training_35" -> "468 getitem_118";
 "466 _native_batch_norm_legit_no_training_35" -> "469 getitem_119";
-"467 getitem_117" -> "470 silu_35";
-"470 silu_35" -> "471 cat_9";
+"467 getitem_117" -> "470 silu__35";
+"470 silu__35" -> "471 cat_9";
 "471 cat_9" -> "473 conv2d_36";
 "472 _param_constant108" -> "473 conv2d_36";
 "473 conv2d_36" -> "479 _native_batch_norm_legit_no_training_36";
@@ -1256,8 +1256,8 @@ strict digraph  {
 "479 _native_batch_norm_legit_no_training_36" -> "480 getitem_120";
 "479 _native_batch_norm_legit_no_training_36" -> "481 getitem_121";
 "479 _native_batch_norm_legit_no_training_36" -> "482 getitem_122";
-"480 getitem_120" -> "483 silu_36";
-"483 silu_36" -> "484 chunk_6";
+"480 getitem_120" -> "483 silu__36";
+"483 silu__36" -> "484 chunk_6";
 "484 chunk_6" -> "485 getitem_123";
 "484 chunk_6" -> "486 getitem_124";
 "485 getitem_123" -> "511 cat_10";
@@ -1272,8 +1272,8 @@ strict digraph  {
 "494 _native_batch_norm_legit_no_training_37" -> "495 getitem_125";
 "494 _native_batch_norm_legit_no_training_37" -> "496 getitem_126";
 "494 _native_batch_norm_legit_no_training_37" -> "497 getitem_127";
-"495 getitem_125" -> "498 silu_37";
-"498 silu_37" -> "500 conv2d_38";
+"495 getitem_125" -> "498 silu__37";
+"498 silu__37" -> "500 conv2d_38";
 "499 _param_constant114" -> "500 conv2d_38";
 "500 conv2d_38" -> "506 _native_batch_norm_legit_no_training_38";
 "502 _param_constant115" -> "506 _native_batch_norm_legit_no_training_38";
@@ -1283,8 +1283,8 @@ strict digraph  {
 "506 _native_batch_norm_legit_no_training_38" -> "507 getitem_128";
 "506 _native_batch_norm_legit_no_training_38" -> "508 getitem_129";
 "506 _native_batch_norm_legit_no_training_38" -> "509 getitem_130";
-"507 getitem_128" -> "510 silu_38";
-"510 silu_38" -> "511 cat_10";
+"507 getitem_128" -> "510 silu__38";
+"510 silu__38" -> "511 cat_10";
 "511 cat_10" -> "513 conv2d_39";
 "512 _param_constant117" -> "513 conv2d_39";
 "513 conv2d_39" -> "519 _native_batch_norm_legit_no_training_39";
@@ -1295,10 +1295,10 @@ strict digraph  {
 "519 _native_batch_norm_legit_no_training_39" -> "520 getitem_131";
 "519 _native_batch_norm_legit_no_training_39" -> "521 getitem_132";
 "519 _native_batch_norm_legit_no_training_39" -> "522 getitem_133";
-"520 getitem_131" -> "523 silu_39";
-"523 silu_39" -> "525 conv2d_40";
-"523 silu_39" -> "645 conv2d_51";
-"523 silu_39" -> "672 conv2d_54";
+"520 getitem_131" -> "523 silu__39";
+"523 silu__39" -> "525 conv2d_40";
+"523 silu__39" -> "645 conv2d_51";
+"523 silu__39" -> "672 conv2d_54";
 "524 _param_constant120" -> "525 conv2d_40";
 "525 conv2d_40" -> "531 _native_batch_norm_legit_no_training_40";
 "527 _param_constant121" -> "531 _native_batch_norm_legit_no_training_40";
@@ -1308,8 +1308,8 @@ strict digraph  {
 "531 _native_batch_norm_legit_no_training_40" -> "532 getitem_134";
 "531 _native_batch_norm_legit_no_training_40" -> "533 getitem_135";
 "531 _native_batch_norm_legit_no_training_40" -> "534 getitem_136";
-"532 getitem_134" -> "535 silu_40";
-"535 silu_40" -> "536 cat_11";
+"532 getitem_134" -> "535 silu__40";
+"535 silu__40" -> "536 cat_11";
 "536 cat_11" -> "538 conv2d_41";
 "537 _param_constant123" -> "538 conv2d_41";
 "538 conv2d_41" -> "544 _native_batch_norm_legit_no_training_41";
@@ -1320,8 +1320,8 @@ strict digraph  {
 "544 _native_batch_norm_legit_no_training_41" -> "545 getitem_137";
 "544 _native_batch_norm_legit_no_training_41" -> "546 getitem_138";
 "544 _native_batch_norm_legit_no_training_41" -> "547 getitem_139";
-"545 getitem_137" -> "548 silu_41";
-"548 silu_41" -> "549 chunk_7";
+"545 getitem_137" -> "548 silu__41";
+"548 silu__41" -> "549 chunk_7";
 "549 chunk_7" -> "550 getitem_140";
 "549 chunk_7" -> "551 getitem_141";
 "550 getitem_140" -> "576 cat_12";
@@ -1336,8 +1336,8 @@ strict digraph  {
 "559 _native_batch_norm_legit_no_training_42" -> "560 getitem_142";
 "559 _native_batch_norm_legit_no_training_42" -> "561 getitem_143";
 "559 _native_batch_norm_legit_no_training_42" -> "562 getitem_144";
-"560 getitem_142" -> "563 silu_42";
-"563 silu_42" -> "565 conv2d_43";
+"560 getitem_142" -> "563 silu__42";
+"563 silu__42" -> "565 conv2d_43";
 "564 _param_constant129" -> "565 conv2d_43";
 "565 conv2d_43" -> "571 _native_batch_norm_legit_no_training_43";
 "567 _param_constant130" -> "571 _native_batch_norm_legit_no_training_43";
@@ -1347,8 +1347,8 @@ strict digraph  {
 "571 _native_batch_norm_legit_no_training_43" -> "572 getitem_145";
 "571 _native_batch_norm_legit_no_training_43" -> "573 getitem_146";
 "571 _native_batch_norm_legit_no_training_43" -> "574 getitem_147";
-"572 getitem_145" -> "575 silu_43";
-"575 silu_43" -> "576 cat_12";
+"572 getitem_145" -> "575 silu__43";
+"575 silu__43" -> "576 cat_12";
 "576 cat_12" -> "578 conv2d_44";
 "577 _param_constant132" -> "578 conv2d_44";
 "578 conv2d_44" -> "584 _native_batch_norm_legit_no_training_44";
@@ -1359,9 +1359,9 @@ strict digraph  {
 "584 _native_batch_norm_legit_no_training_44" -> "585 getitem_148";
 "584 _native_batch_norm_legit_no_training_44" -> "586 getitem_149";
 "584 _native_batch_norm_legit_no_training_44" -> "587 getitem_150";
-"585 getitem_148" -> "588 silu_44";
-"588 silu_44" -> "700 conv2d_57";
-"588 silu_44" -> "727 conv2d_60";
+"585 getitem_148" -> "588 silu__44";
+"588 silu__44" -> "700 conv2d_57";
+"588 silu__44" -> "727 conv2d_60";
 "589 _param_constant135" -> "590 conv2d_45";
 "590 conv2d_45" -> "596 _native_batch_norm_legit_no_training_45";
 "592 _param_constant136" -> "596 _native_batch_norm_legit_no_training_45";
@@ -1371,8 +1371,8 @@ strict digraph  {
 "596 _native_batch_norm_legit_no_training_45" -> "597 getitem_151";
 "596 _native_batch_norm_legit_no_training_45" -> "598 getitem_152";
 "596 _native_batch_norm_legit_no_training_45" -> "599 getitem_153";
-"597 getitem_151" -> "600 silu_45";
-"600 silu_45" -> "602 conv2d_46";
+"597 getitem_151" -> "600 silu__45";
+"600 silu__45" -> "602 conv2d_46";
 "601 _param_constant138" -> "602 conv2d_46";
 "602 conv2d_46" -> "608 _native_batch_norm_legit_no_training_46";
 "604 _param_constant139" -> "608 _native_batch_norm_legit_no_training_46";
@@ -1382,8 +1382,8 @@ strict digraph  {
 "608 _native_batch_norm_legit_no_training_46" -> "609 getitem_154";
 "608 _native_batch_norm_legit_no_training_46" -> "610 getitem_155";
 "608 _native_batch_norm_legit_no_training_46" -> "611 getitem_156";
-"609 getitem_154" -> "612 silu_46";
-"612 silu_46" -> "615 conv2d_47";
+"609 getitem_154" -> "612 silu__46";
+"612 silu__46" -> "615 conv2d_47";
 "613 _param_constant141" -> "615 conv2d_47";
 "614 _param_constant142" -> "615 conv2d_47";
 "615 conv2d_47" -> "643 cat_13";
@@ -1396,8 +1396,8 @@ strict digraph  {
 "623 _native_batch_norm_legit_no_training_47" -> "624 getitem_157";
 "623 _native_batch_norm_legit_no_training_47" -> "625 getitem_158";
 "623 _native_batch_norm_legit_no_training_47" -> "626 getitem_159";
-"624 getitem_157" -> "627 silu_47";
-"627 silu_47" -> "629 conv2d_49";
+"624 getitem_157" -> "627 silu__47";
+"627 silu__47" -> "629 conv2d_49";
 "628 _param_constant146" -> "629 conv2d_49";
 "629 conv2d_49" -> "635 _native_batch_norm_legit_no_training_48";
 "631 _param_constant147" -> "635 _native_batch_norm_legit_no_training_48";
@@ -1407,8 +1407,8 @@ strict digraph  {
 "635 _native_batch_norm_legit_no_training_48" -> "636 getitem_160";
 "635 _native_batch_norm_legit_no_training_48" -> "637 getitem_161";
 "635 _native_batch_norm_legit_no_training_48" -> "638 getitem_162";
-"636 getitem_160" -> "639 silu_48";
-"639 silu_48" -> "642 conv2d_50";
+"636 getitem_160" -> "639 silu__48";
+"639 silu__48" -> "642 conv2d_50";
 "640 _param_constant149" -> "642 conv2d_50";
 "641 _param_constant150" -> "642 conv2d_50";
 "642 conv2d_50" -> "643 cat_13";
@@ -1423,8 +1423,8 @@ strict digraph  {
 "651 _native_batch_norm_legit_no_training_49" -> "652 getitem_163";
 "651 _native_batch_norm_legit_no_training_49" -> "653 getitem_164";
 "651 _native_batch_norm_legit_no_training_49" -> "654 getitem_165";
-"652 getitem_163" -> "655 silu_49";
-"655 silu_49" -> "657 conv2d_52";
+"652 getitem_163" -> "655 silu__49";
+"655 silu__49" -> "657 conv2d_52";
 "656 _param_constant154" -> "657 conv2d_52";
 "657 conv2d_52" -> "663 _native_batch_norm_legit_no_training_50";
 "659 _param_constant155" -> "663 _native_batch_norm_legit_no_training_50";
@@ -1434,8 +1434,8 @@ strict digraph  {
 "663 _native_batch_norm_legit_no_training_50" -> "664 getitem_166";
 "663 _native_batch_norm_legit_no_training_50" -> "665 getitem_167";
 "663 _native_batch_norm_legit_no_training_50" -> "666 getitem_168";
-"664 getitem_166" -> "667 silu_50";
-"667 silu_50" -> "670 conv2d_53";
+"664 getitem_166" -> "667 silu__50";
+"667 silu__50" -> "670 conv2d_53";
 "668 _param_constant157" -> "670 conv2d_53";
 "669 _param_constant158" -> "670 conv2d_53";
 "670 conv2d_53" -> "698 cat_14";
@@ -1448,8 +1448,8 @@ strict digraph  {
 "678 _native_batch_norm_legit_no_training_51" -> "679 getitem_169";
 "678 _native_batch_norm_legit_no_training_51" -> "680 getitem_170";
 "678 _native_batch_norm_legit_no_training_51" -> "681 getitem_171";
-"679 getitem_169" -> "682 silu_51";
-"682 silu_51" -> "684 conv2d_55";
+"679 getitem_169" -> "682 silu__51";
+"682 silu__51" -> "684 conv2d_55";
 "683 _param_constant162" -> "684 conv2d_55";
 "684 conv2d_55" -> "690 _native_batch_norm_legit_no_training_52";
 "686 _param_constant163" -> "690 _native_batch_norm_legit_no_training_52";
@@ -1459,8 +1459,8 @@ strict digraph  {
 "690 _native_batch_norm_legit_no_training_52" -> "691 getitem_172";
 "690 _native_batch_norm_legit_no_training_52" -> "692 getitem_173";
 "690 _native_batch_norm_legit_no_training_52" -> "693 getitem_174";
-"691 getitem_172" -> "694 silu_52";
-"694 silu_52" -> "697 conv2d_56";
+"691 getitem_172" -> "694 silu__52";
+"694 silu__52" -> "697 conv2d_56";
 "695 _param_constant165" -> "697 conv2d_56";
 "696 _param_constant166" -> "697 conv2d_56";
 "697 conv2d_56" -> "698 cat_14";
@@ -1475,8 +1475,8 @@ strict digraph  {
 "706 _native_batch_norm_legit_no_training_53" -> "707 getitem_175";
 "706 _native_batch_norm_legit_no_training_53" -> "708 getitem_176";
 "706 _native_batch_norm_legit_no_training_53" -> "709 getitem_177";
-"707 getitem_175" -> "710 silu_53";
-"710 silu_53" -> "712 conv2d_58";
+"707 getitem_175" -> "710 silu__53";
+"710 silu__53" -> "712 conv2d_58";
 "711 _param_constant170" -> "712 conv2d_58";
 "712 conv2d_58" -> "718 _native_batch_norm_legit_no_training_54";
 "714 _param_constant171" -> "718 _native_batch_norm_legit_no_training_54";
@@ -1486,8 +1486,8 @@ strict digraph  {
 "718 _native_batch_norm_legit_no_training_54" -> "719 getitem_178";
 "718 _native_batch_norm_legit_no_training_54" -> "720 getitem_179";
 "718 _native_batch_norm_legit_no_training_54" -> "721 getitem_180";
-"719 getitem_178" -> "722 silu_54";
-"722 silu_54" -> "725 conv2d_59";
+"719 getitem_178" -> "722 silu__54";
+"722 silu__54" -> "725 conv2d_59";
 "723 _param_constant173" -> "725 conv2d_59";
 "724 _param_constant174" -> "725 conv2d_59";
 "725 conv2d_59" -> "753 cat_15";
@@ -1500,8 +1500,8 @@ strict digraph  {
 "733 _native_batch_norm_legit_no_training_55" -> "734 getitem_181";
 "733 _native_batch_norm_legit_no_training_55" -> "735 getitem_182";
 "733 _native_batch_norm_legit_no_training_55" -> "736 getitem_183";
-"734 getitem_181" -> "737 silu_55";
-"737 silu_55" -> "739 conv2d_61";
+"734 getitem_181" -> "737 silu__55";
+"737 silu__55" -> "739 conv2d_61";
 "738 _param_constant178" -> "739 conv2d_61";
 "739 conv2d_61" -> "745 _native_batch_norm_legit_no_training_56";
 "741 _param_constant179" -> "745 _native_batch_norm_legit_no_training_56";
@@ -1511,8 +1511,8 @@ strict digraph  {
 "745 _native_batch_norm_legit_no_training_56" -> "746 getitem_184";
 "745 _native_batch_norm_legit_no_training_56" -> "747 getitem_185";
 "745 _native_batch_norm_legit_no_training_56" -> "748 getitem_186";
-"746 getitem_184" -> "749 silu_56";
-"749 silu_56" -> "752 conv2d_62";
+"746 getitem_184" -> "749 silu__56";
+"749 silu__56" -> "752 conv2d_62";
 "750 _param_constant181" -> "752 conv2d_62";
 "751 _param_constant182" -> "752 conv2d_62";
 "752 conv2d_62" -> "753 cat_15";
diff --git a/tests/torch/fx/test_models.py b/tests/torch/fx/test_models.py
index ca8a8ab5c12..044b15cd394 100644
--- a/tests/torch/fx/test_models.py
+++ b/tests/torch/fx/test_models.py
@@ -27,6 +27,7 @@
 import torch.utils.data.distributed
 import torchvision.models as models
 from torch._export import capture_pre_autograd_graph
+from ultralytics.models.yolo import YOLO
 
 from nncf.common.graph.graph import NNCFNodeName
 from nncf.common.graph.operator_metatypes import OperatorMetatype
@@ -35,7 +36,6 @@
 from nncf.torch.dynamic_graph.patch_pytorch import disable_patching
 from tests.shared.paths import TEST_ROOT
 from tests.torch.test_compressed_graph import check_graph
-from tests.torch.test_models.yolov8.model import YoloV8Model
 
 FX_DIR_NAME = "fx"
 
@@ -54,9 +54,12 @@ def torchvision_model_case(model_id: str, input_shape: Tuple[int,]):
 
 def yolo_v8_case(model_id, input_shape):
     def get_model() -> torch.nn.Module:
-        model = YoloV8Model().eval()
+        model_config = model_id + ".yaml"
+        model = YOLO(model_config)
+        model = model.model
+        model.eval()
         # Warmup model
-        model(torch.empty(input_shape))
+        model(torch.ones(input_shape))
         return model
 
     return ModelCase(get_model, model_id, input_shape)
diff --git a/tests/torch/requirements.txt b/tests/torch/requirements.txt
index be82652d65f..669deee7a40 100644
--- a/tests/torch/requirements.txt
+++ b/tests/torch/requirements.txt
@@ -24,3 +24,4 @@ timm==0.9.2
 # Required for torch/fx tests
 torchvision
 fastdownload==0.0.7
+ultralytics==8.2.56
diff --git a/tests/torch/test_models/yolov8/block.py b/tests/torch/test_models/yolov8/block.py
deleted file mode 100644
index 3058b4157f0..00000000000
--- a/tests/torch/test_models/yolov8/block.py
+++ /dev/null
@@ -1,818 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Source: ultralytics/ultralytics/nn/modules/block.py
-Commit: 673e76b86282859ead5517bd04dee896a647db93
-Block modules.
-"""
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from .conv import Conv
-from .conv import DWConv
-from .conv import GhostConv
-from .conv import LightConv
-from .conv import RepConv
-from .conv import autopad
-from .transformer import TransformerBlock
-
-
-class DFL(nn.Module):
-    """
-    Integral module of Distribution Focal Loss (DFL).
-
-    Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
-    """
-
-    def __init__(self, c1=16):
-        """Initialize a convolutional layer with a given number of input channels."""
-        super().__init__()
-        self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
-        x = torch.arange(c1, dtype=torch.float)
-        self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
-        self.c1 = c1
-
-    def forward(self, x):
-        """Applies a transformer layer on input tensor 'x' and returns a tensor."""
-        b, _, a = x.shape  # batch, channels, anchors
-        return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
-        # return self.conv(x.view(b, self.c1, 4, a).softmax(1)).view(b, 4, a)
-
-
-class Proto(nn.Module):
-    """YOLOv8 mask Proto module for segmentation models."""
-
-    def __init__(self, c1, c_=256, c2=32):
-        """
-        Initializes the YOLOv8 mask Proto module with specified number of protos and masks.
-
-        Input arguments are ch_in, number of protos, number of masks.
-        """
-        super().__init__()
-        self.cv1 = Conv(c1, c_, k=3)
-        self.upsample = nn.ConvTranspose2d(c_, c_, 2, 2, 0, bias=True)  # nn.Upsample(scale_factor=2, mode='nearest')
-        self.cv2 = Conv(c_, c_, k=3)
-        self.cv3 = Conv(c_, c2)
-
-    def forward(self, x):
-        """Performs a forward pass through layers using an upsampled input image."""
-        return self.cv3(self.cv2(self.upsample(self.cv1(x))))
-
-
-class HGStem(nn.Module):
-    """
-    StemBlock of PPHGNetV2 with 5 convolutions and one maxpool2d.
-
-    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
-    """
-
-    def __init__(self, c1, cm, c2):
-        """Initialize the SPP layer with input/output channels and specified kernel sizes for max pooling."""
-        super().__init__()
-        self.stem1 = Conv(c1, cm, 3, 2, act=nn.ReLU())
-        self.stem2a = Conv(cm, cm // 2, 2, 1, 0, act=nn.ReLU())
-        self.stem2b = Conv(cm // 2, cm, 2, 1, 0, act=nn.ReLU())
-        self.stem3 = Conv(cm * 2, cm, 3, 2, act=nn.ReLU())
-        self.stem4 = Conv(cm, c2, 1, 1, act=nn.ReLU())
-        self.pool = nn.MaxPool2d(kernel_size=2, stride=1, padding=0, ceil_mode=True)
-
-    def forward(self, x):
-        """Forward pass of a PPHGNetV2 backbone layer."""
-        x = self.stem1(x)
-        x = F.pad(x, [0, 1, 0, 1])
-        x2 = self.stem2a(x)
-        x2 = F.pad(x2, [0, 1, 0, 1])
-        x2 = self.stem2b(x2)
-        x1 = self.pool(x)
-        x = torch.cat([x1, x2], dim=1)
-        x = self.stem3(x)
-        x = self.stem4(x)
-        return x
-
-
-class HGBlock(nn.Module):
-    """
-    HG_Block of PPHGNetV2 with 2 convolutions and LightConv.
-
-    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
-    """
-
-    def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
-        """Initializes a CSP Bottleneck with 1 convolution using specified input and output channels."""
-        super().__init__()
-        block = LightConv if lightconv else Conv
-        self.m = nn.ModuleList(block(c1 if i == 0 else cm, cm, k=k, act=act) for i in range(n))
-        self.sc = Conv(c1 + n * cm, c2 // 2, 1, 1, act=act)  # squeeze conv
-        self.ec = Conv(c2 // 2, c2, 1, 1, act=act)  # excitation conv
-        self.add = shortcut and c1 == c2
-
-    def forward(self, x):
-        """Forward pass of a PPHGNetV2 backbone layer."""
-        y = [x]
-        y.extend(m(y[-1]) for m in self.m)
-        y = self.ec(self.sc(torch.cat(y, 1)))
-        return y + x if self.add else y
-
-
-class SPP(nn.Module):
-    """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
-
-    def __init__(self, c1, c2, k=(5, 9, 13)):
-        """Initialize the SPP layer with input/output channels and pooling kernel sizes."""
-        super().__init__()
-        c_ = c1 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, 1, 1)
-        self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
-        self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
-
-    def forward(self, x):
-        """Forward pass of the SPP layer, performing spatial pyramid pooling."""
-        x = self.cv1(x)
-        return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
-
-
-class SPPF(nn.Module):
-    """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
-
-    def __init__(self, c1, c2, k=5):
-        """
-        Initializes the SPPF layer with given input/output channels and kernel size.
-
-        This module is equivalent to SPP(k=(5, 9, 13)).
-        """
-        super().__init__()
-        c_ = c1 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, 1, 1)
-        self.cv2 = Conv(c_ * 4, c2, 1, 1)
-        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
-
-    def forward(self, x):
-        """Forward pass through Ghost Convolution block."""
-        y = [self.cv1(x)]
-        y.extend(self.m(y[-1]) for _ in range(3))
-        return self.cv2(torch.cat(y, 1))
-
-
-class C1(nn.Module):
-    """CSP Bottleneck with 1 convolution."""
-
-    def __init__(self, c1, c2, n=1):
-        """Initializes the CSP Bottleneck with configurations for 1 convolution with arguments ch_in, ch_out, number."""
-        super().__init__()
-        self.cv1 = Conv(c1, c2, 1, 1)
-        self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
-
-    def forward(self, x):
-        """Applies cross-convolutions to input in the C3 module."""
-        y = self.cv1(x)
-        return self.m(y) + y
-
-
-class C2(nn.Module):
-    """CSP Bottleneck with 2 convolutions."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initializes the CSP Bottleneck with 2 convolutions module with arguments ch_in, ch_out, number, shortcut,
-        groups, expansion.
-        """
-        super().__init__()
-        self.c = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
-        self.cv2 = Conv(2 * self.c, c2, 1)  # optional act=FReLU(c2)
-        # self.attention = ChannelAttention(2 * self.c)  # or SpatialAttention()
-        self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
-
-    def forward(self, x):
-        """Forward pass through the CSP bottleneck with 2 convolutions."""
-        a, b = self.cv1(x).chunk(2, 1)
-        return self.cv2(torch.cat((self.m(a), b), 1))
-
-
-class C2f(nn.Module):
-    """Faster Implementation of CSP Bottleneck with 2 convolutions."""
-
-    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
-        """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
-        expansion.
-        """
-        super().__init__()
-        self.c = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
-        self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
-        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
-
-    def forward(self, x):
-        """Forward pass through C2f layer."""
-        y = list(self.cv1(x).chunk(2, 1))
-        y.extend(m(y[-1]) for m in self.m)
-        return self.cv2(torch.cat(y, 1))
-
-    def forward_split(self, x):
-        """Forward pass using split() instead of chunk()."""
-        y = list(self.cv1(x).split((self.c, self.c), 1))
-        y.extend(m(y[-1]) for m in self.m)
-        return self.cv2(torch.cat(y, 1))
-
-
-class C3(nn.Module):
-    """CSP Bottleneck with 3 convolutions."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initialize the CSP Bottleneck with given channels, number, shortcut, groups, and expansion values."""
-        super().__init__()
-        c_ = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, c_, 1, 1)
-        self.cv2 = Conv(c1, c_, 1, 1)
-        self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
-
-    def forward(self, x):
-        """Forward pass through the CSP bottleneck with 2 convolutions."""
-        return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
-
-
-class C3x(C3):
-    """C3 module with cross-convolutions."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initialize C3TR instance and set default parameters."""
-        super().__init__(c1, c2, n, shortcut, g, e)
-        self.c_ = int(c2 * e)
-        self.m = nn.Sequential(*(Bottleneck(self.c_, self.c_, shortcut, g, k=((1, 3), (3, 1)), e=1) for _ in range(n)))
-
-
-class RepC3(nn.Module):
-    """Rep C3."""
-
-    def __init__(self, c1, c2, n=3, e=1.0):
-        """Initialize CSP Bottleneck with a single convolution using input channels, output channels, and number."""
-        super().__init__()
-        c_ = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, c2, 1, 1)
-        self.cv2 = Conv(c1, c2, 1, 1)
-        self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
-        self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()
-
-    def forward(self, x):
-        """Forward pass of RT-DETR neck layer."""
-        return self.cv3(self.m(self.cv1(x)) + self.cv2(x))
-
-
-class C3TR(C3):
-    """C3 module with TransformerBlock()."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initialize C3Ghost module with GhostBottleneck()."""
-        super().__init__(c1, c2, n, shortcut, g, e)
-        c_ = int(c2 * e)
-        self.m = TransformerBlock(c_, c_, 4, n)
-
-
-class C3Ghost(C3):
-    """C3 module with GhostBottleneck()."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initialize 'SPP' module with various pooling sizes for spatial pyramid pooling."""
-        super().__init__(c1, c2, n, shortcut, g, e)
-        c_ = int(c2 * e)  # hidden channels
-        self.m = nn.Sequential(*(GhostBottleneck(c_, c_) for _ in range(n)))
-
-
-class GhostBottleneck(nn.Module):
-    """Ghost Bottleneck https://github.com/huawei-noah/ghostnet."""
-
-    def __init__(self, c1, c2, k=3, s=1):
-        """Initializes GhostBottleneck module with arguments ch_in, ch_out, kernel, stride."""
-        super().__init__()
-        c_ = c2 // 2
-        self.conv = nn.Sequential(
-            GhostConv(c1, c_, 1, 1),  # pw
-            DWConv(c_, c_, k, s, act=False) if s == 2 else nn.Identity(),  # dw
-            GhostConv(c_, c2, 1, 1, act=False),  # pw-linear
-        )
-        self.shortcut = (
-            nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
-        )
-
-    def forward(self, x):
-        """Applies skip connection and concatenation to input tensor."""
-        return self.conv(x) + self.shortcut(x)
-
-
-class Bottleneck(nn.Module):
-    """Standard bottleneck."""
-
-    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
-        """Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, and
-        expansion.
-        """
-        super().__init__()
-        c_ = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, c_, k[0], 1)
-        self.cv2 = Conv(c_, c2, k[1], 1, g=g)
-        self.add = shortcut and c1 == c2
-
-    def forward(self, x):
-        """'forward()' applies the YOLO FPN to input data."""
-        return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
-
-
-class BottleneckCSP(nn.Module):
-    """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initializes the CSP Bottleneck given arguments for ch_in, ch_out, number, shortcut, groups, expansion."""
-        super().__init__()
-        c_ = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, c_, 1, 1)
-        self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
-        self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
-        self.cv4 = Conv(2 * c_, c2, 1, 1)
-        self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
-        self.act = nn.SiLU()
-        self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
-
-    def forward(self, x):
-        """Applies a CSP bottleneck with 3 convolutions."""
-        y1 = self.cv3(self.m(self.cv1(x)))
-        y2 = self.cv2(x)
-        return self.cv4(self.act(self.bn(torch.cat((y1, y2), 1))))
-
-
-class ResNetBlock(nn.Module):
-    """ResNet block with standard convolution layers."""
-
-    def __init__(self, c1, c2, s=1, e=4):
-        """Initialize convolution with given parameters."""
-        super().__init__()
-        c3 = e * c2
-        self.cv1 = Conv(c1, c2, k=1, s=1, act=True)
-        self.cv2 = Conv(c2, c2, k=3, s=s, p=1, act=True)
-        self.cv3 = Conv(c2, c3, k=1, act=False)
-        self.shortcut = nn.Sequential(Conv(c1, c3, k=1, s=s, act=False)) if s != 1 or c1 != c3 else nn.Identity()
-
-    def forward(self, x):
-        """Forward pass through the ResNet block."""
-        return F.relu(self.cv3(self.cv2(self.cv1(x))) + self.shortcut(x))
-
-
-class ResNetLayer(nn.Module):
-    """ResNet layer with multiple ResNet blocks."""
-
-    def __init__(self, c1, c2, s=1, is_first=False, n=1, e=4):
-        """Initializes the ResNetLayer given arguments."""
-        super().__init__()
-        self.is_first = is_first
-
-        if self.is_first:
-            self.layer = nn.Sequential(
-                Conv(c1, c2, k=7, s=2, p=3, act=True), nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-            )
-        else:
-            blocks = [ResNetBlock(c1, c2, s, e=e)]
-            blocks.extend([ResNetBlock(e * c2, c2, 1, e=e) for _ in range(n - 1)])
-            self.layer = nn.Sequential(*blocks)
-
-    def forward(self, x):
-        """Forward pass through the ResNet layer."""
-        return self.layer(x)
-
-
-class MaxSigmoidAttnBlock(nn.Module):
-    """Max Sigmoid attention block."""
-
-    def __init__(self, c1, c2, nh=1, ec=128, gc=512, scale=False):
-        """Initializes MaxSigmoidAttnBlock with specified arguments."""
-        super().__init__()
-        self.nh = nh
-        self.hc = c2 // nh
-        self.ec = Conv(c1, ec, k=1, act=False) if c1 != ec else None
-        self.gl = nn.Linear(gc, ec)
-        self.bias = nn.Parameter(torch.zeros(nh))
-        self.proj_conv = Conv(c1, c2, k=3, s=1, act=False)
-        self.scale = nn.Parameter(torch.ones(1, nh, 1, 1)) if scale else 1.0
-
-    def forward(self, x, guide):
-        """Forward process."""
-        bs, _, h, w = x.shape
-
-        guide = self.gl(guide)
-        guide = guide.view(bs, -1, self.nh, self.hc)
-        embed = self.ec(x) if self.ec is not None else x
-        embed = embed.view(bs, self.nh, self.hc, h, w)
-
-        aw = torch.einsum("bmchw,bnmc->bmhwn", embed, guide)
-        aw = aw.max(dim=-1)[0]
-        aw = aw / (self.hc**0.5)
-        aw = aw + self.bias[None, :, None, None]
-        aw = aw.sigmoid() * self.scale
-
-        x = self.proj_conv(x)
-        x = x.view(bs, self.nh, -1, h, w)
-        x = x * aw.unsqueeze(2)
-        return x.view(bs, -1, h, w)
-
-
-class C2fAttn(nn.Module):
-    """C2f module with an additional attn module."""
-
-    def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
-        """Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,
-        expansion.
-        """
-        super().__init__()
-        self.c = int(c2 * e)  # hidden channels
-        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
-        self.cv2 = Conv((3 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
-        self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
-        self.attn = MaxSigmoidAttnBlock(self.c, self.c, gc=gc, ec=ec, nh=nh)
-
-    def forward(self, x, guide):
-        """Forward pass through C2f layer."""
-        y = list(self.cv1(x).chunk(2, 1))
-        y.extend(m(y[-1]) for m in self.m)
-        y.append(self.attn(y[-1], guide))
-        return self.cv2(torch.cat(y, 1))
-
-    def forward_split(self, x, guide):
-        """Forward pass using split() instead of chunk()."""
-        y = list(self.cv1(x).split((self.c, self.c), 1))
-        y.extend(m(y[-1]) for m in self.m)
-        y.append(self.attn(y[-1], guide))
-        return self.cv2(torch.cat(y, 1))
-
-
-class ImagePoolingAttn(nn.Module):
-    """ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
-
-    def __init__(self, ec=256, ch=(), ct=512, nh=8, k=3, scale=False):
-        """Initializes ImagePoolingAttn with specified arguments."""
-        super().__init__()
-
-        nf = len(ch)
-        self.query = nn.Sequential(nn.LayerNorm(ct), nn.Linear(ct, ec))
-        self.key = nn.Sequential(nn.LayerNorm(ec), nn.Linear(ec, ec))
-        self.value = nn.Sequential(nn.LayerNorm(ec), nn.Linear(ec, ec))
-        self.proj = nn.Linear(ec, ct)
-        self.scale = nn.Parameter(torch.tensor([0.0]), requires_grad=True) if scale else 1.0
-        self.projections = nn.ModuleList([nn.Conv2d(in_channels, ec, kernel_size=1) for in_channels in ch])
-        self.im_pools = nn.ModuleList([nn.AdaptiveMaxPool2d((k, k)) for _ in range(nf)])
-        self.ec = ec
-        self.nh = nh
-        self.nf = nf
-        self.hc = ec // nh
-        self.k = k
-
-    def forward(self, x, text):
-        """Executes attention mechanism on input tensor x and guide tensor."""
-        bs = x[0].shape[0]
-        assert len(x) == self.nf
-        num_patches = self.k**2
-        x = [pool(proj(x)).view(bs, -1, num_patches) for (x, proj, pool) in zip(x, self.projections, self.im_pools)]
-        x = torch.cat(x, dim=-1).transpose(1, 2)
-        q = self.query(text)
-        k = self.key(x)
-        v = self.value(x)
-
-        # q = q.reshape(1, text.shape[1], self.nh, self.hc).repeat(bs, 1, 1, 1)
-        q = q.reshape(bs, -1, self.nh, self.hc)
-        k = k.reshape(bs, -1, self.nh, self.hc)
-        v = v.reshape(bs, -1, self.nh, self.hc)
-
-        aw = torch.einsum("bnmc,bkmc->bmnk", q, k)
-        aw = aw / (self.hc**0.5)
-        aw = F.softmax(aw, dim=-1)
-
-        x = torch.einsum("bmnk,bkmc->bnmc", aw, v)
-        x = self.proj(x.reshape(bs, -1, self.ec))
-        return x * self.scale + text
-
-
-class ContrastiveHead(nn.Module):
-    """Contrastive Head for YOLO-World compute the region-text scores according to the similarity between image and text
-    features.
-    """
-
-    def __init__(self):
-        """Initializes ContrastiveHead with specified region-text similarity parameters."""
-        super().__init__()
-        # NOTE: use -10.0 to keep the init cls loss consistency with other losses
-        self.bias = nn.Parameter(torch.tensor([-10.0]))
-        self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())
-
-    def forward(self, x, w):
-        """Forward function of contrastive learning."""
-        x = F.normalize(x, dim=1, p=2)
-        w = F.normalize(w, dim=-1, p=2)
-        x = torch.einsum("bchw,bkc->bkhw", x, w)
-        return x * self.logit_scale.exp() + self.bias
-
-
-class BNContrastiveHead(nn.Module):
-    """
-    Batch Norm Contrastive Head for YOLO-World using batch norm instead of l2-normalization.
-
-    Args:
-        embed_dims (int): Embed dimensions of text and image features.
-    """
-
-    def __init__(self, embed_dims: int):
-        """Initialize ContrastiveHead with region-text similarity parameters."""
-        super().__init__()
-        self.norm = nn.BatchNorm2d(embed_dims)
-        # NOTE: use -10.0 to keep the init cls loss consistency with other losses
-        self.bias = nn.Parameter(torch.tensor([-10.0]))
-        # use -1.0 is more stable
-        self.logit_scale = nn.Parameter(-1.0 * torch.ones([]))
-
-    def forward(self, x, w):
-        """Forward function of contrastive learning."""
-        x = self.norm(x)
-        w = F.normalize(w, dim=-1, p=2)
-        x = torch.einsum("bchw,bkc->bkhw", x, w)
-        return x * self.logit_scale.exp() + self.bias
-
-
-class RepBottleneck(Bottleneck):
-    """Rep bottleneck."""
-
-    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
-        """Initializes a RepBottleneck module with customizable in/out channels, shortcut option, groups and expansion
-        ratio.
-        """
-        super().__init__(c1, c2, shortcut, g, k, e)
-        c_ = int(c2 * e)  # hidden channels
-        self.cv1 = RepConv(c1, c_, k[0], 1)
-
-
-class RepCSP(C3):
-    """Rep CSP Bottleneck with 3 convolutions."""
-
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initializes RepCSP layer with given channels, repetitions, shortcut, groups and expansion ratio."""
-        super().__init__(c1, c2, n, shortcut, g, e)
-        c_ = int(c2 * e)  # hidden channels
-        self.m = nn.Sequential(*(RepBottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
-
-
-class RepNCSPELAN4(nn.Module):
-    """CSP-ELAN."""
-
-    def __init__(self, c1, c2, c3, c4, n=1):
-        """Initializes CSP-ELAN layer with specified channel sizes, repetitions, and convolutions."""
-        super().__init__()
-        self.c = c3 // 2
-        self.cv1 = Conv(c1, c3, 1, 1)
-        self.cv2 = nn.Sequential(RepCSP(c3 // 2, c4, n), Conv(c4, c4, 3, 1))
-        self.cv3 = nn.Sequential(RepCSP(c4, c4, n), Conv(c4, c4, 3, 1))
-        self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)
-
-    def forward(self, x):
-        """Forward pass through RepNCSPELAN4 layer."""
-        y = list(self.cv1(x).chunk(2, 1))
-        y.extend((m(y[-1])) for m in [self.cv2, self.cv3])
-        return self.cv4(torch.cat(y, 1))
-
-    def forward_split(self, x):
-        """Forward pass using split() instead of chunk()."""
-        y = list(self.cv1(x).split((self.c, self.c), 1))
-        y.extend(m(y[-1]) for m in [self.cv2, self.cv3])
-        return self.cv4(torch.cat(y, 1))
-
-
-class ELAN1(RepNCSPELAN4):
-    """ELAN1 module with 4 convolutions."""
-
-    def __init__(self, c1, c2, c3, c4):
-        """Initializes ELAN1 layer with specified channel sizes."""
-        super().__init__(c1, c2, c3, c4)
-        self.c = c3 // 2
-        self.cv1 = Conv(c1, c3, 1, 1)
-        self.cv2 = Conv(c3 // 2, c4, 3, 1)
-        self.cv3 = Conv(c4, c4, 3, 1)
-        self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)
-
-
-class AConv(nn.Module):
-    """AConv."""
-
-    def __init__(self, c1, c2):
-        """Initializes AConv module with convolution layers."""
-        super().__init__()
-        self.cv1 = Conv(c1, c2, 3, 2, 1)
-
-    def forward(self, x):
-        """Forward pass through AConv layer."""
-        x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
-        return self.cv1(x)
-
-
-class ADown(nn.Module):
-    """ADown."""
-
-    def __init__(self, c1, c2):
-        """Initializes ADown module with convolution layers to downsample input from channels c1 to c2."""
-        super().__init__()
-        self.c = c2 // 2
-        self.cv1 = Conv(c1 // 2, self.c, 3, 2, 1)
-        self.cv2 = Conv(c1 // 2, self.c, 1, 1, 0)
-
-    def forward(self, x):
-        """Forward pass through ADown layer."""
-        x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
-        x1, x2 = x.chunk(2, 1)
-        x1 = self.cv1(x1)
-        x2 = torch.nn.functional.max_pool2d(x2, 3, 2, 1)
-        x2 = self.cv2(x2)
-        return torch.cat((x1, x2), 1)
-
-
-class SPPELAN(nn.Module):
-    """SPP-ELAN."""
-
-    def __init__(self, c1, c2, c3, k=5):
-        """Initializes SPP-ELAN block with convolution and max pooling layers for spatial pyramid pooling."""
-        super().__init__()
-        self.c = c3
-        self.cv1 = Conv(c1, c3, 1, 1)
-        self.cv2 = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
-        self.cv3 = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
-        self.cv4 = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
-        self.cv5 = Conv(4 * c3, c2, 1, 1)
-
-    def forward(self, x):
-        """Forward pass through SPPELAN layer."""
-        y = [self.cv1(x)]
-        y.extend(m(y[-1]) for m in [self.cv2, self.cv3, self.cv4])
-        return self.cv5(torch.cat(y, 1))
-
-
-class CBLinear(nn.Module):
-    """CBLinear."""
-
-    def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
-        """Initializes the CBLinear module, passing inputs unchanged."""
-        super(CBLinear, self).__init__()
-        self.c2s = c2s
-        self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
-
-    def forward(self, x):
-        """Forward pass through CBLinear layer."""
-        return self.conv(x).split(self.c2s, dim=1)
-
-
-class CBFuse(nn.Module):
-    """CBFuse."""
-
-    def __init__(self, idx):
-        """Initializes CBFuse module with layer index for selective feature fusion."""
-        super(CBFuse, self).__init__()
-        self.idx = idx
-
-    def forward(self, xs):
-        """Forward pass through CBFuse layer."""
-        target_size = xs[-1].shape[2:]
-        res = [F.interpolate(x[self.idx[i]], size=target_size, mode="nearest") for i, x in enumerate(xs[:-1])]
-        return torch.sum(torch.stack(res + xs[-1:]), dim=0)
-
-
-class Attention(nn.Module):
-    """
-    Attention module that performs self-attention on the input tensor.
-
-    Args:
-        dim (int): The input tensor dimension.
-        num_heads (int): The number of attention heads.
-        attn_ratio (float): The ratio of the attention key dimension to the head dimension.
-
-    Attributes:
-        num_heads (int): The number of attention heads.
-        head_dim (int): The dimension of each attention head.
-        key_dim (int): The dimension of the attention key.
-        scale (float): The scaling factor for the attention scores.
-        qkv (Conv): Convolutional layer for computing the query, key, and value.
-        proj (Conv): Convolutional layer for projecting the attended values.
-        pe (Conv): Convolutional layer for positional encoding.
-    """
-
-    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
-        """Initializes multi-head attention module with query, key, and value convolutions and positional encoding."""
-        super().__init__()
-        self.num_heads = num_heads
-        self.head_dim = dim // num_heads
-        self.key_dim = int(self.head_dim * attn_ratio)
-        self.scale = self.key_dim**-0.5
-        nh_kd = self.key_dim * num_heads
-        h = dim + nh_kd * 2
-        self.qkv = Conv(dim, h, 1, act=False)
-        self.proj = Conv(dim, dim, 1, act=False)
-        self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
-
-    def forward(self, x):
-        """
-        Forward pass of the Attention module.
-
-        Args:
-            x (torch.Tensor): The input tensor.
-
-        Returns:
-            (torch.Tensor): The output tensor after self-attention.
-        """
-        B, C, H, W = x.shape
-        N = H * W
-        qkv = self.qkv(x)
-        q, k, v = qkv.view(B, self.num_heads, self.key_dim * 2 + self.head_dim, N).split(
-            [self.key_dim, self.key_dim, self.head_dim], dim=2
-        )
-
-        attn = (q.transpose(-2, -1) @ k) * self.scale
-        attn = attn.softmax(dim=-1)
-        x = (v @ attn.transpose(-2, -1)).view(B, C, H, W) + self.pe(v.reshape(B, C, H, W))
-        x = self.proj(x)
-        return x
-
-
-class PSA(nn.Module):
-    """
-    Position-wise Spatial Attention module.
-
-    Args:
-        c1 (int): Number of input channels.
-        c2 (int): Number of output channels.
-        e (float): Expansion factor for the intermediate channels. Default is 0.5.
-
-    Attributes:
-        c (int): Number of intermediate channels.
-        cv1 (Conv): 1x1 convolution layer to reduce the number of input channels to 2*c.
-        cv2 (Conv): 1x1 convolution layer to reduce the number of output channels to c.
-        attn (Attention): Attention module for spatial attention.
-        ffn (nn.Sequential): Feed-forward network module.
-    """
-
-    def __init__(self, c1, c2, e=0.5):
-        """Initializes convolution layers, attention module, and feed-forward network with channel reduction."""
-        super().__init__()
-        assert c1 == c2
-        self.c = int(c1 * e)
-        self.cv1 = Conv(c1, 2 * self.c, 1, 1)
-        self.cv2 = Conv(2 * self.c, c1, 1)
-
-        self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
-        self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
-
-    def forward(self, x):
-        """
-        Forward pass of the PSA module.
-
-        Args:
-            x (torch.Tensor): Input tensor.
-
-        Returns:
-            (torch.Tensor): Output tensor.
-        """
-        a, b = self.cv1(x).split((self.c, self.c), dim=1)
-        b = b + self.attn(b)
-        b = b + self.ffn(b)
-        return self.cv2(torch.cat((a, b), 1))
-
-
-class SCDown(nn.Module):
-    """Spatial Channel Downsample (SCDown) module for reducing spatial and channel dimensions."""
-
-    def __init__(self, c1, c2, k, s):
-        """
-        Spatial Channel Downsample (SCDown) module.
-
-        Args:
-            c1 (int): Number of input channels.
-            c2 (int): Number of output channels.
-            k (int): Kernel size for the convolutional layer.
-            s (int): Stride for the convolutional layer.
-        """
-        super().__init__()
-        self.cv1 = Conv(c1, c2, 1, 1)
-        self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
-
-    def forward(self, x):
-        """
-        Forward pass of the SCDown module.
-
-        Args:
-            x (torch.Tensor): Input tensor.
-
-        Returns:
-            (torch.Tensor): Output tensor after applying the SCDown module.
-        """
-        return self.cv2(self.cv1(x))
diff --git a/tests/torch/test_models/yolov8/conv.py b/tests/torch/test_models/yolov8/conv.py
deleted file mode 100644
index abbd9e4d41e..00000000000
--- a/tests/torch/test_models/yolov8/conv.py
+++ /dev/null
@@ -1,348 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Copie of ultralytics/ultralytics/nn/modules/conv.py
-Commit: 673e76b86282859ead5517bd04dee896a647db93
-Convolution modules.
-"""
-
-import math
-
-import numpy as np
-import torch
-import torch.nn as nn
-
-__all__ = (
-    "Conv",
-    "Conv2",
-    "LightConv",
-    "DWConv",
-    "DWConvTranspose2d",
-    "ConvTranspose",
-    "Focus",
-    "GhostConv",
-    "ChannelAttention",
-    "SpatialAttention",
-    "CBAM",
-    "Concat",
-    "RepConv",
-)
-
-
-def autopad(k, p=None, d=1):  # kernel, padding, dilation
-    """Pad to 'same' shape outputs."""
-    if d > 1:
-        k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
-    if p is None:
-        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
-    return p
-
-
-class Conv(nn.Module):
-    """Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)."""
-
-    default_act = nn.SiLU()  # default activation
-
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
-        """Initialize Conv layer with given arguments including activation."""
-        super().__init__()
-        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p, d), groups=g, dilation=d, bias=False)
-        self.bn = nn.BatchNorm2d(c2)
-        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
-
-    def forward(self, x):
-        """Apply convolution, batch normalization and activation to input tensor."""
-        return self.act(self.bn(self.conv(x)))
-
-    def forward_fuse(self, x):
-        """Perform transposed convolution of 2D data."""
-        return self.act(self.conv(x))
-
-
-class Conv2(Conv):
-    """Simplified RepConv module with Conv fusing."""
-
-    def __init__(self, c1, c2, k=3, s=1, p=None, g=1, d=1, act=True):
-        """Initialize Conv layer with given arguments including activation."""
-        super().__init__(c1, c2, k, s, p, g=g, d=d, act=act)
-        self.cv2 = nn.Conv2d(c1, c2, 1, s, autopad(1, p, d), groups=g, dilation=d, bias=False)  # add 1x1 conv
-
-    def forward(self, x):
-        """Apply convolution, batch normalization and activation to input tensor."""
-        return self.act(self.bn(self.conv(x) + self.cv2(x)))
-
-    def forward_fuse(self, x):
-        """Apply fused convolution, batch normalization and activation to input tensor."""
-        return self.act(self.bn(self.conv(x)))
-
-    def fuse_convs(self):
-        """Fuse parallel convolutions."""
-        w = torch.zeros_like(self.conv.weight.data)
-        i = [x // 2 for x in w.shape[2:]]
-        w[:, :, i[0] : i[0] + 1, i[1] : i[1] + 1] = self.cv2.weight.data.clone()
-        self.conv.weight.data += w
-        self.__delattr__("cv2")
-        self.forward = self.forward_fuse
-
-
-class LightConv(nn.Module):
-    """
-    Light convolution with args(ch_in, ch_out, kernel).
-
-    https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
-    """
-
-    def __init__(self, c1, c2, k=1, act=nn.ReLU()):
-        """Initialize Conv layer with given arguments including activation."""
-        super().__init__()
-        self.conv1 = Conv(c1, c2, 1, act=False)
-        self.conv2 = DWConv(c2, c2, k, act=act)
-
-    def forward(self, x):
-        """Apply 2 convolutions to input tensor."""
-        return self.conv2(self.conv1(x))
-
-
-class DWConv(Conv):
-    """Depth-wise convolution."""
-
-    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
-        """Initialize Depth-wise convolution with given parameters."""
-        super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
-
-
-class DWConvTranspose2d(nn.ConvTranspose2d):
-    """Depth-wise transpose convolution."""
-
-    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
-        """Initialize DWConvTranspose2d class with given parameters."""
-        super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
-
-
-class ConvTranspose(nn.Module):
-    """Convolution transpose 2d layer."""
-
-    default_act = nn.SiLU()  # default activation
-
-    def __init__(self, c1, c2, k=2, s=2, p=0, bn=True, act=True):
-        """Initialize ConvTranspose2d layer with batch normalization and activation function."""
-        super().__init__()
-        self.conv_transpose = nn.ConvTranspose2d(c1, c2, k, s, p, bias=not bn)
-        self.bn = nn.BatchNorm2d(c2) if bn else nn.Identity()
-        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
-
-    def forward(self, x):
-        """Applies transposed convolutions, batch normalization and activation to input."""
-        return self.act(self.bn(self.conv_transpose(x)))
-
-    def forward_fuse(self, x):
-        """Applies activation and convolution transpose operation to input."""
-        return self.act(self.conv_transpose(x))
-
-
-class Focus(nn.Module):
-    """Focus wh information into c-space."""
-
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
-        """Initializes Focus object with user defined channel, convolution, padding, group and activation values."""
-        super().__init__()
-        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
-        # self.contract = Contract(gain=2)
-
-    def forward(self, x):
-        """
-        Applies convolution to concatenated tensor and returns the output.
-
-        Input shape is (b,c,w,h) and output shape is (b,4c,w/2,h/2).
-        """
-        return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
-        # return self.conv(self.contract(x))
-
-
-class GhostConv(nn.Module):
-    """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
-
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
-        """Initializes the GhostConv object with input channels, output channels, kernel size, stride, groups and
-        activation.
-        """
-        super().__init__()
-        c_ = c2 // 2  # hidden channels
-        self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
-        self.cv2 = Conv(c_, c_, 5, 1, None, c_, act=act)
-
-    def forward(self, x):
-        """Forward propagation through a Ghost Bottleneck layer with skip connection."""
-        y = self.cv1(x)
-        return torch.cat((y, self.cv2(y)), 1)
-
-
-class RepConv(nn.Module):
-    """
-    RepConv is a basic rep-style block, including training and deploy status.
-
-    This module is used in RT-DETR.
-    Based on https://github.com/DingXiaoH/RepVGG/blob/main/repvgg.py
-    """
-
-    default_act = nn.SiLU()  # default activation
-
-    def __init__(self, c1, c2, k=3, s=1, p=1, g=1, d=1, act=True, bn=False, deploy=False):
-        """Initializes Light Convolution layer with inputs, outputs & optional activation function."""
-        super().__init__()
-        assert k == 3 and p == 1
-        self.g = g
-        self.c1 = c1
-        self.c2 = c2
-        self.act = self.default_act if act is True else act if isinstance(act, nn.Module) else nn.Identity()
-
-        self.bn = nn.BatchNorm2d(num_features=c1) if bn and c2 == c1 and s == 1 else None
-        self.conv1 = Conv(c1, c2, k, s, p=p, g=g, act=False)
-        self.conv2 = Conv(c1, c2, 1, s, p=(p - k // 2), g=g, act=False)
-
-    def forward_fuse(self, x):
-        """Forward process."""
-        return self.act(self.conv(x))
-
-    def forward(self, x):
-        """Forward process."""
-        id_out = 0 if self.bn is None else self.bn(x)
-        return self.act(self.conv1(x) + self.conv2(x) + id_out)
-
-    def get_equivalent_kernel_bias(self):
-        """Returns equivalent kernel and bias by adding 3x3 kernel, 1x1 kernel and identity kernel with their biases."""
-        kernel3x3, bias3x3 = self._fuse_bn_tensor(self.conv1)
-        kernel1x1, bias1x1 = self._fuse_bn_tensor(self.conv2)
-        kernelid, biasid = self._fuse_bn_tensor(self.bn)
-        return kernel3x3 + self._pad_1x1_to_3x3_tensor(kernel1x1) + kernelid, bias3x3 + bias1x1 + biasid
-
-    def _pad_1x1_to_3x3_tensor(self, kernel1x1):
-        """Pads a 1x1 tensor to a 3x3 tensor."""
-        if kernel1x1 is None:
-            return 0
-        else:
-            return torch.nn.functional.pad(kernel1x1, [1, 1, 1, 1])
-
-    def _fuse_bn_tensor(self, branch):
-        """Generates appropriate kernels and biases for convolution by fusing branches of the neural network."""
-        if branch is None:
-            return 0, 0
-        if isinstance(branch, Conv):
-            kernel = branch.conv.weight
-            running_mean = branch.bn.running_mean
-            running_var = branch.bn.running_var
-            gamma = branch.bn.weight
-            beta = branch.bn.bias
-            eps = branch.bn.eps
-        elif isinstance(branch, nn.BatchNorm2d):
-            if not hasattr(self, "id_tensor"):
-                input_dim = self.c1 // self.g
-                kernel_value = np.zeros((self.c1, input_dim, 3, 3), dtype=np.float32)
-                for i in range(self.c1):
-                    kernel_value[i, i % input_dim, 1, 1] = 1
-                self.id_tensor = torch.from_numpy(kernel_value).to(branch.weight.device)
-            kernel = self.id_tensor
-            running_mean = branch.running_mean
-            running_var = branch.running_var
-            gamma = branch.weight
-            beta = branch.bias
-            eps = branch.eps
-        std = (running_var + eps).sqrt()
-        t = (gamma / std).reshape(-1, 1, 1, 1)
-        return kernel * t, beta - running_mean * gamma / std
-
-    def fuse_convs(self):
-        """Combines two convolution layers into a single layer and removes unused attributes from the class."""
-        if hasattr(self, "conv"):
-            return
-        kernel, bias = self.get_equivalent_kernel_bias()
-        self.conv = nn.Conv2d(
-            in_channels=self.conv1.conv.in_channels,
-            out_channels=self.conv1.conv.out_channels,
-            kernel_size=self.conv1.conv.kernel_size,
-            stride=self.conv1.conv.stride,
-            padding=self.conv1.conv.padding,
-            dilation=self.conv1.conv.dilation,
-            groups=self.conv1.conv.groups,
-            bias=True,
-        ).requires_grad_(False)
-        self.conv.weight.data = kernel
-        self.conv.bias.data = bias
-        for para in self.parameters():
-            para.detach_()
-        self.__delattr__("conv1")
-        self.__delattr__("conv2")
-        if hasattr(self, "nm"):
-            self.__delattr__("nm")
-        if hasattr(self, "bn"):
-            self.__delattr__("bn")
-        if hasattr(self, "id_tensor"):
-            self.__delattr__("id_tensor")
-
-
-class ChannelAttention(nn.Module):
-    """Channel-attention module https://github.com/open-mmlab/mmdetection/tree/v3.0.0rc1/configs/rtmdet."""
-
-    def __init__(self, channels: int) -> None:
-        """Initializes the class and sets the basic configurations and instance variables required."""
-        super().__init__()
-        self.pool = nn.AdaptiveAvgPool2d(1)
-        self.fc = nn.Conv2d(channels, channels, 1, 1, 0, bias=True)
-        self.act = nn.Sigmoid()
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """Applies forward pass using activation on convolutions of the input, optionally using batch normalization."""
-        return x * self.act(self.fc(self.pool(x)))
-
-
-class SpatialAttention(nn.Module):
-    """Spatial-attention module."""
-
-    def __init__(self, kernel_size=7):
-        """Initialize Spatial-attention module with kernel size argument."""
-        super().__init__()
-        assert kernel_size in {3, 7}, "kernel size must be 3 or 7"
-        padding = 3 if kernel_size == 7 else 1
-        self.cv1 = nn.Conv2d(2, 1, kernel_size, padding=padding, bias=False)
-        self.act = nn.Sigmoid()
-
-    def forward(self, x):
-        """Apply channel and spatial attention on input for feature recalibration."""
-        return x * self.act(self.cv1(torch.cat([torch.mean(x, 1, keepdim=True), torch.max(x, 1, keepdim=True)[0]], 1)))
-
-
-class CBAM(nn.Module):
-    """Convolutional Block Attention Module."""
-
-    def __init__(self, c1, kernel_size=7):
-        """Initialize CBAM with given input channel (c1) and kernel size."""
-        super().__init__()
-        self.channel_attention = ChannelAttention(c1)
-        self.spatial_attention = SpatialAttention(kernel_size)
-
-    def forward(self, x):
-        """Applies the forward pass through C1 module."""
-        return self.spatial_attention(self.channel_attention(x))
-
-
-class Concat(nn.Module):
-    """Concatenate a list of tensors along dimension."""
-
-    def __init__(self, dimension=1):
-        """Concatenates a list of tensors along a specified dimension."""
-        super().__init__()
-        self.d = dimension
-
-    def forward(self, x):
-        """Forward pass for the YOLOv8 mask Proto module."""
-        return torch.cat(x, self.d)
diff --git a/tests/torch/test_models/yolov8/head.py b/tests/torch/test_models/yolov8/head.py
deleted file mode 100644
index 5dcc8e99141..00000000000
--- a/tests/torch/test_models/yolov8/head.py
+++ /dev/null
@@ -1,432 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Source: ultralytics/ultralytics/nn/modules/transformer.py
-Commit: 673e76b86282859ead5517bd04dee896a647db93
-Model head modules.
-"""
-
-import copy
-import math
-
-import torch
-import torch.nn as nn
-
-from .block import DFL
-from .block import BNContrastiveHead
-from .block import ContrastiveHead
-from .block import Proto
-from .conv import Conv
-
-
-def make_anchors(feats, strides, grid_cell_offset=0.5):
-    """Generate anchors from features."""
-    anchor_points, stride_tensor = [], []
-    assert feats is not None
-    dtype, device = feats[0].dtype, feats[0].device
-    for i, stride in enumerate(strides):
-        _, _, h, w = feats[i].shape
-        sx = torch.arange(end=w, device=device, dtype=dtype) + grid_cell_offset  # shift x
-        sy = torch.arange(end=h, device=device, dtype=dtype) + grid_cell_offset  # shift y
-        sy, sx = torch.meshgrid(sy, sx)
-        anchor_points.append(torch.stack((sx, sy), -1).view(-1, 2))
-        stride_tensor.append(torch.full((h * w, 1), stride, dtype=dtype, device=device))
-    return torch.cat(anchor_points), torch.cat(stride_tensor)
-
-
-def dist2rbox(pred_dist, pred_angle, anchor_points, dim=-1):
-    """
-    Decode predicted object bounding box coordinates from anchor points and distribution.
-
-    Args:
-        pred_dist (torch.Tensor): Predicted rotated distance, (bs, h*w, 4).
-        pred_angle (torch.Tensor): Predicted angle, (bs, h*w, 1).
-        anchor_points (torch.Tensor): Anchor points, (h*w, 2).
-    Returns:
-        (torch.Tensor): Predicted rotated bounding boxes, (bs, h*w, 4).
-    """
-    lt, rb = pred_dist.split(2, dim=dim)
-    cos, sin = torch.cos(pred_angle), torch.sin(pred_angle)
-    # (bs, h*w, 1)
-    xf, yf = ((rb - lt) / 2).split(1, dim=dim)
-    x, y = xf * cos - yf * sin, xf * sin + yf * cos
-    xy = torch.cat([x, y], dim=dim) + anchor_points
-    return torch.cat([xy, lt + rb], dim=dim)
-
-
-def dist2bbox(distance, anchor_points, xywh=True, dim=-1):
-    """Transform distance(ltrb) to box(xywh or xyxy)."""
-    lt, rb = distance.chunk(2, dim)
-    x1y1 = anchor_points - lt
-    x2y2 = anchor_points + rb
-    if xywh:
-        c_xy = (x1y1 + x2y2) / 2
-        wh = x2y2 - x1y1
-        return torch.cat((c_xy, wh), dim)  # xywh bbox
-    return torch.cat((x1y1, x2y2), dim)  # xyxy bbox
-
-
-class Detect(nn.Module):
-    """YOLOv8 Detect head for detection models."""
-
-    dynamic = False  # force grid reconstruction
-    export = False  # export mode
-    end2end = False  # end2end
-    max_det = 300  # max_det
-    shape = None
-    anchors = torch.empty(0)  # init
-    strides = torch.empty(0)  # init
-
-    def __init__(self, nc=80, ch=()):
-        """Initializes the YOLOv8 detection layer with specified number of classes and channels."""
-        super().__init__()
-        self.nc = nc  # number of classes
-        self.nl = len(ch)  # number of detection layers
-        self.reg_max = 16  # DFL channels (ch[0] // 16 to scale 4/8/12/16/20 for n/s/m/l/x)
-        self.no = nc + self.reg_max * 4  # number of outputs per anchor
-        self.stride = torch.zeros(self.nl)  # strides computed during build
-        c2, c3 = max((16, ch[0] // 4, self.reg_max * 4)), max(ch[0], min(self.nc, 100))  # channels
-        self.cv2 = nn.ModuleList(
-            nn.Sequential(Conv(x, c2, 3), Conv(c2, c2, 3), nn.Conv2d(c2, 4 * self.reg_max, 1)) for x in ch
-        )
-        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, self.nc, 1)) for x in ch)
-        self.dfl = DFL(self.reg_max) if self.reg_max > 1 else nn.Identity()
-
-        if self.end2end:
-            self.one2one_cv2 = copy.deepcopy(self.cv2)
-            self.one2one_cv3 = copy.deepcopy(self.cv3)
-
-    def forward(self, x):
-        """Concatenates and returns predicted bounding boxes and class probabilities."""
-        if self.end2end:
-            return self.forward_end2end(x)
-
-        for i in range(self.nl):
-            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
-        if self.training:  # Training path
-            return x
-        y = self._inference(x)
-        return y if self.export else (y, x)
-
-    def forward_end2end(self, x):
-        """
-        Performs forward pass of the v10Detect module.
-
-        Args:
-            x (tensor): Input tensor.
-
-        Returns:
-            (dict, tensor): If not in training mode,
-            returns a dictionary containing the outputs of both
-            one2many and one2one detections.
-            If in training mode, returns a dictionary containing
-            the outputs of one2many and one2one detections separately.
-        """
-        x_detach = [xi.detach() for xi in x]
-        one2one = [
-            torch.cat((self.one2one_cv2[i](x_detach[i]), self.one2one_cv3[i](x_detach[i])), 1) for i in range(self.nl)
-        ]
-        for i in range(self.nl):
-            x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
-        if self.training:  # Training path
-            return {"one2many": x, "one2one": one2one}
-
-        y = self._inference(one2one)
-        y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
-        return y if self.export else (y, {"one2many": x, "one2one": one2one})
-
-    def _inference(self, x):
-        """Decode predicted bounding boxes and class probabilities based on multiple-level feature maps."""
-        # Inference path
-        shape = x[0].shape  # BCHW
-        x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
-        if self.dynamic or self.shape != shape:
-            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
-            self.shape = shape
-
-        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
-            box = x_cat[:, : self.reg_max * 4]
-            cls = x_cat[:, self.reg_max * 4 :]
-        else:
-            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
-
-        if self.export and self.format in {"tflite", "edgetpu"}:
-            # Precompute normalization factor to increase numerical stability
-            # See https://github.com/ultralytics/ultralytics/issues/7371
-            grid_h = shape[2]
-            grid_w = shape[3]
-            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
-        else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
-
-        return torch.cat((dbox, cls.sigmoid()), 1)
-
-    def bias_init(self):
-        """Initialize Detect() biases, WARNING: requires stride availability."""
-        m = self  # self.model[-1]  # Detect() module
-        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
-        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
-        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
-            a[-1].bias.data[:] = 1.0  # box
-            b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
-        if self.end2end:
-            for a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # from
-                a[-1].bias.data[:] = 1.0  # box
-                b[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
-
-    def decode_bboxes(self, bboxes, anchors):
-        """Decode bounding boxes."""
-        return dist2bbox(bboxes, anchors, xywh=not self.end2end, dim=1)
-
-    @staticmethod
-    def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80):
-        """
-        Post-processes the predictions obtained from a YOLOv10 model.
-
-        Args:
-            preds (torch.Tensor): The predictions obtained from the model.
-                It should have a shape of (batch_size, num_boxes, 4 + num_classes).
-            max_det (int): The maximum number of detections to keep.
-            nc (int, optional): The number of classes. Defaults to 80.
-
-        Returns:
-            (torch.Tensor): The post-processed predictions with shape (batch_size, max_det, 6),
-                including bounding boxes, scores and cls.
-        """
-        assert 4 + nc == preds.shape[-1]
-        boxes, scores = preds.split([4, nc], dim=-1)
-        max_scores = scores.amax(dim=-1)
-        max_scores, index = torch.topk(max_scores, min(max_det, max_scores.shape[1]), axis=-1)
-        index = index.unsqueeze(-1)
-        boxes = torch.gather(boxes, dim=1, index=index.repeat(1, 1, boxes.shape[-1]))
-        scores = torch.gather(scores, dim=1, index=index.repeat(1, 1, scores.shape[-1]))
-
-        # NOTE: simplify but result slightly lower mAP
-        # scores, labels = scores.max(dim=-1)
-        # return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)
-
-        scores, index = torch.topk(scores.flatten(1), max_det, axis=-1)
-        labels = index % nc
-        index = index // nc
-        boxes = boxes.gather(dim=1, index=index.unsqueeze(-1).repeat(1, 1, boxes.shape[-1]))
-
-        return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1).to(boxes.dtype)], dim=-1)
-
-
-class Segment(Detect):
-    """YOLOv8 Segment head for segmentation models."""
-
-    def __init__(self, nc=80, nm=32, npr=256, ch=()):
-        """Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers."""
-        super().__init__(nc, ch)
-        self.nm = nm  # number of masks
-        self.npr = npr  # number of protos
-        self.proto = Proto(ch[0], self.npr, self.nm)  # protos
-
-        c4 = max(ch[0] // 4, self.nm)
-        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
-
-    def forward(self, x):
-        """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
-        p = self.proto(x[0])  # mask protos
-        bs = p.shape[0]  # batch size
-
-        mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2)  # mask coefficients
-        x = Detect.forward(self, x)
-        if self.training:
-            return x, mc, p
-        return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
-
-
-class OBB(Detect):
-    """YOLOv8 OBB detection head for detection with rotation models."""
-
-    def __init__(self, nc=80, ne=1, ch=()):
-        """Initialize OBB with number of classes `nc` and layer channels `ch`."""
-        super().__init__(nc, ch)
-        self.ne = ne  # number of extra parameters
-
-        c4 = max(ch[0] // 4, self.ne)
-        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
-
-    def forward(self, x):
-        """Concatenates and returns predicted bounding boxes and class probabilities."""
-        bs = x[0].shape[0]  # batch size
-        angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2)  # OBB theta logits
-        # NOTE: set `angle` as an attribute so that `decode_bboxes` could use it.
-        angle = (angle.sigmoid() - 0.25) * math.pi  # [-pi/4, 3pi/4]
-        # angle = angle.sigmoid() * math.pi / 2  # [0, pi/2]
-        if not self.training:
-            self.angle = angle
-        x = Detect.forward(self, x)
-        if self.training:
-            return x, angle
-        return torch.cat([x, angle], 1) if self.export else (torch.cat([x[0], angle], 1), (x[1], angle))
-
-    def decode_bboxes(self, bboxes, anchors):
-        """Decode rotated bounding boxes."""
-        return dist2rbox(bboxes, self.angle, anchors, dim=1)
-
-
-class Pose(Detect):
-    """YOLOv8 Pose head for keypoints models."""
-
-    def __init__(self, nc=80, kpt_shape=(17, 3), ch=()):
-        """Initialize YOLO network with default parameters and Convolutional Layers."""
-        super().__init__(nc, ch)
-        self.kpt_shape = kpt_shape  # number of keypoints, number of dims (2 for x,y or 3 for x,y,visible)
-        self.nk = kpt_shape[0] * kpt_shape[1]  # number of keypoints total
-
-        c4 = max(ch[0] // 4, self.nk)
-        self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
-
-    def forward(self, x):
-        """Perform forward pass through YOLO model and return predictions."""
-        bs = x[0].shape[0]  # batch size
-        kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
-        x = Detect.forward(self, x)
-        if self.training:
-            return x, kpt
-        pred_kpt = self.kpts_decode(bs, kpt)
-        return torch.cat([x, pred_kpt], 1) if self.export else (torch.cat([x[0], pred_kpt], 1), (x[1], kpt))
-
-    def kpts_decode(self, bs, kpts):
-        """Decodes keypoints."""
-        ndim = self.kpt_shape[1]
-        if self.export:  # required for TFLite export to avoid 'PLACEHOLDER_FOR_GREATER_OP_CODES' bug
-            y = kpts.view(bs, *self.kpt_shape, -1)
-            a = (y[:, :, :2] * 2.0 + (self.anchors - 0.5)) * self.strides
-            if ndim == 3:
-                a = torch.cat((a, y[:, :, 2:3].sigmoid()), 2)
-            return a.view(bs, self.nk, -1)
-        else:
-            y = kpts.clone()
-            if ndim == 3:
-                y[:, 2::3] = y[:, 2::3].sigmoid()  # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug)
-            y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
-            y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
-            return y
-
-
-class Classify(nn.Module):
-    """YOLOv8 classification head, i.e. x(b,c1,20,20) to x(b,c2)."""
-
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):
-        """Initializes YOLOv8 classification head with specified input and output channels, kernel size, stride,
-        padding, and groups.
-        """
-        super().__init__()
-        c_ = 1280  # efficientnet_b0 size
-        self.conv = Conv(c1, c_, k, s, p, g)
-        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
-        self.drop = nn.Dropout(p=0.0, inplace=True)
-        self.linear = nn.Linear(c_, c2)  # to x(b,c2)
-
-    def forward(self, x):
-        """Performs a forward pass of the YOLO model on input image data."""
-        if isinstance(x, list):
-            x = torch.cat(x, 1)
-        x = self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))
-        return x if self.training else x.softmax(1)
-
-
-class WorldDetect(Detect):
-    """Head for integrating YOLOv8 detection models with semantic understanding from text embeddings."""
-
-    def __init__(self, nc=80, embed=512, with_bn=False, ch=()):
-        """Initialize YOLOv8 detection layer with nc classes and layer channels ch."""
-        super().__init__(nc, ch)
-        c3 = max(ch[0], min(self.nc, 100))
-        self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
-        self.cv4 = nn.ModuleList(BNContrastiveHead(embed) if with_bn else ContrastiveHead() for _ in ch)
-
-    def forward(self, x, text):
-        """Concatenates and returns predicted bounding boxes and class probabilities."""
-        for i in range(self.nl):
-            x[i] = torch.cat((self.cv2[i](x[i]), self.cv4[i](self.cv3[i](x[i]), text)), 1)
-        if self.training:
-            return x
-
-        # Inference path
-        shape = x[0].shape  # BCHW
-        x_cat = torch.cat([xi.view(shape[0], self.nc + self.reg_max * 4, -1) for xi in x], 2)
-        if self.dynamic or self.shape != shape:
-            self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
-            self.shape = shape
-
-        if self.export and self.format in {"saved_model", "pb", "tflite", "edgetpu", "tfjs"}:  # avoid TF FlexSplitV ops
-            box = x_cat[:, : self.reg_max * 4]
-            cls = x_cat[:, self.reg_max * 4 :]
-        else:
-            box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
-
-        if self.export and self.format in {"tflite", "edgetpu"}:
-            # Precompute normalization factor to increase numerical stability
-            # See https://github.com/ultralytics/ultralytics/issues/7371
-            grid_h = shape[2]
-            grid_w = shape[3]
-            grid_size = torch.tensor([grid_w, grid_h, grid_w, grid_h], device=box.device).reshape(1, 4, 1)
-            norm = self.strides / (self.stride[0] * grid_size)
-            dbox = self.decode_bboxes(self.dfl(box) * norm, self.anchors.unsqueeze(0) * norm[:, :2])
-        else:
-            dbox = self.decode_bboxes(self.dfl(box), self.anchors.unsqueeze(0)) * self.strides
-
-        y = torch.cat((dbox, cls.sigmoid()), 1)
-        return y if self.export else (y, x)
-
-    def bias_init(self):
-        """Initialize Detect() biases, WARNING: requires stride availability."""
-        m = self  # self.model[-1]  # Detect() module
-        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1
-        # ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequency
-        for a, b, s in zip(m.cv2, m.cv3, m.stride):  # from
-            a[-1].bias.data[:] = 1.0  # box
-            # b[-1].bias.data[:] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)
-
-
-class v10Detect(Detect):
-    """
-    v10 Detection head from https://arxiv.org/pdf/2405.14458
-
-    Args:
-        nc (int): Number of classes.
-        ch (tuple): Tuple of channel sizes.
-
-    Attributes:
-        max_det (int): Maximum number of detections.
-
-    Methods:
-        __init__(self, nc=80, ch=()): Initializes the v10Detect object.
-        forward(self, x): Performs forward pass of the v10Detect module.
-        bias_init(self): Initializes biases of the Detect module.
-
-    """
-
-    end2end = True
-
-    def __init__(self, nc=80, ch=()):
-        """Initializes the v10Detect object with the specified number of classes and input channels."""
-        super().__init__(nc, ch)
-        c3 = max(ch[0], min(self.nc, 100))  # channels
-        # Light cls head
-        self.cv3 = nn.ModuleList(
-            nn.Sequential(
-                nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)),
-                nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)),
-                nn.Conv2d(c3, self.nc, 1),
-            )
-            for x in ch
-        )
-        self.one2one_cv3 = copy.deepcopy(self.cv3)
diff --git a/tests/torch/test_models/yolov8/model.py b/tests/torch/test_models/yolov8/model.py
deleted file mode 100644
index 5663ed12156..00000000000
--- a/tests/torch/test_models/yolov8/model.py
+++ /dev/null
@@ -1,258 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Source: ultralytics/ultralytics/nn/tasks.py
-Commit: 673e76b86282859ead5517bd04dee896a647db93
-"""
-
-import contextlib
-import math
-
-import torch
-import torch.nn as nn
-
-from tests.torch.test_models.yolov8.block import C1
-from tests.torch.test_models.yolov8.block import C2
-from tests.torch.test_models.yolov8.block import C3
-from tests.torch.test_models.yolov8.block import C3TR
-from tests.torch.test_models.yolov8.block import ELAN1
-from tests.torch.test_models.yolov8.block import PSA
-from tests.torch.test_models.yolov8.block import SPP
-from tests.torch.test_models.yolov8.block import SPPELAN
-from tests.torch.test_models.yolov8.block import SPPF
-from tests.torch.test_models.yolov8.block import AConv
-from tests.torch.test_models.yolov8.block import ADown
-from tests.torch.test_models.yolov8.block import Bottleneck
-from tests.torch.test_models.yolov8.block import BottleneckCSP
-from tests.torch.test_models.yolov8.block import C2f
-from tests.torch.test_models.yolov8.block import C2fAttn
-from tests.torch.test_models.yolov8.block import C3Ghost
-from tests.torch.test_models.yolov8.block import C3x
-from tests.torch.test_models.yolov8.block import CBFuse
-from tests.torch.test_models.yolov8.block import CBLinear
-from tests.torch.test_models.yolov8.block import GhostBottleneck
-from tests.torch.test_models.yolov8.block import GhostConv
-from tests.torch.test_models.yolov8.block import HGBlock
-from tests.torch.test_models.yolov8.block import HGStem
-from tests.torch.test_models.yolov8.block import ImagePoolingAttn
-from tests.torch.test_models.yolov8.block import RepC3
-from tests.torch.test_models.yolov8.block import RepNCSPELAN4
-from tests.torch.test_models.yolov8.block import ResNetLayer
-from tests.torch.test_models.yolov8.block import SCDown
-from tests.torch.test_models.yolov8.conv import Concat
-from tests.torch.test_models.yolov8.conv import Conv
-from tests.torch.test_models.yolov8.conv import ConvTranspose
-from tests.torch.test_models.yolov8.conv import DWConv
-from tests.torch.test_models.yolov8.conv import DWConvTranspose2d
-from tests.torch.test_models.yolov8.conv import Focus
-from tests.torch.test_models.yolov8.head import OBB
-from tests.torch.test_models.yolov8.head import Classify
-from tests.torch.test_models.yolov8.head import Detect
-from tests.torch.test_models.yolov8.head import Pose
-from tests.torch.test_models.yolov8.head import Segment
-from tests.torch.test_models.yolov8.head import WorldDetect
-from tests.torch.test_models.yolov8.head import v10Detect
-
-
-def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
-    """Parse a YOLO model.yaml dictionary into a PyTorch model."""
-    import ast
-
-    # Args
-    max_channels = float("inf")
-    nc, act, scales = (d.get(x) for x in ("nc", "activation", "scales"))
-    depth, width, kpt_shape = (d.get(x, 1.0) for x in ("depth_multiple", "width_multiple", "kpt_shape"))
-    if scales:
-        scale = d.get("scale")
-        if not scale:
-            scale = tuple(scales.keys())[0]
-            print(f"WARNING ⚠️ no model scale passed. Assuming scale='{scale}'.")
-        depth, width, max_channels = scales[scale]
-
-    if act:
-        Conv.default_act = eval(act)  # redefine default activation, i.e. Conv.default_act = nn.SiLU()
-        if verbose:
-            print(f"activation: {act}")  # print
-
-    if verbose:
-        print(f"\n{'':>3}{'from':>20}{'n':>3}{'params':>10}  {'module':<45}{'arguments':<30}")
-    ch = [ch]
-    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
-    for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]):  # from, number, module, args
-        m = getattr(torch.nn, m[3:]) if "nn." in m else globals()[m]  # get module
-        for j, a in enumerate(args):
-            if isinstance(a, str):
-                with contextlib.suppress(ValueError):
-                    args[j] = locals()[a] if a in locals() else ast.literal_eval(a)
-
-        n = n_ = max(round(n * depth), 1) if n > 1 else n  # depth gain
-        if m in {
-            Classify,
-            Conv,
-            ConvTranspose,
-            GhostConv,
-            Bottleneck,
-            GhostBottleneck,
-            SPP,
-            SPPF,
-            DWConv,
-            Focus,
-            BottleneckCSP,
-            C1,
-            C2,
-            C2f,
-            RepNCSPELAN4,
-            ELAN1,
-            ADown,
-            AConv,
-            SPPELAN,
-            C2fAttn,
-            C3,
-            C3TR,
-            C3Ghost,
-            nn.ConvTranspose2d,
-            DWConvTranspose2d,
-            C3x,
-            RepC3,
-            PSA,
-            SCDown,
-            # C2fCIB,
-        }:
-            c1, c2 = ch[f], args[0]
-            if c2 != nc:  # if c2 not equal to number of classes (i.e. for Classify() output)
-                c2 = make_divisible(min(c2, max_channels) * width, 8)
-            if m is C2fAttn:
-                args[1] = make_divisible(min(args[1], max_channels // 2) * width, 8)  # embed channels
-                args[2] = int(
-                    max(round(min(args[2], max_channels // 2 // 32)) * width, 1) if args[2] > 1 else args[2]
-                )  # num heads
-
-            args = [c1, c2, *args[1:]]
-            # if m in {BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3, C2fCIB}:
-            if m in {BottleneckCSP, C1, C2, C2f, C2fAttn, C3, C3TR, C3Ghost, C3x, RepC3}:
-                args.insert(2, n)  # number of repeats
-                n = 1
-        # elif m is AIFI:
-        #    args = [ch[f], *args]
-        elif m in {HGStem, HGBlock}:
-            c1, cm, c2 = ch[f], args[0], args[1]
-            args = [c1, cm, c2, *args[2:]]
-            if m is HGBlock:
-                args.insert(4, n)  # number of repeats
-                n = 1
-        elif m is ResNetLayer:
-            c2 = args[1] if args[3] else args[1] * 4
-        elif m is nn.BatchNorm2d:
-            args = [ch[f]]
-        elif m is Concat:
-            c2 = sum(ch[x] for x in f)
-        elif m in {Detect, WorldDetect, Segment, Pose, OBB, ImagePoolingAttn, v10Detect}:
-            args.append([ch[x] for x in f])
-            if m is Segment:
-                args[2] = make_divisible(min(args[2], max_channels) * width, 8)
-        # elif m is RTDETRDecoder:  # special case, channels arg must be passed in index 1
-        #    args.insert(1, [ch[x] for x in f])
-        elif m is CBLinear:
-            c2 = args[0]
-            c1 = ch[f]
-            args = [c1, c2, *args[1:]]
-        elif m is CBFuse:
-            c2 = ch[f[-1]]
-        else:
-            c2 = ch[f]
-
-        m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args)  # module
-        t = str(m)[8:-2].replace("__main__.", "")  # module type
-        m.np = sum(x.numel() for x in m_.parameters())  # number params
-        m_.i, m_.f, m_.type = i, f, t  # attach index, 'from' index, type
-        if verbose:
-            print(f"{i:>3}{str(f):>20}{n_:>3}{m.np:10.0f}  {t:<45}{str(args):<30}")  # print
-        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
-        layers.append(m_)
-        if i == 0:
-            ch = []
-        ch.append(c2)
-    return nn.Sequential(*layers), sorted(save)
-
-
-def make_divisible(x, divisor):
-    """
-    Returns the nearest number that is divisible by the given divisor.
-
-    Args:
-        x (int): The number to make divisible.
-        divisor (int | torch.Tensor): The divisor.
-
-    Returns:
-        (int): The nearest number divisible by the divisor.
-    """
-    if isinstance(divisor, torch.Tensor):
-        divisor = int(divisor.max())  # to int
-    return math.ceil(x / divisor) * divisor
-
-
-YOLOV8_CONFIG = {
-    "nc": 80,
-    "scales": {
-        "n": [0.33, 0.25, 1024],
-        "s": [0.33, 0.5, 1024],
-        "m": [0.67, 0.75, 768],
-        "l": [1.0, 1.0, 512],
-        "x": [1.0, 1.25, 512],
-    },
-    "backbone": [
-        [-1, 1, "Conv", [64, 3, 2]],
-        [-1, 1, "Conv", [128, 3, 2]],
-        [-1, 3, "C2f", [128, True]],
-        [-1, 1, "Conv", [256, 3, 2]],
-        [-1, 6, "C2f", [256, True]],
-        [-1, 1, "Conv", [512, 3, 2]],
-        [-1, 6, "C2f", [512, True]],
-        [-1, 1, "Conv", [1024, 3, 2]],
-        [-1, 3, "C2f", [1024, True]],
-        [-1, 1, "SPPF", [1024, 5]],
-    ],
-    "head": [
-        [-1, 1, "nn.Upsample", ["None", 2, "nearest"]],
-        [[-1, 6], 1, "Concat", [1]],
-        [-1, 3, "C2f", [512]],
-        [-1, 1, "nn.Upsample", ["None", 2, "nearest"]],
-        [[-1, 4], 1, "Concat", [1]],
-        [-1, 3, "C2f", [256]],
-        [-1, 1, "Conv", [256, 3, 2]],
-        [[-1, 12], 1, "Concat", [1]],
-        [-1, 3, "C2f", [512]],
-        [-1, 1, "Conv", [512, 3, 2]],
-        [[-1, 9], 1, "Concat", [1]],
-        [-1, 3, "C2f", [1024]],
-        [[15, 18, 21], 1, "Detect", ["nc"]],
-    ],
-    "scale": "n",
-    "yaml_file": "yolov8n.yaml",
-    "ch": 3,
-}
-
-
-class YoloV8Model(torch.nn.Module):
-    def __init__(self):
-        super().__init__()
-        self.model, self.save = parse_model(YOLOV8_CONFIG, YOLOV8_CONFIG["ch"], verbose=False)
-
-    def forward(self, x):
-        y = []
-        for m in self.model:
-            if m.f != -1:  # if not from previous layer
-                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
-            x = m(x)  # run
-            y.append(x if m.i in self.save else None)  # save output
-        return x
diff --git a/tests/torch/test_models/yolov8/transformer.py b/tests/torch/test_models/yolov8/transformer.py
deleted file mode 100644
index 86da0c834bc..00000000000
--- a/tests/torch/test_models/yolov8/transformer.py
+++ /dev/null
@@ -1,97 +0,0 @@
-# Copyright (c) 2024 Intel Corporation
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#      http://www.apache.org/licenses/LICENSE-2.0
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-Source: ultralytics/ultralytics/nn/modules/transformer.py
-Commit: 673e76b86282859ead5517bd04dee896a647db93
-Transformer modules.
-"""
-
-
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-
-from .conv import Conv
-
-
-class TransformerLayer(nn.Module):
-    """Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)."""
-
-    def __init__(self, c, num_heads):
-        """Initializes a self-attention mechanism using linear transformations and multi-head attention."""
-        super().__init__()
-        self.q = nn.Linear(c, c, bias=False)
-        self.k = nn.Linear(c, c, bias=False)
-        self.v = nn.Linear(c, c, bias=False)
-        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
-        self.fc1 = nn.Linear(c, c, bias=False)
-        self.fc2 = nn.Linear(c, c, bias=False)
-
-    def forward(self, x):
-        """Apply a transformer block to the input x and return the output."""
-        x = self.ma(self.q(x), self.k(x), self.v(x))[0] + x
-        return self.fc2(self.fc1(x)) + x
-
-
-class TransformerBlock(nn.Module):
-    """Vision Transformer https://arxiv.org/abs/2010.11929."""
-
-    def __init__(self, c1, c2, num_heads, num_layers):
-        """Initialize a Transformer module with position embedding and specified number of heads and layers."""
-        super().__init__()
-        self.conv = None
-        if c1 != c2:
-            self.conv = Conv(c1, c2)
-        self.linear = nn.Linear(c2, c2)  # learnable position embedding
-        self.tr = nn.Sequential(*(TransformerLayer(c2, num_heads) for _ in range(num_layers)))
-        self.c2 = c2
-
-    def forward(self, x):
-        """Forward propagates the input through the bottleneck module."""
-        if self.conv is not None:
-            x = self.conv(x)
-        b, _, w, h = x.shape
-        p = x.flatten(2).permute(2, 0, 1)
-        return self.tr(p + self.linear(p)).permute(1, 2, 0).reshape(b, self.c2, w, h)
-
-
-class MLPBlock(nn.Module):
-    """Implements a single block of a multi-layer perceptron."""
-
-    def __init__(self, embedding_dim, mlp_dim, act=nn.GELU):
-        """Initialize the MLPBlock with specified embedding dimension, MLP dimension, and activation function."""
-        super().__init__()
-        self.lin1 = nn.Linear(embedding_dim, mlp_dim)
-        self.lin2 = nn.Linear(mlp_dim, embedding_dim)
-        self.act = act()
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        """Forward pass for the MLPBlock."""
-        return self.lin2(self.act(self.lin1(x)))
-
-
-class MLP(nn.Module):
-    """Implements a simple multi-layer perceptron (also called FFN)."""
-
-    def __init__(self, input_dim, hidden_dim, output_dim, num_layers):
-        """Initialize the MLP with specified input, hidden, output dimensions and number of layers."""
-        super().__init__()
-        self.num_layers = num_layers
-        h = [hidden_dim] * (num_layers - 1)
-        self.layers = nn.ModuleList(nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim]))
-
-    def forward(self, x):
-        """Forward pass for the entire MLP."""
-        for i, layer in enumerate(self.layers):
-            x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
-        return x