LLM POC updates
cavusmustafa committed Nov 3, 2023
1 parent caa81a0 · commit d78d47b
Showing 22 changed files with 526 additions and 93 deletions.
@@ -112,46 +112,48 @@ def _call(*args):


def fx_openvino(subgraph, example_inputs):
    #try:
    print("DEBUG - fx_openvino - A")
    executor_parameters = None
    inputs_reversed = False
    if os.getenv("OPENVINO_TORCH_MODEL_CACHING") is not None:
        # Create a hash to be used for caching
        model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
        executor_parameters = {"model_hash_str": model_hash_str}
        # Check if the model was fully supported and already cached
        example_inputs.reverse()
        inputs_reversed = True
        maybe_fs_cached_name = cached_model_name(model_hash_str + "_fs", get_device(), example_inputs, cache_root_path())
        if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
            # Model is fully supported and already cached. Run the cached OV model directly.
            compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, *example_inputs)

            def _call(*args):
                res = execute_cached(compiled_model, *args)
                return res
            return _call
    if inputs_reversed:
        example_inputs.reverse()
    model = make_fx(subgraph)(*example_inputs)
    with torch.no_grad():
        model.eval()
    partitioner = Partitioner()
    compiled_model = partitioner.make_partitions(model)

    if executor_parameters is not None and 'model_hash_str' in executor_parameters:
        # Check if the model is fully supported.
        fully_supported = partitioner.check_fully_supported(compiled_model)
        if fully_supported:
            executor_parameters["model_hash_str"] += "_fs"

    def _call(*args):
        print("DEBUG - fx_openvino - B")
        res = execute(compiled_model, *args, executor="openvino",
                      executor_parameters=executor_parameters)
        return res
    return _call
    #except Exception as e:
    #    log.debug(f"Failed in OpenVINO execution: {e}")
    #    return compile_fx(subgraph, example_inputs)
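
A minimal usage sketch for the path above (illustration only, not part of the commit; it assumes this backend is registered with torch.compile under the name "openvino" and uses a toy model):

    import os
    import torch

    os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1"    # enable the hash-based cache path in fx_openvino
    model = torch.nn.Linear(4096, 4096).eval()
    compiled = torch.compile(model, backend="openvino")
    out = compiled(torch.randn(1, 4096))                # first call compiles; later runs can hit the on-disk cache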

def reset():
    clear_caches()
@@ -34,9 +34,15 @@ def cached_model_name(model_hash_str, device, args, cache_root, reversed = False
inputs_str = ""
for idx, input_data in enumerate(args):
if reversed:
inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
if isinstance(input_data, torch.SymInt):
inputs_str = "_"+str(type(input_data)) + inputs_str
else:
inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
else:
inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
if isinstance(input_data, torch.SymInt):
inputs_str += "_"+str(type(input_data))
else:
inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
inputs_str = sha256(inputs_str.encode('utf-8')).hexdigest()
file_name += inputs_str
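
For reference, here is what each branch contributes to the hashed signature (a worked example, not from the commit):

    import torch

    t = torch.ones(2, 3)
    # Tensor branch: "_" + t.type() + trimmed size string -> "_torch.FloatTensor[2,3]"
    part = "_" + str(t.type()) + str(t.size())[11:-1].replace(" ", "")
    # SymInt branch: a symbolic dim has no .type()/.size() like a Tensor, so only
    # its class name is folded into the hash -> "_<class 'torch.SymInt'>"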

@@ -101,7 +107,46 @@ def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None):
    input_types = []
    for idx, input_data in enumerate(args):
        input_types.append(input_data.type())
        if input_data.size() == torch.Size([17, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([17, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([17, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([18, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([18, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([18, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([48, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([48, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([48, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([22, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([22, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([22, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([23, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([23, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([23, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([24, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([24, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([24, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([1, 32, 24, 128]):
            input_shapes.append(torch.Size([1, 32, -1, 128]))
        else:
            input_shapes.append(input_data.size())
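
The table above hard-codes shapes observed for one model. A possible generalization (a sketch, not part of the commit; DYNAMIC_TAILS and to_dynamic_shape are hypothetical names, and the rule deliberately drops the hard-coded whitelist of leading dims 17, 18, 22, 23, 24, 48, so any matching tail becomes dynamic):

    import torch

    # Trailing dims of KV-cache-like inputs whose leading (sequence) dim should be dynamic.
    DYNAMIC_TAILS = {(1, 2, 128), (1, 4096), (1, 32, 2)}

    def to_dynamic_shape(size: torch.Size) -> torch.Size:
        if len(size) >= 2 and tuple(size[1:]) in DYNAMIC_TAILS:
            return torch.Size([-1, *size[1:]])           # e.g. [17, 1, 4096] -> [-1, 1, 4096]
        if len(size) == 4 and size[0] == 1 and size[1] == 32 and size[3] == 128:
            return torch.Size([1, 32, -1, 128])          # e.g. [1, 32, 24, 128] -> [1, 32, -1, 128]
        return size

With a helper like this, both this input_shapes loop and the PartialShape loop in the next hunk collapse to a single call per input.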

decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types)

@@ -125,7 +170,46 @@ def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None):

    for idx, input_data in enumerate(args):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        if input_data.size() == torch.Size([17, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([17, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([17, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([18, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([18, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([18, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([48, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([48, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([48, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([22, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([22, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([22, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([23, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([23, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([23, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([24, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([24, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([24, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([1, 32, 24, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([1, 32, -1, 128]))))
        else:
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()
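
Under the same assumption as the earlier sketch, this second table could reuse the helper instead of repeating every shape:

    # Sketch only: to_dynamic_shape is the hypothetical helper introduced above.
    for idx, input_data in enumerate(args):
        node = om.inputs[idx].get_node()
        node.set_element_type(dtype_mapping[input_data.dtype])
        node.set_partial_shape(PartialShape(list(to_dynamic_shape(input_data.size()))))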

if model_hash_str is not None:
@@ -83,11 +83,20 @@ def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition
    if not fully_supported:
        model_hash_str = model_hash_str + "_p" + str(partition_id)

    #if use_cache and (partition_id in compiled_cache):
    #    compiled = compiled_cache[partition_id]
    #else:
    #    compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str)
    #    compiled_cache[partition_id] = compiled

    cached_pid = partition_id
    #if cached_pid > 1:
    #    cached_pid = 1
    if use_cache and (cached_pid in compiled_cache):
        compiled = compiled_cache[cached_pid]
    else:
        compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str)
        compiled_cache[cached_pid] = compiled

    flat_args, _ = tree_flatten(args)
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]
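
The marshalling above flattens arbitrarily nested inputs into a flat tensor list before handing plain numpy arrays to the compiled model; a small standalone illustration with toy values:

    import torch
    from torch.utils._pytree import tree_flatten

    args = (torch.ones(2), {"scale": torch.tensor(0.5)})
    flat_args, _ = tree_flatten(args)                       # leaves: [tensor([1., 1.]), tensor(0.5000)]
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]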
@@ -113,11 +122,12 @@ def __call__(self, *args):
        if self.perm_fallback:
            return self.gm(*args)

        result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id)
        #try:
        #    result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id)
        #except Exception:
        #    self.perm_fallback = True
        #    return self.gm(*args)

        return result

@@ -154,11 +164,11 @@ def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None
    model_hash_str = executor_parameters.get("model_hash_str", None)

    signature = str(id(gm))
    #for idx, input_data in enumerate(args):
    #    if isinstance(input_data, torch.Tensor):
    #        signature = signature + "_" + str(idx) + ":" + str(input_data.type())[6:] + ":" + str(input_data.size())[11:-1].replace(" ", "")
    #    else:
    #        signature = signature + "_" + str(idx) + ":" + type(input_data).__name__ + ":val(" + str(input_data) + ")"

    if signature not in partitioned_modules:
        partitioned_modules[signature] = partition_graph(gm, use_python_fusion_cache=use_python_fusion_cache,
@@ -37,12 +37,16 @@ def __init__(self):
"torch.ops.aten.add.Tensor": None,
"torch.ops.aten.add_.Tensor": None,
"torch.ops.aten.addmm.default": None,
"torch.ops.aten.all.default": None,
"torch.ops.aten.any.default": None,
"torch.ops.aten.arange.start": None,
"torch.ops.aten.arange.start_step": None,
"torch.ops.aten.arange.default": None,
"torch.ops.aten.argmax.default": None,
"torch.ops.aten.avg_pool2d.default": None,
"torch.ops.aten.baddbmm.default": None,
"torch.ops.aten.bitwise_and.Tensor": None,
"torch.ops.aten.bitwise_not.default": None,
"torch.ops.aten.bmm.default": None,
"torch.ops.aten.cat.default": None,
"torch.ops.aten.clamp_min.default": None,
@@ -52,6 +56,7 @@ def __init__(self):
"torch.ops.aten.cos.default": None,
"torch.ops.aten.cumsum.default": None,
"torch.ops.aten.detach.default": None,
"torch.ops.aten.detach_.default": None,
"torch.ops.aten.div.Scalar": None,
"torch.ops.aten.div.Tensor": None,
"torch.ops.aten.embedding.default": None,
@@ -60,6 +65,7 @@ def __init__(self):
"torch.ops.aten.eq.Tensor": None,
"torch.ops.aten.exp.default": None,
"torch.ops.aten.expand.default": None,
"torch.ops.aten.fill_.Tensor": None,
"torch.ops.aten.full.default": None,
"torch.ops.aten.gather.default": None,
"torch.ops.aten.gelu.default": None,
@@ -68,12 +74,17 @@ def __init__(self):
"torch.ops.aten.hardswish_.default": None,
"torch.ops.aten.hardtanh_.default": None,
"torch.ops.aten.index.Tensor": None,
"torch.ops.aten.isinf.default": None,
"torch.ops.aten.isnan.default": None,
"torch.ops.aten.le.Scalar": None,
"torch.ops.aten.leaky_relu_.default": None,
"torch.ops.aten.lift_fresh_copy.default": None,
"torch.ops.aten.linalg_vector_norm.default": None,
"torch.ops.aten.lt.Tensor": None,
"torch.ops.aten.log.default": None,
"torch.ops.aten.logical_not.default": None,
"torch.ops.aten.logsumexp.default": None,
"torch.ops.aten.masked_fill.Scalar": None,
"torch.ops.aten.masked_fill_.Scalar": None,
"torch.ops.aten.masked_fill.Tensor": None,
"torch.ops.aten.max_pool2d_with_indices.default": None,
@@ -88,24 +99,36 @@ def __init__(self):
"torch.ops.aten.native_layer_norm.default": None,
"torch.ops.aten.neg.default": None,
"torch.ops.aten.new_ones.default": None,
"torch.ops.aten.ones.default": None,
"torch.ops.aten.permute.default": None,
"torch.ops.aten.pow.Scalar": None,
"torch.ops.aten.pow.Tensor_Scalar": None,
"torch.ops.aten.pow.Tensor_Tensor": None,
"torch.ops.aten.reciprocal.default": None,
"torch.ops.aten.relu.default": None,
"torch.ops.aten.relu_.default": None,
"torch.ops.aten.rsqrt.default": None,
"torch.ops.aten.rsub.Scalar": None,
"torch.ops.aten.scatter.src": None,
"torch.ops.aten._scaled_dot_product_flash_attention.default": None,
"torch.ops.aten.select.int": None,
"torch.ops.aten.sigmoid.default": None,
"torch.ops.aten.silu.default": None,
"torch.ops.aten.silu_.default": None,
"torch.ops.aten.sin.default": None,
"torch.ops.aten.slice.Tensor": None,
"torch.ops.aten.sort.default": None,
"torch.ops.aten.split.Tensor": None,
"torch.ops.aten.split_with_sizes.default": None,
"torch.ops.aten.stack.default": None,
"torch.ops.aten.sub.default": None,
"torch.ops.aten.sub.Tensor": None,
"torch.ops.aten.t.default": None,
"torch.ops.aten.tanh.default": None,
"torch.ops.aten.topk.default": None,
"torch.ops.aten.transpose.int": None,
"torch.ops.aten.tril.default": None,
"torch.ops.aten.tril_.default": None,
"torch.ops.aten.unsqueeze.default": None,
"torch.ops.aten.upsample_nearest2d.default": None,
"torch.ops.aten.view.default": None,
@@ -19,6 +19,7 @@

import typing as t
import logging
import os

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
@@ -59,7 +60,14 @@ def make_partitions(self, graph_module: GraphModule) -> GraphModule:
        partitioner = CapabilityBasedPartitioner(
            graph_module, self.supported_ops, allows_single_node_partition=False)
        partitions = partitioner.propose_partitions()
        new_partitions = []
        min_num_nodes = 0
        if os.getenv("OPENVINO_TORCH_MIN_NUM_NODES") is not None:
            min_num_nodes = int(os.getenv("OPENVINO_TORCH_MIN_NUM_NODES"))
        for part in partitions:
            if len(part.nodes) > min_num_nodes:
                new_partitions.append(part)
        self.add_get_attr_inputs(new_partitions)
        fused_graph_module = partitioner.fuse_partitions(new_partitions)

        return fused_graph_module
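
Usage sketch for the new knob (the value is illustrative):

    import os

    # Partitions with 10 or fewer nodes stay on native PyTorch instead of being
    # offloaded to OpenVINO; with the default of 0, only empty partitions are dropped.
    os.environ["OPENVINO_TORCH_MIN_NUM_NODES"] = "10"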