LLM POC updates
cavusmustafa committed Nov 3, 2023
1 parent caa81a0 · commit d78d47b
Showing 22 changed files with 526 additions and 93 deletions.
@@ -112,46 +112,48 @@ def _call(*args):


def fx_openvino(subgraph, example_inputs):
    #try:
    print("DEBUG - fx_openvino - A")
    executor_parameters = None
    inputs_reversed = False
    if os.getenv("OPENVINO_TORCH_MODEL_CACHING") is not None:
        # Create a hash to be used for caching
        model_hash_str = sha256(subgraph.code.encode('utf-8')).hexdigest()
        executor_parameters = {"model_hash_str": model_hash_str}
        # Check if the model was fully supported and already cached
        example_inputs.reverse()
        inputs_reversed = True
        maybe_fs_cached_name = cached_model_name(model_hash_str + "_fs", get_device(), example_inputs, cache_root_path())
        if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
            # Model is fully supported and already cached. Run the cached OV model directly.
            compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, *example_inputs)

            def _call(*args):
                res = execute_cached(compiled_model, *args)
                return res
            return _call
    if inputs_reversed:
        example_inputs.reverse()
    model = make_fx(subgraph)(*example_inputs)
    with torch.no_grad():
        model.eval()
    partitioner = Partitioner()
    compiled_model = partitioner.make_partitions(model)

    if executor_parameters is not None and 'model_hash_str' in executor_parameters:
        # Check if the model is fully supported.
        fully_supported = partitioner.check_fully_supported(compiled_model)
        if fully_supported:
            executor_parameters["model_hash_str"] += "_fs"

    def _call(*args):
        print("DEBUG - fx_openvino - B")
        res = execute(compiled_model, *args, executor="openvino",
                      executor_parameters=executor_parameters)
        return res
    return _call
    #except Exception as e:
    #    log.debug(f"Failed in OpenVINO execution: {e}")
    #    return compile_fx(subgraph, example_inputs)
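
A minimal usage sketch for the path above (illustration only, not part of the commit; it assumes this backend is registered with torch.compile under the name "openvino" and uses a toy model):

    import os
    import torch

    os.environ["OPENVINO_TORCH_MODEL_CACHING"] = "1"    # enable the hash-based cache path in fx_openvino
    model = torch.nn.Linear(4096, 4096).eval()
    compiled = torch.compile(model, backend="openvino")
    out = compiled(torch.randn(1, 4096))                # first call compiles; later runs can hit the on-disk cache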

def reset():
    clear_caches()
@@ -34,9 +34,15 @@ def cached_model_name(model_hash_str, device, args, cache_root, reversed = False
inputs_str = ""
for idx, input_data in enumerate(args):
if reversed:
inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
if isinstance(input_data, torch.SymInt):
inputs_str = "_"+str(type(input_data)) + inputs_str
else:
inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
else:
inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
if isinstance(input_data, torch.SymInt):
inputs_str += "_"+str(type(input_data))
else:
inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
inputs_str = sha256(inputs_str.encode('utf-8')).hexdigest()
file_name += inputs_str
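
For reference, here is what each branch contributes to the hashed signature (a worked example, not from the commit):

    import torch

    t = torch.ones(2, 3)
    # Tensor branch: "_" + t.type() + trimmed size string -> "_torch.FloatTensor[2,3]"
    part = "_" + str(t.type()) + str(t.size())[11:-1].replace(" ", "")
    # SymInt branch: a symbolic dim has no .type()/.size() like a Tensor, so only
    # its class name is folded into the hash -> "_<class 'torch.SymInt'>"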

@@ -101,7 +107,46 @@ def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None):
    input_types = []
    for idx, input_data in enumerate(args):
        input_types.append(input_data.type())
        if input_data.size() == torch.Size([17, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([17, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([17, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([18, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([18, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([18, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([48, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([48, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([48, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([22, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([22, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([22, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([23, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([23, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([23, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([24, 1, 2, 128]):
            input_shapes.append(torch.Size([-1, 1, 2, 128]))
        elif input_data.size() == torch.Size([24, 1, 4096]):
            input_shapes.append(torch.Size([-1, 1, 4096]))
        elif input_data.size() == torch.Size([24, 1, 32, 2]):
            input_shapes.append(torch.Size([-1, 1, 32, 2]))
        elif input_data.size() == torch.Size([1, 32, 24, 128]):
            input_shapes.append(torch.Size([1, 32, -1, 128]))
        else:
            input_shapes.append(input_data.size())
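
The table above hard-codes shapes observed for one model. A possible generalization (a sketch, not part of the commit; DYNAMIC_TAILS and to_dynamic_shape are hypothetical names, and the rule deliberately drops the hard-coded whitelist of leading dims 17, 18, 22, 23, 24, 48, so any matching tail becomes dynamic):

    import torch

    # Trailing dims of KV-cache-like inputs whose leading (sequence) dim should be dynamic.
    DYNAMIC_TAILS = {(1, 2, 128), (1, 4096), (1, 32, 2)}

    def to_dynamic_shape(size: torch.Size) -> torch.Size:
        if len(size) >= 2 and tuple(size[1:]) in DYNAMIC_TAILS:
            return torch.Size([-1, *size[1:]])           # e.g. [17, 1, 4096] -> [-1, 1, 4096]
        if len(size) == 4 and size[0] == 1 and size[1] == 32 and size[3] == 128:
            return torch.Size([1, 32, -1, 128])          # e.g. [1, 32, 24, 128] -> [1, 32, -1, 128]
        return size

With a helper like this, both this input_shapes loop and the PartialShape loop in the next hunk collapse to a single call per input.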

decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types)

@@ -125,7 +170,46 @@ def openvino_compile(gm: GraphModule, *args, model_hash_str: str = None):

    for idx, input_data in enumerate(args):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        if input_data.size() == torch.Size([17, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([17, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([17, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([18, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([18, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([18, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([48, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([48, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([48, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([22, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([22, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([22, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([23, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([23, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([23, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([24, 1, 2, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 2, 128]))))
        elif input_data.size() == torch.Size([24, 1, 4096]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 4096]))))
        elif input_data.size() == torch.Size([24, 1, 32, 2]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([-1, 1, 32, 2]))))
        elif input_data.size() == torch.Size([1, 32, 24, 128]):
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(torch.Size([1, 32, -1, 128]))))
        else:
            om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()
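
Under the same assumption as the earlier sketch, this second table could reuse the helper instead of repeating every shape:

    # Sketch only: to_dynamic_shape is the hypothetical helper introduced above.
    for idx, input_data in enumerate(args):
        node = om.inputs[idx].get_node()
        node.set_element_type(dtype_mapping[input_data.dtype])
        node.set_partial_shape(PartialShape(list(to_dynamic_shape(input_data.size()))))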

if model_hash_str is not None:
@@ -83,11 +83,20 @@ def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition
    if not fully_supported:
        model_hash_str = model_hash_str + "_p" + str(partition_id)

    #if use_cache and (partition_id in compiled_cache):
    #    compiled = compiled_cache[partition_id]
    #else:
    #    compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str)
    #    compiled_cache[partition_id] = compiled

    cached_pid = partition_id
    #if cached_pid > 1:
    #    cached_pid = 1
    if use_cache and (cached_pid in compiled_cache):
        compiled = compiled_cache[cached_pid]
    else:
        compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str)
        compiled_cache[cached_pid] = compiled

    flat_args, _ = tree_flatten(args)
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]
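
The marshalling above flattens arbitrarily nested inputs into a flat tensor list before handing plain numpy arrays to the compiled model; a small standalone illustration with toy values:

    import torch
    from torch.utils._pytree import tree_flatten

    args = (torch.ones(2), {"scale": torch.tensor(0.5)})
    flat_args, _ = tree_flatten(args)                       # leaves: [tensor([1., 1.]), tensor(0.5000)]
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]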
@@ -113,11 +122,12 @@ def __call__(self, *args):
        if self.perm_fallback:
            return self.gm(*args)

        result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id)
        #try:
        #    result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id)
        #except Exception:
        #    self.perm_fallback = True
        #    return self.gm(*args)

        return result

@@ -154,11 +164,11 @@ def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None
    model_hash_str = executor_parameters.get("model_hash_str", None)

    signature = str(id(gm))
    #for idx, input_data in enumerate(args):
    #    if isinstance(input_data, torch.Tensor):
    #        signature = signature + "_" + str(idx) + ":" + str(input_data.type())[6:] + ":" + str(input_data.size())[11:-1].replace(" ", "")
    #    else:
    #        signature = signature + "_" + str(idx) + ":" + type(input_data).__name__ + ":val(" + str(input_data) + ")"

    if signature not in partitioned_modules:
        partitioned_modules[signature] = partition_graph(gm, use_python_fusion_cache=use_python_fusion_cache,
@@ -37,12 +37,16 @@ def __init__(self):
"torch.ops.aten.add.Tensor": None,
"torch.ops.aten.add_.Tensor": None,
"torch.ops.aten.addmm.default": None,
"torch.ops.aten.all.default": None,
"torch.ops.aten.any.default": None,
"torch.ops.aten.arange.start": None,
"torch.ops.aten.arange.start_step": None,
"torch.ops.aten.arange.default": None,
"torch.ops.aten.argmax.default": None,
"torch.ops.aten.avg_pool2d.default": None,
"torch.ops.aten.baddbmm.default": None,
"torch.ops.aten.bitwise_and.Tensor": None,
"torch.ops.aten.bitwise_not.default": None,
"torch.ops.aten.bmm.default": None,
"torch.ops.aten.cat.default": None,
"torch.ops.aten.clamp_min.default": None,
@@ -52,6 +56,7 @@ def __init__(self):
"torch.ops.aten.cos.default": None,
"torch.ops.aten.cumsum.default": None,
"torch.ops.aten.detach.default": None,
"torch.ops.aten.detach_.default": None,
"torch.ops.aten.div.Scalar": None,
"torch.ops.aten.div.Tensor": None,
"torch.ops.aten.embedding.default": None,
@@ -60,6 +65,7 @@ def __init__(self):
"torch.ops.aten.eq.Tensor": None,
"torch.ops.aten.exp.default": None,
"torch.ops.aten.expand.default": None,
"torch.ops.aten.fill_.Tensor": None,
"torch.ops.aten.full.default": None,
"torch.ops.aten.gather.default": None,
"torch.ops.aten.gelu.default": None,
@@ -68,12 +74,17 @@ def __init__(self):
"torch.ops.aten.hardswish_.default": None,
"torch.ops.aten.hardtanh_.default": None,
"torch.ops.aten.index.Tensor": None,
"torch.ops.aten.isinf.default": None,
"torch.ops.aten.isnan.default": None,
"torch.ops.aten.le.Scalar": None,
"torch.ops.aten.leaky_relu_.default": None,
"torch.ops.aten.lift_fresh_copy.default": None,
"torch.ops.aten.linalg_vector_norm.default": None,
"torch.ops.aten.lt.Tensor": None,
"torch.ops.aten.log.default": None,
"torch.ops.aten.logical_not.default": None,
"torch.ops.aten.logsumexp.default": None,
"torch.ops.aten.masked_fill.Scalar": None,
"torch.ops.aten.masked_fill_.Scalar": None,
"torch.ops.aten.masked_fill.Tensor": None,
"torch.ops.aten.max_pool2d_with_indices.default": None,
@@ -88,24 +99,36 @@ def __init__(self):
"torch.ops.aten.native_layer_norm.default": None,
"torch.ops.aten.neg.default": None,
"torch.ops.aten.new_ones.default": None,
"torch.ops.aten.ones.default": None,
"torch.ops.aten.permute.default": None,
"torch.ops.aten.pow.Scalar": None,
"torch.ops.aten.pow.Tensor_Scalar": None,
"torch.ops.aten.pow.Tensor_Tensor": None,
"torch.ops.aten.reciprocal.default": None,
"torch.ops.aten.relu.default": None,
"torch.ops.aten.relu_.default": None,
"torch.ops.aten.rsqrt.default": None,
"torch.ops.aten.rsub.Scalar": None,
"torch.ops.aten.scatter.src": None,
"torch.ops.aten._scaled_dot_product_flash_attention.default": None,
"torch.ops.aten.select.int": None,
"torch.ops.aten.sigmoid.default": None,
"torch.ops.aten.silu.default": None,
"torch.ops.aten.silu_.default": None,
"torch.ops.aten.sin.default": None,
"torch.ops.aten.slice.Tensor": None,
"torch.ops.aten.sort.default": None,
"torch.ops.aten.split.Tensor": None,
"torch.ops.aten.split_with_sizes.default": None,
"torch.ops.aten.stack.default": None,
"torch.ops.aten.sub.default": None,
"torch.ops.aten.sub.Tensor": None,
"torch.ops.aten.t.default": None,
"torch.ops.aten.tanh.default": None,
"torch.ops.aten.topk.default": None,
"torch.ops.aten.transpose.int": None,
"torch.ops.aten.tril.default": None,
"torch.ops.aten.tril_.default": None,
"torch.ops.aten.unsqueeze.default": None,
"torch.ops.aten.upsample_nearest2d.default": None,
"torch.ops.aten.view.default": None,
@@ -19,6 +19,7 @@

import typing as t
import logging
import os

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)
@@ -59,7 +60,14 @@ def make_partitions(self, graph_module: GraphModule) -> GraphModule:
        partitioner = CapabilityBasedPartitioner(
            graph_module, self.supported_ops, allows_single_node_partition=False)
        partitions = partitioner.propose_partitions()
        new_partitions = []
        min_num_nodes = 0
        if os.getenv("OPENVINO_TORCH_MIN_NUM_NODES") is not None:
            min_num_nodes = int(os.getenv("OPENVINO_TORCH_MIN_NUM_NODES"))
        for part in partitions:
            if len(part.nodes) > min_num_nodes:
                new_partitions.append(part)
        self.add_get_attr_inputs(new_partitions)
        fused_graph_module = partitioner.fuse_partitions(new_partitions)

        return fused_graph_module
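
Usage sketch for the new knob (the value is illustrative):

    import os

    # Partitions with 10 or fewer nodes stay on native PyTorch instead of being
    # offloaded to OpenVINO; with the default of 0, only empty partitions are dropped.
    os.environ["OPENVINO_TORCH_MIN_NUM_NODES"] = "10"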