Skip to content

Commit

Permalink
Merge branch 'pytorch:main' into Arm-backend-Updated-toolchain-to-arm…
Browse files Browse the repository at this point in the history
…-gnu-toolchain-13.3.rel1
  • Loading branch information
zingo authored Nov 25, 2024
2 parents 44e0923 + fbee0c8 commit a0787ba
Show file tree
Hide file tree
Showing 91 changed files with 2,334 additions and 907 deletions.
8 changes: 8 additions & 0 deletions .ci/scripts/test_llama.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ while [[ $# -gt 0 ]]; do
MODE="$2" # portable or xnnpack+custom or xnnpack+custom+qe
shift 2
;;
-pt2e_quantize)
PT2E_QUANTIZE="$2"
shift 2
;;
-upload)
UPLOAD_DIR="$2"
shift 2
Expand Down Expand Up @@ -234,6 +238,10 @@ if [[ "${COREML}" == "ON" ]]; then
fi
if [[ "${QNN}" == "ON" ]]; then
EXPORT_ARGS="${EXPORT_ARGS} -kv -v --qnn --disable_dynamic_shape"
echo "PT2E_QUANTIZE is ${PT2E_QUANTIZE}"
if [[ "${PT2E_QUANTIZE}" == "qnn_16a16w" ]]; then
EXPORT_ARGS+=" --tokenizer_path tokenizer.model --pt2e_quantize qnn_16a16w --calibration_tasks wikitext --calibration_limit 1 --calibration_seq_length 128 --calibration_data Once "
fi
fi
# Add dynamically linked library location
$PYTHON_EXECUTABLE -m examples.models.llama.export_llama ${EXPORT_ARGS}
Expand Down
36 changes: 36 additions & 0 deletions .github/workflows/trunk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -441,3 +441,39 @@ jobs:
cmake-out/examples/models/llama/llama_main --model_path=${ET_MODEL_NAME}.pte --tokenizer_path=${TOKENIZER_BIN_FILE} --prompt="My name is"
echo "::endgroup::"
test-llama-runner-qnn-linux:
name: test-llama-runner-qnn-linux
uses: pytorch/test-infra/.github/workflows/linux_job.yml@main
strategy:
matrix:
dtype: [fp32]
pt2e_quantize: [qnn_16a16w, qnn_8a8w]
mode: [qnn]
fail-fast: false
with:
runner: linux.2xlarge
docker-image: executorch-ubuntu-22.04-qnn-sdk
submodules: 'true'
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
timeout: 900
script: |
# The generic Linux job uses the base env by default, not the one set up by the image
CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
conda activate "${CONDA_ENV}"
BUILD_TOOL="cmake"
DTYPE=${{ matrix.dtype }}
MODE=${{ matrix.mode }}
PT2E_QUANTIZE=${{ matrix.pt2e_quantize }}
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
# Setup executorch
PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh "${BUILD_TOOL}"
# Install requirements for export_llama
PYTHON_EXECUTABLE=python bash examples/models/llama/install_requirements.sh
# Test llama2
PYTHON_EXECUTABLE=python bash .ci/scripts/test_llama.sh -model stories110M -build_tool "${BUILD_TOOL}" -mode "${MODE}" -dtype "${DTYPE}" -pt2e_quantize "${PT2E_QUANTIZE}"
13 changes: 9 additions & 4 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,15 @@ for basics.
- If the reviewers have requests or questions, follow up with them.
- The goal of the reviewer is to ensure that the code in the `main` branch of
the repo is consistent, maintainable, and of high quality.
1. Once approved, your reviewer will import the PR into Meta's internal system
and merge it from there.
- If the PR is approved and not merged within a few business days, please
comment on the PR to ask about its status.
1. Once the PR has been approved,
- If you have the "write permission" in this repo, you can merge it yourself
by clicking the "Squash and merge" button once it is green and all CI
signals are passing.
- If you don't have "write permission" in this repo, the reviewer will take
care of the PR. The reviewer may import the PR into Meta's internal system
to validate it against internal CI.
- If the PR is approved but not merged within 5 business days, please comment
on the PR to ask about its status.
- Note that if the `main` [CI](#continuous-integration) jobs are broken, we
will only merge PRs that fix the broken jobs until all critical jobs are
fixed.
Expand Down
45 changes: 29 additions & 16 deletions backends/apple/coreml/runtime/delegate/ETCoreMLModelCompiler.mm
Original file line number Diff line number Diff line change
Expand Up @@ -26,25 +26,38 @@ + (nullable NSURL *)compileModelAtURL:(NSURL *)modelURL
#else
__block NSError *localError = nil;
__block NSURL *result = nil;

dispatch_semaphore_t sema = dispatch_semaphore_create(0);
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
result = [tempURL copy];
localError = compilationError;
dispatch_semaphore_signal(sema);
}];

long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
if (status != 0) {

if (@available(iOS 16, macOS 13, watchOS 9, tvOS 16, *)) {
dispatch_semaphore_t sema = dispatch_semaphore_create(0);
[MLModel compileModelAtURL:modelURL completionHandler:^(NSURL * _Nullable tempURL, NSError * _Nullable compilationError) {
result = [tempURL copy];
localError = compilationError;
dispatch_semaphore_signal(sema);
}];

long status = dispatch_semaphore_wait(sema, dispatch_time(DISPATCH_TIME_NOW, (int64_t)(maxWaitTimeInSeconds * NSEC_PER_SEC)));
if (status != 0) {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCompilationFailed,
"%@: Failed to compile model in %f seconds.",
NSStringFromClass(ETCoreMLModelCompiler.class),
maxWaitTimeInSeconds);
return nil;
}
} else {
result = [MLModel compileModelAtURL:modelURL error:&localError];
}

if (localError) {
ETCoreMLLogErrorAndSetNSError(error,
ETCoreMLErrorCompilationFailed,
"%@: Failed to compile model in %f seconds.",
NSStringFromClass(ETCoreMLModelCompiler.class),
maxWaitTimeInSeconds);
ETCoreMLErrorCompilationFailed,
"%@: Failed to compile model, error: %@",
NSStringFromClass(ETCoreMLModelCompiler.class),
localError);
return nil;
} else {
return result;
}

return result;
#endif
}

Expand Down
2 changes: 1 addition & 1 deletion backends/apple/coreml/scripts/install_requirements.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ rm -rf "$COREML_DIR_PATH/third-party"
mkdir "$COREML_DIR_PATH/third-party"

echo "${green}ExecuTorch: Cloning coremltools."
git clone --depth 1 --branch 8.0 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
git clone --depth 1 --branch 8.1 "https://github.com/apple/coremltools.git" $COREMLTOOLS_DIR_PATH
cd $COREMLTOOLS_DIR_PATH

STATUS=$?
Expand Down
18 changes: 5 additions & 13 deletions backends/apple/coreml/test/test_coreml_partitioner.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,23 +71,15 @@ def test_vit_skip_conv(self):
)
)

conv_block = ["aten.convolution.default", "executorch_call_delegate"]
safe_softmax_block = [
"getitem",
"getitem",
"getitem",
"getitem",
"aten.any.dim",
"executorch_call_delegate",
]
final_block = ["getitem"]
total = conv_block + 12 * safe_softmax_block + final_block

assert [
node.target.__name__
for node in delegated_program_manager.exported_program().graph.nodes
if node.op == "call_function"
] == total
] == [
"aten.convolution.default",
"executorch_call_delegate",
"getitem",
]

def test_buffer(self):
embedding_dim = 3
Expand Down
33 changes: 30 additions & 3 deletions backends/arm/arm_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ def __init__(self):
self.permute_nhwc = False
self.quantize_io = False
self.tosa_version = None
self.input_order = None

def ethosu_compile_spec(
self,
Expand Down Expand Up @@ -89,7 +90,7 @@ def ethosu_compile_spec(
self.compiler_flags.append(extra_flags)

base_tosa_version = "TOSA-0.80.0+BI"
if "U55" in config:
if "u55" in config:
# Add the Ethos-U55 extension marker
base_tosa_version += "+u55"
self.tosa_version = TosaSpecification.create_from_string(base_tosa_version)
Expand Down Expand Up @@ -134,6 +135,14 @@ def set_quantize_io(self, quantize_io: bool = False) -> "ArmCompileSpecBuilder":
self.quantize_io = quantize_io
return self

def set_input_order(self, input_order: List[int] = None) -> "ArmCompileSpecBuilder":
    """
    Set the order in which the model inputs should be emitted.

    Reordering may be required when there is more than one input while
    using the U55/U85 CompileSpec.

    Args:
        input_order: Sequence of integer input indices giving the desired
            order. ``None`` (the default) or an empty sequence means no
            "input_order" compile spec is emitted by ``build()``.

    Returns:
        This builder, so calls can be chained.
    """
    # NOTE(review): build() serializes this value with " ".join(...) while
    # preprocess() parses it with split(","); an order with more than one
    # element looks like it would fail int() parsing -- confirm and align
    # the separators.
    self.input_order = input_order
    return self

def build(self) -> List[CompileSpec]:
"""
Generate a list of compile spec objects from the builder
Expand Down Expand Up @@ -163,6 +172,13 @@ def build(self) -> List[CompileSpec]:
CompileSpec("permute_memory_format", "nhwc".encode())
)

if self.input_order:
self.compile_spec.append(
CompileSpec(
"input_order", " ".join(map(str, self.input_order)).encode()
)
)

if self.quantize_io:
self.compile_spec.append(CompileSpec("quantize_io", "True".encode()))

Expand Down Expand Up @@ -214,13 +230,16 @@ def preprocess( # noqa: C901
artifact_path = None
output_format = ""
compile_flags = []
input_order = []
for spec in compile_spec:
if spec.key == "debug_artifact_path":
artifact_path = spec.value.decode()
if spec.key == "output_format":
output_format = spec.value.decode()
if spec.key == "compile_flags":
compile_flags.append(spec.value.decode())
if spec.key == "input_order":
input_order = list(map(int, spec.value.decode().split(",")))

# Check that the output format is set in the compile spec
if not output_format:
Expand All @@ -246,19 +265,27 @@ def preprocess( # noqa: C901
)

node_visitors = get_node_visitors(edge_program, tosa_spec)

input_count = 0
for node in graph_module.graph.nodes:
if node.op == "call_function":
process_call_function(node, tosa_graph, node_visitors, tosa_spec)
elif node.op == "placeholder":
process_placeholder(node, tosa_graph, edge_program, tosa_spec)
if node.name in edge_program.graph_signature.user_inputs:
input_count += 1
elif node.op == "output":
process_output(node, tosa_graph)
else:
# This will only happen if an unpartitioned graph is passed without
# any checking of compatibility.
dbg_fail(node, tosa_graph, artifact_path)

if len(input_order) > 0:
if input_count != len(input_order):
raise RuntimeError(
"The rank of the input order is not equal to amount of input tensors"
)

# TODO: It would be awesome if this dump could somehow be done on top level and not here.
# Problem is that the desc.json has to be created on the tosa_graph object, which we can't
# access from top level.
Expand All @@ -275,7 +302,7 @@ def preprocess( # noqa: C901
# preprocess and some consume TOSA fb directly.
if output_format == "vela":
# Emit vela_bin_stream format
binary = vela_compile(tosa_graph, compile_flags)
binary = vela_compile(tosa_graph, compile_flags, input_order)
elif output_format == "tosa":
# Emit TOSA flatbuffer
binary = bytes(tosa_graph.serialize())
Expand Down
15 changes: 9 additions & 6 deletions backends/arm/arm_vela.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,13 @@

# Pack either input or output tensor block, compose the related arrays into
# per-io structs to simplify runtime use.
def vela_bin_pack_io(prefix, data):
ios = struct.pack("<i", len(data[prefix + "_shape"]))
for i in range(len(data[prefix + "_shape"])):
io_shape = data[prefix + "_shape"][i]
def vela_bin_pack_io(prefix, data, shape_order=None):
vela_input_shapes = data[prefix + "_shape"]

order = shape_order if shape_order else range(len(vela_input_shapes))
ios = struct.pack("<i", len(vela_input_shapes))
for i in order:
io_shape = vela_input_shapes[i]
io_elem_size = data[prefix + "_elem_size"][i]
io_offset = data[prefix + "_offset"][i]
io_region = data[prefix + "_region"][i]
Expand All @@ -36,7 +39,7 @@ def vela_bin_pack_io(prefix, data):
# Output via Vela to binary stream for ArmBackendEthosU
# WARNING: Do not change this without changing VelaBinStream.cpp as that
# function consumes this format and the two need to align.
def vela_compile(tosa_graph, args: List[str]):
def vela_compile(tosa_graph, args: List[str], shape_order=None):
with tempfile.TemporaryDirectory() as tmpdir:
tosaname = "out.tosa"
flatbuffer = tosa_graph.serialize()
Expand Down Expand Up @@ -78,7 +81,7 @@ def vela_compile(tosa_graph, args: List[str]):
bin_blocks["scratch_data"] = b"\x00" * block_length

# Capture inputs and outputs
bin_blocks["inputs"] = vela_bin_pack_io("input", data)
bin_blocks["inputs"] = vela_bin_pack_io("input", data, shape_order)
bin_blocks["outputs"] = vela_bin_pack_io("output", data)

bin_blocks["vela_end_stream"] = b""
Expand Down
7 changes: 6 additions & 1 deletion backends/arm/operator_support/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,9 @@

# pyre-unsafe

from . import mean_dim_support, tosa_supported_operators, var_correction_support # noqa
from . import ( # noqa
mean_dim_support,
right_shift_support,
tosa_supported_operators,
var_correction_support,
)
35 changes: 35 additions & 0 deletions backends/arm/operator_support/right_shift_support.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Copyright 2024 Arm Limited and/or its affiliates.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.


import logging

import torch.fx as fx
from executorch.backends.arm.operator_support.tosa_supported_operators import (
register_tosa_support_check,
SupportedTOSAOperatorCheck,
)
from executorch.backends.arm.tosa_specification import Tosa_0_80, TosaSpecification
from executorch.exir.dialects._ops import ops as exir_ops

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)


@register_tosa_support_check
class RightShiftSupported(SupportedTOSAOperatorCheck):
    """Support check for the edge-dialect scalar right-shift operator.

    Registered via ``register_tosa_support_check`` for the TOSA 0.80 BI and
    MI specifications listed in ``tosa_specs``.
    """

    targets = [exir_ops.edge.aten.__rshift__.Scalar]

    tosa_specs = [
        TosaSpecification.create_from_string("TOSA-0.80.0+BI"),
        TosaSpecification.create_from_string("TOSA-0.80.0+MI"),
    ]

    def is_node_supported(self, node: fx.Node, tosa_spec: TosaSpecification):
        """Report the node as supported, warning when targeting the U55 subset.

        Args:
            node: The FX node being checked.
            tosa_spec: The TOSA specification the graph is lowered against.

        Returns:
            Always ``True``.
        """
        # TODO MLETORCH-525 Remove warning
        if isinstance(tosa_spec, Tosa_0_80) and tosa_spec.is_U55_subset:
            # Log through the module-level logger rather than the root
            # ``logging`` module so the level configured for this module
            # applies and the record carries this module's name.
            logger.warning(f"{node.target} may introduce one-off errors.")
        return True
1 change: 1 addition & 0 deletions backends/arm/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
op_reciprocal,
op_relu,
op_repeat,
op_rshift,
op_rsqrt,
op_select,
op_sigmoid,
Expand Down
Loading

0 comments on commit a0787ba

Please sign in to comment.