diff --git a/include/tvm/ir/function.h b/include/tvm/ir/function.h
index 13b984d9cb355..5ee719f9964f8 100644
--- a/include/tvm/ir/function.h
+++ b/include/tvm/ir/function.h
@@ -189,6 +189,27 @@ constexpr const char* kTarget = "target";
  * Type: String
  */
 constexpr const char* kGlobalSymbol = "global_symbol";
+
+/*!
+ * \brief The device type which will hold each of the function's parameters.
+ *
+ * Only supported on Relay \p Functions. Generally added by the \p PlanDevices pass, but
+ * may be included as an annotation on user programs.
+ *
+ * Type: Array<Integer> (but interpreted as Array<DLDeviceType>)
+ */
+constexpr const char* kParamDeviceTypes = "param_device_types";
+
+/*!
+ * \brief The device type which will hold the function result.
+ *
+ * Only supported on Relay \p Functions. Generally added by the \p PlanDevices pass, but
+ * may be included as an annotation on user programs.
+ *
+ * Type: Integer (but interpreted as DLDeviceType)
+ */
+constexpr const char* kResultDeviceType = "result_device_type";
+
 }  // namespace attr
 }  // namespace tvm
 #endif  // TVM_IR_FUNCTION_H_
diff --git a/include/tvm/parser/parser.h b/include/tvm/parser/parser.h
index 7673eec2a337f..8c27220509057 100644
--- a/include/tvm/parser/parser.h
+++ b/include/tvm/parser/parser.h
@@ -23,6 +23,7 @@
  * \file parser.h
  * \brief A parser for TVM IR.
  */
+#include
 #include
 #include

@@ -32,8 +33,11 @@
 namespace tvm {
 namespace parser {

-IRModule ParseModule(std::string file_name, std::string file_content,
-                     Optional<IRModule> init_module = Optional<IRModule>());
+using MetaTable = Map<String, Array<ObjectRef>>;
+
+IRModule ParseModule(const std::string& file_name, const std::string& file_content,
+                     const Optional<IRModule>& init_module = Optional<IRModule>(),
+                     const MetaTable& init_meta_table = MetaTable());

 }  // namespace parser
 }  // namespace tvm
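For orientation: the two new attributes surface in Python as ordinary function attributes. A minimal sketch (not part of the diff; it mirrors the updated test_annotation.py further below):

    import tvm
    from tvm import relay

    x = relay.var("x", shape=(5, 7))
    y = relay.var("y", shape=(5, 7))
    f = relay.Function([x, y], relay.add(x, y))
    # Attach "param_device_types" and "result_device_type" as function attributes.
    f = relay.annotation.function_on_device(f, ["cpu", "cuda"], "cuda")
    assert [int(t) for t in f.attrs["param_device_types"]] == [1, 2]  # kDLCPU, kDLCUDA
    assert int(f.attrs["result_device_type"]) == 2  # kDLCUDA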
diff --git a/include/tvm/relay/attrs/function.h b/include/tvm/relay/attrs/function.h
deleted file mode 100644
index f4f94131da1f5..0000000000000
--- a/include/tvm/relay/attrs/function.h
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-/*!
- * \file tvm/relay/attrs/function.h
- * \brief Attributes for Relay Functions which don't make sense on PrimFuncs.
- */
-#ifndef TVM_RELAY_ATTRS_FUNCTION_H_
-#define TVM_RELAY_ATTRS_FUNCTION_H_
-
-namespace tvm {
-namespace relay {
-/*!
- * \brief Attributes for Relay function definitions which capture the devices for the
- * function parameters and result.
- *
- * See also OnDeviceAttrs in include/tvm/relay/attrs/annotation.h for the companion "on_device"
- * call attributes.
- */
-struct FunctionOnDeviceAttrs : public tvm::AttrsNode<FunctionOnDeviceAttrs> {
-  /*! \brief Device type on which each of the function's arguments already resides. */
-  Array<Integer> param_device_types;
-  // TODO(mbs): Replace device types with TargetDevice.
-  /*! \brief Device type on which function body should be evaluated. */
-  int result_device_type = kInvalidDeviceType;
-
-  TVM_DECLARE_ATTRS(FunctionOnDeviceAttrs, "relay.attrs.FunctionOnDeviceAttrs") {
-    TVM_ATTR_FIELD(param_device_types)
-        .describe("The type of the virtual device which holds each function parameters.");
-    TVM_ATTR_FIELD(result_device_type)
-        .describe("The type of the virtual device which will hold the function's result.")
-        .set_default(0);
-  }
-};
-
-namespace attr {
-
-/*!
- * \brief Device annotations for function parameters and results.
- *
- * Type: FunctionOnDeviceAttrs
- */
-constexpr static const char* kFunctionAttrsKey = "on_device";
-
-}  // namespace attr
-
-}  // namespace relay
-}  // namespace tvm
-
-#endif  // TVM_RELAY_ATTRS_FUNCTION_H_
diff --git a/python/tvm/parser/__init__.py b/python/tvm/parser/__init__.py
index 60fcddb17f08b..4b8628e49a571 100644
--- a/python/tvm/parser/__init__.py
+++ b/python/tvm/parser/__init__.py
@@ -26,8 +26,10 @@ def add(self, name, content):
     return _ffi.get_global_func("SourceMapAdd")(self, name, content)


-def parse(source, source_name="from_string"):
-    return _ffi_api.ParseModule(source_name, source)
+def parse(source, source_name="from_string", init_module=None, init_meta_table=None):
+    if init_meta_table is None:
+        init_meta_table = {}
+    return _ffi_api.ParseModule(source_name, source, init_module, init_meta_table)


 def parse_expr(source):
diff --git a/src/ir/diagnostic.cc b/src/ir/diagnostic.cc
index 876113b85f6e4..b9677d198eba0 100644
--- a/src/ir/diagnostic.cc
+++ b/src/ir/diagnostic.cc
@@ -242,10 +242,10 @@ void ReportAt(const DiagnosticContext& context, std::ostream& out, const Span& s
   }
   auto source = (*it).second;
-  DLOG(INFO) << "Source: " << std::endl << source->source;
+  VLOG(1) << "Source: " << std::endl << source->source;

-  DLOG(INFO) << "ReportAt "
-             << "span = " << span << " msg = " << diagnostic->message;
+  VLOG(1) << "ReportAt "
+          << "span = " << span << " msg = " << diagnostic->message;

   auto line_text = source.GetLine(span->line);
diff --git a/src/parser/meta_ref.h b/src/parser/meta_ref.h
index 481f334cb0fe0..483b7f726e073 100644
--- a/src/parser/meta_ref.h
+++ b/src/parser/meta_ref.h
@@ -26,6 +26,7 @@
 #define TVM_PARSER_META_REF_H_

 #include
+#include <tvm/parser/parser.h>
 #include
 #include

@@ -36,8 +37,6 @@ namespace parser {

 using namespace relay;

-using MetaTable = Map<String, Array<ObjectRef>>;
-
 /*!
  * \brief Options for allocating storage.
  */
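To make the new entry point concrete, here is a minimal sketch of seeding the parser with an in-memory constant via init_meta_table (this mirrors test_device_copy below; the merge itself is implemented in parser.cc next):

    import numpy as np
    import tvm
    from tvm import relay

    metatable = {"relay.Constant": [relay.const(np.ones((5, 7), dtype="float32"))]}
    mod = tvm.parser.parse(
        """
        #[version = "0.0.5"]
        def @main(%x: Tensor[(5, 7), float32]) {
          add(%x, meta[relay.Constant][0])
        }
        """,
        "from_string",
        None,       # init_module
        metatable,  # init_meta_table; entries are appended after any #[metadata] section entries
    )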
diff --git a/src/parser/parser.cc b/src/parser/parser.cc
index 93dc687d72f5e..4803bffedd8dc 100644
--- a/src/parser/parser.cc
+++ b/src/parser/parser.cc
@@ -1088,7 +1088,7 @@ class Parser {
     VLOG(0) << "Parser::ParseFunctionDef";
     return WithSpan<Function>([&]() {
       PushScope();
-      PushTypeScope();
+      PushTypeScope();  // TODO(mbs): BUG?

       Array<TypeVar> generics;
       if (Peek()->token_type == TokenType::kLSquare) {
@@ -1444,6 +1444,10 @@ class Parser {
           ICHECK(attr_obj.defined());
           attrs = Downcast<Attrs>(attr_obj);
         }
+      } else {
+        this->diag_ctx.EmitFatal(Diagnostic::Error(op->span)
+                                 << "unable to determine the 'attrs_type_key' with which "
+                                    "to represent the call attributes for this operator");
       }
     }
     return true;
@@ -1867,7 +1871,7 @@ class Parser {
 };

 Parser InitParser(const std::string& file_name, const std::string& file_content,
-                  Optional<IRModule> init_module) {
+                  const Optional<IRModule>& init_module, const MetaTable& init_meta_table) {
   VLOG(0) << "InitParser: file_name: " << file_name << "file_content_size: " << file_content.size();
   SourceName src_name = SourceName::Get(file_name);
   Source source(src_name, file_content);
@@ -1886,19 +1890,33 @@ Parser InitParser(const std::string& file_name, const std::string& file_content,
   auto tokens_and_table = Tokenize(diag_ctx, source);

   auto tokens = tokens_and_table.first;
-  auto meta_data_table = tokens_and_table.second;
+  MetaTable meta_data_table = tokens_and_table.second.ToMetadata();
+
+  // Merge any entries in init_meta_table into anything captured in the #[metadata] section
+  // of the file_content. Metadata references within file_content must use indexes which account
+  // for this ordering.
+  for (const auto& pair : init_meta_table) {
+    Array<ObjectRef> items;
+    if (meta_data_table.count(pair.first)) {
+      items = meta_data_table[pair.first];
+    }
+    for (const auto& obj : pair.second) {
+      items.push_back(obj);
+    }
+    meta_data_table.Set(pair.first, items);
+  }

-  return Parser(module, diag_ctx, source, tokens, DefaultOpTable(), meta_data_table.ToMetadata());
+  return Parser(module, diag_ctx, source, tokens, DefaultOpTable(), std::move(meta_data_table));
 }

-IRModule ParseModule(std::string file_name, std::string file_content,
-                     Optional<IRModule> init_module) {
+IRModule ParseModule(const std::string& file_name, const std::string& file_content,
+                     const Optional<IRModule>& init_module, const MetaTable& init_meta_table) {
   VLOG(0) << "ParseModule";
-  auto parser = InitParser(file_name, file_content, init_module);
+  auto parser = InitParser(file_name, file_content, init_module, init_meta_table);
   auto mod = parser.ParseModule();
   ICHECK(mod.defined()) << "The parser must return a non-null module.";
-  // NB(@jroesch): it is very important that we render any errors before we procede
-  // if there were any errors which allow the parser to procede we must render them
+  // NB(@jroesch): it is very important that we render any errors before we proceed
+  // if there were any errors which allow the parser to proceed we must render them
   // here.
   parser.diag_ctx.Render();

   auto infer_type = tvm::relay::transform::InferType();
@@ -1906,23 +1924,24 @@ IRModule ParseModule(std::string file_name, std::string file_content,
   return infer_type(mod);
 }

-Expr ParseExpr(std::string file_name, std::string file_content) {
+Expr ParseExpr(const std::string& file_name, const std::string& file_content) {
   VLOG(0) << "ParseExpr";
-  auto parser = InitParser(file_name, file_content, Optional<IRModule>());
+  auto parser = InitParser(file_name, file_content, Optional<IRModule>(), MetaTable());
   parser.ParseSemVer(false);
   parser.PushScope();
   auto expr = parser.ParseExpr();
   parser.Match(TokenType::kEndOfFile);
-  // NB(@jroesch): it is very important that we render any errors before we procede
-  // if there were any errors which allow the parser to procede we must render them
+  // NB(@jroesch): it is very important that we render any errors before we proceed
+  // if there were any errors which allow the parser to proceed we must render them
   // here.
   parser.diag_ctx.Render();
   return expr;
 }

 TVM_REGISTER_GLOBAL("parser.ParseModule")
-    .set_body_typed([](tvm::String file_name, tvm::String file_content) {
-      return ParseModule(file_name, file_content);
+    .set_body_typed([](const std::string& file_name, const std::string& file_content,
+                       const Optional<IRModule>& init_module, const MetaTable& init_meta_table) {
+      return ParseModule(file_name, file_content, init_module, init_meta_table);
     });

 TVM_REGISTER_GLOBAL("parser.ParseExpr")
diff --git a/src/parser/source_map.cc b/src/parser/source_map.cc
index 4e79d0e74c592..3c1329670c40e 100644
--- a/src/parser/source_map.cc
+++ b/src/parser/source_map.cc
@@ -60,7 +60,7 @@ Source::Source(SourceName src_name, std::string source) {
 }

 tvm::String Source::GetLine(int line) {
-  DLOG(INFO) << "Source::GetLine: line=" << line;
+  VLOG(1) << "Source::GetLine: line=" << line;
   ICHECK(line - 1 < static_cast<int64_t>((*this)->line_map.size()))
       << "requested line: " << line << "at index: " << (line - 1)
       << "line_map size: " << (*this)->line_map.size() << "source: " << (*this)->source;
@@ -69,10 +69,10 @@ tvm::String Source::GetLine(int line) {
   auto range = (*this)->line_map.at(line - 1);
   int line_start = range.first;
   int line_length = range.second;
-  DLOG(INFO) << "Source::GetLine: line_start=" << line_start << " line_length=" << line_length;
+  VLOG(1) << "Source::GetLine: line_start=" << line_start << " line_length=" << line_length;
   // TODO(@jroesch): expose substring on tvm::String.
   auto line_text = std::string((*this)->source).substr(line_start, line_length);
-  DLOG(INFO) << "Source::GetLine: line_text=" << line_text;
+  VLOG(1) << "Source::GetLine: line_text=" << line_text;
   return line_text;
 }
diff --git a/src/relay/op/annotation/annotation.cc b/src/relay/op/annotation/annotation.cc
index 4eda15937f3a4..d29e1bbfde237 100644
--- a/src/relay/op/annotation/annotation.cc
+++ b/src/relay/op/annotation/annotation.cc
@@ -26,7 +26,6 @@
 #include "./annotation.h"

 #include
-#include <tvm/relay/attrs/function.h>
 #include
 #include
 #include
@@ -92,6 +91,7 @@ RELAY_REGISTER_OP("on_device")
     .add_argument("data", "Tensor", "The input data.")
     .set_support_level(10)
     .add_type_rel("Identity", IdentityRel)
+    .set_attrs_type_key("relay.attrs.OnDeviceAttrs")
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
@@ -128,14 +128,10 @@ OnDeviceProps GetOnDeviceProps(const Expr& expr) {
   return {};
 }

-TVM_REGISTER_NODE_TYPE(FunctionOnDeviceAttrs);
-
 Function FunctionOnDevice(Function function, Array<Integer> param_device_types,
-                          DLDeviceType result_device_type) {
-  auto attrs = make_object<FunctionOnDeviceAttrs>();
-  attrs->param_device_types = std::move(param_device_types);
-  attrs->result_device_type = result_device_type;
-  return WithAttr(std::move(function), attr::kFunctionAttrsKey, Attrs(std::move(attrs)));
+                          Integer result_device_type) {
+  return WithAttr(WithAttr(std::move(function), tvm::attr::kParamDeviceTypes, param_device_types),
+                  tvm::attr::kResultDeviceType, result_device_type);
 }

 Function FunctionOnDevice(Function function, const std::vector<DLDeviceType>& param_device_types,
@@ -143,9 +139,20 @@ Function FunctionOnDevice(Function function, const std::vector<DLDeviceType>& pa
   Array<Integer> arr;
   arr.reserve(param_device_types.size());
   for (const auto device_type : param_device_types) {
-    arr.push_back(static_cast<int>(device_type));
+    arr.push_back(static_cast<int>(device_type));
+  }
+  return FunctionOnDevice(std::move(function), std::move(arr),
+                          static_cast<int>(result_device_type));
+}
+
+Function OptFunctionOnDevice(Function function, const std::vector<DLDeviceType>& param_device_types,
+                             DLDeviceType result_device_type) {
+  if (std::all_of(param_device_types.begin(), param_device_types.end(),
+                  [](DLDeviceType type) { return type == kInvalidDeviceType; }) &&
+      result_device_type == kInvalidDeviceType) {
+    return function;
   }
-  return FunctionOnDevice(function, arr, result_device_type);
+  return FunctionOnDevice(function, param_device_types, result_device_type);
 }

 TVM_REGISTER_GLOBAL("relay.op.annotation._make.function_on_device")
@@ -156,32 +163,26 @@ TVM_REGISTER_GLOBAL("relay.op.annotation._make.function_on_device")
     });

 DLDeviceType GetFunctionResultDeviceType(const FunctionNode* function_node) {
-  auto opt_attrs = function_node->GetAttr<Attrs>(attr::kFunctionAttrsKey);
-  if (!opt_attrs) {
+  auto opt_integer = function_node->GetAttr<Integer>(tvm::attr::kResultDeviceType);
+  if (!opt_integer) {
     // No annotation.
     return kInvalidDeviceType;
   }
-  const auto* opt_function_on_device_attrs = opt_attrs.value().as<FunctionOnDeviceAttrs>();
-  ICHECK(opt_function_on_device_attrs != nullptr)
-      << "function '" << attr::kFunctionAttrsKey << "' annotation must be a FunctionOnDeviceAttrs";
-  return static_cast<DLDeviceType>(opt_function_on_device_attrs->result_device_type);
+  return static_cast<DLDeviceType>(opt_integer.value()->value);
 }

 DLDeviceType GetFunctionParamDeviceType(const FunctionNode* function_node, size_t i) {
   ICHECK_LT(i, function_node->params.size())
       << "param index " << i << " out of range for function of arity "
       << function_node->params.size();
-  auto opt_attrs = function_node->GetAttr<Attrs>(attr::kFunctionAttrsKey);
-  if (!opt_attrs) {
+  auto opt_array = function_node->GetAttr<Array<Integer>>(tvm::attr::kParamDeviceTypes);
+  if (!opt_array) {
     // No annotation.
     return kInvalidDeviceType;
   }
-  const auto* opt_function_on_device_attrs = opt_attrs.value().as<FunctionOnDeviceAttrs>();
-  ICHECK(opt_function_on_device_attrs != nullptr)
-      << "function '" << attr::kFunctionAttrsKey << "' annotation must be a FunctionOnDeviceAttrs";
-  ICHECK_EQ(opt_function_on_device_attrs->param_device_types.size(), function_node->params.size())
+  ICHECK_EQ(opt_array.value().size(), function_node->params.size())
       << "annotation parameters do not match function arity";
-  return static_cast<DLDeviceType>(opt_function_on_device_attrs->param_device_types[i]->value);
+  return static_cast<DLDeviceType>(opt_array.value()[i]->value);
 }

 Expr StopFusion(Expr data) {
diff --git a/src/relay/op/annotation/annotation.h b/src/relay/op/annotation/annotation.h
index e3a4aea4708c4..7a4516bcd5813 100644
--- a/src/relay/op/annotation/annotation.h
+++ b/src/relay/op/annotation/annotation.h
@@ -83,24 +83,30 @@ OnDeviceProps GetOnDeviceProps(const Expr& expr);

 inline bool IsOnDeviceCall(const Expr& expr) { return GetOnDeviceProps(expr).body.defined(); }

 /*!
- * \brief Returns \p function annotated with "on_device" attributes capturing parameter and result
- * devices types. However returns \p function directly if all device types are \p
- * kInvalidDeviceType.
+ * \brief Returns \p function annotated with "param_device_types" and "result_device_type"
+ * attributes capturing parameter and result device types respectively.
  */
 Function FunctionOnDevice(Function function, Array<Integer> param_device_types,
-                          DLDeviceType body_device_type);
+                          Integer body_device_type);

 Function FunctionOnDevice(Function function, const std::vector<DLDeviceType>& param_device_types,
                           DLDeviceType body_device_type);

+/*!
+ * \brief As for \p FunctionOnDevice, but returns \p function unchanged if all parameters and
+ * result device types are \p kInvalidDeviceType.
+ */
+Function OptFunctionOnDevice(Function function, const std::vector<DLDeviceType>& param_device_types,
+                             DLDeviceType result_device_type);
+
 /*!
  * \brief Returns the device type for the resut of \p function_node, or \p kInvalidDeviceType
- * if function does not have "on_device" annotation.
+ * if function does not have "result_device_type" annotation.
  */
 DLDeviceType GetFunctionResultDeviceType(const FunctionNode* function_node);

 /*!
  * \brief Returns the device type for the \p i'th parameter of \p function_node, or
- * \p kInvalidDeviceType if function does not have "on_device" annotation.
+ * \p kInvalidDeviceType if function does not have "param_device_types" annotation.
  */
 DLDeviceType GetFunctionParamDeviceType(const FunctionNode* function_node, size_t i);
diff --git a/src/relay/op/memory/device_copy.cc b/src/relay/op/memory/device_copy.cc
index b94caac2c3d99..dce89aa91b65a 100644
--- a/src/relay/op/memory/device_copy.cc
+++ b/src/relay/op/memory/device_copy.cc
@@ -76,6 +76,7 @@ on different devices.
     .add_argument("data", "Tensor", "The input data.")
     .set_support_level(10)
     .add_type_rel("Identity", IdentityRel)
+    .set_attrs_type_key("relay.attrs.DeviceCopyAttrs")
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
     .set_attr<FInferCorrectLayout>("FInferCorrectLayout", ElemwiseArbitraryLayout)
diff --git a/src/relay/op/memory/memory.cc b/src/relay/op/memory/memory.cc
index 68a83ebba1fe6..5339d48e3a2f1 100644
--- a/src/relay/op/memory/memory.cc
+++ b/src/relay/op/memory/memory.cc
@@ -86,6 +86,7 @@ RELAY_REGISTER_OP("memory.alloc_storage")
     .add_argument("size", "Tensor", "The size of the storage to allocate.")
     .add_argument("alignment", "Tensor", "The alignment of the storage.")
     .add_type_rel("AllocStorage", AllocStorageRel)
+    .set_attrs_type_key("relay.attrs.AllocStorageAttrs")
     .set_support_level(10)
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
@@ -200,6 +201,7 @@ RELAY_REGISTER_OP("memory.alloc_tensor")
     .add_argument("offset", "Tensor", "The offset into the backing storage.")
     .add_argument("shape", "Tensor", "The shape of the tensor to allocate.")
     .add_type_rel("AllocTensor", AllocTensorRel)
+    .set_attrs_type_key("relay.attrs.AllocTensorAttrs")
     .set_support_level(10)
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
diff --git a/src/relay/op/vm/vm.cc b/src/relay/op/vm/vm.cc
index a74a259a114f6..be31b54829379 100644
--- a/src/relay/op/vm/vm.cc
+++ b/src/relay/op/vm/vm.cc
@@ -50,6 +50,7 @@ RELAY_REGISTER_OP("vm.shape_of")
     .set_num_inputs(1)
     .add_argument("tensor", "Tensor", "The input tensor")
     .add_type_rel("ShapeOf", ShapeOfRel)
+    .set_attrs_type_key("relay.attrs.ShapeOfAttrs")
     .set_support_level(10)
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
@@ -131,6 +132,7 @@ RELAY_REGISTER_OP("vm.shape_func")
     .add_argument("func", "Function", "The operation to call")
     .add_argument("ins", "Tuple", "The input tensors.")
     .add_argument("outs", "Tuple", "The output tensors.")
+    .set_attrs_type_key("relay.attrs.ShapeFuncAttrs")
     .add_type_rel("ShapeFuncRel", ShapeFuncRel)
     .set_support_level(10)
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
@@ -214,6 +216,7 @@ RELAY_REGISTER_OP("vm.reshape_tensor")
     .add_argument("data", "Tensor", "The input tensor")
     .add_argument("shape", "Tensor", "The output shape tensor")
     .add_type_rel("ReshapeTensor", ReshapeTensorRel)
+    .set_attrs_type_key("relay.attrs.ReshapeTensorAttrs")
     .set_support_level(10)
     .set_attr<TOpPattern>("TOpPattern", kOpaque)
     .set_attr<TOpIsStateful>("TOpIsStateful", false)
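The set_attrs_type_key registrations above pair with the parser change earlier in this diff: without a registered attrs type key the parser now fails fast instead of silently mishandling call attributes. A hedged round-trip sketch of what these registrations enable (illustrative only, not taken from the PR):

    import tvm
    from tvm import relay

    x = relay.var("x", shape=(5, 7))
    # "on_device" calls carry OnDeviceAttrs; re-parsing printed module text relies
    # on the op's registered attrs type key to reconstruct those attributes.
    f = relay.Function([x], relay.annotation.on_device(relay.abs(x), tvm.device("cpu")))
    mod = relay.transform.InferType()(tvm.IRModule.from_expr(f))
    reparsed = tvm.parser.parse(mod.astext())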
diff --git a/src/relay/transforms/device_planner.cc b/src/relay/transforms/device_planner.cc
index cdbfcc78c5662..9f959a3bc1d9f 100644
--- a/src/relay/transforms/device_planner.cc
+++ b/src/relay/transforms/device_planner.cc
@@ -256,7 +256,6 @@
 #include
 #include
 #include
-#include <tvm/relay/attrs/function.h>
 #include
 #include
 #include
@@ -697,6 +696,22 @@ class DeviceDomains {
       args_and_result.emplace_back(free_domain);
     }
     args_and_result.emplace_back(free_domain);
+  } else if (call->op->IsInstance<ConstructorNode>()) {
+    // <constructor>(arg1, ..., argn)
+    // : fn(?x1?, ..., ?xn?):?xr?
+    // where we force all possibly higher-order ?xi? to be collapsed to the first-order ?xr?.
+    // TODO(mbs): This assumes we've eta-expanded constructors, thus all constructors appear
+    // in callee positions.
+    const auto* func_type_node = call->op->checked_type().as<FuncTypeNode>();
+    ICHECK_NOTNULL(func_type_node);
+    ICHECK_EQ(func_type_node->arg_types.size(), call->args.size());
+    auto result_domain = Free(func_type_node->ret_type);  // first-order
+    for (const auto& arg_type : func_type_node->arg_types) {
+      auto param_domain = Free(arg_type);           // possibly higher-order
+      UnifyCollapsed(result_domain, param_domain);  // collapse if required
+      args_and_result.emplace_back(param_domain);
+    }
+    args_and_result.emplace_back(result_domain);
   } else {
     // Defer to normal case where op can be an arbitrary expression.
     return DomainFor(call->op);
@@ -1038,7 +1053,8 @@ class DeviceAnalyzer : public ExprVisitor {
     VLOG(1) << "initial call function domain:" << std::endl
             << domains_->ToString(func_domain) << std::endl
             << "and implied domain:" << std::endl
-            << domains_->ToString(implied_domain) << "for call:" << std::endl
+            << domains_->ToString(implied_domain) << std::endl
+            << "for call:" << std::endl
             << PrettyPrint(call);

     // The above must match.
@@ -1113,9 +1129,7 @@ class DeviceAnalyzer : public ExprVisitor {
     // If the function already has an "on_device" attribute then we can further
     // constrain the function's domain to match it.
-    Optional<Attrs> opt_attrs =
-        function_node->GetAttr<Attrs>(attr::kFunctionAttrsKey);
-    if (opt_attrs) {
+    if (GetFunctionResultDeviceType(function_node) != kInvalidDeviceType) {
       std::vector<DeviceDomainPtr> args_and_result;
       for (size_t i = 0; i < function_node->params.size(); ++i) {
         args_and_result.emplace_back(
@@ -1213,8 +1227,8 @@ class DeviceAnalyzer : public ExprVisitor {
   }

   void VisitExpr_(const ConstructorNode* constructor_node) final {
-    // Probably needs to be device polymorphic.
-    domains_->DomainFor(GetRef<Constructor>(constructor_node));
+    // no-op, constructors are handled at their call-sites.
+    // TODO(mbs): Assumes eta-expansion
   }

   void VisitExpr_(const IfNode* if_node) final {
@@ -1396,7 +1410,7 @@ class DeviceCapturer : public ExprMutator {
   }

  private:
-  // Nothing interesting for VarNode, ConstantNode, GlobalVarNode and OpNode.
+  // Nothing interesting for VarNode, ConstantNode, GlobalVarNode, OpNode and ConstructorNode.

   Expr VisitExpr_(const TupleNode* tuple_node) final {
     auto tuple = GetRef<Tuple>(tuple_node);
@@ -1573,13 +1587,6 @@ class DeviceCapturer : public ExprMutator {
     return RefWrite(ref, value, ref_write_node->span);
   }

-  Expr VisitExpr_(const ConstructorNode* constructor_node) final {
-    auto constructor = GetRef<Constructor>(constructor_node);
-    // check we have a device type.
-    (void)GetDeviceType(constructor);
-    return constructor;
-  }
-
   Expr VisitExpr_(const MatchNode* match_node) final {
     auto match = GetRef<Match>(match_node);
     Expr data = VisitChild(match, match_node->data);
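Since device_planner.cc is the consumer of everything above, a minimal sketch of driving the pass from Python (the tests below wrap exactly this sequence in rewrite_and_assert):

    import tvm
    from tvm import relay

    mod = tvm.parser.fromtext(
        """
        #[version = "0.0.5"]
        def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) {
          add(%a, %b)
        }
        """
    )
    mod = relay.transform.InferType()(mod)
    # Plan with the GPU (device_type=2) as the default device; the planned module
    # carries "param_device_types"/"result_device_type" as function attributes.
    mod = relay.transform.PlanDevices(tvm.device("cuda"))(mod)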
diff --git a/src/relay/transforms/type_infer.cc b/src/relay/transforms/type_infer.cc
index 6c2371716b167..ebdf1fed2fab5 100644
--- a/src/relay/transforms/type_infer.cc
+++ b/src/relay/transforms/type_infer.cc
@@ -824,7 +824,6 @@ Pass InferType() {
   auto pass_info = PassInfo(0, "InferType", {});
   return tvm::transform::CreateModulePass(
       [=](IRModule mod, const PassContext& pass_ctx) {
-        DLOG(INFO) << "tvm::relay::transform::InferType";
         // Execute the pass function and return a new module.
         IRModule updated_mod = mod->ShallowCopy();
diff --git a/tests/python/relay/op/annotation/test_annotation.py b/tests/python/relay/op/annotation/test_annotation.py
index 51daa9aaa06a4..58e559eb96809 100644
--- a/tests/python/relay/op/annotation/test_annotation.py
+++ b/tests/python/relay/op/annotation/test_annotation.py
@@ -54,13 +54,10 @@ def test_function_on_device():
     f = relay.Function([x, y], relay.add(x, y))
     func = relay.annotation.function_on_device(f, ["cpu", "cuda"], "cuda")
     assert isinstance(func, relay.Function)
-    assert len(func.attrs["on_device"].param_device_types) == 2
-    assert func.attrs["on_device"].param_device_types[0] == 1
-    # ie kDLCPU
-    assert func.attrs["on_device"].param_device_types[1] == 2
-    # ie kDLCUDA
-    assert func.attrs["on_device"].result_device_type == 2
-    # ie KDLCUDA
+    assert len(func.attrs["param_device_types"]) == 2
+    assert func.attrs["param_device_types"][0] == 1  # ie kDLCPU
+    assert func.attrs["param_device_types"][1] == 2  # ie kDLCUDA
+    assert func.attrs["result_device_type"] == 2  # ie kDLCUDA


 if __name__ == "__main__":
diff --git a/tests/python/relay/test_pass_plan_devices.py b/tests/python/relay/test_pass_plan_devices.py
index 6c3d2e266b8d2..e06ad333aa3d5 100644
--- a/tests/python/relay/test_pass_plan_devices.py
+++ b/tests/python/relay/test_pass_plan_devices.py
@@ -18,23 +18,21 @@
 """Unit tests for the PlanDevices pass. We check:
     - The pass alone given the expected AST, though we need to manually run InferTypes.
-    - The pass is idempotent."""
-
-# TODO(mbs): All the input/expected programs should be directly quoted using @script
-# TODO(mbs): Not testing Match and Constructor since not supported by Python bindings?
-# TODO(mbs): Add back reference implementation tests once VM is ready.
+    - The pass is idempotent.
+    - Execution on the VM backend yields the correct result."""

 import tvm
 from tvm import relay
 import tvm.testing
 import numpy as np

-N = 5
-M = 7
 CPU = tvm.device("cpu")  # device_type=1
 GPU = tvm.device("cuda")  # device_type=2
 DEFAULT = GPU

+core = tvm.IRModule()
+core.import_from_std("core.rly")
+

 def rewrite_and_assert(in_mod, expected_mod):
     """Manually run the pass and assert it's structurally equals to the expected."""
@@ -42,7 +40,7 @@ def rewrite_and_assert(in_mod, expected_mod):
     actual_mod = relay.transform.PlanDevices(DEFAULT)(actual_mod)
     actual_mod = relay.transform.InferType()(actual_mod)
     expected_mod = relay.transform.InferType()(expected_mod)
-    if not tvm.ir.structural_equal(actual_mod, expected_mod):
+    if not tvm.ir.structural_equal(actual_mod, expected_mod, True):
         # Print everything in full so we can see what's going on when things fail.
         print("Input module:")
         print(in_mod)
         print("Expected module:")
         print(expected_mod)
         print("Actual module:")
         print(actual_mod)
         # Assert again so as to see the actual disagreeing sub-expressions.
-        tvm.ir.assert_structural_equal(actual_mod, expected_mod)
+        tvm.ir.assert_structural_equal(actual_mod, expected_mod, True)
+
+
+def eval_and_assert(in_mod: tvm.IRModule, reference_func, args):
+    """Test the standard compilation flow gives us a function which agrees with the Numpy
+    reference implementation."""
+    if not tvm.runtime.enabled(GPU):
+        print("Not evaluating since device %s is not enabled" % GPU)
+        return
+    with tvm.transform.PassContext(opt_level=3):
+        compiled = relay.create_executor("vm", mod=in_mod, device=GPU, target="cuda").evaluate()
+        actual = compiled(*args).numpy()
+        expected = reference_func(*args)
+        tvm.testing.assert_allclose(actual, expected)


 def rand(shape):
@@ -68,455 +79,371 @@ def exercise(in_mod: tvm.IRModule, expected_mod: tvm.IRModule, reference_func, a
     rewrite_and_assert(in_mod, expected_mod)
     # Idempotence
     rewrite_and_assert(expected_mod, expected_mod)
-    # TODO(mbs): Add back compiling and comparing to reference implementation once VM is ready.
-
-
-#
-# Annotation shorthands
-#
-
-
-def on_cpu(expr: relay.Expr):
-    return relay.annotation.on_device(expr, CPU)
-
-
-def on_gpu(expr: relay.Expr):
-    return relay.annotation.on_device(expr, GPU)
-
-
-def cpu_to_gpu(expr: relay.Expr):
-    return relay.op.device_copy(expr, CPU, GPU)
-
-
-def gpu_to_cpu(expr: relay.Expr):
-    return relay.op.device_copy(expr, GPU, CPU)
-
-
-def fixed_cpu(expr: relay.Expr):
-    return relay.annotation.on_device(expr, CPU, True)
-
-
-def fixed_gpu(expr: relay.Expr):
-    return relay.annotation.on_device(expr, GPU, True)
+    # The VM can compile and possibly even run the module
+    # TODO(mbs): Disabled until VM supports new device planning.
+    # if not (reference_func is None) and not (args is None):
+    #     eval_and_assert(in_mod, reference_func, args)


 def test_plain():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-
-    # def @main(a, b, c, d) { subtract(add(a, b), add(c, d)) }
+    # Everything defaults to GPU
     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function([a, b, c, d], relay.subtract(relay.add(a, b), relay.add(c, d)))
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              %0 = add(%a, %b);
+              %1 = add(%c, %d);
+              subtract(%0, %1)
+            }
+            """
         )

-    # def @main(a, b, c, d, on_device={param_device_types=[2,2,2,2], result_device_type=2}) {
-    #   subtract(add(a, b), add(c, d))
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function([a, b, c, d], relay.subtract(relay.add(a, b), relay.add(c, d))),
-                [GPU, GPU, GPU, GPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[2, 2, 2, 2], result_device_type=2) {
+              %0 = add(%a, %b);
+              %1 = add(%c, %d);
+              subtract(%0, %1)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_left_add_on_cpu():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-
-    # def @main(a, b, c, d) { subtract(on_cpu(add(a, b)), add(c, d)) }
+    # Force some args to be on CPU, rest default to GPU.
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function([a, b, c, d], relay.subtract(on_cpu(relay.add(a, b)), relay.add(c, d)))
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              %0 = add(%a, %b);
+              %1 = on_device(%0, device_type=1);
+              %2 = add(%c, %d);
+              subtract(%1, %2)
+            }
+            """
        )

-    # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) {
-    #   subtract(cpu_to_gpu(fixed_cpu(add(a, b)), add(c, d))
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.subtract(cpu_to_gpu(fixed_cpu(relay.add(a, b))), relay.add(c, d)),
-                ),
-                [CPU, CPU, GPU, GPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 2, 2], result_device_type=2) {
+              %0 = add(%a, %b);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %3 = add(%c, %d);
+              subtract(%2, %3)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_left_add_on_cpu_via_copy():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-
-    # def @main(a, b, c, d) { subtract(cpu_to_gpu(add(a, b)), add(c, d)) }
+    # As for test_left_add_on_cpu, but with an explicit device_copy.
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [a, b, c, d], relay.subtract(cpu_to_gpu(relay.add(a, b)), relay.add(c, d))
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              %0 = add(%a, %b);
+              %1 = device_copy(%0, src_dev_type=1, dst_dev_type=2);
+              %2 = add(%c, %d);
+              subtract(%1, %2)
+            }
+            """
        )

-    # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) {
-    #   subtract(cpu_to_gpu(fixed_cpu(add(a, b)), add(c, d))
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.subtract(cpu_to_gpu(fixed_cpu(relay.add(a, b))), relay.add(c, d)),
-                ),
-                [CPU, CPU, GPU, GPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 2, 2], result_device_type=2) {
+              %0 = add(%a, %b);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %3 = add(%c, %d);
+              subtract(%2, %3)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_both_adds_on_cpu():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-
-    # def @main(a, b, c, d) { subtract(on_cpu(add(a, b)), on_cpu(add(c, d))) }
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [a, b, c, d], relay.subtract(on_cpu(relay.add(a, b)), on_cpu(relay.add(c, d)))
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              %0 = add(%a, %b);
+              %1 = add(%c, %d);
+              %2 = on_device(%0, device_type=1);
+              %3 = on_device(%1, device_type=1);
+              subtract(%2, %3)
+            }
+            """
        )

-    # def @main(a, b, c, d, on_device={param_device_types=[1,1,1,1], result_device_type=2}) {
-    #   subtract(cpu_to_gpu(fixed_cpu(add(a, b)), cpu_to_gpu(fixed_cpu(add(c, d))))
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.subtract(
-                        cpu_to_gpu(fixed_cpu(relay.add(a, b))),
-                        cpu_to_gpu(fixed_cpu(relay.add(c, d))),
-                    ),
-                ),
-                [CPU, CPU, CPU, CPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 1, 1], result_device_type=2) {
+              %0 = add(%a, %b);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = add(%c, %d);
+              %3 = on_device(%2, device_type=1, is_fixed=True);
+              %4 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %5 = device_copy(%3, src_dev_type=1, dst_dev_type=2);
+              subtract(%4, %5)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_sharing():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-
-    # def @main(a, b) {
-    #   %0 = add(a, b)
-    #   subtract(on_cpu(%0), %0) }
+    # The same add sub-expression is annotated twice.
    def input():
-        add = relay.add(a, b)
-        return tvm.IRModule.from_expr(
-            relay.Function([a, b], relay.subtract(on_cpu(add), on_cpu(add)))
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) {
+              %0 = add(%a, %b);
+              %1 = on_device(%0, device_type=1);
+              %2 = on_device(%0, device_type=1);
+              subtract(%1, %2)
+            }
+            """
        )

-    # def @main(a, b, on_device={param_device_types=[1,1], result_device_type=2}) {
-    #   %0 = add(a, b)
-    #   subtract(cpu_to_gpu(fixed_cpu(%0), cpu_to_gpu(fixed_cpu(%0)))
    def expected():
-        add = relay.add(a, b)
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b], relay.subtract(cpu_to_gpu(fixed_cpu(add)), cpu_to_gpu(fixed_cpu(add)))
-                ),
-                [CPU, CPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1], result_device_type=2) {
+              %0 = add(%a, %b);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = on_device(%0, device_type=1, is_fixed=True);
+              %3 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %4 = device_copy(%2, src_dev_type=1, dst_dev_type=2);
+              subtract(%3, %4)
+            }
+            """
        )

     def ref(a, b):
         x = np.add(a, b)
         return np.subtract(x, x)

-    exercise(input(), expected(), ref, rands(shape, 2))
+    exercise(input(), expected(), ref, rands((5, 7), 2))


 def test_let_on_cpu():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-    l = relay.Var("l")
-    r = relay.Var("r")
-
-    # def @main(a, b, c, d) {
-    #   let l = add(a, b);
-    #   let r = add(c, d);
-    #   subtract(on_cpu(l), r)
-    # }
+    # The device for a let-bound expression can flow from uses of the let-bound var.
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [a, b, c, d],
-                relay.Let(
-                    l, relay.add(a, b), relay.Let(r, relay.add(c, d), relay.subtract(on_cpu(l), r))
-                ),
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              let %l = add(%a, %b);
+              let %r = add(%c, %d);
+              %0 = on_device(%l, device_type=1);
+              subtract(%0, %r)
+            }
+            """
        )

-    # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) {
-    #   let l = fixed_cpu(add(a, b));
-    #   let r = add(c, d);
-    #   subtract(cpu_to_gpu(l), r)
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.Let(
-                        l,
-                        fixed_cpu(relay.add(a, b)),
-                        relay.Let(r, relay.add(c, d), relay.subtract(cpu_to_gpu(l), r)),
-                    ),
-                ),
-                [CPU, CPU, GPU, GPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 2, 2], result_device_type=2) {
+              %0 = add(%a, %b);
+              let %l = on_device(%0, device_type=1, is_fixed=True);
+              let %r = add(%c, %d);
+              %1 = device_copy(%l, src_dev_type=1, dst_dev_type=2);
+              subtract(%1, %r)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_func_param_on_cpu():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-    f = relay.Var("f")
-    x = relay.Var("x")
-    y = relay.Var("y")
-
-    # def @main(a, b, c, d) {
-    #   let f = fn(x, y) { on_cpu(add(x, y)) }  -- forces both body and result on CPU
-    #   subtract(f(a, b), add(c, d))
-    # }
+    # Devices for function parameters flow to call sites.
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [a, b, c, d],
-                relay.Let(
-                    f,
-                    relay.Function([x, y], on_cpu(relay.add(x, y))),
-                    relay.subtract(relay.Call(f, [a, b]), relay.add(c, d)),
-                ),
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              let %f = fn (%x, %y) {
+                %0 = add(%x, %y);
+                on_device(%0, device_type=1)
+              };
+              %1 = %f(%a, %b);
+              %2 = add(%c, %d);
+              subtract(%1, %2)
+            }
+            """
        )

-    # def @main(a, b, c, d, on_device={param_device_types=[1,1,1,1], result_device_type=1}) {
-    #   let f = fn(x, y, on_device={param_device_types[1,1], result_device_type=1}) {
-    #     add(x, y)
-    #   };
-    #   subtract(f(a, b), add(c, d))
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.Let(
-                        f,
-                        relay.annotation.function_on_device(
-                            relay.Function([x, y], relay.add(x, y)), [CPU, CPU], CPU
-                        ),
-                        relay.subtract(relay.Call(f, [a, b]), relay.add(c, d)),
-                    ),
-                ),
-                [CPU, CPU, CPU, CPU],
-                CPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 1, 1], result_device_type=1) {
+              let %f = fn (%x, %y, param_device_types=[1, 1], result_device_type=1) {
+                add(%x, %y)
+              };
+              %0 = %f(%a, %b);
+              %1 = add(%c, %d);
+              subtract(%0, %1)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_func_result_on_cpu():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)
-    f = relay.Var("f")
-    x = relay.Var("x")
-    y = relay.Var("y")
-
-    # def @main(a, b, c, d) {
-    #   let f = fn(x, y) { add(x, y) }
-    #   subtract(on_cpu(f(a, b)), add(c, d))
-    # }
+    # Devices for call sites flow to function results.
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [a, b, c, d],
-                relay.Let(
-                    f,
-                    relay.Function([x, y], relay.add(x, y)),
-                    relay.subtract(on_cpu(relay.Call(f, [a, b])), relay.add(c, d)),
-                ),
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              let %f = fn (%x, %y) {
+                add(%x, %y)
+              };
+              %0 = %f(%a, %b);
+              %1 = on_device(%0, device_type=1);
+              %2 = add(%c, %d);
+              subtract(%1, %2)
+            }
+            """
        )

-    # def @main(a, b, c, d, on_device={param_device_types=[1,1,2,2], result_device_type=2}) {
-    #   let f = fixed_cpu(fn(x, y, on_device={param_device_types=[1,1], result_device_type=1}) {
-    #     add(x, y)
-    #   });
-    #   subtract(cpu_to_gpu(fixed_cpu(f(a, b))), add(c, d))
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.Let(
-                        f,
-                        fixed_cpu(
-                            relay.annotation.function_on_device(
-                                relay.Function([x, y], relay.add(x, y)), [CPU, CPU], CPU
-                            )
-                        ),
-                        relay.subtract(
-                            cpu_to_gpu(fixed_cpu(relay.Call(f, [a, b]))), relay.add(c, d)
-                        ),
-                    ),
-                ),
-                [CPU, CPU, GPU, GPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 2, 2], result_device_type=2) {
+              %0 = fn (%x, %y, param_device_types=[1, 1], result_device_type=1) {
+                add(%x, %y)
+              };
+              let %f = on_device(%0, device_type=1, is_fixed=True);
+              %1 = %f(%a, %b);
+              %2 = on_device(%1, device_type=1, is_fixed=True);
+              %3 = device_copy(%2, src_dev_type=1, dst_dev_type=2);
+              %4 = add(%c, %d);
+              subtract(%3, %4)
+            }
+            """
        )

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.add(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_higher_order():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    f = relay.Var("f")
-    g = relay.Var("g")
-    a = relay.Var("a")
-    h = relay.Var("h")
-    b = relay.Var("b")
-
-    # The constraint on a flows back to y via f and h
-    # def @main(x, y) {
-    #   let f = fn(g) { fn(a) { add(g(on_cpu(a)), x) } }
-    #   let h = fn(b) { relu(b) }
-    #   subtract(x, f(h)(y))
-    # }
+    # The constraint on %a flows back to %y via %f and %h
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [x, y],
-                relay.Let(
-                    f,
-                    relay.Function(
-                        [g], relay.Function([a], relay.add(relay.Call(g, [on_cpu(a)]), x))
-                    ),
-                    relay.Let(
-                        h,
-                        relay.Function([b], relay.negative(b)),
-                        relay.subtract(x, relay.Call(relay.Call(f, [h]), [y])),
-                    ),
-                ),
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) {
+              let %f = fn (%g) {
+                fn (%a) {
+                  %0 = on_device(%a, device_type=1);
+                  %1 = %g(%0);
+                  add(%1, %x)
+                }
+              };
+              let %h = fn (%b) {
+                negative(%b)
+              };
+              %2 = %f(%h);
+              %3 = %2(%y);
+              subtract(%x, %3)
+            }
+            """
        )

-    # def @main(x, y, on_device={param_device_types=[GPU, CPU], result_device_type=GPU}) {
-    #   let f = fn(g, on_device={param_device_types=[GPU], result_device_type=GPU}) {
-    #     fn(a, on_device={param_device_types=[CPU], result_device_type=GPU}) {
-    #       add(g(cpu_to_gpu(a)), x)
-    #     }
-    #   }
-    #   let h = fn(b, on_device={param_device_types=[GPU], result_device_type=GPU}) { negative(b) }
-    #   subtract(x, f(h)(y))
-    # }
    def expected():
        # Yeah, this is illegible.
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, y],
-                    relay.Let(
-                        f,
-                        relay.annotation.function_on_device(
-                            relay.Function(
-                                [g],
-                                relay.annotation.function_on_device(
-                                    relay.Function(
-                                        [a], relay.add(relay.Call(g, [cpu_to_gpu(a)]), x)
-                                    ),
-                                    [CPU],
-                                    GPU,
-                                ),
-                            ),
-                            [GPU],
-                            GPU,
-                        ),
-                        relay.Let(
-                            h,
-                            relay.annotation.function_on_device(
-                                relay.Function([b], relay.negative(b)), [GPU], GPU
-                            ),
-                            relay.subtract(x, relay.Call(relay.Call(f, [h]), [y])),
-                        ),
-                    ),
-                ),
-                [GPU, CPU],
-                GPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32],
+                      param_device_types=[2, 1], result_device_type=2) {
+              let %f = fn (%g, param_device_types=[2], result_device_type=2) {
+                fn (%a, param_device_types=[1], result_device_type=2) {
+                  %0 = device_copy(%a, src_dev_type=1, dst_dev_type=2);
+                  %1 = %g(%0);
+                  add(%1, %x)
+                }
+              };
+              let %h = fn (%b, param_device_types=[2], result_device_type=2) {
+                negative(%b)
+              };
+              %2 = %f(%h);
+              %3 = %2(%y);
+              subtract(%x, %3)
+            }
+            """
        )

     def ref(x, y):
@@ -528,157 +455,125 @@ def h(b):
         return np.subtract(x, f(h)(y))

-    exercise(input(), expected(), ref, rands(shape, 2))
+    exercise(input(), expected(), ref, rands((5, 7), 2))


 def test_function_in_tuple():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    y = relay.var("y", shape=shape)
-    f = relay.Var("f")
-    t = relay.Var("t")
-
-    # Since f end up in a tuple its argument and result is forced to be on the CPU
-    # def @main(x, y) {
-    #   let f = fn(a, b) { add(a, on_cpu(b)) }
-    #   let t = (f, x)
-    #   t.0(t.1, y)
-    # }
+    # Since %f ends up in a tuple its argument and result is forced to be on the CPU
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [x, y],
-                relay.Let(
-                    f,
-                    relay.Function([a, b], relay.add(a, on_cpu(b))),
-                    relay.Let(
-                        t,
-                        relay.Tuple([f, x]),
-                        relay.Call(relay.TupleGetItem(t, 0), [relay.TupleGetItem(t, 1), y]),
-                    ),
-                ),
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) {
+              let %f = fn (%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) {
+                %0 = on_device(%b, device_type=1);
+                add(%a, %0)
+              };
+              let %t = (%f, %x);
+              %1 = %t.1;
+              %2 = %t.0;
+              %2(%1, %y)
+            }
+            """
        )

-    # def @main(x, y, on_device={param_device_types=[1,1], result_device_type=1}) {
-    #   let f = fn(a, b, on_device={param_device_types=[1,1], result_device_type=1}) { add(a, b) }
-    #   let t = (f, x)
-    #   t.0(t.1, y)
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, y],
-                    relay.Let(
-                        f,
-                        relay.annotation.function_on_device(
-                            relay.Function([a, b], relay.add(a, b)), [CPU, CPU], CPU
-                        ),
-                        relay.Let(
-                            t,
-                            relay.Tuple([f, x]),
-                            relay.Call(relay.TupleGetItem(t, 0), [relay.TupleGetItem(t, 1), y]),
-                        ),
-                    ),
-                ),
-                [CPU, CPU],
-                CPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1], result_device_type=1) {
+              let %f = fn (%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                           param_device_types=[1, 1], result_device_type=1) {
+                add(%a, %b)
+              };
+              let %t = (%f, %x);
+              %0 = %t.1;
+              %1 = %t.0;
+              %1(%0, %y)
+            }
+            """
        )

     def ref(x, y):
         return np.add(x, y)

-    exercise(input(), expected(), ref, rands(shape, 2))
+    exercise(input(), expected(), ref, rands((5, 7), 2))


 def test_device_copy():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    const = relay.const(rand(shape))
+    const = rand((5, 7))
+    metatable = {"relay.Constant": [relay.const(const)]}

-    # def @main(x) { add(cpu_to_gpu(x), const) }
    def input():
-        return tvm.IRModule.from_expr(relay.Function([x], relay.add(cpu_to_gpu(x), const)))
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32]) {
+              %0 = device_copy(%x, src_dev_type=1, dst_dev_type=2);
+              add(%0, meta[relay.Constant][0])
+            }
+            """,
+            "from_string",
+            None,
+            metatable,
+        )

-    # def @main(x, on_device={param_device_types=[1], result_device_type=2}) {
-    #   add(cpu_to_gpu(x), constant)
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function([x], relay.add(cpu_to_gpu(x), const)), [CPU], GPU
-            )
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], param_device_types=[1], result_device_type=2) {
+              %0 = device_copy(%x, src_dev_type=1, dst_dev_type=2);
+              add(%0, meta[relay.Constant][0])
+            }
+            """,
+            "from_string",
+            None,
+            metatable,
        )

     def ref(x):
-        return np.add(x, const.data.numpy())
+        return np.add(x, const)

-    exercise(input(), expected(), ref, rands(shape, 1))
+    exercise(input(), expected(), ref, rands((5, 7), 1))


 def test_shape_func():
-    p = relay.var("p")
-    data_shape = (relay.Any(),)
-    x = relay.var("x", shape=data_shape)
-    y = relay.var("y", shape=data_shape)
-    s = relay.var("s", shape=(1,), dtype="int64")
-
-    # def @main(x, s) {
-    #   let p = fixed_gpu(fn(y) { relu(y) })  -- simulates a primitive post FuseOps
-    #   shape_func(p,
-    #              (shape_of(fixed_gpu(x)),),  -- shape of primitive input tensor
-    #              (s,),                       -- space for output shape
-    #              [False])                    -- calling with input shapes not tensors
-    # }
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [x, s],
-                relay.Let(
-                    p,
-                    fixed_gpu(relay.Function([y], relay.nn.relu(y))),
-                    relay.op.vm.shape_func(
-                        p,
-                        relay.Tuple([relay.op.vm.shape_of(fixed_gpu(x))]),
-                        relay.Tuple([s]),
-                        [False],
-                    ),
-                ),
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(?), float32], %s: Tensor[(1), int64]) {
+              %0 = fn (%y: Tensor[(?), float32]) {
+                nn.relu(%y)
+              };
+              let %p = on_device(%0, device_type=2, is_fixed=True);
+              %1 = on_device(%x, device_type=2, is_fixed=True);
+              %2 = vm.shape_of(%1, dtype="int64");
+              %3 = (%2,);
+              %4 = (%s,);
+              vm.shape_func(%p, %3, %4, is_input=[False])
+            }
+            """
        )

-    # def @main(x, s, on_device={param_device_types=[2,1], result_device_type=1}) {
-    #   let p = fixed_gpu(fn(y, param_device_types=[2], result_device_type=2) { relu(y) })
-    #   shape_func(p,
-    #              (shape_of(x),),
-    #              (s,),
-    #              [False])
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, s],
-                    relay.Let(
-                        p,
-                        fixed_gpu(
-                            relay.annotation.function_on_device(
-                                relay.Function([y], relay.nn.relu(y)), [GPU], GPU
-                            )
-                        ),
-                        relay.op.vm.shape_func(
-                            p, relay.Tuple([relay.op.vm.shape_of(x)]), relay.Tuple([s]), [False]
-                        ),
-                    ),
-                ),
-                [GPU, CPU],
-                CPU,
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(?), float32], %s: Tensor[(1), int64],
+                      param_device_types=[2, 1], result_device_type=1) {
+              %0 = fn (%y: Tensor[(?), float32], param_device_types=[2], result_device_type=2) {
+                nn.relu(%y)
+              };
+              let %p = on_device(%0, device_type=2, is_fixed=True);
+              %1 = vm.shape_of(%x, dtype="int64");
+              %2 = (%1,);
+              %3 = (%s,);
+              vm.shape_func(%p, %2, %3, is_input=[False])
+            }
+            """
        )

     # Don't try to execute, too fiddly to setup.
     exercise(input(), expected(), None, None)


 def test_shape_of():
-    compiletime_shape = (relay.Any(), relay.Any())
-    runtime_shape = (N, M)
-    x = relay.var("x", shape=compiletime_shape)
-
-    # We need to use fixed_gpu since the result of on_gpu will default to the result device of @main which is cpu,
-    # which then forces a copy.
+    # We need to use is_fixed=True in the on_device call so that the tensor will be on the GPU. Otherwise the
+    # result defaults to the result device for @main which is the CPU, thus forcing a copy.
     # TODO(mbs): Perhaps the defaulting heuristics are being too clever?
-    # def @main(x) { shape_of(fixed_gpu(x)) }
    def input():
-        return tvm.IRModule.from_expr(relay.Function([x], relay.op.vm.shape_of(fixed_gpu(x))))
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(?, ?), float32]) {
+              %0 = on_device(%x, device_type=2, is_fixed=True);
+              vm.shape_of(%0, dtype="int64")
+            }
+            """
        )

-    # def @main(x, on_device={param_device_types=[2], result_dev_type=1}) {
-    #   shape_of(x)
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function([x], relay.op.vm.shape_of(x)), [GPU], CPU
-            )
+        return tvm.parser.fromtext(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(?, ?), float32], param_device_types=[2], result_device_type=1) {
+              vm.shape_of(%x, dtype="int64")
+            }
+            """
        )

     def ref(x):
         return x.shape

-    exercise(input(), expected(), ref, rands(runtime_shape, 1))
+    exercise(input(), expected(), ref, rands((5, 7), 1))


 def test_alloc_storage():
-    size = relay.Var("size", relay.scalar_type("int64"))
-    alignment = relay.Var("alignment", relay.scalar_type("int64"))
-    main = relay.GlobalVar("main")
-    stdlib = tvm.IRModule()
-    stdlib.import_from_std("core.rly")
-
-    # def @main(size, alignment) { alloc_storage(size, alignment, GPU) }
    def input():
-        mod = tvm.IRModule()
-        mod.update(stdlib)
-        mod[main] = relay.Function(
-            [size, alignment], relay.op.memory.alloc_storage(size, alignment, GPU)
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%size: int64, %alignment: int64) {
+              memory.alloc_storage(%size, %alignment, device_id=0, device_type=2)
+            }
+            """,
+            "from_string",
+            core,
        )
-        return mod

-    # def @main(size, alignment, on_device={param_device_types=[1,1], result_device_type=2}) {
-    #   alloc_storage(size, alignment, GPU)
-    # }
    def expected():
-        mod = tvm.IRModule()
-        mod.update(stdlib)
-        mod[main] = relay.annotation.function_on_device(
-            relay.Function([size, alignment], relay.op.memory.alloc_storage(size, alignment, GPU)),
-            [CPU, CPU],
-            GPU,
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%size: int64, %alignment: int64, param_device_types=[1, 1], result_device_type=2) {
+              memory.alloc_storage(%size, %alignment, device_id=0, device_type=2)
+            }
+            """,
+            "from_string",
+            core,
        )
-        return mod

     # Don't try to execute, too fiddly to setup.
     exercise(input(), expected(), None, None)


 def test_alloc_tensor():
-    stdlib = tvm.IRModule()
-    stdlib.import_from_std("core.rly")
-    sto_type = relay.TypeCall(stdlib.get_global_type_var("Storage"), [])
-    sto = relay.Var("sto", sto_type)
-    main = relay.GlobalVar("main")
-    shape = relay.const(np.array([3, 2]), dtype="int64")
-
-    # def @main(sto) { alloc_tensor(sto, 0, [3, 2]) }
+    shape = np.array([3, 2])
+    metatable = {"relay.Constant": [relay.const(shape, dtype="int64")]}
+
    def input():
-        mod = tvm.IRModule()
-        mod.update(stdlib)
-        mod[main] = relay.Function(
-            [sto], relay.op.memory.alloc_tensor(sto, relay.const(0, dtype="int64"), shape)
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%sto: Storage[]) {
+              memory.alloc_tensor(%sto, 0, meta[relay.Constant][0],
+                                  const_shape=meta[relay.Constant][0], assert_shape=[])
+            }
+            """,
+            "from_string",
+            core,
+            metatable,
        )
-        return mod

-    # def @main(sto, on_device={param_device_types=[2], result_device_type=2}) {
-    #   alloc_tensor(sto, fixed_cpu(0), fixed_cpu([3, 2]))
-    # }
    def expected():
-        mod = tvm.IRModule()
-        mod.update(stdlib)
-        mod[main] = relay.annotation.function_on_device(
-            relay.Function(
-                [sto],
-                relay.op.memory.alloc_tensor(
-                    sto, fixed_cpu(relay.const(0, dtype="int64")), fixed_cpu(shape)
-                ),
-            ),
-            [GPU],
-            GPU,
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%sto: Storage[], param_device_types=[2], result_device_type=2) {
+              %0 = on_device(0, device_type=1, is_fixed=True);
+              %1 = on_device(meta[relay.Constant][0], device_type=1, is_fixed=True);
+              memory.alloc_tensor(%sto, %0, %1, const_shape=meta[relay.Constant][0], assert_shape=[])
+            }
+            """,
+            "from_string",
+            core,
+            metatable,
        )
-        return mod

     # Don't try to execute, too fiddly to setup.
     exercise(input(), expected(), None, None)


 def test_reshape_tensor():
-    shape = (2, 8)
-    x = relay.var("x", shape=shape, dtype="float32")
-    newshape_expr = relay.const([2, 4, 2], dtype="int64")
-    newshape_prim = [2, 4, 2]
+    newshape = [2, 4, 2]
+    metatable = {"relay.Constant": [relay.const(newshape)]}

-    # def @main(x) { reshape_tensor(x, shape, newshape=[2,4,2]) }
    def input():
-        return tvm.IRModule.from_expr(
-            relay.Function([x], relay.op.vm.reshape_tensor(x, newshape_expr, newshape_prim))
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(2, 8), float32]) {
+              vm.reshape_tensor(%x, meta[relay.Constant][0], newshape=[2, 4, 2])
+            }
+            """,
+            "from_string",
+            None,
+            metatable,
        )

-    # def @main(x, on_device={param_device_types=[2], result_device_type=2}) {
-    #   reshape_tensor(x, fixed_cpu(shape), newshape=[2,4,2])
-    # }
    def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x], relay.op.vm.reshape_tensor(x, fixed_cpu(newshape_expr), newshape_prim)
-                ),
-                [GPU],
-                GPU,
-            )
+        return tvm.parser.parse(
+            """
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(2, 8), float32], param_device_types=[2], result_device_type=2) {
+              %0 = on_device(meta[relay.Constant][0], device_type=1, is_fixed=True);
+              vm.reshape_tensor(%x, %0, newshape=[2, 4, 2])
+            }
+            """,
+            "from_string",
+            None,
+            metatable,
        )

     def ref(x):
-        return np.reshape(x, newshape_prim)
+        return np.reshape(x, newshape)

-    exercise(input(), expected(), ref, rands(shape, 1))
+    exercise(input(), expected(), ref, rands((2, 8), 1))


 def test_dynamic_input():
-    compiletime_shape = (relay.Any(), relay.Any())
-    runtime_shape = (N, M)
-    x0 = relay.var("x0", shape=compiletime_shape)
-    x1 = relay.var("x1", shape=compiletime_shape)
-
-    # def @main(x0, x1) { add(x0, x1) }
 def test_dynamic_input():
-    compiletime_shape = (relay.Any(), relay.Any())
-    runtime_shape = (N, M)
-    x0 = relay.var("x0", shape=compiletime_shape)
-    x1 = relay.var("x1", shape=compiletime_shape)
-
-    # def @main(x0, x1) { add(x0, x1) }
+    # There's nothing special about inferring devices for partially unknown types.
     def input():
-        return tvm.IRModule.from_expr(relay.Function([x0, x1], relay.add(x0, x1)))
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x0: Tensor[(?, ?), float32], %x1: Tensor[(?, ?), float32]) {
+              add(%x0, %x1)
+            }
+        """)

-    # def @main(x0, x1), on_device={param_device_types=[2,2], result_device_type=2}) {
-    #   add(x0, x1)
-    # }
     def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function([x0, x1], relay.add(x0, x1)), [GPU, GPU], GPU
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x0: Tensor[(?, ?), float32], %x1: Tensor[(?, ?), float32],
+                      param_device_types=[2, 2], result_device_type=2) {
+              add(%x0, %x1)
+            }
+        """)

     def ref(x0, x1):
         return np.add(x0, x1)

-    exercise(input(), expected(), ref, rands(runtime_shape, 2))
+    exercise(input(), expected(), ref, rands((5, 7), 2))


 def test_redundant_annotation():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    z = relay.var("z", shape=shape)
-
-    # def @main(x, y, z) {
-    #   %0 = add(x, y)
-    #   add(subtract(on_cpu(%0), z), on_cpu(%0))
-    # }
     def input():
-        a = relay.add(x, y)
-        return tvm.IRModule.from_expr(
-            relay.Function([x, y, z], relay.add(relay.subtract(on_cpu(a), z), on_cpu(a)))
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=1);
+              %2 = subtract(%1, %z);
+              %3 = on_device(%0, device_type=1);
+              add(%2, %3)
+            }
+        """)

-    # def @main(x, y, z, on_device={param_device_types=[1,1,2], result_device_type=2}) {
-    #   %0 = add(x, y)
-    #   add(subtract(cpu_to_gpu(fixed_cpu(%0)), z), cpu_to_gpu(fixed_cpu(%0)))
-    # }
     def expected():
-        a = relay.add(x, y)
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, y, z],
-                    relay.add(
-                        relay.subtract(cpu_to_gpu(fixed_cpu(a)), z), cpu_to_gpu(fixed_cpu(a))
-                    ),
-                ),
-                [CPU, CPU, GPU],
-                GPU,
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 2], result_device_type=2) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %3 = on_device(%0, device_type=1, is_fixed=True);
+              %4 = subtract(%2, %z);
+              %5 = device_copy(%3, src_dev_type=1, dst_dev_type=2);
+              add(%4, %5)
+            }
+        """)

     def ref(x, y, z):
         a = np.add(x, y)
         return np.add(np.subtract(a, z), a)

-    exercise(input(), expected(), ref, rands(shape, 3))
+    exercise(input(), expected(), ref, rands((5, 7), 3))
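(For reference, the `exercise` helper every test calls is defined earlier in this file, outside
this hunk. The structural half of its check is roughly the sketch below; the PlanDevices
invocation and the default-device argument here are illustrative guesses, not the helper's
exact code:

    import tvm
    from tvm import relay

    def exercise_sketch(in_mod, expected_mod, default_device_type=2):
        # Hypothetical invocation: plan devices with GPU (device type 2) as the
        # default, then compare structurally, matching free vars by position.
        # The real helper also runs the modules against a NumPy reference.
        actual_mod = relay.transform.PlanDevices(default_device_type)(in_mod)
        tvm.ir.assert_structural_equal(actual_mod, expected_mod, map_free_vars=True)
)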
 def test_annotate_expr():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    z = relay.var("z", shape=shape)
-
-    # def @main(x, y, z) { on_cpu(subtract(on_gpu(add(x, y)), z)) } -- forces function result also on cpu
     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function([x, y, z], on_cpu(relay.subtract(on_gpu(relay.add(x, y)), z)))
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=2);
+              %2 = subtract(%1, %z);
+              on_device(%2, device_type=1)
+            }
+        """)

-    # def @main(x, y, z, on_device={param_device_types=[2,2,1], result_device_type=1}) {
-    #   subtract(gpu_to_cpu(fixed_gpu(add(x, y))), z)
-    # }
     def expected():
-        add = relay.add(x, y)
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, y, z], relay.subtract(gpu_to_cpu(fixed_gpu(relay.add(x, y))), z)
-                ),
-                [GPU, GPU, CPU],
-                CPU,
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32],
+                      param_device_types=[2, 2, 1], result_device_type=1) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=2, is_fixed=True);
+              %2 = device_copy(%1, src_dev_type=2, dst_dev_type=1);
+              subtract(%2, %z)
+            }
+        """)

     def ref(x, y, z):
         return np.subtract(np.add(x, y), z)

-    exercise(input(), expected(), ref, rands(shape, 3))
+    exercise(input(), expected(), ref, rands((5, 7), 3))


 def test_annotate_all():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    z = relay.var("z", shape=shape)
-
-    # def @main(x, y, z) { on_cpu(subtract(on_cpu(add(x, y)), z)) } -- top-level also forces result to be CPU
     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function([x, y, z], on_cpu(relay.subtract(on_cpu(relay.add(x, y)), z)))
-        )
+        return tvm.parser.parse("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=1);
+              %2 = subtract(%1, %z);
+              on_device(%2, device_type=1)
+            }
+        """)

-    # def @main(x, y, z, on_device={param_device_types=[CPU, CPU, CPU], result_device_type=CPU}) {
-    #   subtract(add(x, y), z)
-    # }
     def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function([x, y, z], relay.subtract(relay.add(x, y), z)), [CPU, CPU, CPU], CPU
-            )
-        )
+        return tvm.parser.parse("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 1], result_device_type=1) {
+              %0 = add(%x, %y);
+              subtract(%0, %z)
+            }
+        """)

     def ref(x, y, z):
         return np.subtract(np.add(x, y), z)

-    exercise(input(), expected(), ref, rands(shape, 3))
+    exercise(input(), expected(), ref, rands((5, 7), 3))
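(Throughout these programs the `device_type` integers follow DLPack's DLDeviceType numbering:
1 is kDLCPU and 2 is kDLGPU (kDLCUDA in newer DLPack), which is why `param_device_types=[1, 1, 1]`
pins everything to CPU above. A quick check of the encoding:

    import tvm

    # The numeric encoding behind device_type=1 / device_type=2 in the tests.
    assert tvm.device("cpu").device_type == 1
    assert tvm.device("cuda").device_type == 2
)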
 def test_conv_network():
@@ -959,97 +845,87 @@ def test_conv_network():
     |          <--- CPU
     """
-    batch_size = 1
-    dshape = (batch_size, 64, 56, 56)
-    wshape = (64, 64, 3, 3)
-    weight = relay.var("weight", shape=wshape)
-    data1 = relay.var("data1", shape=dshape)
-    data2 = relay.var("data2", shape=dshape)

     def input():
-        conv2d_1 = relay.nn.conv2d(data1, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
-        conv2d_2 = relay.nn.conv2d(data2, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
-        add = relay.add(on_cpu(conv2d_1), on_cpu(conv2d_2))
-        conv2d_3 = relay.nn.conv2d(
-            on_gpu(add), weight, channels=64, kernel_size=(3, 3), padding=(1, 1)
-        )
-        return tvm.IRModule.from_expr(relay.Function([data1, data2, weight], on_cpu(conv2d_3)))
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%data1: Tensor[(1, 64, 56, 56), float32], %data2: Tensor[(1, 64, 56, 56), float32],
+                      %weight: Tensor[(64, 64, 3, 3), float32]) {
+              %0 = nn.conv2d(%data1, %weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]);
+              %1 = nn.conv2d(%data2, %weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]);
+              %2 = on_device(%0, device_type=1);
+              %3 = on_device(%1, device_type=1);
+              %4 = add(%2, %3);
+              %5 = on_device(%4, device_type=2);
+              %6 = nn.conv2d(%5, %weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]);
+              on_device(%6, device_type=1)
+            }
+        """)

     def expected():
-        conv2d_1 = relay.nn.conv2d(data1, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
-        conv2d_2 = relay.nn.conv2d(data2, weight, channels=64, kernel_size=(3, 3), padding=(1, 1))
-        add = relay.add(cpu_to_gpu(fixed_cpu(conv2d_1)), cpu_to_gpu(fixed_cpu(conv2d_2)))
-        conv2d_3 = relay.nn.conv2d(
-            gpu_to_cpu(fixed_gpu(add)), weight, channels=64, kernel_size=(3, 3), padding=(1, 1)
-        )
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function([data1, data2, weight], conv2d_3), [CPU, CPU, CPU], CPU
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%data1: Tensor[(1, 64, 56, 56), float32], %data2: Tensor[(1, 64, 56, 56), float32],
+                      %weight: Tensor[(64, 64, 3, 3), float32], param_device_types=[1, 1, 1], result_device_type=1) {
+              %0 = nn.conv2d(%data1, %weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = nn.conv2d(%data2, %weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3]);
+              %3 = on_device(%2, device_type=1, is_fixed=True);
+              %4 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %5 = device_copy(%3, src_dev_type=1, dst_dev_type=2);
+              %6 = add(%4, %5);
+              %7 = on_device(%6, device_type=2, is_fixed=True);
+              %8 = device_copy(%7, src_dev_type=2, dst_dev_type=1);
+              nn.conv2d(%8, %weight, padding=[1, 1, 1, 1], channels=64, kernel_size=[3, 3])
+            }
+        """)

     # Don't try to execute, we don't have a reference conv2d
     exercise(input(), expected(), None, None)
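(A cheap sanity check on expectations like the one above is to count the `device_copy` calls the
pass should introduce: for this network, two CPU-to-GPU copies feeding the add plus one GPU-to-CPU
copy after it, i.e. three in total. A sketch, not part of the test file:

    import tvm
    from tvm import relay

    def count_device_copies(mod):
        """Counts calls to the device_copy op in @main."""
        count = [0]

        def visit(expr):
            if isinstance(expr, relay.Call) and isinstance(expr.op, tvm.ir.Op):
                if expr.op.name == "device_copy":
                    count[0] += 1

        relay.analysis.post_order_visit(mod["main"], visit)
        return count[0]

    # For the expected module above: count_device_copies(expected()) == 3
)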
 def test_tuple_get_item():
-    shape = (3, 3, 4)
-    x = relay.Var("x", relay.ty.TensorType(shape, "float32"))
-    t = relay.Var("t")
-
-    # We'll device copy after projection, not before.
-    # def @main(x) {
-    #   let t = split(x, 3);
-    #   subtract(on_cpu(t).0, on_cpu(t).1)
-    # }
+    # Note that the device copy should be placed after projection rather than before. This is handled by
+    # a heuristic in the pass.
     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [x],
-                relay.Let(
-                    t,
-                    relay.op.split(x, 3).astuple(),
-                    on_gpu(
-                        relay.subtract(
-                            relay.TupleGetItem(on_cpu(t), 0), relay.TupleGetItem(on_cpu(t), 1)
-                        )
-                    ),
-                ),
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(3, 3, 4), float32]) {
+              let %t = split(%x, indices_or_sections=3);
+              %0 = on_device(%t, device_type=1);
+              %1 = on_device(%t, device_type=1);
+              %2 = %0.0;
+              %3 = %1.1;
+              %4 = subtract(%2, %3);
+              on_device(%4, device_type=2)
+            }
+        """)

-    # def @main(x, on_device={param_device_type=[1], result_device_type=2}) {
-    #   let t = fixed_cpu(split(x, 3))
-    #   subtract(cpu_to_gpu(fixed_cpu(t.0)), cpu_to_gpu(fixed_cpu(t.1)))
-    # }
     def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x],
-                    relay.Let(
-                        t,
-                        fixed_cpu(relay.op.split(x, 3).astuple()),
-                        relay.subtract(
-                            cpu_to_gpu(fixed_cpu(relay.TupleGetItem(t, 0))),
-                            cpu_to_gpu(fixed_cpu(relay.TupleGetItem(t, 1))),
-                        ),
-                    ),
-                ),
-                [CPU],
-                GPU,
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(3, 3, 4), float32], param_device_types=[1], result_device_type=2) {
+              %0 = split(%x, indices_or_sections=3);
+              let %t = on_device(%0, device_type=1, is_fixed=True);
+              %1 = %t.0;
+              %2 = on_device(%1, device_type=1, is_fixed=True);
+              %3 = %t.1;
+              %4 = on_device(%3, device_type=1, is_fixed=True);
+              %5 = device_copy(%2, src_dev_type=1, dst_dev_type=2);
+              %6 = device_copy(%4, src_dev_type=1, dst_dev_type=2);
+              subtract(%5, %6)
+            }
+        """)

     def ref(x):
         t = np.split(x, 3)
         return np.subtract(t[0], t[1])

-    exercise(input(), expected(), ref, rands(shape, 1))
+    exercise(input(), expected(), ref, rands((3, 3, 4), 1))
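(The reference uses np.split, which mirrors Relay's `split(%x, indices_or_sections=3)` along
axis 0: a (3, 3, 4) input yields three (1, 3, 4) pieces. For example:

    import numpy as np

    x = np.arange(36, dtype="float32").reshape(3, 3, 4)
    t = np.split(x, 3)  # three (1, 3, 4) slices along axis 0
    assert np.subtract(t[0], t[1]).shape == (1, 3, 4)
)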
 def test_propogation():
-    R""" The network and devices are as follows:
+    r""" The network and devices are as follows:
                x           <--- CPU
                |
               log          <--- CPU
              /   \
           log2   log10     <--- GPU
              \   /
               add          <--- GPU
                |
               tan          <--- GPU
                |
                            <--- CPU
    """
@@ -1062,36 +938,52 @@ def test_propogation():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)

     def input():
-        log = relay.log(x)
-        log2 = relay.log2(on_cpu(log))
-        log10 = relay.log10(on_cpu(log))
-        add = relay.add(on_gpu(log2), on_gpu(log10))
-        tan = relay.tan(on_gpu(add))
-        return tvm.IRModule.from_expr(relay.Function([x], on_cpu(tan)))
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32]) {
+              %0 = log(%x);
+              %1 = on_device(%0, device_type=1);
+              %2 = log2(%1);
+              %3 = on_device(%0, device_type=1);
+              %4 = log10(%3);
+              %5 = on_device(%2, device_type=2);
+              %6 = on_device(%4, device_type=2);
+              %7 = add(%5, %6);
+              %8 = on_device(%7, device_type=2);
+              %9 = tan(%8);
+              on_device(%9, device_type=1)
+            }
+        """)

     def expected():
-        log = relay.log(x)
-        log2 = relay.log2(cpu_to_gpu(fixed_cpu(log)))
-        log10 = relay.log10(cpu_to_gpu(fixed_cpu(log)))
-        add = relay.add(log2, log10)
-        tan = relay.tan(gpu_to_cpu(fixed_gpu(add)))
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(relay.Function([x], tan), [CPU], CPU)
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], param_device_types=[1], result_device_type=1) {
+              %0 = log(%x);
+              %1 = on_device(%0, device_type=1, is_fixed=True);
+              %2 = device_copy(%1, src_dev_type=1, dst_dev_type=2);
+              %3 = on_device(%0, device_type=1, is_fixed=True);
+              %4 = device_copy(%3, src_dev_type=1, dst_dev_type=2);
+              %5 = log2(%2);
+              %6 = log10(%4);
+              %7 = add(%5, %6);
+              %8 = on_device(%7, device_type=2, is_fixed=True);
+              %9 = device_copy(%8, src_dev_type=2, dst_dev_type=1);
+              tan(%9)
+            }
+        """)

     def ref(x):
         y = np.log(x)
         return np.tan(np.add(np.log2(y), np.log10(y)))

-    exercise(input(), expected(), ref, rands(shape, 1))
+    exercise(input(), expected(), ref, rands((5, 7), 1))


 def test_fusible_network():
-    R""" The network is as follows:
+    r""" The network is as follows:
           x    y       <--- GPU
           \   /
            add         <--- GPU
          /     \
     negative    \      <--- CPU
          \      \
           \   negative <--- GPU
            \   /
             add        <--- GPU
              |
           negative     <--- GPU
              |
                        <--- CPU
    """
@@ -1106,33 +998,46 @@ def test_fusible_network():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)

     def input():
-        add = relay.add(x, y)
-        sqrt = relay.negative(on_gpu(add))
-        log = relay.negative(add)
-        subtract = relay.add(on_cpu(sqrt), log)
-        exp = relay.negative(on_gpu(subtract))
-        return tvm.IRModule.from_expr(relay.Function([x, y], on_cpu(exp)))
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=2);
+              %2 = negative(%1);
+              %3 = on_device(%2, device_type=1);
+              %4 = negative(%0);
+              %5 = add(%3, %4);
+              %6 = on_device(%5, device_type=2);
+              %7 = negative(%6);
+              on_device(%7, device_type=1)
+            }
+        """)

     def expected():
-        add = relay.add(x, y)
-        sqrt = relay.negative(gpu_to_cpu(fixed_gpu(add)))
-        log = relay.negative(add)
-        subtract = relay.add(cpu_to_gpu(fixed_cpu(sqrt)), log)
-        exp = relay.negative(gpu_to_cpu(fixed_gpu(subtract)))
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(relay.Function([x, y], exp), [GPU, GPU], CPU)
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32], param_device_types=[2, 2], result_device_type=1) {
+              %0 = add(%x, %y);
+              %1 = on_device(%0, device_type=2, is_fixed=True);
+              %2 = device_copy(%1, src_dev_type=2, dst_dev_type=1);
+              %3 = negative(%2);
+              %4 = on_device(%3, device_type=1, is_fixed=True);
+              %5 = device_copy(%4, src_dev_type=1, dst_dev_type=2);
+              %6 = negative(%0);
+              %7 = add(%5, %6);
+              %8 = on_device(%7, device_type=2, is_fixed=True);
+              %9 = device_copy(%8, src_dev_type=2, dst_dev_type=1);
+              negative(%9)
+            }
+        """)

     def ref(x, y):
         z = np.add(x, y)
         return np.negative(np.add(np.negative(z), np.negative(z)))

-    exercise(input(), expected(), ref, rands(shape, 2))
+    exercise(input(), expected(), ref, rands((5, 7), 2))
 def test_unpropagatable_graph():
@@ -1149,109 +1054,82 @@ def test_unpropagatable_graph():
     |              <--- CPU
     """
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    c = relay.var("c", shape=shape)
-    d = relay.var("d", shape=shape)

     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [a, b, c, d],
-                on_cpu(relay.subtract(on_cpu(relay.add(a, b)), on_gpu(relay.multiply(c, d)))),
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32]) {
+              %0 = add(%a, %b);
+              %1 = multiply(%c, %d);
+              %2 = on_device(%0, device_type=1);
+              %3 = on_device(%1, device_type=2);
+              %4 = subtract(%2, %3);
+              on_device(%4, device_type=1)
+            }
+        """)

     def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [a, b, c, d],
-                    relay.subtract(relay.add(a, b), gpu_to_cpu(fixed_gpu(relay.multiply(c, d)))),
-                ),
-                [CPU, CPU, GPU, GPU],
-                CPU,
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                      %c: Tensor[(5, 7), float32], %d: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 2, 2], result_device_type=1) {
+              %0 = multiply(%c, %d);
+              %1 = on_device(%0, device_type=2, is_fixed=True);
+              %2 = add(%a, %b);
+              %3 = device_copy(%1, src_dev_type=2, dst_dev_type=1);
+              subtract(%2, %3)
+            }
+        """)

     def ref(a, b, c, d):
         return np.subtract(np.add(a, b), np.multiply(c, d))

-    exercise(input(), expected(), ref, rands(shape, 4))
+    exercise(input(), expected(), ref, rands((5, 7), 4))


 def test_conditional():
-    shape = (N, M)
-    x = relay.Var("x", relay.ty.scalar_type("bool"))
-    y = relay.var("y", shape=shape)
-    z = relay.var("z", shape=shape)
-    f = relay.Var("f")
-    g = relay.Var("g")
-    h = relay.Var("h")
-    a1 = relay.Var("a")
-    a2 = relay.Var("a")
-
-    # def @main(x, y, z) {
-    #   let f = fn(a) { add(a, fixed_cpu(y)) }
-    #   let g = fn(a) { subtract(a, y) }
-    #   let h = if (x) {
-    #     f
-    #   } else {
-    #     g
-    #   }
-    #   h(z)
-    # }
+    # The conditional is over a function type, thus exercising the first-order/higher-order
+    # domain handling.
     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [x, y, z],
-                relay.Let(
-                    f,
-                    relay.Function([a1], relay.add(a1, fixed_cpu(y))),
-                    relay.Let(
-                        g,
-                        relay.Function([a2], relay.subtract(a2, y)),
-                        relay.Let(h, relay.If(x, f, g), relay.Call(h, [z])),
-                    ),
-                ),
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: bool, %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32]) {
+              let %f = fn (%a) {
+                %0 = on_device(%y, device_type=1, is_fixed=True);
+                add(%a, %0)
+              };
+              let %g = fn (%a1) {
+                subtract(%a1, %y)
+              };
+              let %h = if (%x) {
+                %f
+              } else {
+                %g
+              };
+              %h(%z)
+            }
+        """)

-    # def @main(x, y, z, on_device={param_device_types=[1,1,1], result_device_type=1}) {
-    #   let f = fn(a, on_device={param_device_types=[1], result_device_type=1}) { add(a, y) }
-    #   let g = fn(a, on_device={param_device_types=[1], result_device_type=1}) { subtract(a, y) }
-    #   let h = if (x) {
-    #     f
-    #   } else {
-    #     g
-    #   }
-    #   h(z)
-    # }
     def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, y, z],
-                    relay.Let(
-                        f,
-                        relay.annotation.function_on_device(
-                            relay.Function([a1], relay.add(a1, y)), [CPU], CPU
-                        ),
-                        relay.Let(
-                            g,
-                            relay.annotation.function_on_device(
-                                relay.Function([a2], relay.subtract(a2, y)), [CPU], CPU
-                            ),
-                            relay.Let(h, relay.If(x, f, g), relay.Call(h, [z])),
-                        ),
-                    ),
-                ),
-                [CPU, CPU, CPU],
-                CPU,
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: bool, %y: Tensor[(5, 7), float32], %z: Tensor[(5, 7), float32],
+                      param_device_types=[1, 1, 1], result_device_type=1) {
+              let %f = fn (%a, param_device_types=[1], result_device_type=1) {
+                add(%a, %y)
+              };
+              let %g = fn (%a1, param_device_types=[1], result_device_type=1) {
+                subtract(%a1, %y)
+              };
+              let %h = if (%x) {
+                %f
+              } else {
+                %g
+              };
+              %h(%z)
+            }
+        """)

     def ref(x, y, z):
         def f(a):
@@ -1263,47 +1141,37 @@ def g(a):
         h = f if x else g
         return h(z)

-    exercise(input(), expected(), ref, [True, rand(shape), rand(shape)])
+    exercise(input(), expected(), ref, [True, rand((5, 7)), rand((5, 7))])
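(As the conditional test shows, local function expressions carry their own
`param_device_types`/`result_device_type` annotations, and after parsing these should be ordinary
function attributes. A sketch of inspecting them, assuming `mod` is one of the expected modules
above (illustrative; attribute access details may differ):

    # Planned devices round-trip as plain function attributes.
    main = mod["main"]
    print(main.attrs["param_device_types"])  # e.g. [1, 1, 1]
    print(main.attrs["result_device_type"])  # e.g. 1
)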
 def test_global():
-    shape = (N, M)
-    a = relay.var("a", shape=shape)
-    b = relay.var("b", shape=shape)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    f = relay.GlobalVar("f")
-    main = relay.GlobalVar("main")
-
-    # def @f(a, b) { add(a, on_cpu(b)) }
-    # def @main(x, y) { @f(y, x) }
     def input():
-        mod = tvm.IRModule()
-        mod[f] = relay.Function(
-            [a, b], relay.add(a, on_cpu(b)), relay.ty.TensorType(shape, "float32")
-        )
-        mod[main] = relay.Function(
-            [x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32")
-        )
-        return mod
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @f(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] {
+              %0 = on_device(%b, device_type=1);
+              add(%a, %0)
+            }
+
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) -> Tensor[(5, 7), float32] {
+              @f(%y, %x)
+            }
+        """)

-    # def @f(a, b, on_device={param_device_types=[2,1], result_device_type=2}) { add(a, on_cpu(b)) }
-    # def @main(x, y, on_device={param_device_types=[1,2], result_device_type=2}) { @f(y, x) }
     def expected():
-        mod = tvm.IRModule()
-        mod[f] = relay.annotation.function_on_device(
-            relay.Function(
-                [a, b], relay.add(a, cpu_to_gpu(b)), relay.ty.TensorType(shape, "float32")
-            ),
-            [GPU, CPU],
-            GPU,
-        )
-        mod[main] = relay.annotation.function_on_device(
-            relay.Function([x, y], relay.Call(f, [y, x]), relay.ty.TensorType(shape, "float32")),
-            [CPU, GPU],
-            GPU,
-        )
-        return mod
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @f(%a: Tensor[(5, 7), float32], %b: Tensor[(5, 7), float32],
+                   param_device_types=[2, 1], result_device_type=2) -> Tensor[(5, 7), float32] {
+              %0 = device_copy(%b, src_dev_type=1, dst_dev_type=2);
+              add(%a, %0)
+            }
+
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32],
+                      param_device_types=[1, 2], result_device_type=2) -> Tensor[(5, 7), float32] {
+              @f(%y, %x)
+            }
+        """)

     def ref(x, y):
         def f(a, b):
@@ -1311,69 +1179,88 @@ def f(a, b):
         return f(x, y)

-    exercise(input(), expected(), ref, rands(shape, 2))
-
-
-# Note that match and ADTs don't appear to be supported for direct AST
-# construction.
+    exercise(input(), expected(), ref, rands((5, 7), 2))


 def test_ref():
-    shape = (N, M)
-    x = relay.var("x", shape=shape)
-    y = relay.var("y", shape=shape)
-    r = relay.var("r")
-    dummy = relay.var("dummy")
-
-    # def @main(x, y) {
-    #   r = ref(x)
-    #   ref_write(r, on_cpu(y))
-    #   add(x, ref_read(r))
-    # }
     def input():
-        return tvm.IRModule.from_expr(
-            relay.Function(
-                [x, y],
-                relay.Let(
-                    r,
-                    relay.RefCreate(x),
-                    relay.Let(dummy, relay.RefWrite(r, on_cpu(y)), relay.add(x, relay.RefRead(r))),
-                ),
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32]) {
+              let %r = ref(%x);
+              %0 = on_device(%y, device_type=1);
+              ref_write(%r, %0);
+              %1 = ref_read(%r);
+              add(%x, %1)
+            }
+        """)

-    # def @main(x, y, on_device={param_device_types=[GPU, CPU], result_device_type=GPU}) {
-    #   r = ref(x)
-    #   ref_write(r, cpu_to_gpu(y))
-    #   add(x, ref_read(r))
-    # }
     def expected():
-        return tvm.IRModule.from_expr(
-            relay.annotation.function_on_device(
-                relay.Function(
-                    [x, y],
-                    relay.Let(
-                        r,
-                        relay.RefCreate(x),
-                        relay.Let(
-                            dummy, relay.RefWrite(r, cpu_to_gpu(y)), relay.add(x, relay.RefRead(r))
-                        ),
-                    ),
-                ),
-                [GPU, CPU],
-                GPU,
-            )
-        )
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            def @main(%x: Tensor[(5, 7), float32], %y: Tensor[(5, 7), float32],
+                      param_device_types=[2, 1], result_device_type=2) {
+              let %r = ref(%x);
+              %0 = device_copy(%y, src_dev_type=1, dst_dev_type=2);
+              ref_write(%r, %0);
+              %1 = ref_read(%r);
+              add(%x, %1)
+            }
+        """)

     def ref(x, y):
         r = {"value": x}
         r["value"] = y
         return np.add(x, r["value"])

-    # Don't try to execute, no backend currently supports both cross-devices and references.
+    # Don't try to execute, no backend currently supports both heterogeneous devices and references.
     exercise(input(), expected(), None, None)


+def test_adt():
+    def input():
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            type List[A] {
+              Cons(A, List[A]),
+              Nil,
+            }
+            def @main(%x : Tensor[(5, 7), float32], %y : Tensor[(5, 7), float32]) {
+              %0 = on_device(%y, device_type=1, is_fixed=True);
+              %1 = Nil;
+              %2 = Cons(%0, %1);
+              let %l = Cons(%x, %2);
+              match? (%l) {
+                Cons(%z, _) => %z
+              }
+            }
+        """)
+
+    def expected():
+        return tvm.parser.fromtext("""
+            #[version = "0.0.5"]
+            type List[A] {
+              Cons(A, List[A]),
+              Nil,
+            }
+            def @main(%x : Tensor[(5, 7), float32], %y : Tensor[(5, 7), float32],
+                      param_device_types=[1, 1], result_device_type=1) {
+              %0 = Nil;
+              %1 = Cons(%y, %0);
+              let %l = Cons(%x, %1);
+              match? (%l) {
+                Cons(%z, _) => %z
+              }
+            }
+        """)
+
+    def ref(x, y):
+        l = [x, y]
+        return l[0]
+
+    exercise(input(), expected(), ref, rands((5, 7), 2))
+
+
 if __name__ == "__main__":
     test_plain()
     test_left_add_on_cpu()
@@ -1403,3 +1290,4 @@ def ref(x, y):
     test_conditional()
     test_global()
     test_ref()
+    test_adt()