Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Constants computed on-the-fly and mmapped Constants #27705

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 2 additions & 0 deletions src/bindings/python/src/openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
from openvino.runtime import shutdown
from openvino.runtime import tensor_from_file
from openvino.runtime import save_model
from openvino.runtime import save_tensor_data
from openvino.runtime import read_tensor_data
from openvino.runtime import layout_helpers

from openvino._pyopenvino import RemoteContext
Expand Down
2 changes: 2 additions & 0 deletions src/bindings/python/src/openvino/runtime/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@
from openvino._pyopenvino import set_batch
from openvino._pyopenvino import serialize
from openvino._pyopenvino import save_model
from openvino._pyopenvino import save_tensor_data
from openvino._pyopenvino import read_tensor_data
from openvino._pyopenvino import shutdown

# Import opsets
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import openvino


# Moves a Constant's payload into a file and returns a file-backed replacement Constant.
def move_constant_to_file(constant, path):
    """Write `constant`'s raw data to `path` and return a new Constant memory-mapped to that file.

    The file is created (or overwritten) at `path`. It is NOT removed when the
    returned Constant's lifetime ends -- cleanup is the caller's responsibility.
    """
    source_tensor = constant.get_tensor_view()
    openvino.save_tensor_data(source_tensor, path)
    element_type = constant.get_output_element_type(0)
    partial_shape = constant.get_output_partial_shape(0)
    mmapped_tensor = openvino.read_tensor_data(path, element_type, partial_shape)
    return openvino.runtime.op.Constant(mmapped_tensor, shared_memory=True)
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2018-2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import openvino

"""Postponed Constant is a way to materialize a big constant only when it is going to be serialized to IR and then immediately dispose."""


# `maker` is a function that returns ov.Tensor that represents a target Constant
# `maker` is a zero-argument callable that returns an ov.Tensor representing the target Constant.
def make_postponed_constant(element_type, shape, maker):
    """Create an op that materializes a big constant only when it is constant-folded.

    The returned op carries the `postponed_constant` rt_info flag, which the IR
    serializer uses to fold it (invoking `maker`) only at save time, so the heavy
    tensor can be built on demand and immediately disposed of afterwards.

    :param element_type: element type of the produced constant.
    :param shape: shape of the constant (anything convertible to ov.PartialShape).
    :param maker: function returning an ov.Tensor with the target Constant's data.
    :return: an instance of the custom PostponedConstant op.
    """

    class PostponedConstant(openvino.Op):
        class_type_info = openvino.runtime.DiscreteTypeInfo("PostponedConstant", "extension")

        def __init__(self):
            super().__init__(self)
            self.get_rt_info()["postponed_constant"] = True  # value doesn't matter
            self.m_element_type = element_type
            self.m_shape = shape
            self.constructor_validate_and_infer_types()

        def get_type_info(self):
            return PostponedConstant.class_type_info

        def evaluate(self, outputs, _):
            # Build the tensor only now and copy it into the pre-allocated output.
            # NOTE(review): passing outputs[0] into `maker` would let it build the
            # tensor in place and avoid this copy -- TODO consider.
            maker().copy_to(outputs[0])
            return True

        def clone_with_new_inputs(self, _):
            return PostponedConstant()

        def validate_and_infer_types(self):
            self.set_output_type(0, self.m_element_type, openvino.PartialShape(self.m_shape))

    return PostponedConstant()
26 changes: 26 additions & 0 deletions src/bindings/python/src/pyopenvino/pyopenvino.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <openvino/core/model.hpp>
#include <openvino/core/node.hpp>
#include <openvino/core/version.hpp>
#include <openvino/runtime/tensor_util.hpp>
#include <string>

#include "openvino/runtime/core.hpp"
Expand Down Expand Up @@ -268,6 +269,31 @@ PYBIND11_MODULE(_pyopenvino, m) {
regclass_Core(m);
regclass_Tensor(m);

m.def(
"save_tensor_data",
[](const ov::Tensor& tensor,
const py::object& file_path) {
ov::save_tensor_data(tensor,
Common::utils::convert_path_to_string(file_path));
},
py::arg("tensor"),
py::arg("file_path"));

m.def(
"read_tensor_data",
[](const py::object& file_path,
const ov::element::Type& element_type,
const ov::PartialShape& shape,
std::size_t offset,
bool mmap) {
return ov::read_tensor_data(Common::utils::convert_path_to_string(file_path), element_type, shape, offset, mmap);
},
py::arg("file_path"),
py::arg("element_type") = ov::element::u8,
py::arg("shape") = ov::PartialShape({ov::Dimension()}),
py::arg("offset") = size_t(0),
py::arg("mmap") = true);

regclass_CompiledModel(m);
regclass_InferRequest(m);
regclass_VariableState(m);
Expand Down
80 changes: 80 additions & 0 deletions src/core/include/openvino/runtime/tensor_util.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// Copyright (C) 2018-2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include "openvino/core/partial_shape.hpp"
#include "openvino/runtime/tensor.hpp"

namespace ov {

/// \brief Save given tensor data into a file. The file will contain only the raw bytes of tensor.data as it is
/// allocated in memory.
/// No element type, shape or other metadata is serialized. Strides are preserved.
/// \param tensor Tensor whose data will be serialized.
/// \param file_name Path to the output file.
OPENVINO_API
void save_tensor_data(const Tensor& tensor, const std::string& file_name);

#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT)
/// \brief Wide-string overload of save_tensor_data for unicode paths.
// NOTE(review): the second argument is the output file path; the name `output_model` looks like a
// copy-paste leftover -- confirm and rename.
OPENVINO_API
void save_tensor_data(const Tensor& tensor, const std::wstring& output_model);
#endif

/// \brief Read a tensor content from a file. Only raw data is loaded.
/// \param file_name Path to the input file.
/// \param element_type Element type; when not specified it is assumed to be element::u8.
/// \param shape Shape for the resulting tensor. If the provided shape is static, only the specified number of
///        elements is read; the file should contain enough bytes, an exception is raised otherwise.
///        One of the dimensions can be dynamic. In this case it will be determined automatically based on the
///        length of the file content and `offset`. Default value is [?].
/// \param offset Read the file starting from the specified offset. Default is 0. The remaining size of the file
///        should be compatible with shape.
/// \param mmap Use mmap, which postpones the real read from the file until the data is accessed.
OPENVINO_API
Tensor read_tensor_data(const std::string& file_name,
                        const element::Type& element_type = element::u8,
                        const PartialShape& shape = PartialShape{Dimension::dynamic()},
                        std::size_t offset = 0,
                        bool mmap = true);

#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT)
/// \brief Wide-string overload of read_tensor_data for unicode paths.
OPENVINO_API
Tensor read_tensor_data(const std::wstring& file_name,
                        const element::Type& element_type = element::u8,
                        const PartialShape& shape = PartialShape{Dimension::dynamic()},
                        std::size_t offset = 0,
                        bool mmap = true);
#endif

/// \brief Read raw data from a file into a pre-allocated tensor.
/// \param file_name Path to the input file with raw tensor data.
/// \param tensor Tensor to read data into. The tensor should have the correct element_type and shape already set;
///        they determine how many bytes will be read from the file.
/// \param offset Read the file starting from the specified offset. Default is 0. The remaining part of the file
///        should contain enough bytes to satisfy the tensor size.
OPENVINO_API
void read_tensor_data(const std::string& file_name, Tensor& tensor, std::size_t offset = 0);

#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT)
/// \brief Wide-string overload of read_tensor_data (pre-allocated tensor) for unicode paths.
OPENVINO_API
void read_tensor_data(const std::wstring& file_name, Tensor& tensor, std::size_t offset = 0);
#endif

/// \brief Read raw data from a file into a tensor. Optionally re-allocate memory in the tensor if required.
/// \param file_name Path to the input file with raw tensor data.
/// \param tensor Tensor to read data into. Memory is allocated using the set_shape method.
/// \param shape Shape for the resulting tensor. If the provided shape is static, only the specified number of
///        elements is read; the file should contain enough bytes, an exception is raised otherwise.
///        One of the dimensions can be dynamic. In this case it will be determined automatically based on the
///        length of the file content and `offset`.
/// \param offset Read the file starting from the specified offset. Default is 0. The remaining size of the file
///        should be compatible with shape.
OPENVINO_API
void read_tensor_data(const std::string& file_name, Tensor& tensor, const PartialShape& shape, std::size_t offset = 0);

#if defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT)
/// \brief Wide-string overload of read_tensor_data (re-allocating variant) for unicode paths.
OPENVINO_API
void read_tensor_data(const std::wstring& file_name, Tensor& tensor, const PartialShape& shape, std::size_t offset = 0);
#endif

}  // namespace ov
88 changes: 67 additions & 21 deletions src/core/src/pass/serialize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "openvino/core/model.hpp"
#include "openvino/core/parallel.hpp"
#include "openvino/core/type/float16.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/util/framework_node.hpp"
#include "openvino/opsets/opset1.hpp"
#include "openvino/pass/constant_folding.hpp"
Expand Down Expand Up @@ -99,7 +100,9 @@ class ConstantWriter {
size_t size,
size_t& new_size,
bool compress_to_fp16 = false,
ov::element::Type src_type = ov::element::dynamic) {
ov::element::Type src_type = ov::element::dynamic,
bool ptr_is_temporary = false) { // when true, do not rely on ptr after this function call, data
// is temporary allocated
const FilePosition write_pos = m_binary_output.tellp();
const auto offset = write_pos - m_blob_offset;
new_size = size;
Expand Down Expand Up @@ -132,17 +135,19 @@ class ConstantWriter {
// Therefore we always have to compare values when finding a match in the hash multimap.
const HashValue hash = ov::runtime::compute_hash(ptr_to_write, new_size);

auto found = m_hash_to_file_positions.find(hash);
auto found = m_hash_to_file_positions.equal_range(hash);
// iterate over all matches of the key in the multimap
while (found != m_hash_to_file_positions.end()) {
if (memcmp(ptr, found->second.second, size) == 0) {
return found->second.first;
for (auto it = found.first; it != found.second; ++it) {
if (memcmp(ptr, it->second.second, size) == 0) {
return it->second.first;
}
found++;
}
// Since fp16_compressed data will be disposed at exit point and since we cannot reread it from the ostream,
// we store pointer to the original uncompressed blob.
m_hash_to_file_positions.insert({hash, {offset, static_cast<void const*>(ptr)}});

if (!ptr_is_temporary) {
// Since fp16_compressed data will be disposed at exit point and since we cannot reread it from the
// ostream, we store pointer to the original uncompressed blob.
m_hash_to_file_positions.insert({hash, {offset, static_cast<void const*>(ptr)}});
}
if (m_write_hash_value) {
m_binary_output.write(reinterpret_cast<const char*>(&hash), sizeof(uint64_t));
} else {
Expand Down Expand Up @@ -313,6 +318,7 @@ class XmlSerializer : public ov::AttributeVisitor {
bool m_deterministic;
bool m_compress_to_fp16;
ov::element::Type m_output_element_type;
bool m_data_is_temporary;

template <typename T>
std::string create_atribute_list(ov::ValueAccessor<std::vector<T>>& adapter) {
Expand Down Expand Up @@ -431,14 +437,16 @@ class XmlSerializer : public ov::AttributeVisitor {
int64_t version,
bool deterministic = false,
bool compress_to_fp16 = false,
ov::element::Type output_element_type = ov::element::dynamic)
ov::element::Type output_element_type = ov::element::dynamic,
bool data_is_temporary = false)
: m_xml_node(data),
m_node_type_name(node_type_name),
m_constant_write_handler(constant_write_handler),
m_version(version),
m_deterministic(deterministic),
m_compress_to_fp16(compress_to_fp16),
m_output_element_type(output_element_type) {}
m_output_element_type(output_element_type),
m_data_is_temporary(data_is_temporary) {}

void on_adapter(const std::string& name, ov::ValueAccessor<void>& adapter) override {
using BodyTargetNames = std::tuple<std::string, std::string, std::vector<std::string>>;
Expand Down Expand Up @@ -534,11 +542,13 @@ class XmlSerializer : public ov::AttributeVisitor {
a2->get_header(header_ptr, header_size);
}

int64_t offset = m_constant_write_handler.write(reinterpret_cast<const char*>(header_ptr.get()),
header_size,
inter_size,
m_compress_to_fp16,
m_output_element_type);
int64_t offset = m_constant_write_handler.write(
reinterpret_cast<const char*>(header_ptr.get()),
header_size,
inter_size,
m_compress_to_fp16,
m_output_element_type,
true); // header_ptr is allocated in AttributeAdapter that has limited life time
new_size += inter_size;

// write raw strings part
Expand All @@ -561,7 +571,8 @@ class XmlSerializer : public ov::AttributeVisitor {
raw_string_size,
inter_size,
m_compress_to_fp16,
m_output_element_type);
m_output_element_type,
m_data_is_temporary);
new_size += inter_size;
}
m_xml_node.append_attribute("offset").set_value(static_cast<unsigned long long>(offset));
Expand All @@ -575,7 +586,8 @@ class XmlSerializer : public ov::AttributeVisitor {
size,
new_size,
m_compress_to_fp16,
m_output_element_type);
m_output_element_type,
m_data_is_temporary);

m_xml_node.append_attribute("offset").set_value(static_cast<unsigned long long>(offset));
m_xml_node.append_attribute("size").set_value(static_cast<unsigned long long>(new_size));
Expand Down Expand Up @@ -891,6 +903,35 @@ class PaddingsFixer {
}
};

// Substitutes a Constant node for a node by calling node->constant_fold when the 'postponed_constant'
// rt_info attribute is present on the node.
class PostponedConstantReplacer {
private:
    ov::Node* m_node;                      // node to serialize: the original, or the folded constant
    std::shared_ptr<ov::Node> m_constant;  // keeps the folded constant alive; empty when no folding happened

public:
    /// \brief Folds `node` into a Constant if it is marked with `postponed_constant`; otherwise keeps it as is.
    /// \param node Node being serialized; must have exactly one output when marked as postponed.
    explicit PostponedConstantReplacer(ov::Node* node) : m_node(node) {
        if (node->get_rt_info().count("postponed_constant")) {
            OPENVINO_ASSERT(node->get_output_size() == 1);
            ov::OutputVector outputs(1);
            OPENVINO_ASSERT(
                node->constant_fold(outputs, node->input_values()),
                "Node with set `postponed_constant` attribute cannot be folded to a constant when saving model to IR "
                "file");
            m_constant = outputs[0].get_node_shared_ptr();
            m_node = m_constant.get();
        }
    }

    /// \brief Node to serialize: the materialized Constant when folding happened, the original node otherwise.
    ov::Node* get_node() {
        return m_node;
    }

    /// \brief True when the node was folded, i.e. the constant's data lives only as long as this object.
    bool data_is_temporary() const {
        return static_cast<bool>(m_constant);
    }
};

bool is_correct_tag_name(const std::string& name) {
if (name.length() == 0) {
return false;
Expand Down Expand Up @@ -993,12 +1034,16 @@ void ngfunction_2_ir(pugi::xml_node& netXml,

for (const auto& n : sorted_ops) {
ov::Node* node = n.get();
OPENVINO_ASSERT(layer_ids.find(node) != layer_ids.end(), "Internal error");
auto node_id = layer_ids.find(node)->second;
PostponedConstantReplacer modified_node(node);
node = modified_node.get_node();

const std::string& node_type_name{node->get_type_name()};

OPENVINO_ASSERT(layer_ids.find(node) != layer_ids.end(), "Internal error");
// <layers>
pugi::xml_node layer = layers.append_child("layer");
layer.append_attribute("id").set_value(layer_ids.find(node)->second);
layer.append_attribute("id").set_value(node_id);
// If determinism is not required, include auto-generated names into xml
// layer name is not critical for hash computing
if (!deterministic) {
Expand Down Expand Up @@ -1136,7 +1181,8 @@ void ngfunction_2_ir(pugi::xml_node& netXml,
version,
deterministic,
compress_to_fp16,
output_element_type);
output_element_type,
modified_node.data_is_temporary());
OPENVINO_ASSERT(fixed_node.get_node()->visit_attributes(visitor), "Visitor API is not supported in ", node);
}
rt_info::XmlSerializer{data}.serialize(node->get_rt_info());
Expand Down
Loading
Loading