
[CPU] Add interface to release compiled model internal memory #26262

Merged on Sep 5, 2024 (42 commits; changes shown from 33 commits)

Commits
f211b01
Rename mem manager to mem block
maxnick Aug 5, 2024
f723360
Remove isAllocated check from the memory class
maxnick Aug 6, 2024
5ced5f4
Fix tests build
maxnick Aug 7, 2024
e2440df
Remove incorrect checks from Input
maxnick Aug 7, 2024
d1b544e
Remove incorrect isDefined checks
maxnick Aug 7, 2024
0bf53b8
Redefine Split createPrimitive method
maxnick Aug 7, 2024
e20e7f2
Memory subsystem refactoring
maxnick Aug 12, 2024
3db1617
Merge remote-tracking branch 'origin/master' into flush_interim_tensors
maxnick Aug 12, 2024
bbbcdf4
Avoid using key word
maxnick Aug 13, 2024
692c02f
Bug fixes
maxnick Aug 13, 2024
ece559e
Fix linear offset calculation in static block
maxnick Aug 14, 2024
b9da47a
Fix output edges processing
maxnick Aug 14, 2024
8304f77
Add allocate and free actions
maxnick Aug 14, 2024
e58f01a
Add flushing intermediate tensors
maxnick Aug 14, 2024
334b757
Linter fixes
maxnick Aug 16, 2024
b0d0964
Avoid calling getData to not allocated memory
maxnick Aug 16, 2024
bfe54a0
Merge remote-tracking branch 'origin/master' into flush_interim_tensors
maxnick Aug 16, 2024
f6a8dee
Reallocate only defined mem
maxnick Aug 16, 2024
01bb93c
Refactor the Reorder node
maxnick Aug 16, 2024
1adc24c
Refactor set output default ptr
maxnick Aug 16, 2024
fd3b99a
Adapt FC executor
maxnick Aug 19, 2024
548e2d7
Fix dynamic memory allocation
maxnick Aug 19, 2024
2722aeb
Adapt Multimodal node
maxnick Aug 20, 2024
906d649
Skip memory refresh for inPlace up
maxnick Aug 20, 2024
5c41e7c
Skip string tensors in memory refresh
maxnick Aug 20, 2024
f570550
Avoid reading uninit data in Loop initialization
maxnick Aug 20, 2024
5b9d05a
Merge remote-tracking branch 'origin/master' into flush_interim_tensors
maxnick Aug 20, 2024
da1f184
Fix loop trip count reading in Loop
maxnick Aug 21, 2024
adeeb02
Introduce memory block stub
maxnick Aug 21, 2024
e4dbf00
WA in the Pad node to prevent reading uninit data
maxnick Aug 22, 2024
8ad88fb
Introduce network level memory control unit
maxnick Aug 22, 2024
4f38db6
Fix Loop for dynamic shape applications
maxnick Aug 26, 2024
8704e61
Add an interface call releasing intermediate memory
maxnick Aug 22, 2024
18795a1
Merge commit '98188ad2efa74d3b73ec6d2e5bd6ac80c4fdb570' into release_…
maxnick Sep 2, 2024
ba03485
Code cleanup
maxnick Sep 2, 2024
8fa7847
Merge remote-tracking branch 'origin/master' into release_interface
maxnick Sep 2, 2024
7cbffb4
Rename release_buffers to release_memory
maxnick Sep 3, 2024
5177db5
Trivial behavior test
maxnick Sep 3, 2024
af6e6eb
Fix clang format
maxnick Sep 3, 2024
8f71310
Modify behavior test to avoid recompilation
maxnick Sep 4, 2024
18485db
Fix mem size calculation for half byte types
maxnick Sep 4, 2024
92b7244
Merge remote-tracking branch 'origin/master' into release_interface
maxnick Sep 4, 2024
6 changes: 6 additions & 0 deletions src/inference/dev_api/openvino/runtime/icompiled_model.hpp
@@ -134,6 +134,12 @@ class OPENVINO_RUNTIME_API ICompiledModel : public std::enable_shared_from_this<
*/
ov::SoPtr<ov::IRemoteContext> get_context() const;

/**
* @brief Release intermediate memory
*
*/
virtual void release_buffers();
Contributor Author:

Think about a better name. This one doesn't really clarify which buffers are implied.

Contributor:

What if several requests are still running when I call this method?

Contributor Author (@maxnick, Aug 29, 2024):

That is a really good question. This is merely a POC to evaluate the memory footprint in a specific application, so the exact interface-level solution is subject to an architecture review.


virtual ~ICompiledModel() = default;

private:
9 changes: 9 additions & 0 deletions src/inference/include/openvino/runtime/compiled_model.hpp
@@ -200,6 +200,15 @@ class OPENVINO_RUNTIME_API CompiledModel {
return get_property(property.name()).template as<T>();
}

/**
* @brief Release intermediate memory.
*
* This method forces the compiled model to release memory allocated for intermediate structures, e.g. caches,
* tensors, temporary buffers, etc.
*
*/
void release_buffers();

/**
* @brief Returns pointer to device-specific shared context
* on a remote accelerator device that was used to create this CompiledModel.
4 changes: 4 additions & 0 deletions src/inference/src/cpp/compiled_model.cpp
@@ -145,6 +145,10 @@ Any CompiledModel::get_property(const std::string& name) const {
});
}

void CompiledModel::release_buffers() {
OV_COMPILED_MODEL_CALL_STATEMENT(_impl->release_buffers());
}

RemoteContext CompiledModel::get_context() const {
OV_COMPILED_MODEL_CALL_STATEMENT({
auto ctx = _impl->get_context();
4 changes: 4 additions & 0 deletions src/inference/src/dev/icompiled_model.cpp
@@ -147,3 +147,7 @@ ov::SoPtr<ov::IRemoteContext> ov::ICompiledModel::get_context() const {
void ov::ICompiledModel::set_model_shared_object(ov::Model& model, const std::shared_ptr<void>& shared_object) {
model.m_shared_object = shared_object;
}

void ov::ICompiledModel::release_buffers() {
OPENVINO_THROW("ov::ICompiledModel::release_buffers() is not implemented");
}
8 changes: 8 additions & 0 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -342,5 +342,13 @@ void CompiledModel::export_model(std::ostream& modelStream) const {
serializer << m_model;
}

void CompiledModel::release_buffers() {
for (auto&& graph : m_graphs) {
GraphGuard::Lock graph_lock{graph};
auto ctx = graph_lock._graph.getGraphContext();
ctx->getNetworkMemoryControl()->releaseMemory();
}
Contributor Author:

Should we add releasing the oneDNN caches here?

}

} // namespace intel_cpu
} // namespace ov
2 changes: 2 additions & 0 deletions src/plugins/intel_cpu/src/compiled_model.h
@@ -49,6 +49,8 @@ class CompiledModel : public ov::ICompiledModel {
"Set property to Core::compile_model during compilation");
};

void release_buffers() override;

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class SyncInferRequest;
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/config.h
@@ -46,6 +46,7 @@ struct Config {

bool collectPerfCounters = false;
bool exclusiveAsyncRequests = false;
bool flushIntermediateTensors = true; //TODO: change to false by default
SnippetsMode snippetsMode = SnippetsMode::Enable;
std::string dumpToDot = {};
std::string device_id = {};