[GPU] Transfer to usm_device if GPU architecture has separate cache (#24237)

### Details:
- Recent iGPUs have a separate cache. In such cases, it is necessary to use usm_device if possible.

### Tickets:
- 139455
openvinotoolkit · Apr 30, 2024 · efd4456 · efd4456
1 parent f8311f3
commit efd4456
Show file tree

Hide file tree

Showing 4 changed files with 39 additions and 7 deletions.
diff --git a/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp b/src/plugins/intel_gpu/include/intel_gpu/runtime/device_info.hpp
@@ -66,6 +66,7 @@ struct device_info {
     bool supports_immad;                        ///< Does engine support int8 multi mad.
 
     bool supports_usm;                          ///< Does engine support unified shared memory.
+    bool has_separate_cache;                    ///< Does the target hardware have a separate cache for usm_device and usm_host.
 
     std::vector<size_t> supported_simd_sizes;   ///< List of SIMD sizes supported by current device and compiler
 

diff --git a/src/plugins/intel_gpu/src/graph/network.cpp b/src/plugins/intel_gpu/src/graph/network.cpp
@@ -1439,7 +1439,11 @@ void network::transfer_memory_to_device(std::shared_ptr<primitive_inst> instance
     if (!get_engine().supports_allocation(allocation_type::usm_device))
         return;
 
-    if (get_engine().get_device_info().dev_type != device_type::discrete_gpu)
+    if (!get_engine().get_device_info().has_separate_cache)
+        return;
+
+
+    if (node.is_shape_infer_dep())
         return;
 
     if (alloc_type == allocation_type::usm_host || alloc_type == allocation_type::usm_shared) {

diff --git a/src/plugins/intel_gpu/src/graph/program.cpp b/src/plugins/intel_gpu/src/graph/program.cpp
@@ -505,7 +505,7 @@ void program::build_program(bool is_internal) {
 
     if (!is_internal) {
         prim_info = get_current_stage_info();
-        if (get_engine().get_device_info().dev_type == device_type::discrete_gpu)
+        if (get_engine().get_device_info().has_separate_cache)
             transfer_memory_to_device();
     }
 }

diff --git a/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp b/src/plugins/intel_gpu/src/runtime/ocl/ocl_device.cpp
@@ -119,12 +119,31 @@ device_type get_device_type(const cl::Device& device) {
     return unified_mem ? device_type::integrated_gpu : device_type::discrete_gpu;
 }
 
-gfx_version parse_version(cl_uint ver) {
-    uint16_t major = ver >> 16;
-    uint8_t minor = (ver >> 8) & 0xFF;
-    uint8_t revision = ver & 0xFF;
 
-    return {major, minor, revision};
+gfx_version parse_version(cl_uint gmdid) {
+    // Decodes a raw IP version value. Layout of the new (GMDID) encoding:
+    //   [31:22] architecture, [21:14] release, [13:6] reserved, [5:0] revision
+    // Layout of the old encoding:
+    //   [31:16] major, [15:8] minor, [7:0] revision
+    // Fields are extracted with shifts and masks instead of a union of
+    // bit-fields: bit-field ordering is implementation-defined and reading an
+    // inactive union member is undefined behavior in C++.
+    const uint32_t value = gmdid;
+    const uint32_t architecture = (value >> 22) & 0x3FF;
+
+    // An old-format value with major < 64 leaves bits [31:22] at zero, so a small
+    // non-zero architecture field is treated as the marker of the new format.
+    if (architecture > 0 && architecture < 100) {
+        return {static_cast<uint16_t>(architecture),
+                static_cast<uint8_t>((value >> 14) & 0xFF),
+                static_cast<uint8_t>(value & 0x3F)};
+    }
+
+    // Old format
+    const uint16_t major = static_cast<uint16_t>(value >> 16);
+    const uint8_t minor = static_cast<uint8_t>((value >> 8) & 0xFF);
+    const uint8_t revision = static_cast<uint8_t>(value & 0xFF);
+    return {major, minor, revision};
+}
 
 bool get_imad_support(const cl::Device& device) {
@@ -229,6 +248,7 @@ device_info init_device_info(const cl::Device& device) {
 
     bool device_attr_supported = extensions.find("cl_intel_device_attribute_query") != std::string::npos;
     bool nv_device_attr_supported = extensions.find("cl_nv_device_attribute_query") != std::string::npos;
+    info.has_separate_cache = false;
     if (device_attr_supported) {
         info.gfx_ver = parse_version(device.getInfo<CL_DEVICE_IP_VERSION_INTEL>());
         info.device_id = device.getInfo<CL_DEVICE_ID_INTEL>();
@@ -240,6 +260,13 @@ device_info init_device_info(const cl::Device& device) {
 
         info.supports_imad = info.supports_imad || (features & CL_DEVICE_FEATURE_FLAG_DP4A_INTEL);
         info.supports_immad = info.supports_immad || (features & CL_DEVICE_FEATURE_FLAG_DPAS_INTEL);
+        if (info.dev_type == device_type::discrete_gpu ||
+            info.gfx_ver.major > 12 || (info.gfx_ver.major == 12 && info.gfx_ver.minor >= 70)) {
+            info.has_separate_cache = true;
+        }
+        GPU_DEBUG_INFO << "GPU version: "
+            << static_cast<int>(info.gfx_ver.major) << "." << static_cast<int>(info.gfx_ver.minor) << "." << static_cast<int>(info.gfx_ver.revision)
+            << (info.has_separate_cache ? " with separate cache" : "") << std::endl;
         GPU_DEBUG_GET_INSTANCE(debug_config);
         GPU_DEBUG_IF(debug_config->disable_onednn)
             info.supports_immad = false;