From fc0f927b30b8534afdb066910b9ae02693ff9dcc Mon Sep 17 00:00:00 2001 From: Tony Ye Date: Mon, 13 Aug 2018 06:20:26 +0800 Subject: [PATCH] Set subslice mask according to the per-context option. Updated slice shutdown uAPI to v12. Fixes #267. --- .../linux/common/os/libdrm/include/i915_drm.h | 125 ++++++++++++++++-- .../common/os/libdrm/include/mos_bufmgr.h | 7 +- .../linux/common/os/libdrm/mos_bufmgr.c | 70 ++++++++-- .../linux/common/os/mos_context_specific.cpp | 39 +++++- .../linux/common/os/mos_context_specific.h | 2 +- .../linux/common/os/mos_os_specific.c | 34 +++++ .../ult/libdrm_mock/include/i915_drm_mock.h | 122 +++++++++++++++-- 7 files changed, 360 insertions(+), 39 deletions(-) diff --git a/media_driver/linux/common/os/libdrm/include/i915_drm.h b/media_driver/linux/common/os/libdrm/include/i915_drm.h index 28d815ef293..f934d43bb2e 100644 --- a/media_driver/linux/common/os/libdrm/include/i915_drm.h +++ b/media_driver/linux/common/os/libdrm/include/i915_drm.h @@ -429,7 +429,83 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_GPU_RESET 35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 #define I915_PARAM_HAS_EXEC_SOFTPIN 37 -#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 +#define I915_PARAM_HAS_POOLED_EU 38 +#define I915_PARAM_MIN_EU_IN_POOL 39 +#define I915_PARAM_MMAP_GTT_VERSION 40 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution + * priorities and the driver will attempt to execute batches in priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. 
+ */ +#define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) + +#define I915_PARAM_HUC_STATUS 42 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + +#define I915_PARAM_SLICE_MASK 46 + +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST. + */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context.
+ * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 typedef struct drm_i915_getparam { __s32 param; @@ -1343,21 +1419,46 @@ struct drm_i915_gem_context_param { #define I915_CONTEXT_PARAM_BAN_PERIOD 0x1 #define I915_CONTEXT_PARAM_NO_ZEROMAP 0x2 #define I915_CONTEXT_PARAM_GTT_SIZE 0x3 -#define I915_CONTEXT_PARAM_SSEU 0x6 +#define I915_CONTEXT_PARAM_SSEU 0x7 __u64 value; }; struct drm_i915_gem_context_param_sseu { - __u64 flags; - union { - struct { - __u8 slice_mask; - __u8 subslice_mask; - __u8 min_eu_per_subslice; - __u8 max_eu_per_subslice; - } packed; - __u64 value; - }; + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd1; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. Valid values are a + * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be less than or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero.
+ */ + __u32 rsvd2; }; typedef struct drm_i915_ring_load_info diff --git a/media_driver/linux/common/os/libdrm/include/mos_bufmgr.h b/media_driver/linux/common/os/libdrm/include/mos_bufmgr.h index 70fe3dae7f6..01138ebed3e 100644 --- a/media_driver/linux/common/os/libdrm/include/mos_bufmgr.h +++ b/media_driver/linux/common/os/libdrm/include/mos_bufmgr.h @@ -370,8 +370,6 @@ int mos_get_context_param_sseu(struct mos_linux_context *ctx, int mos_set_context_param_sseu(struct mos_linux_context *ctx, struct drm_i915_gem_context_param_sseu sseu); -int mos_get_slice_mask(uint32_t slice_count); - int mos_get_context_param(struct mos_linux_context *ctx, uint32_t size, uint64_t param, @@ -384,6 +382,11 @@ int mos_set_context_param(struct mos_linux_context *ctx, int mos_get_subslice_total(int fd, unsigned int *subslice_total); int mos_get_eu_total(int fd, unsigned int *eu_total); +int mos_get_subslice_mask(int fd, unsigned int *subslice_mask); +int mos_get_slice_mask(int fd, unsigned int *slice_mask); +uint8_t mos_switch_off_n_bits(uint8_t in_mask, int n); +unsigned int mos_hweight8(uint8_t w); + #if defined(__cplusplus) extern "C" { #endif diff --git a/media_driver/linux/common/os/libdrm/mos_bufmgr.c b/media_driver/linux/common/os/libdrm/mos_bufmgr.c index 9aea16873a5..a449d389804 100644 --- a/media_driver/linux/common/os/libdrm/mos_bufmgr.c +++ b/media_driver/linux/common/os/libdrm/mos_bufmgr.c @@ -4695,20 +4695,40 @@ mos_get_reset_stats(struct mos_linux_context *ctx, return ret; } -int mos_get_slice_mask(uint32_t slice_count) +unsigned mos_hweight8(uint8_t w) { - uint32_t bitflag = 1; - uint32_t bitmask = 0; - int count = 0; + uint32_t i, weight = 0; - while (count < slice_count) + for (i=0; i<8; i++) { - bitmask = bitflag; - bitflag = bitmask | (bitflag << 1); - count++; + weight += !!((w) & (1UL << i)); } + return weight; +} + +uint8_t mos_switch_off_n_bits(uint8_t in_mask, int n) +{ + int i,count; + uint8_t bi,out_mask; - return bitmask; + assert (n>0 && n<=8); + + 
out_mask = in_mask; + count = n; + for(i=0; i<8; i++) + { + bi = 1UL< sliceCount) + { + sseu.slice_mask = mos_switch_off_n_bits((uint8_t)device_slice_mask, + mos_hweight8(device_slice_mask) - sliceCount); + } + else + { + sseu.slice_mask = device_slice_mask; + } + if (mos_set_context_param_sseu(m_intelContext, sseu)) { MOS_OS_ASSERTMESSAGE("Failed to set context parameter."); return ; } - m_sseu = sseu.value; + m_sseu = sseu; } *pSliceCount = sliceCount; @@ -559,6 +577,15 @@ MOS_STATUS OsContextSpecific::Init(PMOS_CONTEXT pOsDriverContext) MOS_OS_ASSERTMESSAGE("Failed to create drm intel context"); return MOS_STATUS_UNKNOWN; } + + MOS_ZeroMemory(&m_sseu, sizeof(m_sseu)); + m_sseu.engine_class = 0; + m_sseu.instance = 0; + if (mos_get_context_param_sseu(m_intelContext, &m_sseu)) + { + MOS_OS_ASSERTMESSAGE("Failed to get context parameter sseu."); + return MOS_STATUS_UNKNOWN; + }; #else m_intelContext = nullptr; #endif diff --git a/media_driver/linux/common/os/mos_context_specific.h b/media_driver/linux/common/os/mos_context_specific.h index 43abe7e4aaa..4b894109387 100644 --- a/media_driver/linux/common/os/mos_context_specific.h +++ b/media_driver/linux/common/os/mos_context_specific.h @@ -322,7 +322,7 @@ class OsContextSpecific : public OsContext //! //! \brief sseu for current context //! - uint64_t m_sseu = 0; + struct drm_i915_gem_context_param_sseu m_sseu; //! //! \brief Semophore ID for ruling SSEU configration //! 
diff --git a/media_driver/linux/common/os/mos_os_specific.c b/media_driver/linux/common/os/mos_os_specific.c index 29c97eb788d..f957742486b 100644 --- a/media_driver/linux/common/os/mos_os_specific.c +++ b/media_driver/linux/common/os/mos_os_specific.c @@ -3790,6 +3790,40 @@ MOS_STATUS Mos_Specific_CreateGpuContext( pOsContextSpecific->SetGpuContextHandle(mosGpuCxt, gpuContextSpecific->GetGpuContextHandle()); } + MOS_OS_CHK_NULL_RETURN(createOption); + if (createOption->SSEUValue) + { + unsigned int subslice_mask; + struct drm_i915_gem_context_param_sseu sseu; + MOS_ZeroMemory(&sseu, sizeof(sseu)); + sseu.engine_class = 0; + sseu.instance = 0; + + if (mos_get_context_param_sseu(pOsInterface->pOsContext->intel_context, &sseu)) + { + MOS_OS_ASSERTMESSAGE("Failed to get sseu configuration."); + return MOS_STATUS_UNKNOWN; + }; + + if (mos_get_subslice_mask(pOsInterface->pOsContext->fd, &subslice_mask)) + { + MOS_OS_ASSERTMESSAGE("Failed to get subslice mask."); + return MOS_STATUS_UNKNOWN; + } + + if (mos_hweight8(sseu.subslice_mask) > createOption->packed.SubSliceCount) + { + sseu.subslice_mask = mos_switch_off_n_bits(subslice_mask, + mos_hweight8((uint8_t)subslice_mask)-createOption->packed.SubSliceCount); + } + + if (mos_set_context_param_sseu(pOsInterface->pOsContext->intel_context, sseu)) + { + MOS_OS_ASSERTMESSAGE("Failed to set sseu configuration."); + return MOS_STATUS_UNKNOWN; + }; + } + return MOS_STATUS_SUCCESS; } diff --git a/media_driver/linux/ult/libdrm_mock/include/i915_drm_mock.h b/media_driver/linux/ult/libdrm_mock/include/i915_drm_mock.h index 444c915bef4..3cf53b62ccf 100644 --- a/media_driver/linux/ult/libdrm_mock/include/i915_drm_mock.h +++ b/media_driver/linux/ult/libdrm_mock/include/i915_drm_mock.h @@ -429,7 +429,83 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_HAS_GPU_RESET 35 #define I915_PARAM_HAS_RESOURCE_STREAMER 36 #define I915_PARAM_HAS_EXEC_SOFTPIN 37 -#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 +#define 
I915_PARAM_HAS_POOLED_EU 38 +#define I915_PARAM_MIN_EU_IN_POOL 39 +#define I915_PARAM_MMAP_GTT_VERSION 40 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports user defined execution + * priorities and the driver will attempt to execute batches in priority order. + * The param returns a capability bitmask, nonzero implies that the scheduler + * is enabled, with different features present according to the mask. + * + * The initial priority for each batch is supplied by the context and is + * controlled via I915_CONTEXT_PARAM_PRIORITY. + */ +#define I915_PARAM_HAS_SCHEDULER 41 +#define I915_SCHEDULER_CAP_ENABLED (1ul << 0) +#define I915_SCHEDULER_CAP_PRIORITY (1ul << 1) +#define I915_SCHEDULER_CAP_PREEMPTION (1ul << 2) + +#define I915_PARAM_HUC_STATUS 42 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to opt-out of + * synchronisation with implicit fencing on individual objects. + * See EXEC_OBJECT_ASYNC. + */ +#define I915_PARAM_HAS_EXEC_ASYNC 43 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports explicit fence support - + * both being able to pass in a sync_file fd to wait upon before executing, + * and being able to return a new sync_file fd that is signaled when the + * current request is complete. See I915_EXEC_FENCE_IN and I915_EXEC_FENCE_OUT. + */ +#define I915_PARAM_HAS_EXEC_FENCE 44 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports the ability to capture + * user specified buffers for post-mortem debugging of GPU hangs. See + * EXEC_OBJECT_CAPTURE. + */ +#define I915_PARAM_HAS_EXEC_CAPTURE 45 + +#define I915_PARAM_SLICE_MASK 46 + +/* Assuming it's uniform for each slice, this queries the mask of subslices + * per-slice for this system. + */ +#define I915_PARAM_SUBSLICE_MASK 47 + +/* + * Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying the batch buffer + * as the first execobject as opposed to the last. See I915_EXEC_BATCH_FIRST.
+ */ +#define I915_PARAM_HAS_EXEC_BATCH_FIRST 48 + +/* Query whether DRM_I915_GEM_EXECBUFFER2 supports supplying an array of + * drm_i915_gem_exec_fence structures. See I915_EXEC_FENCE_ARRAY. + */ +#define I915_PARAM_HAS_EXEC_FENCE_ARRAY 49 + +/* + * Query whether every context (both per-file default and user created) is + * isolated (insofar as HW supports). If this parameter is not true, then + * freshly created contexts may inherit values from an existing context, + * rather than default HW values. If true, it also ensures (insofar as HW + * supports) that all state set by this context will not leak to any other + * context. + * + * As not every engine across every gen support contexts, the returned + * value reports the support of context isolation for individual engines by + * returning a bitmask of each engine class set to true if that class supports + * isolation. + */ +#define I915_PARAM_HAS_CONTEXT_ISOLATION 50 + +/* Frequency of the command streamer timestamps given by the *_TIMESTAMP + * registers. This used to be fixed per platform but from CNL onwards, this + * might vary depending on the parts. + */ +#define I915_PARAM_CS_TIMESTAMP_FREQUENCY 51 typedef struct drm_i915_getparam { __s32 param; @@ -1347,14 +1423,42 @@ struct drm_i915_gem_context_param { __u64 value; }; -union drm_i915_gem_context_param_sseu { - struct { - __u8 slice_mask; - __u8 subslice_mask; - __u8 min_eu_per_subslice; - __u8 max_eu_per_subslice; - } packed; - __u64 value; +struct drm_i915_gem_context_param_sseu { + /* + * Engine class & instance to be configured or queried. + */ + __u16 engine_class; + __u16 instance; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd1; + + /* + * Mask of slices to enable for the context. Valid values are a subset + * of the bitmask value returned for I915_PARAM_SLICE_MASK. + */ + __u64 slice_mask; + + /* + * Mask of subslices to enable for the context. 
Valid values are a + * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK. + */ + __u64 subslice_mask; + + /* + * Minimum/Maximum number of EUs to enable per subslice for the + * context. min_eus_per_subslice must be less than or equal to + * max_eus_per_subslice. + */ + __u16 min_eus_per_subslice; + __u16 max_eus_per_subslice; + + /* + * Unused for now. Must be cleared to zero. + */ + __u32 rsvd2; }; typedef struct drm_i915_ring_load_info