diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index eacaa20d4dced6..61858ab80a2c9e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -783,6 +783,91 @@ int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int
+i915_gem_context_reconfigure_sseu(struct i915_gem_context *ctx,
+				  struct intel_engine_cs *engine,
+				  struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct i915_request *rq;
+	struct intel_ring *ring;
+	int ret;
+
+	lockdep_assert_held(&i915->drm.struct_mutex);
+
+	/* Submitting requests etc needs the hw awake. */
+	intel_runtime_pm_get(i915);
+
+	i915_retire_requests(i915);
+
+	/* Now use the RCS to actually reconfigure. */
+	engine = i915->engine[RCS];
+
+	rq = i915_request_alloc(engine, i915->kernel_context);
+	if (IS_ERR(rq)) {
+		ret = PTR_ERR(rq);
+		goto out_put;
+	}
+
+	ret = engine->emit_rpcs_config(rq, ctx, sseu);
+	if (ret)
+		goto out_add;
+
+	/* Queue this switch after all other activity */
+	list_for_each_entry(ring, &i915->gt.active_rings, active_link) {
+		struct i915_request *prev;
+
+		prev = last_request_on_engine(ring->timeline, engine);
+		if (prev)
+			i915_sw_fence_await_sw_fence_gfp(&rq->submit,
+							 &prev->submit,
+							 I915_FENCE_GFP);
+	}
+
+	i915_gem_set_global_barrier(i915, rq);
+
+out_add:
+	i915_request_add(rq);
+out_put:
+	intel_runtime_pm_put(i915);
+
+	return ret;
+}
+
+static int get_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_context *ce;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd1 || user_sseu.rsvd2)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(ctx->i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	ce = to_intel_context(ctx, engine);
+
+	user_sseu.slice_mask = ce->sseu.slice_mask;
+	user_sseu.subslice_mask = ce->sseu.subslice_mask;
+	user_sseu.min_eus_per_subslice = ce->sseu.min_eus_per_subslice;
+	user_sseu.max_eus_per_subslice = ce->sseu.max_eus_per_subslice;
+
+	if (copy_to_user(u64_to_user_ptr(args->value), &user_sseu,
+			 sizeof(user_sseu)))
+		return -EFAULT;
+
+	return 0;
+}
+
 int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -820,6 +905,9 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->sched.priority;
 		break;
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = get_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
@@ -829,6 +917,101 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	return ret;
 }
 
+static int
+__user_to_context_sseu(const struct sseu_dev_info *device,
+		       const struct drm_i915_gem_context_param_sseu *user,
+		       struct intel_sseu *context)
+{
+	/* No zeros in any field. */
+	if (!user->slice_mask || !user->subslice_mask ||
+	    !user->min_eus_per_subslice || !user->max_eus_per_subslice)
+		return -EINVAL;
+
+	/* Max > min. */
+	if (user->max_eus_per_subslice < user->min_eus_per_subslice)
+		return -EINVAL;
+
+	/* Check validity against hardware. */
+	if (user->slice_mask & ~device->slice_mask)
+		return -EINVAL;
+
+	if (user->subslice_mask & ~device->subslice_mask[0])
+		return -EINVAL;
+
+	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
+		return -EINVAL;
+
+	context->slice_mask = user->slice_mask;
+	context->subslice_mask = user->subslice_mask;
+	context->min_eus_per_subslice = user->min_eus_per_subslice;
+	context->max_eus_per_subslice = user->max_eus_per_subslice;
+
+	return 0;
+}
+
+static int set_sseu(struct i915_gem_context *ctx,
+		    struct drm_i915_gem_context_param *args)
+{
+	struct drm_i915_private *i915 = ctx->i915;
+	struct drm_i915_gem_context_param_sseu user_sseu;
+	struct intel_engine_cs *engine;
+	struct intel_sseu ctx_sseu;
+	struct intel_context *ce;
+	enum intel_engine_id id;
+	int ret;
+
+	if (args->size)
+		return -EINVAL;
+
+	if (!USES_FULL_PPGTT(i915))
+		return -ENODEV;
+
+	if (!IS_GEN11(i915) && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	if (copy_from_user(&user_sseu, u64_to_user_ptr(args->value),
+			   sizeof(user_sseu)))
+		return -EFAULT;
+
+	if (user_sseu.rsvd1 || user_sseu.rsvd2)
+		return -EINVAL;
+
+	engine = intel_engine_lookup_user(i915,
+					  user_sseu.class,
+					  user_sseu.instance);
+	if (!engine)
+		return -EINVAL;
+
+	if (!engine->emit_rpcs_config)
+		return -ENODEV;
+
+	ret = __user_to_context_sseu(&INTEL_INFO(i915)->sseu, &user_sseu,
+				     &ctx_sseu);
+	if (ret)
+		return ret;
+
+	ce = to_intel_context(ctx, engine);
+
+	/* Nothing to do if unmodified. */
+	if (!memcmp(&ce->sseu, &ctx_sseu, sizeof(ctx_sseu)))
+		return 0;
+
+	ret = i915_gem_context_reconfigure_sseu(ctx, engine, ctx_sseu);
+	if (ret)
+		return ret;
+
+	/*
+	 * Copy the configuration to all engines. Our hardware doesn't
+	 * currently support different configurations for each engine.
+	 */
+	for_each_engine(engine, i915, id) {
+		ce = to_intel_context(ctx, engine);
+		ce->sseu = ctx_sseu;
+	}
+
+	return 0;
+}
+
 int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file)
 {
@@ -894,7 +1077,9 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				ctx->sched.priority = priority;
 		}
 		break;
-
+	case I915_CONTEXT_PARAM_SSEU:
+		ret = set_sseu(ctx, args);
+		break;
 	default:
 		ret = -EINVAL;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index da95624b9672c8..60a0244318d44c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2239,6 +2239,60 @@ static void gen8_emit_breadcrumb_rcs(struct i915_request *request, u32 *cs)
 }
 static const int gen8_emit_breadcrumb_rcs_sz = 8 + WA_TAIL_DWORDS;
 
+static int gen8_emit_rpcs_config(struct i915_request *rq,
+				 struct i915_gem_context *ctx,
+				 struct intel_sseu sseu)
+{
+	struct drm_i915_private *i915 = rq->i915;
+	struct intel_context *ce = to_intel_context(ctx, i915->engine[RCS]);
+	struct i915_vma *vma;
+	u64 offset;
+	u32 *cs;
+	int err;
+
+	/* Let the deferred state allocation take care of this. */
+	if (!ce->state)
+		return 0;
+
+	vma = i915_vma_instance(ce->state->obj,
+				&i915->kernel_context->ppgtt->vm,
+				NULL);
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err) {
+		i915_vma_close(vma);
+		return err;
+	}
+
+	err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
+	if (unlikely(err)) {
+		i915_vma_close(vma);
+		return err;
+	}
+
+	i915_vma_unpin(vma);
+
+	cs = intel_ring_begin(rq, 4);
+	if (IS_ERR(cs))
+		return PTR_ERR(cs);
+
+	offset = vma->node.start +
+		 LRC_STATE_PN * PAGE_SIZE +
+		 (CTX_R_PWR_CLK_STATE + 1) * 4;
+
+	*cs++ = MI_STORE_DWORD_IMM_GEN4;
+	*cs++ = lower_32_bits(offset);
+	*cs++ = upper_32_bits(offset);
+	*cs++ = gen8_make_rpcs(&INTEL_INFO(i915)->sseu,
+			       intel_engine_prepare_sseu(rq->engine, sseu));
+
+	intel_ring_advance(rq, cs);
+
+	return 0;
+}
+
 static int gen8_init_rcs_context(struct i915_request *rq)
 {
 	int ret;
@@ -2331,6 +2385,7 @@ logical_ring_default_vfuncs(struct intel_engine_cs *engine)
 	engine->emit_flush = gen8_emit_flush;
 	engine->emit_breadcrumb = gen8_emit_breadcrumb;
 	engine->emit_breadcrumb_sz = gen8_emit_breadcrumb_sz;
+	engine->emit_rpcs_config = gen8_emit_rpcs_config;
 
 	engine->set_default_submission = intel_execlists_set_default_submission;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f5ffa6d31e82c3..3b434a2d2b4677 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -478,6 +478,10 @@ struct intel_engine_cs {
 	void (*emit_breadcrumb)(struct i915_request *rq, u32 *cs);
 	int emit_breadcrumb_sz;
 
+	int (*emit_rpcs_config)(struct i915_request *rq,
+				struct i915_gem_context *ctx,
+				struct intel_sseu sseu);
+
 	/* Pass the request to the hardware queue (e.g. directly into
 	 * the legacy ringbuffer or to the end of an execlist).
 	 *
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e885d..21df158056a26f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,9 +1456,52 @@ struct drm_i915_gem_context_param {
 #define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
 #define I915_CONTEXT_DEFAULT_PRIORITY 0
 #define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+	/*
+	 * When using the following param, value should be a pointer to
+	 * drm_i915_gem_context_param_sseu.
+	 */
+#define I915_CONTEXT_PARAM_SSEU 0x7
 	__u64 value;
 };
 
+struct drm_i915_gem_context_param_sseu {
+	/*
+	 * Engine class & instance to be configured or queried.
+	 */
+	__u16 class;
+	__u16 instance;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd1;
+
+	/*
+	 * Mask of slices to enable for the context. Valid values are a subset
+	 * of the bitmask value returned by I915_PARAM_SLICE_MASK.
+	 */
+	__u64 slice_mask;
+
+	/*
+	 * Mask of subslices to enable for the context. Valid values are a
+	 * subset of the bitmask value returned by I915_PARAM_SUBSLICE_MASK.
+	 */
+	__u64 subslice_mask;
+
+	/*
+	 * Minimum/Maximum number of EUs to enable per subslice for the
+	 * context. min_eus_per_subslice must be less than or equal to
+	 * max_eus_per_subslice.
+	 */
+	__u16 min_eus_per_subslice;
+	__u16 max_eus_per_subslice;
+
+	/*
+	 * Unused for now. Must be cleared to zero.
+	 */
+	__u32 rsvd2;
+};
+
 enum drm_i915_oa_format {
 	I915_OA_FORMAT_A13 = 1, /* HSW only */
 	I915_OA_FORMAT_A29, /* HSW only */
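
For illustration only (not part of the patch), a minimal userspace sketch of driving the new I915_CONTEXT_PARAM_SSEU param. It assumes an open DRM fd, an already-created GEM context, libdrm's drmIoctl(); the engine class/instance (0/0, the render engine) and the mask/EU arguments are placeholders that must be a subset of what I915_PARAM_SLICE_MASK / I915_PARAM_SUBSLICE_MASK report:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/i915_drm.h>

static int set_context_sseu(int fd, uint32_t ctx_id,
			    uint64_t slice_mask, uint64_t subslice_mask,
			    uint16_t min_eus, uint16_t max_eus)
{
	struct drm_i915_gem_context_param_sseu sseu = {
		.class = 0,		/* render engine class */
		.instance = 0,
		.slice_mask = slice_mask,
		.subslice_mask = subslice_mask,
		.min_eus_per_subslice = min_eus,
		.max_eus_per_subslice = max_eus,
	};
	struct drm_i915_gem_context_param arg = {
		.ctx_id = ctx_id,
		.param = I915_CONTEXT_PARAM_SSEU,
		.size = 0,		/* this revision rejects a non-zero size */
		.value = (uintptr_t)&sseu,
	};

	/* 0 on success, -1 with errno set on failure. */
	return drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &arg);
}

Querying works the same way through DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM: fill in class/instance (and zero rsvd1/rsvd2) and the current masks and EU counts are written back into the same struct.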