diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index c120d87df5941..596810e0cfe81 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -943,30 +943,30 @@ static int i915_gem_fence_regs_info(struct seq_file *m, void *data) static ssize_t gpu_state_read(struct file *file, char __user *ubuf, size_t count, loff_t *pos) { - struct i915_gpu_state *error = file->private_data; - struct drm_i915_error_state_buf str; + struct i915_gpu_state *error; ssize_t ret; - loff_t tmp; + void *buf; + error = file->private_data; if (!error) return 0; - ret = i915_error_state_buf_init(&str, error->i915, count, *pos); - if (ret) - return ret; + /* Bounce buffer required because of kernfs __user API convenience. */ + buf = kmalloc(count, GFP_KERNEL); + if (!buf) + return -ENOMEM; - ret = i915_error_state_to_str(&str, error); - if (ret) + ret = i915_gpu_state_copy_to_buffer(error, buf, *pos, count); + if (ret <= 0) goto out; - tmp = 0; - ret = simple_read_from_buffer(ubuf, count, &tmp, str.buf, str.bytes); - if (ret < 0) - goto out; + if (!copy_to_user(ubuf, buf, ret)) + *pos += ret; + else + ret = -EFAULT; - *pos = str.start + ret; out: - i915_error_state_buf_release(&str); + kfree(buf); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c index 8123bf0e4807d..a6885a59568b0 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.c +++ b/drivers/gpu/drm/i915/i915_gpu_error.c @@ -27,11 +27,14 @@ * */ -#include +#include +#include +#include #include +#include #include + #include -#include #include "i915_gpu_error.h" #include "i915_drv.h" @@ -77,112 +80,110 @@ static const char *purgeable_flag(int purgeable) return purgeable ? " purgeable" : ""; } -static bool __i915_error_ok(struct drm_i915_error_state_buf *e) +static void __sg_set_buf(struct scatterlist *sg, + void *addr, unsigned int len, loff_t it) { - - if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) { - e->err = -ENOSPC; - return false; - } - - if (e->bytes == e->size - 1 || e->err) - return false; - - return true; + sg->page_link = (unsigned long)virt_to_page(addr); + sg->offset = offset_in_page(addr); + sg->length = len; + sg->dma_address = it; } -static bool __i915_error_seek(struct drm_i915_error_state_buf *e, - unsigned len) +static bool __i915_error_grow(struct drm_i915_error_state_buf *e, size_t len) { - if (e->pos + len <= e->start) { - e->pos += len; + if (!len) return false; - } - /* First vsnprintf needs to fit in its entirety for memmove */ - if (len >= e->size) { - e->err = -EIO; - return false; - } + if (e->bytes + len + 1 <= e->size) + return true; - return true; -} + if (e->bytes) { + __sg_set_buf(e->cur++, e->buf, e->bytes, e->iter); + e->iter += e->bytes; + e->buf = NULL; + e->bytes = 0; + } -static void __i915_error_advance(struct drm_i915_error_state_buf *e, - unsigned len) -{ - /* If this is first printf in this window, adjust it so that - * start position matches start of the buffer - */ + if (e->cur == e->end) { + struct scatterlist *sgl; - if (e->pos < e->start) { - const size_t off = e->start - e->pos; + sgl = (typeof(sgl))__get_free_page(GFP_KERNEL); + if (!sgl) { + e->err = -ENOMEM; + return false; + } - /* Should not happen but be paranoid */ - if (off > len || e->bytes) { - e->err = -EIO; - return; + if (e->cur) { + e->cur->offset = 0; + e->cur->length = 0; + e->cur->page_link = + (unsigned long)sgl | SG_CHAIN; + } else { + e->sgl = sgl; } - memmove(e->buf, e->buf + off, len - off); - e->bytes = len - off; - e->pos = e->start; - return; + e->cur = sgl; + e->end = sgl + SG_MAX_SINGLE_ALLOC - 1; } - e->bytes += len; - e->pos += len; + e->size = ALIGN(len + 1, SZ_64K); + e->buf = kmalloc(e->size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); + if (!e->buf) { + e->size = PAGE_ALIGN(len + 1); + e->buf = kmalloc(e->size, GFP_KERNEL); + } + if (!e->buf) { + e->err = -ENOMEM; + return false; + } + + return true; } __printf(2, 0) static void i915_error_vprintf(struct drm_i915_error_state_buf *e, - const char *f, va_list args) + const char *fmt, va_list args) { - unsigned len; + va_list ap; + int len; - if (!__i915_error_ok(e)) + if (e->err) return; - /* Seek the first printf which is hits start position */ - if (e->pos < e->start) { - va_list tmp; - - va_copy(tmp, args); - len = vsnprintf(NULL, 0, f, tmp); - va_end(tmp); - - if (!__i915_error_seek(e, len)) - return; + va_copy(ap, args); + len = vsnprintf(NULL, 0, fmt, ap); + va_end(ap); + if (len <= 0) { + e->err = len; + return; } - len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args); - if (len >= e->size - e->bytes) - len = e->size - e->bytes - 1; + if (!__i915_error_grow(e, len)) + return; - __i915_error_advance(e, len); + GEM_BUG_ON(e->bytes >= e->size); + len = vscnprintf(e->buf + e->bytes, e->size - e->bytes, fmt, args); + if (len < 0) { + e->err = len; + return; + } + e->bytes += len; } -static void i915_error_puts(struct drm_i915_error_state_buf *e, - const char *str) +static void i915_error_puts(struct drm_i915_error_state_buf *e, const char *str) { unsigned len; - if (!__i915_error_ok(e)) + if (e->err || !str) return; len = strlen(str); + if (!__i915_error_grow(e, len)) + return; - /* Seek the first printf which is hits start position */ - if (e->pos < e->start) { - if (!__i915_error_seek(e, len)) - return; - } - - if (len >= e->size - e->bytes) - len = e->size - e->bytes - 1; + GEM_BUG_ON(e->bytes + len > e->size); memcpy(e->buf + e->bytes, str, len); - - __i915_error_advance(e, len); + e->bytes += len; } #define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__) @@ -268,6 +269,8 @@ static int compress_page(struct compress *c, if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK) return -EIO; + + touch_nmi_watchdog(); } while (zstream->avail_in); /* Fallback to uncompressed if we increase size? */ @@ -635,21 +638,29 @@ static void err_print_uc(struct drm_i915_error_state_buf *m, print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log); } -int i915_error_state_to_str(struct drm_i915_error_state_buf *m, - const struct i915_gpu_state *error) +static void err_free_sgl(struct scatterlist *sgl) { - struct drm_i915_private *dev_priv = m->i915; - struct drm_i915_error_object *obj; - struct timespec64 ts; - int i, j; + while (sgl) { + struct scatterlist *sg; - if (!error) { - err_printf(m, "No error state collected\n"); - return 0; + for (sg = sgl; !sg_is_chain(sg); sg++) { + kfree(sg_virt(sg)); + if (sg_is_last(sg)) + break; + } + + sg = sg_is_last(sg) ? NULL : sg_chain_ptr(sg); + free_page((unsigned long)sgl); + sgl = sg; } +} - if (IS_ERR(error)) - return PTR_ERR(error); +static void __err_print_to_sgl(struct drm_i915_error_state_buf *m, + struct i915_gpu_state *error) +{ + struct drm_i915_error_object *obj; + struct timespec64 ts; + int i, j; if (*error->error_msg) err_printf(m, "%s\n", error->error_msg); @@ -683,12 +694,12 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "Reset count: %u\n", error->reset_count); err_printf(m, "Suspend count: %u\n", error->suspend_count); err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform)); - err_print_pciid(m, error->i915); + err_print_pciid(m, m->i915); err_printf(m, "IOMMU enabled?: %d\n", error->iommu); - if (HAS_CSR(dev_priv)) { - struct intel_csr *csr = &dev_priv->csr; + if (HAS_CSR(m->i915)) { + struct intel_csr *csr = &m->i915->csr; err_printf(m, "DMC loaded: %s\n", yesno(csr->dmc_payload != NULL)); @@ -708,22 +719,23 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake); err_printf(m, "DERRMR: 0x%08x\n", error->derrmr); err_printf(m, "CCID: 0x%08x\n", error->ccid); - err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings); + err_printf(m, "Missed interrupts: 0x%08lx\n", + m->i915->gpu_error.missed_irq_rings); for (i = 0; i < error->nfence; i++) err_printf(m, " fence[%d] = %08llx\n", i, error->fence[i]); - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(m->i915) >= 6) { err_printf(m, "ERROR: 0x%08x\n", error->error); - if (INTEL_GEN(dev_priv) >= 8) + if (INTEL_GEN(m->i915) >= 8) err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n", error->fault_data1, error->fault_data0); err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg); } - if (IS_GEN7(dev_priv)) + if (IS_GEN7(m->i915)) err_printf(m, "ERR_INT: 0x%08x\n", error->err_int); for (i = 0; i < ARRAY_SIZE(error->engine); i++) { @@ -745,7 +757,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, len += scnprintf(buf + len, sizeof(buf), "%s%s", first ? "" : ", ", - dev_priv->engine[j]->name); + m->i915->engine[j]->name); first = 0; } scnprintf(buf + len, sizeof(buf), ")"); @@ -763,7 +775,7 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, obj = ee->batchbuffer; if (obj) { - err_puts(m, dev_priv->engine[i]->name); + err_puts(m, m->i915->engine[i]->name); if (ee->context.pid) err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)", ee->context.comm, @@ -775,16 +787,16 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_printf(m, " --- gtt_offset = 0x%08x %08x\n", upper_32_bits(obj->gtt_offset), lower_32_bits(obj->gtt_offset)); - print_error_obj(m, dev_priv->engine[i], NULL, obj); + print_error_obj(m, m->i915->engine[i], NULL, obj); } for (j = 0; j < ee->user_bo_count; j++) - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "user", ee->user_bo[j]); if (ee->num_requests) { err_printf(m, "%s --- %d requests\n", - dev_priv->engine[i]->name, + m->i915->engine[i]->name, ee->num_requests); for (j = 0; j < ee->num_requests; j++) error_print_request(m, " ", @@ -794,10 +806,10 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, if (IS_ERR(ee->waiters)) { err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n", - dev_priv->engine[i]->name); + m->i915->engine[i]->name); } else if (ee->num_waiters) { err_printf(m, "%s --- %d waiters\n", - dev_priv->engine[i]->name, + m->i915->engine[i]->name, ee->num_waiters); for (j = 0; j < ee->num_waiters; j++) { err_printf(m, " seqno 0x%08x for %s [%d]\n", @@ -807,22 +819,22 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, } } - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "ringbuffer", ee->ringbuffer); - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "HW Status", ee->hws_page); - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "HW context", ee->ctx); - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "WA context", ee->wa_ctx); - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "WA batchbuffer", ee->wa_batchbuffer); - print_error_obj(m, dev_priv->engine[i], + print_error_obj(m, m->i915->engine[i], "NULL context", ee->default_state); } @@ -835,43 +847,107 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m, err_print_capabilities(m, &error->device_info, &error->driver_caps); err_print_params(m, &error->params); err_print_uc(m, &error->uc); +} + +static int err_print_to_sgl(struct i915_gpu_state *error) +{ + struct drm_i915_error_state_buf m; + + if (IS_ERR(error)) + return PTR_ERR(error); + + if (READ_ONCE(error->sgl)) + return 0; + + memset(&m, 0, sizeof(m)); + m.i915 = error->i915; + + __err_print_to_sgl(&m, error); + + if (m.buf) { + __sg_set_buf(m.cur++, m.buf, m.bytes, m.iter); + m.bytes = 0; + m.buf = NULL; + } + if (m.cur) { + GEM_BUG_ON(m.end < m.cur); + sg_mark_end(m.cur - 1); + } + GEM_BUG_ON(m.sgl && !m.cur); + + if (m.err) { + err_free_sgl(m.sgl); + return m.err; + } - if (m->bytes == 0 && m->err) - return m->err; + if (cmpxchg(&error->sgl, NULL, m.sgl)) + err_free_sgl(m.sgl); return 0; } -int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf, - struct drm_i915_private *i915, - size_t count, loff_t pos) +ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, + char *buf, loff_t off, size_t rem) { - memset(ebuf, 0, sizeof(*ebuf)); - ebuf->i915 = i915; + struct scatterlist *sg; + size_t count; + loff_t pos; + int err; - /* We need to have enough room to store any i915_error_state printf - * so that we can move it to start position. - */ - ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE; - ebuf->buf = kmalloc(ebuf->size, - GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); + if (!error || !rem) + return 0; - if (ebuf->buf == NULL) { - ebuf->size = PAGE_SIZE; - ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL); - } + err = err_print_to_sgl(error); + if (err) + return err; - if (ebuf->buf == NULL) { - ebuf->size = 128; - ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL); - } + sg = READ_ONCE(error->fit); + if (!sg || off < sg->dma_address) + sg = error->sgl; + if (!sg) + return 0; - if (ebuf->buf == NULL) - return -ENOMEM; + pos = sg->dma_address; + count = 0; + do { + size_t len, start; + + if (sg_is_chain(sg)) { + sg = sg_chain_ptr(sg); + GEM_BUG_ON(sg_is_chain(sg)); + } + + len = sg->length; + if (pos + len <= off) { + pos += len; + continue; + } - ebuf->start = pos; + start = sg->offset; + if (pos < off) { + GEM_BUG_ON(off - pos > len); + len -= off - pos; + start += off - pos; + pos = off; + } - return 0; + len = min(len, rem); + GEM_BUG_ON(!len || len > sg->length); + + memcpy(buf, page_address(sg_page(sg)) + start, len); + + count += len; + pos += len; + + buf += len; + rem -= len; + if (!rem) { + WRITE_ONCE(error->fit, sg); + break; + } + } while (!sg_is_last(sg++)); + + return count; } static void i915_error_object_free(struct drm_i915_error_object *obj) @@ -944,6 +1020,7 @@ void __i915_gpu_state_free(struct kref *error_ref) cleanup_params(error); cleanup_uc_state(error); + err_free_sgl(error->sgl); kfree(error); } diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h index 3ec89a504de52..ff2652bbb0b08 100644 --- a/drivers/gpu/drm/i915/i915_gpu_error.h +++ b/drivers/gpu/drm/i915/i915_gpu_error.h @@ -192,6 +192,8 @@ struct i915_gpu_state { } *active_bo[I915_NUM_ENGINES], *pinned_bo; u32 active_bo_count[I915_NUM_ENGINES], pinned_bo_count; struct i915_address_space *active_vm[I915_NUM_ENGINES]; + + struct scatterlist *sgl, *fit; }; struct i915_gpu_error { @@ -298,29 +300,20 @@ struct i915_gpu_error { struct drm_i915_error_state_buf { struct drm_i915_private *i915; - unsigned int bytes; - unsigned int size; + struct scatterlist *sgl, *cur, *end; + + char *buf; + size_t bytes; + size_t size; + loff_t iter; + int err; - u8 *buf; - loff_t start; - loff_t pos; }; #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) __printf(2, 3) void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...); -int i915_error_state_to_str(struct drm_i915_error_state_buf *estr, - const struct i915_gpu_state *gpu); -int i915_error_state_buf_init(struct drm_i915_error_state_buf *eb, - struct drm_i915_private *i915, - size_t count, loff_t pos); - -static inline void -i915_error_state_buf_release(struct drm_i915_error_state_buf *eb) -{ - kfree(eb->buf); -} struct i915_gpu_state *i915_capture_gpu_state(struct drm_i915_private *i915); void i915_capture_error_state(struct drm_i915_private *dev_priv, @@ -334,6 +327,9 @@ i915_gpu_state_get(struct i915_gpu_state *gpu) return gpu; } +ssize_t i915_gpu_state_copy_to_buffer(struct i915_gpu_state *error, + char *buf, loff_t offset, size_t count); + void __i915_gpu_state_free(struct kref *kref); static inline void i915_gpu_state_put(struct i915_gpu_state *gpu) { diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c index e5e6f6bb2b05a..ae63a7d0f51df 100644 --- a/drivers/gpu/drm/i915/i915_sysfs.c +++ b/drivers/gpu/drm/i915/i915_sysfs.c @@ -516,26 +516,21 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj, { struct device *kdev = kobj_to_dev(kobj); - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev); - struct drm_i915_error_state_buf error_str; + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev); struct i915_gpu_state *gpu; ssize_t ret; - ret = i915_error_state_buf_init(&error_str, dev_priv, count, off); - if (ret) - return ret; - - gpu = i915_first_error_state(dev_priv); - ret = i915_error_state_to_str(&error_str, gpu); - if (ret) - goto out; - - ret = count < error_str.bytes ? count : error_str.bytes; - memcpy(buf, error_str.buf, ret); + gpu = i915_first_error_state(i915); + if (gpu) { + ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count); + i915_gpu_state_put(gpu); + } else { + const char *str = "No error state collected\n"; + size_t len = strlen(str); -out: - i915_gpu_state_put(gpu); - i915_error_state_buf_release(&error_str); + ret = min_t(size_t, count, len - off); + memcpy(buf, str + off, ret); + } return ret; }