author    Dave Airlie <airlied@redhat.com>    2022-02-24 16:59:17 +1000
committer Dave Airlie <airlied@redhat.com>    2022-02-25 05:26:55 +1000
commit    7f44571b53fd07e36ae4d2537a6fb40d79b39462 (patch)
tree      604a8ffa1ee0940d05446ae86c9b05d9645537bd /drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
parent    0a131b69c141638c1be85c4539c1513426abb2b2 (diff)
parent    30424ebae8df0f786835e7a31ad790fa00764f35 (diff)
download  linux-7f44571b53fd07e36ae4d2537a6fb40d79b39462.tar.gz
          linux-7f44571b53fd07e36ae4d2537a6fb40d79b39462.tar.bz2
          linux-7f44571b53fd07e36ae4d2537a6fb40d79b39462.zip
Merge tag 'drm-intel-next-2022-02-23' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
Linux core:
-----------
iosys-map: Add offset to iosys_map_memcpy_to() (Lucas)
iosys-map: Add a few more helpers (Lucas)

i915 (display and core changes on drm-intel-next):
--------------------------------------------------
- Display's DBuf and watermark related fixes and improvements (Ville)
- More i915 header and other code clean-up (Jani)
- Display IPS fixes and improvements (Ville)
- OPRegion fixes and cleanups (Jani)
- Fix the plane end Y offset check for FBC (Ville)
- DP 128b/132b updates (Jani)
- Disable runtime pm wakeref tracking for the mock device selftest (Ville)
- Many display code clean-up while targeting to fix up DP DFP 4:2:0 handling (Ville)
- Bigjoiner state tracking and more bigjoiner related work (Ville)
- Update DMC_DEBUG3 register for DG1 (Chuansheng)
- SAGV fixes (Ville)
- More GT register cleanup (Matt)
- Fix build issue when using clang (Tong)
- Display DG2 fixes (Matt)
- ADL-P PHY related fixes (Imre)
- PSR2 fixes (Jose)
- Add PCH Support for Alder Lake N (Tejas)

drm-intel-gt-next (drm-intel-gt-next-2022-02-17):
-------------------------------------------------

UAPI Changes:

- Weak parallel submission support for execlists

  Minimal implementation of the parallel submission support for
  execlists backend that was previously only implemented for GuC.
  Support one sibling non-virtual engine.

Core Changes:

- Two backmerges of drm/drm-next for header file renames/changes and
  i915_regs reorganization

Driver Changes:

- Add new DG2 subplatform: DG2-G12 (Matt R)
- Add new DG2 workarounds (Matt R, Ram, Bruce)
- Handle pre-programmed WOPCM registers for DG2+ (Daniele)
- Update guc shim control programming on XeHP SDV+ (Daniele)
- Add RPL-S C0/D0 stepping information (Anusha)
- Improve GuC ADS initialization to work on ARM64 on dGFX (Lucas)
- Fix KMD and GuC race on accessing PMU busyness (Umesh)
- Use PM timestamp instead of RING TIMESTAMP for reference in PMU with GuC (Umesh)
- Report error on invalid reset notification from GuC (John)
- Avoid WARN splat by holding RPM wakelock during PXP unbind (Juston)
- Fixes to parallel submission implementation (Matt B.)
- Improve GuC loading status check/error reports (John)
- Tweak TTM LRU priority hint selection (Matt A.)
- Align the plane_vma to min_page_size of stolen mem (Ram)
- Introduce vma resources and implement async unbinding (Thomas)
- Use struct vma_resource instead of struct vma_snapshot (Thomas)
- Return some TTM accel move errors instead of trying memcpy move (Thomas)
- Fix a race between vma / object destruction and unbinding (Thomas)
- Remove short-term pins from execbuf (Maarten)
- Update to GuC version 69.0.3 (John, Michal Wa.)
- Improvements to GT reset paths in GuC backend (Matt B.)
- Use shrinker_release_pages instead of writeback in shmem object hooks (Matt A., Tvrtko)
- Use trylock instead of blocking lock when freeing GEM objects (Maarten)
- Allocate intel_engine_coredump_alloc with ALLOW_FAIL (Matt B.)
- Fixes to object unmapping and purging (Matt A)
- Check for wedged device in GuC backend (John)
- Avoid lockdep splat by locking dpt_obj around set_cache_level (Maarten)
- Allow dead vm to unbind vma's without lock (Maarten)
- s/engine->i915/i915/ for DG2 engine workarounds (Matt R)
- Use to_gt() helper for GGTT accesses (Michal Wi.)
- Selftest improvements (Matt B., Thomas, Ram)
- Coding style and compiler warning fixes (Matt B., Jasmine, Andi, Colin, Gustavo, Dan)

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Rodrigo Vivi <rodrigo.vivi@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/YhbDan8wNZBR6FzF@intel.com
Diffstat (limited to 'drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c')
-rw-r--r--  drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c | 240
1 file changed, 115 insertions(+), 125 deletions(-)
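
For orientation before reading the diff: the largest functional change below ("Remove short-term pins from execbuf") reworks eb_reserve() from an unbounded retry loop into a bounded three-pass loop: pass 0 tries a non-blocking pin, pass 1 unbinds objects that no longer satisfy their placement constraints, and pass 2 forcibly unbinds everything and evicts the VM before one last attempt. The standalone C sketch below models only that control flow under those assumptions; try_reserve_all(), unbind_mismatched() and evict_everything() are hypothetical stand-ins, not i915 symbols.

/*
 * Simplified model of the bounded three-pass reservation loop that the
 * patch below introduces in eb_reserve().  All helpers here are
 * hypothetical stand-ins for illustration only.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

#define PIN_USER     (1 << 0)
#define PIN_VALIDATE (1 << 1)
#define PIN_NONBLOCK (1 << 2)

/* Pretend the address space is too fragmented until we evict. */
static bool evicted;

static int try_reserve_all(int pin_flags)	/* hypothetical */
{
	(void)pin_flags;
	return evicted ? 0 : -ENOSPC;
}

static void unbind_mismatched(bool force)	/* hypothetical */
{
	printf("unbind pass (force=%d)\n", (int)force);
}

static int evict_everything(void)		/* hypothetical */
{
	evicted = true;
	return 0;
}

static int reserve(void)
{
	int err = 0;

	for (int pass = 0; pass <= 2; pass++) {
		int pin_flags = PIN_USER | PIN_VALIDATE;

		if (pass == 0)
			pin_flags |= PIN_NONBLOCK;	/* opportunistic first try */

		if (pass >= 1)
			unbind_mismatched(pass == 2);	/* free up misplaced slots */

		if (pass == 2) {
			err = evict_everything();	/* last resort: defragment */
			if (err)
				return err;
		}

		err = try_reserve_all(pin_flags);
		if (err != -ENOSPC)
			break;				/* success, or a real error */
	}

	return err;
}

int main(void)
{
	printf("reserve() -> %d\n", reserve());
	return 0;
}

In the real driver the unbind and evict steps additionally re-sort the objects by placement priority and take the VM mutex; the sketch only shows why the old "pass counter plus goto-style retry" could be replaced by a plain for loop that gives up after the eviction pass.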
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 54cae821513b..d42f437149c9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,13 +25,13 @@
#include "i915_cmd_parser.h"
#include "i915_drv.h"
+#include "i915_file_private.h"
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_evict.h"
#include "i915_gem_ioctls.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
-#include "i915_vma_snapshot.h"
struct eb_vma {
struct i915_vma *vma;
@@ -443,7 +443,7 @@ eb_pin_vma(struct i915_execbuffer *eb,
else
pin_flags = entry->offset & PIN_OFFSET_MASK;
- pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED;
+ pin_flags |= PIN_USER | PIN_NOEVICT | PIN_OFFSET_FIXED | PIN_VALIDATE;
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_GTT))
pin_flags |= PIN_GLOBAL;
@@ -461,17 +461,15 @@ eb_pin_vma(struct i915_execbuffer *eb,
entry->pad_to_size,
entry->alignment,
eb_pin_flags(entry, ev->flags) |
- PIN_USER | PIN_NOEVICT);
+ PIN_USER | PIN_NOEVICT | PIN_VALIDATE);
if (unlikely(err))
return err;
}
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
+ if (unlikely(err))
return err;
- }
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -487,13 +485,9 @@ eb_pin_vma(struct i915_execbuffer *eb,
static inline void
eb_unreserve_vma(struct eb_vma *ev)
{
- if (!(ev->flags & __EXEC_OBJECT_HAS_PIN))
- return;
-
if (unlikely(ev->flags & __EXEC_OBJECT_HAS_FENCE))
__i915_vma_unpin_fence(ev->vma);
- __i915_vma_unpin(ev->vma);
ev->flags &= ~__EXEC_OBJECT_RESERVED;
}
@@ -675,10 +669,8 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
if (unlikely(ev->flags & EXEC_OBJECT_NEEDS_FENCE)) {
err = i915_vma_pin_fence(vma);
- if (unlikely(err)) {
- i915_vma_unpin(vma);
+ if (unlikely(err))
return err;
- }
if (vma->fence)
ev->flags |= __EXEC_OBJECT_HAS_FENCE;
@@ -690,85 +682,95 @@ static int eb_reserve_vma(struct i915_execbuffer *eb,
return 0;
}
-static int eb_reserve(struct i915_execbuffer *eb)
+static bool eb_unbind(struct i915_execbuffer *eb, bool force)
{
const unsigned int count = eb->buffer_count;
- unsigned int pin_flags = PIN_USER | PIN_NONBLOCK;
+ unsigned int i;
struct list_head last;
+ bool unpinned = false;
+
+ /* Resort *all* the objects into priority order */
+ INIT_LIST_HEAD(&eb->unbound);
+ INIT_LIST_HEAD(&last);
+
+ for (i = 0; i < count; i++) {
+ struct eb_vma *ev = &eb->vma[i];
+ unsigned int flags = ev->flags;
+
+ if (!force && flags & EXEC_OBJECT_PINNED &&
+ flags & __EXEC_OBJECT_HAS_PIN)
+ continue;
+
+ unpinned = true;
+ eb_unreserve_vma(ev);
+
+ if (flags & EXEC_OBJECT_PINNED)
+ /* Pinned must have their slot */
+ list_add(&ev->bind_link, &eb->unbound);
+ else if (flags & __EXEC_OBJECT_NEEDS_MAP)
+ /* Map require the lowest 256MiB (aperture) */
+ list_add_tail(&ev->bind_link, &eb->unbound);
+ else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
+ /* Prioritise 4GiB region for restricted bo */
+ list_add(&ev->bind_link, &last);
+ else
+ list_add_tail(&ev->bind_link, &last);
+ }
+
+ list_splice_tail(&last, &eb->unbound);
+ return unpinned;
+}
+
+static int eb_reserve(struct i915_execbuffer *eb)
+{
struct eb_vma *ev;
- unsigned int i, pass;
+ unsigned int pass;
int err = 0;
+ bool unpinned;
/*
* Attempt to pin all of the buffers into the GTT.
- * This is done in 3 phases:
+ * This is done in 2 phases:
*
- * 1a. Unbind all objects that do not match the GTT constraints for
- * the execbuffer (fenceable, mappable, alignment etc).
- * 1b. Increment pin count for already bound objects.
- * 2. Bind new objects.
- * 3. Decrement pin count.
+ * 1. Unbind all objects that do not match the GTT constraints for
+ * the execbuffer (fenceable, mappable, alignment etc).
+ * 2. Bind new objects.
*
* This avoid unnecessary unbinding of later objects in order to make
* room for the earlier objects *unless* we need to defragment.
+ *
+ * Defragmenting is skipped if all objects are pinned at a fixed location.
*/
- pass = 0;
- do {
- list_for_each_entry(ev, &eb->unbound, bind_link) {
- err = eb_reserve_vma(eb, ev, pin_flags);
- if (err)
- break;
- }
- if (err != -ENOSPC)
- return err;
+ for (pass = 0; pass <= 2; pass++) {
+ int pin_flags = PIN_USER | PIN_VALIDATE;
- /* Resort *all* the objects into priority order */
- INIT_LIST_HEAD(&eb->unbound);
- INIT_LIST_HEAD(&last);
- for (i = 0; i < count; i++) {
- unsigned int flags;
+ if (pass == 0)
+ pin_flags |= PIN_NONBLOCK;
- ev = &eb->vma[i];
- flags = ev->flags;
- if (flags & EXEC_OBJECT_PINNED &&
- flags & __EXEC_OBJECT_HAS_PIN)
- continue;
+ if (pass >= 1)
+ unpinned = eb_unbind(eb, pass == 2);
- eb_unreserve_vma(ev);
-
- if (flags & EXEC_OBJECT_PINNED)
- /* Pinned must have their slot */
- list_add(&ev->bind_link, &eb->unbound);
- else if (flags & __EXEC_OBJECT_NEEDS_MAP)
- /* Map require the lowest 256MiB (aperture) */
- list_add_tail(&ev->bind_link, &eb->unbound);
- else if (!(flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS))
- /* Prioritise 4GiB region for restricted bo */
- list_add(&ev->bind_link, &last);
- else
- list_add_tail(&ev->bind_link, &last);
- }
- list_splice_tail(&last, &eb->unbound);
-
- switch (pass++) {
- case 0:
- break;
-
- case 1:
- /* Too fragmented, unbind everything and retry */
- mutex_lock(&eb->context->vm->mutex);
- err = i915_gem_evict_vm(eb->context->vm);
- mutex_unlock(&eb->context->vm->mutex);
+ if (pass == 2) {
+ err = mutex_lock_interruptible(&eb->context->vm->mutex);
+ if (!err) {
+ err = i915_gem_evict_vm(eb->context->vm, &eb->ww);
+ mutex_unlock(&eb->context->vm->mutex);
+ }
if (err)
return err;
- break;
+ }
- default:
- return -ENOSPC;
+ list_for_each_entry(ev, &eb->unbound, bind_link) {
+ err = eb_reserve_vma(eb, ev, pin_flags);
+ if (err)
+ break;
}
- pin_flags = PIN_USER;
- } while (1);
+ if (err != -ENOSPC)
+ break;
+ }
+
+ return err;
}
static int eb_select_context(struct i915_execbuffer *eb)
@@ -1097,7 +1099,7 @@ static inline struct i915_ggtt *cache_to_ggtt(struct reloc_cache *cache)
{
struct drm_i915_private *i915 =
container_of(cache, struct i915_execbuffer, reloc_cache)->i915;
- return &i915->ggtt;
+ return to_gt(i915)->ggtt;
}
static void reloc_cache_unmap(struct reloc_cache *cache)
@@ -1216,10 +1218,11 @@ static void *reloc_kmap(struct drm_i915_gem_object *obj,
return vaddr;
}
-static void *reloc_iomap(struct drm_i915_gem_object *obj,
+static void *reloc_iomap(struct i915_vma *batch,
struct i915_execbuffer *eb,
unsigned long page)
{
+ struct drm_i915_gem_object *obj = batch->obj;
struct reloc_cache *cache = &eb->reloc_cache;
struct i915_ggtt *ggtt = cache_to_ggtt(cache);
unsigned long offset;
@@ -1229,7 +1232,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
intel_gt_flush_ggtt_writes(ggtt->vm.gt);
io_mapping_unmap_atomic((void __force __iomem *) unmask_page(cache->vaddr));
} else {
- struct i915_vma *vma;
+ struct i915_vma *vma = ERR_PTR(-ENODEV);
int err;
if (i915_gem_object_is_tiled(obj))
@@ -1242,10 +1245,23 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
if (err)
return ERR_PTR(err);
- vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
- PIN_MAPPABLE |
- PIN_NONBLOCK /* NOWARN */ |
- PIN_NOEVICT);
+ /*
+ * i915_gem_object_ggtt_pin_ww may attempt to remove the batch
+ * VMA from the object list because we no longer pin.
+ *
+ * Only attempt to pin the batch buffer to ggtt if the current batch
+ * is not inside ggtt, or the batch buffer is not misplaced.
+ */
+ if (!i915_is_ggtt(batch->vm)) {
+ vma = i915_gem_object_ggtt_pin_ww(obj, &eb->ww, NULL, 0, 0,
+ PIN_MAPPABLE |
+ PIN_NONBLOCK /* NOWARN */ |
+ PIN_NOEVICT);
+ } else if (i915_vma_is_map_and_fenceable(batch)) {
+ __i915_vma_pin(batch);
+ vma = batch;
+ }
+
if (vma == ERR_PTR(-EDEADLK))
return vma;
@@ -1283,7 +1299,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
return vaddr;
}
-static void *reloc_vaddr(struct drm_i915_gem_object *obj,
+static void *reloc_vaddr(struct i915_vma *vma,
struct i915_execbuffer *eb,
unsigned long page)
{
@@ -1295,9 +1311,9 @@ static void *reloc_vaddr(struct drm_i915_gem_object *obj,
} else {
vaddr = NULL;
if ((cache->vaddr & KMAP) == 0)
- vaddr = reloc_iomap(obj, eb, page);
+ vaddr = reloc_iomap(vma, eb, page);
if (!vaddr)
- vaddr = reloc_kmap(obj, cache, page);
+ vaddr = reloc_kmap(vma->obj, cache, page);
}
return vaddr;
@@ -1338,7 +1354,7 @@ relocate_entry(struct i915_vma *vma,
void *vaddr;
repeat:
- vaddr = reloc_vaddr(vma->obj, eb,
+ vaddr = reloc_vaddr(vma, eb,
offset >> PAGE_SHIFT);
if (IS_ERR(vaddr))
return PTR_ERR(vaddr);
@@ -1413,7 +1429,7 @@ eb_relocate_entry(struct i915_execbuffer *eb,
mutex_lock(&vma->vm->mutex);
err = i915_vma_bind(target->vma,
target->vma->obj->cache_level,
- PIN_GLOBAL, NULL);
+ PIN_GLOBAL, NULL, NULL);
mutex_unlock(&vma->vm->mutex);
reloc_cache_remap(&eb->reloc_cache, ev->vma->obj);
if (err)
@@ -1943,7 +1959,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
{
const unsigned int count = eb->buffer_count;
unsigned int i = count, j;
- struct i915_vma_snapshot *vsnap;
while (i--) {
struct eb_vma *ev = &eb->vma[i];
@@ -1953,11 +1968,6 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
if (!(flags & EXEC_OBJECT_CAPTURE))
continue;
- vsnap = i915_vma_snapshot_alloc(GFP_KERNEL);
- if (!vsnap)
- continue;
-
- i915_vma_snapshot_init(vsnap, vma, "user");
for_each_batch_create_order(eb, j) {
struct i915_capture_list *capture;
@@ -1966,10 +1976,9 @@ static void eb_capture_stage(struct i915_execbuffer *eb)
continue;
capture->next = eb->capture_lists[j];
- capture->vma_snapshot = i915_vma_snapshot_get(vsnap);
+ capture->vma_res = i915_vma_resource_get(vma->resource);
eb->capture_lists[j] = capture;
}
- i915_vma_snapshot_put(vsnap);
}
}
@@ -2200,7 +2209,7 @@ shadow_batch_pin(struct i915_execbuffer *eb,
if (IS_ERR(vma))
return vma;
- err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags);
+ err = i915_vma_pin_ww(vma, &eb->ww, 0, 0, flags | PIN_VALIDATE);
if (err)
return ERR_PTR(err);
@@ -2214,7 +2223,7 @@ static struct i915_vma *eb_dispatch_secure(struct i915_execbuffer *eb, struct i9
* batch" bit. Hence we need to pin secure batches into the global gtt.
* hsw should have this fixed, but bdw mucks it up again. */
if (eb->batch_flags & I915_DISPATCH_SECURE)
- return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, 0);
+ return i915_gem_object_ggtt_pin_ww(vma->obj, &eb->ww, NULL, 0, 0, PIN_VALIDATE);
return NULL;
}
@@ -2265,13 +2274,12 @@ static int eb_parse(struct i915_execbuffer *eb)
err = i915_gem_object_lock(pool->obj, &eb->ww);
if (err)
- goto err;
+ return err;
shadow = shadow_batch_pin(eb, pool->obj, eb->context->vm, PIN_USER);
- if (IS_ERR(shadow)) {
- err = PTR_ERR(shadow);
- goto err;
- }
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
intel_gt_buffer_pool_mark_used(pool);
i915_gem_object_set_readonly(shadow->obj);
shadow->private = pool;
@@ -2283,25 +2291,21 @@ static int eb_parse(struct i915_execbuffer *eb)
shadow = shadow_batch_pin(eb, pool->obj,
&eb->gt->ggtt->vm,
PIN_GLOBAL);
- if (IS_ERR(shadow)) {
- err = PTR_ERR(shadow);
- shadow = trampoline;
- goto err_shadow;
- }
+ if (IS_ERR(shadow))
+ return PTR_ERR(shadow);
+
shadow->private = pool;
eb->batch_flags |= I915_DISPATCH_SECURE;
}
batch = eb_dispatch_secure(eb, shadow);
- if (IS_ERR(batch)) {
- err = PTR_ERR(batch);
- goto err_trampoline;
- }
+ if (IS_ERR(batch))
+ return PTR_ERR(batch);
err = dma_resv_reserve_shared(shadow->obj->base.resv, 1);
if (err)
- goto err_trampoline;
+ return err;
err = intel_engine_cmd_parser(eb->context->engine,
eb->batches[0]->vma,
@@ -2309,7 +2313,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_len[0],
shadow, trampoline);
if (err)
- goto err_unpin_batch;
+ return err;
eb->batches[0] = &eb->vma[eb->buffer_count++];
eb->batches[0]->vma = i915_vma_get(shadow);
@@ -2328,17 +2332,6 @@ secure_batch:
eb->batches[0]->vma = i915_vma_get(batch);
}
return 0;
-
-err_unpin_batch:
- if (batch)
- i915_vma_unpin(batch);
-err_trampoline:
- if (trampoline)
- i915_vma_unpin(trampoline);
-err_shadow:
- i915_vma_unpin(shadow);
-err:
- return err;
}
static int eb_request_submit(struct i915_execbuffer *eb,
@@ -3277,9 +3270,8 @@ eb_requests_create(struct i915_execbuffer *eb, struct dma_fence *in_fence,
* _onstack interface.
*/
if (eb->batches[i]->vma)
- i915_vma_snapshot_init_onstack(&eb->requests[i]->batch_snapshot,
- eb->batches[i]->vma,
- "batch");
+ eb->requests[i]->batch_res =
+ i915_vma_resource_get(eb->batches[i]->vma->resource);
if (eb->batch_pool) {
GEM_BUG_ON(intel_context_is_parallel(eb->context));
intel_gt_buffer_pool_mark_active(eb->batch_pool,
@@ -3464,8 +3456,6 @@ err_request:
err_vma:
eb_release_vmas(&eb, true);
- if (eb.trampoline)
- i915_vma_unpin(eb.trampoline);
WARN_ON(err == -EDEADLK);
i915_gem_ww_ctx_fini(&eb.ww);