From 67a3acaab7167157fb827595019eaf55df244824 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 22 Nov 2019 09:49:24 +0000 Subject: drm/i915: Use a ctor for TYPESAFE_BY_RCU i915_request As we start peeking into requests for longer and longer, e.g. incorporating use of spinlocks when only protected by an rcu_read_lock(), we need to be careful in how we reset the request when recycling and need to preserve any barriers that may still be in use as the request is reset for reuse. Quoting Linus Torvalds: > If there is refcounting going on then why use SLAB_TYPESAFE_BY_RCU? .. because the object can be accessed (by RCU) after the refcount has gone down to zero, and the thing has been released. That's the whole and only point of SLAB_TYPESAFE_BY_RCU. That flag basically says: "I may end up accessing this object *after* it has been free'd, because there may be RCU lookups in flight" This has nothing to do with constructors. It's ok if the object gets reused as an object of the same type and does *not* get re-initialized, because we're perfectly fine seeing old stale data. What it guarantees is that the slab isn't shared with any other kind of object, _and_ that the underlying pages are free'd after an RCU quiescent period (so the pages aren't shared with another kind of object either during an RCU walk). And it doesn't necessarily have to have a constructor, because the thing that a RCU walk will care about is (a) guaranteed to be an object that *has* been on some RCU list (so it's not a "new" object) (b) the RCU walk needs to have logic to verify that it's still the *same* object and hasn't been re-used as something else. In contrast, a SLAB_TYPESAFE_BY_RCU memory gets free'd and re-used immediately, but because it gets reused as the same kind of object, the RCU walker can "know" what parts have meaning for re-use, in a way it couidn't if the re-use was random. That said, it *is* subtle, and people should be careful. > So the re-use might initialize the fields lazily, not necessarily using a ctor. If you have a well-defined refcount, and use "atomic_inc_not_zero()" to guard the speculative RCU access section, and use "atomic_dec_and_test()" in the freeing section, then you should be safe wrt new allocations. If you have a completely new allocation that has "random stale content", you know that it cannot be on the RCU list, so there is no speculative access that can ever see that random content. So the only case you need to worry about is a re-use allocation, and you know that the refcount will start out as zero even if you don't have a constructor. So you can think of the refcount itself as always having a zero constructor, *BUT* you need to be careful with ordering. In particular, whoever does the allocation needs to then set the refcount to a non-zero value *after* it has initialized all the other fields. And in particular, it needs to make sure that it uses the proper memory ordering to do so. NOTE! One thing to be very worried about is that re-initializing whatever RCU lists means that now the RCU walker may be walking on the wrong list so the walker may do the right thing for this particular entry, but it may miss walking *other* entries. So then you can get spurious lookup failures, because the RCU walker never walked all the way to the end of the right list. That ends up being a much more subtle bug. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191122094924.629690-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 49 +++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 00011f9533b6..a558f64186fa 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -188,7 +188,7 @@ static void free_capture_list(struct i915_request *request) { struct i915_capture_list *capture; - capture = request->capture_list; + capture = fetch_and_zero(&request->capture_list); while (capture) { struct i915_capture_list *next = capture->next; @@ -214,7 +214,7 @@ static void remove_from_engine(struct i915_request *rq) spin_lock(&engine->active.lock); locked = engine; } - list_del(&rq->sched.link); + list_del_init(&rq->sched.link); spin_unlock_irq(&locked->active.lock); } @@ -586,6 +586,21 @@ out: return kmem_cache_alloc(global.slab_requests, gfp); } +static void __i915_request_ctor(void *arg) +{ + struct i915_request *rq = arg; + + spin_lock_init(&rq->lock); + i915_sched_node_init(&rq->sched); + i915_sw_fence_init(&rq->submit, submit_notify); + i915_sw_fence_init(&rq->semaphore, semaphore_notify); + + rq->file_priv = NULL; + rq->capture_list = NULL; + + INIT_LIST_HEAD(&rq->execute_cb); +} + struct i915_request * __i915_request_create(struct intel_context *ce, gfp_t gfp) { @@ -648,6 +663,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; + rq->flags = 0; rcu_assign_pointer(rq->timeline, tl); rq->hwsp_seqno = tl->hwsp_seqno; @@ -655,23 +671,20 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ - spin_lock_init(&rq->lock); dma_fence_init(&rq->fence, &i915_fence_ops, &rq->lock, tl->fence_context, seqno); /* We bump the ref for the fence chain */ - i915_sw_fence_init(&i915_request_get(rq)->submit, submit_notify); - i915_sw_fence_init(&i915_request_get(rq)->semaphore, semaphore_notify); + i915_sw_fence_reinit(&i915_request_get(rq)->submit); + i915_sw_fence_reinit(&i915_request_get(rq)->semaphore); - i915_sched_node_init(&rq->sched); + i915_sched_node_reinit(&rq->sched); - /* No zalloc, must clear what we need by hand */ - rq->file_priv = NULL; + /* No zalloc, everything must be cleared after use */ rq->batch = NULL; - rq->capture_list = NULL; - rq->flags = 0; - - INIT_LIST_HEAD(&rq->execute_cb); + GEM_BUG_ON(rq->file_priv); + GEM_BUG_ON(rq->capture_list); + GEM_BUG_ON(!list_empty(&rq->execute_cb)); /* * Reserve space in the ring buffer for all the commands required to @@ -1533,10 +1546,14 @@ static struct i915_global_request global = { { int __init i915_global_request_init(void) { - global.slab_requests = KMEM_CACHE(i915_request, - SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_TYPESAFE_BY_RCU); + global.slab_requests = + kmem_cache_create("i915_request", + sizeof(struct i915_request), + __alignof__(struct i915_request), + SLAB_HWCACHE_ALIGN | + SLAB_RECLAIM_ACCOUNT | + SLAB_TYPESAFE_BY_RCU, + __i915_request_ctor); if (!global.slab_requests) return -ENOMEM; -- cgit From 9e31c1fe45d555a948ff66f1f0e3fe1f83ca63f7 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 6 Dec 2019 16:04:27 +0000 Subject: drm/i915: Propagate errors on awaiting already signaled fences If we see an already signaled fence that we want to await on, we skip adding to the i915_sw_fence. However, we should pay attention to whether there was an error on that fence and if so propagate it for our future request. Signed-off-by: Chris Wilson Reviewed-by: Matthew Auld Link: https://patchwork.freedesktop.org/patch/msgid/20191206160428.1503343-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a558f64186fa..3fa1650975b8 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -958,8 +958,10 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + i915_sw_fence_set_error_once(&rq->submit, fence->error); continue; + } /* * Requests on the same timeline are explicitly ordered, along @@ -1015,8 +1017,10 @@ i915_request_await_execution(struct i915_request *rq, do { fence = *child++; - if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) + if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) { + i915_sw_fence_set_error_once(&rq->submit, fence->error); continue; + } /* * We don't squash repeated fence dependencies here as we -- cgit From c81471f5e95c79c55687282ff6800f112b5d560b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 10 Dec 2019 15:13:32 +0000 Subject: drm/i915: Copy across scheduler behaviour flags across submit fences We want the bonded request to have the same scheduler properties as its master so that it is placed at the same depth in the queue. For example, consider we have requests A, B and B', where B & B' are a bonded pair to run in parallel on two engines. A -> B \- B' B will run after A and so may be scheduled on an idle engine and wait on A using a semaphore. B' sees B being executed and so enters the queue on the same engine as A. As B' did not inherit the semaphore-chain from B, it may have higher precedence than A and so preempts execution. However, B' then sits on a semaphore waiting for B, who is waiting for A, who is blocked by B. Ergo B' needs to inherit the scheduler properties from B (i.e. the semaphore chain) so that it is scheduled with the same priority as B and will not be executed ahead of Bs dependencies. Furthermore, to prevent the priorities changing via the expose fence on B', we need to couple in the dependencies for PI. This requires us to relax our sanity-checks that dependencies are strictly in order. v2: Synchronise (B, B') execution on all platforms, regardless of using a scheduler, any no-op syncs should be elided. Fixes: ee1136908e9b ("drm/i915/execlists: Virtual engine bonding") Closes: https://gitlab.freedesktop.org/drm/intel/issues/464 Testcase: igt/gem_exec_balancer/bonded-chain Testcase: igt/gem_exec_balancer/bonded-semaphore Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191210151332.3902215-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 114 ++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 25 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ddc6c311349c..a6238c626a16 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -300,11 +300,11 @@ void i915_request_retire_upto(struct i915_request *rq) } static int -__i915_request_await_execution(struct i915_request *rq, - struct i915_request *signal, - void (*hook)(struct i915_request *rq, - struct dma_fence *signal), - gfp_t gfp) +__await_execution(struct i915_request *rq, + struct i915_request *signal, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal), + gfp_t gfp) { struct execute_cb *cb; @@ -341,6 +341,8 @@ __i915_request_await_execution(struct i915_request *rq, } spin_unlock_irq(&signal->lock); + /* Copy across semaphore status as we need the same behaviour */ + rq->sched.flags |= signal->sched.flags; return 0; } @@ -824,31 +826,21 @@ already_busywaiting(struct i915_request *rq) } static int -emit_semaphore_wait(struct i915_request *to, - struct i915_request *from, - gfp_t gfp) +__emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + u32 seqno) { const int has_token = INTEL_GEN(to->i915) >= 12; u32 hwsp_offset; - int len; + int len, err; u32 *cs; GEM_BUG_ON(INTEL_GEN(to->i915) < 8); - /* Just emit the first semaphore we see as request space is limited. */ - if (already_busywaiting(to) & from->engine->mask) - goto await_fence; - - if (i915_request_await_start(to, from) < 0) - goto await_fence; - - /* Only submit our spinner after the signaler is running! */ - if (__i915_request_await_execution(to, from, NULL, gfp)) - goto await_fence; - /* We need to pin the signaler's HWSP until we are finished reading. */ - if (intel_timeline_read_hwsp(from, to, &hwsp_offset)) - goto await_fence; + err = intel_timeline_read_hwsp(from, to, &hwsp_offset); + if (err) + return err; len = 4; if (has_token) @@ -871,7 +863,7 @@ emit_semaphore_wait(struct i915_request *to, MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_GTE_SDD) + has_token; - *cs++ = from->fence.seqno; + *cs++ = seqno; *cs++ = hwsp_offset; *cs++ = 0; if (has_token) { @@ -880,6 +872,28 @@ emit_semaphore_wait(struct i915_request *to, } intel_ring_advance(to, cs); + return 0; +} + +static int +emit_semaphore_wait(struct i915_request *to, + struct i915_request *from, + gfp_t gfp) +{ + /* Just emit the first semaphore we see as request space is limited. */ + if (already_busywaiting(to) & from->engine->mask) + goto await_fence; + + if (i915_request_await_start(to, from) < 0) + goto await_fence; + + /* Only submit our spinner after the signaler is running! */ + if (__await_execution(to, from, NULL, gfp)) + goto await_fence; + + if (__emit_semaphore_wait(to, from, from->fence.seqno)) + goto await_fence; + to->sched.semaphores |= from->engine->mask; to->sched.flags |= I915_SCHED_HAS_SEMAPHORE_CHAIN; return 0; @@ -995,6 +1009,57 @@ i915_request_await_dma_fence(struct i915_request *rq, struct dma_fence *fence) return 0; } +static bool intel_timeline_sync_has_start(struct intel_timeline *tl, + struct dma_fence *fence) +{ + return __intel_timeline_sync_is_later(tl, + fence->context, + fence->seqno - 1); +} + +static int intel_timeline_sync_set_start(struct intel_timeline *tl, + const struct dma_fence *fence) +{ + return __intel_timeline_sync_set(tl, fence->context, fence->seqno - 1); +} + +static int +__i915_request_await_execution(struct i915_request *to, + struct i915_request *from, + void (*hook)(struct i915_request *rq, + struct dma_fence *signal)) +{ + int err; + + /* Submit both requests at the same time */ + err = __await_execution(to, from, hook, I915_FENCE_GFP); + if (err) + return err; + + /* Squash repeated depenendices to the same timelines */ + if (intel_timeline_sync_has_start(i915_request_timeline(to), + &from->fence)) + return 0; + + /* Ensure both start together [after all semaphores in signal] */ + if (intel_engine_has_semaphores(to->engine)) + err = __emit_semaphore_wait(to, from, from->fence.seqno - 1); + else + err = i915_request_await_start(to, from); + if (err < 0) + return err; + + /* Couple the dependency tree for PI on this exposed to->fence */ + if (to->engine->schedule) { + err = i915_sched_node_add_dependency(&to->sched, &from->sched); + if (err < 0) + return err; + } + + return intel_timeline_sync_set_start(i915_request_timeline(to), + &from->fence); +} + int i915_request_await_execution(struct i915_request *rq, struct dma_fence *fence, @@ -1030,8 +1095,7 @@ i915_request_await_execution(struct i915_request *rq, if (dma_fence_is_i915(fence)) ret = __i915_request_await_execution(rq, to_request(fence), - hook, - I915_FENCE_GFP); + hook); else ret = i915_sw_fence_await_dma_fence(&rq->submit, fence, I915_FENCE_TIMEOUT, -- cgit From 65c29dbb19b2451990c5c477fef7ada3b8218f05 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 11 Dec 2019 15:02:04 +0000 Subject: drm/i915: Use the i915_device name for identifying our request fences Use the dev_name(i915) to identify the requests for debugging, so we can tell different device timelines apart. Signed-off-by: Chris Wilson Cc: Venkata Sandeep Dhanalakota Reviewed-by: Venkata Sandeep Dhanalakota Link: https://patchwork.freedesktop.org/patch/msgid/20191211150204.133471-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a6238c626a16..51bb8a0812a1 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -57,7 +57,7 @@ static struct i915_global_request { static const char *i915_fence_get_driver_name(struct dma_fence *fence) { - return "i915"; + return dev_name(to_request(fence)->i915->drm.dev); } static const char *i915_fence_get_timeline_name(struct dma_fence *fence) @@ -74,7 +74,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->gem_context->name ?: "[i915]"; + return to_request(fence)->gem_context->name ?: "[" DRIVER_NAME "]"; } static bool i915_fence_signaled(struct dma_fence *fence) -- cgit From 639f2f24895fb37dd67dfecabd2c74019ed64140 Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Fri, 13 Dec 2019 07:51:52 -0800 Subject: drm/i915: Introduce new macros for tracing New macros ENGINE_TRACE(), CE_TRACE(), RQ_TRACE() and GT_TRACE() are introduce to tag device name and engine name with contexts and requests tracing in i915. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191213155152.69182-2-venkata.s.dhanalakota@intel.com --- drivers/gpu/drm/i915/i915_request.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 51bb8a0812a1..c6d59d263550 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -223,10 +223,7 @@ bool i915_request_retire(struct i915_request *rq) if (!i915_request_completed(rq)) return false; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - rq->engine->name, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); + RQ_TRACE(rq, "\n"); GEM_BUG_ON(!i915_sw_fence_signaled(&rq->submit)); trace_i915_request_retire(rq); @@ -287,10 +284,7 @@ void i915_request_retire_upto(struct i915_request *rq) struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *tmp; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - rq->engine->name, - rq->fence.context, rq->fence.seqno, - hwsp_seqno(rq)); + RQ_TRACE(rq, "\n"); GEM_BUG_ON(!i915_request_completed(rq)); @@ -351,10 +345,7 @@ bool __i915_request_submit(struct i915_request *request) struct intel_engine_cs *engine = request->engine; bool result = false; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - engine->name, - request->fence.context, request->fence.seqno, - hwsp_seqno(request)); + RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); @@ -443,10 +434,7 @@ void __i915_request_unsubmit(struct i915_request *request) { struct intel_engine_cs *engine = request->engine; - GEM_TRACE("%s fence %llx:%lld, current %d\n", - engine->name, - request->fence.context, request->fence.seqno, - hwsp_seqno(request)); + RQ_TRACE(request, "\n"); GEM_BUG_ON(!irqs_disabled()); lockdep_assert_held(&engine->active.lock); @@ -1261,8 +1249,7 @@ struct i915_request *__i915_request_commit(struct i915_request *rq) struct intel_ring *ring = rq->ring; u32 *cs; - GEM_TRACE("%s fence %llx:%lld\n", - engine->name, rq->fence.context, rq->fence.seqno); + RQ_TRACE(rq, "\n"); /* * To ensure that this call will not fail, space for its emissions -- cgit From f1925f3309d13d431f70e7b6b72ba59cae90fdff Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 13 Dec 2019 16:03:47 +0000 Subject: drm/i915: Use EAGAIN for trylock failures While not good behaviour, it is, however, established behaviour that we can punt EAGAIN to userspace if we need to retry the ioctl. When trying to acquire a mutex, prefer to use EAGAIN to propagate losing the race so that if it does end up back in userspace, we try again. Fixes: c81471f5e95c ("drm/i915: Copy across scheduler behaviour flags across submit fences") Closes: https://gitlab.freedesktop.org/drm/intel/issues/800 Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191213160347.1789004-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index c6d59d263550..af2f78e040d7 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -771,7 +771,7 @@ i915_request_await_start(struct i915_request *rq, struct i915_request *signal) if (!tl) /* already started or maybe even completed */ return 0; - fence = ERR_PTR(-EBUSY); + fence = ERR_PTR(-EAGAIN); if (mutex_trylock(&tl->mutex)) { fence = NULL; if (!i915_request_started(signal) && -- cgit From 9ddc8ec027a39759c946c3f4944d3e0c5a007ccd Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 16 Dec 2019 16:53:17 +0000 Subject: drm/i915: Eliminate the trylock for awaiting an earlier request We currently use an error-prone mutex_trylock to grab another timeline to find an earlier request along it. However, with a bit of a sleight-of-hand, we can reduce the mutex_trylock to a spin_lock on the immediate request and careful pointer chasing to acquire a reference on the previous request. Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191216165317.2742896-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 39 ++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index af2f78e040d7..a59b803aef92 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -756,34 +756,37 @@ err_unlock: static int i915_request_await_start(struct i915_request *rq, struct i915_request *signal) { - struct intel_timeline *tl; struct dma_fence *fence; int err; GEM_BUG_ON(i915_request_timeline(rq) == rcu_access_pointer(signal->timeline)); + fence = NULL; rcu_read_lock(); - tl = rcu_dereference(signal->timeline); - if (i915_request_started(signal) || !kref_get_unless_zero(&tl->kref)) - tl = NULL; - rcu_read_unlock(); - if (!tl) /* already started or maybe even completed */ - return 0; + spin_lock_irq(&signal->lock); + if (!i915_request_started(signal) && + !list_is_first(&signal->link, + &rcu_dereference(signal->timeline)->requests)) { + struct i915_request *prev = list_prev_entry(signal, link); - fence = ERR_PTR(-EAGAIN); - if (mutex_trylock(&tl->mutex)) { - fence = NULL; - if (!i915_request_started(signal) && - !list_is_first(&signal->link, &tl->requests)) { - signal = list_prev_entry(signal, link); - fence = dma_fence_get(&signal->fence); + /* + * Peek at the request before us in the timeline. That + * request will only be valid before it is retired, so + * after acquiring a reference to it, confirm that it is + * still part of the signaler's timeline. + */ + if (i915_request_get_rcu(prev)) { + if (list_next_entry(prev, link) == signal) + fence = &prev->fence; + else + i915_request_put(prev); } - mutex_unlock(&tl->mutex); } - intel_timeline_put(tl); - if (IS_ERR_OR_NULL(fence)) - return PTR_ERR_OR_ZERO(fence); + spin_unlock_irq(&signal->lock); + rcu_read_unlock(); + if (!fence) + return 0; err = 0; if (intel_timeline_sync_is_later(i915_request_timeline(rq), fence)) -- cgit From 85bedbf191e82aac0d7f05623bccfeccdcd91cea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Dec 2019 01:16:59 +0000 Subject: drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp As we stash a pointer to the HWSP cacheline on the request, when reading it we only need confirm that the cacheline is still valid by checking that the request and timeline are still intact. v2: Protect hwsp_cachline with RCU Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191217011659.3092130-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index a59b803aef92..269470d3527a 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -655,9 +655,9 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->execution_mask = ce->engine->mask; rq->flags = 0; - rcu_assign_pointer(rq->timeline, tl); + RCU_INIT_POINTER(rq->timeline, tl); + RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); rq->hwsp_seqno = tl->hwsp_seqno; - rq->hwsp_cacheline = tl->hwsp_cacheline; rq->rcustate = get_state_synchronize_rcu(); /* acts as smp_mb() */ -- cgit From 54400257ae523fa7fff11fe4209e7f9dcafdefa0 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Dec 2019 09:56:41 +0000 Subject: drm/i915/gt: Remove direct invocation of breadcrumb signaling Only signal the breadcrumbs from inside the irq_work, simplifying our interface and calling conventions. The micro-optimisation here is that by always using the irq_work interface, we know we are always inside an irq-off critical section for the breadcrumb signaling and can ellide save/restore of the irq flags. Signed-off-by: Chris Wilson Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191217095642.3124521-7-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 269470d3527a..2118284b796e 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -408,7 +408,7 @@ xfer: /* We may be recursing from the signal callback of another i915 fence */ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &request->fence.flags) && !test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &request->fence.flags) && !i915_request_enable_breadcrumb(request)) - intel_engine_queue_breadcrumbs(engine); + intel_engine_signal_breadcrumbs(engine); __notify_execute_cb(request); -- cgit From 9f3ccd40acf4a348aab4eda140cdb4d2f1f773b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Dec 2019 10:12:29 +0000 Subject: drm/i915: Drop GEM context as a direct link from i915_request Keep the intel_context as being the primary state for i915_request, with the GEM context a backpointer from the low level state for the rarer cases we need client information. Our goal is to remove such references to clients from the backend, and leave the HW submission agnostic to client interfaces and self-contained. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191220101230.256839-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 2118284b796e..218d20d4e414 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -62,6 +62,8 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence) static const char *i915_fence_get_timeline_name(struct dma_fence *fence) { + const struct i915_gem_context *ctx; + /* * The timeline struct (as part of the ppgtt underneath a context) * may be freed when the request is no longer in use by the GPU. @@ -74,7 +76,11 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - return to_request(fence)->gem_context->name ?: "[" DRIVER_NAME "]"; + ctx = to_request(fence)->context->gem_context; + if (!ctx) + return "[" DRIVER_NAME "]"; + + return ctx->name; } static bool i915_fence_signaled(struct dma_fence *fence) @@ -269,8 +275,8 @@ bool i915_request_retire(struct i915_request *rq) remove_from_client(rq); list_del(&rq->link); - intel_context_exit(rq->hw_context); - intel_context_unpin(rq->hw_context); + intel_context_exit(rq->context); + intel_context_unpin(rq->context); free_capture_list(rq); i915_sched_node_fini(&rq->sched); @@ -369,7 +375,7 @@ bool __i915_request_submit(struct i915_request *request) if (i915_request_completed(request)) goto xfer; - if (i915_gem_context_is_banned(request->gem_context)) + if (intel_context_is_banned(request->context)) i915_request_skip(request, -EIO); /* @@ -648,8 +654,7 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) goto err_free; rq->i915 = ce->engine->i915; - rq->hw_context = ce; - rq->gem_context = ce->gem_context; + rq->context = ce; rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; @@ -917,7 +922,7 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) &from->submit, I915_FENCE_GFP); } else if (intel_engine_has_semaphores(to->engine) && - to->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { + to->context->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); } else { ret = i915_sw_fence_await_dma_fence(&to->submit, @@ -1298,7 +1303,7 @@ void __i915_request_queue(struct i915_request *rq, void i915_request_add(struct i915_request *rq) { - struct i915_sched_attr attr = rq->gem_context->sched; + struct i915_sched_attr attr = rq->context->gem_context->sched; struct intel_timeline * const tl = i915_request_timeline(rq); struct i915_request *prev; -- cgit From 0f100b70487ab8b1323eed1f99cfc10eb18a688e Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Dec 2019 10:12:30 +0000 Subject: drm/i915: Push the use-semaphore marker onto the intel_context Instead of rummaging through the intel_context to peek at the GEM context in the middle of request submission to decide whether to use semaphores, store that information on the intel_context itself. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191220101230.256839-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 218d20d4e414..ed70d8dcea74 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -917,18 +917,16 @@ i915_request_await_request(struct i915_request *to, struct i915_request *from) return ret; } - if (to->engine == from->engine) { + if (to->engine == from->engine) ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, I915_FENCE_GFP); - } else if (intel_engine_has_semaphores(to->engine) && - to->context->gem_context->sched.priority >= I915_PRIORITY_NORMAL) { + else if (intel_context_use_semaphores(to->context)) ret = emit_semaphore_wait(to, from, I915_FENCE_GFP); - } else { + else ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, I915_FENCE_GFP); - } if (ret < 0) return ret; -- cgit From e6ba76480299a0d77c51d846f7467b1673aad25b Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 21 Dec 2019 16:03:24 +0000 Subject: drm/i915: Remove i915->kernel_context Allocate only an internal intel_context for the kernel_context, forgoing a global GEM context for internal use as we only require a separate address space (for our own protection). Now having weaned GT from requiring ce->gem_context, we can stop referencing it entirely. This also means we no longer have to create random and unnecessary GEM contexts for internal use. GEM contexts are now entirely for tracking GEM clients, and intel_context the execution environment on the GPU. Signed-off-by: Chris Wilson Cc: Andi Shyti Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191221160324.1073045-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index ed70d8dcea74..14a5a99284fa 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -1301,8 +1301,8 @@ void __i915_request_queue(struct i915_request *rq, void i915_request_add(struct i915_request *rq) { - struct i915_sched_attr attr = rq->context->gem_context->sched; struct intel_timeline * const tl = i915_request_timeline(rq); + struct i915_sched_attr attr = {}; struct i915_request *prev; lockdep_assert_held(&tl->mutex); @@ -1312,6 +1312,9 @@ void i915_request_add(struct i915_request *rq) prev = __i915_request_commit(rq); + if (rq->context->gem_context) + attr = rq->context->gem_context->sched; + /* * Boost actual workloads past semaphores! * -- cgit From 6a8679c048eb104dbcc6aa43a0baa7450de46503 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 22 Dec 2019 23:35:58 +0000 Subject: drm/i915: Mark the GEM context link as RCU protected The only protection for intel_context.gem_cotext is granted by RCU, so annotate it as a rcu protected pointer and carefully dereference it in the few occasions we need to use it. Fixes: 9f3ccd40acf4 ("drm/i915: Drop GEM context as a direct link from i915_request") Signed-off-by: Chris Wilson Cc: Andi Shyti Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191222233558.2201901-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 14a5a99284fa..44a0d1a950c5 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -76,7 +76,7 @@ static const char *i915_fence_get_timeline_name(struct dma_fence *fence) if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) return "signaled"; - ctx = to_request(fence)->context->gem_context; + ctx = i915_request_gem_context(to_request(fence)); if (!ctx) return "[" DRIVER_NAME "]"; @@ -1312,8 +1312,8 @@ void i915_request_add(struct i915_request *rq) prev = __i915_request_commit(rq); - if (rq->context->gem_context) - attr = rq->context->gem_context->sched; + if (rcu_access_pointer(rq->context->gem_context)) + attr = i915_request_gem_context(rq)->sched; /* * Boost actual workloads past semaphores! -- cgit From e1c31fb5dde3af91df34d98ca041c746504309d6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Jan 2020 11:42:31 +0000 Subject: drm/i915: Merge i915_request.flags with i915_request.fence.flags As we already have a flags field buried within i915_request, reuse it! Signed-off-by: Chris Wilson Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20200106114234.2529613-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.c') diff --git a/drivers/gpu/drm/i915/i915_request.c b/drivers/gpu/drm/i915/i915_request.c index 44a0d1a950c5..be185886e4fc 100644 --- a/drivers/gpu/drm/i915/i915_request.c +++ b/drivers/gpu/drm/i915/i915_request.c @@ -658,7 +658,6 @@ __i915_request_create(struct intel_context *ce, gfp_t gfp) rq->engine = ce->engine; rq->ring = ce->ring; rq->execution_mask = ce->engine->mask; - rq->flags = 0; RCU_INIT_POINTER(rq->timeline, tl); RCU_INIT_POINTER(rq->hwsp_cacheline, tl->hwsp_cacheline); -- cgit