From 639f2f24895fb37dd67dfecabd2c74019ed64140 Mon Sep 17 00:00:00 2001 From: Venkata Sandeep Dhanalakota Date: Fri, 13 Dec 2019 07:51:52 -0800 Subject: drm/i915: Introduce new macros for tracing New macros ENGINE_TRACE(), CE_TRACE(), RQ_TRACE() and GT_TRACE() are introduce to tag device name and engine name with contexts and requests tracing in i915. Cc: Sudeep Dutt Cc: Rodrigo Vivi Cc: Daniel Vetter Cc: Chris Wilson Cc: Jani Nikula Signed-off-by: Venkata Sandeep Dhanalakota Reviewed-by: Chris Wilson Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20191213155152.69182-2-venkata.s.dhanalakota@intel.com --- drivers/gpu/drm/i915/i915_request.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 96991d64759c..a561b8efe869 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -49,6 +49,13 @@ struct i915_capture_list { struct i915_vma *vma; }; +#define RQ_TRACE(rq, fmt, ...) do { \ + const struct i915_request *rq__ = (rq); \ + ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d" fmt, \ + rq__->fence.context, rq__->fence.seqno, \ + hwsp_seqno(rq__), ##__VA_ARGS__); \ +} while (0) + enum { /* * I915_FENCE_FLAG_ACTIVE - this request is currently submitted to HW. -- cgit From 85bedbf191e82aac0d7f05623bccfeccdcd91cea Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 17 Dec 2019 01:16:59 +0000 Subject: drm/i915/gt: Eliminate the trylock for reading a timeline's hwsp As we stash a pointer to the HWSP cacheline on the request, when reading it we only need confirm that the cacheline is still valid by checking that the request and timeline are still intact. v2: Protect hwsp_cachline with RCU Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Reviewed-by: Tvrtko Ursulin Link: https://patchwork.freedesktop.org/patch/msgid/20191217011659.3092130-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index a561b8efe869..aa38290eea3d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -30,6 +30,7 @@ #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" +#include "gt/intel_timeline_types.h" #include "i915_gem.h" #include "i915_scheduler.h" @@ -41,8 +42,6 @@ struct drm_file; struct drm_i915_gem_object; struct i915_request; -struct intel_timeline; -struct intel_timeline_cacheline; struct i915_capture_list { struct i915_capture_list *next; @@ -183,7 +182,7 @@ struct i915_request { * inside the timeline's HWSP vma, but it is only valid while this * request has not completed and guarded by the timeline mutex. */ - struct intel_timeline_cacheline *hwsp_cacheline; + struct intel_timeline_cacheline __rcu *hwsp_cacheline; /** Position in the ring of the start of the request */ u32 head; -- cgit From b81e4d9b594188a8babf06442e1d6f28de78c68f Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 19 Dec 2019 12:43:53 +0000 Subject: drm/i915/gt: Track engine round-trip times Knowing the round trip time of an engine is useful for tracking the health of the system as well as providing a metric for the baseline responsiveness of the engine. We can use the latter metric for automatically tuning our waits in selftests and when idling so we don't confuse a slower system with a dead one. Upon idling the engine, we send one last pulse to switch the context away from precious user state to the volatile kernel context. We know the engine is idle at this point, and the pulse is non-preemptible, so this provides us with a good measurement of the round trip time. It also provides us with faster engine parking for ringbuffer submission, which is a welcome bonus (e.g. softer-rc6). Signed-off-by: Chris Wilson Cc: Tvrtko Ursulin Cc: Stuart Summers Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20191219105043.4169050-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20191219124353.8607-2-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index aa38290eea3d..c18c0bcd0193 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -150,6 +150,10 @@ struct i915_request { union { wait_queue_entry_t submitq; struct i915_sw_dma_fence_cb dmaq; + struct i915_request_duration_cb { + struct dma_fence_cb cb; + ktime_t emitted; + } duration; }; struct list_head execute_cb; struct i915_sw_fence semaphore; -- cgit From 9f3ccd40acf4a348aab4eda140cdb4d2f1f773b4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 20 Dec 2019 10:12:29 +0000 Subject: drm/i915: Drop GEM context as a direct link from i915_request Keep the intel_context as being the primary state for i915_request, with the GEM context a backpointer from the low level state for the rarer cases we need client information. Our goal is to remove such references to clients from the backend, and leave the HW submission agnostic to client interfaces and self-contained. Signed-off-by: Chris Wilson Cc: Andi Shyti Reviewed-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191220101230.256839-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index c18c0bcd0193..0e4fe3205ce7 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -115,9 +115,8 @@ struct i915_request { * i915_request_free() will then decrement the refcount on the * context. */ - struct i915_gem_context *gem_context; struct intel_engine_cs *engine; - struct intel_context *hw_context; + struct intel_context *context; struct intel_ring *ring; struct intel_timeline __rcu *timeline; struct list_head signal_link; -- cgit From 6a8679c048eb104dbcc6aa43a0baa7450de46503 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sun, 22 Dec 2019 23:35:58 +0000 Subject: drm/i915: Mark the GEM context link as RCU protected The only protection for intel_context.gem_cotext is granted by RCU, so annotate it as a rcu protected pointer and carefully dereference it in the few occasions we need to use it. Fixes: 9f3ccd40acf4 ("drm/i915: Drop GEM context as a direct link from i915_request") Signed-off-by: Chris Wilson Cc: Andi Shyti Acked-by: Andi Shyti Link: https://patchwork.freedesktop.org/patch/msgid/20191222233558.2201901-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 0e4fe3205ce7..565322640378 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -28,6 +28,7 @@ #include #include +#include "gem/i915_gem_context_types.h" #include "gt/intel_context_types.h" #include "gt/intel_engine_types.h" #include "gt/intel_timeline_types.h" @@ -463,6 +464,13 @@ i915_request_timeline(struct i915_request *rq) lockdep_is_held(&rcu_access_pointer(rq->timeline)->mutex)); } +static inline struct i915_gem_context * +i915_request_gem_context(struct i915_request *rq) +{ + /* Valid only while the request is being constructed (or retired). */ + return rcu_dereference_protected(rq->context->gem_context, true); +} + static inline struct intel_timeline * i915_request_active_timeline(struct i915_request *rq) { -- cgit From 41d329e287fb4888591491be4b4198f14b211cf6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 23 Dec 2019 20:44:10 +0000 Subject: drm/i915: Add spaces before compound GEM_TRACE Add a space between the prefixed format and the users format so that the join are not mistakenly combined into one long word. Fixes: 639f2f24895f ("drm/i915: Introduce new macros for tracing") Signed-off-by: Chris Wilson Cc: Venkata Sandeep Dhanalakota Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20191223204411.2355304-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 565322640378..9784421a3b4d 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -51,7 +51,7 @@ struct i915_capture_list { #define RQ_TRACE(rq, fmt, ...) do { \ const struct i915_request *rq__ = (rq); \ - ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d" fmt, \ + ENGINE_TRACE(rq__->engine, "fence %llx:%lld, current %d " fmt, \ rq__->fence.context, rq__->fence.seqno, \ hwsp_seqno(rq__), ##__VA_ARGS__); \ } while (0) -- cgit From e1c31fb5dde3af91df34d98ca041c746504309d6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 6 Jan 2020 11:42:31 +0000 Subject: drm/i915: Merge i915_request.flags with i915_request.fence.flags As we already have a flags field buried within i915_request, reuse it! Signed-off-by: Chris Wilson Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20200106114234.2529613-3-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/i915_request.h | 43 ++++++++++++++++++++++++++++++------- 1 file changed, 35 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/i915_request.h') diff --git a/drivers/gpu/drm/i915/i915_request.h b/drivers/gpu/drm/i915/i915_request.h index 9784421a3b4d..031433691a06 100644 --- a/drivers/gpu/drm/i915/i915_request.h +++ b/drivers/gpu/drm/i915/i915_request.h @@ -77,6 +77,38 @@ enum { * a request is on the various signal_list. */ I915_FENCE_FLAG_SIGNAL, + + /* + * I915_FENCE_FLAG_NOPREEMPT - this request should not be preempted + * + * The execution of some requests should not be interrupted. This is + * a sensitive operation as it makes the request super important, + * blocking other higher priority work. Abuse of this flag will + * lead to quality of service issues. + */ + I915_FENCE_FLAG_NOPREEMPT, + + /* + * I915_FENCE_FLAG_SENTINEL - this request should be last in the queue + * + * A high priority sentinel request may be submitted to clear the + * submission queue. As it will be the only request in-flight, upon + * execution all other active requests will have been preempted and + * unsubmitted. This preemptive pulse is used to re-evaluate the + * in-flight requests, particularly in cases where an active context + * is banned and those active requests need to be cancelled. + */ + I915_FENCE_FLAG_SENTINEL, + + /* + * I915_FENCE_FLAG_BOOST - upclock the gpu for this request + * + * Some requests are more important than others! In particular, a + * request that the user is waiting on is typically required for + * interactive latency, for which we want to minimise by upclocking + * the GPU. Here we track such boost requests on a per-request basis. + */ + I915_FENCE_FLAG_BOOST, }; /** @@ -225,11 +257,6 @@ struct i915_request { /** Time at which this request was emitted, in jiffies. */ unsigned long emitted_jiffies; - unsigned long flags; -#define I915_REQUEST_WAITBOOST BIT(0) -#define I915_REQUEST_NOPREEMPT BIT(1) -#define I915_REQUEST_SENTINEL BIT(2) - /** timeline->request entry for this request */ struct list_head link; @@ -442,18 +469,18 @@ static inline void i915_request_mark_complete(struct i915_request *rq) static inline bool i915_request_has_waitboost(const struct i915_request *rq) { - return rq->flags & I915_REQUEST_WAITBOOST; + return test_bit(I915_FENCE_FLAG_BOOST, &rq->fence.flags); } static inline bool i915_request_has_nopreempt(const struct i915_request *rq) { /* Preemption should only be disabled very rarely */ - return unlikely(rq->flags & I915_REQUEST_NOPREEMPT); + return unlikely(test_bit(I915_FENCE_FLAG_NOPREEMPT, &rq->fence.flags)); } static inline bool i915_request_has_sentinel(const struct i915_request *rq) { - return unlikely(rq->flags & I915_REQUEST_SENTINEL); + return unlikely(test_bit(I915_FENCE_FLAG_SENTINEL, &rq->fence.flags)); } static inline struct intel_timeline * -- cgit