aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_query.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_query.c')
-rw-r--r--drivers/gpu/drm/xe/xe_query.c93
1 files changed, 52 insertions, 41 deletions
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 848da8e68c7a..170ae72d1a7b 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -9,6 +9,7 @@
#include <linux/sched/clock.h>
#include <drm/ttm/ttm_placement.h>
+#include <generated/xe_wa_oob.h>
#include <uapi/drm/xe_drm.h>
#include "regs/xe_engine_regs.h"
@@ -23,6 +24,7 @@
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_wa.h"
static const u16 xe_to_user_engine_class[] = {
[XE_ENGINE_CLASS_RENDER] = DRM_XE_ENGINE_CLASS_RENDER,
@@ -83,24 +85,22 @@ static __ktime_func_t __clock_id_to_func(clockid_t clk_id)
}
static void
-__read_timestamps(struct xe_gt *gt,
- struct xe_reg lower_reg,
- struct xe_reg upper_reg,
- u64 *engine_ts,
- u64 *cpu_ts,
- u64 *cpu_delta,
- __ktime_func_t cpu_clock)
+hwe_read_timestamp(struct xe_hw_engine *hwe, u64 *engine_ts, u64 *cpu_ts,
+ u64 *cpu_delta, __ktime_func_t cpu_clock)
{
+ struct xe_mmio *mmio = &hwe->gt->mmio;
u32 upper, lower, old_upper, loop = 0;
+ struct xe_reg upper_reg = RING_TIMESTAMP_UDW(hwe->mmio_base),
+ lower_reg = RING_TIMESTAMP(hwe->mmio_base);
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
do {
*cpu_delta = local_clock();
*cpu_ts = cpu_clock();
- lower = xe_mmio_read32(gt, lower_reg);
+ lower = xe_mmio_read32(mmio, lower_reg);
*cpu_delta = local_clock() - *cpu_delta;
old_upper = upper;
- upper = xe_mmio_read32(gt, upper_reg);
+ upper = xe_mmio_read32(mmio, upper_reg);
} while (upper != old_upper && loop++ < 2);
*engine_ts = (u64)upper << 32 | lower;
@@ -117,6 +117,7 @@ query_engine_cycles(struct xe_device *xe,
__ktime_func_t cpu_clock;
struct xe_hw_engine *hwe;
struct xe_gt *gt;
+ unsigned int fw_ref;
if (query->size == 0) {
query->size = size;
@@ -149,18 +150,16 @@ query_engine_cycles(struct xe_device *xe,
if (!hwe)
return -EINVAL;
- if (xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL))
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
return -EIO;
+ }
- __read_timestamps(gt,
- RING_TIMESTAMP(hwe->mmio_base),
- RING_TIMESTAMP_UDW(hwe->mmio_base),
- &resp.engine_cycles,
- &resp.cpu_timestamp,
- &resp.cpu_delta,
- cpu_clock);
+ hwe_read_timestamp(hwe, &resp.engine_cycles, &resp.cpu_timestamp,
+ &resp.cpu_delta, cpu_clock);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
if (GRAPHICS_VER(xe) >= 20)
resp.width = 64;
@@ -168,16 +167,10 @@ query_engine_cycles(struct xe_device *xe,
resp.width = 36;
/* Only write to the output fields of user query */
- if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp))
- return -EFAULT;
-
- if (put_user(resp.cpu_delta, &query_ptr->cpu_delta))
- return -EFAULT;
-
- if (put_user(resp.engine_cycles, &query_ptr->engine_cycles))
- return -EFAULT;
-
- if (put_user(resp.width, &query_ptr->width))
+ if (put_user(resp.cpu_timestamp, &query_ptr->cpu_timestamp) ||
+ put_user(resp.cpu_delta, &query_ptr->cpu_delta) ||
+ put_user(resp.engine_cycles, &query_ptr->engine_cycles) ||
+ put_user(resp.width, &query_ptr->width))
return -EFAULT;
return 0;
@@ -458,12 +451,23 @@ static int query_hwconfig(struct xe_device *xe,
static size_t calc_topo_query_size(struct xe_device *xe)
{
- return xe->info.gt_count *
- (4 * sizeof(struct drm_xe_query_topology_mask) +
- sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
- sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
- sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
+ struct xe_gt *gt;
+ size_t query_size = 0;
+ int id;
+
+ for_each_gt(gt, xe, id) {
+ query_size += 3 * sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
+
+ /* L3bank mask may not be available for some GTs */
+ if (!XE_WA(gt, no_media_l3))
+ query_size += sizeof(struct drm_xe_query_topology_mask) +
+ sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
+ }
+
+ return query_size;
}
static int copy_mask(void __user **ptr,
@@ -516,11 +520,18 @@ static int query_gt_topology(struct xe_device *xe,
if (err)
return err;
- topo.type = DRM_XE_TOPO_L3_BANK;
- err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
- sizeof(gt->fuse_topo.l3_bank_mask));
- if (err)
- return err;
+ /*
+ * If the kernel doesn't have a way to obtain a correct L3bank
+ * mask, then it's better to omit L3 from the query rather than
+ * reporting bogus or zeroed information to userspace.
+ */
+ if (!XE_WA(gt, no_media_l3)) {
+ topo.type = DRM_XE_TOPO_L3_BANK;
+ err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
+ sizeof(gt->fuse_topo.l3_bank_mask));
+ if (err)
+ return err;
+ }
topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
DRM_XE_TOPO_SIMD16_EU_PER_DSS :
@@ -659,7 +670,7 @@ static int query_oa_units(struct xe_device *xe,
du->oa_unit_id = u->oa_unit_id;
du->oa_unit_type = u->type;
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
- du->capabilities = DRM_XE_OA_CAPS_BASE;
+ du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {