aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/xe/xe_oa.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/xe/xe_oa.c')
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c726
1 files changed, 494 insertions, 232 deletions
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index 78823f53d290..8dd55798ab31 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -36,11 +36,22 @@
#include "xe_pm.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
+#include "xe_sync.h"
#define DEFAULT_POLL_FREQUENCY_HZ 200
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX
+/* Tells xe_oa_submit_bb() whether to add the stream's sync entries as job dependencies */
+enum xe_oa_submit_deps {
+ XE_OA_SUBMIT_NO_DEPS,
+ XE_OA_SUBMIT_ADD_DEPS,
+};
+
+/* Identifies the caller of the user-extension parser; selects the property table to apply */
+enum xe_oa_user_extn_from {
+ XE_OA_USER_EXTN_FROM_OPEN,
+ XE_OA_USER_EXTN_FROM_CONFIG,
+};
+
struct xe_oa_reg {
struct xe_reg addr;
u32 value;
@@ -70,6 +81,7 @@ struct flex {
};
struct xe_oa_open_param {
+ struct xe_file *xef;
u32 oa_unit_id;
bool sample;
u32 metric_set;
@@ -81,6 +93,9 @@ struct xe_oa_open_param {
struct xe_exec_queue *exec_q;
struct xe_hw_engine *hwe;
bool no_preempt;
+ struct drm_xe_sync __user *syncs_user;
+ int num_syncs;
+ struct xe_sync_entry *syncs;
};
struct xe_oa_config_bo {
@@ -90,6 +105,17 @@ struct xe_oa_config_bo {
struct xe_bb *bb;
};
+/*
+ * Fence handed to the sync entries when an OA config batch is emitted. It is
+ * signaled from delayed work that is queued by a callback on the job fence.
+ */
+struct xe_oa_fence {
+ /* @base: dma fence base */
+ struct dma_fence base;
+ /* @lock: lock for the fence */
+ spinlock_t lock;
+ /* @work: work to signal @base */
+ struct delayed_work work;
+ /* @cb: callback to schedule @work */
+ struct dma_fence_cb cb;
+};
+
#define DRM_FMT(x) DRM_XE_OA_FMT_TYPE_##x
static const struct xe_oa_format oa_formats[] = {
@@ -162,10 +188,10 @@ static struct xe_oa_config *xe_oa_get_oa_config(struct xe_oa *oa, int metrics_se
return oa_config;
}
+/*
+ * Release one config buffer: drop its config reference, hand the batch back
+ * via xe_bb_free() together with @last_fence, then free @oa_bo itself.
+ */
-static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo)
+static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *last_fence)
{
 xe_oa_config_put(oa_bo->oa_config);
- xe_bb_free(oa_bo->bb, NULL);
+ xe_bb_free(oa_bo->bb, last_fence);
 kfree(oa_bo);
}
@@ -176,7 +202,7 @@ static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
{
- return xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_tail_ptr) &
+ return xe_mmio_read32(&stream->gt->mmio, __oa_regs(stream)->oa_tail_ptr) &
OAG_OATAILPTR_MASK;
}
@@ -366,7 +392,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, oaheadptr,
+ xe_mmio_write32(&stream->gt->mmio, oaheadptr,
(head + gtt_offset) & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = head;
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
@@ -377,22 +403,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT;
unsigned long flags;
spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_status, 0);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_head_ptr,
gtt_offset & OAG_OAHEADPTR_MASK);
stream->oa_buffer.head = 0;
/*
* PRM says: "This MMIO must be set before the OATAILPTR register and after the
* OAHEADPTR register. This is to enable proper functionality of the overflow bit".
*/
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_buffer, oa_buf);
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_tail_ptr,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_buffer, oa_buf);
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_tail_ptr,
gtt_offset & OAG_OATAILPTR_MASK);
/* Mark that we need updated tail pointer to read from */
@@ -444,21 +471,23 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
- xe_mmio_write32(stream->gt, regs->oa_ctrl, val);
+ xe_mmio_write32(&stream->gt->mmio, regs->oa_ctrl, val);
}
static void xe_oa_disable(struct xe_oa_stream *stream)
{
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, 0);
- if (xe_mmio_wait32(stream->gt, __oa_regs(stream)->oa_ctrl,
+ struct xe_mmio *mmio = &stream->gt->mmio;
+
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctrl, 0);
+ if (xe_mmio_wait32(mmio, __oa_regs(stream)->oa_ctrl,
OAG_OACONTROL_OA_COUNTER_ENABLE, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA to be disabled timed out\n");
if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
/* <= XE_METEORLAKE except XE_PVC */
- xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
- if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
+ xe_mmio_write32(mmio, OA_TLB_INV_CR, 1);
+ if (xe_mmio_wait32(mmio, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
drm_err(&stream->oa->xe->drm,
"wait for OA tlb invalidate timed out\n");
}
@@ -481,7 +510,7 @@ static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
size_t count, size_t *offset)
{
/* Only clear our bits to avoid side-effects */
- stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
+ stream->oa_status = xe_mmio_rmw32(&stream->gt->mmio, __oa_regs(stream)->oa_status,
OASTATUS_RELEVANT_BITS, 0);
/*
* Signal to userspace that there is non-zero OA status to read via
@@ -567,11 +596,11 @@ static __poll_t xe_oa_poll(struct file *file, poll_table *wait)
return ret;
}
-static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
+static struct dma_fence *xe_oa_submit_bb(struct xe_oa_stream *stream, enum xe_oa_submit_deps deps,
+ struct xe_bb *bb)
{
struct xe_sched_job *job;
struct dma_fence *fence;
- long timeout;
int err = 0;
/* Kernel configuration is issued on stream->k_exec_q, not stream->exec_q */
@@ -581,18 +610,24 @@ static int xe_oa_submit_bb(struct xe_oa_stream *stream, struct xe_bb *bb)
goto exit;
}
+ if (deps == XE_OA_SUBMIT_ADD_DEPS) {
+ for (int i = 0; i < stream->num_syncs && !err; i++)
+ err = xe_sync_entry_add_deps(&stream->syncs[i], job);
+ if (err) {
+ drm_dbg(&stream->oa->xe->drm, "xe_sync_entry_add_deps err %d\n", err);
+ goto err_put_job;
+ }
+ }
+
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
- timeout = dma_fence_wait_timeout(fence, false, HZ);
- dma_fence_put(fence);
- if (timeout < 0)
- err = timeout;
- else if (!timeout)
- err = -ETIME;
+ return fence;
+err_put_job:
+ xe_sched_job_put(job);
exit:
- return err;
+ return ERR_PTR(err);
}
static void write_cs_mi_lri(struct xe_bb *bb, const struct xe_oa_reg *reg_data, u32 n_regs)
@@ -636,7 +671,8 @@ static void xe_oa_free_configs(struct xe_oa_stream *stream)
xe_oa_config_put(stream->oa_config);
llist_for_each_entry_safe(oa_bo, tmp, stream->oa_config_bos.first, node)
- free_oa_config_bo(oa_bo);
+ free_oa_config_bo(oa_bo, stream->last_fence);
+ dma_fence_put(stream->last_fence);
}
static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
@@ -656,6 +692,7 @@ static void xe_oa_store_flex(struct xe_oa_stream *stream, struct xe_lrc *lrc,
static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lrc,
const struct flex *flex, u32 count)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -667,7 +704,16 @@ static int xe_oa_modify_ctx_image(struct xe_oa_stream *stream, struct xe_lrc *lr
xe_oa_store_flex(stream, lrc, bb, flex, count);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -675,6 +721,7 @@ exit:
static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *reg_lri)
{
+ struct dma_fence *fence;
struct xe_bb *bb;
int err;
@@ -686,7 +733,16 @@ static int xe_oa_load_with_lri(struct xe_oa_stream *stream, struct xe_oa_reg *re
write_cs_mi_lri(bb, reg_lri, 1);
- err = xe_oa_submit_bb(stream, bb);
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_NO_DEPS, bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto free_bb;
+ }
+ xe_bb_free(bb, fence);
+ dma_fence_put(fence);
+
+ return 0;
+free_bb:
xe_bb_free(bb, NULL);
exit:
return err;
@@ -749,7 +805,8 @@ static int xe_oa_configure_oac_context(struct xe_oa_stream *stream, bool enable)
int err;
/* Set ccs select to enable programming of OAC_OACONTROL */
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctrl, __oa_ccs_select(stream));
+ xe_mmio_write32(&stream->gt->mmio, __oa_regs(stream)->oa_ctrl,
+ __oa_ccs_select(stream));
/* Modify stream hwe context image with regs_context */
err = xe_oa_modify_ctx_image(stream, stream->exec_q->lrc[0],
@@ -785,6 +842,7 @@ static u32 oag_configure_mmio_trigger(const struct xe_oa_stream *stream, bool en
static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 sqcnt1;
/*
@@ -798,7 +856,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
}
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
oag_configure_mmio_trigger(stream, false));
/* disable the context save/restore or OAR counters */
@@ -806,13 +864,13 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
xe_oa_configure_oa_context(stream, false);
/* Make sure we disable noa to save power. */
- xe_mmio_rmw32(stream->gt, RPM_CONFIG1, GT_NOA_ENABLE, 0);
+ xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
/* Reset PMON Enable to save power. */
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, sqcnt1, 0);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0);
}
static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
@@ -832,7 +890,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_oa_free_oa_buffer(stream);
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
xe_pm_runtime_put(stream->oa->xe);
/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
@@ -840,6 +898,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
xe_oa_free_configs(stream);
+ xe_file_put(stream->xef);
}
static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream)
@@ -910,11 +969,62 @@ out:
return oa_bo;
}
+/* Make stream->last_fence hold a new reference to @fence, dropping the previous one */
+static void xe_oa_update_last_fence(struct xe_oa_stream *stream, struct dma_fence *fence)
+{
+ dma_fence_put(stream->last_fence);
+ stream->last_fence = dma_fence_get(fence);
+}
+
+/* Delayed-work handler: signal the OA fence embedding @w and drop one reference on it */
+static void xe_oa_fence_work_fn(struct work_struct *w)
+{
+ struct xe_oa_fence *ofence = container_of(w, typeof(*ofence), work.work);
+
+ /* Signal fence to indicate new OA configuration is active */
+ dma_fence_signal(&ofence->base);
+ dma_fence_put(&ofence->base);
+}
+
+/*
+ * Callback for the OA config job fence: queue the delayed work that signals
+ * the OA fence containing @cb after NOA_PROGRAM_ADDITIONAL_DELAY_US, then drop
+ * a reference on the job @fence.
+ */
+static void xe_oa_config_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ /* Additional empirical delay needed for NOA programming after registers are written */
+#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
+
+ struct xe_oa_fence *ofence = container_of(cb, typeof(*ofence), cb);
+
+ INIT_DELAYED_WORK(&ofence->work, xe_oa_fence_work_fn);
+ queue_delayed_work(system_unbound_wq, &ofence->work,
+ usecs_to_jiffies(NOA_PROGRAM_ADDITIONAL_DELAY_US));
+ dma_fence_put(fence);
+}
+
+/* get_driver_name hook for xe_oa_fence_ops: always returns the constant "xe_oa" */
+static const char *xe_oa_get_driver_name(struct dma_fence *fence)
+{
+ return "xe_oa";
+}
+
+/* get_timeline_name hook for xe_oa_fence_ops: always returns the constant "unbound" */
+static const char *xe_oa_get_timeline_name(struct dma_fence *fence)
+{
+ return "unbound";
+}
+
+/* Fence ops used for xe_oa_fence; only the two name callbacks are set here */
+static const struct dma_fence_ops xe_oa_fence_ops = {
+ .get_driver_name = xe_oa_get_driver_name,
+ .get_timeline_name = xe_oa_get_timeline_name,
+};
+
static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config *config)
{
#define NOA_PROGRAM_ADDITIONAL_DELAY_US 500
struct xe_oa_config_bo *oa_bo;
- int err, us = NOA_PROGRAM_ADDITIONAL_DELAY_US;
+ struct xe_oa_fence *ofence;
+ int i, err, num_signal = 0;
+ struct dma_fence *fence;
+
+ ofence = kzalloc(sizeof(*ofence), GFP_KERNEL);
+ if (!ofence) {
+ err = -ENOMEM;
+ goto exit;
+ }
oa_bo = xe_oa_alloc_config_buffer(stream, config);
if (IS_ERR(oa_bo)) {
@@ -922,11 +1032,50 @@ static int xe_oa_emit_oa_config(struct xe_oa_stream *stream, struct xe_oa_config
goto exit;
}
- err = xe_oa_submit_bb(stream, oa_bo->bb);
+ /* Emit OA configuration batch */
+ fence = xe_oa_submit_bb(stream, XE_OA_SUBMIT_ADD_DEPS, oa_bo->bb);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ goto exit;
+ }
- /* Additional empirical delay needed for NOA programming after registers are written */
- usleep_range(us, 2 * us);
+ /* Point of no return: initialize and set fence to signal */
+ spin_lock_init(&ofence->lock);
+ dma_fence_init(&ofence->base, &xe_oa_fence_ops, &ofence->lock, 0, 0);
+
+ for (i = 0; i < stream->num_syncs; i++) {
+ if (stream->syncs[i].flags & DRM_XE_SYNC_FLAG_SIGNAL)
+ num_signal++;
+ xe_sync_entry_signal(&stream->syncs[i], &ofence->base);
+ }
+
+ /* Additional dma_fence_get in case we dma_fence_wait */
+ if (!num_signal)
+ dma_fence_get(&ofence->base);
+
+ /* Update last fence too before adding callback */
+ xe_oa_update_last_fence(stream, fence);
+
+ /* Add job fence callback to schedule work to signal ofence->base */
+ err = dma_fence_add_callback(fence, &ofence->cb, xe_oa_config_cb);
+ xe_gt_assert(stream->gt, !err || err == -ENOENT);
+ if (err == -ENOENT)
+ xe_oa_config_cb(fence, &ofence->cb);
+
+ /* If nothing needs to be signaled we wait synchronously */
+ if (!num_signal) {
+ dma_fence_wait(&ofence->base, false);
+ dma_fence_put(&ofence->base);
+ }
+
+ /* Done with syncs */
+ for (i = 0; i < stream->num_syncs; i++)
+ xe_sync_entry_cleanup(&stream->syncs[i]);
+ kfree(stream->syncs);
+
+ return 0;
exit:
+ kfree(ofence);
return err;
}
@@ -940,6 +1089,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
{
+ struct xe_mmio *mmio = &stream->gt->mmio;
u32 oa_debug, sqcnt1;
int ret;
@@ -966,12 +1116,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL |
OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL;
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_debug,
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug,
_MASKED_BIT_ENABLE(oa_debug) |
oag_report_ctx_switches(stream) |
oag_configure_mmio_trigger(stream, true));
- xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
(OAG_OAGLBCTXCTRL_COUNTER_RESUME |
OAG_OAGLBCTXCTRL_TIMER_ENABLE |
REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
@@ -985,7 +1135,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
- xe_mmio_rmw32(stream->gt, XELPMP_SQCNT1, 0, sqcnt1);
+ xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1);
/* Configure OAR/OAC */
if (stream->exec_q) {
@@ -997,6 +1147,262 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
return xe_oa_emit_oa_config(stream, stream->oa_config);
}
+/*
+ * Translate a user format descriptor into an OA format index.
+ *
+ * The counter size, counter select, bc_report and type fields are extracted
+ * from @fmt and compared against every format enabled in oa->format_mask.
+ *
+ * Returns 0 and stores the matching index in @name, or -EINVAL if no enabled
+ * format matches all four fields.
+ */
+static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
+{
+ u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
+ u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
+ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
+ u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
+ int idx;
+
+ for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
+ const struct xe_oa_format *f = &oa->oa_formats[idx];
+
+ if (counter_size == f->counter_size && bc_report == f->bc_report &&
+ type == f->type && counter_sel == f->counter_select) {
+ *name = idx;
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
+
+/*
+ * Property handler: select the OA unit. @value must be below oa->oa_unit_ids,
+ * otherwise -EINVAL is returned. @value is unsigned, so it is logged with
+ * %llu to keep large out-of-range IDs from printing as negative numbers.
+ */
+static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ if (value >= oa->oa_unit_ids) {
+ drm_dbg(&oa->xe->drm, "OA unit ID out of range %llu\n", value);
+ return -EINVAL;
+ }
+ param->oa_unit_id = value;
+ return 0;
+}
+
+/* Property handler: store @value in param->sample; always returns 0 */
+static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->sample = value;
+ return 0;
+}
+
+/* Property handler: store @value in param->metric_set; always returns 0 */
+static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->metric_set = value;
+ return 0;
+}
+
+/*
+ * Property handler: decode the user format descriptor in @value into
+ * param->oa_format. Returns the decode_oa_format() error if it is unsupported.
+ */
+static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ int ret = decode_oa_format(oa, value, &param->oa_format);
+
+ if (ret) {
+ drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
+ return ret;
+ }
+ return 0;
+}
+
+/*
+ * Property handler: store the OA timer period exponent. Values above
+ * OA_EXPONENT_MAX are rejected with -EINVAL.
+ */
+static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+#define OA_EXPONENT_MAX 31
+
+ if (value > OA_EXPONENT_MAX) {
+ drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
+ return -EINVAL;
+ }
+ param->period_exponent = value;
+ return 0;
+}
+
+/* Property handler: store @value in param->disabled; always returns 0 */
+static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->disabled = value;
+ return 0;
+}
+
+/* Property handler: store @value in param->exec_queue_id; always returns 0 */
+static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->exec_queue_id = value;
+ return 0;
+}
+
+/* Property handler: store @value in param->engine_instance; always returns 0 */
+static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->engine_instance = value;
+ return 0;
+}
+
+/* Property handler: store @value in param->no_preempt; always returns 0 */
+static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->no_preempt = value;
+ return 0;
+}
+
+/*
+ * Property handler: record how many sync entries userspace supplied.
+ *
+ * @value is a user-controlled u64 but param->num_syncs is an int. A count that
+ * turns negative or changes value when narrowed is rejected with -EINVAL
+ * instead of being stored truncated.
+ */
+static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ int num_syncs = value;
+
+ if (num_syncs < 0 || (u64)num_syncs != value) {
+ drm_dbg(&oa->xe->drm, "Invalid num_syncs %llu\n", value);
+ return -EINVAL;
+ }
+ param->num_syncs = num_syncs;
+ return 0;
+}
+
+/* Property handler: store @value as the user pointer to the sync array; always returns 0 */
+static int xe_oa_set_prop_syncs_user(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ param->syncs_user = u64_to_user_ptr(value);
+ return 0;
+}
+
+/* Property handler that rejects the property: always returns -EINVAL */
+static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param)
+{
+ return -EINVAL;
+}
+
+/* Signature shared by all OA property handlers */
+typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
+ struct xe_oa_open_param *param);
+/* Property handlers applied for XE_OA_USER_EXTN_FROM_OPEN, indexed by property id */
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_open[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+/*
+ * Property handlers applied for XE_OA_USER_EXTN_FROM_CONFIG, indexed by
+ * property id. Only the metric set and the sync properties are accepted;
+ * every other property maps to the handler that returns -EINVAL.
+ */
+static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = {
+ [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
+ [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval,
+ [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs,
+ [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user,
+};
+
+/*
+ * Handle one DRM_XE_OA_EXTENSION_SET_PROPERTY extension.
+ *
+ * Copies the extension from the user address @extension, validates the
+ * property id and padding, and dispatches to the handler table selected by
+ * @from. The address comes straight from userspace and is not range-checked
+ * beforehand in this function, so the checking copy_from_user() is used.
+ *
+ * Returns -EFAULT if the copy fails, -EINVAL for an unknown property or
+ * non-zero padding, otherwise the handler's return value.
+ */
+static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_ext_set_property ext;
+ int err;
+ u32 idx;
+
+ err = copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ /* Both tables are indexed by the same property id, so they must be the same size */
+ BUILD_BUG_ON(ARRAY_SIZE(xe_oa_set_property_funcs_open) !=
+ ARRAY_SIZE(xe_oa_set_property_funcs_config));
+
+ if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
+ XE_IOCTL_DBG(oa->xe, ext.pad))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));
+
+ if (from == XE_OA_USER_EXTN_FROM_CONFIG)
+ return xe_oa_set_property_funcs_config[idx](oa, ext.value, param);
+ else
+ return xe_oa_set_property_funcs_open[idx](oa, ext.value, param);
+}
+
+/* Signature of a user-extension handler */
+typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, enum xe_oa_user_extn_from from,
+ u64 extension, struct xe_oa_open_param *param);
+/* User-extension handlers, indexed by extension name */
+static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
+ [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
+};
+
+#define MAX_USER_EXTENSIONS 16
+/*
+ * Walk the chain of user extensions starting at the user address @extension.
+ *
+ * Each extension header is copied in, validated and dispatched by name; the
+ * walk then recurses into next_extension. @ext_number counts the extensions
+ * seen so far and bounds the recursion at MAX_USER_EXTENSIONS. Every address
+ * in the chain is user-controlled, so the checking copy_from_user() is used.
+ *
+ * Returns 0 on success, -E2BIG if the chain is too long, -EFAULT if a copy
+ * fails, -EINVAL for non-zero padding or an unknown extension name, or the
+ * error returned by an extension handler.
+ */
+static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from from, u64 extension,
+ int ext_number, struct xe_oa_open_param *param)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_user_extension ext;
+ int err;
+ u32 idx;
+
+ if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
+ return -E2BIG;
+
+ err = copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
+ XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
+ return -EINVAL;
+
+ idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
+ err = xe_oa_user_extension_funcs[idx](oa, from, extension, param);
+ if (XE_IOCTL_DBG(oa->xe, err))
+ return err;
+
+ if (ext.next_extension)
+ return xe_oa_user_extensions(oa, from, ext.next_extension, ++ext_number, param);
+
+ return 0;
+}
+
+/*
+ * Parse the user sync array described by @param into param->syncs.
+ *
+ * Allocates param->num_syncs entries and parses each user entry with
+ * xe_sync_entry_parse(). A non-zero num_syncs without a sync array, or more
+ * than one user-fence entry, is rejected with -EINVAL.
+ *
+ * Returns 0 on success. On failure the entries parsed so far are cleaned up,
+ * the array is freed and a negative errno is returned.
+ */
+static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
+{
+ int ret, num_syncs, num_ufence = 0;
+
+ if (param->num_syncs && !param->syncs_user) {
+ drm_dbg(&oa->xe->drm, "num_syncs specified without sync array\n");
+ ret = -EINVAL;
+ goto exit;
+ }
+
+ if (param->num_syncs) {
+ param->syncs = kcalloc(param->num_syncs, sizeof(*param->syncs), GFP_KERNEL);
+ if (!param->syncs) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+ }
+
+ for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
+ ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
+ &param->syncs_user[num_syncs], 0);
+ if (ret)
+ goto err_syncs;
+
+ if (xe_sync_is_ufence(&param->syncs[num_syncs]))
+ num_ufence++;
+ }
+
+ if (XE_IOCTL_DBG(oa->xe, num_ufence > 1)) {
+ ret = -EINVAL;
+ goto err_syncs;
+ }
+
+ return 0;
+
+err_syncs:
+ /* num_syncs counts the entries parsed successfully; unwind exactly those */
+ while (num_syncs--)
+ xe_sync_entry_cleanup(&param->syncs[num_syncs]);
+ /* NOTE(review): param->syncs is not reset after this kfree; callers must not reuse it */
+ kfree(param->syncs);
+exit:
+ return ret;
+}
+
static void xe_oa_stream_enable(struct xe_oa_stream *stream)
{
stream->pollin = false;
@@ -1090,36 +1496,38 @@ static int xe_oa_disable_locked(struct xe_oa_stream *stream)
+/*
+ * Switch the stream to the OA config selected by the user extensions at @arg.
+ *
+ * The extensions are parsed with the XE_OA_USER_EXTN_FROM_CONFIG table, the
+ * requested config is looked up, the syncs are parsed and handed to the
+ * stream, and the config is emitted. If emitting fails, the parsed syncs are
+ * released here, because the error paths of xe_oa_emit_oa_config() do not
+ * clean them up, and the stream's sync fields are reset.
+ *
+ * Returns the id of the config that was active on entry, or a negative errno.
+ */
static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
{
- struct drm_xe_ext_set_property ext;
+ struct xe_oa_open_param param = {};
 long ret = stream->oa_config->id;
 struct xe_oa_config *config;
 int err;
- err = __copy_from_user(&ext, u64_to_user_ptr(arg), sizeof(ext));
- if (XE_IOCTL_DBG(stream->oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(stream->oa->xe, ext.pad) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.name != DRM_XE_OA_EXTENSION_SET_PROPERTY) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.base.next_extension) ||
- XE_IOCTL_DBG(stream->oa->xe, ext.property != DRM_XE_OA_PROPERTY_OA_METRIC_SET))
- return -EINVAL;
+ err = xe_oa_user_extensions(stream->oa, XE_OA_USER_EXTN_FROM_CONFIG, arg, 0, &param);
+ if (err)
+ return err;
- config = xe_oa_get_oa_config(stream->oa, ext.value);
+ config = xe_oa_get_oa_config(stream->oa, param.metric_set);
 if (!config)
 return -ENODEV;
- if (config != stream->oa_config) {
- err = xe_oa_emit_oa_config(stream, config);
- if (!err)
- config = xchg(&stream->oa_config, config);
- else
- ret = err;
+ param.xef = stream->xef;
+ err = xe_oa_parse_syncs(stream->oa, &param);
+ if (err)
+ goto err_config_put;
+
+ stream->num_syncs = param.num_syncs;
+ stream->syncs = param.syncs;
+
+ err = xe_oa_emit_oa_config(stream, config);
+ if (!err) {
+ config = xchg(&stream->oa_config, config);
+ drm_dbg(&stream->oa->xe->drm, "changed to oa config uuid=%s\n",
+ stream->oa_config->uuid);
- }
+ } else {
+ /* Emit failed without releasing the syncs: drop them and forget them */
+ while (param.num_syncs--)
+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
+ kfree(param.syncs);
+ stream->syncs = NULL;
+ stream->num_syncs = 0;
+ }
+err_config_put:
 xe_oa_config_put(config);
- return ret;
+ return err ?: ret;
}
static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
@@ -1349,6 +1757,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
{
struct xe_oa_unit *u = param->hwe->oa_unit;
struct xe_gt *gt = param->hwe->gt;
+ unsigned int fw_ref;
int ret;
stream->exec_q = param->exec_q;
@@ -1362,6 +1771,10 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
stream->period_exponent = param->period_exponent;
stream->no_preempt = param->no_preempt;
+ stream->xef = xe_file_get(param->xef);
+ stream->num_syncs = param->num_syncs;
+ stream->syncs = param->syncs;
+
/*
* For Xe2+, when overrun mode is enabled, there are no partial reports at the end
* of buffer, making the OA buffer effectively a non-power-of-2 size circular
@@ -1409,7 +1822,11 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(stream->oa->xe);
- XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ ret = -ETIMEDOUT;
+ goto err_fw_put;
+ }
ret = xe_oa_alloc_oa_buffer(stream);
if (ret)
@@ -1451,13 +1868,14 @@ err_put_k_exec_q:
err_free_oa_buf:
xe_oa_free_oa_buffer(stream);
err_fw_put:
- XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(stream->oa->xe);
if (stream->override_gucrc)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
err_free_configs:
xe_oa_free_configs(stream);
exit:
+ xe_file_put(stream->xef);
return ret;
}
@@ -1535,7 +1953,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
case XE_PVC:
case XE_METEORLAKE:
xe_pm_runtime_get(gt_to_xe(gt));
- reg = xe_mmio_read32(gt, RPM_CONFIG0);
+ reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
xe_pm_runtime_put(gt_to_xe(gt));
shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
@@ -1567,27 +1985,6 @@ static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
}
}
-static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *name)
-{
- u32 counter_size = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE, fmt);
- u32 counter_sel = FIELD_GET(DRM_XE_OA_FORMAT_MASK_COUNTER_SEL, fmt);
- u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt);
- u32 type = FIELD_GET(DRM_XE_OA_FORMAT_MASK_FMT_TYPE, fmt);
- int idx;
-
- for_each_set_bit(idx, oa->format_mask, __XE_OA_FORMAT_MAX) {
- const struct xe_oa_format *f = &oa->oa_formats[idx];
-
- if (counter_size == f->counter_size && bc_report == f->bc_report &&
- type == f->type && counter_sel == f->counter_select) {
- *name = idx;
- return 0;
- }
- }
-
- return -EINVAL;
-}
-
/**
* xe_oa_unit_id - Return OA unit ID for a hardware engine
* @hwe: @xe_hw_engine
@@ -1634,155 +2031,6 @@ out:
return ret;
}
-static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- if (value >= oa->oa_unit_ids) {
- drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
- return -EINVAL;
- }
- param->oa_unit_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_sample_oa(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->sample = value;
- return 0;
-}
-
-static int xe_oa_set_prop_metric_set(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->metric_set = value;
- return 0;
-}
-
-static int xe_oa_set_prop_oa_format(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- int ret = decode_oa_format(oa, value, &param->oa_format);
-
- if (ret) {
- drm_dbg(&oa->xe->drm, "Unsupported OA report format %#llx\n", value);
- return ret;
- }
- return 0;
-}
-
-static int xe_oa_set_prop_oa_exponent(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
-#define OA_EXPONENT_MAX 31
-
- if (value > OA_EXPONENT_MAX) {
- drm_dbg(&oa->xe->drm, "OA timer exponent too high (> %u)\n", OA_EXPONENT_MAX);
- return -EINVAL;
- }
- param->period_exponent = value;
- return 0;
-}
-
-static int xe_oa_set_prop_disabled(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->disabled = value;
- return 0;
-}
-
-static int xe_oa_set_prop_exec_queue_id(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->exec_queue_id = value;
- return 0;
-}
-
-static int xe_oa_set_prop_engine_instance(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->engine_instance = value;
- return 0;
-}
-
-static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param)
-{
- param->no_preempt = value;
- return 0;
-}
-
-typedef int (*xe_oa_set_property_fn)(struct xe_oa *oa, u64 value,
- struct xe_oa_open_param *param);
-static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
- [DRM_XE_OA_PROPERTY_OA_UNIT_ID] = xe_oa_set_prop_oa_unit_id,
- [DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
- [DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
- [DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
- [DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
- [DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
- [DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
- [DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
- [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt,
-};
-
-static int xe_oa_user_ext_set_property(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_ext_set_property ext;
- int err;
- u32 idx;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs)) ||
- XE_IOCTL_DBG(oa->xe, ext.pad))
- return -EINVAL;
-
- idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs));
- return xe_oa_set_property_funcs[idx](oa, ext.value, param);
-}
-
-typedef int (*xe_oa_user_extension_fn)(struct xe_oa *oa, u64 extension,
- struct xe_oa_open_param *param);
-static const xe_oa_user_extension_fn xe_oa_user_extension_funcs[] = {
- [DRM_XE_OA_EXTENSION_SET_PROPERTY] = xe_oa_user_ext_set_property,
-};
-
-#define MAX_USER_EXTENSIONS 16
-static int xe_oa_user_extensions(struct xe_oa *oa, u64 extension, int ext_number,
- struct xe_oa_open_param *param)
-{
- u64 __user *address = u64_to_user_ptr(extension);
- struct drm_xe_user_extension ext;
- int err;
- u32 idx;
-
- if (XE_IOCTL_DBG(oa->xe, ext_number >= MAX_USER_EXTENSIONS))
- return -E2BIG;
-
- err = __copy_from_user(&ext, address, sizeof(ext));
- if (XE_IOCTL_DBG(oa->xe, err))
- return -EFAULT;
-
- if (XE_IOCTL_DBG(oa->xe, ext.pad) ||
- XE_IOCTL_DBG(oa->xe, ext.name >= ARRAY_SIZE(xe_oa_user_extension_funcs)))
- return -EINVAL;
-
- idx = array_index_nospec(ext.name, ARRAY_SIZE(xe_oa_user_extension_funcs));
- err = xe_oa_user_extension_funcs[idx](oa, extension, param);
- if (XE_IOCTL_DBG(oa->xe, err))
- return err;
-
- if (ext.next_extension)
- return xe_oa_user_extensions(oa, ext.next_extension, ++ext_number, param);
-
- return 0;
-}
-
/**
* xe_oa_stream_open_ioctl - Opens an OA stream
* @dev: @drm_device
@@ -1808,7 +2056,8 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
return -ENODEV;
}
- ret = xe_oa_user_extensions(oa, data, 0, &param);
+ param.xef = xef;
+ ret = xe_oa_user_extensions(oa, XE_OA_USER_EXTN_FROM_OPEN, data, 0, &param);
if (ret)
return ret;
@@ -1876,11 +2125,24 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz);
}
+ ret = xe_oa_parse_syncs(oa, &param);
+ if (ret)
+ goto err_exec_q;
+
mutex_lock(&param.hwe->gt->oa.gt_lock);
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
mutex_unlock(&param.hwe->gt->oa.gt_lock);
+ if (ret < 0)
+ goto err_sync_cleanup;
+
+ return ret;
+
+err_sync_cleanup:
+ while (param.num_syncs--)
+ xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
+ kfree(param.syncs);
err_exec_q:
- if (ret < 0 && param.exec_q)
+ if (param.exec_q)
xe_exec_queue_put(param.exec_q);
return ret;
}
@@ -2351,7 +2613,7 @@ static void __xe_oa_init_oa_units(struct xe_gt *gt)
}
/* Ensure MMIO trigger remains disabled till there is a stream */
- xe_mmio_write32(gt, u->regs.oa_debug,
+ xe_mmio_write32(&gt->mmio, u->regs.oa_debug,
oag_configure_mmio_trigger(NULL, false));
/* Set oa_unit_ids now to ensure ids remain contiguous */