From 277bd3371f11400d5b02df54f057569be4b10cea Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 24 May 2022 10:51:43 +0800 Subject: drm/amdgpu: convert gfx.kiq to array type (v3) v1: more kiq instances are a available in SOC (Le) v2: squash commits to avoid breaking the build (Le) v3: make the conversion for gfx/mec v11_0 (Hawking) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index adbcd8127c82..adf86bc7ed36 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -898,7 +898,7 @@ static const struct kiq_pm4_funcs gfx_v9_0_kiq_pm4_funcs = { static void gfx_v9_0_set_kiq_pm4_funcs(struct amdgpu_device *adev) { - adev->gfx.kiq.pmf = &gfx_v9_0_kiq_pm4_funcs; + adev->gfx.kiq[0].pmf = &gfx_v9_0_kiq_pm4_funcs; } static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) @@ -2174,7 +2174,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - kiq = &adev->gfx.kiq; + kiq = &adev->gfx.kiq[0]; r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); if (r) return r; @@ -2216,7 +2216,7 @@ static int gfx_v9_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); amdgpu_gfx_mqd_sw_fini(adev); - amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring); + amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); amdgpu_gfx_kiq_fini(adev); gfx_v9_0_mec_fini(adev); @@ -3155,7 +3155,7 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { WREG32_SOC15_RLC(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); - adev->gfx.kiq.ring.sched.ready = false; + adev->gfx.kiq[0].ring.sched.ready = false; } udelay(50); } @@ -3610,7 +3610,7 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) struct amdgpu_ring *ring; int r; - ring = &adev->gfx.kiq.ring; + ring = &adev->gfx.kiq[0].ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) @@ -3789,10 +3789,10 @@ static int gfx_v9_0_hw_fini(void *handle) */ if (!amdgpu_in_reset(adev) && !adev->in_suspend) { mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, adev->gfx.kiq.ring.me, - adev->gfx.kiq.ring.pipe, - adev->gfx.kiq.ring.queue, 0); - gfx_v9_0_kiq_fini_register(&adev->gfx.kiq.ring); + soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, + adev->gfx.kiq[0].ring.pipe, + adev->gfx.kiq[0].ring.queue, 0); + gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } @@ -3913,7 +3913,7 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) unsigned long flags; uint32_t seq, reg_val_offs = 0; uint64_t value = 0; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *ring = &kiq->ring; BUG_ON(!ring->funcs->emit_rreg); @@ -5385,7 +5385,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) { int i, r = 0; struct amdgpu_device *adev = ring->adev; - struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_kiq *kiq = &adev->gfx.kiq[0]; struct amdgpu_ring *kiq_ring = &kiq->ring; unsigned long flags; @@ -6964,7 +6964,7 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) { int i; - adev->gfx.kiq.ring.funcs = &gfx_v9_0_ring_funcs_kiq; + adev->gfx.kiq[0].ring.funcs = &gfx_v9_0_ring_funcs_kiq; for (i = 0; i < adev->gfx.num_gfx_rings; i++) adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx; -- cgit From be697aa3a78ef83a6b8d49e1f0671a002e502cd0 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Wed, 27 Jul 2022 14:35:49 +0800 Subject: drm/amdgpu: move queue_bitmap to an independent structure (v3) To allocate independent queue_bitmap for each XCD, then the old bitmap policy can be continued to use with a clear logic. Use mec_bitmap[0] as default for all non-GC 9.4.3 IPs. v2: squash commits to avoid breaking the build v3: unify naming style Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index adf86bc7ed36..49adc36dcc6f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1713,7 +1713,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) const struct gfx_firmware_header_v1_0 *mec_hdr; - bitmap_zero(adev->gfx.mec.queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); + bitmap_zero(adev->gfx.mec_bitmap[0].queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); /* take ownership of the relevant compute queues */ amdgpu_gfx_compute_queue_acquire(adev); @@ -2154,7 +2154,8 @@ static int gfx_v9_0_sw_init(void *handle) for (i = 0; i < adev->gfx.mec.num_mec; ++i) { for (j = 0; j < adev->gfx.mec.num_queue_per_pipe; j++) { for (k = 0; k < adev->gfx.mec.num_pipe_per_mec; k++) { - if (!amdgpu_gfx_is_mec_queue_enabled(adev, i, k, j)) + if (!amdgpu_gfx_is_mec_queue_enabled(adev, 0, i, + k, j)) continue; r = gfx_v9_0_compute_ring_init(adev, -- cgit From def799c6596d078112095c24c25e162cb5102d90 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 24 May 2022 12:23:03 +0800 Subject: drm/amdgpu: add multi-xcc support to amdgpu_gfx interfaces (v4) v1: Modify kiq_init/fini, mqd_sw_init/fini and enable/disable_kcq to adapt to multi-die case. Pass 0 as default to all asics with single xcc (Le) v2: squash commits to avoid breaking the build (Le) v3: unify naming style (Le) v4: apply the changes to gc v11_0 (Hawking) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 49adc36dcc6f..62af92e5be51 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2169,19 +2169,19 @@ static int gfx_v9_0_sw_init(void *handle) } } - r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE); + r = amdgpu_gfx_kiq_init(adev, GFX9_MEC_HPD_SIZE, 0); if (r) { DRM_ERROR("Failed to init KIQ BOs!\n"); return r; } kiq = &adev->gfx.kiq[0]; - r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq); + r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq, 0); if (r) return r; /* create MQD for all compute queues as wel as KIQ for SRIOV case */ - r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation)); + r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v9_mqd_allocation), 0); if (r) return r; @@ -2216,9 +2216,9 @@ static int gfx_v9_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - amdgpu_gfx_mqd_sw_fini(adev); + amdgpu_gfx_mqd_sw_fini(adev, 0); amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq[0].ring); - amdgpu_gfx_kiq_fini(adev); + amdgpu_gfx_kiq_fini(adev, 0); gfx_v9_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, @@ -3520,7 +3520,6 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; struct v9_mqd *mqd = ring->mqd_ptr; - int mqd_idx = AMDGPU_MAX_COMPUTE_RINGS; struct v9_mqd *tmp_mqd; gfx_v9_0_kiq_setting(ring); @@ -3530,11 +3529,11 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) * driver need to re-init the mqd. * check mqd->cp_hqd_pq_control since this value should not be 0 */ - tmp_mqd = (struct v9_mqd *)adev->gfx.mec.mqd_backup[mqd_idx]; + tmp_mqd = (struct v9_mqd *)adev->gfx.kiq[0].mqd_backup; if (amdgpu_in_reset(adev) && tmp_mqd->cp_hqd_pq_control){ /* for GPU_RESET case , reset MQD to a clean status */ - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(mqd, adev->gfx.kiq[0].mqd_backup, sizeof(struct v9_mqd_allocation)); /* reset ring buffer */ ring->wptr = 0; @@ -3558,8 +3557,8 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) soc15_grbm_select(adev, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); - if (adev->gfx.mec.mqd_backup[mqd_idx]) - memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); + if (adev->gfx.kiq[0].mqd_backup) + memcpy(adev->gfx.kiq[0].mqd_backup, mqd, sizeof(struct v9_mqd_allocation)); } return 0; @@ -3653,7 +3652,7 @@ static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) goto done; } - r = amdgpu_gfx_enable_kcq(adev); + r = amdgpu_gfx_enable_kcq(adev, 0); done: return r; } @@ -3772,7 +3771,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* DF freeze and kcq disable will fail */ if (!amdgpu_ras_intr_triggered()) /* disable KCQ to avoid CPC touch memory not valid anymore */ - amdgpu_gfx_disable_kcq(adev); + amdgpu_gfx_disable_kcq(adev, 0); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); -- cgit From 5aa998baab3360d0f1b93d6aff3df924045f956c Mon Sep 17 00:00:00 2001 From: Le Ma Date: Wed, 17 Nov 2021 16:28:51 +0800 Subject: drm/amdgpu: add xcc index argument to soc15_grbm_select To support grbm select for multiple XCD case. v2: unify naming style Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 62af92e5be51..4939fd61355b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1831,7 +1831,7 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, u32 me, u32 pipe, u32 q, u32 vm) { - soc15_grbm_select(adev, me, pipe, q, vm); + soc15_grbm_select(adev, me, pipe, q, vm, 0); } static const struct amdgpu_gfx_funcs gfx_v9_0_gfx_funcs = { @@ -2324,12 +2324,12 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_lock(&adev->srbm_mutex); for (i = adev->vm_manager.first_kfd_vmid; i < AMDGPU_NUM_VMID; i++) { - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ WREG32_SOC15_RLC(GC, 0, mmSH_MEM_CONFIG, sh_mem_config); WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, sh_mem_bases); } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); /* Initialize all compute VMIDs to have no GDS, GWS, or OA @@ -2394,7 +2394,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { - soc15_grbm_select(adev, 0, 0, 0, i); + soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ if (i == 0) { tmp = REG_SET_FIELD(0, SH_MEM_CONFIG, ALIGNMENT_MODE, @@ -2416,7 +2416,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) WREG32_SOC15_RLC(GC, 0, mmSH_MEM_BASES, tmp); } } - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); @@ -3540,9 +3540,9 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } else { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); @@ -3551,10 +3551,10 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring) if (amdgpu_sriov_vf(adev) && adev->in_suspend) amdgpu_ring_clear_ring(ring); mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_mqd_init(ring); gfx_v9_0_kiq_init_register(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.kiq[0].mqd_backup) @@ -3582,9 +3582,9 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; mutex_lock(&adev->srbm_mutex); - soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0); + soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0, 0); gfx_v9_0_mqd_init(ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); if (adev->gfx.mec.mqd_backup[mqd_idx]) @@ -3791,9 +3791,9 @@ static int gfx_v9_0_hw_fini(void *handle) mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq[0].ring.me, adev->gfx.kiq[0].ring.pipe, - adev->gfx.kiq[0].ring.queue, 0); + adev->gfx.kiq[0].ring.queue, 0, 0); gfx_v9_0_kiq_fini_register(&adev->gfx.kiq[0].ring); - soc15_grbm_select(adev, 0, 0, 0, 0); + soc15_grbm_select(adev, 0, 0, 0, 0, 0); mutex_unlock(&adev->srbm_mutex); } -- cgit From d51ac6d0a23caf1005cb640f8533161c5d2dd0c0 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Tue, 24 May 2022 11:22:49 +0800 Subject: drm/amdgpu: add xcc index argument to select_sh_se function v2 v1: To support multiple XCD case (Le) v2: introduce xcc index to gfx_v11_0_select_sh_se (Hawking) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4939fd61355b..2fa7adef18a9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1504,7 +1504,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) mask = 1; cu_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { if (cu_info->bitmap[i][j] & mask) { @@ -1523,7 +1523,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) cu_info->ao_cu_bitmap[i][j] = cu_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); } @@ -1545,7 +1545,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -1594,7 +1594,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); /* set mmRLC_LB_PARAMS = 0x003F_1006 */ @@ -2241,7 +2241,7 @@ static void gfx_v9_0_tiling_mode_table_init(struct amdgpu_device *adev) } void gfx_v9_0_select_se_sh(struct amdgpu_device *adev, u32 se_num, u32 sh_num, - u32 instance) + u32 instance, int xcc_id) { u32 data; @@ -2290,13 +2290,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); data = gfx_v9_0_get_rb_active_bitmap(adev); active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) * rb_bitmap_width_per_sh); } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); adev->gfx.config.backend_enable_mask = active_rbs; @@ -2433,7 +2433,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) mutex_lock(&adev->grbm_idx_mutex); for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); for (k = 0; k < adev->usec_timeout; k++) { if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0) break; @@ -2441,7 +2441,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } if (k == adev->usec_timeout) { amdgpu_gfx_select_se_sh(adev, 0xffffffff, - 0xffffffff, 0xffffffff); + 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); DRM_INFO("Timeout wait for RLC serdes %u,%u\n", i, j); @@ -2449,7 +2449,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev) } } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK | @@ -6608,7 +6608,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev) for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0x0, k); + amdgpu_gfx_select_se_sh(adev, j, 0x0, k, 0); RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); } } @@ -6670,7 +6670,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) { for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) { for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) { - amdgpu_gfx_select_se_sh(adev, j, 0, k); + amdgpu_gfx_select_se_sh(adev, j, 0, k, 0); reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i])); if (reg_value) @@ -6685,7 +6685,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, err_data->ce_count += sec_count; err_data->ue_count += ded_count; - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); gfx_v9_0_query_utc_edc_status(adev, err_data); @@ -7145,7 +7145,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, mask = 1; ao_bitmap = 0; counter = 0; - amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff, 0); gfx_v9_0_set_user_cu_inactive_bitmap( adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]); bitmap = gfx_v9_0_get_cu_active_bitmap(adev); @@ -7178,7 +7178,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap; } } - amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff, 0); mutex_unlock(&adev->grbm_idx_mutex); cu_info->number = active_cu_number; -- cgit From 86b20703e4c5a3c39891def0a68e7438aeca9db9 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Wed, 27 Jul 2022 14:24:05 +0800 Subject: drm/amdgpu: add xcc index argument to rlc safe_mode func (v4) v1: To support multple XCD case (Le) v2: unify naming style (Le) v3: apply the changes to gc v11_0 (Hawking) v4: apply the changes to gc SOC21 (Morris) Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 2fa7adef18a9..bce6919d666a 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4623,7 +4623,7 @@ static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) return true; } -static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; unsigned i; @@ -4640,7 +4640,7 @@ static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev, int xcc_id) { uint32_t data; @@ -4651,7 +4651,7 @@ static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -4663,7 +4663,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -4690,7 +4690,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t data, def; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -4757,7 +4757,7 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev } } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, @@ -4768,7 +4768,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, if (!adev->gfx.num_gfx_rings) return; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); /* Enable 3D CGCG/CGLS */ if (enable) { @@ -4812,7 +4812,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -4820,7 +4820,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - amdgpu_gfx_rlc_enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev, 0); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); @@ -4864,7 +4864,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - amdgpu_gfx_rlc_exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev, 0); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, -- cgit From 4e7f84ec068cec6a9a72fe0f558e0ae4cf765c51 Mon Sep 17 00:00:00 2001 From: Le Ma Date: Thu, 16 Dec 2021 15:35:25 +0800 Subject: drm/amdgpu: initialize num_xcd to 1 for gfx v9_0 Assign value here as the num_xcd is referenced in some gfx9 common path. Signed-off-by: Le Ma Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index bce6919d666a..8fb027cf1bfb 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4543,6 +4543,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; + adev->gfx.num_xcd = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); -- cgit From 45b54a7dd3437632352ed28163e982233ef190a8 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 26 Apr 2023 14:49:00 -0400 Subject: drm/amdgpu/gfx9: always restore kcq MQDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Always restore the MQD not just when we do a reset. This allows us to move the MQD to VRAM if we want. v2: always reset ring pointer as well (Christian) Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8fb027cf1bfb..4a7556099469 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3589,17 +3589,14 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(adev->gfx.mec.mqd_backup[mqd_idx], mqd, sizeof(struct v9_mqd_allocation)); - } else if (amdgpu_in_reset(adev)) { /* for GPU_RESET case */ - /* reset MQD to a clean status */ + } else { + /* restore MQD to a clean status */ if (adev->gfx.mec.mqd_backup[mqd_idx]) memcpy(mqd, adev->gfx.mec.mqd_backup[mqd_idx], sizeof(struct v9_mqd_allocation)); - /* reset ring buffer */ ring->wptr = 0; atomic64_set((atomic64_t *)ring->wptr_cpu_addr, 0); amdgpu_ring_clear_ring(ring); - } else { - amdgpu_ring_clear_ring(ring); } return 0; -- cgit From d97b02bb9c7aa3008d473d11001e1b45b7e0c7c6 Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Sat, 6 May 2023 20:06:45 +0800 Subject: drm/amdgpu/gfx: disable gfx9 cp_ecc_error_irq only when enabling legacy gfx ras gfx9 cp_ecc_error_irq is only enabled when legacy gfx ras is assert. So in gfx_v9_0_hw_fini, interrupt disablement for cp_ecc_error_irq should be executed under such condition, otherwise, an amdgpu_irq_put calltrace will occur. [ 7283.170322] RIP: 0010:amdgpu_irq_put+0x45/0x70 [amdgpu] [ 7283.170964] RSP: 0018:ffff9a5fc3967d00 EFLAGS: 00010246 [ 7283.170967] RAX: ffff98d88afd3040 RBX: ffff98d89da20000 RCX: 0000000000000000 [ 7283.170969] RDX: 0000000000000000 RSI: ffff98d89da2bef8 RDI: ffff98d89da20000 [ 7283.170971] RBP: ffff98d89da20000 R08: ffff98d89da2ca18 R09: 0000000000000006 [ 7283.170973] R10: ffffd5764243c008 R11: 0000000000000000 R12: 0000000000001050 [ 7283.170975] R13: ffff98d89da38978 R14: ffffffff999ae15a R15: ffff98d880130105 [ 7283.170978] FS: 0000000000000000(0000) GS:ffff98d996f00000(0000) knlGS:0000000000000000 [ 7283.170981] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 7283.170983] CR2: 00000000f7a9d178 CR3: 00000001c42ea000 CR4: 00000000003506e0 [ 7283.170986] Call Trace: [ 7283.170988] [ 7283.170989] gfx_v9_0_hw_fini+0x1c/0x6d0 [amdgpu] [ 7283.171655] amdgpu_device_ip_suspend_phase2+0x101/0x1a0 [amdgpu] [ 7283.172245] amdgpu_device_suspend+0x103/0x180 [amdgpu] [ 7283.172823] amdgpu_pmops_freeze+0x21/0x60 [amdgpu] [ 7283.173412] pci_pm_freeze+0x54/0xc0 [ 7283.173419] ? __pfx_pci_pm_freeze+0x10/0x10 [ 7283.173425] dpm_run_callback+0x98/0x200 [ 7283.173430] __device_suspend+0x164/0x5f0 v2: drop gfx11 as it's fixed in a different solution by retiring cp_ecc_irq funcs(Hawking) Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2522 Signed-off-by: Guchun Chen Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4a7556099469..e093e83ae739 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3761,7 +3761,8 @@ static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); + if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__GFX)) + amdgpu_irq_put(adev, &adev->gfx.cp_ecc_error_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); -- cgit From 82ad22bbad008f84ec52208c5ba2c8f1cf55fd8d Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Fri, 12 May 2023 09:29:17 +0800 Subject: drm/amdgpu: Differentiate between Raven2 and Raven/Picasso according to revision id Due to the raven2 and raven/picasso maybe have the same GC_HWIP version. So differentiate them by revision id. Signed-off-by: shanshengwang Signed-off-by: Jesse Zhang Acked-by: Evan Quan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 33 ++++++++++++++------------------- 1 file changed, 14 insertions(+), 19 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index e093e83ae739..46577b59cb04 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4000,30 +4000,25 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) clock = clock_lo | (clock_hi << 32ULL); break; case IP_VERSION(9, 1, 0): + case IP_VERSION(9, 2, 2): preempt_disable(); - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ - if (hi_check != clock_hi) { + if (adev->rev_id >= 0x8) { + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + } else { + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - clock_hi = hi_check; + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); } - preempt_enable(); - clock = clock_lo | (clock_hi << 32ULL); - break; - case IP_VERSION(9, 2, 2): - preempt_disable(); - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ + * roughly every 42 seconds. + */ if (hi_check != clock_hi) { - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + if (adev->rev_id >= 0x8) + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + else + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); clock_hi = hi_check; } preempt_enable(); -- cgit From f4caf5842652f08e024741ef6d423cb0c101d863 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 14 Sep 2022 16:35:50 +0800 Subject: drm/amdgpu: introduce vmhub definition for multi-partition cases (v3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v1: Each partition has its own gfxhub or mmhub. adjust the num of MAX_VMHUBS and the GFXHUB/MMHUB layout (Le) v2: re-design the AMDGPU_GFXHUB/AMDGPU_MMHUB layout (Le) v3: apply the gfxhub/mmhub layout to new IPs (Hawking) v4: fix up gmc11 (Alex) v5: rebase (Alex) Signed-off-by: Le Ma Acked-by: Christian König Reviewed-by: Hawking Zhang Signed-off-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 46577b59cb04..91814dc083c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2005,7 +2005,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -2105,7 +2105,7 @@ static int gfx_v9_0_sw_init(void *handle) /* disable scheduler on the real ring */ ring->no_scheduler = true; - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, AMDGPU_RING_PRIO_DEFAULT, NULL); @@ -2123,7 +2123,7 @@ static int gfx_v9_0_sw_init(void *handle) ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; ring->is_sw_ring = true; hw_prio = amdgpu_sw_ring_priority(i); - ring->vm_hub = AMDGPU_GFXHUB_0; + ring->vm_hub = AMDGPU_GFXHUB(0); r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio, NULL); @@ -2393,7 +2393,7 @@ static void gfx_v9_0_constants_init(struct amdgpu_device *adev) /* XXX SH_MEM regs */ /* where to put LDS, scratch, GPUVM in FSA64 space */ mutex_lock(&adev->srbm_mutex); - for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB_0].num_ids; i++) { + for (i = 0; i < adev->vm_manager.id_mgr[AMDGPU_GFXHUB(0)].num_ids; i++) { soc15_grbm_select(adev, 0, 0, 0, i, 0); /* CP and shaders */ if (i == 0) { -- cgit From 8078f1c610fdcdd8003e2c538fb04af41fa5c269 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Wed, 29 Jun 2022 11:41:53 +0530 Subject: drm/amdgpu: Change num_xcd to xcc_mask Instead of number of XCCs, keep a mask of XCCs for the exact XCCs available on the ASIC. XCC configuration could differ based on different ASIC configs. v2: Rename num_xcd to num_xcc (Hawking) Use smaller xcc_mask size, changed to u16 (Le) Signed-off-by: Lijo Lazar Reviewed-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 91814dc083c9..da69177dc76f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4536,7 +4536,7 @@ static int gfx_v9_0_early_init(void *handle) adev->gfx.num_gfx_rings = 0; else adev->gfx.num_gfx_rings = GFX9_NUM_GFX_RINGS; - adev->gfx.num_xcd = 1; + adev->gfx.xcc_mask = 1; adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev), AMDGPU_MAX_COMPUTE_RINGS); gfx_v9_0_set_kiq_pm4_funcs(adev); -- cgit From 553f973a0d7bbe95ea5da46979d926a9c0ada109 Mon Sep 17 00:00:00 2001 From: Tom St Denis Date: Tue, 11 Oct 2022 09:52:58 -0400 Subject: drm/amd/amdgpu: Update debugfs for XCC support (v3) This patch updates the 'regs2' interface for MMIO registers to add a new IOCTL command for a 'v2' state data that includes the XCC ID. This patch then updates amdgpu_gfx_select_se_sh() and amdgpu_gfx_select_me_pipe_q() (and the implementations in the gfx drivers) to support an additional parameter. This patch then creates a new debugfs interface "gprwave" which is a merge of shader GPR and wave status access. This new inteface uses an IOCTL to select banks as well as XCC identity. (v2) Fix missing xcc_id in wave_ind function (v3) Fix pm runtime calls and mutex locking (v4) Fix bad label Signed-off-by: Tom St Denis Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index da69177dc76f..cc005e3bcd40 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -1788,7 +1788,7 @@ static void wave_read_regs(struct amdgpu_device *adev, uint32_t simd, *(out++) = RREG32_SOC15(GC, 0, mmSQ_IND_DATA); } -static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) +static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields) { /* type 1 wave data */ dst[(*no_fields)++] = 1; @@ -1809,7 +1809,7 @@ static void gfx_v9_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd, u dst[(*no_fields)++] = wave_read_ind(adev, simd, wave, ixSQ_WAVE_MODE); } -static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst) { @@ -1818,7 +1818,7 @@ static void gfx_v9_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd, start + SQIND_WAVE_SGPRS_OFFSET, size, dst); } -static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, +static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t xcc_id, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst) @@ -1829,7 +1829,7 @@ static void gfx_v9_0_read_wave_vgprs(struct amdgpu_device *adev, uint32_t simd, } static void gfx_v9_0_select_me_pipe_q(struct amdgpu_device *adev, - u32 me, u32 pipe, u32 q, u32 vm) + u32 me, u32 pipe, u32 q, u32 vm, u32 xcc_id) { soc15_grbm_select(adev, me, pipe, q, vm, 0); } -- cgit From 2c22ed0bdb0cb6da9408593eafa6137325576017 Mon Sep 17 00:00:00 2001 From: Tao Zhou Date: Mon, 27 Feb 2023 18:25:23 +0800 Subject: drm/amdgpu: add instance mask for RAS inject User can specify injected instances by the mask. For backward compatibility, the mask value is incorporated into sub block index without interface change of RAS TA. User uses logical mask and driver should convert it to physical value before sending it to RAS TA. v2: update parameter name. Signed-off-by: Tao Zhou Reviewed-by: Hawking Zhang Reviewed-by: Stanley.Yang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index cc005e3bcd40..de8e70b3db75 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -770,7 +770,7 @@ static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, - void *inject_if); + void *inject_if, uint32_t instance_mask); static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev); static void gfx_v9_0_kiq_set_resources(struct amdgpu_ring *kiq_ring, @@ -6335,7 +6335,7 @@ static const struct soc15_ras_field_entry gfx_v9_0_ras_fields[] = { }; static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, - void *inject_if) + void *inject_if, uint32_t instance_mask) { struct ras_inject_if *info = (struct ras_inject_if *)inject_if; int ret; @@ -6374,7 +6374,7 @@ static int gfx_v9_0_ras_error_inject(struct amdgpu_device *adev, block_info.value = info->value; mutex_lock(&adev->grbm_idx_mutex); - ret = psp_ras_trigger_error(&adev->psp, &block_info); + ret = psp_ras_trigger_error(&adev->psp, &block_info, instance_mask); mutex_unlock(&adev->grbm_idx_mutex); return ret; -- cgit From 1385d88c6aa774332f1a88562b6f1bf04de6d710 Mon Sep 17 00:00:00 2001 From: Sukrut Bellary Date: Wed, 3 May 2023 16:15:07 -0700 Subject: drm:amd:amdgpu: Fix missing buffer object unlock in failure path smatch warning - 1) drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c:3615 gfx_v9_0_kiq_resume() warn: inconsistent returns 'ring->mqd_obj->tbo.base.resv'. 2) drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c:6901 gfx_v10_0_kiq_resume() warn: inconsistent returns 'ring->mqd_obj->tbo.base.resv'. Signed-off-by: Sukrut Bellary Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index de8e70b3db75..8bf95a6b0767 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3614,8 +3614,10 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (unlikely(r != 0)) + if (unlikely(r != 0)) { + amdgpu_bo_unreserve(ring->mqd_obj); return r; + } gfx_v9_0_kiq_init_queue(ring); amdgpu_bo_kunmap(ring->mqd_obj); -- cgit From 232f2431899cbe6c00c1350e35cfba91ea0c1c0b Mon Sep 17 00:00:00 2001 From: Guchun Chen Date: Fri, 12 May 2023 16:10:40 +0800 Subject: drm/amdgpu/gfx: set sched.ready status after ring/IB test in gfx sched.ready is nothing with ring initialization, it needs to set to be true after ring/IB test in amdgpu_ring_test_helper to tell the ring is ready for submission. Signed-off-by: Guchun Chen Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 8bf95a6b0767..fe090eafa366 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -3144,7 +3144,6 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v9_0_cp_gfx_start(adev); - ring->sched.ready = true; return 0; } @@ -3623,7 +3622,6 @@ static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) amdgpu_bo_kunmap(ring->mqd_obj); ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - ring->sched.ready = true; return 0; } -- cgit From b7941e2fef13baabd3eade31601e70adf729e887 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Wed, 24 May 2023 11:42:19 +0800 Subject: drm/amdgpu: Reset CP_VMID_PREEMPT after trailing fence signaled When MEC executes unmap_queue for mid command buffer preemption, it will kick the write pointer of the gfx ring, set CP_VMID_PREEMPT to trigger the preemption and wait for CP_VMID_PREEMPT becomes zero after the preemption done. There is a race condition that PFP may excute the resetting command before MEC set CP_VMID_PREEMPT. As a result, hang happens as CP_VMID_PREEMPT is always 0xffff. To avoid this, we send resetting CP_VMID_PREEMPT command after the trailing fence is siganled and update gfx write pointer explicitly. Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index fe090eafa366..45fa02063491 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5400,10 +5400,6 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) amdgpu_ring_alloc(ring, 13); gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr, ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT); - /*reset the CP_VMID_PREEMPT after trailing fence*/ - amdgpu_ring_emit_wreg(ring, - SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), - 0x0); /* assert IB preemption, emit the trailing fence */ kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP, @@ -5426,6 +5422,10 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) DRM_WARN("ring %d timeout to preempt ib\n", ring->idx); } + /*reset the CP_VMID_PREEMPT after trailing fence*/ + amdgpu_ring_emit_wreg(ring, + SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT), + 0x0); amdgpu_ring_commit(ring); /* deassert preemption condition */ -- cgit From cfdce594171cea19ba033e8d7ff57a767c0ccd63 Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Wed, 24 May 2023 16:51:32 +0800 Subject: drm/amdgpu: Program gds backup address as zero if no gds allocated It is firmware requirement to set gds_backup_addrlo and gds_backup_addrhi of DE meta both zero if no gds partition is allocated for the frame. Signed-off-by: Jiadong Zhu Reviewed-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 45fa02063491..0189e50bd89f 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -765,7 +765,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev); static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev, struct amdgpu_cu_info *cu_info); static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev); -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume); +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds); static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring); static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev, void *ras_error_status); @@ -5158,7 +5158,8 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, gfx_v9_0_ring_emit_de_meta(ring, (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? - true : false); + true : false, + job->gds_size > 0 && job->gds_base != 0); } amdgpu_ring_write(ring, header); @@ -5433,7 +5434,7 @@ static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring) return r; } -static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) +static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bool usegds) { struct amdgpu_device *adev = ring->adev; struct v9_de_ib_state de_payload = {0}; @@ -5464,8 +5465,10 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume) PAGE_SIZE); } - de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); - de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + if (usegds) { + de_payload.gds_backup_addrlo = lower_32_bits(gds_addr); + de_payload.gds_backup_addrhi = upper_32_bits(gds_addr); + } cnt = (sizeof(de_payload) >> 2) + 4 - 2; amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt)); -- cgit From 4504f14338cdc43586189558113faafa8acb9ffe Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 31 Mar 2022 12:05:00 -0400 Subject: drm/amdgpu: setup hw debug registers on driver initialization Add missing debug trap registers references and initialize all debug registers on boot by clearing the hardware exception overrides and the wave allocation ID index. The debugger requires that TTMPs 6 & 7 save the dispatch ID to map waves onto dispatch during compute context inspection. In order to correctly set this up, set the special reserved CP bit by default whenever the MQD is initailized. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 0189e50bd89f..7f17e0061027 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2303,6 +2303,29 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev) adev->gfx.config.num_rbs = hweight32(active_rbs); } +static void gfx_v9_0_debug_trap_config_init(struct amdgpu_device *adev, + uint32_t first_vmid, + uint32_t last_vmid) +{ + uint32_t data; + uint32_t trap_config_vmid_mask = 0; + int i; + + /* Calculate trap config vmid mask */ + for (i = first_vmid; i < last_vmid; i++) + trap_config_vmid_mask |= (1 << i); + + data = REG_SET_FIELD(0, SPI_GDBG_TRAP_CONFIG, + VMID_SEL, trap_config_vmid_mask); + data = REG_SET_FIELD(data, SPI_GDBG_TRAP_CONFIG, + TRAP_EN, 1); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_CONFIG), data); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_MASK), 0); + + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA0), 0); + WREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_GDBG_TRAP_DATA1), 0); +} + #define DEFAULT_SH_MEM_BASES (0x6000) static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) { @@ -4602,6 +4625,13 @@ static int gfx_v9_0_late_init(void *handle) if (r) return r; + if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) + gfx_v9_4_2_debug_trap_config_init(adev, + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); + else + gfx_v9_0_debug_trap_config_init(adev, + adev->vm_manager.first_kfd_vmid, AMDGPU_NUM_VMID); + return 0; } -- cgit From 01f648202c5390f4c366793b34c27cddad4ca8d7 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 30 Mar 2022 15:31:00 -0400 Subject: drm/amdgpu: add gfx9.4.1 hw debug mode enable and disable calls On GFX9.4.1, the implicit wait count instruction on s_barrier is disabled by default in the driver during normal operation for performance requirements. There is a hardware bug in GFX9.4.1 where if the implicit wait count instruction after an s_barrier instruction is disabled, any wave that hits an exception may step over the s_barrier when returning from the trap handler with the barrier logic having no ability to be aware of this, thereby causing other waves to wait at the barrier indefinitely resulting in a shader hang. This bug has been corrected for GFX9.4.2 and onward. Since the debugger subscribes to hardware exceptions, in order to avoid this bug, the debugger must enable implicit wait count on s_barrier for a debug session and disable it on detach. In order to change this setting in the in the device global SQ_CONFIG register, the GFX pipeline must be idle. GFX9.4.1 as a compute device will either dispatch work through the compute ring buffers used for image post processing or through the hardware scheduler by the KFD. Have the KGD suspend and drain the compute ring buffer, then suspend the hardware scheduler and block any future KFD process job requests before changing the implicit wait count setting. Once set, resume all work. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 7f17e0061027..f092a1dbdb56 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -2390,8 +2390,8 @@ static void gfx_v9_0_init_sq_config(struct amdgpu_device *adev) switch (adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): tmp = RREG32_SOC15(GC, 0, mmSQ_CONFIG); - tmp = REG_SET_FIELD(tmp, SQ_CONFIG, - DISABLE_BARRIER_WAITCNT, 1); + tmp = REG_SET_FIELD(tmp, SQ_CONFIG, DISABLE_BARRIER_WAITCNT, + !READ_ONCE(adev->barrier_has_auto_waitcnt)); WREG32_SOC15(GC, 0, mmSQ_CONFIG, tmp); break; default: -- cgit From f9bfc9fff2997abe3c1a560a38a0c359775e7ec5 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2023 14:34:12 -0400 Subject: Revert "drm/amdgpu: Differentiate between Raven2 and Raven/Picasso according to revision id" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 9d2d1827af295fd6971786672c41c4dba3657154. This results in inconsistent timing reported via asynchronous GPU queries. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html Cc: Jesse.Zhang@amd.com Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index f092a1dbdb56..3bc0b100936b 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -4023,25 +4023,30 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) clock = clock_lo | (clock_hi << 32ULL); break; case IP_VERSION(9, 1, 0): - case IP_VERSION(9, 2, 2): preempt_disable(); - if (adev->rev_id >= 0x8) { - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - } else { - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over + * roughly every 42 seconds. + */ + if (hi_check != clock_hi) { clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); + clock_hi = hi_check; } + preempt_enable(); + clock = clock_lo | (clock_hi << 32ULL); + break; + case IP_VERSION(9, 2, 2): + preempt_disable(); + clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); + hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ + * roughly every 42 seconds. + */ if (hi_check != clock_hi) { - if (adev->rev_id >= 0x8) - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - else - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); + clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); clock_hi = hi_check; } preempt_enable(); -- cgit From 5a03159ab7ef456ba22460e47a9d0eab2f310424 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2023 14:37:13 -0400 Subject: Revert "drm/amdgpu: switch to golden tsc registers for raven/raven2" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit f03eb1d26c2739b75580f58bbab4ab2d5d3eba46. This results in inconsistent timing reported via asynchronous GPU queries. Link: https://lists.freedesktop.org/archives/amd-gfx/2023-May/093731.html Cc: Jesse.Zhang@amd.com Cc: michel@daenzer.net Reviewed-by: Michel Dänzer Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 40 ----------------------------------- 1 file changed, 40 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 3bc0b100936b..4073e2ee7e6d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -149,16 +149,6 @@ MODULE_FIRMWARE("amdgpu/aldebaran_sjt_mec2.bin"); #define mmGOLDEN_TSC_COUNT_LOWER_Renoir 0x0026 #define mmGOLDEN_TSC_COUNT_LOWER_Renoir_BASE_IDX 1 -#define mmGOLDEN_TSC_COUNT_UPPER_Raven 0x007a -#define mmGOLDEN_TSC_COUNT_UPPER_Raven_BASE_IDX 0 -#define mmGOLDEN_TSC_COUNT_LOWER_Raven 0x007b -#define mmGOLDEN_TSC_COUNT_LOWER_Raven_BASE_IDX 0 - -#define mmGOLDEN_TSC_COUNT_UPPER_Raven2 0x0068 -#define mmGOLDEN_TSC_COUNT_UPPER_Raven2_BASE_IDX 0 -#define mmGOLDEN_TSC_COUNT_LOWER_Raven2 0x0069 -#define mmGOLDEN_TSC_COUNT_LOWER_Raven2_BASE_IDX 0 - enum ta_ras_gfx_subblock { /*CPC*/ TA_RAS_BLOCK__GFX_CPC_INDEX_START = 0, @@ -4022,36 +4012,6 @@ static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev) preempt_enable(); clock = clock_lo | (clock_hi << 32ULL); break; - case IP_VERSION(9, 1, 0): - preempt_disable(); - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven); - /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ - if (hi_check != clock_hi) { - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven); - clock_hi = hi_check; - } - preempt_enable(); - clock = clock_lo | (clock_hi << 32ULL); - break; - case IP_VERSION(9, 2, 2): - preempt_disable(); - clock_hi = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - hi_check = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_UPPER_Raven2); - /* The PWR TSC clock frequency is 100MHz, which sets 32-bit carry over - * roughly every 42 seconds. - */ - if (hi_check != clock_hi) { - clock_lo = RREG32_SOC15_NO_KIQ(PWR, 0, mmGOLDEN_TSC_COUNT_LOWER_Raven2); - clock_hi = hi_check; - } - preempt_enable(); - clock = clock_lo | (clock_hi << 32ULL); - break; default: amdgpu_gfx_off_ctrl(adev, false); mutex_lock(&adev->gfx.gpu_clock_mutex); -- cgit From ea791e704b97ab5abd563b6d2f88c4019940079e Mon Sep 17 00:00:00 2001 From: Jiadong Zhu Date: Thu, 25 May 2023 18:42:15 +0800 Subject: drm/amdgpu: Implement gfx9 patch functions for resubmission Patch the packages including CONTEXT_CONTROL and WRITE_DATA for gfx9 during the resubmission scenario. Signed-off-by: Jiadong Zhu Acked-by: Alex Deucher Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 80 +++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c') diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index 4073e2ee7e6d..65577eca58f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -5165,9 +5165,83 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, #endif lower_32_bits(ib->gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_ib_on_emit_cntl(ring); amdgpu_ring_write(ring, control); } +static void gfx_v9_0_ring_patch_cntl(struct amdgpu_ring *ring, + unsigned offset) +{ + u32 control = ring->ring[offset]; + + control |= INDIRECT_BUFFER_PRE_RESUME(1); + ring->ring[offset] = control; +} + +static void gfx_v9_0_ring_patch_ce_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *ce_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_ce_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, ce_payload); + ce_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], ce_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + ce_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + +static void gfx_v9_0_ring_patch_de_meta(struct amdgpu_ring *ring, + unsigned offset) +{ + struct amdgpu_device *adev = ring->adev; + void *de_payload_cpu_addr; + uint64_t payload_offset, payload_size; + + payload_size = sizeof(struct v9_de_ib_state); + + if (ring->is_mes_queue) { + payload_offset = offsetof(struct amdgpu_mes_ctx_meta_data, + gfx[0].gfx_meta_data) + + offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = + amdgpu_mes_ctx_get_offs_cpu_addr(ring, payload_offset); + } else { + payload_offset = offsetof(struct v9_gfx_meta_data, de_payload); + de_payload_cpu_addr = adev->virt.csa_cpu_addr + payload_offset; + } + + if (offset + (payload_size >> 2) <= ring->buf_mask + 1) { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, payload_size); + } else { + memcpy((void *)&ring->ring[offset], de_payload_cpu_addr, + (ring->buf_mask + 1 - offset) << 2); + payload_size -= (ring->buf_mask + 1 - offset) << 2; + memcpy((void *)&ring->ring[0], + de_payload_cpu_addr + ((ring->buf_mask + 1 - offset) << 2), + payload_size); + } +} + static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, struct amdgpu_job *job, struct amdgpu_ib *ib, @@ -5363,6 +5437,8 @@ static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume) amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_ce(ring); + if (resume) amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr, sizeof(ce_payload) >> 2); @@ -5474,6 +5550,7 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume, bo amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr)); + amdgpu_ring_ib_on_emit_de(ring); if (resume) amdgpu_ring_write_multiple(ring, de_payload_cpu_addr, sizeof(de_payload) >> 2); @@ -6884,6 +6961,9 @@ static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = { .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, .soft_recovery = gfx_v9_0_ring_soft_recovery, .emit_mem_sync = gfx_v9_0_emit_mem_sync, + .patch_cntl = gfx_v9_0_ring_patch_cntl, + .patch_de = gfx_v9_0_ring_patch_de_meta, + .patch_ce = gfx_v9_0_ring_patch_ce_meta, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { -- cgit