From 11f25c844e29f85abb0b3ffdb360a2f82a2c4ed0 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 5 Apr 2023 20:41:09 +0530 Subject: drm/amd/amdgpu: Drop the hang limit parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The driver doesn't resubmit jobs on hangs any more, hence drop the hang limit parameter - amdgpu_job_hang_limit, wherever it is used. Suggested-by: Christian König Cc: Alex Deucher Cc: Mario Limonciello Cc: Kent Russell Signed-off-by: Srinivasan Shanmugam Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index fac9312b1695..4819b3f86750 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2365,7 +2365,7 @@ static int amdgpu_device_init_schedulers(struct amdgpu_device *adev) } r = drm_sched_init(&ring->sched, &amdgpu_sched_ops, - ring->num_hw_submission, amdgpu_job_hang_limit, + ring->num_hw_submission, 0, timeout, adev->reset_domain->wq, ring->sched_score, ring->name, adev->dev); -- cgit From 00fa40353bf3894adb495f8cce10a8bce43cd375 Mon Sep 17 00:00:00 2001 From: Sreekant Somasekharan Date: Mon, 13 Mar 2023 18:05:41 -0400 Subject: drm/amdkfd: Check PCIe atomics support on GFX11 to set CP_HQD_HQ_STATUS0[29] CP_HQD_HQ_STATUS0[29] bit will be used by CPFW to acknowledge whether PCIe atomics are supported. The default value of this bit is set to 0. Driver will check whether PCIe atomics are supported and set the bit to 1 if supported. This will force CPFW to use real atomic ops. If the bit is not set, CPFW will default to read/modify/write using the firmware itself. This is applicable only to GFX11 RS64 CP with MEC FW >= 509. 
For MEC FW < 509, and for all GFX11 F32 CP, PCIe atomics must be supported; otherwise the driver will skip the device. This commit also moves the amdgpu_amdkfd_device_probe() function call after the per-IP early_init loop in the amdgpu_device_ip_early_init() function, so that it can check for an RS64-enabled device. Signed-off-by: Sreekant Somasekharan Reviewed-by: Graham Sider Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 4819b3f86750..efdc6b73a55c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2184,7 +2184,6 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) adev->has_pr3 = parent ? pci_pr3_present(parent) : false; } - amdgpu_amdkfd_device_probe(adev); adev->pm.pp_feature = amdgpu_pp_feature_mask; if (amdgpu_sriov_vf(adev) || sched_policy == KFD_SCHED_POLICY_NO_HWS) @@ -2240,6 +2239,7 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) if (!total) return -ENODEV; + amdgpu_amdkfd_device_probe(adev); adev->cg_flags &= amdgpu_cg_mask; adev->pg_flags &= amdgpu_pg_mask; -- cgit From 0512e9ffebca0f9a91f6e54b0da90976dce2b025 Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Fri, 24 Feb 2023 11:25:07 +0100 Subject: drm/amdgpu: rename num_doorbells Rename doorbell.num_doorbells to doorbell.num_kernel_doorbells to make it more readable. 
Cc: Alex Deucher Cc: Christian Koenig Acked-by: Christian Koenig Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index efdc6b73a55c..97068c4dee74 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -602,7 +602,7 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { return readl(adev->doorbell.ptr + index); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); @@ -625,7 +625,7 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { writel(v, adev->doorbell.ptr + index); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); @@ -646,7 +646,7 @@ u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index) if (amdgpu_device_skip_hw_access(adev)) return 0; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { return atomic64_read((atomic64_t *)(adev->doorbell.ptr + index)); } else { DRM_ERROR("reading beyond doorbell aperture: 0x%08x!\n", index); @@ -669,7 +669,7 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v) if (amdgpu_device_skip_hw_access(adev)) return; - if (index < adev->doorbell.num_doorbells) { + if (index < adev->doorbell.num_kernel_doorbells) { atomic64_set((atomic64_t *)(adev->doorbell.ptr + index), v); } else { DRM_ERROR("writing beyond doorbell aperture: 0x%08x!\n", index); @@ 
-1060,7 +1060,7 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.base = 0; adev->doorbell.size = 0; - adev->doorbell.num_doorbells = 0; + adev->doorbell.num_kernel_doorbells = 0; adev->doorbell.ptr = NULL; return 0; } @@ -1075,27 +1075,27 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) adev->doorbell.size = pci_resource_len(adev->pdev, 2); if (adev->enable_mes) { - adev->doorbell.num_doorbells = + adev->doorbell.num_kernel_doorbells = adev->doorbell.size / sizeof(u32); } else { - adev->doorbell.num_doorbells = + adev->doorbell.num_kernel_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), adev->doorbell_index.max_assignment+1); - if (adev->doorbell.num_doorbells == 0) + if (adev->doorbell.num_kernel_doorbells == 0) return -EINVAL; /* For Vega, reserve and map two pages on doorbell BAR since SDMA * paging queue doorbell use the second page. The * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the * doorbells are in the first page. So with paging queue enabled, - * the max num_doorbells should + 1 page (0x400 in dword) + * the max num_kernel_doorbells should + 1 page (0x400 in dword) */ if (adev->asic_type >= CHIP_VEGA10) - adev->doorbell.num_doorbells += 0x400; + adev->doorbell.num_kernel_doorbells += 0x400; } adev->doorbell.ptr = ioremap(adev->doorbell.base, - adev->doorbell.num_doorbells * + adev->doorbell.num_kernel_doorbells * sizeof(u32)); if (adev->doorbell.ptr == NULL) return -ENOMEM; -- cgit From f22067419e9683f8fba40ca3a0d56fb3106c7c6f Mon Sep 17 00:00:00 2001 From: Aaron Liu Date: Wed, 5 Apr 2023 19:22:20 +0800 Subject: drm/amdgpu: skip kfd-iommu suspend/resume for S0ix GFX is in gfxoff mode during s0ix so we shouldn't need to actually execute kfd_iommu_suspend/kfd_iommu_resume operation. 
Signed-off-by: Aaron Liu Acked-by: Alex Deucher Reviewed-by: Mario Limonciello Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_device.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 97068c4dee74..a2292acf06d0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3305,9 +3305,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) { int r; - r = amdgpu_amdkfd_resume_iommu(adev); - if (r) - return r; + if (!adev->in_s0ix) { + r = amdgpu_amdkfd_resume_iommu(adev); + if (r) + return r; + } r = amdgpu_device_ip_resume_phase1(adev); if (r) -- cgit