From a6bcffa596770b0c54b3ddccbc115bdab4df08e9 Mon Sep 17 00:00:00 2001 From: Hawking Zhang Date: Wed, 1 May 2024 00:12:34 +0800 Subject: drm/amdgpu: Add smu v13_0_14 ip block Add smu v13_0_14 ip block support Signed-off-by: Hawking Zhang Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index c50202215f6b..28febf33fb1b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -892,7 +892,9 @@ DEFINE_DEBUGFS_ATTRIBUTE(aca_debug_mode_fops, NULL, amdgpu_aca_smu_debug_mode_se void amdgpu_aca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root) { #if defined(CONFIG_DEBUG_FS) - if (!root || adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6)) + if (!root || + (adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 6) && + adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 14))) return; debugfs_create_file("aca_debug_mode", 0200, root, adev, &aca_debug_mode_fops); -- cgit From e6ae021adb79e5f4c4bc4362dd651d7b8b646340 Mon Sep 17 00:00:00 2001 From: Jesse Zhang Date: Thu, 9 May 2024 13:17:47 +0800 Subject: drm/amdgpu: fix the warning bad bit shift operation for aca_error_type type Filter invalid aca error types before performing a shift operation. Signed-off-by: Jesse Zhang Reviewed-by: Yang Wang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 28febf33fb1b..9e3560c190e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -534,7 +534,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han if (aca_handle_is_valid(handle)) return -EOPNOTSUPP; - if (!(BIT(type) & handle->mask)) + if ((type < 0) || (!(BIT(type) & handle->mask))) return 0; return __aca_get_error_data(adev, handle, type, err_data, qctx); -- cgit From b2aa3d4b3048598e665230e2da46a62ad1f48d52 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 14 May 2024 15:56:34 +0800 Subject: drm/amdgpu: add debug flag to enable RAS ACA Use debug_mask=0x10 (BIT.4) param to help enable RAS ACA. (RAS ACA is disabled by default.) Signed-off-by: Yang Wang Reviewed-by: Hawking Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 9e3560c190e3..0002d3ae4761 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -686,7 +686,8 @@ static void aca_manager_fini(struct aca_handle_manager *mgr) bool amdgpu_aca_is_enabled(struct amdgpu_device *adev) { - return adev->aca.is_enabled; + return (adev->aca.is_enabled || + adev->debug_enable_ras_aca); } int amdgpu_aca_init(struct amdgpu_device *adev) -- cgit From f6bce954f432c556659a57be9e18fecdc575affb Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 16 May 2024 19:57:24 +0800 Subject: drm/amdgpu: change aca bank error lock type to spinlock modify the lock type to 'spinlock' to avoid schedule issue in interrupt context. Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 0002d3ae4761..7260a6ce015a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -222,9 +222,9 @@ static struct aca_bank_error *new_bank_error(struct aca_error *aerr, struct aca_ INIT_LIST_HEAD(&bank_error->node); memcpy(&bank_error->info, info, sizeof(*info)); - mutex_lock(&aerr->lock); + spin_lock(&aerr->lock); list_add_tail(&bank_error->node, &aerr->list); - mutex_unlock(&aerr->lock); + spin_unlock(&aerr->lock); return bank_error; } @@ -235,7 +235,7 @@ static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca struct aca_bank_info *tmp_info; bool found = false; - mutex_lock(&aerr->lock); + spin_lock(&aerr->lock); list_for_each_entry(bank_error, &aerr->list, node) { tmp_info = &bank_error->info; if (tmp_info->socket_id == info->socket_id && @@ -246,7 +246,7 @@ static struct aca_bank_error *find_bank_error(struct aca_error *aerr, struct aca } out_unlock: - mutex_unlock(&aerr->lock); + spin_unlock(&aerr->lock); return found ? bank_error : NULL; } @@ -474,7 +474,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type struct aca_error *aerr = &error_cache->errors[type]; struct aca_bank_error *bank_error, *tmp; - mutex_lock(&aerr->lock); + spin_lock(&aerr->lock); if (list_empty(&aerr->list)) goto out_unlock; @@ -485,7 +485,7 @@ static int aca_log_aca_error(struct aca_handle *handle, enum aca_error_type type } out_unlock: - mutex_unlock(&aerr->lock); + spin_unlock(&aerr->lock); return 0; } @@ -542,7 +542,7 @@ int amdgpu_aca_get_error_data(struct amdgpu_device *adev, struct aca_handle *han static void aca_error_init(struct aca_error *aerr, enum aca_error_type type) { - mutex_init(&aerr->lock); + spin_lock_init(&aerr->lock); INIT_LIST_HEAD(&aerr->list); aerr->type = type; aerr->nr_errors = 0; @@ -561,11 +561,10 @@ static void aca_error_fini(struct aca_error *aerr) { struct aca_bank_error *bank_error, *tmp; - mutex_lock(&aerr->lock); + spin_lock(&aerr->lock); list_for_each_entry_safe(bank_error, tmp, &aerr->list, node) aca_bank_error_remove(aerr, bank_error); - - mutex_destroy(&aerr->lock); + spin_unlock(&aerr->lock); } static void aca_fini_error_cache(struct aca_handle *handle) -- cgit From 062a7ce676e092faf03daa6c579224bb3d904ae1 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 17 May 2024 09:18:38 +0800 Subject: drm/amdgpu: fix ACA no query result after gpu reset fix ACA no query result after gpu reset. Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 7260a6ce015a..01d50ad603d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -712,13 +712,6 @@ void amdgpu_aca_fini(struct amdgpu_device *adev) atomic_set(&aca->ue_update_flag, 0); } -int amdgpu_aca_reset(struct amdgpu_device *adev) -{ - amdgpu_aca_fini(adev); - - return amdgpu_aca_init(adev); -} - void amdgpu_aca_set_smu_funcs(struct amdgpu_device *adev, const struct aca_smu_funcs *smu_funcs) { struct amdgpu_aca *aca = &adev->aca; -- cgit