From 7e0357bef402875425de0296800c34c41842ba82 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 18 Apr 2024 12:07:27 +0800 Subject: drm/amdgpu: remove unused MCA driver codes - remove unused callback functions. - make part of mca functions static and refine the function order. Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index e5bf07ce3451..4d0a0f91c375 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -122,8 +122,6 @@ struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; int (*mca_set_debug_mode)(struct amdgpu_device *adev, bool enable); - int (*mca_get_ras_mca_set)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, - struct mca_bank_set *mca_set); int (*mca_parse_mca_error_count)(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); int (*mca_get_valid_mca_count)(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, @@ -152,23 +150,9 @@ int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); -int amdgpu_mca_smu_get_valid_mca_count(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, uint32_t *count); int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *total); -int amdgpu_mca_smu_get_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, uint32_t *count); -int 
amdgpu_mca_smu_parse_mca_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, struct mca_bank_entry *entry, uint32_t *count); -int amdgpu_mca_smu_get_mca_set(struct amdgpu_device *adev, enum amdgpu_ras_block blk, - enum amdgpu_mca_error_type type, struct mca_bank_set *mca_set); -int amdgpu_mca_smu_get_mca_entry(struct amdgpu_device *adev, enum amdgpu_mca_error_type type, - int idx, struct mca_bank_entry *entry); - void amdgpu_mca_smu_debugfs_init(struct amdgpu_device *adev, struct dentry *root); - -void amdgpu_mca_bank_set_init(struct mca_bank_set *mca_set); -int amdgpu_mca_bank_set_add_entry(struct mca_bank_set *mca_set, struct mca_bank_entry *entry); -void amdgpu_mca_bank_set_release(struct mca_bank_set *mca_set); int amdgpu_mca_smu_log_ras_error(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, struct ras_err_data *err_data, struct ras_query_context *qctx); -- cgit From 76ad30f51aa0d1bd99f12658d4775a86df6e4282 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Thu, 18 Apr 2024 15:46:00 +0800 Subject: drm/amdgpu: add MCA smu cache support v1: because SMU CE valid mca bank will be cleared after reading, this patch adds mca cache at the driver level to ensure that the mca bank is not lost. v2: refine amdgpu_mca_init/fini/reset() function name. v3: add mca_cache.lock support only add CE bank to mca bank cache. 
Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 4d0a0f91c375..9b97cfa28e05 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -77,11 +77,22 @@ struct amdgpu_mca_ras { struct amdgpu_mca_ras_block *ras; }; +struct mca_bank_set { + int nr_entries; + struct list_head list; +}; + +struct mca_bank_cache { + struct mca_bank_set mca_set; + struct mutex lock; +}; + struct amdgpu_mca { struct amdgpu_mca_ras mp0; struct amdgpu_mca_ras mp1; struct amdgpu_mca_ras mpio; const struct amdgpu_mca_smu_funcs *mca_funcs; + struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE]; }; enum mca_reg_idx { @@ -113,11 +124,6 @@ struct mca_bank_node { struct list_head node; }; -struct mca_bank_set { - int nr_entries; - struct list_head list; -}; - struct amdgpu_mca_smu_funcs { int max_ue_count; int max_ce_count; @@ -149,6 +155,9 @@ int amdgpu_mca_mp1_ras_sw_init(struct amdgpu_device *adev); int amdgpu_mca_mpio_ras_sw_init(struct amdgpu_device *adev); void amdgpu_mca_smu_init_funcs(struct amdgpu_device *adev, const struct amdgpu_mca_smu_funcs *mca_funcs); +int amdgpu_mca_init(struct amdgpu_device *adev); +void amdgpu_mca_fini(struct amdgpu_device *adev); +int amdgpu_mca_reset(struct amdgpu_device *adev); int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable); int amdgpu_mca_smu_get_mca_set_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block blk, enum amdgpu_mca_error_type type, uint32_t *total); -- cgit From 5eccab32c15f1e5cf9651d865fb20012d3563c96 Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Tue, 23 Apr 2024 10:14:47 +0800 Subject: drm/amdgpu: avoid dump mca bank log muti times during ras ISR because the ue valid 
mca count will only be cleared after gpu reset, so only dump the mca log the first time the mca banks are fetched after receiving a RAS interrupt. Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index 9b97cfa28e05..e80323ff90c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -93,6 +93,7 @@ struct amdgpu_mca { struct amdgpu_mca_ras mpio; const struct amdgpu_mca_smu_funcs *mca_funcs; struct mca_bank_cache mca_caches[AMDGPU_MCA_ERROR_TYPE_DE]; + atomic_t ue_update_flag; }; enum mca_reg_idx { -- cgit From 258ed689bc3163f86204f75df6c23f92b59b3fad Mon Sep 17 00:00:00 2001 From: Yang Wang Date: Fri, 17 May 2024 07:56:24 +0800 Subject: drm/amdgpu: change bank cache lock type to spinlock modify the lock type to 'spinlock' to avoid scheduling issues in interrupt context. Signed-off-by: Yang Wang Reviewed-by: Tao Zhou Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h index e80323ff90c1..c3c184c88dad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.h @@ -84,7 +84,7 @@ struct mca_bank_set { struct mca_bank_cache { struct mca_bank_set mca_set; - struct mutex lock; + spinlock_t lock; }; struct amdgpu_mca { -- cgit