Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
35 files changed, 921 insertions, 169 deletions
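Two of the helper APIs introduced below are easiest to follow with a standalone sketch. First, the amdgpu_ras.h changes replace the per-type seqnos[] array with a single shared sequence counter plus per-type ras_event_state, and reserve bit 63 of an event ID as the invalid sentinel (RAS_EVENT_INVALID_ID / RAS_EVENT_ID_IS_VALID in the hunk below). A minimal userspace rendering of that convention, with BIT_ULL defined locally and an illustrative seqno value:

```c
/*
 * Sketch of the RAS event-ID convention added in this series: sequence
 * numbers from ras_event_manager::seqno occupy bits 0..62, and bit 63
 * marks the invalid/sentinel ID, so RAS_EVENT_LOG() can distinguish
 * tagged from untagged messages. Userspace stand-in for the kernel macros.
 */
#include <assert.h>
#include <stdint.h>

#define BIT_ULL(n)               (1ULL << (n))
#define RAS_EVENT_INVALID_ID     (BIT_ULL(63))
#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63)))

int main(void)
{
	/* Illustrative value; in the driver this comes from
	 * atomic64_inc_return(&event_mgr->seqno). */
	uint64_t seqno = 42;

	assert(RAS_EVENT_ID_IS_VALID(seqno));
	assert(!RAS_EVENT_ID_IS_VALID(RAS_EVENT_INVALID_ID));
	return 0;
}
```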
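Second, the amdgpu_ttm.h hunk below adds bit-field helpers for packing the GFX12 DCC parameters (max compressed block, number type, data format) into the SDMA copy_flags word. A minimal sketch of how the fields compose and round-trip, with uint32_t standing in for the kernel's __u32; the three field values are illustrative only — in the driver they come from AMDGPU_TILING_GET() on the destination BO's tiling flags:

```c
/*
 * Standalone sketch of the AMDGPU_COPY_FLAGS_SET/GET helpers added to
 * amdgpu_ttm.h in this series. Fields occupy non-overlapping bit ranges:
 * MAX_COMPRESSED bits 3..4, NUMBER_TYPE bits 5..7, DATA_FORMAT bits 8..13.
 */
#include <assert.h>
#include <stdint.h>

#define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED      (1 << 2)
#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT  3
#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK   0x03
#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT     5
#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK      0x07
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT     8
#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK      0x3f

#define AMDGPU_COPY_FLAGS_SET(field, value) \
	(((uint32_t)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT)
#define AMDGPU_COPY_FLAGS_GET(value, field) \
	(((uint32_t)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK)

int main(void)
{
	/* Hypothetical DCC parameters, packed the way
	 * amdgpu_ttm_copy_mem_to_mem() does for compressed writes. */
	uint32_t copy_flags = AMDGPU_COPY_FLAGS_WRITE_COMPRESSED |
			      AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, 1) |
			      AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, 4) |
			      AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, 10);

	/* The SDMA backend can recover each field independently. */
	assert(AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED) == 1);
	assert(AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE) == 4);
	assert(AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT) == 10);
	return 0;
}
```

The full diff follows.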
diff --git a/drivers/gpu/drm/amd/amdgpu/aldebaran.c b/drivers/gpu/drm/amd/amdgpu/aldebaran.c index d0a8da67dc2a..b0f95a7649bf 100644 --- a/drivers/gpu/drm/amd/amdgpu/aldebaran.c +++ b/drivers/gpu/drm/amd/amdgpu/aldebaran.c @@ -316,8 +316,6 @@ static int aldebaran_mode2_restore_ip(struct amdgpu_device *adev) adev->ip_blocks[i].status.late_initialized = true; } - amdgpu_ras_set_error_query_ready(adev, true); - amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c index 7945173321a2..19158cc30f31 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c @@ -119,7 +119,7 @@ static struct aca_regs_dump { static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, struct aca_bank *bank, struct ras_query_context *qctx) { - u64 event_id = qctx ? qctx->event_id : 0ULL; + u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID; int i; RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index ec888fc6ead8..916b6b8cf7d9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -1093,6 +1093,21 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) unsigned int i; int r; + /* + * We can't use gang submit on with reserved VMIDs when the VM changes + * can't be invalidated by more than one engine at the same time. + */ + if (p->gang_size > 1 && !p->adev->vm_manager.concurrent_flush) { + for (i = 0; i < p->gang_size; ++i) { + struct drm_sched_entity *entity = p->entities[i]; + struct drm_gpu_scheduler *sched = entity->rq->sched; + struct amdgpu_ring *ring = to_amdgpu_ring(sched); + + if (amdgpu_vmid_uses_reserved(vm, ring->vm_hub)) + return -EINVAL; + } + } + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index b27336a05aae..bcacf2e35eba 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -3142,7 +3142,8 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) return r; } - amdgpu_ras_set_error_query_ready(adev, true); + if (!amdgpu_in_reset(adev)) + amdgpu_ras_set_error_query_ready(adev, true); amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c index f927ccd7ec45..b241f61fe9c9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c @@ -2161,8 +2161,6 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev) break; case IP_VERSION(12, 0, 0): case IP_VERSION(12, 0, 1): - if (!amdgpu_exp_hw_support) - return -EINVAL; amdgpu_device_ip_block_add(adev, &gfx_v12_0_ip_block); break; default: diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 78089f2f79f5..094498a0964b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -116,9 +116,10 @@ * - 3.55.0 - Add AMDGPU_INFO_GPUVM_FAULT query * - 3.56.0 - Update IB start address and size alignment for decode and encode * - 3.57.0 - Compute tunneling on GFX10+ + * - 3.58.0 - Add 
GFX12 DCC support */ #define KMS_DRIVER_MAJOR 3 -#define KMS_DRIVER_MINOR 57 +#define KMS_DRIVER_MINOR 58 #define KMS_DRIVER_PATCHLEVEL 0 /* diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 6b0416777c5b..ddda94e49db4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -297,6 +297,7 @@ struct amdgpu_gfx_funcs { int (*switch_partition_mode)(struct amdgpu_device *adev, int num_xccs_per_xcp); int (*ih_node_to_logical_xcc)(struct amdgpu_device *adev, int ih_node); + int (*get_xccs_per_xcp)(struct amdgpu_device *adev); }; struct sq_work { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index b5b9d4f40f53..b6a8bddada4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -424,7 +424,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring, if (r || !idle) goto error; - if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) { + if (amdgpu_vmid_uses_reserved(vm, vmhub)) { r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence); if (r || !id) goto error; @@ -474,6 +474,19 @@ error: return r; } +/* + * amdgpu_vmid_uses_reserved - check if a VM will use a reserved VMID + * @vm: the VM to check + * @vmhub: the VMHUB which will be used + * + * Returns: True if the VM will use a reserved VMID. + */ +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub) +{ + return vm->reserved_vmid[vmhub] || + (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0))); +} + int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h index fa8c42c83d5d..240fa6751260 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h @@ -78,6 +78,7 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv, bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev, struct amdgpu_vmid *id); +bool amdgpu_vmid_uses_reserved(struct amdgpu_vm *vm, unsigned int vmhub); int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev, unsigned vmhub); void amdgpu_vmid_free_reserved(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index cf0c4470ab9c..e238f2832f65 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -41,7 +41,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) int r; if (!drm_dev_enter(adev_to_drm(adev), &idx)) { - DRM_INFO("%s - device unplugged skipping recovery on scheduler:%s", + dev_info(adev->dev, "%s - device unplugged skipping recovery on scheduler:%s", __func__, s_job->sched->name); /* Effectively the job is aborted as the device is gone */ @@ -53,19 +53,20 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) if (amdgpu_gpu_recovery && amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { - DRM_ERROR("ring %s timeout, but soft recovered\n", - s_job->sched->name); + dev_err(adev->dev, "ring %s timeout, but soft recovered\n", + s_job->sched->name); goto exit; } - DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); + dev_err(adev->dev, "ring %s timeout, signaled seq=%u, emitted seq=%u\n", + job->base.sched->name, 
atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); if (ti) { - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti->process_name, ti->tgid, ti->task_name, ti->pid); + dev_err(adev->dev, + "Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); amdgpu_vm_put_task_info(ti); } @@ -82,7 +83,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) r = amdgpu_device_gpu_recover(ring->adev, job, &reset_context); if (r) - DRM_ERROR("GPU Recovery Failed: %d\n", r); + dev_err(adev->dev, "GPU Recovery Failed: %d\n", r); } else { drm_sched_suspend_timeout(&ring->sched); if (amdgpu_sriov_vf(adev)) @@ -274,7 +275,7 @@ amdgpu_job_prepare_job(struct drm_sched_job *sched_job, while (!fence && job->vm && !job->vmid) { r = amdgpu_vmid_grab(job->vm, ring, job, &fence); if (r) { - DRM_ERROR("Error getting VM ID (%d)\n", r); + dev_err(ring->adev->dev, "Error getting VM ID (%d)\n", r); goto error; } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c index 9d3a3c778504..2542bd7aa7c7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mca.c @@ -274,7 +274,7 @@ int amdgpu_mca_smu_set_debug_mode(struct amdgpu_device *adev, bool enable) static void amdgpu_mca_smu_mca_bank_dump(struct amdgpu_device *adev, int idx, struct mca_bank_entry *entry, struct ras_query_context *qctx) { - u64 event_id = qctx->event_id; + u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID; RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n"); RAS_EVENT_LOG(adev, event_id, HW_ERR "aca entry[%02d].STATUS=0x%016llx\n", @@ -543,7 +543,7 @@ static int mca_dump_show(struct seq_file *m, enum amdgpu_mca_error_type type) amdgpu_mca_bank_set_init(&mca_set); - qctx.event_id = 0ULL; + qctx.evid.event_id = RAS_EVENT_INVALID_ID; ret = amdgpu_mca_smu_get_mca_set(adev, type, &mca_set, &qctx); if (ret) goto err_free_mca_set; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 800cc7a148b2..7cdff355cedb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -1591,6 +1591,68 @@ static void psp_ras_ta_check_status(struct psp_context *psp) } } +static int psp_ras_send_cmd(struct psp_context *psp, + enum ras_command cmd_id, void *in, void *out) +{ + struct ta_ras_shared_memory *ras_cmd; + uint32_t cmd = cmd_id; + int ret = 0; + + if (!in) + return -EINVAL; + + mutex_lock(&psp->ras_context.mutex); + ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; + memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); + + switch (cmd) { + case TA_RAS_COMMAND__ENABLE_FEATURES: + case TA_RAS_COMMAND__DISABLE_FEATURES: + memcpy(&ras_cmd->ras_in_message, + in, sizeof(ras_cmd->ras_in_message)); + break; + case TA_RAS_COMMAND__TRIGGER_ERROR: + memcpy(&ras_cmd->ras_in_message.trigger_error, + in, sizeof(ras_cmd->ras_in_message.trigger_error)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + memcpy(&ras_cmd->ras_in_message.address, + in, sizeof(ras_cmd->ras_in_message.address)); + break; + default: + dev_err(psp->adev->dev, "Invalid ras cmd id: %u\n", cmd); + ret = -EINVAL; + goto err_out; + } + + ras_cmd->cmd_id = cmd; + ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + + switch (cmd) { + case TA_RAS_COMMAND__TRIGGER_ERROR: + if (ret || 
psp->cmd_buf_mem->resp.status) + ret = -EINVAL; + else if (out) + memcpy(out, &ras_cmd->ras_status, sizeof(ras_cmd->ras_status)); + break; + case TA_RAS_COMMAND__QUERY_ADDRESS: + if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + ret = -EINVAL; + else if (out) + memcpy(out, + &ras_cmd->ras_out_message.address, + sizeof(ras_cmd->ras_out_message.address)); + break; + default: + break; + } + +err_out: + mutex_unlock(&psp->ras_context.mutex); + + return ret; +} + int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) { struct ta_ras_shared_memory *ras_cmd; @@ -1632,23 +1694,15 @@ int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id) int psp_ras_enable_features(struct psp_context *psp, union ta_ras_cmd_input *info, bool enable) { - struct ta_ras_shared_memory *ras_cmd; + enum ras_command cmd_id; int ret; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - if (enable) - ras_cmd->cmd_id = TA_RAS_COMMAND__ENABLE_FEATURES; - else - ras_cmd->cmd_id = TA_RAS_COMMAND__DISABLE_FEATURES; - - ras_cmd->ras_in_message = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + cmd_id = enable ? + TA_RAS_COMMAND__ENABLE_FEATURES : TA_RAS_COMMAND__DISABLE_FEATURES; + ret = psp_ras_send_cmd(psp, cmd_id, info, NULL); if (ret) return -EINVAL; @@ -1672,6 +1726,8 @@ int psp_ras_terminate(struct psp_context *psp) psp->ras_context.context.initialized = false; + mutex_destroy(&psp->ras_context.mutex); + return ret; } @@ -1756,9 +1812,10 @@ int psp_ras_initialize(struct psp_context *psp) ret = psp_ta_load(psp, &psp->ras_context.context); - if (!ret && !ras_cmd->ras_status) + if (!ret && !ras_cmd->ras_status) { psp->ras_context.context.initialized = true; - else { + mutex_init(&psp->ras_context.mutex); + } else { if (ras_cmd->ras_status) dev_warn(adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status); @@ -1772,12 +1829,12 @@ int psp_ras_initialize(struct psp_context *psp) int psp_ras_trigger_error(struct psp_context *psp, struct ta_ras_trigger_error_input *info, uint32_t instance_mask) { - struct ta_ras_shared_memory *ras_cmd; struct amdgpu_device *adev = psp->adev; int ret; uint32_t dev_mask; + uint32_t ras_status = 0; - if (!psp->ras_context.context.initialized) + if (!psp->ras_context.context.initialized || !info) return -EINVAL; switch (info->block_id) { @@ -1801,13 +1858,8 @@ int psp_ras_trigger_error(struct psp_context *psp, dev_mask &= AMDGPU_RAS_INST_MASK; info->sub_block_index |= dev_mask; - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__TRIGGER_ERROR; - ras_cmd->ras_in_message.trigger_error = *info; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__TRIGGER_ERROR, info, &ras_status); if (ret) return -EINVAL; @@ -1817,9 +1869,9 @@ int psp_ras_trigger_error(struct psp_context *psp, if (amdgpu_ras_intr_triggered()) return 0; - if (ras_cmd->ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) + if (ras_status == TA_RAS_STATUS__TEE_ERROR_ACCESS_DENIED) return -EACCES; - else if (ras_cmd->ras_status) + else if (ras_status) return -EINVAL; return 0; @@ -1829,25 +1881,16 @@ int psp_ras_query_address(struct psp_context *psp, struct ta_ras_query_address_input *addr_in, struct 
ta_ras_query_address_output *addr_out) { - struct ta_ras_shared_memory *ras_cmd; int ret; - if (!psp->ras_context.context.initialized) - return -EINVAL; - - ras_cmd = (struct ta_ras_shared_memory *)psp->ras_context.context.mem_context.shared_buf; - memset(ras_cmd, 0, sizeof(struct ta_ras_shared_memory)); - - ras_cmd->cmd_id = TA_RAS_COMMAND__QUERY_ADDRESS; - ras_cmd->ras_in_message.address = *addr_in; - - ret = psp_ras_invoke(psp, ras_cmd->cmd_id); - if (ret || ras_cmd->ras_status || psp->cmd_buf_mem->resp.status) + if (!psp->ras_context.context.initialized || + !addr_in || !addr_out) return -EINVAL; - *addr_out = ras_cmd->ras_out_message.address; + ret = psp_ras_send_cmd(psp, + TA_RAS_COMMAND__QUERY_ADDRESS, addr_in, addr_out); - return 0; + return ret; } // ras end diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 3635303e6548..74a96516c913 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -200,6 +200,7 @@ struct psp_xgmi_context { struct psp_ras_context { struct ta_context context; struct amdgpu_ras *ras; + struct mutex mutex; }; #define MEM_TRAIN_SYSTEM_SIGNATURE 0x54534942 diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c index 8e8afbd237bc..0c856005df6b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c @@ -348,6 +348,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size context->session_id = ta_id; + mutex_lock(&psp->ras_context.mutex); ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len); if (ret) goto err_free_shared_buf; @@ -366,6 +367,7 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size ret = -EFAULT; err_free_shared_buf: + mutex_unlock(&psp->ras_context.mutex); kfree(shared_buf); return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index 6d1f974e2987..d0307c55da50 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -124,6 +124,8 @@ const char *get_ras_block_str(struct ras_common_if *ras_block) #define AMDGPU_RAS_RETIRE_PAGE_INTERVAL 100 //ms +#define MAX_FLUSH_RETIRE_DWORK_TIMES 100 + enum amdgpu_ras_retire_page_reservation { AMDGPU_RAS_RETIRE_PAGE_RESERVED, AMDGPU_RAS_RETIRE_PAGE_PENDING, @@ -1055,7 +1057,7 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev, struct amdgpu_smuio_mcm_config_info *mcm_info; struct ras_err_node *err_node; struct ras_err_info *err_info; - u64 event_id = qctx->event_id; + u64 event_id = qctx->evid.event_id; if (is_ue) { for_each_ras_error(err_node, err_data) { @@ -1140,7 +1142,7 @@ static void amdgpu_ras_error_generate_report(struct amdgpu_device *adev, { struct ras_manager *ras_mgr = amdgpu_ras_find_obj(adev, &query_if->head); const char *blk_name = get_ras_block_str(&query_if->head); - u64 event_id = qctx->event_id; + u64 event_id = qctx->evid.event_id; if (err_data->ce_count) { if (err_data_has_source_info(err_data)) { @@ -1295,6 +1297,9 @@ ssize_t amdgpu_ras_aca_sysfs_read(struct device *dev, struct device_attribute *a .head = obj->head, }; + if (!amdgpu_ras_get_error_query_ready(obj->adev)) + return sysfs_emit(buf, "Query currently inaccessible\n"); + if (amdgpu_ras_query_error_status(obj->adev, &info)) return -EINVAL; @@ -1363,7 +1368,9 @@ static int amdgpu_ras_query_error_status_helper(struct amdgpu_device *adev, } /* 
query/inject/cure begin */ -int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +static int amdgpu_ras_query_error_status_with_event(struct amdgpu_device *adev, + struct ras_query_if *info, + enum ras_event_type type) { struct ras_manager *obj = amdgpu_ras_find_obj(adev, &info->head); struct ras_err_data err_data; @@ -1382,8 +1389,8 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_i return -EINVAL; memset(&qctx, 0, sizeof(qctx)); - qctx.event_id = amdgpu_ras_acquire_event_id(adev, amdgpu_ras_intr_triggered() ? - RAS_EVENT_TYPE_ISR : RAS_EVENT_TYPE_INVALID); + qctx.evid.type = type; + qctx.evid.event_id = amdgpu_ras_acquire_event_id(adev, type); if (!down_read_trylock(&adev->reset_domain->sem)) { ret = -EIO; @@ -1412,6 +1419,11 @@ out_fini_err_data: return ret; } +int amdgpu_ras_query_error_status(struct amdgpu_device *adev, struct ras_query_if *info) +{ + return amdgpu_ras_query_error_status_with_event(adev, info, RAS_EVENT_TYPE_INVALID); +} + int amdgpu_ras_reset_error_count(struct amdgpu_device *adev, enum amdgpu_ras_block block) { @@ -1721,6 +1733,39 @@ static ssize_t amdgpu_ras_sysfs_schema_show(struct device *dev, return sysfs_emit(buf, "schema: 0x%x\n", con->schema); } +static struct { + enum ras_event_type type; + const char *name; +} dump_event[] = { + {RAS_EVENT_TYPE_FATAL, "Fatal Error"}, + {RAS_EVENT_TYPE_POISON_CREATION, "Poison Creation"}, + {RAS_EVENT_TYPE_POISON_CONSUMPTION, "Poison Consumption"}, +}; + +static ssize_t amdgpu_ras_sysfs_event_state_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct amdgpu_ras *con = + container_of(attr, struct amdgpu_ras, event_state_attr); + struct ras_event_manager *event_mgr = con->event_mgr; + struct ras_event_state *event_state; + int i, size = 0; + + if (!event_mgr) + return -EINVAL; + + size += sysfs_emit_at(buf, size, "current seqno: %llu\n", atomic64_read(&event_mgr->seqno)); + for (i = 0; i < ARRAY_SIZE(dump_event); i++) { + event_state = &event_mgr->event_state[dump_event[i].type]; + size += sysfs_emit_at(buf, size, "%s: count:%llu, last_seqno:%llu\n", + dump_event[i].name, + atomic64_read(&event_state->count), + event_state->last_seqno); + } + + return (ssize_t)size; +} + static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1738,6 +1783,7 @@ static int amdgpu_ras_sysfs_remove_dev_attr_node(struct amdgpu_device *adev) &con->features_attr.attr, &con->version_attr.attr, &con->schema_attr.attr, + &con->event_state_attr.attr, NULL }; struct attribute_group group = { @@ -1970,6 +2016,8 @@ static DEVICE_ATTR(version, 0444, amdgpu_ras_sysfs_version_show, NULL); static DEVICE_ATTR(schema, 0444, amdgpu_ras_sysfs_schema_show, NULL); +static DEVICE_ATTR(event_state, 0444, + amdgpu_ras_sysfs_event_state_show, NULL); static int amdgpu_ras_fs_init(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); @@ -1980,6 +2028,7 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) &con->features_attr.attr, &con->version_attr.attr, &con->schema_attr.attr, + &con->event_state_attr.attr, NULL }; struct bin_attribute *bin_attrs[] = { @@ -2002,6 +2051,10 @@ static int amdgpu_ras_fs_init(struct amdgpu_device *adev) con->schema_attr = dev_attr_schema; sysfs_attr_init(attrs[2]); + /* add event_state entry */ + con->event_state_attr = dev_attr_event_state; + sysfs_attr_init(attrs[3]); + if (amdgpu_bad_page_threshold != 0) { /* add 
bad_page_features entry */ bin_attr_gpu_vram_bad_pages.private = NULL; @@ -2066,10 +2119,17 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * struct amdgpu_ras_block_object *block_obj = amdgpu_ras_get_ras_block(adev, obj->head.block, 0); struct amdgpu_ras *con = amdgpu_ras_get_context(adev); + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CONSUMPTION; + u64 event_id; + int ret; if (!block_obj || !con) return; + ret = amdgpu_ras_mark_ras_event(adev, type); + if (ret) + return; + /* both query_poison_status and handle_poison_consumption are optional, * but at least one of them should be implemented if we need poison * consumption handler @@ -2094,8 +2154,10 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * * For RMA case, amdgpu_umc_poison_handler will handle gpu reset. */ if (poison_stat && !con->is_rma) { - dev_info(adev->dev, "GPU reset for %s RAS poison consumption is issued!\n", - block_obj->ras_comm.name); + event_id = amdgpu_ras_acquire_event_id(adev, type); + RAS_EVENT_LOG(adev, event_id, + "GPU reset for %s RAS poison consumption is issued!\n", + block_obj->ras_comm.name); amdgpu_ras_reset_gpu(adev); } @@ -2106,8 +2168,17 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager * static void amdgpu_ras_interrupt_poison_creation_handler(struct ras_manager *obj, struct amdgpu_iv_entry *entry) { - dev_info(obj->adev->dev, - "Poison is created\n"); + struct amdgpu_device *adev = obj->adev; + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; + u64 event_id; + int ret; + + ret = amdgpu_ras_mark_ras_event(adev, type); + if (ret) + return; + + event_id = amdgpu_ras_acquire_event_id(adev, type); + RAS_EVENT_LOG(adev, event_id, "Poison is created\n"); if (amdgpu_ip_version(obj->adev, UMC_HWIP, 0) >= IP_VERSION(12, 0, 0)) { struct amdgpu_ras *con = amdgpu_ras_get_context(obj->adev); @@ -2302,7 +2373,7 @@ static int amdgpu_ras_interrupt_remove_all(struct amdgpu_device *adev) /* ih end */ /* traversal all IPs except NBIO to query error counter */ -static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) +static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev, enum ras_event_type type) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_manager *obj; @@ -2335,7 +2406,7 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev) IP_VERSION(13, 0, 2))) continue; - amdgpu_ras_query_error_status(adev, &info); + amdgpu_ras_query_error_status_with_event(adev, &info, type); if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(11, 0, 2) && @@ -2474,6 +2545,14 @@ bool amdgpu_ras_in_recovery(struct amdgpu_device *adev) return false; } +static enum ras_event_type amdgpu_ras_get_fatal_error_event(struct amdgpu_device *adev) +{ + if (amdgpu_ras_intr_triggered()) + return RAS_EVENT_TYPE_FATAL; + else + return RAS_EVENT_TYPE_POISON_CONSUMPTION; +} + static void amdgpu_ras_do_recovery(struct work_struct *work) { struct amdgpu_ras *ras = @@ -2482,6 +2561,7 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) struct amdgpu_device *adev = ras->adev; struct list_head device_list, *device_list_handle = NULL; struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev); + enum ras_event_type type; if (hive) { atomic_set(&hive->ras_recovery, 1); @@ -2509,10 +2589,11 @@ static void amdgpu_ras_do_recovery(struct work_struct *work) device_list_handle = &device_list; } + type = amdgpu_ras_get_fatal_error_event(adev); list_for_each_entry(remote_adev, 
device_list_handle, gmc.xgmi.head) { amdgpu_ras_query_err_status(remote_adev); - amdgpu_ras_log_on_err_counter(remote_adev); + amdgpu_ras_log_on_err_counter(remote_adev, type); } } @@ -2828,6 +2909,23 @@ static void amdgpu_ras_ecc_log_fini(struct ras_ecc_log_info *ecc_log) ecc_log->prev_de_queried_count = 0; } +static bool amdgpu_ras_schedule_retirement_dwork(struct amdgpu_ras *con, + uint32_t delayed_ms) +{ + int ret; + + mutex_lock(&con->umc_ecc_log.lock); + ret = radix_tree_tagged(&con->umc_ecc_log.de_page_tree, + UMC_ECC_NEW_DETECTED_TAG); + mutex_unlock(&con->umc_ecc_log.lock); + + if (ret) + schedule_delayed_work(&con->page_retirement_dwork, + msecs_to_jiffies(delayed_ms)); + + return ret ? true : false; +} + static void amdgpu_ras_do_page_retirement(struct work_struct *work) { struct amdgpu_ras *con = container_of(work, struct amdgpu_ras, @@ -2836,8 +2934,12 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) struct ras_err_data err_data; unsigned long err_cnt; - if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) + /* If gpu reset is ongoing, delay retiring the bad pages */ + if (amdgpu_in_reset(adev) || amdgpu_ras_in_recovery(adev)) { + amdgpu_ras_schedule_retirement_dwork(con, + AMDGPU_RAS_RETIRE_PAGE_INTERVAL * 3); return; + } amdgpu_ras_error_data_init(&err_data); @@ -2849,12 +2951,8 @@ static void amdgpu_ras_do_page_retirement(struct work_struct *work) if (err_cnt && con->is_rma) amdgpu_ras_reset_gpu(adev); - mutex_lock(&con->umc_ecc_log.lock); - if (radix_tree_tagged(&con->umc_ecc_log.de_page_tree, - UMC_ECC_NEW_DETECTED_TAG)) - schedule_delayed_work(&con->page_retirement_dwork, - msecs_to_jiffies(AMDGPU_RAS_RETIRE_PAGE_INTERVAL)); - mutex_unlock(&con->umc_ecc_log.lock); + amdgpu_ras_schedule_retirement_dwork(con, + AMDGPU_RAS_RETIRE_PAGE_INTERVAL); } static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, @@ -2869,6 +2967,7 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, uint32_t new_detect_count, total_detect_count; uint32_t need_query_count = poison_creation_count; bool query_data_timeout = false; + enum ras_event_type type = RAS_EVENT_TYPE_POISON_CREATION; memset(&info, 0, sizeof(info)); info.head.block = AMDGPU_RAS_BLOCK__UMC; @@ -2876,7 +2975,7 @@ static int amdgpu_ras_poison_creation_handler(struct amdgpu_device *adev, ecc_log = &ras->umc_ecc_log; total_detect_count = 0; do { - ret = amdgpu_ras_query_error_status(adev, &info); + ret = amdgpu_ras_query_error_status_with_event(adev, &info, type); if (ret) return ret; @@ -3157,11 +3256,19 @@ static int amdgpu_ras_recovery_fini(struct amdgpu_device *adev) { struct amdgpu_ras *con = amdgpu_ras_get_context(adev); struct ras_err_handler_data *data = con->eh_data; + int max_flush_timeout = MAX_FLUSH_RETIRE_DWORK_TIMES; + bool ret; /* recovery_init failed to init it, fini is useless */ if (!data) return 0; + /* Save all cached bad pages to eeprom */ + do { + flush_delayed_work(&con->page_retirement_dwork); + ret = amdgpu_ras_schedule_retirement_dwork(con, 0); + } while (ret && max_flush_timeout--); + if (con->page_retirement_thread) kthread_stop(con->page_retirement_thread); @@ -3401,10 +3508,17 @@ static int amdgpu_get_ras_schema(struct amdgpu_device *adev) static void ras_event_mgr_init(struct ras_event_manager *mgr) { + struct ras_event_state *event_state; int i; - for (i = 0; i < ARRAY_SIZE(mgr->seqnos); i++) - atomic64_set(&mgr->seqnos[i], 0); + memset(mgr, 0, sizeof(*mgr)); + atomic64_set(&mgr->seqno, 0); + + for (i = 0; i < 
ARRAY_SIZE(mgr->event_state); i++) { + event_state = &mgr->event_state[i]; + event_state->last_seqno = RAS_EVENT_INVALID_ID; + atomic64_set(&event_state->count, 0); + } } static void amdgpu_ras_event_mgr_init(struct amdgpu_device *adev) @@ -3904,23 +4018,68 @@ void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status) atomic_set(&ras->fed, !!status); } -bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id) +static struct ras_event_manager *__get_ras_event_mgr(struct amdgpu_device *adev) +{ + struct amdgpu_ras *ras; + + ras = amdgpu_ras_get_context(adev); + if (!ras) + return NULL; + + return ras->event_mgr; +} + +int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, + const void *caller) { - return !(id & BIT_ULL(63)); + struct ras_event_manager *event_mgr; + struct ras_event_state *event_state; + int ret = 0; + + if (type >= RAS_EVENT_TYPE_COUNT) { + ret = -EINVAL; + goto out; + } + + event_mgr = __get_ras_event_mgr(adev); + if (!event_mgr) { + ret = -EINVAL; + goto out; + } + + event_state = &event_mgr->event_state[type]; + event_state->last_seqno = atomic64_inc_return(&event_mgr->seqno); + atomic64_inc(&event_state->count); + +out: + if (ret && caller) + dev_warn(adev->dev, "failed mark ras event (%d) in %ps, ret:%d\n", + (int)type, caller, ret); + + return ret; } u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type) { - struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); + struct ras_event_manager *event_mgr; u64 id; + if (type >= RAS_EVENT_TYPE_COUNT) + return RAS_EVENT_INVALID_ID; + switch (type) { - case RAS_EVENT_TYPE_ISR: - id = (u64)atomic64_read(&ras->event_mgr->seqnos[type]); + case RAS_EVENT_TYPE_FATAL: + case RAS_EVENT_TYPE_POISON_CREATION: + case RAS_EVENT_TYPE_POISON_CONSUMPTION: + event_mgr = __get_ras_event_mgr(adev); + if (!event_mgr) + return RAS_EVENT_INVALID_ID; + + id = event_mgr->event_state[type].last_seqno; break; case RAS_EVENT_TYPE_INVALID: default: - id = BIT_ULL(63) | 0ULL; + id = RAS_EVENT_INVALID_ID; break; } @@ -3931,7 +4090,13 @@ void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) { if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { struct amdgpu_ras *ras = amdgpu_ras_get_context(adev); - u64 event_id = (u64)atomic64_inc_return(&ras->event_mgr->seqnos[RAS_EVENT_TYPE_ISR]); + enum ras_event_type type = RAS_EVENT_TYPE_FATAL; + u64 event_id; + + if (amdgpu_ras_mark_ras_event(adev, type)) + return; + + event_id = amdgpu_ras_acquire_event_id(adev, type); RAS_EVENT_LOG(adev, event_id, "uncorrectable hardware error" "(ERREVENT_ATHUB_INTERRUPT) detected!\n"); @@ -4665,7 +4830,7 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, vaf.fmt = fmt; vaf.va = &args; - if (amdgpu_ras_event_id_is_valid(adev, event_id)) + if (RAS_EVENT_ID_IS_VALID(event_id)) dev_printk(KERN_INFO, adev->dev, "{%llu}%pV", event_id, &vaf); else dev_printk(KERN_INFO, adev->dev, "%pV", &vaf); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 0fa1148e6642..dcf1f3dbb5c4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -68,8 +68,14 @@ struct amdgpu_iv_entry; /* The high three bits indicates socketid */ #define AMDGPU_RAS_GET_FEATURES(val) ((val) & ~AMDGPU_RAS_FEATURES_SOCKETID_MASK) +#define RAS_EVENT_INVALID_ID (BIT_ULL(63)) +#define RAS_EVENT_ID_IS_VALID(x) (!((x) & BIT_ULL(63))) + #define RAS_EVENT_LOG(adev, id, fmt, ...) 
\ - amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__); + amdgpu_ras_event_log_print((adev), (id), (fmt), ##__VA_ARGS__) + +#define amdgpu_ras_mark_ras_event(adev, type) \ + (amdgpu_ras_mark_ras_event_caller((adev), (type), __builtin_return_address(0))) enum amdgpu_ras_block { AMDGPU_RAS_BLOCK__UMC = 0, @@ -427,20 +433,32 @@ struct umc_ecc_info { }; enum ras_event_type { - RAS_EVENT_TYPE_INVALID = -1, - RAS_EVENT_TYPE_ISR = 0, + RAS_EVENT_TYPE_INVALID = 0, + RAS_EVENT_TYPE_FATAL, + RAS_EVENT_TYPE_POISON_CREATION, + RAS_EVENT_TYPE_POISON_CONSUMPTION, RAS_EVENT_TYPE_COUNT, }; +struct ras_event_state { + u64 last_seqno; + atomic64_t count; +}; + struct ras_event_manager { - atomic64_t seqnos[RAS_EVENT_TYPE_COUNT]; + atomic64_t seqno; + struct ras_event_state event_state[RAS_EVENT_TYPE_COUNT]; }; -struct ras_query_context { +struct ras_event_id { enum ras_event_type type; u64 event_id; }; +struct ras_query_context { + struct ras_event_id evid; +}; + typedef int (*pasid_notify)(struct amdgpu_device *adev, uint16_t pasid, void *data); @@ -483,6 +501,7 @@ struct amdgpu_ras { struct device_attribute features_attr; struct device_attribute version_attr; struct device_attribute schema_attr; + struct device_attribute event_state_attr; struct bin_attribute badpages_attr; struct dentry *de_ras_eeprom_table; /* block array */ @@ -947,8 +966,9 @@ void amdgpu_ras_del_mca_err_addr(struct ras_err_info *err_info, void amdgpu_ras_set_fed(struct amdgpu_device *adev, bool status); bool amdgpu_ras_get_fed_status(struct amdgpu_device *adev); -bool amdgpu_ras_event_id_is_valid(struct amdgpu_device *adev, u64 id); u64 amdgpu_ras_acquire_event_id(struct amdgpu_device *adev, enum ras_event_type type); +int amdgpu_ras_mark_ras_event_caller(struct amdgpu_device *adev, enum ras_event_type type, + const void *caller); int amdgpu_ras_reserve_page(struct amdgpu_device *adev, uint64_t pfn); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index 58906bf7448e..b8bc7fa8c375 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -308,7 +308,8 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, mutex_lock(&adev->mman.gtt_window_lock); while (src_mm.remaining) { - uint64_t from, to, cur_size; + uint64_t from, to, cur_size, tiling_flags; + uint32_t num_type, data_format, max_com; struct dma_fence *next; /* Never copy more than 256MiB at once to avoid a timeout */ @@ -329,10 +330,20 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, abo_dst = ttm_to_amdgpu_bo(dst->bo); if (tmz) copy_flags |= AMDGPU_COPY_FLAGS_TMZ; - if (abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + if ((abo_src->flags & AMDGPU_GEM_CREATE_GFX12_DCC) && + (abo_src->tbo.resource->mem_type == TTM_PL_VRAM)) copy_flags |= AMDGPU_COPY_FLAGS_READ_DECOMPRESSED; - if (abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) + if ((abo_dst->flags & AMDGPU_GEM_CREATE_GFX12_DCC) && + (dst->mem->mem_type == TTM_PL_VRAM)) { copy_flags |= AMDGPU_COPY_FLAGS_WRITE_COMPRESSED; + amdgpu_bo_get_tiling_flags(abo_dst, &tiling_flags); + max_com = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_MAX_COMPRESSED_BLOCK); + num_type = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_NUMBER_TYPE); + data_format = AMDGPU_TILING_GET(tiling_flags, GFX12_DCC_DATA_FORMAT); + copy_flags |= (AMDGPU_COPY_FLAGS_SET(MAX_COMPRESSED, max_com) | + AMDGPU_COPY_FLAGS_SET(NUMBER_TYPE, num_type) | + AMDGPU_COPY_FLAGS_SET(DATA_FORMAT, data_format)); + } r = amdgpu_copy_buffer(ring, from, to, cur_size, resv, 
&next, false, true, copy_flags); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index f2eb1cf364c5..138d80017f35 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -112,6 +112,17 @@ struct amdgpu_copy_mem { #define AMDGPU_COPY_FLAGS_TMZ (1 << 0) #define AMDGPU_COPY_FLAGS_READ_DECOMPRESSED (1 << 1) #define AMDGPU_COPY_FLAGS_WRITE_COMPRESSED (1 << 2) +#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_SHIFT 3 +#define AMDGPU_COPY_FLAGS_MAX_COMPRESSED_MASK 0x03 +#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_SHIFT 5 +#define AMDGPU_COPY_FLAGS_NUMBER_TYPE_MASK 0x07 +#define AMDGPU_COPY_FLAGS_DATA_FORMAT_SHIFT 8 +#define AMDGPU_COPY_FLAGS_DATA_FORMAT_MASK 0x3f + +#define AMDGPU_COPY_FLAGS_SET(field, value) \ + (((__u32)(value) & AMDGPU_COPY_FLAGS_##field##_MASK) << AMDGPU_COPY_FLAGS_##field##_SHIFT) +#define AMDGPU_COPY_FLAGS_GET(value, field) \ + (((__u32)(value) >> AMDGPU_COPY_FLAGS_##field##_SHIFT) & AMDGPU_COPY_FLAGS_##field##_MASK) int amdgpu_gtt_mgr_init(struct amdgpu_device *adev, uint64_t gtt_size); void amdgpu_gtt_mgr_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 8d65b096db90..43f44cc201cb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -147,6 +147,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) } } + /* from vcn4 and above, only unified queue is used */ + adev->vcn.using_unified_queue = + amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0); + hdr = (const struct common_firmware_header *)adev->vcn.fw[0]->data; adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version); @@ -275,18 +279,6 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev) return 0; } -/* from vcn4 and above, only unified queue is used */ -static bool amdgpu_vcn_using_unified_queue(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - bool ret = false; - - if (amdgpu_ip_version(adev, UVD_HWIP, 0) >= IP_VERSION(4, 0, 0)) - ret = true; - - return ret; -} - bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance) { bool ret = false; @@ -397,7 +389,9 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work) for (i = 0; i < adev->vcn.num_enc_rings; ++i) fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (fence[j] || @@ -443,7 +437,9 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); - if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && + !adev->vcn.using_unified_queue) { struct dpg_pause_state new_state; if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) { @@ -469,8 +465,12 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) { + struct amdgpu_device *adev = ring->adev; + + /* Only set DPG pause for VCN3 or below, VCN4 and above will be handled by FW */ if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG && - ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + 
ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC && + !adev->vcn.using_unified_queue) atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt); atomic_dec(&ring->adev->vcn.total_submission_cnt); @@ -724,12 +724,11 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, struct amdgpu_job *job; struct amdgpu_ib *ib; uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr); - bool sq = amdgpu_vcn_using_unified_queue(ring); uint32_t *ib_checksum; uint32_t ib_pack_in_dw; int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -742,7 +741,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, ib->length_dw = 0; /* single queue headers */ - if (sq) { + if (adev->vcn.using_unified_queue) { ib_pack_in_dw = sizeof(struct amdgpu_vcn_decode_buffer) / sizeof(uint32_t) + 4 + 2; /* engine info + decoding ib in dw */ ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, ib_pack_in_dw, false); @@ -761,7 +760,7 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, ib_pack_in_dw); r = amdgpu_job_submit_direct(job, ring, &f); @@ -851,15 +850,15 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -873,7 +872,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -895,7 +894,7 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); @@ -918,15 +917,15 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han struct dma_fence **fence) { unsigned int ib_size_dw = 16; + struct amdgpu_device *adev = ring->adev; struct amdgpu_job *job; struct amdgpu_ib *ib; struct dma_fence *f = NULL; uint32_t *ib_checksum = NULL; uint64_t addr; - bool sq = amdgpu_vcn_using_unified_queue(ring); int i, r; - if (sq) + if (adev->vcn.using_unified_queue) ib_size_dw += 8; r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, @@ -940,7 +939,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han ib->length_dw = 0; - if (sq) + if (adev->vcn.using_unified_queue) ib_checksum = amdgpu_vcn_unified_ring_ib_header(ib, 0x11, true); ib->ptr[ib->length_dw++] = 0x00000018; @@ -962,7 +961,7 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (sq) + if (adev->vcn.using_unified_queue) amdgpu_vcn_unified_ring_ib_checksum(&ib_checksum, 0x11); r = amdgpu_job_submit_direct(job, ring, &f); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 9f06def236fd..1a5439abd1a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -329,6 +329,7 @@ struct amdgpu_vcn { uint16_t inst_mask; uint8_t num_inst_per_aid; + bool using_unified_queue; }; struct amdgpu_fw_shared_rb_ptrs_struct { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index ccb3d041c2b2..111c380f929b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -86,8 +86,10 @@ int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init) if (virt->ops && virt->ops->req_full_gpu) { r = virt->ops->req_full_gpu(adev, init); - if (r) + if (r) { + adev->no_hw_access = true; return r; + } adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c index 2b99eed5ba19..a6d456ec6aeb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xcp.c @@ -219,7 +219,8 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) { int mode; - if (xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + if (!amdgpu_sriov_vf(xcp_mgr->adev) && + xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) return xcp_mgr->mode; if (!xcp_mgr->funcs || !xcp_mgr->funcs->query_partition_mode) @@ -228,6 +229,12 @@ int amdgpu_xcp_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr, u32 flags) if (!(flags & AMDGPU_XCP_FL_LOCKED)) mutex_lock(&xcp_mgr->xcp_lock); mode = xcp_mgr->funcs->query_partition_mode(xcp_mgr); + + /* First time query for VF, set the mode here */ + if (amdgpu_sriov_vf(xcp_mgr->adev) && + xcp_mgr->mode == AMDGPU_XCP_MODE_NONE) + xcp_mgr->mode = mode; + if (xcp_mgr->mode != AMDGPU_XCP_MODE_TRANS && mode != xcp_mgr->mode) dev_WARN( xcp_mgr->adev->dev, @@ -282,8 +289,7 @@ int amdgpu_xcp_mgr_init(struct amdgpu_device *adev, int init_mode, { struct amdgpu_xcp_mgr *xcp_mgr; - if (!xcp_funcs || !xcp_funcs->switch_partition_mode || - !xcp_funcs->get_ip_details) + if (!xcp_funcs || !xcp_funcs->get_ip_details) return -EINVAL; xcp_mgr = kzalloc(sizeof(*xcp_mgr), GFP_KERNEL); diff --git a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c index 2c9a0aa41e2d..228fd4dd32f1 100644 --- a/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c +++ b/drivers/gpu/drm/amd/amdgpu/aqua_vanjaram.c @@ -304,13 +304,56 @@ u64 aqua_vanjaram_encode_ext_smn_addressing(int ext_id) return ext_offset; } +static enum amdgpu_gfx_partition +__aqua_vanjaram_calc_xcp_mode(struct amdgpu_xcp_mgr *xcp_mgr) +{ + struct amdgpu_device *adev = xcp_mgr->adev; + int num_xcc, num_xcc_per_xcp = 0, mode = 0; + + num_xcc = NUM_XCC(xcp_mgr->adev->gfx.xcc_mask); + if (adev->gfx.funcs->get_xccs_per_xcp) + num_xcc_per_xcp = adev->gfx.funcs->get_xccs_per_xcp(adev); + if ((num_xcc_per_xcp) && (num_xcc % num_xcc_per_xcp == 0)) + mode = num_xcc / num_xcc_per_xcp; + + if (num_xcc_per_xcp == 1) + return AMDGPU_CPX_PARTITION_MODE; + + switch (mode) { + case 1: + return AMDGPU_SPX_PARTITION_MODE; + case 2: + return AMDGPU_DPX_PARTITION_MODE; + case 3: + return AMDGPU_TPX_PARTITION_MODE; + case 4: + return AMDGPU_QPX_PARTITION_MODE; + default: + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + } + + return AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; +} + static int aqua_vanjaram_query_partition_mode(struct amdgpu_xcp_mgr *xcp_mgr) { - enum amdgpu_gfx_partition mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; + enum amdgpu_gfx_partition 
derv_mode, + mode = AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE; struct amdgpu_device *adev = xcp_mgr->adev; - if (adev->nbio.funcs->get_compute_partition_mode) + derv_mode = __aqua_vanjaram_calc_xcp_mode(xcp_mgr); + + if (amdgpu_sriov_vf(adev)) + return derv_mode; + + if (adev->nbio.funcs->get_compute_partition_mode) { mode = adev->nbio.funcs->get_compute_partition_mode(adev); + if (mode != derv_mode) + dev_warn( + adev->dev, + "Mismatch in compute partition mode - reported : %d derived : %d", + mode, derv_mode); + } return mode; } @@ -624,6 +667,9 @@ static int aqua_vanjaram_xcp_mgr_init(struct amdgpu_device *adev) { int ret; + if (amdgpu_sriov_vf(adev)) + aqua_vanjaram_xcp_funcs.switch_partition_mode = NULL; + ret = amdgpu_xcp_mgr_init(adev, AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE, 1, &aqua_vanjaram_xcp_funcs); if (ret) diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c index a52c72739b40..2957702fca0c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c @@ -7324,11 +7324,9 @@ static int gfx_v10_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. */ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_pm_load_smu_firmware(adev, NULL); - if (r) - return r; - } + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; gfx_v10_0_disable_gpa_mode(adev); } @@ -9334,7 +9332,7 @@ static void gfx_v10_ip_dump(void *handle) for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { /* ME0 is for GFX so start from 1 for CP */ - nv_grbm_select(adev, 1 + i, j, k, 0); + nv_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { adev->gfx.ip_dump_compute_queues[index + reg] = diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c index 6228dd0450a7..dcef39907449 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c @@ -4558,11 +4558,9 @@ static int gfx_v11_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. 
*/ - if (!(adev->flags & AMD_IS_APU)) { - r = amdgpu_pm_load_smu_firmware(adev, NULL); - if (r) - return r; - } + r = amdgpu_pm_load_smu_firmware(adev, NULL); + if (r) + return r; } gfx_v11_0_constants_init(adev); @@ -6458,7 +6456,7 @@ static void gfx_v11_ip_dump(void *handle) for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) { for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) { /* ME0 is for GFX so start from 1 for CP */ - soc21_grbm_select(adev, 1+i, j, k, 0); + soc21_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0); for (reg = 0; reg < reg_count; reg++) { adev->gfx.ip_dump_compute_queues[index + reg] = RREG32(SOC15_REG_ENTRY_OFFSET( diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c index ccb26f78252a..f384be0d1800 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v12_0.c @@ -63,6 +63,145 @@ MODULE_FIRMWARE("amdgpu/gc_12_0_1_mec.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_rlc.bin"); MODULE_FIRMWARE("amdgpu/gc_12_0_1_toc.bin"); +static const struct amdgpu_hwip_reg_entry gc_reg_list_12_0[] = { + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_STALLED_STAT3), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STALLED_STAT1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_BUSY_STAT2), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPF_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HPD_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB0_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB2_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCPF_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPC_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCPG_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regIA_UTCL1_STATUS_2), + SOC15_REG_ENTRY_STR(GC, 0, regPA_CL_CNTL_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regRMI_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regSQC_CACHES), + SOC15_REG_ENTRY_STR(GC, 0, regSQG_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regWD_UTCL1_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_LO32), + SOC15_REG_ENTRY_STR(GC, 0, regGCVM_L2_PROTECTION_FAULT_STATUS_HI32), + SOC15_REG_ENTRY_STR(GC, 0, regCP_DEBUG), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, 
regCP_PFP_INSTR_PNTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_CPC_STATUS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_RS64_INSTR_PNTR1), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_RS64_INSTR_PNTR), + + /* cp header registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_MEC_ME1_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_PFP_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_ME_HEADER_DUMP), + SOC15_REG_ENTRY_STR(GC, 0, regCP_MES_HEADER_DUMP), + /* SE status registers */ + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE0), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE1), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE2), + SOC15_REG_ENTRY_STR(GC, 0, regGRBM_STATUS_SE3) +}; + +static const struct amdgpu_hwip_reg_entry gc_cp_reg_list_12[] = { + /* compute registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PERSISTENT_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PIPE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_DOORBELL_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_IB_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_EVENTS), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CNTL_STACK_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_CTX_SAVE_SIZE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_GDS_RESOURCE_STATE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_ERROR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_EOP_WPTR_MEM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_PQ_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_CNTL_STACK_DW_CNT), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_SUSPEND_WG_STATE_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_HQD_DEQUEUE_STATUS) +}; + +static const struct amdgpu_hwip_reg_entry gc_gfx_queue_reg_list_12[] = { + /* gfx queue registers */ + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_ACTIVE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_VMID), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUEUE_PRIORITY), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUANTUM), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_OFFSET), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_CNTL), + SOC15_REG_ENTRY_STR(GC, 0, 
regCP_GFX_HQD_CSMD_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_WPTR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_DEQUEUE_REQUEST), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_MAPPED), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_QUE_MGR_CONTROL), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_CONTROL0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_HQD_HQ_STATUS0), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_GFX_MQD_BASE_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_WPTR_POLL_ADDR_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_RB_RPTR), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_LO), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BASE_HI), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_CMD_BUFSZ), + SOC15_REG_ENTRY_STR(GC, 0, regCP_IB1_BUFSZ) +}; + #define DEFAULT_SH_MEM_CONFIG \ ((SH_MEM_ADDRESS_MODE_64 << SH_MEM_CONFIG__ADDRESS_MODE__SHIFT) | \ (SH_MEM_ALIGNMENT_MODE_UNALIGNED << SH_MEM_CONFIG__ALIGNMENT_MODE__SHIFT) | \ @@ -1129,6 +1268,47 @@ static int gfx_v12_0_rlc_backdoor_autoload_enable(struct amdgpu_device *adev) return 0; } +static void gfx_v12_0_alloc_ip_dump(struct amdgpu_device *adev) +{ + uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0); + uint32_t *ptr; + uint32_t inst; + + ptr = kcalloc(reg_count, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX IP Dump\n"); + adev->gfx.ip_dump_core = NULL; + } else { + adev->gfx.ip_dump_core = ptr; + } + + /* Allocate memory for compute queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_cp_reg_list_12); + inst = adev->gfx.mec.num_mec * adev->gfx.mec.num_pipe_per_mec * + adev->gfx.mec.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for Compute Queues IP Dump\n"); + adev->gfx.ip_dump_compute_queues = NULL; + } else { + adev->gfx.ip_dump_compute_queues = ptr; + } + + /* Allocate memory for gfx queue registers for all the instances */ + reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12); + inst = adev->gfx.me.num_me * adev->gfx.me.num_pipe_per_me * + adev->gfx.me.num_queue_per_pipe; + + ptr = kcalloc(reg_count * inst, sizeof(uint32_t), GFP_KERNEL); + if (ptr == NULL) { + DRM_ERROR("Failed to allocate memory for GFX Queues IP Dump\n"); + adev->gfx.ip_dump_gfx_queues = NULL; + } else { + adev->gfx.ip_dump_gfx_queues = ptr; + } +} + static int gfx_v12_0_sw_init(void *handle) { int i, j, k, r, ring_id = 0; @@ -1261,6 +1441,8 @@ static int gfx_v12_0_sw_init(void *handle) if (r) return r; + gfx_v12_0_alloc_ip_dump(adev); + return 0; } @@ -1320,6 +1502,10 @@ static int gfx_v12_0_sw_fini(void *handle) gfx_v12_0_free_microcode(adev); + kfree(adev->gfx.ip_dump_core); + kfree(adev->gfx.ip_dump_compute_queues); + kfree(adev->gfx.ip_dump_gfx_queues); + return 0; } @@ -3306,11 +3492,9 @@ static int gfx_v12_0_hw_init(void *handle) * loaded firstly, so in direct type, it has to load smc ucode * here before rlc. 
@@ -3306,11 +3492,9 @@ static int gfx_v12_0_hw_init(void *handle)
 		 * loaded firstly, so in direct type, it has to load smc ucode
 		 * here before rlc.
 		 */
-		if (!(adev->flags & AMD_IS_APU)) {
-			r = amdgpu_pm_load_smu_firmware(adev, NULL);
-			if (r)
-				return r;
-		}
+		r = amdgpu_pm_load_smu_firmware(adev, NULL);
+		if (r)
+			return r;
 	}
 
 	gfx_v12_0_constants_init(adev);
@@ -4673,6 +4857,136 @@ static void gfx_v12_0_emit_mem_sync(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
 }
 
+static void gfx_v12_ip_print(void *handle, struct drm_printer *p)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t i, j, k, reg, index = 0;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	for (i = 0; i < reg_count; i++)
+		drm_printf(p, "%-50s \t 0x%08x\n",
+			   gc_reg_list_12_0[i].reg_name,
+			   adev->gfx.ip_dump_core[i]);
+
+	/* print compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+	drm_printf(p, "\nnum_mec: %d num_pipe: %d num_queue: %d\n",
+		   adev->gfx.mec.num_mec,
+		   adev->gfx.mec.num_pipe_per_mec,
+		   adev->gfx.mec.num_queue_per_pipe);
+
+	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+				drm_printf(p, "\nmec %d, pipe %d, queue %d\n", i, j, k);
+				for (reg = 0; reg < reg_count; reg++) {
+					drm_printf(p, "%-50s \t 0x%08x\n",
+						   gc_cp_reg_list_12[reg].reg_name,
+						   adev->gfx.ip_dump_compute_queues[index + reg]);
+				}
+				index += reg_count;
+			}
+		}
+	}
+
+	/* print gfx queue registers for all instances */
+	if (!adev->gfx.ip_dump_gfx_queues)
+		return;
+
+	index = 0;
+	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+	drm_printf(p, "\nnum_me: %d num_pipe: %d num_queue: %d\n",
+		   adev->gfx.me.num_me,
+		   adev->gfx.me.num_pipe_per_me,
+		   adev->gfx.me.num_queue_per_pipe);
+
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+				drm_printf(p, "\nme %d, pipe %d, queue %d\n", i, j, k);
+				for (reg = 0; reg < reg_count; reg++) {
+					drm_printf(p, "%-50s \t 0x%08x\n",
+						   gc_gfx_queue_reg_list_12[reg].reg_name,
+						   adev->gfx.ip_dump_gfx_queues[index + reg]);
+				}
+				index += reg_count;
+			}
+		}
+	}
+}
+
+static void gfx_v12_ip_dump(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	uint32_t i, j, k, reg, index = 0;
+	uint32_t reg_count = ARRAY_SIZE(gc_reg_list_12_0);
+
+	if (!adev->gfx.ip_dump_core)
+		return;
+
+	amdgpu_gfx_off_ctrl(adev, false);
+	for (i = 0; i < reg_count; i++)
+		adev->gfx.ip_dump_core[i] = RREG32(SOC15_REG_ENTRY_OFFSET(gc_reg_list_12_0[i]));
+	amdgpu_gfx_off_ctrl(adev, true);
+
+	/* dump compute queue registers for all instances */
+	if (!adev->gfx.ip_dump_compute_queues)
+		return;
+
+	reg_count = ARRAY_SIZE(gc_cp_reg_list_12);
+	amdgpu_gfx_off_ctrl(adev, false);
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.mec.num_mec; i++) {
+		for (j = 0; j < adev->gfx.mec.num_pipe_per_mec; j++) {
+			for (k = 0; k < adev->gfx.mec.num_queue_per_pipe; k++) {
+				/* ME0 is for GFX so start from 1 for CP */
+				soc24_grbm_select(adev, adev->gfx.me.num_me + i, j, k, 0);
+				for (reg = 0; reg < reg_count; reg++) {
+					adev->gfx.ip_dump_compute_queues[index + reg] =
+						RREG32(SOC15_REG_ENTRY_OFFSET(
+							gc_cp_reg_list_12[reg]));
+				}
+				index += reg_count;
+			}
+		}
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	amdgpu_gfx_off_ctrl(adev, true);
+
+	/* dump gfx queue registers for all instances */
+	if (!adev->gfx.ip_dump_gfx_queues)
+		return;
+
+	index = 0;
+	reg_count = ARRAY_SIZE(gc_gfx_queue_reg_list_12);
+	amdgpu_gfx_off_ctrl(adev, false);
+	mutex_lock(&adev->srbm_mutex);
+	for (i = 0; i < adev->gfx.me.num_me; i++) {
+		for (j = 0; j < adev->gfx.me.num_pipe_per_me; j++) {
+			for (k = 0; k < adev->gfx.me.num_queue_per_pipe; k++) {
+				soc24_grbm_select(adev, i, j, k, 0);
+
+				for (reg = 0; reg < reg_count; reg++) {
+					adev->gfx.ip_dump_gfx_queues[index + reg] =
+						RREG32(SOC15_REG_ENTRY_OFFSET(
							gc_gfx_queue_reg_list_12[reg]));
+				}
+				index += reg_count;
+			}
+		}
+	}
+	soc24_grbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
+	amdgpu_gfx_off_ctrl(adev, true);
+}
+
 static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
 	.name = "gfx_v12_0",
 	.early_init = gfx_v12_0_early_init,
@@ -4688,6 +5002,8 @@ static const struct amd_ip_funcs gfx_v12_0_ip_funcs = {
 	.set_clockgating_state = gfx_v12_0_set_clockgating_state,
 	.set_powergating_state = gfx_v12_0_set_powergating_state,
 	.get_clockgating_state = gfx_v12_0_get_clockgating_state,
+	.dump_ip_state = gfx_v12_ip_dump,
+	.print_ip_state = gfx_v12_ip_print,
 };
 
 static const struct amdgpu_ring_funcs gfx_v12_0_ring_funcs_gfx = {
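
gfx_v12_ip_dump() and gfx_v12_ip_print() must walk the flattened buffers in the same mec/pipe/queue order, each queue advancing the cursor by reg_count; the dump side also offsets the GRBM select by adev->gfx.me.num_me because ME0 carries the GFX queues. A small runnable model of that shared indexing (topology values are illustrative):

#include <stdio.h>

/* Same walk order as gfx_v12_ip_dump/gfx_v12_ip_print: mec, then pipe, then
 * queue, with reg_count words per queue. The index below is equivalent to the
 * running "index += reg_count" cursor the driver keeps. */
static size_t queue_base(int mec, int pipe, int queue,
			 int pipes_per_mec, int queues_per_pipe, size_t reg_count)
{
	return (((size_t)mec * pipes_per_mec + pipe) * queues_per_pipe + queue) * reg_count;
}

int main(void)
{
	/* e.g. mec 1, pipe 2, queue 3 with 4 pipes/mec, 8 queues/pipe, 40 regs */
	printf("base index: %zu\n", queue_base(1, 2, 3, 4, 8, 40));
	return 0;
}
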
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 1149595a02d8..20ea6cb01edf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -652,6 +652,15 @@ static void gfx_v9_4_3_select_me_pipe_q(struct amdgpu_device *adev,
 	soc15_grbm_select(adev, me, pipe, q, vm, GET_INST(GC, xcc_id));
 }
 
+static int gfx_v9_4_3_get_xccs_per_xcp(struct amdgpu_device *adev)
+{
+	u32 xcp_ctl;
+
+	/* Value is expected to be the same on all, fetch from first instance */
+	xcp_ctl = RREG32_SOC15(GC, GET_INST(GC, 0), regCP_HYP_XCP_CTL);
+
+	return REG_GET_FIELD(xcp_ctl, CP_HYP_XCP_CTL, NUM_XCC_IN_XCP);
+}
 
 static int gfx_v9_4_3_switch_compute_partition(struct amdgpu_device *adev,
 						int num_xccs_per_xcp)
@@ -706,6 +715,7 @@ static const struct amdgpu_gfx_funcs gfx_v9_4_3_gfx_funcs = {
 	.select_me_pipe_q = &gfx_v9_4_3_select_me_pipe_q,
 	.switch_partition_mode = &gfx_v9_4_3_switch_compute_partition,
 	.ih_node_to_logical_xcc = &gfx_v9_4_3_ih_to_xcc_inst,
+	.get_xccs_per_xcp = &gfx_v9_4_3_get_xccs_per_xcp,
 };
 
 static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
@@ -1613,6 +1623,9 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
 					    DOORBELL_SOURCE, 0);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 					    DOORBELL_HIT, 0);
+		if (amdgpu_sriov_vf(adev))
+			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
+					    DOORBELL_MODE, 1);
 	} else {
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				    DOORBELL_EN, 0);
@@ -2047,18 +2060,31 @@ static int gfx_v9_4_3_xcc_cp_resume(struct amdgpu_device *adev, int xcc_id)
 
 static int gfx_v9_4_3_cp_resume(struct amdgpu_device *adev)
 {
-	int r = 0, i, num_xcc;
+	int r = 0, i, num_xcc, num_xcp, num_xcc_per_xcp;
+
+	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
+	if (amdgpu_sriov_vf(adev)) {
+		enum amdgpu_gfx_partition mode;
 
-	if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
-					    AMDGPU_XCP_FL_NONE) ==
-	    AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
-		r = amdgpu_xcp_switch_partition_mode(adev->xcp_mgr,
-						     amdgpu_user_partt_mode);
+		mode = amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+						       AMDGPU_XCP_FL_NONE);
+		if (mode == AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+			return -EINVAL;
+
+		num_xcc_per_xcp = gfx_v9_4_3_get_xccs_per_xcp(adev);
+		adev->gfx.num_xcc_per_xcp = num_xcc_per_xcp;
+		num_xcp = num_xcc / num_xcc_per_xcp;
+		r = amdgpu_xcp_init(adev->xcp_mgr, num_xcp, mode);
+	} else {
+		if (amdgpu_xcp_query_partition_mode(adev->xcp_mgr,
+						    AMDGPU_XCP_FL_NONE) ==
+		    AMDGPU_UNKNOWN_COMPUTE_PARTITION_MODE)
+			r = amdgpu_xcp_switch_partition_mode(
+				adev->xcp_mgr, amdgpu_user_partt_mode);
+	}
 	if (r)
 		return r;
 
-	num_xcc = NUM_XCC(adev->gfx.xcc_mask);
 	for (i = 0; i < num_xcc; i++) {
 		r = gfx_v9_4_3_xcc_cp_resume(adev, i);
 		if (r)
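
Under SR-IOV the VF no longer chooses a partition mode itself; it reads NUM_XCC_IN_XCP out of CP_HYP_XCP_CTL and derives the partition count as num_xcc / num_xcc_per_xcp. A sketch of that derivation with a hand-rolled mask/shift in place of REG_GET_FIELD (the field position below is invented for illustration, not taken from the CP_HYP_XCP_CTL layout):

#include <stdio.h>

/* Simplified mask/shift extraction in the spirit of REG_GET_FIELD. */
#define NUM_XCC_IN_XCP_SHIFT 0
#define NUM_XCC_IN_XCP_MASK  0xf

int main(void)
{
	unsigned int xcp_ctl = 0x2;	/* pretend register read */
	unsigned int xcc_per_xcp =
		(xcp_ctl & NUM_XCC_IN_XCP_MASK) >> NUM_XCC_IN_XCP_SHIFT;
	unsigned int num_xcc = 8;	/* stand-in for NUM_XCC(adev->gfx.xcc_mask) */

	if (xcc_per_xcp == 0)
		return 1;		/* guard the division, as a sanity check */
	printf("xcps: %u\n", num_xcc / xcc_per_xcp);	/* 8 XCCs / 2 per XCP = 4 */
	return 0;
}
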
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
index 7ea64f1e1e48..7609b9cecae8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v12_0.c
@@ -35,7 +35,27 @@
 #define regGRBM_GFX_INDEX_DEFAULT 0xe0000000
 
 static const char *gfxhub_client_ids[] = {
-	/* TODO */
+	"CB",
+	"DB",
+	"GE1",
+	"GE2",
+	"CPF",
+	"CPC",
+	"CPG",
+	"RLC",
+	"TCP",
+	"SQC (inst)",
+	"SQC (data)",
+	"SQG/PC/SC",
+	"Reserved",
+	"SDMA0",
+	"SDMA1",
+	"GCR",
+	"Reserved",
+	"Reserved",
+	"WGS",
+	"DSM",
+	"PA"
 };
 
 static uint32_t gfxhub_v12_0_get_invalidate_req(unsigned int vmid,
diff --git a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
index aa6235dd4f2b..6852081fcff2 100644
--- a/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/ih_v7_0.c
@@ -346,6 +346,21 @@ static int ih_v7_0_irq_init(struct amdgpu_device *adev)
 			    DELAY, 3);
 	WREG32_SOC15(OSSSYS, 0, regIH_MSI_STORM_CTRL, tmp);
 
+	/* Redirect the interrupts to IH RB1 for dGPU */
+	if (adev->irq.ih1.ring_size) {
+		tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX);
+		tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_INDEX, INDEX, 0);
+		WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_INDEX, tmp);
+
+		tmp = RREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA);
+		tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, CLIENT_ID, 0xa);
+		tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA, SOURCE_ID, 0x0);
+		tmp = REG_SET_FIELD(tmp, IH_RING1_CLIENT_CFG_DATA,
+				    SOURCE_ID_MATCH_ENABLE, 0x1);
+
+		WREG32_SOC15(OSSSYS, 0, regIH_RING1_CLIENT_CFG_DATA, tmp);
+	}
+
 	pci_set_master(adev->pdev);
 
 	/* enable interrupts */
@@ -546,8 +561,15 @@ static int ih_v7_0_sw_init(void *handle)
 	adev->irq.ih.use_doorbell = true;
 	adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1;
 
-	adev->irq.ih1.ring_size = 0;
-	adev->irq.ih2.ring_size = 0;
+	if (!(adev->flags & AMD_IS_APU)) {
+		r = amdgpu_ih_ring_init(adev, &adev->irq.ih1, IH_RING_SIZE,
+					use_bus_addr);
+		if (r)
+			return r;
+
+		adev->irq.ih1.use_doorbell = true;
+		adev->irq.ih1.doorbell_index = (adev->doorbell_index.ih + 1) << 1;
+	}
 
 	/* initialize ih control register offset */
 	ih_v7_0_init_register_offset(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index 1376b6ff1b77..8ce51b9236c1 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -120,6 +120,9 @@ static const char *mes_v11_0_opcodes[] = {
 	"MISC",
 	"UPDATE_ROOT_PAGE_TABLE",
 	"AMD_LOG",
+	"unused",
+	"unused",
+	"SET_HW_RSRC_1",
 };
 
 static const char *mes_v11_0_misc_opcodes[] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
index 106eef1ff5cc..c9f74231ad59 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
@@ -99,6 +99,7 @@ static const char *mes_v12_0_opcodes[] = {
 	"SET_LOG_BUFFER",
 	"CHANGE_GANG_PRORITY",
 	"QUERY_SCHEDULER_STATUS",
+	"unused",
 	"SET_DEBUG_VMID",
 	"MISC",
 	"UPDATE_ROOT_PAGE_TABLE",
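
Both MES opcode tables are indexed directly by the firmware opcode value, which is why gaps in the firmware enum have to be padded with "unused" entries; otherwise every later name would shift by one. A runnable sketch of the convention, with a bounds check for opcodes past the end of the table (the names are trimmed for brevity; the indices here are illustrative, not the firmware's actual numbering):

#include <stdio.h>

/* Index-aligned opcode names: firmware opcode N maps to opcodes[N], so
 * reserved slots in the firmware enum must stay padded to keep alignment. */
static const char *opcodes[] = {
	"QUERY_SCHEDULER_STATUS",
	"unused",		/* reserved opcode slot */
	"SET_DEBUG_VMID",
};

static const char *opcode_name(unsigned int op)
{
	if (op >= sizeof(opcodes) / sizeof(opcodes[0]))
		return "UNKNOWN";
	return opcodes[op];
}

int main(void)
{
	printf("%s\n", opcode_name(2));	/* SET_DEBUG_VMID */
	printf("%s\n", opcode_name(9));	/* UNKNOWN: out of table */
	return 0;
}
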
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 01644a869738..2c55bfd935bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -83,7 +83,7 @@ static unsigned sdma_v4_4_2_seq_to_irq_id(int seq_num)
 	}
 }
 
-static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
+static int sdma_v4_4_2_irq_id_to_seq(struct amdgpu_device *adev, unsigned client_id)
 {
 	switch (client_id) {
 	case SOC15_IH_CLIENTID_SDMA0:
@@ -91,9 +91,15 @@ static int sdma_v4_4_2_irq_id_to_seq(unsigned client_id)
 	case SOC15_IH_CLIENTID_SDMA1:
 		return 1;
 	case SOC15_IH_CLIENTID_SDMA2:
-		return 2;
+		if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+			return 0;
+		else
+			return 2;
 	case SOC15_IH_CLIENTID_SDMA3:
-		return 3;
+		if (amdgpu_sriov_vf(adev) && (adev->gfx.xcc_mask == 0x1))
+			return 1;
+		else
+			return 3;
 	default:
 		return -EINVAL;
 	}
@@ -1524,7 +1530,7 @@ static int sdma_v4_4_2_process_trap_irq(struct amdgpu_device *adev,
 	uint32_t instance, i;
 
 	DRM_DEBUG("IH: SDMA trap\n");
-	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 
 	/* Client id gives the SDMA instance in AID. To know the exact SDMA
 	 * instance, interrupt entry gives the node id which corresponds to the AID instance.
@@ -1567,7 +1573,7 @@ static int sdma_v4_4_2_process_ras_data_cb(struct amdgpu_device *adev,
 	if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__SDMA))
 		goto out;
 
-	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 	if (instance < 0)
 		goto out;
 
@@ -1586,7 +1592,7 @@ static int sdma_v4_4_2_process_illegal_inst_irq(struct amdgpu_device *adev,
 
 	DRM_ERROR("Illegal instruction in SDMA command stream\n");
 
-	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 	if (instance < 0)
 		return 0;
 
@@ -1620,7 +1626,7 @@ static int sdma_v4_4_2_print_iv_entry(struct amdgpu_device *adev,
 	struct amdgpu_task_info *task_info;
 	u64 addr;
 
-	instance = sdma_v4_4_2_irq_id_to_seq(entry->client_id);
+	instance = sdma_v4_4_2_irq_id_to_seq(adev, entry->client_id);
 	if (instance < 0 || instance >= adev->sdma.num_instances) {
 		dev_err(adev->dev, "sdma instance invalid %d\n", instance);
 		return -EINVAL;
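
The change above makes sdma_v4_4_2_irq_id_to_seq() device-aware: on a single-XCC SR-IOV VF, events arriving with the SDMA2/SDMA3 client ids are folded onto instances 0 and 1. One plausible reading is that such a VF only owns two SDMA engines regardless of which client ids the host wires them to; the patch itself only encodes the remap. A compact, runnable model of the mapping:

#include <stdio.h>
#include <stdbool.h>

/* Stand-in client ids; the real ones are SOC15_IH_CLIENTID_SDMA0..3. */
enum { CLIENTID_SDMA0, CLIENTID_SDMA1, CLIENTID_SDMA2, CLIENTID_SDMA3 };

/* Mirrors the new switch: the single-XCC SR-IOV case collapses the
 * SDMA2/SDMA3 client ids onto instances 0/1. */
static int irq_id_to_seq(bool sriov_single_xcc, int client_id)
{
	switch (client_id) {
	case CLIENTID_SDMA0: return 0;
	case CLIENTID_SDMA1: return 1;
	case CLIENTID_SDMA2: return sriov_single_xcc ? 0 : 2;
	case CLIENTID_SDMA3: return sriov_single_xcc ? 1 : 3;
	default:             return -1;
	}
}

int main(void)
{
	printf("bare metal SDMA2 -> %d\n", irq_id_to_seq(false, CLIENTID_SDMA2));
	printf("VF (1 XCC) SDMA2 -> %d\n", irq_id_to_seq(true, CLIENTID_SDMA2));
	return 0;
}
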
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
index 96514fd77e35..41b5e45697dc 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
@@ -1566,6 +1566,12 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
 				       uint32_t byte_count,
 				       uint32_t copy_flags)
 {
+	uint32_t num_type, data_format, max_com;
+
+	max_com = AMDGPU_COPY_FLAGS_GET(copy_flags, MAX_COMPRESSED);
+	data_format = AMDGPU_COPY_FLAGS_GET(copy_flags, DATA_FORMAT);
+	num_type = AMDGPU_COPY_FLAGS_GET(copy_flags, NUMBER_TYPE);
+
 	ib->ptr[ib->length_dw++] = SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_COPY) |
 		SDMA_PKT_COPY_LINEAR_HEADER_SUB_OP(SDMA_SUBOP_COPY_LINEAR) |
 		SDMA_PKT_COPY_LINEAR_HEADER_TMZ((copy_flags & AMDGPU_COPY_FLAGS_TMZ) ? 1 : 0) |
@@ -1580,10 +1586,10 @@ static void sdma_v7_0_emit_copy_buffer(struct amdgpu_ib *ib,
 	ib->ptr[ib->length_dw++] = upper_32_bits(dst_offset);
 
 	if ((copy_flags & (AMDGPU_COPY_FLAGS_READ_DECOMPRESSED | AMDGPU_COPY_FLAGS_WRITE_COMPRESSED)))
-		ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(4) | SDMA_DCC_NUM_TYPE(4) |
+		ib->ptr[ib->length_dw++] = SDMA_DCC_DATA_FORMAT(data_format) | SDMA_DCC_NUM_TYPE(num_type) |
 			((copy_flags & AMDGPU_COPY_FLAGS_READ_DECOMPRESSED) ? SDMA_DCC_READ_CM(2) : 0) |
 			((copy_flags & AMDGPU_COPY_FLAGS_WRITE_COMPRESSED) ? SDMA_DCC_WRITE_CM(1) : 0) |
-			SDMA_DCC_MAX_COM(1) | SDMA_DCC_MAX_UCOM(1);
+			SDMA_DCC_MAX_COM(max_com) | SDMA_DCC_MAX_UCOM(1);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index 6cc86d13f32a..d30ad7d56def 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -774,7 +774,9 @@ static int soc21_common_early_init(void *handle)
 			AMD_CG_SUPPORT_IH_CG |
 			AMD_CG_SUPPORT_BIF_MGCG |
 			AMD_CG_SUPPORT_BIF_LS;
-		adev->pg_flags = AMD_PG_SUPPORT_VCN |
+		adev->pg_flags = AMD_PG_SUPPORT_VCN_DPG |
+			AMD_PG_SUPPORT_VCN |
+			AMD_PG_SUPPORT_JPEG_DPG |
 			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_GFX_PG;
 		adev->external_rev_id = adev->rev_id + 0x40;
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 0faa21d8a7b4..9dbb13adb661 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -29,6 +29,7 @@
 #include "mp/mp_13_0_6_sh_mask.h"
 
 #define MAX_ECC_NUM_PER_RETIREMENT 32
+#define DELAYED_TIME_FOR_GPU_RESET 1000 //ms
 
 static inline uint64_t get_umc_v12_0_reg_offset(struct amdgpu_device *adev,
 						uint32_t node_inst,
@@ -568,6 +569,23 @@ static int umc_v12_0_update_ecc_status(struct amdgpu_device *adev,
 
 	con->umc_ecc_log.de_queried_count++;
 
+	/* The problem case is as follows:
+	 * 1. GPU A triggers a gpu ras reset, and GPU A drives
+	 *    GPU B to also perform a gpu ras reset.
+	 * 2. After gpu B ras reset started, gpu B queried a DE
+	 *    data. Since the DE data was queried in the ras reset
+	 *    thread instead of the page retirement thread, bad
+	 *    page retirement work would not be triggered. Then
+	 *    even if all gpu resets are completed, the bad pages
+	 *    will be cached in RAM until GPU B's bad page retirement
+	 *    work is triggered again and then saved to eeprom.
+	 * Trigger delayed work to save the bad pages to eeprom in time
+	 * after gpu ras reset is completed.
+	 */
+	if (amdgpu_ras_in_recovery(adev))
+		schedule_delayed_work(&con->page_retirement_dwork,
+				      msecs_to_jiffies(DELAYED_TIME_FOR_GPU_RESET));
+
 	return 0;
 }
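
The umc_v12_0 change defers bad-page retirement by DELAYED_TIME_FOR_GPU_RESET milliseconds when a RAS reset is still in flight, so the EEPROM save lands shortly after recovery completes instead of waiting for the next retirement trigger. A toy userspace model of that decision, including the millisecond-to-jiffies conversion (the HZ value below is illustrative):

#include <stdio.h>
#include <stdbool.h>

#define DELAYED_TIME_FOR_GPU_RESET 1000	/* ms, mirroring the new define */
#define HZ 250				/* illustrative tick rate */

/* msecs_to_jiffies boils down to ms * HZ / 1000, rounded up. */
static unsigned long ms_to_jiffies(unsigned int ms)
{
	return ((unsigned long)ms * HZ + 999) / 1000;
}

/* Toy model of the new logic: while a RAS reset is in flight, the bad-page
 * retirement work is re-queued with a delay so it runs after recovery. */
static void maybe_schedule_retirement(bool in_recovery)
{
	if (in_recovery)
		printf("schedule retirement in %lu jiffies\n",
		       ms_to_jiffies(DELAYED_TIME_FOR_GPU_RESET));
	else
		printf("no extra scheduling needed\n");
}

int main(void)
{
	maybe_schedule_retirement(true);
	maybe_schedule_retirement(false);
	return 0;
}
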