From b8f67b9ddf4f8fe6dd536590712b5912ad78f99c Mon Sep 17 00:00:00 2001 From: Shashank Sharma Date: Thu, 18 Jan 2024 20:15:42 +0100 Subject: drm/amdgpu: change vm->task_info handling This patch changes the handling and lifecycle of vm->task_info object. The major changes are: - vm->task_info is a dynamically allocated ptr now, and its uasge is reference counted. - introducing two new helper funcs for task_info lifecycle management - amdgpu_vm_get_task_info: reference counts up task_info before returning this info - amdgpu_vm_put_task_info: reference counts down task_info - last put to task_info() frees task_info from the vm. This patch also does logistical changes required for existing usage of vm->task_info. V2: Do not block all the prints when task_info not found (Felix) V3: Fixed review comments from Felix - Fix wrong indentation - No debug message for -ENOMEM - Add NULL check for task_info - Do not duplicate the debug messages (ti vs no ti) - Get first reference of task_info in vm_init(), put last in vm_fini() V4: Fixed review comments from Felix - fix double reference increment in create_task_info - change amdgpu_vm_get_task_info_pasid - additional changes in amdgpu_gem.c while porting Cc: Christian Koenig Cc: Alex Deucher Cc: Felix Kuehling Reviewed-by: Felix Kuehling Signed-off-by: Shashank Sharma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_job.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 71a5cf37b472..4b3000c21ef2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -35,7 +35,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); - struct amdgpu_task_info ti; + struct amdgpu_task_info *ti; struct amdgpu_device *adev = ring->adev; int idx; int r; @@ -48,7 +48,7 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) return DRM_GPU_SCHED_STAT_ENODEV; } - memset(&ti, 0, sizeof(struct amdgpu_task_info)); + adev->job_hang = true; if (amdgpu_gpu_recovery && @@ -58,12 +58,16 @@ static enum drm_gpu_sched_stat amdgpu_job_timedout(struct drm_sched_job *s_job) goto exit; } - amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", - job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), - ring->fence_drv.sync_seq); - DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", - ti.process_name, ti.tgid, ti.task_name, ti.pid); + job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), + ring->fence_drv.sync_seq); + + ti = amdgpu_vm_get_task_info_pasid(ring->adev, job->pasid); + if (ti) { + DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", + ti->process_name, ti->tgid, ti->task_name, ti->pid); + amdgpu_vm_put_task_info(ti); + } dma_fence_set_error(&s_job->s_fence->finished, -ETIME); -- cgit