From 8dc1db3172ae2f17ae71e33b608a33411ce8a1aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 14 Sep 2022 16:39:48 +0800 Subject: drm/amdkfd: Introduce kfd_node struct (v5) Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-alloctor etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 43 ++++++++++++++++---------------- 1 file changed, 22 insertions(+), 21 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 81d07ecf666d..eb0b0b38f10e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -293,7 +293,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_create_queue_args *args = data; - struct kfd_dev *dev; + struct kfd_node *dev; int err = 0; unsigned int queue_id; struct kfd_process_device *pdd; @@ -328,7 +328,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, } if (!pdd->doorbell_index && - kfd_alloc_process_doorbells(dev, &pdd->doorbell_index) < 0) { + kfd_alloc_process_doorbells(dev->kfd, &pdd->doorbell_index) < 0) { err = -ENOMEM; goto err_alloc_doorbells; } @@ -336,7 +336,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, /* Starting with GFX11, wptr BOs must be mapped to GART for MES to determine work * on unmapped queues for usermode queue oversubscription (no aggregated doorbell) */ - if (dev->shared_resources.enable_mes && + if (dev->kfd->shared_resources.enable_mes && ((dev->adev->mes.sched_version & AMDGPU_MES_API_VERSION_MASK) >> AMDGPU_MES_API_VERSION_SHIFT) >= 2) { struct amdgpu_bo_va_mapping *wptr_mapping; @@ -887,7 +887,7 @@ static int kfd_ioctl_set_scratch_backing_va(struct file *filep, { struct kfd_ioctl_set_scratch_backing_va_args *args = data; struct kfd_process_device *pdd; - struct kfd_dev *dev; + struct kfd_node *dev; long err; mutex_lock(&p->mutex); @@ -1006,18 +1006,18 @@ err_drm_file: return ret; } -bool kfd_dev_is_large_bar(struct kfd_dev *dev) +bool kfd_dev_is_large_bar(struct kfd_node *dev) { if (debug_largebar) { pr_debug("Simulate large-bar allocation on non large-bar machine\n"); return true; } - if (dev->use_iommu_v2) + if (dev->kfd->use_iommu_v2) return false; - if (dev->local_mem_info.local_mem_size_private == 0 && - dev->local_mem_info.local_mem_size_public > 0) + if (dev->kfd->local_mem_info.local_mem_size_private == 0 && + dev->kfd->local_mem_info.local_mem_size_public > 0) return true; return false; } @@ -1041,7 +1041,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, struct kfd_ioctl_alloc_memory_of_gpu_args *args = data; struct kfd_process_device *pdd; void *mem; - struct kfd_dev *dev; + struct kfd_node *dev; int idr_handle; long 
err; uint64_t offset = args->mmap_offset; @@ -1105,7 +1105,7 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, } if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { - if (args->size != kfd_doorbell_process_slice(dev)) { + if (args->size != kfd_doorbell_process_slice(dev->kfd)) { err = -EINVAL; goto err_unlock; } @@ -1231,7 +1231,7 @@ static int kfd_ioctl_map_memory_to_gpu(struct file *filep, struct kfd_ioctl_map_memory_to_gpu_args *args = data; struct kfd_process_device *pdd, *peer_pdd; void *mem; - struct kfd_dev *dev; + struct kfd_node *dev; long err = 0; int i; uint32_t *devices_arr = NULL; @@ -1405,7 +1405,7 @@ static int kfd_ioctl_unmap_memory_from_gpu(struct file *filep, args->n_success = i+1; } - flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev); + flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd); if (flush_tlb) { err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev, (struct kgd_mem *) mem, true); @@ -1445,7 +1445,7 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, int retval; struct kfd_ioctl_alloc_queue_gws_args *args = data; struct queue *q; - struct kfd_dev *dev; + struct kfd_node *dev; mutex_lock(&p->mutex); q = pqm_get_user_queue(&p->pqm, args->queue_id); @@ -1482,7 +1482,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_get_dmabuf_info_args *args = data; - struct kfd_dev *dev = NULL; + struct kfd_node *dev = NULL; struct amdgpu_device *dmabuf_adev; void *metadata_buffer = NULL; uint32_t flags; @@ -1596,7 +1596,7 @@ static int kfd_ioctl_export_dmabuf(struct file *filep, struct kfd_ioctl_export_dmabuf_args *args = data; struct kfd_process_device *pdd; struct dma_buf *dmabuf; - struct kfd_dev *dev; + struct kfd_node *dev; void *mem; int ret = 0; @@ -2178,7 +2178,7 @@ static int criu_restore_devices(struct kfd_process *p, } for (i = 0; i < args->num_devices; i++) { - struct kfd_dev *dev; + struct kfd_node *dev; struct kfd_process_device *pdd; struct file *drm_file; @@ -2240,7 +2240,7 @@ static int criu_restore_devices(struct kfd_process *p, } if (!pdd->doorbell_index && - kfd_alloc_process_doorbells(pdd->dev, &pdd->doorbell_index) < 0) { + kfd_alloc_process_doorbells(pdd->dev->kfd, &pdd->doorbell_index) < 0) { ret = -ENOMEM; goto exit; } @@ -2268,7 +2268,8 @@ static int criu_restore_memory_of_gpu(struct kfd_process_device *pdd, u64 offset; if (bo_bucket->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) { - if (bo_bucket->size != kfd_doorbell_process_slice(pdd->dev)) + if (bo_bucket->size != + kfd_doorbell_process_slice(pdd->dev->kfd)) return -EINVAL; offset = kfd_get_process_doorbells(pdd); @@ -2350,7 +2351,7 @@ static int criu_restore_bo(struct kfd_process *p, /* now map these BOs to GPU/s */ for (j = 0; j < p->n_pdds; j++) { - struct kfd_dev *peer; + struct kfd_node *peer; struct kfd_process_device *peer_pdd; if (!bo_priv->mapped_gpuids[j]) @@ -2947,7 +2948,7 @@ err_i1: return retcode; } -static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, +static int kfd_mmio_mmap(struct kfd_node *dev, struct kfd_process *process, struct vm_area_struct *vma) { phys_addr_t address; @@ -2981,7 +2982,7 @@ static int kfd_mmio_mmap(struct kfd_dev *dev, struct kfd_process *process, static int kfd_mmap(struct file *filp, struct vm_area_struct *vma) { struct kfd_process *process; - struct kfd_dev *dev = NULL; + struct kfd_node *dev = NULL; unsigned long mmap_offset; unsigned int gpu_id; -- cgit From 3c8bdb51be0e895010da62dfa173bb1227ff3b6f Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 
9 May 2022 21:50:43 -0400 Subject: drm/amdkfd: Add PM4 target XCC In a device that supports multiple XCCs, unlike AQL queues, the PM4 queue will be only processed in one XCC in the partitioning. This patch re-purposes the queue percentage variable in create queue and update queue ioctl for the user space to specify the target XCC. Signed-off-by: Amber Lin Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index eb0b0b38f10e..45e8da125f70 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -186,7 +186,12 @@ static int kfd_ioctl_get_version(struct file *filep, struct kfd_process *p, static int set_queue_properties_from_user(struct queue_properties *q_properties, struct kfd_ioctl_create_queue_args *args) { - if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + /* + * Repurpose queue percentage to accommodate new features: + * bit 0-7: queue percentage + * bit 8-15: pm4_target_xcc + */ + if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } @@ -236,7 +241,9 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->is_interop = false; q_properties->is_gws = false; - q_properties->queue_percent = args->queue_percentage; + q_properties->queue_percent = args->queue_percentage & 0xFF; + /* bit 8-15 are repurposed to be PM4 target XCC */ + q_properties->pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF; q_properties->priority = args->queue_priority; q_properties->queue_address = args->ring_base_address; q_properties->queue_size = args->ring_size; @@ -442,7 +449,12 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, struct kfd_ioctl_update_queue_args *args = data; struct queue_properties properties; - if (args->queue_percentage > KFD_MAX_QUEUE_PERCENTAGE) { + /* + * Repurpose queue percentage to accommodate new features: + * bit 0-7: queue percentage + * bit 8-15: pm4_target_xcc + */ + if ((args->queue_percentage & 0xFF) > KFD_MAX_QUEUE_PERCENTAGE) { pr_err("Queue percentage must be between 0 to KFD_MAX_QUEUE_PERCENTAGE\n"); return -EINVAL; } @@ -466,7 +478,9 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p, properties.queue_address = args->ring_base_address; properties.queue_size = args->ring_size; - properties.queue_percent = args->queue_percentage; + properties.queue_percent = args->queue_percentage & 0xFF; + /* bit 8-15 are repurposed to be PM4 target XCC */ + properties.pm4_target_xcc = (args->queue_percentage >> 8) & 0xFF; properties.priority = args->queue_priority; pr_debug("Updating queue id %d for pasid 0x%x\n", -- cgit From fe1f05df5919c67c3add49efb55e251a8d78ee4e Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 31 May 2022 14:39:36 -0400 Subject: drm/amdkfd: Rework kfd_locked handling Currently, even if kfd_locked is set, a process is first created and then removed to work around a race condition in updating kfd_locked flag. Rework kfd_locked handling to ensure no processes is created if kfd_locked is set. This is achieved by updating kfd_locked under kfd_processes_mutex. 
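The check itself moves into kfd_process.c rather than this file, so it is not visible in the hunk below. As a rough, illustrative sketch of the pattern the message describes (helper names and the exact placement are assumptions of the sketch, not the actual code; only kfd_locked, kfd_processes_mutex and the -EAGAIN return come from the patches themselves):

	/* Sketch only: the real change lives in kfd_process.c. */
	#include <linux/mutex.h>
	#include <linux/err.h>

	static int kfd_locked;                     /* plain int, serialized by the mutex */
	static DEFINE_MUTEX(kfd_processes_mutex);

	struct kfd_process *kfd_create_process(struct task_struct *thread)
	{
		struct kfd_process *process;

		mutex_lock(&kfd_processes_mutex);
		if (kfd_locked) {                  /* refuse to create while locked */
			mutex_unlock(&kfd_processes_mutex);
			return ERR_PTR(-EAGAIN);
		}
		process = create_process(thread);  /* placeholder for the existing path */
		mutex_unlock(&kfd_processes_mutex);
		return process;
	}
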
With this there is no need for kfd_locked to be an atomic counter. Instead, it can be a regular integer. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 45e8da125f70..8b9accecf49b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -146,13 +146,6 @@ static int kfd_open(struct inode *inode, struct file *filep) if (IS_ERR(process)) return PTR_ERR(process); - if (kfd_is_locked()) { - dev_dbg(kfd_device, "kfd is locked!\n" - "process %d unreferenced", process->pasid); - kfd_unref_process(process); - return -EAGAIN; - } - /* filep now owns the reference returned by kfd_create_process */ filep->private_data = process; -- cgit From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 8b9accecf49b..f85ac4dbc673 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1026,6 +1026,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) if (dev->kfd->local_mem_info.local_mem_size_private == 0 && dev->kfd->local_mem_info.local_mem_size_public > 0) return true; + + if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) { + pr_debug("APP APU, Consider like a large bar system\n"); + return true; + } + return false; } -- cgit From 2fa9ff25de08e598af051c76b216d2f073b2ee89 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 9 Mar 2023 19:30:02 -0500 Subject: drm/amdgpu: KFD graphics interop support compute partition kfd_ioctl_get_dmabuf use the amdgpu bo xcp_id to get the gpu_id of the KFD node from the exported dmabuf_adev, and then create kfd bo on the correct adev and KFD node when importing the amdgpu bo to KFD. Remove function kfd_device_by_adev, it is not needed as it is the same result as dmabuf_adev->kfd.dev->nodes[0]->id. 
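For orientation, the chain dmabuf_adev->kfd.dev->nodes[xcp_id]->id reflects the split introduced by the first patch in this series: kfd_dev keeps the shared resources and owns an array of per-partition kfd_node entries, and each kfd_node points back at its parent. A minimal sketch of that relationship, spelling out only the members that actually appear in these diffs and treating everything else (including MAX_KFD_NODES) as an assumption:

	/* Illustrative sketch of the kfd_dev / kfd_node split, not the full structs. */
	struct kfd_node {
		struct kfd_dev *kfd;            /* parent, accessed as dev->kfd->... above */
		struct amdgpu_device *adev;     /* dev->adev accesses above */
		unsigned int id;                /* gpu_id reported to user space */
		unsigned int node_id;           /* partition index, see get_available_memory */
		/* per-partition state: queue manager, interrupts, local_mem_info, ... */
	};

	struct kfd_dev {
		/* shared state: doorbells, GTT sub-allocator, shared_resources, ... */
		struct kfd_node *nodes[MAX_KFD_NODES];
		unsigned int num_nodes;
	};

With that layout a partition index (xcp_id) maps to a gpu_id simply by indexing nodes[], which is exactly what the hunk below does.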
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f85ac4dbc673..fcad90d53c9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1499,6 +1499,7 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, struct amdgpu_device *dmabuf_adev; void *metadata_buffer = NULL; uint32_t flags; + int8_t xcp_id; unsigned int i; int r; @@ -1519,17 +1520,14 @@ static int kfd_ioctl_get_dmabuf_info(struct file *filep, r = amdgpu_amdkfd_get_dmabuf_info(dev->adev, args->dmabuf_fd, &dmabuf_adev, &args->size, metadata_buffer, args->metadata_size, - &args->metadata_size, &flags); + &args->metadata_size, &flags, &xcp_id); if (r) goto exit; - /* Reverse-lookup gpu_id from kgd pointer */ - dev = kfd_device_by_adev(dmabuf_adev); - if (!dev) { - r = -EINVAL; - goto exit; - } - args->gpu_id = dev->id; + if (xcp_id >= 0) + args->gpu_id = dmabuf_adev->kfd.dev->nodes[xcp_id]->id; + else + args->gpu_id = dmabuf_adev->kfd.dev->nodes[0]->id; args->flags = flags; /* Copy metadata buffer to user mode */ -- cgit From 315e29eca57f85107cc6f687c2d510aa532fb3f0 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 20 Mar 2023 11:21:38 -0400 Subject: drm/amdkfd: Move local_mem_info to kfd_node We need to track memory usage on a per partition basis. To do that, store the local memory information in KFD node instead of kfd device. v2: squash in fix ("amdkfd: Use mem_id to access mem_partition info") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index fcad90d53c9b..1ae867482bc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1023,11 +1023,12 @@ bool kfd_dev_is_large_bar(struct kfd_node *dev) if (dev->kfd->use_iommu_v2) return false; - if (dev->kfd->local_mem_info.local_mem_size_private == 0 && - dev->kfd->local_mem_info.local_mem_size_public > 0) + if (dev->local_mem_info.local_mem_size_private == 0 && + dev->local_mem_info.local_mem_size_public > 0) return true; - if (dev->kfd->local_mem_info.local_mem_size_public == 0 && dev->kfd->adev->gmc.is_app_apu) { + if (dev->local_mem_info.local_mem_size_public == 0 && + dev->kfd->adev->gmc.is_app_apu) { pr_debug("APP APU, Consider like a large bar system\n"); return true; } -- cgit From 1c77527a69d5ca19cb276e2728992d922b687f35 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 20 Mar 2023 11:22:30 -0400 Subject: drm/amdkfd: Fix memory reporting on GFX 9.4.3 This patch fixes memory reporting on the GFX 9.4.3 APU and dGPU by reporting available memory on a per partition basis. If its an APU, available and used memory calculations take into account system and TTM memory. 
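From user space, the per-partition accounting surfaces through the existing available-memory query, which now reflects the partition that owns the given gpu_id. A minimal sketch of such a query follows; the ioctl and argument-struct names are assumed to match the uapi kfd_ioctl.h of this era, and error handling is trimmed:

	/* Sketch: query available memory for one partition (gpu_id) via /dev/kfd. */
	#include <stdio.h>
	#include <fcntl.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	static int query_available(int kfd_fd, unsigned int gpu_id)
	{
		struct kfd_ioctl_get_available_memory_args args = { .gpu_id = gpu_id };

		if (ioctl(kfd_fd, AMDKFD_IOC_AVAILABLE_MEMORY, &args))
			return -1;
		printf("gpu_id %u: %llu bytes available\n", gpu_id,
		       (unsigned long long)args.available);
		return 0;
	}

	/* usage: int fd = open("/dev/kfd", O_RDWR); query_available(fd, gpu_id); */
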
v2: squash in fix ("drm/amdkfd: Fix array out of bound warning") squash in fix ("drm/amdgpu: Update memory reporting for GFX9.4.3") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1ae867482bc7..a9efff94390b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1044,7 +1044,8 @@ static int kfd_ioctl_get_available_memory(struct file *filep, if (!pdd) return -EINVAL; - args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev); + args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev, + pdd->dev->node_id); kfd_unlock_pdd(pdd); return 0; } -- cgit From 4f98cf2baf9faee5b6f2f7889dad7c0f7686a787 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 2 Mar 2022 14:30:12 -0500 Subject: drm/amdkfd: add debug and runtime enable interface Introduce the GPU debug operations interface. For ROCm-GDB to extend the GNU Debugger's ability to inspect the AMD GPU instruction set, provide the necessary interface to allow the debugger to HW debug-mode set and query exceptions per HSA queue, process or device. The runtime_enable interface coordinates exception handling with the HSA runtime. Usage is available in the kern docs at uapi/linux/kfd_ioctl.h. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 48 ++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a9efff94390b..00e34125987c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2729,6 +2729,48 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) return ret; } +static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) +{ + return 0; +} + +static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_dbg_trap_args *args = data; + int r = 0; + + if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { + pr_err("Debugging does not support sched_policy %i", sched_policy); + return -EINVAL; + } + + switch (args->op) { + case KFD_IOC_DBG_TRAP_ENABLE: + case KFD_IOC_DBG_TRAP_DISABLE: + case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: + case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: + case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: + case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: + case KFD_IOC_DBG_TRAP_RESUME_QUEUES: + case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: + case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: + case KFD_IOC_DBG_TRAP_SET_FLAGS: + case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: + case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: + case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: + case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: + pr_warn("Debugging not supported yet\n"); + r = -EACCES; + break; + default: + pr_err("Invalid option: %i\n", args->op); + r = -EINVAL; + } + + return r; +} + #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ .cmd_drv = 0, .name = #ioctl} @@ -2841,6 +2883,12 @@ static const struct 
amdkfd_ioctl_desc amdkfd_ioctls[] = { AMDKFD_IOCTL_DEF(AMDKFD_IOC_EXPORT_DMABUF, kfd_ioctl_export_dmabuf, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_RUNTIME_ENABLE, + kfd_ioctl_runtime_enable, 0), + + AMDKFD_IOCTL_DEF(AMDKFD_IOC_DBG_TRAP, + kfd_ioctl_set_debug_trap, 0), }; #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) -- cgit From 0ab2d7532b05a3e7c06fd3b0c8bd6b46c1dfb508 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 25 Mar 2022 14:55:30 -0400 Subject: drm/amdkfd: prepare per-process debug enable and disable The ROCm debugger will attach to a process to debug by PTRACE and will expect the KFD to prepare a process for the target PID, whether the target PID has opened the KFD device or not. This patch is to explicity handle this requirement. Further HW mode setting and runtime coordination requirements will be handled in following patches. In the case where the target process has not opened the KFD device, a new KFD process must be created for the target PID. The debugger as well as the target process for this case will have not acquired any VMs so handle process restoration to correctly account for this. To coordinate with HSA runtime, the debugger must be aware of the target process' runtime enablement status and will copy the runtime status information into the debugged KFD process for later query. On enablement, the debugger will subscribe to a set of exceptions where each exception events will notify the debugger through a pollable FIFO file descriptor that the debugger provides to the KFD to manage. Finally on process termination of either the debugger or the target, debugging must be disabled if it has not been done so. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 102 ++++++++++++++++++++++++++++++- 1 file changed, 100 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 00e34125987c..ee086a0a46df 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -44,6 +44,7 @@ #include "amdgpu_amdkfd.h" #include "kfd_smi_events.h" #include "amdgpu_dma_buf.h" +#include "kfd_debug.h" static long kfd_ioctl(struct file *, unsigned int, unsigned long); static int kfd_open(struct inode *, struct file *); @@ -142,10 +143,15 @@ static int kfd_open(struct inode *inode, struct file *filep) return -EPERM; } - process = kfd_create_process(filep); + process = kfd_create_process(current); if (IS_ERR(process)) return PTR_ERR(process); + if (kfd_process_init_cwsr_apu(process, filep)) { + kfd_unref_process(process); + return -EFAULT; + } + /* filep now owns the reference returned by kfd_create_process */ filep->private_data = process; @@ -2737,6 +2743,10 @@ static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, v static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_dbg_trap_args *args = data; + struct task_struct *thread = NULL; + struct mm_struct *mm = NULL; + struct pid *pid = NULL; + struct kfd_process *target = NULL; int r = 0; if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { @@ -2744,9 +2754,81 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v return -EINVAL; } + pid = find_get_pid(args->pid); + if (!pid) { + pr_debug("Cannot find pid info for %i\n", args->pid); + r = -ESRCH; + goto out; + } + + thread = 
get_pid_task(pid, PIDTYPE_PID); + if (!thread) { + r = -ESRCH; + goto out; + } + + mm = get_task_mm(thread); + if (!mm) { + r = -ESRCH; + goto out; + } + + if (args->op == KFD_IOC_DBG_TRAP_ENABLE) { + bool create_process; + + rcu_read_lock(); + create_process = thread && thread != current && ptrace_parent(thread) == current; + rcu_read_unlock(); + + target = create_process ? kfd_create_process(thread) : + kfd_lookup_process_by_pid(pid); + } else { + target = kfd_lookup_process_by_pid(pid); + } + + if (!target) { + pr_debug("Cannot find process PID %i to debug\n", args->pid); + r = -ESRCH; + goto out; + } + + /* Check if target is still PTRACED. */ + rcu_read_lock(); + if (target != p && args->op != KFD_IOC_DBG_TRAP_DISABLE + && ptrace_parent(target->lead_thread) != current) { + pr_err("PID %i is not PTRACED and cannot be debugged\n", args->pid); + r = -EPERM; + } + rcu_read_unlock(); + + if (r) + goto out; + + mutex_lock(&target->mutex); + + if (args->op != KFD_IOC_DBG_TRAP_ENABLE && !target->debug_trap_enabled) { + pr_err("PID %i not debug enabled for op %i\n", args->pid, args->op); + r = -EINVAL; + goto unlock_out; + } + switch (args->op) { case KFD_IOC_DBG_TRAP_ENABLE: + if (target != p) + target->debugger_process = p; + + r = kfd_dbg_trap_enable(target, + args->enable.dbg_fd, + (void __user *)args->enable.rinfo_ptr, + &args->enable.rinfo_size); + if (!r) + target->exception_enable_mask = args->enable.exception_mask; + + pr_warn("Debug functions limited\n"); + break; case KFD_IOC_DBG_TRAP_DISABLE: + r = kfd_dbg_trap_disable(target); + break; case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: @@ -2760,7 +2842,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: - pr_warn("Debugging not supported yet\n"); + pr_warn("Debug op %i not supported yet\n", args->op); r = -EACCES; break; default: @@ -2768,6 +2850,22 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v r = -EINVAL; } +unlock_out: + mutex_unlock(&target->mutex); + +out: + if (thread) + put_task_struct(thread); + + if (mm) + mmput(mm); + + if (pid) + put_pid(pid); + + if (target) + kfd_unref_process(target); + return r; } -- cgit From 218895820e6fccade42a7c3ab9c0a44dec0a1ebc Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 5 Apr 2022 12:34:55 -0400 Subject: drm/amdkfd: add per process hw trap enable and disable functions To enable HW debug mode per process, all devices must be debug enabled successfully. If a failure occures, rewind the enablement of debug mode on the enabled devices. A power management scenario that needs to be considered is HW debug mode setting during GFXOFF. During GFXOFF, these registers will be unreachable so we have to transiently disable GFXOFF when setting. Also, some devices don't support the RLC save restore function for these debug registers so we have to disable GFXOFF completely during a debug session. Cooperative launch also has debugging restriction based on HW/FW bugs. If such bugs exists, the debugger cannot attach to a process that uses GWS resources nor can GWS resources be requested if a process is being debugged. Multi-process debug devices can only enable trap temporaries based on certain runtime scenerios, which will be explained when the runtime enable functions are implemented in a follow up patch. 
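The "all devices or rewind" requirement translates into a familiar unwind pattern. The real implementation lives in kfd_debug.c (kfd_dbg_trap_activate(), used later in this series); the sketch below only shows the shape, with the per-device helpers as placeholders rather than real API:

	/* Sketch of the activate-all-or-rewind pattern described above.
	 * enable/disable_debug_on_device() are placeholders, not the real API. */
	static int dbg_activate_all(struct kfd_process *p)
	{
		int i, r = 0;

		for (i = 0; i < p->n_pdds; i++) {
			struct kfd_process_device *pdd = p->pdds[i];

			/* GFXOFF is disabled around the register writes, and for
			 * the whole session on devices without RLC save/restore. */
			r = enable_debug_on_device(pdd);
			if (r)
				break;
		}

		if (r)                          /* rewind devices already enabled */
			while (--i >= 0)
				disable_debug_on_device(p->pdds[i]);

		return r;
	}
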
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index ee086a0a46df..826a99acb6fb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1488,6 +1488,11 @@ static int kfd_ioctl_alloc_queue_gws(struct file *filep, goto out_unlock; } + if (!kfd_dbg_has_gws_support(dev) && p->debug_trap_enabled) { + retval = -EBUSY; + goto out_unlock; + } + retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL); mutex_unlock(&p->mutex); -- cgit From 69a8c3ae2dea84a6d571e4c1aad306f630f3ccfd Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 1 Sep 2022 11:27:15 -0400 Subject: drm/amdkfd: apply trap workaround for gfx11 Due to a HW bug, waves in only half the shader arrays can enter trap. When starting a debug session, relocate all waves to the first shader array of each shader engine and mask off the 2nd shader array as unavailable. When ending a debug session, re-enable the 2nd shader array per shader engine. User CU masking per queue cannot be guaranteed to remain functional if requested during debugging (e.g. user cu mask requests only 2nd shader array as an available resource leading to zero HW resources available) nor can runtime be alerted of any of these changes during execution. Make user CU masking and debugging mutual exclusive with respect to availability. If the debugger tries to attach to a process with a user cu masked queue, return the runtime status as enabled but busy. If the debugger tries to attach and fails to reallocate queue waves to the first shader array of each shader engine, return the runtime status as enabled but with an error. In addition, like any other mutli-process debug supported devices, disable trap temporary setup per-process to avoid performance impact from setup overhead. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 826a99acb6fb..d4df424e4514 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -537,8 +537,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp, struct kfd_process *p, goto out; } - minfo.update_flag = UPDATE_FLAG_CU_MASK; - mutex_lock(&p->mutex); retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo); -- cgit From c2d2588c702364ff53916ddd97e2b26fd4f4a317 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 8 Apr 2022 12:49:48 -0400 Subject: drm/amdkfd: add send exception operation Add a debug operation that allows the debugger to send an exception directly to runtime through a payload address. For memory violations, normal vmfault signals will be applied to notify runtime instead after passing in the saved exception data when a memory violation was raised to the debugger. For runtime exceptions, this will unblock the runtime enable function which will be explained and implemented in a follow up patch. 
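From the debugger side this operation is reached through the same AMDKFD_IOC_DBG_TRAP ioctl as the other debug ops. A minimal sketch, with the op constant and field names taken from the kernel-side usage in the hunk below and everything else assumed:

	/* Sketch: forward a queued exception to the HSA runtime. kfd_fd is an
	 * open /dev/kfd descriptor owned by the debugger. */
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	static int send_runtime_event(int kfd_fd, int target_pid,
				      unsigned int gpu_id, unsigned int queue_id,
				      unsigned long long exception_mask)
	{
		struct kfd_ioctl_dbg_trap_args args;

		memset(&args, 0, sizeof(args));
		args.pid = target_pid;
		args.op  = KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT;
		args.send_runtime_event.gpu_id = gpu_id;
		args.send_runtime_event.queue_id = queue_id;
		args.send_runtime_event.exception_mask = exception_mask;

		return ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
	}
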
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index d4df424e4514..5e57b3e96ff9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2833,6 +2833,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v r = kfd_dbg_trap_disable(target); break; case KFD_IOC_DBG_TRAP_SEND_RUNTIME_EVENT: + r = kfd_dbg_send_exception_to_runtime(target, + args->send_runtime_event.gpu_id, + args->send_runtime_event.queue_id, + args->send_runtime_event.exception_mask); + break; case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: -- cgit From 455227c4642c5e1867213cea73a527e431779060 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 8 Apr 2022 13:12:24 -0400 Subject: drm/amdkfd: add runtime enable operation The debugger can attach to a process prior to HSA enablement (i.e. inferior is spawned by the debugger and attached to immediately before target process has been enabled for HSA dispatches) or it can attach to a running target that is already HSA enabled. Either way, the debugger needs to know the enablement status to know when it can inspect queues. For the scenario where the debugger spawns the target process, it will have to wait for ROCr's runtime enable request from the target. The runtime enable request will be able to see that its process has been debug attached. ROCr raises an EC_PROCESS_RUNTIME signal to the debugger then blocks the target process while waiting the debugger's response. Once the debugger has received the runtime signal, it will unblock the target process. For the scenario where the debugger attaches to a running target process, ROCr will set the target process' runtime status as enabled so that on an attach request, the debugger will be able to see this status and will continue with debug enablement as normal. A secondary requirement is to conditionally enable the trap tempories only if the user requests it (env var HSA_ENABLE_DEBUG=1) or if the debugger attaches with HSA runtime enabled. This is because setting up the trap temporaries incurs a performance overhead that is unacceptable for microbench performance in normal mode for certain customers. In the scenario where the debugger spawns the target process, when ROCr detects that the debugger has attached during the runtime enable request, it will enable the trap temporaries before it blocks the target process while waiting for the debugger to respond. In the scenario where the debugger attaches to a running target process, it will enable to trap temporaries itself. Finally, there is an additional restriction that is required to be enforced with runtime enable and HW debug mode setting. The debugger must first ensure that HW debug mode has been enabled before permitting HW debug mode operations. With single process debug devices, allowing the debugger to set debug HW modes prior to trap activation means that debug HW mode setting can occur before the KFD has reserved the debug VMID (0xf) from the hardware scheduler's VMID allocation resource pool. 
This can result in the hardware scheduler assigning VMID 0xf to a non-debugged process and having that process inherit debug HW mode settings intended for the debugged target process instead, which is both incorrect and potentially fatal for normal mode operation. With multi process debug devices, allowing the debugger to set debug HW modes prior to trap activation means that non-debugged processes migrating to a new VMID could inherit unintended debug settings. All debug operations that touch HW settings must require trap activation where trap activation is triggered by both debug attach and runtime enablement (target has KFD opened and is ready to dispatch work). Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 143 ++++++++++++++++++++++++++++++- 1 file changed, 142 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5e57b3e96ff9..615fa9ab36b7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2738,11 +2738,140 @@ static int kfd_ioctl_criu(struct file *filep, struct kfd_process *p, void *data) return ret; } -static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) +static int runtime_enable(struct kfd_process *p, uint64_t r_debug, + bool enable_ttmp_setup) +{ + int i = 0, ret = 0; + + if (p->is_runtime_retry) + goto retry; + + if (p->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_DISABLED) + return -EBUSY; + + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + if (pdd->qpd.queue_count) + return -EEXIST; + } + + p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_ENABLED; + p->runtime_info.r_debug = r_debug; + p->runtime_info.ttmp_setup = enable_ttmp_setup; + + if (p->runtime_info.ttmp_setup) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + if (!kfd_dbg_is_rlc_restore_supported(pdd->dev)) { + amdgpu_gfx_off_ctrl(pdd->dev->adev, false); + pdd->dev->kfd2kgd->enable_debug_trap( + pdd->dev->adev, + true, + pdd->dev->vm_info.last_vmid_kfd); + } else if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { + pdd->spi_dbg_override = pdd->dev->kfd2kgd->enable_debug_trap( + pdd->dev->adev, + false, + 0); + } + } + } + +retry: + if (p->debug_trap_enabled) { + if (!p->is_runtime_retry) { + kfd_dbg_trap_activate(p); + kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME), + p, NULL, 0, false, NULL, 0); + } + + mutex_unlock(&p->mutex); + ret = down_interruptible(&p->runtime_enable_sema); + mutex_lock(&p->mutex); + + p->is_runtime_retry = !!ret; + } + + return ret; +} + +static int runtime_disable(struct kfd_process *p) { + int i = 0, ret; + bool was_enabled = p->runtime_info.runtime_state == DEBUG_RUNTIME_STATE_ENABLED; + + p->runtime_info.runtime_state = DEBUG_RUNTIME_STATE_DISABLED; + p->runtime_info.r_debug = 0; + + if (p->debug_trap_enabled) { + if (was_enabled) + kfd_dbg_trap_deactivate(p, false, 0); + + if (!p->is_runtime_retry) + kfd_dbg_ev_raise(KFD_EC_MASK(EC_PROCESS_RUNTIME), + p, NULL, 0, false, NULL, 0); + + mutex_unlock(&p->mutex); + ret = down_interruptible(&p->runtime_enable_sema); + mutex_lock(&p->mutex); + + p->is_runtime_retry = !!ret; + if (ret) + return ret; + } + + if (was_enabled && p->runtime_info.ttmp_setup) { + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + if 
(!kfd_dbg_is_rlc_restore_supported(pdd->dev)) + amdgpu_gfx_off_ctrl(pdd->dev->adev, true); + } + } + + p->runtime_info.ttmp_setup = false; + + /* disable ttmp setup */ + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + if (kfd_dbg_is_per_vmid_supported(pdd->dev)) { + pdd->spi_dbg_override = + pdd->dev->kfd2kgd->disable_debug_trap( + pdd->dev->adev, + false, + pdd->dev->vm_info.last_vmid_kfd); + + if (!pdd->dev->kfd->shared_resources.enable_mes) + debug_refresh_runlist(pdd->dev->dqm); + else + kfd_dbg_set_mes_debug_mode(pdd); + } + } + return 0; } +static int kfd_ioctl_runtime_enable(struct file *filep, struct kfd_process *p, void *data) +{ + struct kfd_ioctl_runtime_enable_args *args = data; + int r; + + mutex_lock(&p->mutex); + + if (args->mode_mask & KFD_RUNTIME_ENABLE_MODE_ENABLE_MASK) + r = runtime_enable(p, args->r_debug, + !!(args->mode_mask & KFD_RUNTIME_ENABLE_MODE_TTMP_SAVE_MASK)); + else + r = runtime_disable(p); + + mutex_unlock(&p->mutex); + + return r; +} + static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_dbg_trap_args *args = data; @@ -2815,6 +2944,18 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v goto unlock_out; } + if (target->runtime_info.runtime_state != DEBUG_RUNTIME_STATE_ENABLED && + (args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE || + args->op == KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE || + args->op == KFD_IOC_DBG_TRAP_SUSPEND_QUEUES || + args->op == KFD_IOC_DBG_TRAP_RESUME_QUEUES || + args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || + args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH || + args->op == KFD_IOC_DBG_TRAP_SET_FLAGS)) { + r = -EPERM; + goto unlock_out; + } + switch (args->op) { case KFD_IOC_DBG_TRAP_ENABLE: if (target != p) -- cgit From e90bf919f714ae2a658cdfd03238e7be9ce9185c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 27 Apr 2022 10:24:37 -0400 Subject: drm/amdkfd: add debug set exceptions enabled operation The debugger subscibes to nofication for requested exceptions on attach. Allow the debugger to change its subsciption later on. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 615fa9ab36b7..dcf4b5f3886e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2980,6 +2980,9 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v args->send_runtime_event.exception_mask); break; case KFD_IOC_DBG_TRAP_SET_EXCEPTIONS_ENABLED: + kfd_dbg_set_enabled_debug_exception_mask(target, + args->set_exceptions_enabled.exception_mask); + break; case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: -- cgit From 101827e13026a981e887527620fe9710adc0e481 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 27 Apr 2022 13:18:10 -0400 Subject: drm/amdkfd: add debug wave launch override operation This operation allows the debugger to override the enabled HW exceptions on the device. On debug devices that only support the debugging of a single process, the HW exceptions are global and set through the SPI_GDBG_TRAP_MASK register. 
Because they are global, only address watch exceptions are allowed to be enabled. In other words, the debugger must preserve all non-address watch exception states in normal mode operation by barring a full replacement override or a non-address watch override request. For multi-process debugging, all HW exception overrides are per-VMID so all exceptions can be overridden or fully replaced. In order for the debugger to know what is permissible, returned the supported override mask back to the debugger along with the previously enable overrides. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index dcf4b5f3886e..7fa249807671 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2984,6 +2984,13 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v args->set_exceptions_enabled.exception_mask); break; case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_OVERRIDE: + r = kfd_dbg_trap_set_wave_launch_override(target, + args->launch_override.override_mode, + args->launch_override.enable_mask, + args->launch_override.support_request_mask, + &args->launch_override.enable_mask, + &args->launch_override.support_request_mask); + break; case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: case KFD_IOC_DBG_TRAP_RESUME_QUEUES: -- cgit From aea1b4738bebd8092bd437ce0b03aa9587fc20a7 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 2 May 2022 11:45:05 -0400 Subject: drm/amdkfd: add debug wave launch mode operation Allow the debugger to set wave behaviour on to either normally operate, halt at launch, trap on every instruction, terminate immediately or stall on allocation. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 7fa249807671..a6570b124b2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2992,6 +2992,9 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v &args->launch_override.support_request_mask); break; case KFD_IOC_DBG_TRAP_SET_WAVE_LAUNCH_MODE: + r = kfd_dbg_trap_set_wave_launch_mode(target, + args->launch_mode.launch_mode); + break; case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: case KFD_IOC_DBG_TRAP_RESUME_QUEUES: case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: -- cgit From a70a93fa568b4f05aba548dadb673703eccf5480 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Thu, 5 May 2022 16:15:37 -0400 Subject: drm/amdkfd: add debug suspend and resume process queues operation In order to inspect waves from the saved context at any point during a debug session, the debugger must be able to preempt queues to trigger context save by suspending them. On queue suspend, the KFD will copy the context save header information so that the debugger can correctly crawl the appropriate size of the saved context. The debugger must then also be allowed to resume suspended queues. 
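Seen from the debugger, suspend and resume are symmetric, array-based operations on the same debug ioctl. A sketch of the round trip (field names as used by the kernel side in the hunk below, everything else assumed; the queue ids would normally come from a queue snapshot, and per-queue status is reported back through the same array as described next):

	/* Sketch: suspend a set of queues for inspection, then resume them. */
	#include <stdint.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	static int suspend_then_resume(int kfd_fd, int target_pid, uint32_t *queue_ids,
				       uint32_t num_queues, uint32_t grace_period)
	{
		struct kfd_ioctl_dbg_trap_args args;
		int r;

		memset(&args, 0, sizeof(args));
		args.pid = target_pid;
		args.op  = KFD_IOC_DBG_TRAP_SUSPEND_QUEUES;
		args.suspend_queues.num_queues = num_queues;
		args.suspend_queues.grace_period = grace_period;
		args.suspend_queues.exception_mask = 0;
		args.suspend_queues.queue_array_ptr = (uintptr_t)queue_ids;
		r = ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
		if (r < 0)
			return r;       /* otherwise r counts the suspended queues */

		/* ... crawl the saved context of the suspended queues here ... */

		memset(&args, 0, sizeof(args));
		args.pid = target_pid;
		args.op  = KFD_IOC_DBG_TRAP_RESUME_QUEUES;
		args.resume_queues.num_queues = num_queues;
		args.resume_queues.queue_array_ptr = (uintptr_t)queue_ids;
		return ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args);
	}
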
A queue that is newly created cannot be suspended because queue ids are recycled after destruction so the debugger needs to know that this has occurred. Query functions will be later added that will clear a given queue of its new queue status. A queue cannot be destroyed while it is suspended to preserve its saved context during debugger inspection. Have queue destruction block while a queue is suspended and unblocked when it is resumed. Likewise, if a queue is about to be destroyed, it cannot be suspended. Return the number of queues successfully suspended or resumed along with a per queue status array where the upper bits per queue status show that the request was invalid (new/destroyed queue suspend request, missing queue) or an error occurred (HWS in a fatal state so it can't suspend or resume queues). Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index a6570b124b2b..1fae97df7a1e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -410,6 +410,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, pr_debug("Write ptr address == 0x%016llX\n", args->write_pointer_address); + kfd_dbg_ev_raise(KFD_EC_MASK(EC_QUEUE_NEW), p, dev, queue_id, false, NULL, 0); return 0; err_create_queue: @@ -2996,7 +2997,17 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v args->launch_mode.launch_mode); break; case KFD_IOC_DBG_TRAP_SUSPEND_QUEUES: + r = suspend_queues(target, + args->suspend_queues.num_queues, + args->suspend_queues.grace_period, + args->suspend_queues.exception_mask, + (uint32_t *)args->suspend_queues.queue_array_ptr); + + break; case KFD_IOC_DBG_TRAP_RESUME_QUEUES: + r = resume_queues(target, args->resume_queues.num_queues, + (uint32_t *)args->resume_queues.queue_array_ptr); + break; case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: case KFD_IOC_DBG_TRAP_SET_FLAGS: -- cgit From e0f85f4690d089cc1a60337decafb1acf7eec45e Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 6 May 2022 14:58:55 -0400 Subject: drm/amdkfd: add debug set and clear address watch points operation Shader read, write and atomic memory operations can be alerted to the debugger as an address watch exception. Allow the debugger to pass in a watch point to a particular memory address per device. Note that there exists only 4 watch points per devices to date, so have the KFD keep track of what watch points are allocated or not. 
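The bookkeeping referred to here amounts to a tiny allocator over four slots per device. The real code lives in kfd_debug.c; the sketch below only illustrates the idea, and the names, locking scheme and error code are assumptions of the sketch:

	/* Illustrative sketch: allocate/release one of the four watch points. */
	#include <linux/types.h>
	#include <linux/spinlock.h>
	#include <linux/bitops.h>
	#include <linux/errno.h>

	#define NUM_WATCH_POINTS 4

	static DEFINE_SPINLOCK(watch_lock);
	static unsigned long watch_in_use;      /* bits 0..3 track allocated slots */

	static int watch_point_alloc(u32 *watch_id)
	{
		int id;

		spin_lock(&watch_lock);
		id = find_first_zero_bit(&watch_in_use, NUM_WATCH_POINTS);
		if (id < NUM_WATCH_POINTS)
			__set_bit(id, &watch_in_use);
		spin_unlock(&watch_lock);

		if (id >= NUM_WATCH_POINTS)
			return -EBUSY;          /* all four watch points in use */

		*watch_id = id;
		return 0;
	}

	static void watch_point_free(u32 watch_id)
	{
		spin_lock(&watch_lock);
		__clear_bit(watch_id, &watch_in_use);
		spin_unlock(&watch_lock);
	}
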
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 1fae97df7a1e..016724c82928 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2880,6 +2880,7 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v struct mm_struct *mm = NULL; struct pid *pid = NULL; struct kfd_process *target = NULL; + struct kfd_process_device *pdd = NULL; int r = 0; if (sched_policy == KFD_SCHED_POLICY_NO_HWS) { @@ -2957,6 +2958,20 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v goto unlock_out; } + if (args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH || + args->op == KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH) { + int user_gpu_id = kfd_process_get_user_gpu_id(target, + args->op == KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH ? + args->set_node_address_watch.gpu_id : + args->clear_node_address_watch.gpu_id); + + pdd = kfd_process_device_data_by_id(target, user_gpu_id); + if (user_gpu_id == -EINVAL || !pdd) { + r = -ENODEV; + goto unlock_out; + } + } + switch (args->op) { case KFD_IOC_DBG_TRAP_ENABLE: if (target != p) @@ -3009,7 +3024,16 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v (uint32_t *)args->resume_queues.queue_array_ptr); break; case KFD_IOC_DBG_TRAP_SET_NODE_ADDRESS_WATCH: + r = kfd_dbg_trap_set_dev_address_watch(pdd, + args->set_node_address_watch.address, + args->set_node_address_watch.mask, + &args->set_node_address_watch.id, + args->set_node_address_watch.mode); + break; case KFD_IOC_DBG_TRAP_CLEAR_NODE_ADDRESS_WATCH: + r = kfd_dbg_trap_clear_dev_address_watch(pdd, + args->clear_node_address_watch.id); + break; case KFD_IOC_DBG_TRAP_SET_FLAGS: case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: -- cgit From 103d5f08ff42b666c61c350be2c3e724c1646918 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 9 May 2022 10:51:56 -0400 Subject: drm/amdkfd: add debug set flags operation Allow the debugger to set single memory and single ALU operations. Some exceptions are imprecise (memory violations, address watch) in the sense that a trap occurs only when the exception interrupt occurs and not at the non-halting faulty instruction. Trap temporaries 0 & 1 save the program counter address, which means that these values will not point to the faulty instruction address but to whenever the interrupt was raised. Setting the Single Memory Operations flag will inject an automatic wait on every memory operation instruction forcing imprecise memory exceptions to become precise at the cost of performance. This setting is not permitted on debug devices that support only a global setting of this option. Return the previous set flags to the debugger as well. 
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 016724c82928..5ee38614ed9b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3035,6 +3035,8 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v args->clear_node_address_watch.id); break; case KFD_IOC_DBG_TRAP_SET_FLAGS: + r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags); + break; case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: -- cgit From 5bc20c224bcb863571e8831cdbba23cd61b10ac3 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 9 May 2022 11:10:32 -0400 Subject: drm/amdkfd: add debug query event operation Allow the debugger to query a single queue, device and process exception. The KFD should also return the GPU or Queue id of the exception. The debugger also has the option of clearing exceptions after being queried. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 5ee38614ed9b..498859259b55 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3038,6 +3038,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v r = kfd_dbg_trap_set_flags(target, &args->set_flags.flags); break; case KFD_IOC_DBG_TRAP_QUERY_DEBUG_EVENT: + r = kfd_dbg_ev_query_debug_event(target, + &args->query_debug_event.queue_id, + &args->query_debug_event.gpu_id, + args->query_debug_event.exception_mask, + &args->query_debug_event.exception_mask); + break; case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: -- cgit From 2b36de971d25daa2ad287114ae3ca11a8f8d49d7 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 9 May 2022 13:37:36 -0400 Subject: drm/amdkfd: add debug query exception info operation Allow the debugger to query additional info based on an exception code. For device exceptions, it's currently only memory violation information. For process exceptions, it's currently only runtime information. Queue exception only report the queue exception status. The debugger has the option of clearing the target exception on query. 
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 498859259b55..b7ee79b5220a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3045,6 +3045,13 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v &args->query_debug_event.exception_mask); break; case KFD_IOC_DBG_TRAP_QUERY_EXCEPTION_INFO: + r = kfd_dbg_trap_query_exception_info(target, + args->query_exception_info.source_id, + args->query_exception_info.exception_code, + args->query_exception_info.clear_exception, + (void __user *)args->query_exception_info.info_ptr, + &args->query_exception_info.info_size); + break; case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: pr_warn("Debug op %i not supported yet\n", args->op); -- cgit From b17bd5dbf64677682a3bca249c64521d5eabcb38 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 10 May 2022 11:15:29 -0400 Subject: drm/amdkfd: add debug queue snapshot operation Allow the debugger to get a snapshot of a specified number of queues containing various queue property information that is copied to the debugger. Since the debugger doesn't know how many queues exist at any given time, allow the debugger to pass the requested number of snapshots as 0 to get the actual number of potential snapshots to use for a subsequent snapshot request for actual information. To prevent future ABI breakage, pass in the requested entry_size. The KFD will return it's own entry_size in case the debugger still wants log the information in a core dump on sizing failure. Also allow the debugger to clear exceptions when doing a snapshot. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index b7ee79b5220a..24066756e478 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3053,6 +3053,12 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v &args->query_exception_info.info_size); break; case KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT: + r = pqm_get_queue_snapshot(&target->pqm, + args->queue_snapshot.exception_mask, + (void __user *)args->queue_snapshot.snapshot_buf_ptr, + &args->queue_snapshot.num_queues, + &args->queue_snapshot.entry_size); + break; case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: pr_warn("Debug op %i not supported yet\n", args->op); r = -EACCES; -- cgit From 12976e6a5ab8fc3766c0304d72f7eec81a109b55 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 10 May 2022 12:47:45 -0400 Subject: drm/amdkfd: add debug device snapshot operation Similar to queue snapshot, return an array of device information using an entry_size check and return. Unlike queue snapshots, the debugger needs to pass to correct number of devices that exist. If it fails to do so, the KFD will return the number of actual devices so that the debugger can make a subsequent successful call. 
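Both snapshot operations share the same two-pass sizing convention, so a debugger typically probes for the count first and then allocates. A sketch for the queue-snapshot case, with field names as used by the kernel side above and the entry-struct name assumed from the uapi header:

	/* Sketch: two-pass queue snapshot -- probe the count, then fetch entries. */
	#include <stdint.h>
	#include <stdlib.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	static int snapshot_queues(int kfd_fd, int target_pid)
	{
		struct kfd_ioctl_dbg_trap_args args;
		uint32_t n;
		void *buf;

		memset(&args, 0, sizeof(args));
		args.pid = target_pid;
		args.op  = KFD_IOC_DBG_TRAP_GET_QUEUE_SNAPSHOT;
		args.queue_snapshot.entry_size = sizeof(struct kfd_queue_snapshot_entry);
		args.queue_snapshot.num_queues = 0;     /* first pass: ask for the count */
		if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args))
			return -1;

		n = args.queue_snapshot.num_queues;     /* count reported by the KFD */
		buf = calloc(n, args.queue_snapshot.entry_size);
		if (!buf)
			return -1;

		args.queue_snapshot.num_queues = n;     /* second pass: fetch entries */
		args.queue_snapshot.snapshot_buf_ptr = (uintptr_t)buf;
		if (ioctl(kfd_fd, AMDKFD_IOC_DBG_TRAP, &args)) {
			free(buf);
			return -1;
		}

		/* ... parse n entries of entry_size bytes each ... */
		free(buf);
		return 0;
	}

For the device snapshot the flow is the same except that, as noted above, the debugger must already pass the correct device count and can retry with the count the KFD reports back on failure.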
Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 24066756e478..f54ff5c3387d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -3060,8 +3060,11 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v &args->queue_snapshot.entry_size); break; case KFD_IOC_DBG_TRAP_GET_DEVICE_SNAPSHOT: - pr_warn("Debug op %i not supported yet\n", args->op); - r = -EACCES; + r = kfd_dbg_trap_device_snapshot(target, + args->device_snapshot.exception_mask, + (void __user *)args->device_snapshot.snapshot_buf_ptr, + &args->device_snapshot.num_devices, + &args->device_snapshot.entry_size); break; default: pr_err("Invalid option: %i\n", args->op); -- cgit From a159afdad2f6b97e4d18549cff2b53d17e68a412 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 10 May 2022 12:51:26 -0400 Subject: drm/amdkfd: bump kfd ioctl minor version for debug api availability Bump the minor version to declare debugging capability is now available. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index f54ff5c3387d..cce2abe12e1b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2984,7 +2984,6 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v if (!r) target->exception_enable_mask = args->enable.exception_mask; - pr_warn("Debug functions limited\n"); break; case KFD_IOC_DBG_TRAP_DISABLE: r = kfd_dbg_trap_disable(target); -- cgit From 8be295046748432c53a2dee39c469f63c60b0ec3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 6 Jun 2023 19:29:51 -0400 Subject: drm/amdkfd: potential error pointer dereference in ioctl The "target" either comes from kfd_create_process() which returns error pointers on error or kfd_lookup_process_by_pid() which returns NULL on error. So we need to check for both types of errors. Fixes: 0ab2d7532b05 ("drm/amdkfd: prepare per-process debug enable and disable") Signed-off-by: Dan Carpenter Reviewed-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_chardev.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index cce2abe12e1b..d655c5bc951f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -2920,9 +2920,9 @@ static int kfd_ioctl_set_debug_trap(struct file *filep, struct kfd_process *p, v target = kfd_lookup_process_by_pid(pid); } - if (!target) { + if (IS_ERR_OR_NULL(target)) { pr_debug("Cannot find process PID %i to debug\n", args->pid); - r = -ESRCH; + r = target ? PTR_ERR(target) : -ESRCH; goto out; } -- cgit