From 8dc1db3172ae2f17ae71e33b608a33411ce8a1aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 14 Sep 2022 16:39:48 +0800 Subject: drm/amdkfd: Introduce kfd_node struct (v5) Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-alloctor etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 54 ++++++++++++++++---------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 07a9eaf9b7d8..66e021889c64 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -269,7 +269,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) int cu_cnt; int wave_cnt; int max_waves_per_cu; - struct kfd_dev *dev = NULL; + struct kfd_node *dev = NULL; struct kfd_process *proc = NULL; struct kfd_process_device *pdd = NULL; @@ -691,7 +691,7 @@ void kfd_process_destroy_wq(void) static void kfd_process_free_gpuvm(struct kgd_mem *mem, struct kfd_process_device *pdd, void **kptr) { - struct kfd_dev *dev = pdd->dev; + struct kfd_node *dev = pdd->dev; if (kptr && *kptr) { amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem); @@ -713,7 +713,7 @@ static int kfd_process_alloc_gpuvm(struct kfd_process_device *pdd, uint64_t gpu_va, uint32_t size, uint32_t flags, struct kgd_mem **mem, void **kptr) { - struct kfd_dev *kdev = pdd->dev; + struct kfd_node *kdev = pdd->dev; int err; err = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(kdev->adev, gpu_va, size, @@ -982,7 +982,7 @@ static void kfd_process_device_free_bos(struct kfd_process_device *pdd) static void kfd_process_kunmap_signal_bo(struct kfd_process *p) { struct kfd_process_device *pdd; - struct kfd_dev *kdev; + struct kfd_node *kdev; void *mem; kdev = kfd_device_by_id(GET_GPU_ID(p->signal_handle)); @@ -1040,9 +1040,9 @@ static void kfd_process_destroy_pdds(struct kfd_process *p) bitmap_free(pdd->qpd.doorbell_bitmap); idr_destroy(&pdd->alloc_idr); - kfd_free_process_doorbells(pdd->dev, pdd->doorbell_index); + kfd_free_process_doorbells(pdd->dev->kfd, pdd->doorbell_index); - if (pdd->dev->shared_resources.enable_mes) + if (pdd->dev->kfd->shared_resources.enable_mes) amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev, pdd->proc_ctx_bo); /* @@ -1259,10 +1259,10 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) int i; for (i = 0; i < p->n_pdds; i++) { - struct kfd_dev *dev = p->pdds[i]->dev; + struct kfd_node *dev = p->pdds[i]->dev; struct qcm_process_device *qpd = &p->pdds[i]->qpd; - if (!dev->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) + if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || qpd->cwsr_base) continue; offset = KFD_MMAP_TYPE_RESERVED_MEM | KFD_MMAP_GPU_ID(dev->id); @@ -1279,7 +1279,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) return err; } - memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", @@ -1291,7 +1291,7 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) { - struct kfd_dev *dev = pdd->dev; + struct kfd_node *dev = pdd->dev; struct qcm_process_device *qpd = &pdd->qpd; uint32_t flags = KFD_IOC_ALLOC_MEM_FLAGS_GTT | KFD_IOC_ALLOC_MEM_FLAGS_NO_SUBSTITUTE @@ -1300,7 +1300,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) void *kaddr; int ret; - if (!dev->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) + if (!dev->kfd->cwsr_enabled || qpd->cwsr_kaddr || !qpd->cwsr_base) return 0; /* cwsr_base is only set for dGPU */ @@ -1313,7 +1313,7 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) qpd->cwsr_kaddr = kaddr; qpd->tba_addr = qpd->cwsr_base; - memcpy(qpd->cwsr_kaddr, dev->cwsr_isa, dev->cwsr_isa_size); + memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", @@ -1324,10 +1324,10 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd) { - struct kfd_dev *dev = pdd->dev; + struct kfd_node *dev = pdd->dev; struct qcm_process_device *qpd = &pdd->qpd; - if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) + if (!dev->kfd->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base) return; kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr); @@ -1371,7 +1371,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) * support retry. */ for (i = 0; i < p->n_pdds; i++) { - struct kfd_dev *dev = p->pdds[i]->dev; + struct kfd_node *dev = p->pdds[i]->dev; /* Only consider GFXv9 and higher GPUs. Older GPUs don't * support the SVM APIs and don't need to be considered @@ -1394,7 +1394,7 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1)) return false; - if (dev->noretry) + if (dev->kfd->noretry) return false; } @@ -1528,7 +1528,7 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd, return 0; } -struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, +struct kfd_process_device *kfd_get_process_device_data(struct kfd_node *dev, struct kfd_process *p) { int i; @@ -1540,7 +1540,7 @@ struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, return NULL; } -struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, +struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, struct kfd_process *p) { struct kfd_process_device *pdd = NULL; @@ -1552,7 +1552,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, if (!pdd) return NULL; - if (init_doorbell_bitmap(&pdd->qpd, dev)) { + if (init_doorbell_bitmap(&pdd->qpd, dev->kfd)) { pr_err("Failed to init doorbell for process\n"); goto err_free_pdd; } @@ -1573,7 +1573,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, pdd->user_gpu_id = dev->id; atomic64_set(&pdd->evict_duration_counter, 0); - if (dev->shared_resources.enable_mes) { + if (dev->kfd->shared_resources.enable_mes) { retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, AMDGPU_MES_PROC_CTX_SIZE, &pdd->proc_ctx_bo, @@ -1619,7 +1619,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd, struct amdgpu_fpriv *drv_priv; struct amdgpu_vm *avm; struct kfd_process *p; - struct kfd_dev *dev; + struct kfd_node *dev; int ret; if (!drm_file) @@ -1679,7 +1679,7 @@ err_reserve_ib_mem: * * Assumes that the process lock is held. */ -struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, +struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev, struct kfd_process *p) { struct kfd_process_device *pdd; @@ -1811,7 +1811,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, + kfd_smi_event_queue_eviction(pdd->dev->kfd, p->lead_thread->pid, trigger); r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, @@ -1839,7 +1839,7 @@ fail: if (n_evicted == 0) break; - kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid); if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd)) @@ -1860,7 +1860,7 @@ int kfd_process_restore_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); + kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid); r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); @@ -2016,7 +2016,7 @@ int kfd_resume_all_processes(void) return ret; } -int kfd_reserved_mem_mmap(struct kfd_dev *dev, struct kfd_process *process, +int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process, struct vm_area_struct *vma) { struct kfd_process_device *pdd; @@ -2051,7 +2051,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) { struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); - struct kfd_dev *dev = pdd->dev; + struct kfd_node *dev = pdd->dev; /* * It can be that we race and lose here, but that is extremely unlikely -- cgit From e2069a7b0880ccdc6fa6530b6091e47168705425 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:20:52 -0400 Subject: drm/amdkfd: Add XCC instance to kgd2kfd interface (v3) Gfx 9 starts to have multiple XCC instances in one device. Add instance parameter to kgd2kfd functions where XCC instance was hard coded as 0. Also, update code to pass the correct instance number when running on a multi-XCC setup. v2: introduce the XCC instance to gfx v11 (Morris) v3: rebase (Alex) Signed-off-by: Amber Lin Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Tested-by: Amber Lin Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 66e021889c64..888590dfa646 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -290,7 +290,7 @@ static int kfd_get_cu_occupancy(struct attribute *attr, char *buffer) wave_cnt = 0; max_waves_per_cu = 0; dev->kfd2kgd->get_cu_occupancy(dev->adev, proc->pasid, &wave_cnt, - &max_waves_per_cu); + &max_waves_per_cu, 0); /* Translate wave count to number of compute units */ cu_cnt = (wave_cnt + (max_waves_per_cu - 1)) / max_waves_per_cu; -- cgit From f87f686482c6d2d4465245356854710b01f312c1 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 9 May 2022 22:22:20 -0400 Subject: drm/amdgpu: Add XCC inst to PASID TLB flushing Add XCC instance to select the correct KIQ ring when flushing TLBs on a multi-XCC setup. Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 888590dfa646..9b1e84d33cdc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2052,6 +2052,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); struct kfd_node *dev = pdd->dev; + int xcc = 0; /* * It can be that we race and lose here, but that is extremely unlikely @@ -2069,8 +2070,10 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, pdd->qpd.vmid); } else { - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, - pdd->process->pasid, type); + for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++) + amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, + pdd->process->pasid, type, + dev->start_xcc_id + xcc); } } -- cgit From fe1f05df5919c67c3add49efb55e251a8d78ee4e Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 31 May 2022 14:39:36 -0400 Subject: drm/amdkfd: Rework kfd_locked handling Currently, even if kfd_locked is set, a process is first created and then removed to work around a race condition in updating kfd_locked flag. Rework kfd_locked handling to ensure no processes is created if kfd_locked is set. This is achieved by updating kfd_locked under kfd_processes_mutex. With this there is no need for kfd_locked to be an atomic counter. Instead, it can be a regular integer. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 9b1e84d33cdc..c3d43e6e5236 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -50,7 +50,7 @@ struct mm_struct; * Unique/indexed by mm_struct* */ DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); -static DEFINE_MUTEX(kfd_processes_mutex); +DEFINE_MUTEX(kfd_processes_mutex); DEFINE_SRCU(kfd_processes_srcu); @@ -818,6 +818,12 @@ struct kfd_process *kfd_create_process(struct file *filep) */ mutex_lock(&kfd_processes_mutex); + if (kfd_is_locked()) { + mutex_unlock(&kfd_processes_mutex); + pr_debug("KFD is locked! Cannot create process"); + return ERR_PTR(-EINVAL); + } + /* A prior open of /dev/kfd could have already created the process. */ process = find_process(thread, false); if (process) { -- cgit From 5fb34bd9cf9e248d7e84e431a4a6b731334ab564 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 24 May 2022 10:22:12 -0500 Subject: drm/amdkfd: pass kfd_node ref to svm migration api This work is required for GC 9.4.3, previous to support memory partitions per node at SVM. When multiple partition is configured, every BO should be allocated inside one specific partition which corresponds to the current amdgpu_device and kfd_node. v2: squash in compilation fix (Alex) v3: squash in fix for pre-gfx 9.4.3 (Alex) v4: squash in best_loc fix (Alex) Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index c3d43e6e5236..666815b227a8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1891,13 +1891,13 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) } int -kfd_process_gpuid_from_adev(struct kfd_process *p, struct amdgpu_device *adev, - uint32_t *gpuid, uint32_t *gpuidx) +kfd_process_gpuid_from_node(struct kfd_process *p, struct kfd_node *node, + uint32_t *gpuid, uint32_t *gpuidx) { int i; for (i = 0; i < p->n_pdds; i++) - if (p->pdds[i] && p->pdds[i]->dev->adev == adev) { + if (p->pdds[i] && p->pdds[i]->dev == node) { *gpuid = p->pdds[i]->user_gpu_id; *gpuidx = i; return 0; -- cgit From d6e924ad85a0cebc9e39eb956a23386ce32cc9f9 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 9 Aug 2022 14:56:53 -0400 Subject: drm/amdkfd: Update SMI events for GFX9.4.3 On GFX 9.4.3, there can be multiple KFD nodes. As a result, SMI events for SVM, queue evict/restore should be raised for each node independently. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 666815b227a8..a6ff57f11472 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1817,7 +1817,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - kfd_smi_event_queue_eviction(pdd->dev->kfd, p->lead_thread->pid, + kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid, trigger); r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm, @@ -1845,7 +1845,7 @@ fail: if (n_evicted == 0) break; - kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid); + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd)) @@ -1866,7 +1866,7 @@ int kfd_process_restore_queues(struct kfd_process *p) for (i = 0; i < p->n_pdds; i++) { struct kfd_process_device *pdd = p->pdds[i]; - kfd_smi_event_queue_restore(pdd->dev->kfd, p->lead_thread->pid); + kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid); r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm, &pdd->qpd); -- cgit From c4050ff1a43eec08498b1ed876efc6213592dba0 Mon Sep 17 00:00:00 2001 From: Lijo Lazar Date: Thu, 9 Feb 2023 16:30:53 +0530 Subject: drm/amdkfd: Use xcc mask for identifying xcc Instead of start xcc id and number of xcc per node, use the xcc mask which is the mask of logical ids of xccs belonging to a parition. Signed-off-by: Lijo Lazar Reviewed-by: Le Ma Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index a6ff57f11472..7f7d1378a2f8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2058,6 +2058,7 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) struct amdgpu_vm *vm = drm_priv_to_vm(pdd->drm_priv); uint64_t tlb_seq = amdgpu_vm_tlb_seq(vm); struct kfd_node *dev = pdd->dev; + uint32_t xcc_mask = dev->xcc_mask; int xcc = 0; /* @@ -2076,10 +2077,9 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) amdgpu_amdkfd_flush_gpu_tlb_vmid(dev->adev, pdd->qpd.vmid); } else { - for (xcc = 0; xcc < dev->num_xcc_per_node; xcc++) - amdgpu_amdkfd_flush_gpu_tlb_pasid(dev->adev, - pdd->process->pasid, type, - dev->start_xcc_id + xcc); + for_each_inst(xcc, xcc_mask) + amdgpu_amdkfd_flush_gpu_tlb_pasid( + dev->adev, pdd->process->pasid, type, xcc); } } -- cgit From 0ab2d7532b05a3e7c06fd3b0c8bd6b46c1dfb508 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 25 Mar 2022 14:55:30 -0400 Subject: drm/amdkfd: prepare per-process debug enable and disable The ROCm debugger will attach to a process to debug by PTRACE and will expect the KFD to prepare a process for the target PID, whether the target PID has opened the KFD device or not. This patch is to explicity handle this requirement. Further HW mode setting and runtime coordination requirements will be handled in following patches. In the case where the target process has not opened the KFD device, a new KFD process must be created for the target PID. The debugger as well as the target process for this case will have not acquired any VMs so handle process restoration to correctly account for this. To coordinate with HSA runtime, the debugger must be aware of the target process' runtime enablement status and will copy the runtime status information into the debugged KFD process for later query. On enablement, the debugger will subscribe to a set of exceptions where each exception events will notify the debugger through a pollable FIFO file descriptor that the debugger provides to the KFD to manage. Finally on process termination of either the debugger or the target, debugging must be disabled if it has not been done so. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 60 +++++++++++++++++++++----------- 1 file changed, 40 insertions(+), 20 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 7f7d1378a2f8..ef67f5e37c6f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -44,6 +44,7 @@ struct mm_struct; #include "kfd_iommu.h" #include "kfd_svm.h" #include "kfd_smi_events.h" +#include "kfd_debug.h" /* * List of struct kfd_process (field kfd_process). @@ -69,7 +70,6 @@ static struct kfd_process *find_process(const struct task_struct *thread, bool ref); static void kfd_process_ref_release(struct kref *ref); static struct kfd_process *create_process(const struct task_struct *thread); -static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep); static void evict_process_worker(struct work_struct *work); static void restore_process_worker(struct work_struct *work); @@ -798,18 +798,19 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd) kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr); } -struct kfd_process *kfd_create_process(struct file *filep) +struct kfd_process *kfd_create_process(struct task_struct *thread) { struct kfd_process *process; - struct task_struct *thread = current; int ret; - if (!thread->mm) + if (!(thread->mm && mmget_not_zero(thread->mm))) return ERR_PTR(-EINVAL); /* Only the pthreads threading model is supported. */ - if (thread->group_leader->mm != thread->mm) + if (thread->group_leader->mm != thread->mm) { + mmput(thread->mm); return ERR_PTR(-EINVAL); + } /* * take kfd processes mutex before starting of process creation @@ -833,10 +834,6 @@ struct kfd_process *kfd_create_process(struct file *filep) if (IS_ERR(process)) goto out; - ret = kfd_process_init_cwsr_apu(process, filep); - if (ret) - goto out_destroy; - if (!procfs.kobj) goto out; @@ -870,16 +867,9 @@ out: if (!IS_ERR(process)) kref_get(&process->ref); mutex_unlock(&kfd_processes_mutex); + mmput(thread->mm); return process; - -out_destroy: - hash_del_rcu(&process->kfd_processes); - mutex_unlock(&kfd_processes_mutex); - synchronize_srcu(&kfd_processes_srcu); - /* kfd_process_free_notifier will trigger the cleanup */ - mmu_notifier_put(&process->mmu_notifier); - return ERR_PTR(ret); } struct kfd_process *kfd_get_process(const struct task_struct *thread) @@ -1180,6 +1170,25 @@ static void kfd_process_notifier_release_internal(struct kfd_process *p) /* Indicate to other users that MM is no longer valid */ p->mm = NULL; + kfd_dbg_trap_disable(p); + + if (atomic_read(&p->debugged_process_count) > 0) { + struct kfd_process *target; + unsigned int temp; + int idx = srcu_read_lock(&kfd_processes_srcu); + + hash_for_each_rcu(kfd_processes_table, temp, target, kfd_processes) { + if (target->debugger_process && target->debugger_process == p) { + mutex_lock_nested(&target->mutex, 1); + kfd_dbg_trap_disable(target); + mutex_unlock(&target->mutex); + if (atomic_read(&p->debugged_process_count) == 0) + break; + } + } + + srcu_read_unlock(&kfd_processes_srcu, idx); + } mmu_notifier_put(&p->mmu_notifier); } @@ -1259,11 +1268,14 @@ void kfd_cleanup_processes(void) mmu_notifier_synchronize(); } -static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) +int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) { unsigned long offset; int i; + if (p->has_cwsr) + return 0; + for (i = 0; i < p->n_pdds; i++) { struct kfd_node *dev = p->pdds[i]->dev; struct qcm_process_device *qpd = &p->pdds[i]->qpd; @@ -1292,6 +1304,8 @@ static int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); } + p->has_cwsr = true; + return 0; } @@ -1434,6 +1448,10 @@ static struct kfd_process *create_process(const struct task_struct *thread) if (err) goto err_event_init; process->is_32bit_user_mode = in_compat_syscall(); + process->debug_trap_enabled = false; + process->debugger_process = NULL; + process->exception_enable_mask = 0; + atomic_set(&process->debugged_process_count, 0); process->pasid = kfd_pasid_alloc(); if (process->pasid == 0) { @@ -1967,8 +1985,10 @@ static void restore_process_worker(struct work_struct *work) */ p->last_restore_timestamp = get_jiffies_64(); - ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, - &p->ef); + /* VMs may not have been acquired yet during debugging. */ + if (p->kgd_process_info) + ret = amdgpu_amdkfd_gpuvm_restore_process_bos(p->kgd_process_info, + &p->ef); if (ret) { pr_debug("Failed to restore BOs of pasid 0x%x, retry after %d ms\n", p->pasid, PROCESS_BACK_OFF_TIME_MS); -- cgit From bb13d763f251c28b08d996671c5146a2113fc9e7 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 24 Mar 2023 16:19:27 -0400 Subject: drm/amdkfd: fix kfd_suspend_all_processes Flush delayed restore work in kfd_suspend_all_queues instead of cancelling. Cancelling the work before it runs results in the queues becoming permanently disabled. Flushing the work ensures that the queue suspend/resume state stays balanced. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ef67f5e37c6f..d75dac92775c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -2014,7 +2014,7 @@ void kfd_suspend_all_processes(void) WARN(debug_evictions, "Evicting all processes"); hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { cancel_delayed_work_sync(&p->eviction_work); - cancel_delayed_work_sync(&p->restore_work); + flush_delayed_work(&p->restore_work); if (kfd_process_evict_queues(p, KFD_QUEUE_EVICTION_TRIGGER_SUSPEND)) pr_err("Failed to suspend process 0x%x\n", p->pasid); -- cgit From 0de4ec9a03537bd2b189b5afbf83acd6b72b0258 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Mon, 4 Apr 2022 13:38:11 -0400 Subject: drm/amdgpu: prepare map process for multi-process debug devices Unlike single process debug devices, multi-process debug devices allow debug mode setting per-VMID (non-device-global). Because the HWS manages PASID-VMID mapping, the new MAP_PROCESS API allows the KFD to forward the required SPI debug register write requests. To request a new debug mode setting change, the KFD must be able to preempt all queues then remap all queues with these new setting requests for MAP_PROCESS to take effect. Note that by default, trap enablement in non-debug mode must be disabled for performance reasons for multi-process debug devices due to setup overhead in FW. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index d75dac92775c..725d936b2cc7 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1612,6 +1612,11 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev, } p->pdds[p->n_pdds++] = pdd; + if (kfd_dbg_is_per_vmid_supported(pdd->dev)) + pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap( + pdd->dev->adev, + false, + 0); /* Init idr used for memory handle translation */ idr_init(&pdd->alloc_idr); -- cgit From 218895820e6fccade42a7c3ab9c0a44dec0a1ebc Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 5 Apr 2022 12:34:55 -0400 Subject: drm/amdkfd: add per process hw trap enable and disable functions To enable HW debug mode per process, all devices must be debug enabled successfully. If a failure occures, rewind the enablement of debug mode on the enabled devices. A power management scenario that needs to be considered is HW debug mode setting during GFXOFF. During GFXOFF, these registers will be unreachable so we have to transiently disable GFXOFF when setting. Also, some devices don't support the RLC save restore function for these debug registers so we have to disable GFXOFF completely during a debug session. Cooperative launch also has debugging restriction based on HW/FW bugs. If such bugs exists, the debugger cannot attach to a process that uses GWS resources nor can GWS resources be requested if a process is being debugged. Multi-process debug devices can only enable trap temporaries based on certain runtime scenerios, which will be explained when the runtime enable functions are implemented in a follow up patch. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 725d936b2cc7..e77cadadb09b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1165,9 +1165,19 @@ static void kfd_process_free_notifier(struct mmu_notifier *mn) static void kfd_process_notifier_release_internal(struct kfd_process *p) { + int i; + cancel_delayed_work_sync(&p->eviction_work); cancel_delayed_work_sync(&p->restore_work); + for (i = 0; i < p->n_pdds; i++) { + struct kfd_process_device *pdd = p->pdds[i]; + + /* re-enable GFX OFF since runtime enable with ttmp setup disabled it. */ + if (!kfd_dbg_is_rlc_restore_supported(pdd->dev) && p->runtime_info.ttmp_setup) + amdgpu_gfx_off_ctrl(pdd->dev->adev, true); + } + /* Indicate to other users that MM is no longer valid */ p->mm = NULL; kfd_dbg_trap_disable(p); -- cgit From 44b87bb0836c65d1b9d21b01503eb6e9b9297771 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Wed, 6 Apr 2022 12:03:31 -0400 Subject: drm/amdkfd: add raise exception event function Exception events can be generated from interrupts or queue activitity. The raise event function will save exception status of a queue, device or process then notify the debugger of the status change by writing to a debugger polled file descriptor that the debugger provides during debug attach. For memory violation exceptions, extra exception data will be saved. The debugger will be able to query the saved exception states by query operation that will be provided by follow up patches. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index e77cadadb09b..f904d6d6e01c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1509,6 +1509,8 @@ static struct kfd_process *create_process(const struct task_struct *thread) kfd_unref_process(process); get_task_struct(process->lead_thread); + INIT_WORK(&process->debug_event_workarea, debug_event_write_work_handler); + return process; err_register_notifier: -- cgit From c2d2588c702364ff53916ddd97e2b26fd4f4a317 Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 8 Apr 2022 12:49:48 -0400 Subject: drm/amdkfd: add send exception operation Add a debug operation that allows the debugger to send an exception directly to runtime through a payload address. For memory violations, normal vmfault signals will be applied to notify runtime instead after passing in the saved exception data when a memory violation was raised to the debugger. For runtime exceptions, this will unblock the runtime enable function which will be explained and implemented in a follow up patch. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 71 +++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index f904d6d6e01c..5cbfcaf08c8f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1462,6 +1462,7 @@ static struct kfd_process *create_process(const struct task_struct *thread) process->debugger_process = NULL; process->exception_enable_mask = 0; atomic_set(&process->debugged_process_count, 0); + sema_init(&process->runtime_enable_sema, 0); process->pasid = kfd_pasid_alloc(); if (process->pasid == 0) { @@ -2120,6 +2121,75 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) } } +struct send_exception_work_handler_workarea { + struct work_struct work; + struct kfd_process *p; + unsigned int queue_id; + uint64_t error_reason; +}; + +static void send_exception_work_handler(struct work_struct *work) +{ + struct send_exception_work_handler_workarea *workarea; + struct kfd_process *p; + struct queue *q; + struct mm_struct *mm; + struct kfd_context_save_area_header __user *csa_header; + uint64_t __user *err_payload_ptr; + uint64_t cur_err; + uint32_t ev_id; + + workarea = container_of(work, + struct send_exception_work_handler_workarea, + work); + p = workarea->p; + + mm = get_task_mm(p->lead_thread); + + if (!mm) + return; + + kthread_use_mm(mm); + + q = pqm_get_user_queue(&p->pqm, workarea->queue_id); + + if (!q) + goto out; + + csa_header = (void __user *)q->properties.ctx_save_restore_area_address; + + get_user(err_payload_ptr, (uint64_t __user **)&csa_header->err_payload_addr); + get_user(cur_err, err_payload_ptr); + cur_err |= workarea->error_reason; + put_user(cur_err, err_payload_ptr); + get_user(ev_id, &csa_header->err_event_id); + + kfd_set_event(p, ev_id); + +out: + kthread_unuse_mm(mm); + mmput(mm); +} + +int kfd_send_exception_to_runtime(struct kfd_process *p, + unsigned int queue_id, + uint64_t error_reason) +{ + struct send_exception_work_handler_workarea worker; + + INIT_WORK_ONSTACK(&worker.work, send_exception_work_handler); + + worker.p = p; + worker.queue_id = queue_id; + worker.error_reason = error_reason; + + schedule_work(&worker.work); + flush_work(&worker.work); + destroy_work_on_stack(&worker.work); + + return 0; +} + struct kfd_process_device *kfd_process_device_data_by_id(struct kfd_process *p, uint32_t gpu_id) { int i; @@ -2179,4 +2249,3 @@ int kfd_debugfs_mqds_by_process(struct seq_file *m, void *data) } #endif - -- cgit From 50cff45e274896235d371f16eab67a180e12a732 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Mon, 1 Mar 2021 18:34:39 -0600 Subject: drm/amdkfd: add debug trap enabled flag to tma Trap handler behavior will differ when a debugger is attached. Make the debug trap flag available in the trap handler TMA. Update it when the debug trap ioctl is invoked. Signed-off-by: Jay Cornwall Reviewed-by: Felix Kuehling Signed-off-by: Jonathan Kim Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 5cbfcaf08c8f..3b7f219c9d06 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1309,6 +1309,8 @@ int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep) memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); + kfd_process_set_trap_debug_flag(qpd, p->debug_trap_enabled); + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); @@ -1345,6 +1347,9 @@ static int kfd_process_device_init_cwsr_dgpu(struct kfd_process_device *pdd) memcpy(qpd->cwsr_kaddr, dev->kfd->cwsr_isa, dev->kfd->cwsr_isa_size); + kfd_process_set_trap_debug_flag(&pdd->qpd, + pdd->process->debug_trap_enabled); + qpd->tma_addr = qpd->tba_addr + KFD_CWSR_TMA_OFFSET; pr_debug("set tba :0x%llx, tma:0x%llx, cwsr_kaddr:%p for pqm.\n", qpd->tba_addr, qpd->tma_addr, qpd->cwsr_kaddr); @@ -1431,6 +1436,16 @@ bool kfd_process_xnack_mode(struct kfd_process *p, bool supported) return true; } +void kfd_process_set_trap_debug_flag(struct qcm_process_device *qpd, + bool enabled) +{ + if (qpd->cwsr_kaddr) { + uint64_t *tma = + (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET); + tma[2] = enabled; + } +} + /* * On return the kfd_process is fully operational and will be freed when the * mm is released -- cgit From 12fb1ad70d65edc3405884792d044fa79df7244f Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Fri, 22 Apr 2022 12:26:18 -0400 Subject: drm/amdkfd: update process interrupt handling for debug events The debugger must be notified by any debugger subscribed exception that comes from hardware interrupts. If a debugger session exits, any exceptions it subscribed to may still have interrupts in the interrupt ring buffer or KGD/KFD pipeline. To prevent a new session from inheriting stale interrupts, when a new queue is created, open an interrupt drain and allow the IH ring to drain from a timestamped checkpoint. Then inject a custom IV so that once the custom IV is picked up by the KFD, it's safe to close the drain and proceed with queue creation. The drain must also be on debug disable as SW interrupts may still be processed. Drain at this time and clear all the exception status. The debugger may also not be attached nor subscibed to certain exceptions so forward them directly to the runtime. GFX10 also requires its own IV processing, hence the creation of kfd_int_process_v10.c. This is because the IV from SQ interrupts are packed into a new continguous format unlike GFX9. To make this clear, a separate interrupting handling code file was created. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 47 ++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_process.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index 3b7f219c9d06..3d3611705d41 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -862,6 +862,8 @@ struct kfd_process *kfd_create_process(struct task_struct *thread) kfd_procfs_add_sysfs_stats(process); kfd_procfs_add_sysfs_files(process); kfd_procfs_add_sysfs_counters(process); + + init_waitqueue_head(&process->wait_irq_drain); } out: if (!IS_ERR(process)) @@ -2136,6 +2138,51 @@ void kfd_flush_tlb(struct kfd_process_device *pdd, enum TLB_FLUSH_TYPE type) } } +/* assumes caller holds process lock. */ +int kfd_process_drain_interrupts(struct kfd_process_device *pdd) +{ + uint32_t irq_drain_fence[8]; + int r = 0; + + if (!KFD_IS_SOC15(pdd->dev)) + return 0; + + pdd->process->irq_drain_is_open = true; + + memset(irq_drain_fence, 0, sizeof(irq_drain_fence)); + irq_drain_fence[0] = (KFD_IRQ_FENCE_SOURCEID << 8) | + KFD_IRQ_FENCE_CLIENTID; + irq_drain_fence[3] = pdd->process->pasid; + + /* ensure stale irqs scheduled KFD interrupts and send drain fence. */ + if (amdgpu_amdkfd_send_close_event_drain_irq(pdd->dev->adev, + irq_drain_fence)) { + pdd->process->irq_drain_is_open = false; + return 0; + } + + r = wait_event_interruptible(pdd->process->wait_irq_drain, + !READ_ONCE(pdd->process->irq_drain_is_open)); + if (r) + pdd->process->irq_drain_is_open = false; + + return r; +} + +void kfd_process_close_interrupt_drain(unsigned int pasid) +{ + struct kfd_process *p; + + p = kfd_lookup_process_by_pasid(pasid); + + if (!p) + return; + + WRITE_ONCE(p->irq_drain_is_open, false); + wake_up_all(&p->wait_irq_drain); + kfd_unref_process(p); +} + struct send_exception_work_handler_workarea { struct work_struct work; struct kfd_process *p; -- cgit