From 435af0b919bf9eb78f4e05e8596ebed9ca7885b7 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Mon, 24 Apr 2023 14:46:16 -0400 Subject: drm/amdkfd: Optimize svm range map to GPU with XNACK on With XNACK on if svm_range_set_attr set the range access or access_in_place attribute, we don't call svm_range_validate_and_map to update GPU mapping. This avoids prefaulting the range pages on system memory if the range is not prefetch to VRAM and not mapped to GPUs. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 96a138a39515..c02430537e9c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -735,7 +735,9 @@ svm_range_apply_attrs(struct kfd_process *p, struct svm_range *prange, case KFD_IOCTL_SVM_ATTR_ACCESS: case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE: case KFD_IOCTL_SVM_ATTR_NO_ACCESS: - *update_mapping = true; + if (!p->xnack_enabled) + *update_mapping = true; + gpuidx = kfd_process_gpuidx_from_gpuid(p, attrs[i].value); if (attrs[i].type == KFD_IOCTL_SVM_ATTR_NO_ACCESS) { -- cgit From 8dc1db3172ae2f17ae71e33b608a33411ce8a1aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 14 Sep 2022 16:39:48 +0800 Subject: drm/amdkfd: Introduce kfd_node struct (v5) Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-alloctor etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c02430537e9c..96ccff79902c 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1266,7 +1266,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, return -EINVAL; } - kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid, + kfd_smi_event_unmap_from_gpu(pdd->dev->kfd, p->lead_thread->pid, start, last, trigger); r = svm_range_unmap_from_gpu(pdd->dev->adev, @@ -3083,7 +3083,7 @@ int svm_range_list_init(struct kfd_process *p) spin_lock_init(&svms->deferred_list_lock); for (i = 0; i < p->n_pdds; i++) - if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev)) + if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->kfd)) bitmap_set(svms->bitmap_supported, i, 1); return 0; -- cgit From ef75a6ef37235e211bbdb17c25e5f79c55df1750 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Thu, 3 Mar 2022 10:56:05 -0500 Subject: drm/amdkfd: Update coherence settings for svm ranges Recently introduced commit "drm/amdgpu: Set cache coherency for GC 9.4.3" did not update the settings applicable for svm ranges. Add the coherence settings for svm ranges for GFX IP 9.4.3. Reviewed-by: Amber Lin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 96ccff79902c..4b4f3bf8b823 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1159,6 +1159,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; if (domain == SVM_RANGE_VRAM_DOMAIN) bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); @@ -1198,6 +1199,22 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; } break; + case IP_VERSION(9, 4, 3): + //TODO: Need more work for handling multiple memory partitions + //e.g. NPS4. Current approch is only applicable without memory + //partitions. + snoop = true; + if (uncached) + mapping_flags |= AMDGPU_VM_MTYPE_UC; + /* local HBM region close to partition*/ + else if (bo_adev == adev) + mapping_flags |= AMDGPU_VM_MTYPE_RW; + /* local HBM region far from partition or remote XGMI GPU or + * system memory + */ + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; + break; default: mapping_flags |= coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; -- cgit From 5fb34bd9cf9e248d7e84e431a4a6b731334ab564 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Tue, 24 May 2022 10:22:12 -0500 Subject: drm/amdkfd: pass kfd_node ref to svm migration api This work is required for GC 9.4.3, previous to support memory partitions per node at SVM. When multiple partition is configured, every BO should be allocated inside one specific partition which corresponds to the current amdgpu_device and kfd_node. v2: squash in compilation fix (Alex) v3: squash in fix for pre-gfx 9.4.3 (Alex) v4: squash in best_loc fix (Alex) Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 152 +++++++++++++++++------------------ 1 file changed, 76 insertions(+), 76 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4b4f3bf8b823..639831fbb6ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -170,8 +170,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, page = hmm_pfn_to_page(hmm_pfns[i]); if (is_zone_device_page(page)) { - struct amdgpu_device *bo_adev = - amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - @@ -424,10 +423,8 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) } static bool -svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) +svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange) { - struct amdgpu_device *bo_adev; - mutex_lock(&prange->lock); if (!prange->svm_bo) { mutex_unlock(&prange->lock); @@ -440,12 +437,11 @@ svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) } if (svm_bo_ref_unless_zero(prange->svm_bo)) { /* - * Migrate from GPU to GPU, remove range from source bo_adev - * svm_bo range list, and return false to allocate svm_bo from - * destination adev. + * Migrate from GPU to GPU, remove range from source svm_bo->node + * range list, and return false to allocate svm_bo from destination + * node. */ - bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); - if (bo_adev != adev) { + if (prange->svm_bo->node != node) { mutex_unlock(&prange->lock); spin_lock(&prange->svm_bo->list_lock); @@ -513,7 +509,7 @@ static struct svm_range_bo *svm_range_bo_new(void) } int -svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, +svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bool clear) { struct amdgpu_bo_param bp; @@ -528,7 +524,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, prange->start, prange->last); - if (svm_range_validate_svm_bo(adev, prange)) + if (svm_range_validate_svm_bo(node, prange)) return 0; svm_bo = svm_range_bo_new(); @@ -542,6 +538,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, kfree(svm_bo); return -ESRCH; } + svm_bo->node = node; svm_bo->eviction_fence = amdgpu_amdkfd_fence_create(dma_fence_context_alloc(1), mm, @@ -559,7 +556,10 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bp.type = ttm_bo_type_device; bp.resv = NULL; - r = amdgpu_bo_create_user(adev, &bp, &ubo); + /* TODO: Allocate memory from the right memory partition. We can sort + * out the details later, once basic memory partitioning is working + */ + r = amdgpu_bo_create_user(node->adev, &bp, &ubo); if (r) { pr_debug("failed %d to create bo\n", r); goto create_bo_failed; @@ -617,45 +617,30 @@ void svm_range_vram_node_free(struct svm_range *prange) prange->ttm_res = NULL; } -struct amdgpu_device * -svm_range_get_adev_by_id(struct svm_range *prange, uint32_t gpu_id) +struct kfd_node * +svm_range_get_node_by_id(struct svm_range *prange, uint32_t gpu_id) { - struct kfd_process_device *pdd; struct kfd_process *p; - int32_t gpu_idx; + struct kfd_process_device *pdd; p = container_of(prange->svms, struct kfd_process, svms); - - gpu_idx = kfd_process_gpuidx_from_gpuid(p, gpu_id); - if (gpu_idx < 0) { - pr_debug("failed to get device by id 0x%x\n", gpu_id); - return NULL; - } - pdd = kfd_process_device_from_gpuidx(p, gpu_idx); + pdd = kfd_process_device_data_by_id(p, gpu_id); if (!pdd) { - pr_debug("failed to get device by idx 0x%x\n", gpu_idx); + pr_debug("failed to get kfd process device by id 0x%x\n", gpu_id); return NULL; } - return pdd->dev->adev; + return pdd->dev; } struct kfd_process_device * -svm_range_get_pdd_by_adev(struct svm_range *prange, struct amdgpu_device *adev) +svm_range_get_pdd_by_node(struct svm_range *prange, struct kfd_node *node) { struct kfd_process *p; - int32_t gpu_idx, gpuid; - int r; p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpu_idx); - if (r) { - pr_debug("failed to get device id by adev %p\n", adev); - return NULL; - } - - return kfd_process_device_from_gpuidx(p, gpu_idx); + return kfd_get_process_device_data(node, p); } static int svm_range_bo_validate(void *param, struct amdgpu_bo *bo) @@ -1148,12 +1133,18 @@ svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, } return 0; } +static bool +svm_nodes_in_same_hive(struct kfd_node *node_a, struct kfd_node *node_b) +{ + return (node_a->adev == node_b->adev || + amdgpu_xgmi_same_hive(node_a->adev, node_b->adev)); +} static uint64_t -svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, - int domain) +svm_range_get_pte_flags(struct kfd_node *node, + struct svm_range *prange, int domain) { - struct amdgpu_device *bo_adev; + struct kfd_node *bo_node; uint32_t flags = prange->flags; uint32_t mapping_flags = 0; uint64_t pte_flags; @@ -1162,18 +1153,18 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; if (domain == SVM_RANGE_VRAM_DOMAIN) - bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + bo_node = prange->svm_bo->node; - switch (KFD_GC_VERSION(adev->kfd.dev)) { + switch (node->adev->ip_versions[GC_HWIP][0]) { case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_adev == adev) { + if (bo_node == node) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; } else { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - if (amdgpu_xgmi_same_hive(adev, bo_adev)) + if (svm_nodes_in_same_hive(node, bo_node)) snoop = true; } } else { @@ -1183,15 +1174,15 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, break; case IP_VERSION(9, 4, 2): if (domain == SVM_RANGE_VRAM_DOMAIN) { - if (bo_adev == adev) { + if (bo_node == node) { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; - if (adev->gmc.xgmi.connected_to_cpu) + if (node->adev->gmc.xgmi.connected_to_cpu) snoop = true; } else { mapping_flags |= coherent ? AMDGPU_VM_MTYPE_UC : AMDGPU_VM_MTYPE_NC; - if (amdgpu_xgmi_same_hive(adev, bo_adev)) + if (svm_nodes_in_same_hive(node, bo_node)) snoop = true; } } else { @@ -1207,7 +1198,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, if (uncached) mapping_flags |= AMDGPU_VM_MTYPE_UC; /* local HBM region close to partition*/ - else if (bo_adev == adev) + else if (bo_node == node) mapping_flags |= AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU or * system memory @@ -1231,7 +1222,7 @@ svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange, pte_flags |= (domain == SVM_RANGE_VRAM_DOMAIN) ? 0 : AMDGPU_PTE_SYSTEM; pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0; - pte_flags |= amdgpu_gem_va_map_flags(adev, mapping_flags); + pte_flags |= amdgpu_gem_va_map_flags(node->adev, mapping_flags); return pte_flags; } @@ -1338,7 +1329,7 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, pr_debug("Mapping range [0x%lx 0x%llx] on domain: %s\n", last_start, prange->start + i, last_domain ? "GPU" : "CPU"); - pte_flags = svm_range_get_pte_flags(adev, prange, last_domain); + pte_flags = svm_range_get_pte_flags(pdd->dev, prange, last_domain); if (readonly) pte_flags &= ~AMDGPU_PTE_WRITEABLE; @@ -1347,6 +1338,9 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, pte_flags); + /* TODO: we still need to determine the vm_manager.vram_base_offset based on + * the memory partition. + */ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, last_start, prange->start + i, pte_flags, @@ -1384,16 +1378,14 @@ svm_range_map_to_gpus(struct svm_range *prange, unsigned long offset, unsigned long *bitmap, bool wait, bool flush_tlb) { struct kfd_process_device *pdd; - struct amdgpu_device *bo_adev; + struct amdgpu_device *bo_adev = NULL; struct kfd_process *p; struct dma_fence *fence = NULL; uint32_t gpuidx; int r = 0; if (prange->svm_bo && prange->ttm_res) - bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); - else - bo_adev = NULL; + bo_adev = prange->svm_bo->node->adev; p = container_of(prange->svms, struct kfd_process, svms); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { @@ -2526,17 +2518,17 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, */ static int32_t svm_range_best_restore_location(struct svm_range *prange, - struct amdgpu_device *adev, + struct kfd_node *node, int32_t *gpuidx) { - struct amdgpu_device *bo_adev, *preferred_adev; + struct kfd_node *bo_node, *preferred_node; struct kfd_process *p; uint32_t gpuid; int r; p = container_of(prange->svms, struct kfd_process, svms); - r = kfd_process_gpuid_from_adev(p, adev, &gpuid, gpuidx); + r = kfd_process_gpuid_from_node(p, node, &gpuid, gpuidx); if (r < 0) { pr_debug("failed to get gpuid from kgd\n"); return -1; @@ -2546,9 +2538,8 @@ svm_range_best_restore_location(struct svm_range *prange, prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) { return prange->preferred_loc; } else if (prange->preferred_loc != KFD_IOCTL_SVM_LOCATION_UNDEFINED) { - preferred_adev = svm_range_get_adev_by_id(prange, - prange->preferred_loc); - if (amdgpu_xgmi_same_hive(adev, preferred_adev)) + preferred_node = svm_range_get_node_by_id(prange, prange->preferred_loc); + if (preferred_node && svm_nodes_in_same_hive(node, preferred_node)) return prange->preferred_loc; /* fall through */ } @@ -2560,8 +2551,8 @@ svm_range_best_restore_location(struct svm_range *prange, if (!prange->actual_loc) return 0; - bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc); - if (amdgpu_xgmi_same_hive(adev, bo_adev)) + bo_node = svm_range_get_node_by_id(prange, prange->actual_loc); + if (bo_node && svm_nodes_in_same_hive(node, bo_node)) return prange->actual_loc; else return 0; @@ -2678,7 +2669,7 @@ svm_range_check_vm_userptr(struct kfd_process *p, uint64_t start, uint64_t last, } static struct -svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, +svm_range *svm_range_create_unregistered_range(struct kfd_node *node, struct kfd_process *p, struct mm_struct *mm, int64_t addr) @@ -2713,7 +2704,7 @@ svm_range *svm_range_create_unregistered_range(struct amdgpu_device *adev, pr_debug("Failed to create prange in address [0x%llx]\n", addr); return NULL; } - if (kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx)) { + if (kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx)) { pr_debug("failed to get gpuid from kgd\n"); svm_range_free(prange, true); return NULL; @@ -2767,7 +2758,7 @@ static bool svm_range_skip_recover(struct svm_range *prange) } static void -svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, +svm_range_count_fault(struct kfd_node *node, struct kfd_process *p, int32_t gpuidx) { struct kfd_process_device *pdd; @@ -2780,7 +2771,7 @@ svm_range_count_fault(struct amdgpu_device *adev, struct kfd_process *p, uint32_t gpuid; int r; - r = kfd_process_gpuid_from_adev(p, adev, &gpuid, &gpuidx); + r = kfd_process_gpuid_from_node(p, node, &gpuid, &gpuidx); if (r < 0) return; } @@ -2808,6 +2799,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, + uint32_t client_id, uint32_t node_id, uint64_t addr, bool write_fault) { struct mm_struct *mm = NULL; @@ -2815,6 +2807,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, struct svm_range *prange; struct kfd_process *p; ktime_t timestamp = ktime_get_boottime(); + struct kfd_node *node; int32_t best_loc; int32_t gpuidx = MAX_GPU_INSTANCE; bool write_locked = false; @@ -2858,6 +2851,13 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } + node = kfd_node_by_irq_ids(adev, node_id, client_id); + if (!node) { + pr_debug("kfd node does not exist node_id: %d, client_id: %d\n", node_id, + client_id); + r = -EFAULT; + goto out; + } mmap_read_lock(mm); retry_write_locked: mutex_lock(&svms->lock); @@ -2876,7 +2876,7 @@ retry_write_locked: write_locked = true; goto retry_write_locked; } - prange = svm_range_create_unregistered_range(adev, p, mm, addr); + prange = svm_range_create_unregistered_range(node, p, mm, addr); if (!prange) { pr_debug("failed to create unregistered range svms 0x%p address [0x%llx]\n", svms, addr); @@ -2891,7 +2891,7 @@ retry_write_locked: mutex_lock(&prange->migrate_mutex); if (svm_range_skip_recover(prange)) { - amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid); r = 0; goto out_unlock_range; } @@ -2922,7 +2922,7 @@ retry_write_locked: goto out_unlock_range; } - best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); + best_loc = svm_range_best_restore_location(prange, node, &gpuidx); if (best_loc == -1) { pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", svms, prange->start, prange->last); @@ -2981,7 +2981,7 @@ out_unlock_svms: mutex_unlock(&svms->lock); mmap_read_unlock(mm); - svm_range_count_fault(adev, p, gpuidx); + svm_range_count_fault(node, p, gpuidx); mmput(mm); out: @@ -2989,7 +2989,7 @@ out: if (r == -EAGAIN) { pr_debug("recover vm fault later\n"); - amdgpu_gmc_filter_faults_remove(adev, addr, pasid); + amdgpu_gmc_filter_faults_remove(node->adev, addr, pasid); r = 0; } return r; @@ -3231,7 +3231,7 @@ svm_range_best_prefetch_location(struct svm_range *prange) DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); uint32_t best_loc = prange->prefetch_loc; struct kfd_process_device *pdd; - struct amdgpu_device *bo_adev; + struct kfd_node *bo_node; struct kfd_process *p; uint32_t gpuidx; @@ -3240,9 +3240,9 @@ svm_range_best_prefetch_location(struct svm_range *prange) if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) goto out; - bo_adev = svm_range_get_adev_by_id(prange, best_loc); - if (!bo_adev) { - WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc); + bo_node = svm_range_get_node_by_id(prange, best_loc); + if (!bo_node) { + WARN_ONCE(1, "failed to get valid kfd node at id%x\n", best_loc); best_loc = 0; goto out; } @@ -3260,10 +3260,10 @@ svm_range_best_prefetch_location(struct svm_range *prange) continue; } - if (pdd->dev->adev == bo_adev) + if (pdd->dev->adev == bo_node->adev) continue; - if (!amdgpu_xgmi_same_hive(pdd->dev->adev, bo_adev)) { + if (!svm_nodes_in_same_hive(pdd->dev, bo_node)) { best_loc = 0; break; } -- cgit From d6e924ad85a0cebc9e39eb956a23386ce32cc9f9 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 9 Aug 2022 14:56:53 -0400 Subject: drm/amdkfd: Update SMI events for GFX9.4.3 On GFX 9.4.3, there can be multiple KFD nodes. As a result, SMI events for SVM, queue evict/restore should be raised for each node independently. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 639831fbb6ca..0dafbbe954ca 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1274,7 +1274,7 @@ svm_range_unmap_from_gpus(struct svm_range *prange, unsigned long start, return -EINVAL; } - kfd_smi_event_unmap_from_gpu(pdd->dev->kfd, p->lead_thread->pid, + kfd_smi_event_unmap_from_gpu(pdd->dev, p->lead_thread->pid, start, last, trigger); r = svm_range_unmap_from_gpu(pdd->dev->adev, @@ -2934,7 +2934,7 @@ retry_write_locked: svms, prange->start, prange->last, best_loc, prange->actual_loc); - kfd_smi_event_page_fault_start(adev->kfd.dev, p->lead_thread->pid, addr, + kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, write_fault, timestamp); if (prange->actual_loc != best_loc) { @@ -2972,7 +2972,7 @@ retry_write_locked: pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", r, svms, prange->start, prange->last); - kfd_smi_event_page_fault_end(adev->kfd.dev, p->lead_thread->pid, addr, + kfd_smi_event_page_fault_end(node, p->lead_thread->pid, addr, migration); out_unlock_range: -- cgit From f5fe7edfd6ce62cd23fbd707e7f9fe0f56a45e94 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Fri, 30 Sep 2022 09:16:21 -0400 Subject: drm/amdkfd: Update interrupt handling for GFX9.4.3 Update interrupt handling in CPX mode for GFX9.4.3 by using the VMID space instead of SDMA client id to determine if an interrupt should be processed by a KFD node. This is especially needed for handling retry faults from MMHUB. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0dafbbe954ca..5d6e02559d8e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2799,7 +2799,7 @@ svm_fault_allowed(struct vm_area_struct *vma, bool write_fault) int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, - uint32_t client_id, uint32_t node_id, + uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault) { struct mm_struct *mm = NULL; @@ -2851,10 +2851,10 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, goto out; } - node = kfd_node_by_irq_ids(adev, node_id, client_id); + node = kfd_node_by_irq_ids(adev, node_id, vmid); if (!node) { - pr_debug("kfd node does not exist node_id: %d, client_id: %d\n", node_id, - client_id); + pr_debug("kfd node does not exist node_id: %d, vmid: %d\n", node_id, + vmid); r = -EFAULT; goto out; } -- cgit From 85b45b60722f506322393320bb6cc195378f2e4f Mon Sep 17 00:00:00 2001 From: Amber Lin Date: Mon, 28 Nov 2022 11:26:02 -0500 Subject: amd/amdgpu: Set MTYPE_UC for access over PCIe For GFX v9_4_3, set MTYPE_UC for memory access over PCIe. v4 - add missing indentation pointed out by Felix and add his reviewed-by tag. v3 - add missing logic for the svm path. v2 - add amdgpu_xgmi_same_hive to separate access over xgmi from pcie Reviewed-by: Felix Kuehling Signed-off-by: Amber Lin Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 5d6e02559d8e..6daba0582bf3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1195,16 +1195,29 @@ svm_range_get_pte_flags(struct kfd_node *node, //e.g. NPS4. Current approch is only applicable without memory //partitions. snoop = true; - if (uncached) + if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; - /* local HBM region close to partition*/ - else if (bo_node == node) - mapping_flags |= AMDGPU_VM_MTYPE_RW; - /* local HBM region far from partition or remote XGMI GPU or - * system memory - */ - else + } else if (domain == SVM_RANGE_VRAM_DOMAIN) { + /* local HBM region close to partition with a workaround + * for Endpoint systems. + */ + if (bo_node == node) + mapping_flags |= + (node->adev->flags & AMD_IS_APU) ? + AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_NC; + /* local HBM region far from partition or remote XGMI GPU */ + else if (svm_nodes_in_same_hive(bo_node, node)) + mapping_flags |= AMDGPU_VM_MTYPE_NC; + /* PCIe P2P */ + else + mapping_flags |= AMDGPU_VM_MTYPE_UC; + /* system memory accessed by the APU */ + } else if (node->adev->flags & AMD_IS_APU) { mapping_flags |= AMDGPU_VM_MTYPE_NC; + /* system memory accessed by the dGPU */ + } else { + mapping_flags |= AMDGPU_VM_MTYPE_UC; + } break; default: mapping_flags |= coherent ? -- cgit From d839a158b2480814bc438f9f46f440a7b9f63cb6 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Thu, 5 Jan 2023 10:58:07 -0500 Subject: drm/amdgpu: Correct dGPU MTYPE settings for gfx943 Revert temporary dGPU VRAM MTYPE setting and align with expected coherency protocol. Signed-off-by: Graham Sider Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 6daba0582bf3..2b79849ddd30 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1198,13 +1198,9 @@ svm_range_get_pte_flags(struct kfd_node *node, if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition with a workaround - * for Endpoint systems. - */ + /* local HBM region close to partition */ if (bo_node == node) - mapping_flags |= - (node->adev->flags & AMD_IS_APU) ? - AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_NC; + mapping_flags |= AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; -- cgit From f4d8b6f5c61ab5e98258bd0072d733741c76bd8d Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 31 Jan 2023 11:23:50 -0500 Subject: drm/amdkfd: Enable SVM on Native mode This patch enables SVM capability on GFX9.4.3 when run in Native mode. It also sets best_prefetch and best_restore locations to CPU as there is no VRAM. Signed-off-by: Mukul Joshi Acked-by: Rajneesh Bhardwaj Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2b79849ddd30..cf354f9e4285 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -2543,6 +2543,9 @@ svm_range_best_restore_location(struct svm_range *prange, return -1; } + if (node->adev->gmc.is_app_apu) + return 0; + if (prange->preferred_loc == gpuid || prange->preferred_loc == KFD_IOCTL_SVM_LOCATION_SYSMEM) { return prange->preferred_loc; @@ -3256,6 +3259,11 @@ svm_range_best_prefetch_location(struct svm_range *prange) goto out; } + if (bo_node->adev->gmc.is_app_apu) { + best_loc = 0; + goto out; + } + if (p->xnack_enabled) bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); else -- cgit From 610dab118ff5013d46069c828b58d576e0907b66 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 31 Mar 2023 11:13:40 -0400 Subject: drm/amdkfd: Move pgmap to amdgpu_kfd_dev structure VRAM pgmap resource is allocated every time when switching compute partitions because kfd_dev is re-initialized by post_partition_switch, As a result, it causes memory region resource leaking and system memory usage accounting unbalanced. pgmap resource should be allocated and registered only once when loading driver and freed when unloading driver, move it from kfd_dev to amdgpu_kfd_dev. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index cf354f9e4285..2b2129dd1e4a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -174,7 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - - bo_adev->kfd.dev->pgmap.range.start; + bo_adev->kfd.pgmap.range.start; addr[i] |= SVM_RANGE_VRAM_DOMAIN; pr_debug_ratelimited("vram address: 0x%llx\n", addr[i]); continue; @@ -2827,7 +2827,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, bool migration = false; int r = 0; - if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { + if (!KFD_IS_SVM_API_SUPPORTED(adev)) { pr_debug("device does not support SVM\n"); return -EFAULT; } @@ -3112,7 +3112,7 @@ int svm_range_list_init(struct kfd_process *p) spin_lock_init(&svms->deferred_list_lock); for (i = 0; i < p->n_pdds; i++) - if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->kfd)) + if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev->adev)) bitmap_set(svms->bitmap_supported, i, 1); return 0; -- cgit From 895797d9193b38e759bc01268a8e3887e521f682 Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 6 Feb 2023 14:04:42 -0500 Subject: drm/amdgpu/bu: Add use_mtype_cc_wa module param By default, set use_mtype_cc_wa to 1 to set PTE coherence flag MTYPE_CC instead of MTYPE_RW by default. This is required for the time being to mitigate a bug causing XCCs to hit stale data due to TCC marking fully dirty lines as exclusive. Signed-off-by: Graham Sider Reviewed-by: Joseph Greathouse Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 2b2129dd1e4a..477ef9294203 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1198,9 +1198,12 @@ svm_range_get_pte_flags(struct kfd_node *node, if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition */ + /* local HBM region close to partition + * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. + * To force use of MTYPE_RW, set use_mtype_cc_wa=0 + */ if (bo_node == node) - mapping_flags |= AMDGPU_VM_MTYPE_RW; + mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; -- cgit From 1e4a00334add40f609162914af7a24bc92951008 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Tue, 21 Feb 2023 17:31:32 -0500 Subject: drm/amdgpu: Fix per-BO MTYPE selection for GFXv9.4.3 Treat system memory on NUMA systems as remote by default. Overriding with a more efficient MTYPE per page will be implemented in the next patch. No need for a special case for APP APUs. System memory is handled the same for carve-out and native mode. And VRAM doesn't exist in native mode. Signed-off-by: Felix Kuehling Reviewed-by: Philip Yang Reviewed-and-tested-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 477ef9294203..4eec75b28917 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1151,6 +1151,7 @@ svm_range_get_pte_flags(struct kfd_node *node, bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; + unsigned int mtype_local; if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; @@ -1191,19 +1192,16 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - //TODO: Need more work for handling multiple memory partitions - //e.g. NPS4. Current approch is only applicable without memory - //partitions. + mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : + AMDGPU_VM_MTYPE_RW; snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { - /* local HBM region close to partition - * FIXME: Temporarily using MTYPE_CC instead of MTYPE_RW where applicable. - * To force use of MTYPE_RW, set use_mtype_cc_wa=0 - */ - if (bo_node == node) - mapping_flags |= amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW; + /* local HBM region close to partition */ + if (bo_node->adev == node->adev /* TODO: memory partitions && + bo_node->mem_id == node->mem_id*/) + mapping_flags |= mtype_local; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) mapping_flags |= AMDGPU_VM_MTYPE_NC; @@ -1212,7 +1210,13 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= AMDGPU_VM_MTYPE_UC; /* system memory accessed by the APU */ } else if (node->adev->flags & AMD_IS_APU) { - mapping_flags |= AMDGPU_VM_MTYPE_NC; + /* On NUMA systems, locality is determined per-page + * in amdgpu_gmc_override_vm_pte_flags + */ + if (num_possible_nodes() <= 1) + mapping_flags |= mtype_local; + else + mapping_flags |= AMDGPU_VM_MTYPE_NC; /* system memory accessed by the dGPU */ } else { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit From 76eb9c95a409ea820b2e7c968c220e7a38f27d76 Mon Sep 17 00:00:00 2001 From: David Francis Date: Mon, 27 Feb 2023 10:33:11 -0500 Subject: drm/amdgpu/bu: add mtype_local as a module parameter Selects the MTYPE to be used for local memory, (0 = MTYPE_CC (default), 1 = MTYPE_NC, 2 = MTYPE_RW) v2: squash in build fix (Alex) Reviewed-by: Graham Sider Signed-off-by: David Francis Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 4eec75b28917..df0ed5677609 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1192,8 +1192,7 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_use_mtype_cc_wa ? AMDGPU_VM_MTYPE_CC : - AMDGPU_VM_MTYPE_RW; + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_CC); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit From b9cbd51000ad3541351ca832b00600870ac08e5c Mon Sep 17 00:00:00 2001 From: Graham Sider Date: Mon, 6 Mar 2023 17:56:44 -0500 Subject: drm/amdgpu/bu: update mtype_local parameter settings Update mtype_local module parameter to use MTYPE_RW by default. 0: MTYPE_RW (default) 1: MTYPE_NC 2: MTYPE_CC Signed-off-by: Graham Sider Reviewed-by: Harish Kasiviswanathan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index df0ed5677609..e6348d4133fd 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1192,7 +1192,8 @@ svm_range_get_pte_flags(struct kfd_node *node, } break; case IP_VERSION(9, 4, 3): - mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_RW : AMDGPU_VM_MTYPE_CC); + mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : + (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; -- cgit From 2046ed6c8aa951e4ae83c5022bb0a7c777386097 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 26 Jan 2023 18:45:32 -0500 Subject: drm/amdkfd: SVM range allocation support memory partition Pass kfd node->xcp->mem_id to amdgpu bo create parameter mem_id_plus1 to allocate new svm_bo on the specified memory partition. This is only for dGPU mode as we don't migrate with APU mode. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index e6348d4133fd..62aa7fb2eaa5 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -555,16 +555,20 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bp.flags |= AMDGPU_GEM_CREATE_DISCARDABLE; bp.type = ttm_bo_type_device; bp.resv = NULL; + if (node->xcp) + bp.mem_id_plus1 = node->xcp->mem_id + 1; - /* TODO: Allocate memory from the right memory partition. We can sort - * out the details later, once basic memory partitioning is working - */ r = amdgpu_bo_create_user(node->adev, &bp, &ubo); if (r) { pr_debug("failed %d to create bo\n", r); goto create_bo_failed; } bo = &ubo->bo; + + pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n", + bo->tbo.resource->start << PAGE_SHIFT, bp.size, + bp.mem_id_plus1 - 1); + r = amdgpu_bo_reserve(bo, true); if (r) { pr_debug("failed %d to reserve bo\n", r); -- cgit From dc12f9eddedb8b41f4dc948e5e636e5221fb4d43 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 2 Feb 2023 11:07:53 -0500 Subject: drm/amdkfd: Update MTYPE for far memory partition Use MTYPE RW/MTYPE_CC for mapping system memory or VRAM to KFD node within the same memory partition, use MTYPE_NC for mapping on KFD node from the far memory partition of the same socket or from another socket on same XGMI hive. On NPS4 or 4P system, MTYPE will be overridden per page depending on the memory NUMA node id and vm->mem_id. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 62aa7fb2eaa5..a700d9ccd054 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1203,8 +1203,8 @@ svm_range_get_pte_flags(struct kfd_node *node, mapping_flags |= AMDGPU_VM_MTYPE_UC; } else if (domain == SVM_RANGE_VRAM_DOMAIN) { /* local HBM region close to partition */ - if (bo_node->adev == node->adev /* TODO: memory partitions && - bo_node->mem_id == node->mem_id*/) + if (bo_node->adev == node->adev && + (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id)) mapping_flags |= mtype_local; /* local HBM region far from partition or remote XGMI GPU */ else if (svm_nodes_in_same_hive(bo_node, node)) @@ -1358,8 +1358,9 @@ svm_range_map_to_gpu(struct kfd_process_device *pdd, struct svm_range *prange, (last_domain == SVM_RANGE_VRAM_DOMAIN) ? 1 : 0, pte_flags); - /* TODO: we still need to determine the vm_manager.vram_base_offset based on - * the memory partition. + /* For dGPU mode, we use same vm_manager to allocate VRAM for + * different memory partition based on fpfn/lpfn, we should use + * same vm_manager.vram_base_offset regardless memory partition. */ r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL, last_start, prange->start + i, -- cgit From 3ebfd221c1a83e5f0edadb87d173d8fd93d1d125 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 8 Mar 2023 11:57:00 -0500 Subject: drm/amdkfd: Store xcp partition id to amdgpu bo For memory accounting per compute partition and export drm amdgpu bo and then import to KFD, we need the xcp id to account the memory usage or find the KFD node of the original amdgpu bo to create the KFD bo on the correct adev KFD node. Set xcp_id_plus1 of amdgpu_bo_param to create bo and store xcp_id to amddgpu bo. Add helper macro to get the mem_id from adev and xcp_id. v2: squash in fix ("drm/amdgpu: Fix BO creation failure on GFX 9.4.3 dGPU") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index a700d9ccd054..45959892bc0f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -556,7 +556,7 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, bp.type = ttm_bo_type_device; bp.resv = NULL; if (node->xcp) - bp.mem_id_plus1 = node->xcp->mem_id + 1; + bp.xcp_id_plus1 = node->xcp->id + 1; r = amdgpu_bo_create_user(node->adev, &bp, &ubo); if (r) { @@ -567,7 +567,7 @@ svm_range_vram_node_new(struct kfd_node *node, struct svm_range *prange, pr_debug("alloc bo at offset 0x%lx size 0x%lx on partition %d\n", bo->tbo.resource->start << PAGE_SHIFT, bp.size, - bp.mem_id_plus1 - 1); + bp.xcp_id_plus1 - 1); r = amdgpu_bo_reserve(bo, true); if (r) { -- cgit From 1c77527a69d5ca19cb276e2728992d922b687f35 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 20 Mar 2023 11:22:30 -0400 Subject: drm/amdkfd: Fix memory reporting on GFX 9.4.3 This patch fixes memory reporting on the GFX 9.4.3 APU and dGPU by reporting available memory on a per partition basis. If its an APU, available and used memory calculations take into account system and TTM memory. v2: squash in fix ("drm/amdkfd: Fix array out of bound warning") squash in fix ("drm/amdgpu: Update memory reporting for GFX9.4.3") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 45959892bc0f..c1ab70faf36e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -280,7 +280,7 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage) if (update_mem_usage && !p->xnack_enabled) { pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size); amdgpu_amdkfd_unreserve_mem_limit(NULL, size, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } mutex_destroy(&prange->lock); mutex_destroy(&prange->migrate_mutex); @@ -313,7 +313,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, p = container_of(svms, struct kfd_process, svms); if (!p->xnack_enabled && update_mem_usage && amdgpu_amdkfd_reserve_mem_limit(NULL, size << PAGE_SHIFT, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR)) { + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0)) { pr_info("SVM mapping failed, exceeds resident system memory limit\n"); kfree(prange); return NULL; @@ -3037,10 +3037,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled) size = (pchild->last - pchild->start + 1) << PAGE_SHIFT; if (xnack_enabled) { amdgpu_amdkfd_unreserve_mem_limit(NULL, size, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } else { r = amdgpu_amdkfd_reserve_mem_limit(NULL, size, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); if (r) goto out_unlock; reserved_size += size; @@ -3050,10 +3050,10 @@ svm_range_switch_xnack_reserve_mem(struct kfd_process *p, bool xnack_enabled) size = (prange->last - prange->start + 1) << PAGE_SHIFT; if (xnack_enabled) { amdgpu_amdkfd_unreserve_mem_limit(NULL, size, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } else { r = amdgpu_amdkfd_reserve_mem_limit(NULL, size, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); if (r) goto out_unlock; reserved_size += size; @@ -3066,7 +3066,7 @@ out_unlock: if (r) amdgpu_amdkfd_unreserve_mem_limit(NULL, reserved_size, - KFD_IOC_ALLOC_MEM_FLAGS_USERPTR); + KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); else /* Change xnack mode must be inside svms lock, to avoid race with * svm_range_deferred_list_work unreserve memory in parallel. -- cgit From 25f50704343de1bea70100ad41621b5737a6a96b Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 23 Mar 2023 08:45:56 -0400 Subject: drm/amdkfd: APU mode set max svm range pages svm_migrate_init set the max svm range pages based on the KFD nodes partition size. APU mode don't init pgmap because there is no migration. kgd2kfd_device_init calls svm_migrate_init after KFD nodes allocation and initialization. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c1ab70faf36e..206851c9e642 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1938,14 +1938,19 @@ void svm_range_set_max_pages(struct amdgpu_device *adev) { uint64_t max_pages; uint64_t pages, _pages; + uint64_t min_pages = 0; + int i; + + for (i = 0; i < adev->kfd.dev->num_nodes; i++) { + pages = KFD_XCP_MEMORY_SIZE(adev, adev->kfd.dev->nodes[i]->xcp->id) >> 17; + pages = clamp(pages, 1ULL << 9, 1ULL << 18); + pages = rounddown_pow_of_two(pages); + min_pages = min_not_zero(min_pages, pages); + } - /* 1/32 VRAM size in pages */ - pages = adev->gmc.real_vram_size >> 17; - pages = clamp(pages, 1ULL << 9, 1ULL << 18); - pages = rounddown_pow_of_two(pages); do { max_pages = READ_ONCE(max_svm_range_pages); - _pages = min_not_zero(max_pages, pages); + _pages = min_not_zero(max_pages, min_pages); } while (cmpxchg(&max_svm_range_pages, max_pages, _pages) != max_pages); } -- cgit From 45b3a914d40e63d2c9e3a3e02fb2014be975b9b0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 16 May 2023 17:16:30 -0400 Subject: drm/amdgpu/gmc9: fix 64 bit division in partition code Rework logic or use do_div() to avoid problems on 32 bit. v2: add a missing case for XCP macro v3: fix out of bounds array access v4: fix xcp handling harder Acked-by: Guchun Chen (v1) Reviewed-by: Mukul Joshi (v3) Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 206851c9e642..b0f0d31bf3e6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1939,10 +1939,14 @@ void svm_range_set_max_pages(struct amdgpu_device *adev) uint64_t max_pages; uint64_t pages, _pages; uint64_t min_pages = 0; - int i; + int i, id; for (i = 0; i < adev->kfd.dev->num_nodes; i++) { - pages = KFD_XCP_MEMORY_SIZE(adev, adev->kfd.dev->nodes[i]->xcp->id) >> 17; + if (adev->kfd.dev->nodes[i]->xcp) + id = adev->kfd.dev->nodes[i]->xcp->id; + else + id = -1; + pages = KFD_XCP_MEMORY_SIZE(adev, id) >> 17; pages = clamp(pages, 1ULL << 9, 1ULL << 18); pages = rounddown_pow_of_two(pages); min_pages = min_not_zero(min_pages, pages); -- cgit From f2cd6b26922e68ffafd14a9128e20630296e430d Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Wed, 17 May 2023 15:04:35 -0400 Subject: drm/amdkfd: fix stack size in svm_range_validate_and_map MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allocate large local variable on heap to avoid exceeding the stack size: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c: In function ‘svm_range_validate_and_map’: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:1690:1: warning: the frame size of 2360 bytes is larger than 2048 bytes [-Wframe-larger-than=] Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 56 +++++++++++++++++++++--------------- 1 file changed, 33 insertions(+), 23 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index b0f0d31bf3e6..ee16130ddc75 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1554,48 +1554,54 @@ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_range *prange, int32_t gpuidx, bool intr, bool wait, bool flush_tlb) { - struct svm_validate_context ctx; + struct svm_validate_context *ctx; unsigned long start, end, addr; struct kfd_process *p; void *owner; int32_t idx; int r = 0; - ctx.process = container_of(prange->svms, struct kfd_process, svms); - ctx.prange = prange; - ctx.intr = intr; + ctx = kzalloc(sizeof(struct svm_validate_context), GFP_KERNEL); + if (!ctx) + return -ENOMEM; + ctx->process = container_of(prange->svms, struct kfd_process, svms); + ctx->prange = prange; + ctx->intr = intr; if (gpuidx < MAX_GPU_INSTANCE) { - bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); - bitmap_set(ctx.bitmap, gpuidx, 1); - } else if (ctx.process->xnack_enabled) { - bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); + bitmap_zero(ctx->bitmap, MAX_GPU_INSTANCE); + bitmap_set(ctx->bitmap, gpuidx, 1); + } else if (ctx->process->xnack_enabled) { + bitmap_copy(ctx->bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); /* If prefetch range to GPU, or GPU retry fault migrate range to * GPU, which has ACCESS attribute to the range, create mapping * on that GPU. */ if (prange->actual_loc) { - gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process, + gpuidx = kfd_process_gpuidx_from_gpuid(ctx->process, prange->actual_loc); if (gpuidx < 0) { WARN_ONCE(1, "failed get device by id 0x%x\n", prange->actual_loc); - return -EINVAL; + r = -EINVAL; + goto free_ctx; } if (test_bit(gpuidx, prange->bitmap_access)) - bitmap_set(ctx.bitmap, gpuidx, 1); + bitmap_set(ctx->bitmap, gpuidx, 1); } } else { - bitmap_or(ctx.bitmap, prange->bitmap_access, + bitmap_or(ctx->bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); } - if (bitmap_empty(ctx.bitmap, MAX_GPU_INSTANCE)) { - if (!prange->mapped_to_gpu) - return 0; + if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) { + if (!prange->mapped_to_gpu) { + r = 0; + goto free_ctx; + } - bitmap_copy(ctx.bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); + bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE); } if (prange->actual_loc && !prange->ttm_res) { @@ -1603,15 +1609,16 @@ static int svm_range_validate_and_map(struct mm_struct *mm, * svm_migrate_ram_to_vram after allocating a BO. */ WARN_ONCE(1, "VRAM BO missing during validation\n"); - return -EINVAL; + r = -EINVAL; + goto free_ctx; } - svm_range_reserve_bos(&ctx); + svm_range_reserve_bos(ctx); p = container_of(prange->svms, struct kfd_process, svms); - owner = kfd_svm_page_owner(p, find_first_bit(ctx.bitmap, + owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap, MAX_GPU_INSTANCE)); - for_each_set_bit(idx, ctx.bitmap, MAX_GPU_INSTANCE) { + for_each_set_bit(idx, ctx->bitmap, MAX_GPU_INSTANCE) { if (kfd_svm_page_owner(p, idx) != owner) { owner = NULL; break; @@ -1648,7 +1655,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } offset = (addr - start) >> PAGE_SHIFT; - r = svm_range_dma_map(prange, ctx.bitmap, offset, npages, + r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, hmm_range->hmm_pfns); if (r) { pr_debug("failed %d to dma map range\n", r); @@ -1668,7 +1675,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx.bitmap, wait, flush_tlb); + ctx->bitmap, wait, flush_tlb); unlock_out: svm_range_unlock(prange); @@ -1682,11 +1689,14 @@ unlock_out: } unreserve_out: - svm_range_unreserve_bos(&ctx); + svm_range_unreserve_bos(ctx); if (!r) prange->validate_timestamp = ktime_get_boottime(); +free_ctx: + kfree(ctx); + return r; } -- cgit From c22b044070971e474dd0ff81a9830df93751f726 Mon Sep 17 00:00:00 2001 From: Alex Sierra Date: Mon, 29 May 2023 16:01:37 -0500 Subject: drm/amdkfd: flag added to handle errors from svm validate and map If a return error is raised during validation and mapping of a prange, this flag is set. It is a rare occurrence, but it could happen when `amdgpu_hmm_range_get_pages_done` returns true. In such cases, the caller should retry. However, it is important to ensure that the prange is updated correctly during the retry. Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index ee16130ddc75..9c88d6e90c3b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -809,7 +809,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange, } } - return true; + return !prange->is_error_flag; } /** @@ -1691,6 +1691,7 @@ unlock_out: unreserve_out: svm_range_unreserve_bos(ctx); + prange->is_error_flag = !!r; if (!r) prange->validate_timestamp = ktime_get_boottime(); -- cgit From 16cc3a221537bb3588ec2a568d7bd0e7972b25a8 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Wed, 31 May 2023 23:33:35 +0530 Subject: drm/amdgpu: Add function parameter 'event' to kdoc in svm_range_evict() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following gcc with W=1: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_svm.c:1841: warning: Function parameter or member 'event' not described in 'svm_range_evict' Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 9c88d6e90c3b..615eab3f78c9 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1826,6 +1826,7 @@ out_reschedule: * @mm: current process mm_struct * @start: starting process queue number * @last: last process queue number + * @event: mmu notifier event when range is evicted or migrated * * Stop all queues of the process to ensure GPU doesn't access the memory, then * return to let CPU evict the buffer and proceed CPU pagetable update. -- cgit From ba3c87fffb79311f54464288c66421d19c2c1234 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 2 Jun 2023 12:58:05 -0400 Subject: amd/amdkfd: drop unused KFD_IOCTL_SVM_FLAG_UNCACHED flag Was leftover from GC 9.4.3 bring up and is currently unused. Drop it for now. Cc: Philip.Yang@amd.com Cc: rajneesh.bhardwaj@amd.com Cc: Felix.Kuehling@amd.com Reviewed-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 615eab3f78c9..5ff1a5a89d96 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1154,7 +1154,7 @@ svm_range_get_pte_flags(struct kfd_node *node, uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; - bool uncached = flags & KFD_IOCTL_SVM_FLAG_UNCACHED; + bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/ unsigned int mtype_local; if (domain == SVM_RANGE_VRAM_DOMAIN) -- cgit