From 3af470cbcc9f40e47fe9b16882f60cd20b438095 Mon Sep 17 00:00:00 2001 From: Xiaogang Chen Date: Fri, 21 Apr 2023 13:35:01 -0500 Subject: drm/amdkfd: Fix an issue at userptr buffer validation process. amdgpu_ttm_tt_get_user_pages can fail(-EFAULT). If it failed mem has no associated hmm range or user_pages associated. Keep it at process_info->userptr_inval_list and mark mem->invalid until following scheduled attempts can valid it. Signed-off-by: Xiaogang Chen Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 83a83ced2439..4432e169fae8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2445,7 +2445,9 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info, ret = -EAGAIN; goto unlock_out; } - mem->invalid = 0; + /* set mem valid if mem has hmm range associated */ + if (mem->range) + mem->invalid = 0; } unlock_out: @@ -2577,8 +2579,15 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i list_for_each_entry_safe(mem, tmp_mem, &process_info->userptr_inval_list, validate_list.head) { - bool valid = amdgpu_ttm_tt_get_user_pages_done( - mem->bo->tbo.ttm, mem->range); + bool valid; + + /* keep mem without hmm range at userptr_inval_list */ + if (!mem->range) + continue; + + /* Only check mem with hmm range associated */ + valid = amdgpu_ttm_tt_get_user_pages_done( + mem->bo->tbo.ttm, mem->range); mem->range = NULL; if (!valid) { @@ -2586,7 +2595,12 @@ static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_i ret = -EAGAIN; continue; } - WARN(mem->invalid, "Valid BO is marked invalid"); + + if (mem->invalid) { + WARN(1, "Valid BO is marked invalid"); + ret = -EAGAIN; + continue; + } list_move_tail(&mem->validate_list.head, &process_info->userptr_valid_list); -- cgit From 27fb73a0e3aa7478bcb5d2d59d65eec3c68fc165 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Tue, 25 Apr 2023 14:19:05 -0400 Subject: drm/amdkfd: Update KFD TTM mem limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the helper function in TTM to get TTM memory limit and set KFD's internal mem limit. This ensures that KFD's TTM mem limit and actual TTM mem limit are exactly same. Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 4432e169fae8..c3990a5eb7c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -36,6 +36,7 @@ #include #include "amdgpu_xgmi.h" #include "kfd_smi_events.h" +#include /* Userptr restore delay, just long enough to allow consecutive VM * changes to accumulate @@ -110,13 +111,16 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) struct sysinfo si; uint64_t mem; + if (kfd_mem_limit.max_system_mem_limit) + return; + si_meminfo(&si); mem = si.freeram - si.freehigh; mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); - kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); + kfd_mem_limit.max_ttm_mem_limit = ttm_tt_pages_limit() << PAGE_SHIFT; pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), (kfd_mem_limit.max_ttm_mem_limit >> 20)); -- cgit From b9274387bc2a4cf54b02e039b6a0aef5dd5f2936 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 26 Apr 2023 22:07:43 -0400 Subject: drm/amdkfd: Don't trigger evictions unmapping dmabuf attachments Don't move DMABuf attachments for PCIe P2P mappings to the SYSTEM domain when unmapping. This avoids triggering eviction fences unnecessarily. Instead do the move to SYSTEM and back to GTT when mapping these attachments to ensure the SG table gets updated after evictions. This may still trigger unnecessary evictions if user mode unmaps and remaps the same BO. However, this is unlikely in real applications. Signed-off-by: Felix Kuehling Reviewed-by: Eric Huang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c3990a5eb7c6..de6ba0d4b860 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -531,6 +531,12 @@ kfd_mem_dmamap_dmabuf(struct kfd_mem_attachment *attachment) { struct ttm_operation_ctx ctx = {.interruptible = true}; struct amdgpu_bo *bo = attachment->bo_va->base.bo; + int ret; + + amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); + ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + if (ret) + return ret; amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT); return ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); @@ -663,11 +669,10 @@ kfd_mem_dmaunmap_userptr(struct kgd_mem *mem, static void kfd_mem_dmaunmap_dmabuf(struct kfd_mem_attachment *attachment) { - struct ttm_operation_ctx ctx = {.interruptible = true}; - struct amdgpu_bo *bo = attachment->bo_va->base.bo; - - amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU); - ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); + /* This is a no-op. We don't want to trigger eviction fences when + * unmapping DMABufs. Therefore the invalidation (moving to system + * domain) is done in kfd_mem_dmamap_dmabuf. + */ } /** -- cgit From 8dc1db3172ae2f17ae71e33b608a33411ce8a1aa Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Wed, 14 Sep 2022 16:39:48 +0800 Subject: drm/amdkfd: Introduce kfd_node struct (v5) Introduce a new structure, kfd_node, which will now represent a compute node. kfd_node is carved out of kfd_dev structure. kfd_dev struct now will become the parent of kfd_node, and will store common resources such as doorbells, GTT sub-alloctor etc. kfd_node struct will store all resources specific to a compute node, such as device queue manager, interrupt handling etc. This is the first step in adding compute partition support in KFD. v2: introduce kfd_node struct to gc v11 (Hawking) v3: make reference to kfd_dev struct through kfd_node (Morris) v4: use kfd_node instead for kfd isr/mqd functions (Morris) v5: rebase (Alex) Signed-off-by: Mukul Joshi Tested-by: Amber Lin Reviewed-by: Felix Kuehling Signed-off-by: Hawking Zhang Signed-off-by: Morris Zhang Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index de6ba0d4b860..af37f2ef4438 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -35,6 +35,7 @@ #include "amdgpu_dma_buf.h" #include #include "amdgpu_xgmi.h" +#include "kfd_priv.h" #include "kfd_smi_events.h" #include -- cgit From 228ce176434b0f61451019065393040d58e1668d Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:57:00 -0500 Subject: drm/amdgpu: Handle VRAM dependencies on GFXIP9.4.3 [For 1P NPS1 mode driver bringup] Changes required to initialize the amdgpu driver with frontdoor firmware loading and discovery=2 with the native mode SBIOS that enables CPU GPU unified interleaved memory. sudo modprobe amdgpu discovery=2 Once PSP TMR region is reported via the ACPI interface, the dependency on the ip_discovery.bin will be removed. Choice of where to allocate driver table is given to each IP version. In general, both GTT and VRAM domains will be considered. If one of the tables has a strict restriction for VRAM domain, then only VRAM domain is considered. Reviewed-by: Felix Kuehling (lijo: Modified the handling for SMU Tables) Signed-off-by: Lijo Lazar Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index af37f2ef4438..4e179e50de25 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -2292,8 +2292,9 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev, (*mem)->dmabuf = dma_buf; (*mem)->bo = bo; (*mem)->va = va; - (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) && !adev->gmc.is_app_apu ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); -- cgit From 970c1646b5ac93a13496d3429aca3e799fa6cf07 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Fri, 27 Jan 2023 21:48:06 -0500 Subject: drm/amdgpu: Create VRAM BOs on GTT for GFXIP9.4.3 On GFXIP9.4.3 APP APU where there is no dedicated VRAM domain handle VRAM BO allocation requests on CPU domain and validate them on GTT. Support for handling multi-socket and multi-numa partitions within a socket will be added by future patches, this enables 1P NPS1 asic bringup configuration. Reviewed-by: Felix Kuehling Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 4e179e50de25..bbdd5e3aa18e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1649,9 +1649,16 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( */ if (flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_VRAM; - alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; - alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? + + if (adev->gmc.is_app_apu) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + alloc_flags = 0; + } else { + alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; + alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; + } } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1738,6 +1745,13 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( (*mem)->domain = domain; (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; + + if (adev->gmc.is_app_apu && + ((*mem)->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) { + bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; + bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + } + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { -- cgit From fcfefd85f18a0004c7c7b499f0701fd2c76d4c68 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Mon, 27 Feb 2023 20:08:29 -0500 Subject: drm/amdkfd: Native mode memory partition support For native mode, after amdgpu_bo is created on CPU domain, then call amdgpu_ttm_tt_set_mem_pool to select the TTM pool using bo->mem_id. ttm_bo_validate will allocate the memory to the correct memory partition before mapping to GPUs. Reviewed-by: Felix Kuehling Acked-and-tested-by: Mukul Joshi Signed-off-by: Philip Yang Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index bbdd5e3aa18e..59404b3e6b87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1643,6 +1643,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t aligned_size; u64 alloc_flags; int ret; + int mem_id = 0; /* Fixme : to be changed when mem_id support patch lands, until then NPS1, SPX only */ /* * Check on which domain to allocate BO @@ -1750,6 +1751,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( ((*mem)->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) { bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; + ret = amdgpu_ttm_tt_set_mem_pool(&bo->tbo, mem_id); + if (ret) { + pr_debug("failed to set ttm mem pool %d\n", ret); + goto err_set_mem_partition; + } } add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); @@ -1778,6 +1784,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); +err_set_mem_partition: drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ -- cgit From e181be58ccc2ac48e4b79996c8dd6dd9f34fa4b5 Mon Sep 17 00:00:00 2001 From: Rajneesh Bhardwaj Date: Mon, 27 Feb 2023 13:17:14 -0500 Subject: drm/amdgpu: Fix xGMI access P2P mapping failure on GFXIP 9.4.3 On GFXIP 9.4.3, we dont need to rely on xGMI hive info to determine P2P access. Reviewed-by: Felix Kuehling Acked-and-tested-by: Mukul Joshi Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 59404b3e6b87..c4b949d17e14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -814,7 +814,7 @@ static int kfd_mem_attach(struct amdgpu_device *adev, struct kgd_mem *mem, * if peer device has large BAR. In contrast, access over xGMI is * allowed for both small and large BAR configurations of peer device */ - if ((adev != bo_adev) && + if ((adev != bo_adev && !adev->gmc.is_app_apu) && ((mem->domain == AMDGPU_GEM_DOMAIN_VRAM) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) || (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_MMIO_REMAP))) { -- cgit From f915f3af9984464c308787102990d85d4e988d2c Mon Sep 17 00:00:00 2001 From: Harish Kasiviswanathan Date: Fri, 28 Apr 2023 14:20:00 -0400 Subject: drm/amdgpu: For GFX 9.4.3 APU fix vram_usage value For GFX 9.4.3 APP APU VRAM is allocated in GTT domain. While freeing memory check for GTT domain instead of VRAM if it is APP APU Signed-off-by: Harish Kasiviswanathan Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c4b949d17e14..fd395cd61b54 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1886,11 +1886,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( } /* Update the size of the BO being freed if it was allocated from - * VRAM and is not imported. + * VRAM and is not imported. For APP APU VRAM allocations are done + * in GTT domain */ if (size) { - if ((mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM) && - (!is_imported)) + if (!is_imported && + (mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_VRAM || + (adev->gmc.is_app_apu && + mem->bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT))) *size = bo_size; else *size = 0; -- cgit From f24e924b7e8aba7b62671e7e1a19d83301a08597 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 26 Jan 2023 18:25:28 -0500 Subject: drm/amdgpu: Add memory partition mem_id to amdgpu_bo Add mem_id_plus1 parameter to amdgpu_gem_object_create and pass it to amdgpu_bo_create. For dGPU mode allocation, mem_id is used by VRAM manager to get the memory partition fpfn, lpfn from xcp manager. For APU native mode allocation, mem_id is used to get NUMA node id from xcp manager, then pass to TTM as numa pool id to alloc memory from the specific NUMA node. mem_id -1 means for entire VRAM or any NUMA nodes. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index fd395cd61b54..71f1e1990925 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -290,7 +290,7 @@ create_dmamap_sg_bo(struct amdgpu_device *adev, ret = amdgpu_gem_object_create(adev, mem->bo->tbo.base.size, 1, AMDGPU_GEM_DOMAIN_CPU, AMDGPU_GEM_CREATE_PREEMPTIBLE | flags, - ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj); + ttm_bo_type_sg, mem->bo->tbo.base.resv, &gem_obj, 0); amdgpu_bo_unreserve(mem->bo); @@ -1721,7 +1721,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain)); ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, - bo_type, NULL, &gobj); + bo_type, NULL, &gobj, 0); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); -- cgit From 53c5692e7a3c8e8eed3ec6b876a3c982d217a5d7 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Thu, 26 Jan 2023 18:50:09 -0500 Subject: drm/amdkfd: Alloc memory of GPU support memory partition For dGPU mode VRAM allocation, create amdgpu_bo from amdgpu_vm->mem_id, to alloc from the correct memory range. For APU mode VRAM allocation, set alloc domain to GTT, and set bp->mem_id_plus1 from amdgpu_vm->mem_id + 1 to create amdgpu_bo, to allocate system memory from correct NUMA node. For GTT allocation, use mem_id -1 to allocate system memory from any NUMA nodes. Remove amdgpu_ttm_tt_set_mem_pool, to avoid the confusion that memory maybe allocated from different mem_id. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 24 +++++++----------------- 1 file changed, 7 insertions(+), 17 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 71f1e1990925..c234dc0db799 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1641,9 +1641,9 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; uint64_t aligned_size; + int8_t mem_id = -1; u64 alloc_flags; int ret; - int mem_id = 0; /* Fixme : to be changed when mem_id support patch lands, until then NPS1, SPX only */ /* * Check on which domain to allocate BO @@ -1653,13 +1653,14 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (adev->gmc.is_app_apu) { domain = AMDGPU_GEM_DOMAIN_GTT; - alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; } else { alloc_flags = AMDGPU_GEM_CREATE_VRAM_WIPE_ON_RELEASE; alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; } + mem_id = avm->mem_id; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1717,11 +1718,12 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( goto err_reserve_limit; } - pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", - va, (*mem)->aql_queue ? size << 1 : size, domain_string(alloc_domain)); + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s mem_id %d\n", + va, (*mem)->aql_queue ? size << 1 : size, + domain_string(alloc_domain), mem_id); ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, - bo_type, NULL, &gobj, 0); + bo_type, NULL, &gobj, mem_id + 1); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); @@ -1747,17 +1749,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( (*mem)->mapped_to_gpu_memory = 0; (*mem)->process_info = avm->process_info; - if (adev->gmc.is_app_apu && - ((*mem)->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)) { - bo->allowed_domains = AMDGPU_GEM_DOMAIN_GTT; - bo->preferred_domains = AMDGPU_GEM_DOMAIN_GTT; - ret = amdgpu_ttm_tt_set_mem_pool(&bo->tbo, mem_id); - if (ret) { - pr_debug("failed to set ttm mem pool %d\n", ret); - goto err_set_mem_partition; - } - } - add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, user_addr); if (user_addr) { @@ -1784,7 +1775,6 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: err_pin_bo: remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info); -err_set_mem_partition: drm_vma_node_revoke(&gobj->vma_node, drm_priv); err_node_allow: /* Don't unreserve system mem limit twice */ -- cgit From 3ebfd221c1a83e5f0edadb87d173d8fd93d1d125 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Wed, 8 Mar 2023 11:57:00 -0500 Subject: drm/amdkfd: Store xcp partition id to amdgpu bo For memory accounting per compute partition and export drm amdgpu bo and then import to KFD, we need the xcp id to account the memory usage or find the KFD node of the original amdgpu bo to create the KFD bo on the correct adev KFD node. Set xcp_id_plus1 of amdgpu_bo_param to create bo and store xcp_id to amddgpu bo. Add helper macro to get the mem_id from adev and xcp_id. v2: squash in fix ("drm/amdgpu: Fix BO creation failure on GFX 9.4.3 dGPU") Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c234dc0db799..8724a0be31b8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1634,6 +1634,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( uint64_t *offset, uint32_t flags, bool criu_resume) { struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv); + struct amdgpu_fpriv *fpriv = container_of(avm, struct amdgpu_fpriv, vm); enum ttm_bo_type bo_type = ttm_bo_type_device; struct sg_table *sg = NULL; uint64_t user_addr = 0; @@ -1641,7 +1642,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct drm_gem_object *gobj = NULL; u32 domain, alloc_domain; uint64_t aligned_size; - int8_t mem_id = -1; + int8_t xcp_id = -1; u64 alloc_flags; int ret; @@ -1660,7 +1661,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( alloc_flags |= (flags & KFD_IOC_ALLOC_MEM_FLAGS_PUBLIC) ? AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED : 0; } - mem_id = avm->mem_id; + xcp_id = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id; } else if (flags & KFD_IOC_ALLOC_MEM_FLAGS_GTT) { domain = alloc_domain = AMDGPU_GEM_DOMAIN_GTT; alloc_flags = 0; @@ -1718,12 +1719,12 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( goto err_reserve_limit; } - pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s mem_id %d\n", + pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s xcp_id %d\n", va, (*mem)->aql_queue ? size << 1 : size, - domain_string(alloc_domain), mem_id); + domain_string(alloc_domain), xcp_id); ret = amdgpu_gem_object_create(adev, aligned_size, 1, alloc_domain, alloc_flags, - bo_type, NULL, &gobj, mem_id + 1); + bo_type, NULL, &gobj, xcp_id + 1); if (ret) { pr_debug("Failed to create BO on domain %s. ret %d\n", domain_string(alloc_domain), ret); -- cgit From 1c77527a69d5ca19cb276e2728992d922b687f35 Mon Sep 17 00:00:00 2001 From: Mukul Joshi Date: Mon, 20 Mar 2023 11:22:30 -0400 Subject: drm/amdkfd: Fix memory reporting on GFX 9.4.3 This patch fixes memory reporting on the GFX 9.4.3 APU and dGPU by reporting available memory on a per partition basis. If its an APU, available and used memory calculations take into account system and TTM memory. v2: squash in fix ("drm/amdkfd: Fix array out of bound warning") squash in fix ("drm/amdgpu: Update memory reporting for GFX9.4.3") Signed-off-by: Mukul Joshi Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 81 ++++++++++++++++++------ 1 file changed, 63 insertions(+), 18 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 8724a0be31b8..cc37f04651e2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -157,12 +157,13 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * Return: returns -ENOMEM in case of error, ZERO otherwise */ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 alloc_flag) + uint64_t size, u32 alloc_flag, int8_t xcp_id) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t system_mem_needed, ttm_mem_needed, vram_needed; int ret = 0; + uint64_t vram_size = 0; system_mem_needed = 0; ttm_mem_needed = 0; @@ -177,6 +178,17 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, * 2M BO chunk. */ vram_needed = size; + /* + * For GFX 9.4.3, get the VRAM size from XCP structs + */ + if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id)) + return -EINVAL; + + vram_size = KFD_XCP_MEMORY_SIZE(adev, xcp_id); + if (adev->gmc.is_app_apu) { + system_mem_needed = size; + ttm_mem_needed = size; + } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { system_mem_needed = size; } else if (!(alloc_flag & @@ -196,8 +208,8 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, kfd_mem_limit.max_system_mem_limit && !no_system_mem_limit) || (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > kfd_mem_limit.max_ttm_mem_limit) || - (adev && adev->kfd.vram_used + vram_needed > - adev->gmc.real_vram_size - reserved_for_pt)) { + (adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] + vram_needed > + vram_size - reserved_for_pt)) { ret = -ENOMEM; goto release; } @@ -207,9 +219,11 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, */ WARN_ONCE(vram_needed && !adev, "adev reference can't be null when vram is used"); - if (adev) { - adev->kfd.vram_used += vram_needed; - adev->kfd.vram_used_aligned += ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); + if (adev && xcp_id >= 0) { + adev->kfd.vram_used[xcp_id] += vram_needed; + adev->kfd.vram_used_aligned[xcp_id] += adev->gmc.is_app_apu ? + vram_needed : + ALIGN(vram_needed, VRAM_AVAILABLITY_ALIGN); } kfd_mem_limit.system_mem_used += system_mem_needed; kfd_mem_limit.ttm_mem_used += ttm_mem_needed; @@ -220,7 +234,7 @@ release: } void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 alloc_flag) + uint64_t size, u32 alloc_flag, int8_t xcp_id) { spin_lock(&kfd_mem_limit.mem_limit_lock); @@ -230,9 +244,19 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { WARN_ONCE(!adev, "adev reference can't be null when alloc mem flags vram is set"); + if (WARN_ONCE(xcp_id < 0, "invalid XCP ID %d", xcp_id)) + goto release; + if (adev) { - adev->kfd.vram_used -= size; - adev->kfd.vram_used_aligned -= ALIGN(size, VRAM_AVAILABLITY_ALIGN); + adev->kfd.vram_used[xcp_id] -= size; + if (adev->gmc.is_app_apu) { + adev->kfd.vram_used_aligned[xcp_id] -= size; + kfd_mem_limit.system_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= size; + } else { + adev->kfd.vram_used_aligned[xcp_id] -= + ALIGN(size, VRAM_AVAILABLITY_ALIGN); + } } } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { kfd_mem_limit.system_mem_used -= size; @@ -242,8 +266,8 @@ void amdgpu_amdkfd_unreserve_mem_limit(struct amdgpu_device *adev, pr_err("%s: Invalid BO type %#x\n", __func__, alloc_flag); goto release; } - WARN_ONCE(adev && adev->kfd.vram_used < 0, - "KFD VRAM memory accounting unbalanced"); + WARN_ONCE(adev && xcp_id >= 0 && adev->kfd.vram_used[xcp_id] < 0, + "KFD VRAM memory accounting unbalanced for xcp: %d", xcp_id); WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, "KFD TTM memory accounting unbalanced"); WARN_ONCE(kfd_mem_limit.system_mem_used < 0, @@ -259,7 +283,8 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) u32 alloc_flags = bo->kfd_bo->alloc_flags; u64 size = amdgpu_bo_size(bo); - amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, size, alloc_flags, + bo->xcp_id); kfree(bo->kfd_bo); } @@ -1609,23 +1634,42 @@ out_unlock: return ret; } -size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev, + uint8_t xcp_id) { uint64_t reserved_for_pt = ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); ssize_t available; + uint64_t vram_available, system_mem_available, ttm_mem_available; spin_lock(&kfd_mem_limit.mem_limit_lock); - available = adev->gmc.real_vram_size - - adev->kfd.vram_used_aligned + vram_available = KFD_XCP_MEMORY_SIZE(adev, xcp_id) + - adev->kfd.vram_used_aligned[xcp_id] - atomic64_read(&adev->vram_pin_size) - reserved_for_pt; + + if (adev->gmc.is_app_apu) { + system_mem_available = no_system_mem_limit ? + kfd_mem_limit.max_system_mem_limit : + kfd_mem_limit.max_system_mem_limit - + kfd_mem_limit.system_mem_used; + + ttm_mem_available = kfd_mem_limit.max_ttm_mem_limit - + kfd_mem_limit.ttm_mem_used; + + available = min3(system_mem_available, ttm_mem_available, + vram_available); + available = ALIGN_DOWN(available, PAGE_SIZE); + } else { + available = ALIGN_DOWN(vram_available, VRAM_AVAILABLITY_ALIGN); + } + spin_unlock(&kfd_mem_limit.mem_limit_lock); if (available < 0) available = 0; - return ALIGN_DOWN(available, VRAM_AVAILABLITY_ALIGN); + return available; } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( @@ -1713,7 +1757,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, aligned_size, flags, + xcp_id); if (ret) { pr_debug("Insufficient memory\n"); goto err_reserve_limit; @@ -1781,7 +1826,7 @@ err_node_allow: /* Don't unreserve system mem limit twice */ goto err_reserve_limit; err_bo_create: - amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags); + amdgpu_amdkfd_unreserve_mem_limit(adev, aligned_size, flags, xcp_id); err_reserve_limit: mutex_destroy(&(*mem)->lock); if (gobj) -- cgit From 837d4e071d250d695eba7a08c55c77f6a5b4bb5e Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Sat, 27 May 2023 22:24:53 +0530 Subject: drm/amdgpu: Fix create_dmamap_sg_bo kdoc warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix the following gcc with W=1: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c:292: warning: Cannot understand * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information Cc: Felix Kuehling Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index cc37f04651e2..23390b5932fb 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -290,8 +290,9 @@ void amdgpu_amdkfd_release_notify(struct amdgpu_bo *bo) } /** - * @create_dmamap_sg_bo: Creates a amdgpu_bo object to reflect information + * create_dmamap_sg_bo() - Creates a amdgpu_bo object to reflect information * about USERPTR or DOOREBELL or MMIO BO. + * * @adev: Device for which dmamap BO is being created * @mem: BO of peer device that is being DMA mapped. Provides parameters * in building the dmamap BO -- cgit From 932fc49479303961c1da54a1112eb26cdc890c76 Mon Sep 17 00:00:00 2001 From: Srinivasan Shanmugam Date: Mon, 29 May 2023 20:23:33 +0530 Subject: drm/amdgpu: Fix missing parameter desc for 'xcp_id' in amdgpu_amdkfd_reserve_mem_limit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix these warnings by adding 'xcp_id' argument. gcc with W=1 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c:160: warning: Function parameter or member 'xcp_id' not described in 'amdgpu_amdkfd_reserve_mem_limit' Cc: Christian König Cc: Alex Deucher Signed-off-by: Srinivasan Shanmugam Acked-by: Alex Deucher Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index 23390b5932fb..f61527b800e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -153,8 +153,11 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) * @size: Size of buffer, in bytes, encapsulated by B0. This should be * equivalent to amdgpu_bo_size(BO) * @alloc_flag: Flag used in allocating a BO as noted above + * @xcp_id: xcp_id is used to get xcp from xcp manager, one xcp is + * managed as one compute node in driver for app * - * Return: returns -ENOMEM in case of error, ZERO otherwise + * Return: + * returns -ENOMEM in case of error, ZERO otherwise */ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 alloc_flag, int8_t xcp_id) -- cgit