From 9e089a29c696d86d26e79737bafbce94738fb462 Mon Sep 17 00:00:00 2001 From: yu kuai Date: Wed, 13 Nov 2019 20:44:31 +0800 Subject: drm/amdgpu: remove set but not used variable 'invalid' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes gcc '-Wunused-but-set-variable' warning: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c: In function ‘amdgpu_amdkfd_evict_userptr’: drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c:1665:6: warning: variable ‘invalid’ set but not used [-Wunused-but-set-variable] 'invalid' is never used, so can be removed. Thus 'atomic_inc_return' can be replaced as 'atomic_inc' Fixes: 5ae0283e831a ("drm/amdgpu: Add userptr support for KFD") Signed-off-by: yu kuai Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index ae6f5446262c..a1ed8a8e3752 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -1662,10 +1662,10 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm) { struct amdkfd_process_info *process_info = mem->process_info; - int invalid, evicted_bos; + int evicted_bos; int r = 0; - invalid = atomic_inc_return(&mem->invalid); + atomic_inc(&mem->invalid); evicted_bos = atomic_inc_return(&process_info->evicted_bos); if (evicted_bos == 1) { /* First eviction, stop the queues */ -- cgit From b72ff1909cf3267e48b905e5bdd6e7e41d0a8593 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 25 Nov 2019 16:25:35 -0500 Subject: drm/amdgpu: Raise KFD unpinned system memory limit Allow KFD applications to use more unpinned system memory through HMM. Signed-off-by: Felix Kuehling Reviewed-by: Yong Zhao Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index a1ed8a8e3752..c2f92a7b56e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -85,7 +85,7 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, } /* Set memory usage limits. Current, limits are - * System (TTM + userptr) memory - 3/4th System RAM + * System (TTM + userptr) memory - 15/16th System RAM * TTM memory - 3/8th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) @@ -98,7 +98,7 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); + kfd_mem_limit.max_system_mem_limit = mem - (mem >> 4); kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), -- cgit From 9f890f3044c79d18bce1f3a3f75037b93e71b880 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 15 Jul 2019 16:18:03 -0400 Subject: drm/amdgpu: Optimize KFD page table reservation Be less pessimistic about estimated page table use for KFD. Most allocations use 2MB pages and therefore need less VRAM for page tables. This allows more VRAM to be used for applications especially on large systems with many GPUs and hundreds of GB of system memory. Example: 8 GPUs with 32GB VRAM each + 256GB system memory = 512GB Old page table reservation per GPU: 1GB New page table reservation per GPU: 32MB Signed-off-by: Felix Kuehling Reviewed-by: xinhui pan Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index c2f92a7b56e4..b6d1958d514f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -105,11 +105,24 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) (kfd_mem_limit.max_ttm_mem_limit >> 20)); } +/* Estimate page table size needed to represent a given memory size + * + * With 4KB pages, we need one 8 byte PTE for each 4KB of memory + * (factor 512, >> 9). With 2MB pages, we need one 8 byte PTE for 2MB + * of memory (factor 256K, >> 18). ROCm user mode tries to optimize + * for 2MB pages for TLB efficiency. However, small allocations and + * fragmented system memory still need some 4KB pages. We choose a + * compromise that should work in most cases without reserving too + * much memory for page tables unnecessarily (factor 16K, >> 14). + */ +#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) + static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, uint64_t size, u32 domain, bool sg) { + uint64_t reserved_for_pt = + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; - uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, -- cgit From e095fc17bbd216ccac7fe06132067ae6e91f01c3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 29 Nov 2019 11:33:54 +0100 Subject: drm/amdgpu: explicitely sync to VM updates v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Allows us to reduce the overhead while syncing to fences a bit. v2: also drop adev parameter from the functions Signed-off-by: Christian König Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index b6d1958d514f..d8db5ecdf9c1 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -358,7 +358,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return amdgpu_sync_fence(NULL, sync, vm->last_update, false); + return amdgpu_sync_fence(sync, vm->last_update, false); } static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem) @@ -751,7 +751,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update); - amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + amdgpu_sync_fence(sync, bo_va->last_pt_update, false); return 0; } @@ -770,7 +770,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); + return amdgpu_sync_fence(sync, bo_va->last_pt_update, false); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -2045,7 +2045,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); + ret = amdgpu_sync_fence(&sync_obj, bo->tbo.moving, false); if (ret) { pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); goto validate_map_fail; -- cgit