aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
diff options
context:
space:
mode:
authorChristian König <christian.koenig@amd.com>2022-11-10 12:31:41 +0100
committerAlex Deucher <alexander.deucher@amd.com>2022-11-17 00:23:30 -0500
commitfec8fdb54e8f74d88951c9f998f47bf4f2031fe0 (patch)
tree6488c4cbf8dedf902da012af243f8156d34a7be2 /drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
parent631945e04e1e243a503089f4487cad797476e8ca (diff)
downloadlinux-fec8fdb54e8f74d88951c9f998f47bf4f2031fe0.tar.gz
linux-fec8fdb54e8f74d88951c9f998f47bf4f2031fe0.tar.bz2
linux-fec8fdb54e8f74d88951c9f998f47bf4f2031fe0.zip
drm/amdgpu: fix userptr HMM range handling v2
The basic problem here is that it's not allowed to page fault while holding the reservation lock. So it can happen that multiple processes try to validate an userptr at the same time. Work around that by putting the HMM range object into the mutex protected bo list for now. v2: make sure range is set to NULL in case of an error Signed-off-by: Christian König <christian.koenig@amd.com> Reviewed-by: Alex Deucher <alexander.deucher@amd.com> Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com> CC: stable@vger.kernel.org Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c12
1 files changed, 8 insertions, 4 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 68741b157153..e44d740022bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -938,6 +938,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
struct amdkfd_process_info *process_info = mem->process_info;
struct amdgpu_bo *bo = mem->bo;
struct ttm_operation_ctx ctx = { true, false };
+ struct hmm_range *range;
int ret = 0;
mutex_lock(&process_info->lock);
@@ -967,7 +968,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
return 0;
}
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto unregister_out;
@@ -985,7 +986,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
amdgpu_bo_unreserve(bo);
release_out:
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@@ -2317,6 +2318,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
/* Go through userptr_inval_list and update any invalid user_pages */
list_for_each_entry(mem, &process_info->userptr_inval_list,
validate_list.head) {
+ struct hmm_range *range;
+
invalid = atomic_read(&mem->invalid);
if (!invalid)
/* BO hasn't been invalidated since the last
@@ -2327,7 +2330,8 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
/* Get updated user pages */
- ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+ ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
+ &range);
if (ret) {
pr_debug("Failed %d to get user pages\n", ret);
@@ -2346,7 +2350,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
* FIXME: Cannot ignore the return code, must hold
* notifier_lock
*/
- amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+ amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
}
/* Mark the BO as valid unless it was invalidated