Merge airlied/drm-next into drm-misc-next

Archit requested this backmerge to facilitate merging some patches that depend on changes between -rc2 and -rc5.

Signed-off-by: Sean Paul <seanpaul@chromium.org>
author: Sean Paul <seanpaul@chromium.org> 2017-08-18 10:52:44 -0400
committer: Sean Paul <seanpaul@chromium.org> 2017-08-18 10:52:44 -0400
commit: 0e8841ec7ee5b1ffe416c3be7743985b1896ec00 (patch)
tree: 9e502f1f39c740ff7417e5078cbda6eedac1c572 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent: 36436f4e933b42616c8e9ba4907dccf1329cb318 (diff)
parent: 8824c751eb61ebffb053c291199932845bac88b4 (diff)
download: linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.tar.gz
linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.tar.bz2
linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.zip
1 files changed, 78 insertions, 24 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d..c05479ec825a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 
 	*offset = data->offset;
 
-	drm_gem_object_unreference_unlocked(gobj);
+	drm_gem_object_put_unlocked(gobj);
 
 	if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
 		amdgpu_bo_unref(&p->uf_entry.robj);
@@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	}
 
 	/* get chunks */
-	chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks);
+	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
@@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		struct drm_amdgpu_cs_chunk user_chunk;
 		uint32_t __user *cdata;
 
-		chunk_ptr = (void __user *)(uintptr_t)chunk_array[i];
+		chunk_ptr = u64_to_user_ptr(chunk_array[i]);
 		if (copy_from_user(&user_chunk, chunk_ptr,
 				       sizeof(struct drm_amdgpu_cs_chunk))) {
 			ret = -EFAULT;
@@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		p->chunks[i].length_dw = user_chunk.length_dw;
 
 		size = p->chunks[i].length_dw;
-		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
+		cdata = u64_to_user_ptr(user_chunk.chunk_data);
 
 		p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
 		if (p->chunks[i].kdata == NULL) {
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
  * ticks. The accumulated microseconds (us) are converted to bytes and
  * returned.
  */
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+					      u64 *max_bytes,
+					      u64 *max_vis_bytes)
 {
 	s64 time_us, increment_us;
-	u64 max_bytes;
 	u64 free_vram, total_vram, used_vram;
 
 	/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 	 */
 	const s64 us_upper_bound = 200000;
 
-	if (!adev->mm_stats.log2_max_MBps)
-		return 0;
+	if (!adev->mm_stats.log2_max_MBps) {
+		*max_bytes = 0;
+		*max_vis_bytes = 0;
+		return;
+	}
 
 	total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
 	used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
 		adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
 	}
 
-	/* This returns 0 if the driver is in debt to disallow (optional)
+	/* This is set to 0 if the driver is in debt to disallow (optional)
 	 * buffer moves.
 	 */
-	max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+	*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+	/* Do the same for visible VRAM if half of it is free */
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+		u64 total_vis_vram = adev->mc.visible_vram_size;
+		u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+		if (used_vis_vram < total_vis_vram) {
+			u64 free_vis_vram = total_vis_vram - used_vis_vram;
+			adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+							  increment_us, us_upper_bound);
+
+			if (free_vis_vram >= total_vis_vram / 2)
+				adev->mm_stats.accum_us_vis =
+					max(bytes_to_us(adev, free_vis_vram / 2),
+					    adev->mm_stats.accum_us_vis);
+		}
+
+		*max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+	} else {
+		*max_vis_bytes = 0;
+	}
 
 	spin_unlock(&adev->mm_stats.lock);
-	return max_bytes;
 }
 
 /* Report how many bytes have really been moved for the last command
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+				  u64 num_vis_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+	adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
 	spin_unlock(&adev->mm_stats.lock);
 }
 
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 				 struct amdgpu_bo *bo)
 {
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-	u64 initial_bytes_moved;
+	u64 initial_bytes_moved, bytes_moved;
 	uint32_t domain;
 	int r;
 
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
 	/* Don't move this buffer if we have depleted our allowance
 	 * to move it. Don't move anything if the threshold is zero.
 	 */
-	if (p->bytes_moved < p->bytes_moved_threshold)
-		domain = bo->prefered_domains;
-	else
+	if (p->bytes_moved < p->bytes_moved_threshold) {
+		if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+		    (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+			/* And don't move a CPU_ACCESS_REQUIRED BO to limited
+			 * visible VRAM if we've depleted our allowance to do
+			 * that.
+			 */
+			if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+				domain = bo->preferred_domains;
+			else
+				domain = bo->allowed_domains;
+		} else {
+			domain = bo->preferred_domains;
+		}
+	} else {
 		domain = bo->allowed_domains;
+	}
 
 retry:
 	amdgpu_ttm_placement_from_domain(bo, domain);
 	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-	p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
-		initial_bytes_moved;
+	bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+		      initial_bytes_moved;
+	p->bytes_moved += bytes_moved;
+	if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+	    bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+	    bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+		p->bytes_moved_vis += bytes_moved;
 
 	if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
 		domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 		struct amdgpu_bo_list_entry *candidate = p->evictable;
 		struct amdgpu_bo *bo = candidate->robj;
 		struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
-		u64 initial_bytes_moved;
+		u64 initial_bytes_moved, bytes_moved;
+		bool update_bytes_moved_vis;
 		uint32_t other;
 
 		/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
 
 		/* Good we can try to move this BO somewhere else */
 		amdgpu_ttm_placement_from_domain(bo, other);
+		update_bytes_moved_vis =
+			adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+			bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+			bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
 		initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 		r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
-		p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+		bytes_moved = atomic64_read(&adev->num_bytes_moved) -
 			initial_bytes_moved;
+		p->bytes_moved += bytes_moved;
+		if (update_bytes_moved_vis)
+			p->bytes_moved_vis += bytes_moved;
 
 		if (unlikely(r))
 			break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}
 
-	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+	amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+					  &p->bytes_moved_vis_threshold);
 	p->bytes_moved = 0;
+	p->bytes_moved_vis = 0;
 	p->evictable = list_last_entry(&p->validated,
 				       struct amdgpu_bo_list_entry,
 				       tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		goto error_validate;
 	}
 
-	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+	amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+				     p->bytes_moved_vis);
 	fpriv->vm.last_eviction_counter =
 		atomic64_read(&p->adev->num_evictions);
 
@@ -1383,7 +1437,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 	if (fences == NULL)
 		return -ENOMEM;
 
-	fences_user = (void __user *)(uintptr_t)(wait->in.fences);
+	fences_user = u64_to_user_ptr(wait->in.fences);
 	if (copy_from_user(fences, fences_user,
 		sizeof(struct drm_amdgpu_fence) * fence_count)) {
 		r = -EFAULT;
author	Sean Paul <seanpaul@chromium.org>	2017-08-18 10:52:44 -0400
committer	Sean Paul <seanpaul@chromium.org>	2017-08-18 10:52:44 -0400
commit	0e8841ec7ee5b1ffe416c3be7743985b1896ec00 (patch)
tree	9e502f1f39c740ff7417e5078cbda6eedac1c572 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent	36436f4e933b42616c8e9ba4907dccf1329cb318 (diff)
parent	8824c751eb61ebffb053c291199932845bac88b4 (diff)
download	linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.tar.gz linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.tar.bz2 linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.zip