aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
diff options
context:
space:
mode:
authorSean Paul <seanpaul@chromium.org>2017-08-18 10:52:44 -0400
committerSean Paul <seanpaul@chromium.org>2017-08-18 10:52:44 -0400
commit0e8841ec7ee5b1ffe416c3be7743985b1896ec00 (patch)
tree9e502f1f39c740ff7417e5078cbda6eedac1c572 /drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
parent36436f4e933b42616c8e9ba4907dccf1329cb318 (diff)
parent8824c751eb61ebffb053c291199932845bac88b4 (diff)
downloadlinux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.tar.gz
linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.tar.bz2
linux-0e8841ec7ee5b1ffe416c3be7743985b1896ec00.zip
Merge airlied/drm-next into drm-misc-next
Archit requested this backmerge to facilitate merging some patches depending on changes between -rc2 & -rc5 Signed-off-by: Sean Paul <seanpaul@chromium.org>
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c102
1 files changed, 78 insertions, 24 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 5599c01b265d..c05479ec825a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -54,7 +54,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
*offset = data->offset;
- drm_gem_object_unreference_unlocked(gobj);
+ drm_gem_object_put_unlocked(gobj);
if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) {
amdgpu_bo_unref(&p->uf_entry.robj);
@@ -90,7 +90,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
}
/* get chunks */
- chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks);
+ chunk_array_user = u64_to_user_ptr(cs->in.chunks);
if (copy_from_user(chunk_array, chunk_array_user,
sizeof(uint64_t)*cs->in.num_chunks)) {
ret = -EFAULT;
@@ -110,7 +110,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
struct drm_amdgpu_cs_chunk user_chunk;
uint32_t __user *cdata;
- chunk_ptr = (void __user *)(uintptr_t)chunk_array[i];
+ chunk_ptr = u64_to_user_ptr(chunk_array[i]);
if (copy_from_user(&user_chunk, chunk_ptr,
sizeof(struct drm_amdgpu_cs_chunk))) {
ret = -EFAULT;
@@ -121,7 +121,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
p->chunks[i].length_dw = user_chunk.length_dw;
size = p->chunks[i].length_dw;
- cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
+ cdata = u64_to_user_ptr(user_chunk.chunk_data);
p->chunks[i].kdata = kvmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
if (p->chunks[i].kdata == NULL) {
@@ -223,10 +223,11 @@ static s64 bytes_to_us(struct amdgpu_device *adev, u64 bytes)
* ticks. The accumulated microseconds (us) are converted to bytes and
* returned.
*/
-static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
+static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
+ u64 *max_bytes,
+ u64 *max_vis_bytes)
{
s64 time_us, increment_us;
- u64 max_bytes;
u64 free_vram, total_vram, used_vram;
/* Allow a maximum of 200 accumulated ms. This is basically per-IB
@@ -238,8 +239,11 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
*/
const s64 us_upper_bound = 200000;
- if (!adev->mm_stats.log2_max_MBps)
- return 0;
+ if (!adev->mm_stats.log2_max_MBps) {
+ *max_bytes = 0;
+ *max_vis_bytes = 0;
+ return;
+ }
total_vram = adev->mc.real_vram_size - adev->vram_pin_size;
used_vram = atomic64_read(&adev->vram_usage);
@@ -280,23 +284,45 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us);
}
- /* This returns 0 if the driver is in debt to disallow (optional)
+ /* This is set to 0 if the driver is in debt to disallow (optional)
* buffer moves.
*/
- max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+ *max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
+
+ /* Do the same for visible VRAM if half of it is free */
+ if (adev->mc.visible_vram_size < adev->mc.real_vram_size) {
+ u64 total_vis_vram = adev->mc.visible_vram_size;
+ u64 used_vis_vram = atomic64_read(&adev->vram_vis_usage);
+
+ if (used_vis_vram < total_vis_vram) {
+ u64 free_vis_vram = total_vis_vram - used_vis_vram;
+ adev->mm_stats.accum_us_vis = min(adev->mm_stats.accum_us_vis +
+ increment_us, us_upper_bound);
+
+ if (free_vis_vram >= total_vis_vram / 2)
+ adev->mm_stats.accum_us_vis =
+ max(bytes_to_us(adev, free_vis_vram / 2),
+ adev->mm_stats.accum_us_vis);
+ }
+
+ *max_vis_bytes = us_to_bytes(adev, adev->mm_stats.accum_us_vis);
+ } else {
+ *max_vis_bytes = 0;
+ }
spin_unlock(&adev->mm_stats.lock);
- return max_bytes;
}
/* Report how many bytes have really been moved for the last command
* submission. This can result in a debt that can stop buffer migrations
* temporarily.
*/
-void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
+ u64 num_vis_bytes)
{
spin_lock(&adev->mm_stats.lock);
adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
+ adev->mm_stats.accum_us_vis -= bytes_to_us(adev, num_vis_bytes);
spin_unlock(&adev->mm_stats.lock);
}
@@ -304,7 +330,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- u64 initial_bytes_moved;
+ u64 initial_bytes_moved, bytes_moved;
uint32_t domain;
int r;
@@ -314,17 +340,35 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
/* Don't move this buffer if we have depleted our allowance
* to move it. Don't move anything if the threshold is zero.
*/
- if (p->bytes_moved < p->bytes_moved_threshold)
- domain = bo->prefered_domains;
- else
+ if (p->bytes_moved < p->bytes_moved_threshold) {
+ if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
+ /* And don't move a CPU_ACCESS_REQUIRED BO to limited
+ * visible VRAM if we've depleted our allowance to do
+ * that.
+ */
+ if (p->bytes_moved_vis < p->bytes_moved_vis_threshold)
+ domain = bo->preferred_domains;
+ else
+ domain = bo->allowed_domains;
+ } else {
+ domain = bo->preferred_domains;
+ }
+ } else {
domain = bo->allowed_domains;
+ }
retry:
amdgpu_ttm_placement_from_domain(bo, domain);
initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
- p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
- initial_bytes_moved;
+ bytes_moved = atomic64_read(&adev->num_bytes_moved) -
+ initial_bytes_moved;
+ p->bytes_moved += bytes_moved;
+ if (adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+ bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT)
+ p->bytes_moved_vis += bytes_moved;
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
domain = bo->allowed_domains;
@@ -350,7 +394,8 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *candidate = p->evictable;
struct amdgpu_bo *bo = candidate->robj;
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
- u64 initial_bytes_moved;
+ u64 initial_bytes_moved, bytes_moved;
+ bool update_bytes_moved_vis;
uint32_t other;
/* If we reached our current BO we can forget it */
@@ -370,10 +415,17 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
amdgpu_ttm_placement_from_domain(bo, other);
+ update_bytes_moved_vis =
+ adev->mc.visible_vram_size < adev->mc.real_vram_size &&
+ bo->tbo.mem.mem_type == TTM_PL_VRAM &&
+ bo->tbo.mem.start < adev->mc.visible_vram_size >> PAGE_SHIFT;
initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
- p->bytes_moved += atomic64_read(&adev->num_bytes_moved) -
+ bytes_moved = atomic64_read(&adev->num_bytes_moved) -
initial_bytes_moved;
+ p->bytes_moved += bytes_moved;
+ if (update_bytes_moved_vis)
+ p->bytes_moved_vis += bytes_moved;
if (unlikely(r))
break;
@@ -554,8 +606,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
list_splice(&need_pages, &p->validated);
}
- p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
+ amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
+ &p->bytes_moved_vis_threshold);
p->bytes_moved = 0;
+ p->bytes_moved_vis = 0;
p->evictable = list_last_entry(&p->validated,
struct amdgpu_bo_list_entry,
tv.head);
@@ -579,8 +633,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_validate;
}
- amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved);
-
+ amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
+ p->bytes_moved_vis);
fpriv->vm.last_eviction_counter =
atomic64_read(&p->adev->num_evictions);
@@ -1383,7 +1437,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
if (fences == NULL)
return -ENOMEM;
- fences_user = (void __user *)(uintptr_t)(wait->in.fences);
+ fences_user = u64_to_user_ptr(wait->in.fences);
if (copy_from_user(fences, fences_user,
sizeof(struct drm_amdgpu_fence) * fence_count)) {
r = -EFAULT;