aboutsummaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/v3d
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/v3d')
-rw-r--r--drivers/gpu/drm/v3d/Makefile3
-rw-r--r--drivers/gpu/drm/v3d/v3d_bo.c16
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.c10
-rw-r--r--drivers/gpu/drm/v3d/v3d_drv.h14
-rw-r--r--drivers/gpu/drm/v3d/v3d_gem.c6
-rw-r--r--drivers/gpu/drm/v3d/v3d_gemfs.c50
-rw-r--r--drivers/gpu/drm/v3d/v3d_irq.c2
-rw-r--r--drivers/gpu/drm/v3d/v3d_mmu.c85
-rw-r--r--drivers/gpu/drm/v3d/v3d_perfmon.c6
-rw-r--r--drivers/gpu/drm/v3d/v3d_sched.c48
10 files changed, 190 insertions, 50 deletions
diff --git a/drivers/gpu/drm/v3d/Makefile b/drivers/gpu/drm/v3d/Makefile
index b7d673f1153b..fcf710926057 100644
--- a/drivers/gpu/drm/v3d/Makefile
+++ b/drivers/gpu/drm/v3d/Makefile
@@ -13,7 +13,8 @@ v3d-y := \
v3d_trace_points.o \
v3d_sched.o \
v3d_sysfs.o \
- v3d_submit.o
+ v3d_submit.o \
+ v3d_gemfs.o
v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
diff --git a/drivers/gpu/drm/v3d/v3d_bo.c b/drivers/gpu/drm/v3d/v3d_bo.c
index ebe52bef4ffb..73ab7dd31b17 100644
--- a/drivers/gpu/drm/v3d/v3d_bo.c
+++ b/drivers/gpu/drm/v3d/v3d_bo.c
@@ -107,6 +107,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj)
struct v3d_dev *v3d = to_v3d_dev(obj->dev);
struct v3d_bo *bo = to_v3d_bo(obj);
struct sg_table *sgt;
+ u64 align;
int ret;
/* So far we pin the BO in the MMU for its lifetime, so use
@@ -116,6 +117,15 @@ v3d_bo_create_finish(struct drm_gem_object *obj)
if (IS_ERR(sgt))
return PTR_ERR(sgt);
+ if (!v3d->gemfs)
+ align = SZ_4K;
+ else if (obj->size >= SZ_1M)
+ align = SZ_1M;
+ else if (obj->size >= SZ_64K)
+ align = SZ_64K;
+ else
+ align = SZ_4K;
+
spin_lock(&v3d->mm_lock);
/* Allocate the object's space in the GPU's page tables.
* Inserting PTEs will happen later, but the offset is for the
@@ -123,7 +133,7 @@ v3d_bo_create_finish(struct drm_gem_object *obj)
*/
ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node,
obj->size >> V3D_MMU_PAGE_SHIFT,
- GMP_GRANULARITY >> V3D_MMU_PAGE_SHIFT, 0, 0);
+ align >> V3D_MMU_PAGE_SHIFT, 0, 0);
spin_unlock(&v3d->mm_lock);
if (ret)
return ret;
@@ -143,10 +153,12 @@ struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv,
size_t unaligned_size)
{
struct drm_gem_shmem_object *shmem_obj;
+ struct v3d_dev *v3d = to_v3d_dev(dev);
struct v3d_bo *bo;
int ret;
- shmem_obj = drm_gem_shmem_create(dev, unaligned_size);
+ shmem_obj = drm_gem_shmem_create_with_mnt(dev, unaligned_size,
+ v3d->gemfs);
if (IS_ERR(shmem_obj))
return ERR_CAST(shmem_obj);
bo = to_v3d_bo(&shmem_obj->base);
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index d7ff1f5fa481..fb35c5c3f1a7 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -36,6 +36,13 @@
#define DRIVER_MINOR 0
#define DRIVER_PATCHLEVEL 0
+/* Only expose the `super_pages` modparam if THP is enabled. */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+bool super_pages = true;
+module_param_named(super_pages, super_pages, bool, 0400);
+MODULE_PARM_DESC(super_pages, "Enable/Disable Super Pages support.");
+#endif
+
static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
@@ -97,6 +104,9 @@ static int v3d_get_param_ioctl(struct drm_device *dev, void *data,
case DRM_V3D_PARAM_MAX_PERF_COUNTERS:
args->value = v3d->perfmon_info.max_counters;
return 0;
+ case DRM_V3D_PARAM_SUPPORTS_SUPER_PAGES:
+ args->value = !!v3d->gemfs;
+ return 0;
default:
DRM_DEBUG("Unknown parameter %d\n", args->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/v3d/v3d_drv.h b/drivers/gpu/drm/v3d/v3d_drv.h
index cf4b23369dc4..de73eefff9ac 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.h
+++ b/drivers/gpu/drm/v3d/v3d_drv.h
@@ -19,9 +19,8 @@ struct clk;
struct platform_device;
struct reset_control;
-#define GMP_GRANULARITY (128 * 1024)
-
#define V3D_MMU_PAGE_SHIFT 12
+#define V3D_PAGE_FACTOR (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT)
#define V3D_MAX_QUEUES (V3D_CPU + 1)
@@ -137,6 +136,11 @@ struct v3d_dev {
struct drm_mm mm;
spinlock_t mm_lock;
+ /*
+ * tmpfs instance used for shmem backed objects
+ */
+ struct vfsmount *gemfs;
+
struct work_struct overflow_mem_work;
struct v3d_bin_job *bin_job;
@@ -534,6 +538,11 @@ void v3d_reset(struct v3d_dev *v3d);
void v3d_invalidate_caches(struct v3d_dev *v3d);
void v3d_clean_caches(struct v3d_dev *v3d);
+/* v3d_gemfs.c */
+extern bool super_pages;
+void v3d_gemfs_init(struct v3d_dev *v3d);
+void v3d_gemfs_fini(struct v3d_dev *v3d);
+
/* v3d_submit.c */
void v3d_job_cleanup(struct v3d_job *job);
void v3d_job_put(struct v3d_job *job);
@@ -553,6 +562,7 @@ void v3d_irq_disable(struct v3d_dev *v3d);
void v3d_irq_reset(struct v3d_dev *v3d);
/* v3d_mmu.c */
+int v3d_mmu_flush_all(struct v3d_dev *v3d);
int v3d_mmu_set_page_table(struct v3d_dev *v3d);
void v3d_mmu_insert_ptes(struct v3d_bo *bo);
void v3d_mmu_remove_ptes(struct v3d_bo *bo);
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index da8faf3b9011..b1e681630ded 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -288,11 +288,14 @@ v3d_gem_init(struct drm_device *dev)
v3d_init_hw_state(v3d);
v3d_mmu_set_page_table(v3d);
+ v3d_gemfs_init(v3d);
+
ret = v3d_sched_init(v3d);
if (ret) {
drm_mm_takedown(&v3d->mm);
- dma_free_coherent(v3d->drm.dev, 4096 * 1024, (void *)v3d->pt,
+ dma_free_coherent(v3d->drm.dev, pt_size, (void *)v3d->pt,
v3d->pt_paddr);
+ return ret;
}
return 0;
@@ -304,6 +307,7 @@ v3d_gem_destroy(struct drm_device *dev)
struct v3d_dev *v3d = to_v3d_dev(dev);
v3d_sched_fini(v3d);
+ v3d_gemfs_fini(v3d);
/* Waiting for jobs to finish would need to be done before
* unregistering V3D.
diff --git a/drivers/gpu/drm/v3d/v3d_gemfs.c b/drivers/gpu/drm/v3d/v3d_gemfs.c
new file mode 100644
index 000000000000..4c5e18590a5c
--- /dev/null
+++ b/drivers/gpu/drm/v3d/v3d_gemfs.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Copyright (C) 2024 Raspberry Pi */
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+
+#include "v3d_drv.h"
+
+void v3d_gemfs_init(struct v3d_dev *v3d)
+{
+ char huge_opt[] = "huge=within_size"; /* mount data handed to vfs_kern_mount() below */
+ struct file_system_type *type;
+ struct vfsmount *gemfs;
+
+ /*
+ * Set up v3d->gemfs: a private tmpfs mount created with the
+ * "huge=within_size" option, or NULL when no such mount is made.
+ *
+ * By creating our own shmemfs mountpoint, we can pass in
+ * mount flags that better match our usecase. However, we
+ * only do so on platforms which benefit from it.
+ *
+ * This function returns nothing: every bail-out below jumps to
+ * the err label, which leaves v3d->gemfs NULL and logs a notice.
+ */
+ if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+ goto err;
+
+ /*
+ * The user doesn't want to enable Super Pages
+ * (the super_pages flag is false).
+ *
+ * NOTE(review): this deliberate opt-out shares the err path, so
+ * it also prints the "Transparent Hugepage support is
+ * recommended" notice - confirm that this is intended.
+ */
+ if (!super_pages)
+ goto err;
+
+ type = get_fs_type("tmpfs");
+ if (!type)
+ goto err;
+
+ gemfs = vfs_kern_mount(type, SB_KERNMOUNT, type->name, huge_opt);
+ if (IS_ERR(gemfs))
+ goto err;
+
+ v3d->gemfs = gemfs;
+ drm_info(&v3d->drm, "Using Transparent Hugepages\n");
+
+ return;
+
+err: /* common exit for all four bail-outs above */
+ v3d->gemfs = NULL; /* v3d_gemfs_fini() tests this before unmounting */
+ drm_notice(&v3d->drm,
+ "Transparent Hugepage support is recommended for optimal performance on this platform!\n");
+}
+
+void v3d_gemfs_fini(struct v3d_dev *v3d)
+{
+ if (v3d->gemfs) /* NULL when v3d_gemfs_init() skipped or failed the mount */
+ kern_unmount(v3d->gemfs);
+}
diff --git a/drivers/gpu/drm/v3d/v3d_irq.c b/drivers/gpu/drm/v3d/v3d_irq.c
index d469bda52c1a..20bf33702c3c 100644
--- a/drivers/gpu/drm/v3d/v3d_irq.c
+++ b/drivers/gpu/drm/v3d/v3d_irq.c
@@ -70,6 +70,8 @@ v3d_overflow_mem_work(struct work_struct *work)
list_add_tail(&bo->unref_head, &v3d->bin_job->render->unref_list);
spin_unlock_irqrestore(&v3d->job_lock, irqflags);
+ v3d_mmu_flush_all(v3d);
+
V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << V3D_MMU_PAGE_SHIFT);
V3D_CORE_WRITE(0, V3D_PTB_BPOS, obj->size);
diff --git a/drivers/gpu/drm/v3d/v3d_mmu.c b/drivers/gpu/drm/v3d/v3d_mmu.c
index 14f3af40d6f6..0f564fd7160c 100644
--- a/drivers/gpu/drm/v3d/v3d_mmu.c
+++ b/drivers/gpu/drm/v3d/v3d_mmu.c
@@ -25,39 +25,37 @@
* superpage bit set.
*/
#define V3D_PTE_SUPERPAGE BIT(31)
+#define V3D_PTE_BIGPAGE BIT(30)
#define V3D_PTE_WRITEABLE BIT(29)
#define V3D_PTE_VALID BIT(28)
-static int v3d_mmu_flush_all(struct v3d_dev *v3d)
+static bool v3d_mmu_is_aligned(u32 page, u32 page_address, size_t alignment)
{
- int ret;
-
- /* Make sure that another flush isn't already running when we
- * start this one.
- */
- ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
- V3D_MMU_CTL_TLB_CLEARING), 100);
- if (ret)
- dev_err(v3d->drm.dev, "TLB clear wait idle pre-wait failed\n");
+ return IS_ALIGNED(page, alignment >> V3D_MMU_PAGE_SHIFT) &&
+ IS_ALIGNED(page_address, alignment >> V3D_MMU_PAGE_SHIFT);
+}
- V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
- V3D_MMU_CTL_TLB_CLEAR);
+int v3d_mmu_flush_all(struct v3d_dev *v3d)
+{
+ int ret;
- V3D_WRITE(V3D_MMUC_CONTROL,
- V3D_MMUC_CONTROL_FLUSH |
+ V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_FLUSH |
V3D_MMUC_CONTROL_ENABLE);
- ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
- V3D_MMU_CTL_TLB_CLEARING), 100);
+ ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
+ V3D_MMUC_CONTROL_FLUSHING), 100);
if (ret) {
- dev_err(v3d->drm.dev, "TLB clear wait idle failed\n");
+ dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n");
return ret;
}
- ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) &
- V3D_MMUC_CONTROL_FLUSHING), 100);
+ V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) |
+ V3D_MMU_CTL_TLB_CLEAR);
+
+ ret = wait_for(!(V3D_READ(V3D_MMU_CTL) &
+ V3D_MMU_CTL_TLB_CLEARING), 100);
if (ret)
- dev_err(v3d->drm.dev, "MMUC flush wait idle failed\n");
+ dev_err(v3d->drm.dev, "MMU TLB clear wait idle failed\n");
return ret;
}
@@ -87,19 +85,40 @@ void v3d_mmu_insert_ptes(struct v3d_bo *bo)
struct drm_gem_shmem_object *shmem_obj = &bo->base;
struct v3d_dev *v3d = to_v3d_dev(shmem_obj->base.dev);
u32 page = bo->node.start;
- u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID;
- struct sg_dma_page_iter dma_iter;
-
- for_each_sgtable_dma_page(shmem_obj->sgt, &dma_iter, 0) {
- dma_addr_t dma_addr = sg_page_iter_dma_address(&dma_iter);
- u32 page_address = dma_addr >> V3D_MMU_PAGE_SHIFT;
- u32 pte = page_prot | page_address;
- u32 i;
-
- BUG_ON(page_address + (PAGE_SIZE >> V3D_MMU_PAGE_SHIFT) >=
- BIT(24));
- for (i = 0; i < PAGE_SIZE >> V3D_MMU_PAGE_SHIFT; i++)
- v3d->pt[page++] = pte + i;
+ struct scatterlist *sgl;
+ unsigned int count;
+
+ for_each_sgtable_dma_sg(shmem_obj->sgt, sgl, count) {
+ dma_addr_t dma_addr = sg_dma_address(sgl);
+ u32 pfn = dma_addr >> V3D_MMU_PAGE_SHIFT;
+ unsigned int len = sg_dma_len(sgl);
+
+ while (len > 0) {
+ u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID;
+ u32 page_address = page_prot | pfn;
+ unsigned int i, page_size;
+
+ BUG_ON(pfn + V3D_PAGE_FACTOR >= BIT(24));
+
+ if (len >= SZ_1M &&
+ v3d_mmu_is_aligned(page, page_address, SZ_1M)) {
+ page_size = SZ_1M;
+ page_address |= V3D_PTE_SUPERPAGE;
+ } else if (len >= SZ_64K &&
+ v3d_mmu_is_aligned(page, page_address, SZ_64K)) {
+ page_size = SZ_64K;
+ page_address |= V3D_PTE_BIGPAGE;
+ } else {
+ page_size = SZ_4K;
+ }
+
+ for (i = 0; i < page_size >> V3D_MMU_PAGE_SHIFT; i++) {
+ v3d->pt[page++] = page_address + i;
+ pfn++;
+ }
+
+ len -= page_size;
+ }
}
WARN_ON_ONCE(page - bo->node.start !=
diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c
index 00cd081d7873..156be13ab2ef 100644
--- a/drivers/gpu/drm/v3d/v3d_perfmon.c
+++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
@@ -409,11 +409,7 @@ int v3d_perfmon_get_values_ioctl(struct drm_device *dev, void *data,
if (req->pad != 0)
return -EINVAL;
- mutex_lock(&v3d_priv->perfmon.lock);
- perfmon = idr_find(&v3d_priv->perfmon.idr, req->id);
- v3d_perfmon_get(perfmon);
- mutex_unlock(&v3d_priv->perfmon.lock);
-
+ perfmon = v3d_perfmon_find(v3d_priv, req->id);
if (!perfmon)
return -EINVAL;
diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
index 08d2a2739582..99ac4995b5a1 100644
--- a/drivers/gpu/drm/v3d/v3d_sched.c
+++ b/drivers/gpu/drm/v3d/v3d_sched.c
@@ -135,8 +135,31 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
-
- preempt_disable();
+ unsigned long flags;
+
+ /*
+ * We only need to disable local interrupts to appease lockdep, which
+ * would otherwise think v3d_job_start_stats() vs v3d_stats_update() has
+ * an unsafe in-irq vs no-irq-off usage problem. This is a false positive
+ * because all the locks are per queue and stats type, and all jobs are
+ * fully serialised, one at a time. More specifically:
+ *
+ * 1. Locks for GPU queues are updated from interrupt handlers under a
+ * spin lock and started here with preemption disabled.
+ *
+ * 2. Locks for CPU queues are updated from the worker with preemption
+ * disabled and equally started here with preemption disabled.
+ *
+ * Therefore both are consistent.
+ *
+ * 3. Because next job can only be queued after the previous one has
+ * been signaled, and locks are per queue, there is also no scope for
+ * the start part to race with the update part.
+ */
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ local_irq_save(flags);
+ else
+ preempt_disable();
write_seqcount_begin(&local_stats->lock);
local_stats->start_ns = now;
@@ -146,7 +169,10 @@ v3d_job_start_stats(struct v3d_job *job, enum v3d_queue queue)
global_stats->start_ns = now;
write_seqcount_end(&global_stats->lock);
- preempt_enable();
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ local_irq_restore(flags);
+ else
+ preempt_enable();
}
static void
@@ -167,11 +193,21 @@ v3d_job_update_stats(struct v3d_job *job, enum v3d_queue queue)
struct v3d_stats *global_stats = &v3d->queue[queue].stats;
struct v3d_stats *local_stats = &file->stats[queue];
u64 now = local_clock();
+ unsigned long flags;
+
+ /* See comment in v3d_job_start_stats() */
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ local_irq_save(flags);
+ else
+ preempt_disable();
- preempt_disable();
v3d_stats_update(local_stats, now);
v3d_stats_update(global_stats, now);
- preempt_enable();
+
+ if (IS_ENABLED(CONFIG_LOCKDEP))
+ local_irq_restore(flags);
+ else
+ preempt_enable();
}
static struct dma_fence *v3d_bin_job_run(struct drm_sched_job *sched_job)
@@ -667,7 +703,7 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
/* Unblock schedulers and restart their jobs. */
for (q = 0; q < V3D_MAX_QUEUES; q++) {
- drm_sched_start(&v3d->queue[q].sched);
+ drm_sched_start(&v3d->queue[q].sched, 0);
}
mutex_unlock(&v3d->reset_lock);