Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig.debug              |  29
-rw-r--r--  lib/buildid.c                  |   5
-rw-r--r--  lib/codetag.c                  |   3
-rw-r--r--  lib/crc32.c                    |   4
-rw-r--r--  lib/crypto/Makefile            |   2
-rw-r--r--  lib/crypto/mpi/mpi-bit.c       |   1
-rw-r--r--  lib/crypto/mpi/mpi-mul.c       |   2
-rw-r--r--  lib/crypto/simd.c              |  11
-rw-r--r--  lib/debugobjects.c             | 849
-rw-r--r--  lib/interval_tree_test.c       |   2
-rw-r--r--  lib/iov_iter.c                 |  93
-rw-r--r--  lib/kunit/string-stream-test.c |   1
-rw-r--r--  lib/locking-selftest.c         |  39
-rw-r--r--  lib/logic_pio.c                |   4
-rw-r--r--  lib/maple_tree.c               |  14
-rw-r--r--  lib/objpool.c                  |  18
-rw-r--r--  lib/random32.c                 |   2
-rw-r--r--  lib/rbtree_test.c              |   2
-rw-r--r--  lib/slub_kunit.c               |   2
-rw-r--r--  lib/test_bpf.c                 |   2
-rw-r--r--  lib/test_parman.c              |   2
-rw-r--r--  lib/test_scanf.c               |   2
22 files changed, 638 insertions, 451 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7315f643817a..652ec5f5fdfb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1328,19 +1328,6 @@ config SCHEDSTATS
endmenu
-config DEBUG_TIMEKEEPING
- bool "Enable extra timekeeping sanity checking"
- help
- This option will enable additional timekeeping sanity checks
- which may be helpful when diagnosing issues where timekeeping
- problems are suspected.
-
- This may include checks in the timekeeping hotpaths, so this
- option may have a (very small) performance impact to some
- workloads.
-
- If unsure, say N.
-
config DEBUG_PREEMPT
bool "Debug preemptible kernel"
depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
@@ -1409,22 +1396,14 @@ config PROVE_LOCKING
For more details, see Documentation/locking/lockdep-design.rst.
config PROVE_RAW_LOCK_NESTING
- bool "Enable raw_spinlock - spinlock nesting checks"
+ bool
depends on PROVE_LOCKING
- default n
+ default y
help
Enable the raw_spinlock vs. spinlock nesting checks which ensure
that the lock nesting rules for PREEMPT_RT enabled kernels are
not violated.
- NOTE: There are known nesting problems. So if you enable this
- option expect lockdep splats until these problems have been fully
- addressed which is work in progress. This config switch allows to
- identify and analyze these problems. It will be removed and the
- check permanently enabled once the main issues have been fixed.
-
- If unsure, select N.
-
config LOCK_STAT
bool "Lock usage statistics"
depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
@@ -1905,7 +1884,7 @@ config STRICT_DEVMEM
bool "Filter access to /dev/mem"
depends on MMU && DEVMEM
depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED
- default y if PPC || X86 || ARM64
+ default y if PPC || X86 || ARM64 || S390
help
If this option is disabled, you allow userspace (root) access to all
of memory, including kernel and userspace memory. Accidental
@@ -3060,7 +3039,7 @@ config RUST_BUILD_ASSERT_ALLOW
bool "Allow unoptimized build-time assertions"
depends on RUST
help
- Controls how are `build_error!` and `build_assert!` handled during build.
+ Controls how `build_error!` and `build_assert!` are handled during the build.
If calls to them exist in the binary, it may indicate a violated invariant
or that the optimizer failed to verify the invariant during compilation.
diff --git a/lib/buildid.c b/lib/buildid.c
index 290641d92ac1..c4b0f376fb34 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -5,6 +5,7 @@
#include <linux/elf.h>
#include <linux/kernel.h>
#include <linux/pagemap.h>
+#include <linux/secretmem.h>
#define BUILD_ID 3
@@ -64,6 +65,10 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
freader_put_folio(r);
+ /* reject secretmem folios created with memfd_secret() */
+ if (secretmem_mapping(r->file->f_mapping))
+ return -EFAULT;
+
r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
/* if sleeping is allowed, wait for the page, if necessary */
diff --git a/lib/codetag.c b/lib/codetag.c
index afa8a2d4f317..d1fbbb7c2ec3 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -228,6 +228,9 @@ bool codetag_unload_module(struct module *mod)
if (!mod)
return true;
+ /* await any module's kfree_rcu() operations to complete */
+ kvfree_rcu_barrier();
+
mutex_lock(&codetag_lock);
list_for_each_entry(cttype, &codetag_types, link) {
struct codetag_module *found = NULL;
diff --git a/lib/crc32.c b/lib/crc32.c
index 5649847d0a8d..ff587fee3893 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -205,7 +205,11 @@ EXPORT_SYMBOL(crc32_le);
EXPORT_SYMBOL(__crc32c_le);
u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
+EXPORT_SYMBOL(crc32_le_base);
+
u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+EXPORT_SYMBOL(__crc32c_le_base);
+
u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
/*
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 969baab8c805..01fac1cd05a1 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -58,3 +58,5 @@ libcurve25519-y += curve25519-selftest.o
endif
obj-$(CONFIG_MPILIB) += mpi/
+
+obj-$(CONFIG_CRYPTO_MANAGER_EXTRA_TESTS) += simd.o
diff --git a/lib/crypto/mpi/mpi-bit.c b/lib/crypto/mpi/mpi-bit.c
index 835a2f0622a0..934d81311360 100644
--- a/lib/crypto/mpi/mpi-bit.c
+++ b/lib/crypto/mpi/mpi-bit.c
@@ -95,6 +95,7 @@ int mpi_set_bit(MPI a, unsigned int n)
a->d[limbno] |= (A_LIMB_1<<bitno);
return 0;
}
+EXPORT_SYMBOL_GPL(mpi_set_bit);
/*
* Shift A by N bits to the right.
diff --git a/lib/crypto/mpi/mpi-mul.c b/lib/crypto/mpi/mpi-mul.c
index 892a246216b9..7e6ff1ce3e9b 100644
--- a/lib/crypto/mpi/mpi-mul.c
+++ b/lib/crypto/mpi/mpi-mul.c
@@ -21,7 +21,7 @@ int mpi_mul(MPI w, MPI u, MPI v)
int usign, vsign, sign_product;
int assign_wp = 0;
mpi_ptr_t tmp_limb = NULL;
- int err;
+ int err = 0;
if (u->nlimbs < v->nlimbs) {
/* Swap U and V. */
diff --git a/lib/crypto/simd.c b/lib/crypto/simd.c
new file mode 100644
index 000000000000..9c36cb3bb49c
--- /dev/null
+++ b/lib/crypto/simd.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * SIMD testing utility functions
+ *
+ * Copyright 2024 Google LLC
+ */
+
+#include <crypto/internal/simd.h>
+
+DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test);
+EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test);
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 5ce473ad499b..7f50c4480a4e 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -7,25 +7,30 @@
#define pr_fmt(fmt) "ODEBUG: " fmt
+#include <linux/cpu.h>
#include <linux/debugobjects.h>
-#include <linux/interrupt.h>
+#include <linux/debugfs.h>
+#include <linux/hash.h>
+#include <linux/kmemleak.h>
#include <linux/sched.h>
+#include <linux/sched/loadavg.h>
#include <linux/sched/task_stack.h>
#include <linux/seq_file.h>
-#include <linux/debugfs.h>
#include <linux/slab.h>
-#include <linux/hash.h>
-#include <linux/kmemleak.h>
-#include <linux/cpu.h>
+#include <linux/static_key.h>
#define ODEBUG_HASH_BITS 14
#define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS)
-#define ODEBUG_POOL_SIZE 1024
-#define ODEBUG_POOL_MIN_LEVEL 256
-#define ODEBUG_POOL_PERCPU_SIZE 64
+/* Must be power of two */
#define ODEBUG_BATCH_SIZE 16
+/* Initial values. Must all be a multiple of batch size */
+#define ODEBUG_POOL_SIZE (64 * ODEBUG_BATCH_SIZE)
+#define ODEBUG_POOL_MIN_LEVEL (ODEBUG_POOL_SIZE / 4)
+
+#define ODEBUG_POOL_PERCPU_SIZE (8 * ODEBUG_BATCH_SIZE)
+
#define ODEBUG_CHUNK_SHIFT PAGE_SHIFT
#define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT)
#define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1))
@@ -35,7 +40,7 @@
* frequency of 10Hz and about 1024 objects for each freeing operation.
* So it is freeing at most 10k debug objects per second.
*/
-#define ODEBUG_FREE_WORK_MAX 1024
+#define ODEBUG_FREE_WORK_MAX (1024 / ODEBUG_BATCH_SIZE)
#define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10)
struct debug_bucket {
@@ -43,16 +48,24 @@ struct debug_bucket {
raw_spinlock_t lock;
};
-/*
- * Debug object percpu free list
- * Access is protected by disabling irq
- */
-struct debug_percpu_free {
- struct hlist_head free_objs;
- int obj_free;
+struct pool_stats {
+ unsigned int cur_used;
+ unsigned int max_used;
+ unsigned int min_fill;
};
-static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool);
+struct obj_pool {
+ struct hlist_head objects;
+ unsigned int cnt;
+ unsigned int min_cnt;
+ unsigned int max_cnt;
+ struct pool_stats stats;
+} ____cacheline_aligned;
+
+
+static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = {
+ .max_cnt = ODEBUG_POOL_PERCPU_SIZE,
+};
static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE];
@@ -60,37 +73,32 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
static DEFINE_RAW_SPINLOCK(pool_lock);
-static HLIST_HEAD(obj_pool);
-static HLIST_HEAD(obj_to_free);
+static struct obj_pool pool_global = {
+ .min_cnt = ODEBUG_POOL_MIN_LEVEL,
+ .max_cnt = ODEBUG_POOL_SIZE,
+ .stats = {
+ .min_fill = ODEBUG_POOL_SIZE,
+ },
+};
-/*
- * Because of the presence of percpu free pools, obj_pool_free will
- * under-count those in the percpu free pools. Similarly, obj_pool_used
- * will over-count those in the percpu free pools. Adjustments will be
- * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used
- * can be off.
- */
-static int __data_racy obj_pool_min_free = ODEBUG_POOL_SIZE;
-static int __data_racy obj_pool_free = ODEBUG_POOL_SIZE;
-static int obj_pool_used;
-static int __data_racy obj_pool_max_used;
+static struct obj_pool pool_to_free = {
+ .max_cnt = UINT_MAX,
+};
+
+static HLIST_HEAD(pool_boot);
+
+static unsigned long avg_usage;
static bool obj_freeing;
-/* The number of objs on the global free list */
-static int obj_nr_tofree;
static int __data_racy debug_objects_maxchain __read_mostly;
static int __data_racy __maybe_unused debug_objects_maxchecked __read_mostly;
static int __data_racy debug_objects_fixups __read_mostly;
static int __data_racy debug_objects_warnings __read_mostly;
-static int __data_racy debug_objects_enabled __read_mostly
+static bool __data_racy debug_objects_enabled __read_mostly
= CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT;
-static int debug_objects_pool_size __ro_after_init
- = ODEBUG_POOL_SIZE;
-static int debug_objects_pool_min_level __ro_after_init
- = ODEBUG_POOL_MIN_LEVEL;
-static const struct debug_obj_descr *descr_test __read_mostly;
-static struct kmem_cache *obj_cache __ro_after_init;
+static const struct debug_obj_descr *descr_test __read_mostly;
+static struct kmem_cache *obj_cache __ro_after_init;
/*
* Track numbers of kmem_cache_alloc()/free() calls done.
@@ -101,19 +109,20 @@ static int __data_racy debug_objects_freed;
static void free_obj_work(struct work_struct *work);
static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work);
+static DEFINE_STATIC_KEY_FALSE(obj_cache_enabled);
+
static int __init enable_object_debug(char *str)
{
- debug_objects_enabled = 1;
+ debug_objects_enabled = true;
return 0;
}
+early_param("debug_objects", enable_object_debug);
static int __init disable_object_debug(char *str)
{
- debug_objects_enabled = 0;
+ debug_objects_enabled = false;
return 0;
}
-
-early_param("debug_objects", enable_object_debug);
early_param("no_debug_objects", disable_object_debug);
static const char *obj_states[ODEBUG_STATE_MAX] = {
@@ -125,61 +134,280 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
[ODEBUG_STATE_NOTAVAILABLE] = "not available",
};
-static void fill_pool(void)
+static __always_inline unsigned int pool_count(struct obj_pool *pool)
+{
+ return READ_ONCE(pool->cnt);
+}
+
+static __always_inline bool pool_should_refill(struct obj_pool *pool)
+{
+ return pool_count(pool) < pool->min_cnt;
+}
+
+static __always_inline bool pool_must_refill(struct obj_pool *pool)
+{
+ return pool_count(pool) < pool->min_cnt / 2;
+}
+
+static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src)
{
- gfp_t gfp = __GFP_HIGH | __GFP_NOWARN;
+ struct hlist_node *last, *next_batch, *first_batch;
struct debug_obj *obj;
- unsigned long flags;
- if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level))
+ if (dst->cnt >= dst->max_cnt || !src->cnt)
+ return false;
+
+ first_batch = src->objects.first;
+ obj = hlist_entry(first_batch, typeof(*obj), node);
+ last = obj->batch_last;
+ next_batch = last->next;
+
+ /* Move the next batch to the front of the source pool */
+ src->objects.first = next_batch;
+ if (next_batch)
+ next_batch->pprev = &src->objects.first;
+
+ /* Add the extracted batch to the destination pool */
+ last->next = dst->objects.first;
+ if (last->next)
+ last->next->pprev = &last->next;
+ first_batch->pprev = &dst->objects.first;
+ dst->objects.first = first_batch;
+
+ WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE);
+ WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
+ return true;
+}
+
+static bool pool_push_batch(struct obj_pool *dst, struct hlist_head *head)
+{
+ struct hlist_node *last;
+ struct debug_obj *obj;
+
+ if (dst->cnt >= dst->max_cnt)
+ return false;
+
+ obj = hlist_entry(head->first, typeof(*obj), node);
+ last = obj->batch_last;
+
+ hlist_splice_init(head, last, &dst->objects);
+ WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE);
+ return true;
+}
+
+static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src)
+{
+ struct hlist_node *last, *next;
+ struct debug_obj *obj;
+
+ if (!src->cnt)
+ return false;
+
+ /* Move the complete list to the head */
+ hlist_move_list(&src->objects, head);
+
+ obj = hlist_entry(head->first, typeof(*obj), node);
+ last = obj->batch_last;
+ next = last->next;
+ /* Disconnect the batch from the list */
+ last->next = NULL;
+
+ /* Move the node after last back to the source pool. */
+ src->objects.first = next;
+ if (next)
+ next->pprev = &src->objects.first;
+
+ WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE);
+ return true;
+}
+
+static struct debug_obj *__alloc_object(struct hlist_head *list)
+{
+ struct debug_obj *obj;
+
+ if (unlikely(!list->first))
+ return NULL;
+
+ obj = hlist_entry(list->first, typeof(*obj), node);
+ hlist_del(&obj->node);
+ return obj;
+}
+
+static void pcpu_refill_stats(void)
+{
+ struct pool_stats *stats = &pool_global.stats;
+
+ WRITE_ONCE(stats->cur_used, stats->cur_used + ODEBUG_BATCH_SIZE);
+
+ if (stats->cur_used > stats->max_used)
+ stats->max_used = stats->cur_used;
+
+ if (pool_global.cnt < stats->min_fill)
+ stats->min_fill = pool_global.cnt;
+}
+
+static struct debug_obj *pcpu_alloc(void)
+{
+ struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu);
+
+ lockdep_assert_irqs_disabled();
+
+ for (;;) {
+ struct debug_obj *obj = __alloc_object(&pcp->objects);
+
+ if (likely(obj)) {
+ pcp->cnt--;
+ /*
+ * If this emptied a batch try to refill from the
+ * free pool. Don't do that if this was the top-most
+ * batch as pcpu_free() expects the per CPU pool
+ * to be less than ODEBUG_POOL_PERCPU_SIZE.
+ */
+ if (unlikely(pcp->cnt < (ODEBUG_POOL_PERCPU_SIZE - ODEBUG_BATCH_SIZE) &&
+ !(pcp->cnt % ODEBUG_BATCH_SIZE))) {
+ /*
+ * Don't try to allocate from the regular pool here
+ * to not exhaust it prematurely.
+ */
+ if (pool_count(&pool_to_free)) {
+ guard(raw_spinlock)(&pool_lock);
+ pool_move_batch(pcp, &pool_to_free);
+ pcpu_refill_stats();
+ }
+ }
+ return obj;
+ }
+
+ guard(raw_spinlock)(&pool_lock);
+ if (!pool_move_batch(pcp, &pool_to_free)) {
+ if (!pool_move_batch(pcp, &pool_global))
+ return NULL;
+ }
+ pcpu_refill_stats();
+ }
+}
+
+static void pcpu_free(struct debug_obj *obj)
+{
+ struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu);
+ struct debug_obj *first;
+
+ lockdep_assert_irqs_disabled();
+
+ if (!(pcp->cnt % ODEBUG_BATCH_SIZE)) {
+ obj->batch_last = &obj->node;
+ } else {
+ first = hlist_entry(pcp->objects.first, typeof(*first), node);
+ obj->batch_last = first->batch_last;
+ }
+ hlist_add_head(&obj->node, &pcp->objects);
+ pcp->cnt++;
+
+ /* Pool full ? */
+ if (pcp->cnt < ODEBUG_POOL_PERCPU_SIZE)
return;
+ /* Remove a batch from the per CPU pool */
+ guard(raw_spinlock)(&pool_lock);
+ /* Try to fit the batch into the pool_global first */
+ if (!pool_move_batch(&pool_global, pcp))
+ pool_move_batch(&pool_to_free, pcp);
+ WRITE_ONCE(pool_global.stats.cur_used, pool_global.stats.cur_used - ODEBUG_BATCH_SIZE);
+}
+
+static void free_object_list(struct hlist_head *head)
+{
+ struct hlist_node *tmp;
+ struct debug_obj *obj;
+ int cnt = 0;
+
+ hlist_for_each_entry_safe(obj, tmp, head, node) {
+ hlist_del(&obj->node);
+ kmem_cache_free(obj_cache, obj);
+ cnt++;
+ }
+ debug_objects_freed += cnt;
+}
+
+static void fill_pool_from_freelist(void)
+{
+ static unsigned long state;
+
/*
* Reuse objs from the global obj_to_free list; they will be
* reinitialized when allocating.
- *
- * obj_nr_tofree is checked locklessly; the READ_ONCE() pairs with
- * the WRITE_ONCE() in pool_lock critical sections.
*/
- if (READ_ONCE(obj_nr_tofree)) {
- raw_spin_lock_irqsave(&pool_lock, flags);
- /*
- * Recheck with the lock held as the worker thread might have
- * won the race and freed the global free list already.
- */
- while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) {
- obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
- hlist_del(&obj->node);
- WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
- hlist_add_head(&obj->node, &obj_pool);
- WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
+ if (!pool_count(&pool_to_free))
+ return;
+
+ /*
+ * Prevent the context from being scheduled or interrupted after
+ * setting the state flag;
+ */
+ guard(irqsave)();
+
+ /*
+ * Avoid lock contention on &pool_lock and avoid making the cache
+ * line exclusive by testing the bit before attempting to set it.
+ */
+ if (test_bit(0, &state) || test_and_set_bit(0, &state))
+ return;
+
+ /* Avoid taking the lock when there is no work to do */
+ while (pool_should_refill(&pool_global) && pool_count(&pool_to_free)) {
+ guard(raw_spinlock)(&pool_lock);
+ /* Move a batch if possible */
+ pool_move_batch(&pool_global, &pool_to_free);
+ }
+ clear_bit(0, &state);
+}
+
+static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp)
+{
+ struct hlist_node *last = NULL;
+ struct debug_obj *obj;
+
+ for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
+ obj = kmem_cache_zalloc(cache, gfp);
+ if (!obj) {
+ free_object_list(head);
+ return false;
}
- raw_spin_unlock_irqrestore(&pool_lock, flags);
+ debug_objects_allocated++;
+
+ if (!last)
+ last = &obj->node;
+ obj->batch_last = last;
+
+ hlist_add_head(&obj->node, head);
}
+ return true;
+}
+
+static void fill_pool(void)
+{
+ static atomic_t cpus_allocating;
- if (unlikely(!obj_cache))
+ /*
+ * Avoid allocation and lock contention when:
+ * - One other CPU is already allocating
+ * - the global pool has not reached the critical level yet
+ */
+ if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating))
return;
- while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) {
- struct debug_obj *new[ODEBUG_BATCH_SIZE];
- int cnt;
+ atomic_inc(&cpus_allocating);
+ while (pool_should_refill(&pool_global)) {
+ HLIST_HEAD(head);
- for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) {
- new[cnt] = kmem_cache_zalloc(obj_cache, gfp);
- if (!new[cnt])
- break;
- }
- if (!cnt)
- return;
+ if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN))
+ break;
- raw_spin_lock_irqsave(&pool_lock, flags);
- while (cnt) {
- hlist_add_head(&new[--cnt]->node, &obj_pool);
- debug_objects_allocated++;
- WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
- }
- raw_spin_unlock_irqrestore(&pool_lock, flags);
+ guard(raw_spinlock_irqsave)(&pool_lock);
+ if (!pool_push_batch(&pool_global, &head))
+ pool_push_batch(&pool_to_free, &head);
}
+ atomic_dec(&cpus_allocating);
}
/*
@@ -201,72 +429,37 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b)
return NULL;
}
-/*
- * Allocate a new object from the hlist
- */
-static struct debug_obj *__alloc_object(struct hlist_head *list)
+static void calc_usage(void)
{
- struct debug_obj *obj = NULL;
+ static DEFINE_RAW_SPINLOCK(avg_lock);
+ static unsigned long avg_period;
+ unsigned long cur, now = jiffies;
- if (list->first) {
- obj = hlist_entry(list->first, typeof(*obj), node);
- hlist_del(&obj->node);
- }
+ if (!time_after_eq(now, READ_ONCE(avg_period)))
+ return;
- return obj;
+ if (!raw_spin_trylock(&avg_lock))
+ return;
+
+ WRITE_ONCE(avg_period, now + msecs_to_jiffies(10));
+ cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX;
+ WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur));
+ raw_spin_unlock(&avg_lock);
}
-static struct debug_obj *
-alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
+static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b,
+ const struct debug_obj_descr *descr)
{
- struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool);
struct debug_obj *obj;
- if (likely(obj_cache)) {
- obj = __alloc_object(&percpu_pool->free_objs);
- if (obj) {
- percpu_pool->obj_free--;
- goto init_obj;
- }
- }
-
- raw_spin_lock(&pool_lock);
- obj = __alloc_object(&obj_pool);
- if (obj) {
- obj_pool_used++;
- WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
-
- /*
- * Looking ahead, allocate one batch of debug objects and
- * put them into the percpu free pool.
- */
- if (likely(obj_cache)) {
- int i;
-
- for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
- struct debug_obj *obj2;
-
- obj2 = __alloc_object(&obj_pool);
- if (!obj2)
- break;
- hlist_add_head(&obj2->node,
- &percpu_pool->free_objs);
- percpu_pool->obj_free++;
- obj_pool_used++;
- WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
- }
- }
-
- if (obj_pool_used > obj_pool_max_used)
- obj_pool_max_used = obj_pool_used;
+ calc_usage();
- if (obj_pool_free < obj_pool_min_free)
- obj_pool_min_free = obj_pool_free;
- }
- raw_spin_unlock(&pool_lock);
+ if (static_branch_likely(&obj_cache_enabled))
+ obj = pcpu_alloc();
+ else
+ obj = __alloc_object(&pool_boot);
-init_obj:
- if (obj) {
+ if (likely(obj)) {
obj->object = addr;
obj->descr = descr;
obj->state = ODEBUG_STATE_NONE;
@@ -276,142 +469,58 @@ init_obj:
return obj;
}
-/*
- * workqueue function to free objects.
- *
- * To reduce contention on the global pool_lock, the actual freeing of
- * debug objects will be delayed if the pool_lock is busy.
- */
+/* workqueue function to free objects. */
static void free_obj_work(struct work_struct *work)
{
- struct hlist_node *tmp;
- struct debug_obj *obj;
- unsigned long flags;
- HLIST_HEAD(tofree);
+ static unsigned long last_use_avg;
+ unsigned long cur_used, last_used, delta;
+ unsigned int max_free = 0;
WRITE_ONCE(obj_freeing, false);
- if (!raw_spin_trylock_irqsave(&pool_lock, flags))
- return;
- if (obj_pool_free >= debug_objects_pool_size)
- goto free_objs;
+ /* Rate limit freeing based on current use average */
+ cur_used = READ_ONCE(avg_usage);
+ last_used = last_use_avg;
+ last_use_avg = cur_used;
- /*
- * The objs on the pool list might be allocated before the work is
- * run, so recheck if pool list it full or not, if not fill pool
- * list from the global free list. As it is likely that a workload
- * may be gearing up to use more and more objects, don't free any
- * of them until the next round.
- */
- while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
- obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
- hlist_del(&obj->node);
- hlist_add_head(&obj->node, &obj_pool);
- WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
- WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1);
- }
- raw_spin_unlock_irqrestore(&pool_lock, flags);
- return;
+ if (!pool_count(&pool_to_free))
+ return;
-free_objs:
- /*
- * Pool list is already full and there are still objs on the free
- * list. Move remaining free objs to a temporary list to free the
- * memory outside the pool_lock held region.
- */
- if (obj_nr_tofree) {
- hlist_move_list(&obj_to_free, &tofree);
- debug_objects_freed += obj_nr_tofree;
- WRITE_ONCE(obj_nr_tofree, 0);
+ if (cur_used <= last_used) {
+ delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX;
+ max_free = min(delta, ODEBUG_FREE_WORK_MAX);
}
- raw_spin_unlock_irqrestore(&pool_lock, flags);
- hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
- hlist_del(&obj->node);
- kmem_cache_free(obj_cache, obj);
+ for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) {
+ HLIST_HEAD(tofree);
+
+ /* Acquire and drop the lock for each batch */
+ scoped_guard(raw_spinlock_irqsave, &pool_lock) {
+ if (!pool_to_free.cnt)
+ return;
+
+ /* Refill the global pool if possible */
+ if (pool_move_batch(&pool_global, &pool_to_free)) {
+ /* Don't free as there seems to be demand */
+ max_free = 0;
+ } else if (max_free) {
+ pool_pop_batch(&tofree, &pool_to_free);
+ max_free--;
+ } else {
+ return;
+ }
+ }
+ free_object_list(&tofree);
}
}
static void __free_object(struct debug_obj *obj)
{
- struct debug_obj *objs[ODEBUG_BATCH_SIZE];
- struct debug_percpu_free *percpu_pool;
- int lookahead_count = 0;
- unsigned long flags;
- bool work;
-
- local_irq_save(flags);
- if (!obj_cache)
- goto free_to_obj_pool;
-
- /*
- * Try to free it into the percpu pool first.
- */
- percpu_pool = this_cpu_ptr(&percpu_obj_pool);
- if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) {
- hlist_add_head(&obj->node, &percpu_pool->free_objs);
- percpu_pool->obj_free++;
- local_irq_restore(flags);
- return;
- }
-
- /*
- * As the percpu pool is full, look ahead and pull out a batch
- * of objects from the percpu pool and free them as well.
- */
- for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) {
- objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs);
- if (!objs[lookahead_count])
- break;
- percpu_pool->obj_free--;
- }
-
-free_to_obj_pool:
- raw_spin_lock(&pool_lock);
- work = (obj_pool_free > debug_objects_pool_size) && obj_cache &&
- (obj_nr_tofree < ODEBUG_FREE_WORK_MAX);
- obj_pool_used--;
-
- if (work) {
- WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
- hlist_add_head(&obj->node, &obj_to_free);
- if (lookahead_count) {
- WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count);
- obj_pool_used -= lookahead_count;
- while (lookahead_count) {
- hlist_add_head(&objs[--lookahead_count]->node,
- &obj_to_free);
- }
- }
-
- if ((obj_pool_free > debug_objects_pool_size) &&
- (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) {
- int i;
-
- /*
- * Free one more batch of objects from obj_pool.
- */
- for (i = 0; i < ODEBUG_BATCH_SIZE; i++) {
- obj = __alloc_object(&obj_pool);
- hlist_add_head(&obj->node, &obj_to_free);
- WRITE_ONCE(obj_pool_free, obj_pool_free - 1);
- WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1);
- }
- }
- } else {
- WRITE_ONCE(obj_pool_free, obj_pool_free + 1);
- hlist_add_head(&obj->node, &obj_pool);
- if (lookahead_count) {
- WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count);
- obj_pool_used -= lookahead_count;
- while (lookahead_count) {
- hlist_add_head(&objs[--lookahead_count]->node,
- &obj_pool);
- }
- }
- }
- raw_spin_unlock(&pool_lock);
- local_irq_restore(flags);
+ guard(irqsave)();
+ if (static_branch_likely(&obj_cache_enabled))
+ pcpu_free(obj);
+ else
+ hlist_add_head(&obj->node, &pool_boot);
}
/*
@@ -421,63 +530,52 @@ free_to_obj_pool:
static void free_object(struct debug_obj *obj)
{
__free_object(obj);
- if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) {
+ if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) {
WRITE_ONCE(obj_freeing, true);
schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
}
}
-#ifdef CONFIG_HOTPLUG_CPU
-static int object_cpu_offline(unsigned int cpu)
+static void put_objects(struct hlist_head *list)
{
- struct debug_percpu_free *percpu_pool;
struct hlist_node *tmp;
struct debug_obj *obj;
- unsigned long flags;
- /* Remote access is safe as the CPU is dead already */
- percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu);
- hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) {
+ /*
+ * Using free_object() puts the objects into reuse or schedules
+ * them for freeing and it gets all the accounting correct.
+ */
+ hlist_for_each_entry_safe(obj, tmp, list, node) {
hlist_del(&obj->node);
- kmem_cache_free(obj_cache, obj);
+ free_object(obj);
}
+}
- raw_spin_lock_irqsave(&pool_lock, flags);
- obj_pool_used -= percpu_pool->obj_free;
- debug_objects_freed += percpu_pool->obj_free;
- raw_spin_unlock_irqrestore(&pool_lock, flags);
-
- percpu_pool->obj_free = 0;
+#ifdef CONFIG_HOTPLUG_CPU
+static int object_cpu_offline(unsigned int cpu)
+{
+ /* Remote access is safe as the CPU is dead already */
+ struct obj_pool *pcp = per_cpu_ptr(&pool_pcpu, cpu);
+ put_objects(&pcp->objects);
+ pcp->cnt = 0;
return 0;
}
#endif
-/*
- * We run out of memory. That means we probably have tons of objects
- * allocated.
- */
+/* Out of memory. Free all objects from hash */
static void debug_objects_oom(void)
{
struct debug_bucket *db = obj_hash;
- struct hlist_node *tmp;
HLIST_HEAD(freelist);
- struct debug_obj *obj;
- unsigned long flags;
- int i;
pr_warn("Out of memory. ODEBUG disabled\n");
- for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
- raw_spin_lock_irqsave(&db->lock, flags);
- hlist_move_list(&db->list, &freelist);
- raw_spin_unlock_irqrestore(&db->lock, flags);
+ for (int i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
+ scoped_guard(raw_spinlock_irqsave, &db->lock)
+ hlist_move_list(&db->list, &freelist);
- /* Now free them */
- hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
- hlist_del(&obj->node);
- free_object(obj);
- }
+ put_objects(&freelist);
}
}
@@ -592,12 +690,24 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket
}
/* Out of memory. Do the cleanup outside of the locked region */
- debug_objects_enabled = 0;
+ debug_objects_enabled = false;
return NULL;
}
static void debug_objects_fill_pool(void)
{
+ if (!static_branch_likely(&obj_cache_enabled))
+ return;
+
+ if (likely(!pool_should_refill(&pool_global)))
+ return;
+
+ /* Try reusing objects from obj_to_free_list */
+ fill_pool_from_freelist();
+
+ if (likely(!pool_should_refill(&pool_global)))
+ return;
+
/*
* On RT enabled kernels the pool refill must happen in preemptible
* context -- for !RT kernels we rely on the fact that spinlock_t and
@@ -1007,7 +1117,7 @@ repeat:
debug_objects_maxchecked = objs_checked;
/* Schedule work to actually kmem_cache_free() objects */
- if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) {
+ if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) {
WRITE_ONCE(obj_freeing, true);
schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY);
}
@@ -1024,23 +1134,33 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
static int debug_stats_show(struct seq_file *m, void *v)
{
- int cpu, obj_percpu_free = 0;
+ unsigned int cpu, pool_used, pcp_free = 0;
+ /*
+ * pool_global.stats.cur_used is the number of batches currently
+ * handed out to per CPU pools. Convert it to number of objects
+ * and subtract the number of free objects in the per CPU pools.
+ * As this is lockless the number is an estimate.
+ */
for_each_possible_cpu(cpu)
- obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu);
-
- seq_printf(m, "max_chain :%d\n", debug_objects_maxchain);
- seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked);
- seq_printf(m, "warnings :%d\n", debug_objects_warnings);
- seq_printf(m, "fixups :%d\n", debug_objects_fixups);
- seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free);
- seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free);
- seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
- seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free);
- seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
- seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree));
- seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
- seq_printf(m, "objs_freed :%d\n", debug_objects_freed);
+ pcp_free += per_cpu(pool_pcpu.cnt, cpu);
+
+ pool_used = READ_ONCE(pool_global.stats.cur_used);
+ pcp_free = min(pool_used, pcp_free);
+ pool_used -= pcp_free;
+
+ seq_printf(m, "max_chain : %d\n", debug_objects_maxchain);
+ seq_printf(m, "max_checked : %d\n", debug_objects_maxchecked);
+ seq_printf(m, "warnings : %d\n", debug_objects_warnings);
+ seq_printf(m, "fixups : %d\n", debug_objects_fixups);
+ seq_printf(m, "pool_free : %u\n", pool_count(&pool_global) + pcp_free);
+ seq_printf(m, "pool_pcp_free : %u\n", pcp_free);
+ seq_printf(m, "pool_min_free : %u\n", data_race(pool_global.stats.min_fill));
+ seq_printf(m, "pool_used : %u\n", pool_used);
+ seq_printf(m, "pool_max_used : %u\n", data_race(pool_global.stats.max_used));
+ seq_printf(m, "on_free_list : %u\n", pool_count(&pool_to_free));
+ seq_printf(m, "objs_allocated: %d\n", debug_objects_allocated);
+ seq_printf(m, "objs_freed : %d\n", debug_objects_freed);
return 0;
}
DEFINE_SHOW_ATTRIBUTE(debug_stats);
@@ -1194,7 +1314,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings)
out:
raw_spin_unlock_irqrestore(&db->lock, flags);
if (res)
- debug_objects_enabled = 0;
+ debug_objects_enabled = false;
return res;
}
@@ -1209,7 +1329,7 @@ static __initconst const struct debug_obj_descr descr_type_test = {
static __initdata struct self_test obj = { .static_init = 0 };
-static void __init debug_objects_selftest(void)
+static bool __init debug_objects_selftest(void)
{
int fixups, oldfixups, warnings, oldwarnings;
unsigned long flags;
@@ -1278,9 +1398,10 @@ out:
descr_test = NULL;
local_irq_restore(flags);
+ return debug_objects_enabled;
}
#else
-static inline void debug_objects_selftest(void) { }
+static inline bool debug_objects_selftest(void) { return true; }
#endif
/*
@@ -1295,65 +1416,54 @@ void __init debug_objects_early_init(void)
for (i = 0; i < ODEBUG_HASH_SIZE; i++)
raw_spin_lock_init(&obj_hash[i].lock);
+ /* Keep early boot simple and add everything to the boot list */
for (i = 0; i < ODEBUG_POOL_SIZE; i++)
- hlist_add_head(&obj_static_pool[i].node, &obj_pool);
+ hlist_add_head(&obj_static_pool[i].node, &pool_boot);
}
/*
- * Convert the statically allocated objects to dynamic ones:
+ * Convert the statically allocated objects to dynamic ones.
+ * debug_objects_mem_init() is called early so only one CPU is up and
+ * interrupts are disabled, which means it is safe to replace the active
+ * object references.
*/
-static int __init debug_objects_replace_static_objects(void)
+static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache)
{
struct debug_bucket *db = obj_hash;
struct hlist_node *tmp;
- struct debug_obj *obj, *new;
+ struct debug_obj *obj;
HLIST_HEAD(objects);
- int i, cnt = 0;
+ int i;
- for (i = 0; i < ODEBUG_POOL_SIZE; i++) {
- obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL);
- if (!obj)
+ for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) {
+ if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL))
goto free;
- hlist_add_head(&obj->node, &objects);
+ pool_push_batch(&pool_global, &objects);
}
- debug_objects_allocated += i;
-
- /*
- * debug_objects_mem_init() is now called early that only one CPU is up
- * and interrupts have been disabled, so it is safe to replace the
- * active object references.
- */
-
- /* Remove the statically allocated objects from the pool */
- hlist_for_each_entry_safe(obj, tmp, &obj_pool, node)
- hlist_del(&obj->node);
- /* Move the allocated objects to the pool */
- hlist_move_list(&objects, &obj_pool);
+ /* Disconnect the boot pool. */
+ pool_boot.first = NULL;
/* Replace the active object references */
for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) {
hlist_move_list(&db->list, &objects);
hlist_for_each_entry(obj, &objects, node) {
- new = hlist_entry(obj_pool.first, typeof(*obj), node);
- hlist_del(&new->node);
+ struct debug_obj *new = pcpu_alloc();
+
/* copy object data */
*new = *obj;
hlist_add_head(&new->node, &db->list);
- cnt++;
}
}
-
- pr_debug("%d of %d active objects replaced\n",
- cnt, obj_pool_used);
- return 0;
+ return true;
free:
- hlist_for_each_entry_safe(obj, tmp, &objects, node) {
+ /* Can't use free_object_list() as the cache is not populated yet */
+ hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) {
hlist_del(&obj->node);
- kmem_cache_free(obj_cache, obj);
+ kmem_cache_free(cache, obj);
}
- return -ENOMEM;
+ return false;
}
/*
@@ -1364,43 +1474,40 @@ free:
*/
void __init debug_objects_mem_init(void)
{
- int cpu, extras;
+ struct kmem_cache *cache;
+ int extras;
if (!debug_objects_enabled)
return;
- /*
- * Initialize the percpu object pools
- *
- * Initialization is not strictly necessary, but was done for
- * completeness.
- */
- for_each_possible_cpu(cpu)
- INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu));
+ if (!debug_objects_selftest())
+ return;
- obj_cache = kmem_cache_create("debug_objects_cache",
- sizeof (struct debug_obj), 0,
- SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE,
- NULL);
+ cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0,
+ SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, NULL);
- if (!obj_cache || debug_objects_replace_static_objects()) {
- debug_objects_enabled = 0;
- kmem_cache_destroy(obj_cache);
- pr_warn("out of memory.\n");
+ if (!cache || !debug_objects_replace_static_objects(cache)) {
+ debug_objects_enabled = false;
+ pr_warn("Out of memory.\n");
return;
- } else
- debug_objects_selftest();
-
-#ifdef CONFIG_HOTPLUG_CPU
- cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL,
- object_cpu_offline);
-#endif
+ }
/*
- * Increase the thresholds for allocating and freeing objects
- * according to the number of possible CPUs available in the system.
+ * Adjust the thresholds for allocating and freeing objects
+ * according to the number of possible CPUs available in the
+ * system.
*/
extras = num_possible_cpus() * ODEBUG_BATCH_SIZE;
- debug_objects_pool_size += extras;
- debug_objects_pool_min_level += extras;
+ pool_global.max_cnt += extras;
+ pool_global.min_cnt += extras;
+
+ /* Everything worked. Expose the cache */
+ obj_cache = cache;
+ static_branch_enable(&obj_cache_enabled);
+
+#ifdef CONFIG_HOTPLUG_CPU
+ cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL,
+ object_cpu_offline);
+#endif
+ return;
}
diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c
index f37f4d44faa9..837064b83a6c 100644
--- a/lib/interval_tree_test.c
+++ b/lib/interval_tree_test.c
@@ -2,7 +2,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/interval_tree.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/slab.h>
#include <asm/timex.h>
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1abb32c0da50..9ec806f989f2 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -461,6 +461,8 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
size_t bytes, struct iov_iter *i)
{
size_t n, copied = 0;
+ bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) ||
+ PageHighMem(page);
if (!page_copy_sane(page, offset, bytes))
return 0;
@@ -471,7 +473,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
char *p;
n = bytes - copied;
- if (PageHighMem(page)) {
+ if (uses_kmap) {
page += offset / PAGE_SIZE;
offset %= PAGE_SIZE;
n = min_t(size_t, n, PAGE_SIZE - offset);
@@ -482,7 +484,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
kunmap_atomic(p);
copied += n;
offset += n;
- } while (PageHighMem(page) && copied != bytes && n > 0);
+ } while (uses_kmap && copied != bytes && n > 0);
return copied;
}
@@ -1021,15 +1023,18 @@ static ssize_t iter_folioq_get_pages(struct iov_iter *iter,
size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot);
size_t part = PAGE_SIZE - offset % PAGE_SIZE;
- part = umin(part, umin(maxsize - extracted, fsize - offset));
- count -= part;
- iov_offset += part;
- extracted += part;
+ if (offset < fsize) {
+ part = umin(part, umin(maxsize - extracted, fsize - offset));
+ count -= part;
+ iov_offset += part;
+ extracted += part;
+
+ *pages = folio_page(folio, offset / PAGE_SIZE);
+ get_page(*pages);
+ pages++;
+ maxpages--;
+ }
- *pages = folio_page(folio, offset / PAGE_SIZE);
- get_page(*pages);
- pages++;
- maxpages--;
if (maxpages == 0 || extracted >= maxsize)
break;
@@ -1677,8 +1682,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i,
}
/*
- * Extract a list of contiguous pages from an ITER_BVEC iterator. This does
- * not get references on the pages, nor does it get a pin on them.
+ * Extract a list of virtually contiguous pages from an ITER_BVEC iterator.
+ * This does not get references on the pages, nor does it get a pin on them.
*/
static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
struct page ***pages, size_t maxsize,
@@ -1686,35 +1691,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i,
iov_iter_extraction_t extraction_flags,
size_t *offset0)
{
- struct page **p, *page;
- size_t skip = i->iov_offset, offset, size;
- int k;
+ size_t skip = i->iov_offset, size = 0;
+ struct bvec_iter bi;
+ int k = 0;
- for (;;) {
- if (i->nr_segs == 0)
- return 0;
- size = min(maxsize, i->bvec->bv_len - skip);
- if (size)
- break;
+ if (i->nr_segs == 0)
+ return 0;
+
+ if (i->iov_offset == i->bvec->bv_len) {
i->iov_offset = 0;
i->nr_segs--;
i->bvec++;
skip = 0;
}
+ bi.bi_idx = 0;
+ bi.bi_size = maxsize;
+ bi.bi_bvec_done = skip;
+
+ maxpages = want_pages_array(pages, maxsize, skip, maxpages);
+
+ while (bi.bi_size && bi.bi_idx < i->nr_segs) {
+ struct bio_vec bv = bvec_iter_bvec(i->bvec, bi);
+
+ /*
+ * The iov_iter_extract_pages interface only allows an offset
+ * into the first page. Break out of the loop if we see an
+ * offset into subsequent pages, the caller will have to call
+ * iov_iter_extract_pages again for the remainder.
+ */
+ if (k) {
+ if (bv.bv_offset)
+ break;
+ } else {
+ *offset0 = bv.bv_offset;
+ }
- skip += i->bvec->bv_offset;
- page = i->bvec->bv_page + skip / PAGE_SIZE;
- offset = skip % PAGE_SIZE;
- *offset0 = offset;
+ (*pages)[k++] = bv.bv_page;
+ size += bv.bv_len;
- maxpages = want_pages_array(pages, size, offset, maxpages);
- if (!maxpages)
- return -ENOMEM;
- p = *pages;
- for (k = 0; k < maxpages; k++)
- p[k] = page + k;
+ if (k >= maxpages)
+ break;
+
+ /*
+ * We are done when the end of the bvec doesn't align to a page
+ * boundary as that would create a hole in the returned space.
+ * The caller will handle this with another call to
+ * iov_iter_extract_pages.
+ */
+ if (bv.bv_offset + bv.bv_len != PAGE_SIZE)
+ break;
+
+ bvec_iter_advance_single(i->bvec, &bi, bv.bv_len);
+ }
- size = min_t(size_t, size, maxpages * PAGE_SIZE - offset);
iov_iter_advance(i, size);
return size;
}
diff --git a/lib/kunit/string-stream-test.c b/lib/kunit/string-stream-test.c
index 7511442ea98f..7734e33156f9 100644
--- a/lib/kunit/string-stream-test.c
+++ b/lib/kunit/string-stream-test.c
@@ -9,6 +9,7 @@
#include <kunit/static_stub.h>
#include <kunit/test.h>
#include <linux/ktime.h>
+#include <linux/prandom.h>
#include <linux/slab.h>
#include <linux/timekeeping.h>
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 6f6a5fc85b42..6e0c019f71b6 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -2710,6 +2710,43 @@ static void local_lock_3B(void)
}
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem)
+{
+ return rwsem->dep_map.name;
+}
+#else
+static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem)
+{
+ return NULL;
+}
+#endif
+
+static void test_lockdep_set_subclass_name(void)
+{
+ const char *name_before = rw_semaphore_lockdep_name(&rwsem_X1);
+ const char *name_after;
+
+ lockdep_set_subclass(&rwsem_X1, 1);
+ name_after = rw_semaphore_lockdep_name(&rwsem_X1);
+ DEBUG_LOCKS_WARN_ON(name_before != name_after);
+}
+
+/*
+ * lockdep_set_subclass() should reuse the existing lock class name instead
+ * of creating a new one.
+ */
+static void lockdep_set_subclass_name_test(void)
+{
+ printk(" --------------------------------------------------------------------------\n");
+ printk(" | lockdep_set_subclass() name test|\n");
+ printk(" -----------------------------------\n");
+
+ print_testname("compare name before and after");
+ dotest(test_lockdep_set_subclass_name, SUCCESS, LOCKTYPE_RWSEM);
+ pr_cont("\n");
+}
+
static void local_lock_tests(void)
{
printk(" --------------------------------------------------------------------------\n");
@@ -2920,6 +2957,8 @@ void locking_selftest(void)
dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE, LOCKTYPE_SPECIAL);
pr_cont("\n");
+ lockdep_set_subclass_name_test();
+
if (unexpected_testcase_failures) {
printk("-----------------------------------------------------------------\n");
debug_locks = 0;
diff --git a/lib/logic_pio.c b/lib/logic_pio.c
index 2ea564a40064..e29496a38d06 100644
--- a/lib/logic_pio.c
+++ b/lib/logic_pio.c
@@ -122,7 +122,7 @@ void logic_pio_unregister_range(struct logic_pio_hwaddr *range)
*
* Traverse the io_range_list to find the registered node for @fwnode.
*/
-struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode)
+struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode)
{
struct logic_pio_hwaddr *range, *found_range = NULL;
@@ -186,7 +186,7 @@ resource_size_t logic_pio_to_hwaddr(unsigned long pio)
*
* Returns Logical PIO value if successful, ~0UL otherwise
*/
-unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode,
+unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode,
resource_size_t addr, resource_size_t size)
{
struct logic_pio_hwaddr *range;
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index 20990ecba2dd..3619301dda2e 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -2196,6 +2196,8 @@ static inline void mas_node_or_none(struct ma_state *mas,
/*
* mas_wr_node_walk() - Find the correct offset for the index in the @mas.
+ * If @mas->index cannot be found within the containing
+ * node, we traverse to the last entry in the node.
* @wr_mas: The maple write state
*
* Uses mas_slot_locked() and does not need to worry about dead nodes.
@@ -3532,7 +3534,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas)
return true;
}
-static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
+static void mas_wr_walk_index(struct ma_wr_state *wr_mas)
{
struct ma_state *mas = wr_mas->mas;
@@ -3541,11 +3543,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas)
wr_mas->content = mas_slot_locked(mas, wr_mas->slots,
mas->offset);
if (ma_is_leaf(wr_mas->type))
- return true;
+ return;
mas_wr_walk_traverse(wr_mas);
-
}
- return true;
}
/*
* mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs.
@@ -3765,8 +3765,8 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas)
memset(&b_node, 0, sizeof(struct maple_big_node));
/* Copy l_mas and store the value in b_node. */
mas_store_b_node(&l_wr_mas, &b_node, l_mas.end);
- /* Copy r_mas into b_node. */
- if (r_mas.offset <= r_mas.end)
+ /* Copy r_mas into b_node if there is anything to copy. */
+ if (r_mas.max > r_mas.last)
mas_mab_cp(&r_mas, r_mas.offset, r_mas.end,
&b_node, b_node.b_end + 1);
else
@@ -4218,7 +4218,7 @@ static inline void mas_wr_store_type(struct ma_wr_state *wr_mas)
/* Potential spanning rebalance collapsing a node */
if (new_end < mt_min_slots[wr_mas->type]) {
- if (!mte_is_root(mas->node)) {
+ if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) {
mas->store_type = wr_rebalance;
return;
}
diff --git a/lib/objpool.c b/lib/objpool.c
index 234f9d0bd081..b998b720c732 100644
--- a/lib/objpool.c
+++ b/lib/objpool.c
@@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
* warm caches and TLB hits. in default vmalloc is used to
* reduce the pressure of kernel slab system. as we know,
* mimimal size of vmalloc is one page since vmalloc would
- * always align the requested size to page size
+ * always align the requested size to page size.
+ * But if vmalloc fails or is not available (e.g. GFP_ATOMIC),
+ * allocate the percpu slot with kmalloc.
*/
- if (pool->gfp & GFP_ATOMIC)
- slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
- else
+ slot = NULL;
+
+ if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC)
slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
cpu_to_node(i), __builtin_return_address(0));
- if (!slot)
- return -ENOMEM;
+
+ if (!slot) {
+ slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
+ if (!slot)
+ return -ENOMEM;
+ }
memset(slot, 0, size);
pool->cpu_slots[i] = slot;
diff --git a/lib/random32.c b/lib/random32.c
index 0a5a0e3600c8..24e7acd9343f 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -36,7 +36,7 @@
#include <linux/percpu.h>
#include <linux/export.h>
#include <linux/jiffies.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/sched.h>
#include <linux/bitops.h>
#include <linux/slab.h>
diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c
index 41ae3c7570d3..8655a76d29a1 100644
--- a/lib/rbtree_test.c
+++ b/lib/rbtree_test.c
@@ -2,7 +2,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/rbtree_augmented.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/slab.h>
#include <asm/timex.h>
diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c
index 80e39f003344..33564f965958 100644
--- a/lib/slub_kunit.c
+++ b/lib/slub_kunit.c
@@ -141,7 +141,7 @@ static void test_kmalloc_redzone_access(struct kunit *test)
{
struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32,
SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE);
- u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18);
+ u8 *p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 18));
kasan_disable_current();
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index fa5edd6ef7f7..2eed1ad958e9 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -14,7 +14,7 @@
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_vlan.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/highmem.h>
#include <linux/sched.h>
diff --git a/lib/test_parman.c b/lib/test_parman.c
index 35e32243693c..f9b97426a337 100644
--- a/lib/test_parman.c
+++ b/lib/test_parman.c
@@ -39,7 +39,7 @@
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/err.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/parman.h>
#define TEST_PARMAN_PRIO_SHIFT 7 /* defines number of prios for testing */
diff --git a/lib/test_scanf.c b/lib/test_scanf.c
index 7257b1768545..44f8508c9d88 100644
--- a/lib/test_scanf.c
+++ b/lib/test_scanf.c
@@ -11,7 +11,7 @@
#include <linux/module.h>
#include <linux/overflow.h>
#include <linux/printk.h>
-#include <linux/random.h>
+#include <linux/prandom.h>
#include <linux/slab.h>
#include <linux/string.h>