Diffstat (limited to 'lib')
-rw-r--r--   lib/Kconfig.debug              |  29
-rw-r--r--   lib/buildid.c                  |   5
-rw-r--r--   lib/codetag.c                  |   3
-rw-r--r--   lib/crc32.c                    |   4
-rw-r--r--   lib/crypto/Makefile            |   2
-rw-r--r--   lib/crypto/mpi/mpi-bit.c       |   1
-rw-r--r--   lib/crypto/mpi/mpi-mul.c       |   2
-rw-r--r--   lib/crypto/simd.c              |  11
-rw-r--r--   lib/debugobjects.c             | 849
-rw-r--r--   lib/interval_tree_test.c       |   2
-rw-r--r--   lib/iov_iter.c                 |  93
-rw-r--r--   lib/kunit/string-stream-test.c |   1
-rw-r--r--   lib/locking-selftest.c         |  39
-rw-r--r--   lib/logic_pio.c                |   4
-rw-r--r--   lib/maple_tree.c               |  14
-rw-r--r--   lib/objpool.c                  |  18
-rw-r--r--   lib/random32.c                 |   2
-rw-r--r--   lib/rbtree_test.c              |   2
-rw-r--r--   lib/slub_kunit.c               |   2
-rw-r--r--   lib/test_bpf.c                 |   2
-rw-r--r--   lib/test_parman.c              |   2
-rw-r--r--   lib/test_scanf.c               |   2
22 files changed, 638 insertions(+), 451 deletions(-)
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7315f643817a..652ec5f5fdfb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1328,19 +1328,6 @@ config SCHEDSTATS
 
 endmenu
 
-config DEBUG_TIMEKEEPING
-	bool "Enable extra timekeeping sanity checking"
-	help
-	  This option will enable additional timekeeping sanity checks
-	  which may be helpful when diagnosing issues where timekeeping
-	  problems are suspected.
-
-	  This may include checks in the timekeeping hotpaths, so this
-	  option may have a (very small) performance impact to some
-	  workloads.
-
-	  If unsure, say N.
-
 config DEBUG_PREEMPT
 	bool "Debug preemptible kernel"
 	depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT
@@ -1409,22 +1396,14 @@ config PROVE_LOCKING
 	 For more details, see Documentation/locking/lockdep-design.rst.
 
 config PROVE_RAW_LOCK_NESTING
-	bool "Enable raw_spinlock - spinlock nesting checks"
+	bool
 	depends on PROVE_LOCKING
-	default n
+	default y
 	help
 	 Enable the raw_spinlock vs. spinlock nesting checks which ensure
 	 that the lock nesting rules for PREEMPT_RT enabled kernels are
 	 not violated.
 
-	 NOTE: There are known nesting problems. So if you enable this
-	 option expect lockdep splats until these problems have been fully
-	 addressed which is work in progress. This config switch allows to
-	 identify and analyze these problems. It will be removed and the
-	 check permanently enabled once the main issues have been fixed.
-
-	 If unsure, select N.
-
 config LOCK_STAT
 	bool "Lock usage statistics"
 	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
@@ -1905,7 +1884,7 @@ config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
 	depends on MMU && DEVMEM
 	depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED
-	default y if PPC || X86 || ARM64
+	default y if PPC || X86 || ARM64 || S390
 	help
 	  If this option is disabled, you allow userspace (root) access to
 	  all of memory, including kernel and userspace memory. Accidental
@@ -3060,7 +3039,7 @@ config RUST_BUILD_ASSERT_ALLOW
 	bool "Allow unoptimized build-time assertions"
 	depends on RUST
 	help
-	  Controls how are `build_error!` and `build_assert!` handled during build.
+	  Controls how `build_error!` and `build_assert!` are handled during the build.
 
 	  If calls to them exist in the binary, it may indicate a violated invariant
 	  or that the optimizer failed to verify the invariant during compilation.
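For context on what the now always-on PROVE_RAW_LOCK_NESTING check enforces: on PREEMPT_RT, spinlock_t becomes a sleeping lock, so it must never be acquired inside a raw_spinlock_t critical section. The following is a minimal sketch of that rule, not part of the patch; it uses the standard kernel spinlock API, but the lock and function names are invented for illustration.

/*
 * Illustrative sketch only -- not from the patch above. Shows the lock
 * nesting rule that PROVE_RAW_LOCK_NESTING verifies.
 */
#include <linux/spinlock.h>

static DEFINE_RAW_SPINLOCK(example_raw_lock);
static DEFINE_SPINLOCK(example_lock);

static void invalid_nesting(void)
{
	raw_spin_lock(&example_raw_lock);
	/* lockdep flags an invalid wait context here on PROVE_LOCKING kernels */
	spin_lock(&example_lock);
	spin_unlock(&example_lock);
	raw_spin_unlock(&example_raw_lock);
}

static void valid_nesting(void)
{
	spin_lock(&example_lock);
	/* raw_spinlock_t inside spinlock_t is the permitted order */
	raw_spin_lock(&example_raw_lock);
	raw_spin_unlock(&example_raw_lock);
	spin_unlock(&example_lock);
}

Making the option a hidden, default-y companion of PROVE_LOCKING is consistent with dropping the "known nesting problems" caveat from the help text above: such splats are now treated as bugs to fix rather than expected noise.
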
diff --git a/lib/buildid.c b/lib/buildid.c
index 290641d92ac1..c4b0f376fb34 100644
--- a/lib/buildid.c
+++ b/lib/buildid.c
@@ -5,6 +5,7 @@
 #include <linux/elf.h>
 #include <linux/kernel.h>
 #include <linux/pagemap.h>
+#include <linux/secretmem.h>
 
 #define BUILD_ID 3
 
@@ -64,6 +65,10 @@ static int freader_get_folio(struct freader *r, loff_t file_off)
 
 	freader_put_folio(r);
 
+	/* reject secretmem folios created with memfd_secret() */
+	if (secretmem_mapping(r->file->f_mapping))
+		return -EFAULT;
+
 	r->folio = filemap_get_folio(r->file->f_mapping, file_off >> PAGE_SHIFT);
 
 	/* if sleeping is allowed, wait for the page, if necessary */
diff --git a/lib/codetag.c b/lib/codetag.c
index afa8a2d4f317..d1fbbb7c2ec3 100644
--- a/lib/codetag.c
+++ b/lib/codetag.c
@@ -228,6 +228,9 @@ bool codetag_unload_module(struct module *mod)
 	if (!mod)
 		return true;
 
+	/* await any module's kfree_rcu() operations to complete */
+	kvfree_rcu_barrier();
+
 	mutex_lock(&codetag_lock);
 	list_for_each_entry(cttype, &codetag_types, link) {
 		struct codetag_module *found = NULL;
diff --git a/lib/crc32.c b/lib/crc32.c
index 5649847d0a8d..ff587fee3893 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -205,7 +205,11 @@ EXPORT_SYMBOL(crc32_le);
 EXPORT_SYMBOL(__crc32c_le);
 
 u32 __pure crc32_le_base(u32, unsigned char const *, size_t) __alias(crc32_le);
+EXPORT_SYMBOL(crc32_le_base);
+
 u32 __pure __crc32c_le_base(u32, unsigned char const *, size_t) __alias(__crc32c_le);
+EXPORT_SYMBOL(__crc32c_le_base);
+
 u32 __pure crc32_be_base(u32, unsigned char const *, size_t) __alias(crc32_be);
 
 /*
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 969baab8c805..01fac1cd05a1 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -58,3 +58,5 @@ libcurve25519-y += curve25519-selftest.o
 endif
 
 obj-$(CONFIG_MPILIB) += mpi/
+
+obj-$(CONFIG_CRYPTO_MANAGER_EXTRA_TESTS) += simd.o
diff --git a/lib/crypto/mpi/mpi-bit.c b/lib/crypto/mpi/mpi-bit.c
index 835a2f0622a0..934d81311360 100644
--- a/lib/crypto/mpi/mpi-bit.c
+++ b/lib/crypto/mpi/mpi-bit.c
@@ -95,6 +95,7 @@ int mpi_set_bit(MPI a, unsigned int n)
 	a->d[limbno] |= (A_LIMB_1<<bitno);
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mpi_set_bit);
 
 /*
  * Shift A by N bits to the right.
diff --git a/lib/crypto/mpi/mpi-mul.c b/lib/crypto/mpi/mpi-mul.c
index 892a246216b9..7e6ff1ce3e9b 100644
--- a/lib/crypto/mpi/mpi-mul.c
+++ b/lib/crypto/mpi/mpi-mul.c
@@ -21,7 +21,7 @@ int mpi_mul(MPI w, MPI u, MPI v)
 	int usign, vsign, sign_product;
 	int assign_wp = 0;
 	mpi_ptr_t tmp_limb = NULL;
-	int err;
+	int err = 0;
 
 	if (u->nlimbs < v->nlimbs) {
 		/* Swap U and V.
*/ diff --git a/lib/crypto/simd.c b/lib/crypto/simd.c new file mode 100644 index 000000000000..9c36cb3bb49c --- /dev/null +++ b/lib/crypto/simd.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * SIMD testing utility functions + * + * Copyright 2024 Google LLC + */ + +#include <crypto/internal/simd.h> + +DEFINE_PER_CPU(bool, crypto_simd_disabled_for_test); +EXPORT_PER_CPU_SYMBOL_GPL(crypto_simd_disabled_for_test); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 5ce473ad499b..7f50c4480a4e 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -7,25 +7,30 @@ #define pr_fmt(fmt) "ODEBUG: " fmt +#include <linux/cpu.h> #include <linux/debugobjects.h> -#include <linux/interrupt.h> +#include <linux/debugfs.h> +#include <linux/hash.h> +#include <linux/kmemleak.h> #include <linux/sched.h> +#include <linux/sched/loadavg.h> #include <linux/sched/task_stack.h> #include <linux/seq_file.h> -#include <linux/debugfs.h> #include <linux/slab.h> -#include <linux/hash.h> -#include <linux/kmemleak.h> -#include <linux/cpu.h> +#include <linux/static_key.h> #define ODEBUG_HASH_BITS 14 #define ODEBUG_HASH_SIZE (1 << ODEBUG_HASH_BITS) -#define ODEBUG_POOL_SIZE 1024 -#define ODEBUG_POOL_MIN_LEVEL 256 -#define ODEBUG_POOL_PERCPU_SIZE 64 +/* Must be power of two */ #define ODEBUG_BATCH_SIZE 16 +/* Initial values. Must all be a multiple of batch size */ +#define ODEBUG_POOL_SIZE (64 * ODEBUG_BATCH_SIZE) +#define ODEBUG_POOL_MIN_LEVEL (ODEBUG_POOL_SIZE / 4) + +#define ODEBUG_POOL_PERCPU_SIZE (8 * ODEBUG_BATCH_SIZE) + #define ODEBUG_CHUNK_SHIFT PAGE_SHIFT #define ODEBUG_CHUNK_SIZE (1 << ODEBUG_CHUNK_SHIFT) #define ODEBUG_CHUNK_MASK (~(ODEBUG_CHUNK_SIZE - 1)) @@ -35,7 +40,7 @@ * frequency of 10Hz and about 1024 objects for each freeing operation. * So it is freeing at most 10k debug objects per second. */ -#define ODEBUG_FREE_WORK_MAX 1024 +#define ODEBUG_FREE_WORK_MAX (1024 / ODEBUG_BATCH_SIZE) #define ODEBUG_FREE_WORK_DELAY DIV_ROUND_UP(HZ, 10) struct debug_bucket { @@ -43,16 +48,24 @@ struct debug_bucket { raw_spinlock_t lock; }; -/* - * Debug object percpu free list - * Access is protected by disabling irq - */ -struct debug_percpu_free { - struct hlist_head free_objs; - int obj_free; +struct pool_stats { + unsigned int cur_used; + unsigned int max_used; + unsigned int min_fill; }; -static DEFINE_PER_CPU(struct debug_percpu_free, percpu_obj_pool); +struct obj_pool { + struct hlist_head objects; + unsigned int cnt; + unsigned int min_cnt; + unsigned int max_cnt; + struct pool_stats stats; +} ____cacheline_aligned; + + +static DEFINE_PER_CPU_ALIGNED(struct obj_pool, pool_pcpu) = { + .max_cnt = ODEBUG_POOL_PERCPU_SIZE, +}; static struct debug_bucket obj_hash[ODEBUG_HASH_SIZE]; @@ -60,37 +73,32 @@ static struct debug_obj obj_static_pool[ODEBUG_POOL_SIZE] __initdata; static DEFINE_RAW_SPINLOCK(pool_lock); -static HLIST_HEAD(obj_pool); -static HLIST_HEAD(obj_to_free); +static struct obj_pool pool_global = { + .min_cnt = ODEBUG_POOL_MIN_LEVEL, + .max_cnt = ODEBUG_POOL_SIZE, + .stats = { + .min_fill = ODEBUG_POOL_SIZE, + }, +}; -/* - * Because of the presence of percpu free pools, obj_pool_free will - * under-count those in the percpu free pools. Similarly, obj_pool_used - * will over-count those in the percpu free pools. Adjustments will be - * made at debug_stats_show(). Both obj_pool_min_free and obj_pool_max_used - * can be off. 
- */ -static int __data_racy obj_pool_min_free = ODEBUG_POOL_SIZE; -static int __data_racy obj_pool_free = ODEBUG_POOL_SIZE; -static int obj_pool_used; -static int __data_racy obj_pool_max_used; +static struct obj_pool pool_to_free = { + .max_cnt = UINT_MAX, +}; + +static HLIST_HEAD(pool_boot); + +static unsigned long avg_usage; static bool obj_freeing; -/* The number of objs on the global free list */ -static int obj_nr_tofree; static int __data_racy debug_objects_maxchain __read_mostly; static int __data_racy __maybe_unused debug_objects_maxchecked __read_mostly; static int __data_racy debug_objects_fixups __read_mostly; static int __data_racy debug_objects_warnings __read_mostly; -static int __data_racy debug_objects_enabled __read_mostly +static bool __data_racy debug_objects_enabled __read_mostly = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; -static int debug_objects_pool_size __ro_after_init - = ODEBUG_POOL_SIZE; -static int debug_objects_pool_min_level __ro_after_init - = ODEBUG_POOL_MIN_LEVEL; -static const struct debug_obj_descr *descr_test __read_mostly; -static struct kmem_cache *obj_cache __ro_after_init; +static const struct debug_obj_descr *descr_test __read_mostly; +static struct kmem_cache *obj_cache __ro_after_init; /* * Track numbers of kmem_cache_alloc()/free() calls done. @@ -101,19 +109,20 @@ static int __data_racy debug_objects_freed; static void free_obj_work(struct work_struct *work); static DECLARE_DELAYED_WORK(debug_obj_work, free_obj_work); +static DEFINE_STATIC_KEY_FALSE(obj_cache_enabled); + static int __init enable_object_debug(char *str) { - debug_objects_enabled = 1; + debug_objects_enabled = true; return 0; } +early_param("debug_objects", enable_object_debug); static int __init disable_object_debug(char *str) { - debug_objects_enabled = 0; + debug_objects_enabled = false; return 0; } - -early_param("debug_objects", enable_object_debug); early_param("no_debug_objects", disable_object_debug); static const char *obj_states[ODEBUG_STATE_MAX] = { @@ -125,61 +134,280 @@ static const char *obj_states[ODEBUG_STATE_MAX] = { [ODEBUG_STATE_NOTAVAILABLE] = "not available", }; -static void fill_pool(void) +static __always_inline unsigned int pool_count(struct obj_pool *pool) +{ + return READ_ONCE(pool->cnt); +} + +static __always_inline bool pool_should_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt; +} + +static __always_inline bool pool_must_refill(struct obj_pool *pool) +{ + return pool_count(pool) < pool->min_cnt / 2; +} + +static bool pool_move_batch(struct obj_pool *dst, struct obj_pool *src) { - gfp_t gfp = __GFP_HIGH | __GFP_NOWARN; + struct hlist_node *last, *next_batch, *first_batch; struct debug_obj *obj; - unsigned long flags; - if (likely(READ_ONCE(obj_pool_free) >= debug_objects_pool_min_level)) + if (dst->cnt >= dst->max_cnt || !src->cnt) + return false; + + first_batch = src->objects.first; + obj = hlist_entry(first_batch, typeof(*obj), node); + last = obj->batch_last; + next_batch = last->next; + + /* Move the next batch to the front of the source pool */ + src->objects.first = next_batch; + if (next_batch) + next_batch->pprev = &src->objects.first; + + /* Add the extracted batch to the destination pool */ + last->next = dst->objects.first; + if (last->next) + last->next->pprev = &last->next; + first_batch->pprev = &dst->objects.first; + dst->objects.first = first_batch; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool 
pool_push_batch(struct obj_pool *dst, struct hlist_head *head) +{ + struct hlist_node *last; + struct debug_obj *obj; + + if (dst->cnt >= dst->max_cnt) + return false; + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + + hlist_splice_init(head, last, &dst->objects); + WRITE_ONCE(dst->cnt, dst->cnt + ODEBUG_BATCH_SIZE); + return true; +} + +static bool pool_pop_batch(struct hlist_head *head, struct obj_pool *src) +{ + struct hlist_node *last, *next; + struct debug_obj *obj; + + if (!src->cnt) + return false; + + /* Move the complete list to the head */ + hlist_move_list(&src->objects, head); + + obj = hlist_entry(head->first, typeof(*obj), node); + last = obj->batch_last; + next = last->next; + /* Disconnect the batch from the list */ + last->next = NULL; + + /* Move the node after last back to the source pool. */ + src->objects.first = next; + if (next) + next->pprev = &src->objects.first; + + WRITE_ONCE(src->cnt, src->cnt - ODEBUG_BATCH_SIZE); + return true; +} + +static struct debug_obj *__alloc_object(struct hlist_head *list) +{ + struct debug_obj *obj; + + if (unlikely(!list->first)) + return NULL; + + obj = hlist_entry(list->first, typeof(*obj), node); + hlist_del(&obj->node); + return obj; +} + +static void pcpu_refill_stats(void) +{ + struct pool_stats *stats = &pool_global.stats; + + WRITE_ONCE(stats->cur_used, stats->cur_used + ODEBUG_BATCH_SIZE); + + if (stats->cur_used > stats->max_used) + stats->max_used = stats->cur_used; + + if (pool_global.cnt < stats->min_fill) + stats->min_fill = pool_global.cnt; +} + +static struct debug_obj *pcpu_alloc(void) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + + lockdep_assert_irqs_disabled(); + + for (;;) { + struct debug_obj *obj = __alloc_object(&pcp->objects); + + if (likely(obj)) { + pcp->cnt--; + /* + * If this emptied a batch try to refill from the + * free pool. Don't do that if this was the top-most + * batch as pcpu_free() expects the per CPU pool + * to be less than ODEBUG_POOL_PERCPU_SIZE. + */ + if (unlikely(pcp->cnt < (ODEBUG_POOL_PERCPU_SIZE - ODEBUG_BATCH_SIZE) && + !(pcp->cnt % ODEBUG_BATCH_SIZE))) { + /* + * Don't try to allocate from the regular pool here + * to not exhaust it prematurely. + */ + if (pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + pool_move_batch(pcp, &pool_to_free); + pcpu_refill_stats(); + } + } + return obj; + } + + guard(raw_spinlock)(&pool_lock); + if (!pool_move_batch(pcp, &pool_to_free)) { + if (!pool_move_batch(pcp, &pool_global)) + return NULL; + } + pcpu_refill_stats(); + } +} + +static void pcpu_free(struct debug_obj *obj) +{ + struct obj_pool *pcp = this_cpu_ptr(&pool_pcpu); + struct debug_obj *first; + + lockdep_assert_irqs_disabled(); + + if (!(pcp->cnt % ODEBUG_BATCH_SIZE)) { + obj->batch_last = &obj->node; + } else { + first = hlist_entry(pcp->objects.first, typeof(*first), node); + obj->batch_last = first->batch_last; + } + hlist_add_head(&obj->node, &pcp->objects); + pcp->cnt++; + + /* Pool full ? 
*/ + if (pcp->cnt < ODEBUG_POOL_PERCPU_SIZE) return; + /* Remove a batch from the per CPU pool */ + guard(raw_spinlock)(&pool_lock); + /* Try to fit the batch into the pool_global first */ + if (!pool_move_batch(&pool_global, pcp)) + pool_move_batch(&pool_to_free, pcp); + WRITE_ONCE(pool_global.stats.cur_used, pool_global.stats.cur_used - ODEBUG_BATCH_SIZE); +} + +static void free_object_list(struct hlist_head *head) +{ + struct hlist_node *tmp; + struct debug_obj *obj; + int cnt = 0; + + hlist_for_each_entry_safe(obj, tmp, head, node) { + hlist_del(&obj->node); + kmem_cache_free(obj_cache, obj); + cnt++; + } + debug_objects_freed += cnt; +} + +static void fill_pool_from_freelist(void) +{ + static unsigned long state; + /* * Reuse objs from the global obj_to_free list; they will be * reinitialized when allocating. - * - * obj_nr_tofree is checked locklessly; the READ_ONCE() pairs with - * the WRITE_ONCE() in pool_lock critical sections. */ - if (READ_ONCE(obj_nr_tofree)) { - raw_spin_lock_irqsave(&pool_lock, flags); - /* - * Recheck with the lock held as the worker thread might have - * won the race and freed the global free list already. - */ - while (obj_nr_tofree && (obj_pool_free < debug_objects_pool_min_level)) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); + if (!pool_count(&pool_to_free)) + return; + + /* + * Prevent the context from being scheduled or interrupted after + * setting the state flag; + */ + guard(irqsave)(); + + /* + * Avoid lock contention on &pool_lock and avoid making the cache + * line exclusive by testing the bit before attempting to set it. + */ + if (test_bit(0, &state) || test_and_set_bit(0, &state)) + return; + + /* Avoid taking the lock when there is no work to do */ + while (pool_should_refill(&pool_global) && pool_count(&pool_to_free)) { + guard(raw_spinlock)(&pool_lock); + /* Move a batch if possible */ + pool_move_batch(&pool_global, &pool_to_free); + } + clear_bit(0, &state); +} + +static bool kmem_alloc_batch(struct hlist_head *head, struct kmem_cache *cache, gfp_t gfp) +{ + struct hlist_node *last = NULL; + struct debug_obj *obj; + + for (int cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { + obj = kmem_cache_zalloc(cache, gfp); + if (!obj) { + free_object_list(head); + return false; } - raw_spin_unlock_irqrestore(&pool_lock, flags); + debug_objects_allocated++; + + if (!last) + last = &obj->node; + obj->batch_last = last; + + hlist_add_head(&obj->node, head); } + return true; +} + +static void fill_pool(void) +{ + static atomic_t cpus_allocating; - if (unlikely(!obj_cache)) + /* + * Avoid allocation and lock contention when: + * - One other CPU is already allocating + * - the global pool has not reached the critical level yet + */ + if (!pool_must_refill(&pool_global) && atomic_read(&cpus_allocating)) return; - while (READ_ONCE(obj_pool_free) < debug_objects_pool_min_level) { - struct debug_obj *new[ODEBUG_BATCH_SIZE]; - int cnt; + atomic_inc(&cpus_allocating); + while (pool_should_refill(&pool_global)) { + HLIST_HEAD(head); - for (cnt = 0; cnt < ODEBUG_BATCH_SIZE; cnt++) { - new[cnt] = kmem_cache_zalloc(obj_cache, gfp); - if (!new[cnt]) - break; - } - if (!cnt) - return; + if (!kmem_alloc_batch(&head, obj_cache, __GFP_HIGH | __GFP_NOWARN)) + break; - raw_spin_lock_irqsave(&pool_lock, flags); - while (cnt) { - hlist_add_head(&new[--cnt]->node, &obj_pool); - debug_objects_allocated++; 
- WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); + guard(raw_spinlock_irqsave)(&pool_lock); + if (!pool_push_batch(&pool_global, &head)) + pool_push_batch(&pool_to_free, &head); } + atomic_dec(&cpus_allocating); } /* @@ -201,72 +429,37 @@ static struct debug_obj *lookup_object(void *addr, struct debug_bucket *b) return NULL; } -/* - * Allocate a new object from the hlist - */ -static struct debug_obj *__alloc_object(struct hlist_head *list) +static void calc_usage(void) { - struct debug_obj *obj = NULL; + static DEFINE_RAW_SPINLOCK(avg_lock); + static unsigned long avg_period; + unsigned long cur, now = jiffies; - if (list->first) { - obj = hlist_entry(list->first, typeof(*obj), node); - hlist_del(&obj->node); - } + if (!time_after_eq(now, READ_ONCE(avg_period))) + return; - return obj; + if (!raw_spin_trylock(&avg_lock)) + return; + + WRITE_ONCE(avg_period, now + msecs_to_jiffies(10)); + cur = READ_ONCE(pool_global.stats.cur_used) * ODEBUG_FREE_WORK_MAX; + WRITE_ONCE(avg_usage, calc_load(avg_usage, EXP_5, cur)); + raw_spin_unlock(&avg_lock); } -static struct debug_obj * -alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr) +static struct debug_obj *alloc_object(void *addr, struct debug_bucket *b, + const struct debug_obj_descr *descr) { - struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool); struct debug_obj *obj; - if (likely(obj_cache)) { - obj = __alloc_object(&percpu_pool->free_objs); - if (obj) { - percpu_pool->obj_free--; - goto init_obj; - } - } - - raw_spin_lock(&pool_lock); - obj = __alloc_object(&obj_pool); - if (obj) { - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - - /* - * Looking ahead, allocate one batch of debug objects and - * put them into the percpu free pool. - */ - if (likely(obj_cache)) { - int i; - - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - struct debug_obj *obj2; - - obj2 = __alloc_object(&obj_pool); - if (!obj2) - break; - hlist_add_head(&obj2->node, - &percpu_pool->free_objs); - percpu_pool->obj_free++; - obj_pool_used++; - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - } - } - - if (obj_pool_used > obj_pool_max_used) - obj_pool_max_used = obj_pool_used; + calc_usage(); - if (obj_pool_free < obj_pool_min_free) - obj_pool_min_free = obj_pool_free; - } - raw_spin_unlock(&pool_lock); + if (static_branch_likely(&obj_cache_enabled)) + obj = pcpu_alloc(); + else + obj = __alloc_object(&pool_boot); -init_obj: - if (obj) { + if (likely(obj)) { obj->object = addr; obj->descr = descr; obj->state = ODEBUG_STATE_NONE; @@ -276,142 +469,58 @@ init_obj: return obj; } -/* - * workqueue function to free objects. - * - * To reduce contention on the global pool_lock, the actual freeing of - * debug objects will be delayed if the pool_lock is busy. - */ +/* workqueue function to free objects. 
*/ static void free_obj_work(struct work_struct *work) { - struct hlist_node *tmp; - struct debug_obj *obj; - unsigned long flags; - HLIST_HEAD(tofree); + static unsigned long last_use_avg; + unsigned long cur_used, last_used, delta; + unsigned int max_free = 0; WRITE_ONCE(obj_freeing, false); - if (!raw_spin_trylock_irqsave(&pool_lock, flags)) - return; - if (obj_pool_free >= debug_objects_pool_size) - goto free_objs; + /* Rate limit freeing based on current use average */ + cur_used = READ_ONCE(avg_usage); + last_used = last_use_avg; + last_use_avg = cur_used; - /* - * The objs on the pool list might be allocated before the work is - * run, so recheck if pool list it full or not, if not fill pool - * list from the global free list. As it is likely that a workload - * may be gearing up to use more and more objects, don't free any - * of them until the next round. - */ - while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) { - obj = hlist_entry(obj_to_free.first, typeof(*obj), node); - hlist_del(&obj->node); - hlist_add_head(&obj->node, &obj_pool); - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree - 1); - } - raw_spin_unlock_irqrestore(&pool_lock, flags); - return; + if (!pool_count(&pool_to_free)) + return; -free_objs: - /* - * Pool list is already full and there are still objs on the free - * list. Move remaining free objs to a temporary list to free the - * memory outside the pool_lock held region. - */ - if (obj_nr_tofree) { - hlist_move_list(&obj_to_free, &tofree); - debug_objects_freed += obj_nr_tofree; - WRITE_ONCE(obj_nr_tofree, 0); + if (cur_used <= last_used) { + delta = (last_used - cur_used) / ODEBUG_FREE_WORK_MAX; + max_free = min(delta, ODEBUG_FREE_WORK_MAX); } - raw_spin_unlock_irqrestore(&pool_lock, flags); - hlist_for_each_entry_safe(obj, tmp, &tofree, node) { - hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + for (int cnt = 0; cnt < ODEBUG_FREE_WORK_MAX; cnt++) { + HLIST_HEAD(tofree); + + /* Acquire and drop the lock for each batch */ + scoped_guard(raw_spinlock_irqsave, &pool_lock) { + if (!pool_to_free.cnt) + return; + + /* Refill the global pool if possible */ + if (pool_move_batch(&pool_global, &pool_to_free)) { + /* Don't free as there seems to be demand */ + max_free = 0; + } else if (max_free) { + pool_pop_batch(&tofree, &pool_to_free); + max_free--; + } else { + return; + } + } + free_object_list(&tofree); } } static void __free_object(struct debug_obj *obj) { - struct debug_obj *objs[ODEBUG_BATCH_SIZE]; - struct debug_percpu_free *percpu_pool; - int lookahead_count = 0; - unsigned long flags; - bool work; - - local_irq_save(flags); - if (!obj_cache) - goto free_to_obj_pool; - - /* - * Try to free it into the percpu pool first. - */ - percpu_pool = this_cpu_ptr(&percpu_obj_pool); - if (percpu_pool->obj_free < ODEBUG_POOL_PERCPU_SIZE) { - hlist_add_head(&obj->node, &percpu_pool->free_objs); - percpu_pool->obj_free++; - local_irq_restore(flags); - return; - } - - /* - * As the percpu pool is full, look ahead and pull out a batch - * of objects from the percpu pool and free them as well. 
- */ - for (; lookahead_count < ODEBUG_BATCH_SIZE; lookahead_count++) { - objs[lookahead_count] = __alloc_object(&percpu_pool->free_objs); - if (!objs[lookahead_count]) - break; - percpu_pool->obj_free--; - } - -free_to_obj_pool: - raw_spin_lock(&pool_lock); - work = (obj_pool_free > debug_objects_pool_size) && obj_cache && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX); - obj_pool_used--; - - if (work) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - hlist_add_head(&obj->node, &obj_to_free); - if (lookahead_count) { - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_to_free); - } - } - - if ((obj_pool_free > debug_objects_pool_size) && - (obj_nr_tofree < ODEBUG_FREE_WORK_MAX)) { - int i; - - /* - * Free one more batch of objects from obj_pool. - */ - for (i = 0; i < ODEBUG_BATCH_SIZE; i++) { - obj = __alloc_object(&obj_pool); - hlist_add_head(&obj->node, &obj_to_free); - WRITE_ONCE(obj_pool_free, obj_pool_free - 1); - WRITE_ONCE(obj_nr_tofree, obj_nr_tofree + 1); - } - } - } else { - WRITE_ONCE(obj_pool_free, obj_pool_free + 1); - hlist_add_head(&obj->node, &obj_pool); - if (lookahead_count) { - WRITE_ONCE(obj_pool_free, obj_pool_free + lookahead_count); - obj_pool_used -= lookahead_count; - while (lookahead_count) { - hlist_add_head(&objs[--lookahead_count]->node, - &obj_pool); - } - } - } - raw_spin_unlock(&pool_lock); - local_irq_restore(flags); + guard(irqsave)(); + if (static_branch_likely(&obj_cache_enabled)) + pcpu_free(obj); + else + hlist_add_head(&obj->node, &pool_boot); } /* @@ -421,63 +530,52 @@ free_to_obj_pool: static void free_object(struct debug_obj *obj) { __free_object(obj); - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } } -#ifdef CONFIG_HOTPLUG_CPU -static int object_cpu_offline(unsigned int cpu) +static void put_objects(struct hlist_head *list) { - struct debug_percpu_free *percpu_pool; struct hlist_node *tmp; struct debug_obj *obj; - unsigned long flags; - /* Remote access is safe as the CPU is dead already */ - percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu); - hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) { + /* + * Using free_object() puts the objects into reuse or schedules + * them for freeing and it get's all the accounting correct. + */ + hlist_for_each_entry_safe(obj, tmp, list, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + free_object(obj); } +} - raw_spin_lock_irqsave(&pool_lock, flags); - obj_pool_used -= percpu_pool->obj_free; - debug_objects_freed += percpu_pool->obj_free; - raw_spin_unlock_irqrestore(&pool_lock, flags); - - percpu_pool->obj_free = 0; +#ifdef CONFIG_HOTPLUG_CPU +static int object_cpu_offline(unsigned int cpu) +{ + /* Remote access is safe as the CPU is dead already */ + struct obj_pool *pcp = per_cpu_ptr(&pool_pcpu, cpu); + put_objects(&pcp->objects); + pcp->cnt = 0; return 0; } #endif -/* - * We run out of memory. That means we probably have tons of objects - * allocated. - */ +/* Out of memory. Free all objects from hash */ static void debug_objects_oom(void) { struct debug_bucket *db = obj_hash; - struct hlist_node *tmp; HLIST_HEAD(freelist); - struct debug_obj *obj; - unsigned long flags; - int i; pr_warn("Out of memory. 
ODEBUG disabled\n"); - for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { - raw_spin_lock_irqsave(&db->lock, flags); - hlist_move_list(&db->list, &freelist); - raw_spin_unlock_irqrestore(&db->lock, flags); + for (int i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { + scoped_guard(raw_spinlock_irqsave, &db->lock) + hlist_move_list(&db->list, &freelist); - /* Now free them */ - hlist_for_each_entry_safe(obj, tmp, &freelist, node) { - hlist_del(&obj->node); - free_object(obj); - } + put_objects(&freelist); } } @@ -592,12 +690,24 @@ static struct debug_obj *lookup_object_or_alloc(void *addr, struct debug_bucket } /* Out of memory. Do the cleanup outside of the locked region */ - debug_objects_enabled = 0; + debug_objects_enabled = false; return NULL; } static void debug_objects_fill_pool(void) { + if (!static_branch_likely(&obj_cache_enabled)) + return; + + if (likely(!pool_should_refill(&pool_global))) + return; + + /* Try reusing objects from obj_to_free_list */ + fill_pool_from_freelist(); + + if (likely(!pool_should_refill(&pool_global))) + return; + /* * On RT enabled kernels the pool refill must happen in preemptible * context -- for !RT kernels we rely on the fact that spinlock_t and @@ -1007,7 +1117,7 @@ repeat: debug_objects_maxchecked = objs_checked; /* Schedule work to actually kmem_cache_free() objects */ - if (!READ_ONCE(obj_freeing) && READ_ONCE(obj_nr_tofree)) { + if (!READ_ONCE(obj_freeing) && pool_count(&pool_to_free)) { WRITE_ONCE(obj_freeing, true); schedule_delayed_work(&debug_obj_work, ODEBUG_FREE_WORK_DELAY); } @@ -1024,23 +1134,33 @@ void debug_check_no_obj_freed(const void *address, unsigned long size) static int debug_stats_show(struct seq_file *m, void *v) { - int cpu, obj_percpu_free = 0; + unsigned int cpu, pool_used, pcp_free = 0; + /* + * pool_global.stats.cur_used is the number of batches currently + * handed out to per CPU pools. Convert it to number of objects + * and subtract the number of free objects in the per CPU pools. + * As this is lockless the number is an estimate. 
+ */ for_each_possible_cpu(cpu) - obj_percpu_free += per_cpu(percpu_obj_pool.obj_free, cpu); - - seq_printf(m, "max_chain :%d\n", debug_objects_maxchain); - seq_printf(m, "max_checked :%d\n", debug_objects_maxchecked); - seq_printf(m, "warnings :%d\n", debug_objects_warnings); - seq_printf(m, "fixups :%d\n", debug_objects_fixups); - seq_printf(m, "pool_free :%d\n", READ_ONCE(obj_pool_free) + obj_percpu_free); - seq_printf(m, "pool_pcp_free :%d\n", obj_percpu_free); - seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); - seq_printf(m, "pool_used :%d\n", obj_pool_used - obj_percpu_free); - seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); - seq_printf(m, "on_free_list :%d\n", READ_ONCE(obj_nr_tofree)); - seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); - seq_printf(m, "objs_freed :%d\n", debug_objects_freed); + pcp_free += per_cpu(pool_pcpu.cnt, cpu); + + pool_used = READ_ONCE(pool_global.stats.cur_used); + pcp_free = min(pool_used, pcp_free); + pool_used -= pcp_free; + + seq_printf(m, "max_chain : %d\n", debug_objects_maxchain); + seq_printf(m, "max_checked : %d\n", debug_objects_maxchecked); + seq_printf(m, "warnings : %d\n", debug_objects_warnings); + seq_printf(m, "fixups : %d\n", debug_objects_fixups); + seq_printf(m, "pool_free : %u\n", pool_count(&pool_global) + pcp_free); + seq_printf(m, "pool_pcp_free : %u\n", pcp_free); + seq_printf(m, "pool_min_free : %u\n", data_race(pool_global.stats.min_fill)); + seq_printf(m, "pool_used : %u\n", pool_used); + seq_printf(m, "pool_max_used : %u\n", data_race(pool_global.stats.max_used)); + seq_printf(m, "on_free_list : %u\n", pool_count(&pool_to_free)); + seq_printf(m, "objs_allocated: %d\n", debug_objects_allocated); + seq_printf(m, "objs_freed : %d\n", debug_objects_freed); return 0; } DEFINE_SHOW_ATTRIBUTE(debug_stats); @@ -1194,7 +1314,7 @@ check_results(void *addr, enum debug_obj_state state, int fixups, int warnings) out: raw_spin_unlock_irqrestore(&db->lock, flags); if (res) - debug_objects_enabled = 0; + debug_objects_enabled = false; return res; } @@ -1209,7 +1329,7 @@ static __initconst const struct debug_obj_descr descr_type_test = { static __initdata struct self_test obj = { .static_init = 0 }; -static void __init debug_objects_selftest(void) +static bool __init debug_objects_selftest(void) { int fixups, oldfixups, warnings, oldwarnings; unsigned long flags; @@ -1278,9 +1398,10 @@ out: descr_test = NULL; local_irq_restore(flags); + return debug_objects_enabled; } #else -static inline void debug_objects_selftest(void) { } +static inline bool debug_objects_selftest(void) { return true; } #endif /* @@ -1295,65 +1416,54 @@ void __init debug_objects_early_init(void) for (i = 0; i < ODEBUG_HASH_SIZE; i++) raw_spin_lock_init(&obj_hash[i].lock); + /* Keep early boot simple and add everything to the boot list */ for (i = 0; i < ODEBUG_POOL_SIZE; i++) - hlist_add_head(&obj_static_pool[i].node, &obj_pool); + hlist_add_head(&obj_static_pool[i].node, &pool_boot); } /* - * Convert the statically allocated objects to dynamic ones: + * Convert the statically allocated objects to dynamic ones. + * debug_objects_mem_init() is called early so only one CPU is up and + * interrupts are disabled, which means it is safe to replace the active + * object references. 
*/ -static int __init debug_objects_replace_static_objects(void) +static bool __init debug_objects_replace_static_objects(struct kmem_cache *cache) { struct debug_bucket *db = obj_hash; struct hlist_node *tmp; - struct debug_obj *obj, *new; + struct debug_obj *obj; HLIST_HEAD(objects); - int i, cnt = 0; + int i; - for (i = 0; i < ODEBUG_POOL_SIZE; i++) { - obj = kmem_cache_zalloc(obj_cache, GFP_KERNEL); - if (!obj) + for (i = 0; i < ODEBUG_POOL_SIZE; i += ODEBUG_BATCH_SIZE) { + if (!kmem_alloc_batch(&objects, cache, GFP_KERNEL)) goto free; - hlist_add_head(&obj->node, &objects); + pool_push_batch(&pool_global, &objects); } - debug_objects_allocated += i; - - /* - * debug_objects_mem_init() is now called early that only one CPU is up - * and interrupts have been disabled, so it is safe to replace the - * active object references. - */ - - /* Remove the statically allocated objects from the pool */ - hlist_for_each_entry_safe(obj, tmp, &obj_pool, node) - hlist_del(&obj->node); - /* Move the allocated objects to the pool */ - hlist_move_list(&objects, &obj_pool); + /* Disconnect the boot pool. */ + pool_boot.first = NULL; /* Replace the active object references */ for (i = 0; i < ODEBUG_HASH_SIZE; i++, db++) { hlist_move_list(&db->list, &objects); hlist_for_each_entry(obj, &objects, node) { - new = hlist_entry(obj_pool.first, typeof(*obj), node); - hlist_del(&new->node); + struct debug_obj *new = pcpu_alloc(); + /* copy object data */ *new = *obj; hlist_add_head(&new->node, &db->list); - cnt++; } } - - pr_debug("%d of %d active objects replaced\n", - cnt, obj_pool_used); - return 0; + return true; free: - hlist_for_each_entry_safe(obj, tmp, &objects, node) { + /* Can't use free_object_list() as the cache is not populated yet */ + hlist_for_each_entry_safe(obj, tmp, &pool_global.objects, node) { hlist_del(&obj->node); - kmem_cache_free(obj_cache, obj); + kmem_cache_free(cache, obj); } - return -ENOMEM; + return false; } /* @@ -1364,43 +1474,40 @@ free: */ void __init debug_objects_mem_init(void) { - int cpu, extras; + struct kmem_cache *cache; + int extras; if (!debug_objects_enabled) return; - /* - * Initialize the percpu object pools - * - * Initialization is not strictly necessary, but was done for - * completeness. - */ - for_each_possible_cpu(cpu) - INIT_HLIST_HEAD(&per_cpu(percpu_obj_pool.free_objs, cpu)); + if (!debug_objects_selftest()) + return; - obj_cache = kmem_cache_create("debug_objects_cache", - sizeof (struct debug_obj), 0, - SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, - NULL); + cache = kmem_cache_create("debug_objects_cache", sizeof (struct debug_obj), 0, + SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE, NULL); - if (!obj_cache || debug_objects_replace_static_objects()) { - debug_objects_enabled = 0; - kmem_cache_destroy(obj_cache); - pr_warn("out of memory.\n"); + if (!cache || !debug_objects_replace_static_objects(cache)) { + debug_objects_enabled = false; + pr_warn("Out of memory.\n"); return; - } else - debug_objects_selftest(); - -#ifdef CONFIG_HOTPLUG_CPU - cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, - object_cpu_offline); -#endif + } /* - * Increase the thresholds for allocating and freeing objects - * according to the number of possible CPUs available in the system. + * Adjust the thresholds for allocating and freeing objects + * according to the number of possible CPUs available in the + * system. 
*/ extras = num_possible_cpus() * ODEBUG_BATCH_SIZE; - debug_objects_pool_size += extras; - debug_objects_pool_min_level += extras; + pool_global.max_cnt += extras; + pool_global.min_cnt += extras; + + /* Everything worked. Expose the cache */ + obj_cache = cache; + static_branch_enable(&obj_cache_enabled); + +#ifdef CONFIG_HOTPLUG_CPU + cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL, + object_cpu_offline); +#endif + return; } diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index f37f4d44faa9..837064b83a6c 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/interval_tree.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 1abb32c0da50..9ec806f989f2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -461,6 +461,8 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t n, copied = 0; + bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) || + PageHighMem(page); if (!page_copy_sane(page, offset, bytes)) return 0; @@ -471,7 +473,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, char *p; n = bytes - copied; - if (PageHighMem(page)) { + if (uses_kmap) { page += offset / PAGE_SIZE; offset %= PAGE_SIZE; n = min_t(size_t, n, PAGE_SIZE - offset); @@ -482,7 +484,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset, kunmap_atomic(p); copied += n; offset += n; - } while (PageHighMem(page) && copied != bytes && n > 0); + } while (uses_kmap && copied != bytes && n > 0); return copied; } @@ -1021,15 +1023,18 @@ static ssize_t iter_folioq_get_pages(struct iov_iter *iter, size_t offset = iov_offset, fsize = folioq_folio_size(folioq, slot); size_t part = PAGE_SIZE - offset % PAGE_SIZE; - part = umin(part, umin(maxsize - extracted, fsize - offset)); - count -= part; - iov_offset += part; - extracted += part; + if (offset < fsize) { + part = umin(part, umin(maxsize - extracted, fsize - offset)); + count -= part; + iov_offset += part; + extracted += part; + + *pages = folio_page(folio, offset / PAGE_SIZE); + get_page(*pages); + pages++; + maxpages--; + } - *pages = folio_page(folio, offset / PAGE_SIZE); - get_page(*pages); - pages++; - maxpages--; if (maxpages == 0 || extracted >= maxsize) break; @@ -1677,8 +1682,8 @@ static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, } /* - * Extract a list of contiguous pages from an ITER_BVEC iterator. This does - * not get references on the pages, nor does it get a pin on them. + * Extract a list of virtually contiguous pages from an ITER_BVEC iterator. + * This does not get references on the pages, nor does it get a pin on them. 
*/ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, struct page ***pages, size_t maxsize, @@ -1686,35 +1691,59 @@ static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, iov_iter_extraction_t extraction_flags, size_t *offset0) { - struct page **p, *page; - size_t skip = i->iov_offset, offset, size; - int k; + size_t skip = i->iov_offset, size = 0; + struct bvec_iter bi; + int k = 0; - for (;;) { - if (i->nr_segs == 0) - return 0; - size = min(maxsize, i->bvec->bv_len - skip); - if (size) - break; + if (i->nr_segs == 0) + return 0; + + if (i->iov_offset == i->bvec->bv_len) { i->iov_offset = 0; i->nr_segs--; i->bvec++; skip = 0; } + bi.bi_idx = 0; + bi.bi_size = maxsize; + bi.bi_bvec_done = skip; + + maxpages = want_pages_array(pages, maxsize, skip, maxpages); + + while (bi.bi_size && bi.bi_idx < i->nr_segs) { + struct bio_vec bv = bvec_iter_bvec(i->bvec, bi); + + /* + * The iov_iter_extract_pages interface only allows an offset + * into the first page. Break out of the loop if we see an + * offset into subsequent pages, the caller will have to call + * iov_iter_extract_pages again for the reminder. + */ + if (k) { + if (bv.bv_offset) + break; + } else { + *offset0 = bv.bv_offset; + } - skip += i->bvec->bv_offset; - page = i->bvec->bv_page + skip / PAGE_SIZE; - offset = skip % PAGE_SIZE; - *offset0 = offset; + (*pages)[k++] = bv.bv_page; + size += bv.bv_len; - maxpages = want_pages_array(pages, size, offset, maxpages); - if (!maxpages) - return -ENOMEM; - p = *pages; - for (k = 0; k < maxpages; k++) - p[k] = page + k; + if (k >= maxpages) + break; + + /* + * We are done when the end of the bvec doesn't align to a page + * boundary as that would create a hole in the returned space. + * The caller will handle this with another call to + * iov_iter_extract_pages. + */ + if (bv.bv_offset + bv.bv_len != PAGE_SIZE) + break; + + bvec_iter_advance_single(i->bvec, &bi, bv.bv_len); + } - size = min_t(size_t, size, maxpages * PAGE_SIZE - offset); iov_iter_advance(i, size); return size; } diff --git a/lib/kunit/string-stream-test.c b/lib/kunit/string-stream-test.c index 7511442ea98f..7734e33156f9 100644 --- a/lib/kunit/string-stream-test.c +++ b/lib/kunit/string-stream-test.c @@ -9,6 +9,7 @@ #include <kunit/static_stub.h> #include <kunit/test.h> #include <linux/ktime.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/timekeeping.h> diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index 6f6a5fc85b42..6e0c019f71b6 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -2710,6 +2710,43 @@ static void local_lock_3B(void) } +#ifdef CONFIG_DEBUG_LOCK_ALLOC +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return rwsem->dep_map.name; +} +#else +static inline const char *rw_semaphore_lockdep_name(struct rw_semaphore *rwsem) +{ + return NULL; +} +#endif + +static void test_lockdep_set_subclass_name(void) +{ + const char *name_before = rw_semaphore_lockdep_name(&rwsem_X1); + const char *name_after; + + lockdep_set_subclass(&rwsem_X1, 1); + name_after = rw_semaphore_lockdep_name(&rwsem_X1); + DEBUG_LOCKS_WARN_ON(name_before != name_after); +} + +/* + * lockdep_set_subclass() should reuse the existing lock class name instead + * of creating a new one. 
+ */ +static void lockdep_set_subclass_name_test(void) +{ + printk(" --------------------------------------------------------------------------\n"); + printk(" | lockdep_set_subclass() name test|\n"); + printk(" -----------------------------------\n"); + + print_testname("compare name before and after"); + dotest(test_lockdep_set_subclass_name, SUCCESS, LOCKTYPE_RWSEM); + pr_cont("\n"); +} + static void local_lock_tests(void) { printk(" --------------------------------------------------------------------------\n"); @@ -2920,6 +2957,8 @@ void locking_selftest(void) dotest(hardirq_deadlock_softirq_not_deadlock, FAILURE, LOCKTYPE_SPECIAL); pr_cont("\n"); + lockdep_set_subclass_name_test(); + if (unexpected_testcase_failures) { printk("-----------------------------------------------------------------\n"); debug_locks = 0; diff --git a/lib/logic_pio.c b/lib/logic_pio.c index 2ea564a40064..e29496a38d06 100644 --- a/lib/logic_pio.c +++ b/lib/logic_pio.c @@ -122,7 +122,7 @@ void logic_pio_unregister_range(struct logic_pio_hwaddr *range) * * Traverse the io_range_list to find the registered node for @fwnode. */ -struct logic_pio_hwaddr *find_io_range_by_fwnode(struct fwnode_handle *fwnode) +struct logic_pio_hwaddr *find_io_range_by_fwnode(const struct fwnode_handle *fwnode) { struct logic_pio_hwaddr *range, *found_range = NULL; @@ -186,7 +186,7 @@ resource_size_t logic_pio_to_hwaddr(unsigned long pio) * * Returns Logical PIO value if successful, ~0UL otherwise */ -unsigned long logic_pio_trans_hwaddr(struct fwnode_handle *fwnode, +unsigned long logic_pio_trans_hwaddr(const struct fwnode_handle *fwnode, resource_size_t addr, resource_size_t size) { struct logic_pio_hwaddr *range; diff --git a/lib/maple_tree.c b/lib/maple_tree.c index 20990ecba2dd..3619301dda2e 100644 --- a/lib/maple_tree.c +++ b/lib/maple_tree.c @@ -2196,6 +2196,8 @@ static inline void mas_node_or_none(struct ma_state *mas, /* * mas_wr_node_walk() - Find the correct offset for the index in the @mas. + * If @mas->index cannot be found within the containing + * node, we traverse to the last entry in the node. * @wr_mas: The maple write state * * Uses mas_slot_locked() and does not need to worry about dead nodes. @@ -3532,7 +3534,7 @@ static bool mas_wr_walk(struct ma_wr_state *wr_mas) return true; } -static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) +static void mas_wr_walk_index(struct ma_wr_state *wr_mas) { struct ma_state *mas = wr_mas->mas; @@ -3541,11 +3543,9 @@ static bool mas_wr_walk_index(struct ma_wr_state *wr_mas) wr_mas->content = mas_slot_locked(mas, wr_mas->slots, mas->offset); if (ma_is_leaf(wr_mas->type)) - return true; + return; mas_wr_walk_traverse(wr_mas); - } - return true; } /* * mas_extend_spanning_null() - Extend a store of a %NULL to include surrounding %NULLs. @@ -3765,8 +3765,8 @@ static noinline void mas_wr_spanning_store(struct ma_wr_state *wr_mas) memset(&b_node, 0, sizeof(struct maple_big_node)); /* Copy l_mas and store the value in b_node. */ mas_store_b_node(&l_wr_mas, &b_node, l_mas.end); - /* Copy r_mas into b_node. */ - if (r_mas.offset <= r_mas.end) + /* Copy r_mas into b_node if there is anything to copy. 
*/ + if (r_mas.max > r_mas.last) mas_mab_cp(&r_mas, r_mas.offset, r_mas.end, &b_node, b_node.b_end + 1); else @@ -4218,7 +4218,7 @@ static inline void mas_wr_store_type(struct ma_wr_state *wr_mas) /* Potential spanning rebalance collapsing a node */ if (new_end < mt_min_slots[wr_mas->type]) { - if (!mte_is_root(mas->node)) { + if (!mte_is_root(mas->node) && !(mas->mas_flags & MA_STATE_BULK)) { mas->store_type = wr_rebalance; return; } diff --git a/lib/objpool.c b/lib/objpool.c index 234f9d0bd081..b998b720c732 100644 --- a/lib/objpool.c +++ b/lib/objpool.c @@ -74,15 +74,21 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs, * warm caches and TLB hits. in default vmalloc is used to * reduce the pressure of kernel slab system. as we know, * mimimal size of vmalloc is one page since vmalloc would - * always align the requested size to page size + * always align the requested size to page size. + * but if vmalloc fails or it is not available (e.g. GFP_ATOMIC) + * allocate percpu slot with kmalloc. */ - if (pool->gfp & GFP_ATOMIC) - slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); - else + slot = NULL; + + if ((pool->gfp & (GFP_ATOMIC | GFP_KERNEL)) != GFP_ATOMIC) slot = __vmalloc_node(size, sizeof(void *), pool->gfp, cpu_to_node(i), __builtin_return_address(0)); - if (!slot) - return -ENOMEM; + + if (!slot) { + slot = kmalloc_node(size, pool->gfp, cpu_to_node(i)); + if (!slot) + return -ENOMEM; + } memset(slot, 0, size); pool->cpu_slots[i] = slot; diff --git a/lib/random32.c b/lib/random32.c index 0a5a0e3600c8..24e7acd9343f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -36,7 +36,7 @@ #include <linux/percpu.h> #include <linux/export.h> #include <linux/jiffies.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/sched.h> #include <linux/bitops.h> #include <linux/slab.h> diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 41ae3c7570d3..8655a76d29a1 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -2,7 +2,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/rbtree_augmented.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <asm/timex.h> diff --git a/lib/slub_kunit.c b/lib/slub_kunit.c index 80e39f003344..33564f965958 100644 --- a/lib/slub_kunit.c +++ b/lib/slub_kunit.c @@ -141,7 +141,7 @@ static void test_kmalloc_redzone_access(struct kunit *test) { struct kmem_cache *s = test_kmem_cache_create("TestSlub_RZ_kmalloc", 32, SLAB_KMALLOC|SLAB_STORE_USER|SLAB_RED_ZONE); - u8 *p = __kmalloc_cache_noprof(s, GFP_KERNEL, 18); + u8 *p = alloc_hooks(__kmalloc_cache_noprof(s, GFP_KERNEL, 18)); kasan_disable_current(); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index fa5edd6ef7f7..2eed1ad958e9 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -14,7 +14,7 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/highmem.h> #include <linux/sched.h> diff --git a/lib/test_parman.c b/lib/test_parman.c index 35e32243693c..f9b97426a337 100644 --- a/lib/test_parman.c +++ b/lib/test_parman.c @@ -39,7 +39,7 @@ #include <linux/slab.h> #include <linux/bitops.h> #include <linux/err.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/parman.h> #define TEST_PARMAN_PRIO_SHIFT 7 /* defines number of prios for testing */ diff --git a/lib/test_scanf.c b/lib/test_scanf.c index 7257b1768545..44f8508c9d88 100644 --- a/lib/test_scanf.c +++ b/lib/test_scanf.c @@ -11,7 +11,7 @@ #include 
<linux/module.h> #include <linux/overflow.h> #include <linux/printk.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/string.h> |
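
The repeated one-line change in the test modules above swaps <linux/random.h> for <linux/prandom.h>, the header that declares the seeded pseudo-random state helpers (struct rnd_state, prandom_seed_state(), prandom_u32_state()) these tests rely on. Below is a minimal sketch of that usage, not taken from any of the patched files; the module and symbol names are invented for illustration.

// SPDX-License-Identifier: GPL-2.0
#include <linux/module.h>
#include <linux/prandom.h>

static struct rnd_state example_rnd;

static int __init prandom_include_example_init(void)
{
	u32 v;

	/* deterministic, seeded stream -- what the selftests above depend on */
	prandom_seed_state(&example_rnd, 42);
	v = prandom_u32_state(&example_rnd);
	pr_info("prandom example: first value %u\n", v);
	return 0;
}
module_init(prandom_include_example_init);

static void __exit prandom_include_example_exit(void)
{
}
module_exit(prandom_include_example_exit);

MODULE_DESCRIPTION("Sketch of the prandom state API declared in <linux/prandom.h>");
MODULE_LICENSE("GPL");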