Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                    |   8
-rw-r--r--  kernel/audit.h                    |   4
-rw-r--r--  kernel/auditsc.c                  |  10
-rw-r--r--  kernel/bpf/bpf_lsm.c              |   1
-rw-r--r--  kernel/cfi.c                      | 352
-rw-r--r--  kernel/configs/rust.config        |   1
-rw-r--r--  kernel/events/core.c              |   9
-rw-r--r--  kernel/kallsyms.c                 |  43
-rw-r--r--  kernel/kthread.c                  |   3
-rw-r--r--  kernel/livepatch/core.c           |   4
-rw-r--r--  kernel/module/main.c              |  50
-rw-r--r--  kernel/rcu/rcutorture.c           | 290
-rw-r--r--  kernel/rcu/srcutiny.c             |  14
-rw-r--r--  kernel/rcu/tasks.h                |   5
-rw-r--r--  kernel/rcu/tiny.c                 |  27
-rw-r--r--  kernel/rcu/tree.c                 | 330
-rw-r--r--  kernel/rcu/tree_exp.h             |  57
-rw-r--r--  kernel/rcu/tree_nocb.h            |  10
-rw-r--r--  kernel/rcu/tree_plugin.h          |  26
-rw-r--r--  kernel/rcu/tree_stall.h           |   5
-rw-r--r--  kernel/sched/core.c               |  14
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  |  30
-rw-r--r--  kernel/smp.c                      |   3
-rw-r--r--  kernel/user_namespace.c           |   5
-rw-r--r--  kernel/workqueue.c                |   2
25 files changed, 754 insertions, 549 deletions
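
The largest part of this series is the new "full-state" polled grace-period API for RCU (struct rcu_gp_oldstate together with get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), poll_state_synchronize_rcu_full() and cond_synchronize_rcu_full()), plus rcutorture coverage for it. As a rough illustration only, not part of the patch, a caller might use it as in the sketch below; the struct widget wrapper and the widget_*() helpers are hypothetical, while the RCU calls and struct rcu_gp_oldstate are the ones added by this series in kernel/rcu/tree.c.

#include <linux/rcupdate.h>
#include <linux/slab.h>

struct widget {
	struct rcu_gp_oldstate gp_state;	/* normal + expedited GP snapshot */
	void *payload;
};

/* Retire a widget: record the grace-period state at removal time. */
static void widget_retire(struct widget *w)
{
	get_state_synchronize_rcu_full(&w->gp_state);
}

/* Reclaim a widget later, blocking only if no full grace period has elapsed. */
static void widget_reclaim(struct widget *w)
{
	if (!poll_state_synchronize_rcu_full(&w->gp_state))
		cond_synchronize_rcu_full(&w->gp_state);	/* waits if still needed */
	kfree(w->payload);
	kfree(w);
}

Unlike the older unsigned long cookies, the two-field rcu_gp_oldstate snapshot is documented below as never missing a grace period, at the cost of twice the storage.
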
diff --git a/kernel/audit.c b/kernel/audit.c index a75978ae38ad..9bc0b0301198 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -321,7 +321,6 @@ static inline int audit_rate_check(void) static DEFINE_SPINLOCK(lock); unsigned long flags; unsigned long now; - unsigned long elapsed; int retval = 0; if (!audit_rate_limit) return 1; @@ -330,9 +329,8 @@ static inline int audit_rate_check(void) if (++messages < audit_rate_limit) { retval = 1; } else { - now = jiffies; - elapsed = now - last_check; - if (elapsed > HZ) { + now = jiffies; + if (time_after(now, last_check + HZ)) { last_check = now; messages = 0; retval = 1; @@ -366,7 +364,7 @@ void audit_log_lost(const char *message) if (!print) { spin_lock_irqsave(&lock, flags); now = jiffies; - if (now - last_msg > HZ) { + if (time_after(now, last_msg + HZ)) { print = 1; last_msg = now; } diff --git a/kernel/audit.h b/kernel/audit.h index 58b66543b4d5..c57b008b9914 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -133,7 +133,7 @@ struct audit_context { struct sockaddr_storage *sockaddr; size_t sockaddr_len; /* Save things to print about task_struct */ - pid_t pid, ppid; + pid_t ppid; kuid_t uid, euid, suid, fsuid; kgid_t gid, egid, sgid, fsgid; unsigned long personality; @@ -245,8 +245,6 @@ struct audit_netlink_list { int audit_send_list_thread(void *_dest); -extern int selinux_audit_rule_update(void); - extern struct mutex audit_filter_mutex; extern int audit_del_rule(struct audit_entry *entry); extern void audit_free_rule_rcu(struct rcu_head *head); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 79a5da1bc5bb..9f8c05228d6d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -965,7 +965,7 @@ static void audit_reset_context(struct audit_context *ctx) if (!ctx) return; - /* if ctx is non-null, reset the "ctx->state" regardless */ + /* if ctx is non-null, reset the "ctx->context" regardless */ ctx->context = AUDIT_CTX_UNUSED; if (ctx->dummy) return; @@ -1002,7 +1002,7 @@ static void audit_reset_context(struct audit_context *ctx) kfree(ctx->sockaddr); ctx->sockaddr = NULL; ctx->sockaddr_len = 0; - ctx->pid = ctx->ppid = 0; + ctx->ppid = 0; ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0); ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0); ctx->personality = 0; @@ -1016,7 +1016,6 @@ static void audit_reset_context(struct audit_context *ctx) WARN_ON(!list_empty(&ctx->killed_trees)); audit_free_module(ctx); ctx->fds[0] = -1; - audit_proctitle_free(ctx); ctx->type = 0; /* reset last for audit_free_*() */ } @@ -1077,6 +1076,7 @@ static inline void audit_free_context(struct audit_context *context) { /* resetting is extra work, but it is likely just noise */ audit_reset_context(context); + audit_proctitle_free(context); free_tree_refs(context); kfree(context->filterkey); kfree(context); @@ -1833,7 +1833,7 @@ void __audit_free(struct task_struct *tsk) /* We are called either by do_exit() or the fork() error handling code; * in the former case tsk == current and in the latter tsk is a - * random task_struct that doesn't doesn't have any meaningful data we + * random task_struct that doesn't have any meaningful data we * need to log via audit_log_exit(). 
*/ if (tsk == current && !context->dummy) { @@ -2069,7 +2069,7 @@ void __audit_syscall_exit(int success, long return_code) /* run through both filters to ensure we set the filterkey properly */ audit_filter_syscall(current, context); audit_filter_inodes(current, context); - if (context->current_state < AUDIT_STATE_RECORD) + if (context->current_state != AUDIT_STATE_RECORD) goto out; audit_log_exit(); diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c index fa71d58b7ded..761998fda762 100644 --- a/kernel/bpf/bpf_lsm.c +++ b/kernel/bpf/bpf_lsm.c @@ -335,6 +335,7 @@ BTF_ID(func, bpf_lsm_task_getsecid_obj) BTF_ID(func, bpf_lsm_task_prctl) BTF_ID(func, bpf_lsm_task_setscheduler) BTF_ID(func, bpf_lsm_task_to_inode) +BTF_ID(func, bpf_lsm_userns_create) BTF_SET_END(sleepable_lsm_hooks) bool bpf_lsm_is_sleepable_hook(u32 btf_id) diff --git a/kernel/cfi.c b/kernel/cfi.c index 2046276ee234..08caad776717 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -1,339 +1,101 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Clang Control Flow Integrity (CFI) error and slowpath handling. + * Clang Control Flow Integrity (CFI) error handling. * - * Copyright (C) 2021 Google LLC + * Copyright (C) 2022 Google LLC */ -#include <linux/hardirq.h> -#include <linux/kallsyms.h> -#include <linux/module.h> -#include <linux/mutex.h> -#include <linux/printk.h> -#include <linux/ratelimit.h> -#include <linux/rcupdate.h> -#include <linux/vmalloc.h> -#include <asm/cacheflush.h> -#include <asm/set_memory.h> +#include <linux/cfi.h> -/* Compiler-defined handler names */ -#ifdef CONFIG_CFI_PERMISSIVE -#define cfi_failure_handler __ubsan_handle_cfi_check_fail -#else -#define cfi_failure_handler __ubsan_handle_cfi_check_fail_abort -#endif - -static inline void handle_cfi_failure(void *ptr) +enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr, + unsigned long *target, u32 type) { - if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) - WARN_RATELIMIT(1, "CFI failure (target: %pS):\n", ptr); + if (target) + pr_err("CFI failure at %pS (target: %pS; expected type: 0x%08x)\n", + (void *)addr, (void *)*target, type); else - panic("CFI failure (target: %pS)\n", ptr); -} - -#ifdef CONFIG_MODULES -#ifdef CONFIG_CFI_CLANG_SHADOW -/* - * Index type. A 16-bit index can address at most (2^16)-2 pages (taking - * into account SHADOW_INVALID), i.e. ~256M with 4k pages. - */ -typedef u16 shadow_t; -#define SHADOW_INVALID ((shadow_t)~0UL) - -struct cfi_shadow { - /* Page index for the beginning of the shadow */ - unsigned long base; - /* An array of __cfi_check locations (as indices to the shadow) */ - shadow_t shadow[1]; -} __packed; - -/* - * The shadow covers ~128M from the beginning of the module region. If - * the region is larger, we fall back to __module_address for the rest. 
- */ -#define __SHADOW_RANGE (_UL(SZ_128M) >> PAGE_SHIFT) - -/* The in-memory size of struct cfi_shadow, always at least one page */ -#define __SHADOW_PAGES ((__SHADOW_RANGE * sizeof(shadow_t)) >> PAGE_SHIFT) -#define SHADOW_PAGES max(1UL, __SHADOW_PAGES) -#define SHADOW_SIZE (SHADOW_PAGES << PAGE_SHIFT) - -/* The actual size of the shadow array, minus metadata */ -#define SHADOW_ARR_SIZE (SHADOW_SIZE - offsetof(struct cfi_shadow, shadow)) -#define SHADOW_ARR_SLOTS (SHADOW_ARR_SIZE / sizeof(shadow_t)) - -static DEFINE_MUTEX(shadow_update_lock); -static struct cfi_shadow __rcu *cfi_shadow __read_mostly; + pr_err("CFI failure at %pS (no target information)\n", + (void *)addr); -/* Returns the index in the shadow for the given address */ -static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr) -{ - unsigned long index; - unsigned long page = ptr >> PAGE_SHIFT; - - if (unlikely(page < s->base)) - return -1; /* Outside of module area */ - - index = page - s->base; - - if (index >= SHADOW_ARR_SLOTS) - return -1; /* Cannot be addressed with shadow */ - - return (int)index; -} - -/* Returns the page address for an index in the shadow */ -static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s, - int index) -{ - if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS)) - return 0; - - return (s->base + index) << PAGE_SHIFT; -} - -/* Returns the __cfi_check function address for the given shadow location */ -static inline unsigned long shadow_to_check_fn(const struct cfi_shadow *s, - int index) -{ - if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS)) - return 0; - - if (unlikely(s->shadow[index] == SHADOW_INVALID)) - return 0; - - /* __cfi_check is always page aligned */ - return (s->base + s->shadow[index]) << PAGE_SHIFT; -} - -static void prepare_next_shadow(const struct cfi_shadow __rcu *prev, - struct cfi_shadow *next) -{ - int i, index, check; - - /* Mark everything invalid */ - memset(next->shadow, 0xFF, SHADOW_ARR_SIZE); - - if (!prev) - return; /* No previous shadow */ - - /* If the base address didn't change, an update is not needed */ - if (prev->base == next->base) { - memcpy(next->shadow, prev->shadow, SHADOW_ARR_SIZE); - return; - } - - /* Convert the previous shadow to the new address range */ - for (i = 0; i < SHADOW_ARR_SLOTS; ++i) { - if (prev->shadow[i] == SHADOW_INVALID) - continue; - - index = ptr_to_shadow(next, shadow_to_ptr(prev, i)); - if (index < 0) - continue; - - check = ptr_to_shadow(next, - shadow_to_check_fn(prev, prev->shadow[i])); - if (check < 0) - continue; - - next->shadow[index] = (shadow_t)check; - } -} - -static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod, - unsigned long min_addr, unsigned long max_addr) -{ - int check_index; - unsigned long check = (unsigned long)mod->cfi_check; - unsigned long ptr; - - if (unlikely(!PAGE_ALIGNED(check))) { - pr_warn("cfi: not using shadow for module %s\n", mod->name); - return; + if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) { + __warn(NULL, 0, (void *)addr, 0, regs, NULL); + return BUG_TRAP_TYPE_WARN; } - check_index = ptr_to_shadow(s, check); - if (check_index < 0) - return; /* Module not addressable with shadow */ - - /* For each page, store the check function index in the shadow */ - for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) { - int index = ptr_to_shadow(s, ptr); - - if (index >= 0) { - /* Each page must only contain one module */ - WARN_ON_ONCE(s->shadow[index] != SHADOW_INVALID); - s->shadow[index] = (shadow_t)check_index; - } - } + return BUG_TRAP_TYPE_BUG; 
} -static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod, - unsigned long min_addr, unsigned long max_addr) +#ifdef CONFIG_ARCH_USES_CFI_TRAPS +static inline unsigned long trap_address(s32 *p) { - unsigned long ptr; - - for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) { - int index = ptr_to_shadow(s, ptr); - - if (index >= 0) - s->shadow[index] = SHADOW_INVALID; - } + return (unsigned long)((long)p + (long)*p); } -typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *, - unsigned long min_addr, unsigned long max_addr); - -static void update_shadow(struct module *mod, unsigned long base_addr, - update_shadow_fn fn) +static bool is_trap(unsigned long addr, s32 *start, s32 *end) { - struct cfi_shadow *prev; - struct cfi_shadow *next; - unsigned long min_addr, max_addr; - - next = vmalloc(SHADOW_SIZE); - - mutex_lock(&shadow_update_lock); - prev = rcu_dereference_protected(cfi_shadow, - mutex_is_locked(&shadow_update_lock)); - - if (next) { - next->base = base_addr >> PAGE_SHIFT; - prepare_next_shadow(prev, next); + s32 *p; - min_addr = (unsigned long)mod->core_layout.base; - max_addr = min_addr + mod->core_layout.text_size; - fn(next, mod, min_addr & PAGE_MASK, max_addr & PAGE_MASK); - - set_memory_ro((unsigned long)next, SHADOW_PAGES); - } - - rcu_assign_pointer(cfi_shadow, next); - mutex_unlock(&shadow_update_lock); - synchronize_rcu(); - - if (prev) { - set_memory_rw((unsigned long)prev, SHADOW_PAGES); - vfree(prev); + for (p = start; p < end; ++p) { + if (trap_address(p) == addr) + return true; } -} -void cfi_module_add(struct module *mod, unsigned long base_addr) -{ - update_shadow(mod, base_addr, add_module_to_shadow); + return false; } -void cfi_module_remove(struct module *mod, unsigned long base_addr) -{ - update_shadow(mod, base_addr, remove_module_from_shadow); -} - -static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s, - unsigned long ptr) +#ifdef CONFIG_MODULES +/* Populates `kcfi_trap(_end)?` fields in `struct module`. 
*/ +void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { - int index; - - if (unlikely(!s)) - return NULL; /* No shadow available */ - - index = ptr_to_shadow(s, ptr); - if (index < 0) - return NULL; /* Cannot be addressed with shadow */ + char *secstrings; + unsigned int i; - return (cfi_check_fn)shadow_to_check_fn(s, index); -} - -static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr) -{ - cfi_check_fn fn; + mod->kcfi_traps = NULL; + mod->kcfi_traps_end = NULL; - rcu_read_lock_sched_notrace(); - fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr); - rcu_read_unlock_sched_notrace(); + secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; - return fn; -} - -#else /* !CONFIG_CFI_CLANG_SHADOW */ + for (i = 1; i < hdr->e_shnum; i++) { + if (strcmp(secstrings + sechdrs[i].sh_name, "__kcfi_traps")) + continue; -static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr) -{ - return NULL; + mod->kcfi_traps = (s32 *)sechdrs[i].sh_addr; + mod->kcfi_traps_end = (s32 *)(sechdrs[i].sh_addr + sechdrs[i].sh_size); + break; + } } -#endif /* CONFIG_CFI_CLANG_SHADOW */ - -static inline cfi_check_fn find_module_check_fn(unsigned long ptr) +static bool is_module_cfi_trap(unsigned long addr) { - cfi_check_fn fn = NULL; struct module *mod; + bool found = false; rcu_read_lock_sched_notrace(); - mod = __module_address(ptr); - if (mod) - fn = mod->cfi_check; - rcu_read_unlock_sched_notrace(); - - return fn; -} - -static inline cfi_check_fn find_check_fn(unsigned long ptr) -{ - cfi_check_fn fn = NULL; - unsigned long flags; - bool rcu_idle; - - if (is_kernel_text(ptr)) - return __cfi_check; - /* - * Indirect call checks can happen when RCU is not watching. Both - * the shadow and __module_address use RCU, so we need to wake it - * up if necessary. 
- */ - rcu_idle = !rcu_is_watching(); - if (rcu_idle) { - local_irq_save(flags); - ct_irq_enter(); - } - - if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) - fn = find_shadow_check_fn(ptr); - if (!fn) - fn = find_module_check_fn(ptr); + mod = __module_address(addr); + if (mod) + found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end); - if (rcu_idle) { - ct_irq_exit(); - local_irq_restore(flags); - } + rcu_read_unlock_sched_notrace(); - return fn; + return found; } - -void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag) +#else /* CONFIG_MODULES */ +static inline bool is_module_cfi_trap(unsigned long addr) { - cfi_check_fn fn = find_check_fn((unsigned long)ptr); - - if (likely(fn)) - fn(id, ptr, diag); - else /* Don't allow unchecked modules */ - handle_cfi_failure(ptr); + return false; } -EXPORT_SYMBOL(__cfi_slowpath_diag); +#endif /* CONFIG_MODULES */ -#else /* !CONFIG_MODULES */ +extern s32 __start___kcfi_traps[]; +extern s32 __stop___kcfi_traps[]; -void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag) +bool is_cfi_trap(unsigned long addr) { - handle_cfi_failure(ptr); /* No modules */ -} -EXPORT_SYMBOL(__cfi_slowpath_diag); + if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps)) + return true; -#endif /* CONFIG_MODULES */ - -void cfi_failure_handler(void *data, void *ptr, void *vtable) -{ - handle_cfi_failure(ptr); + return is_module_cfi_trap(addr); } -EXPORT_SYMBOL(cfi_failure_handler); +#endif /* CONFIG_ARCH_USES_CFI_TRAPS */ diff --git a/kernel/configs/rust.config b/kernel/configs/rust.config new file mode 100644 index 000000000000..38a7c5362c9c --- /dev/null +++ b/kernel/configs/rust.config @@ -0,0 +1 @@ +CONFIG_RUST=y diff --git a/kernel/events/core.c b/kernel/events/core.c index 2621fd24ad26..ff4bffc502c6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6893,9 +6893,16 @@ static void perf_output_read_group(struct perf_output_handle *handle, { struct perf_event *leader = event->group_leader, *sub; u64 read_format = event->attr.read_format; + unsigned long flags; u64 values[6]; int n = 0; + /* + * Disabling interrupts avoids all counter scheduling + * (context switches, timer based rotation and IPIs). + */ + local_irq_save(flags); + values[n++] = 1 + leader->nr_siblings; if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -6931,6 +6938,8 @@ static void perf_output_read_group(struct perf_output_handle *handle, __output_copy(handle, values, n * sizeof(u64)); } + + local_irq_restore(flags); } #define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\ diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index 3e7e2c2ad2f7..60c20f301a6b 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -50,12 +50,20 @@ static unsigned int kallsyms_expand_symbol(unsigned int off, data = &kallsyms_names[off]; len = *data; data++; + off++; + + /* If MSB is 1, it is a "big" symbol, so needs an additional byte. */ + if ((len & 0x80) != 0) { + len = (len & 0x7F) | (*data << 7); + data++; + off++; + } /* * Update the offset to return the offset for the next symbol on * the compressed stream. */ - off += len + 1; + off += len; /* * For every byte on the compressed symbol data, copy the table @@ -108,7 +116,7 @@ static char kallsyms_get_symbol_type(unsigned int off) static unsigned int get_symbol_offset(unsigned long pos) { const u8 *name; - int i; + int i, len; /* * Use the closest marker we have. 
We have markers every 256 positions, @@ -122,8 +130,18 @@ static unsigned int get_symbol_offset(unsigned long pos) * so we just need to add the len to the current pointer for every * symbol we wish to skip. */ - for (i = 0; i < (pos & 0xFF); i++) - name = name + (*name) + 1; + for (i = 0; i < (pos & 0xFF); i++) { + len = *name; + + /* + * If MSB is 1, it is a "big" symbol, so we need to look into + * the next byte (and skip it, too). + */ + if ((len & 0x80) != 0) + len = ((len & 0x7F) | (name[1] << 7)) + 1; + + name = name + len + 1; + } return name - kallsyms_names; } @@ -159,7 +177,6 @@ static bool cleanup_symbol_name(char *s) * character in an identifier in C. Suffixes observed: * - foo.llvm.[0-9a-f]+ * - foo.[0-9a-f]+ - * - foo.[0-9a-f]+.cfi_jt */ res = strchr(s, '.'); if (res) { @@ -167,22 +184,6 @@ static bool cleanup_symbol_name(char *s) return true; } - if (!IS_ENABLED(CONFIG_CFI_CLANG) || - !IS_ENABLED(CONFIG_LTO_CLANG_THIN) || - CONFIG_CLANG_VERSION >= 130000) - return false; - - /* - * Prior to LLVM 13, the following suffixes were observed when thinLTO - * and CFI are both enabled: - * - foo$[0-9]+ - */ - res = strrchr(s, '$'); - if (res) { - *res = '\0'; - return true; - } - return false; } diff --git a/kernel/kthread.c b/kernel/kthread.c index 3c677918d8f2..28a6b7ab4a0f 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -1050,8 +1050,7 @@ static void __kthread_queue_delayed_work(struct kthread_worker *worker, struct timer_list *timer = &dwork->timer; struct kthread_work *work = &dwork->work; - WARN_ON_FUNCTION_MISMATCH(timer->function, - kthread_delayed_work_timer_fn); + WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn); /* * If @delay is 0, queue @dwork->work immediately. This is for diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c index bc475e62279d..ec06ce59d728 100644 --- a/kernel/livepatch/core.c +++ b/kernel/livepatch/core.c @@ -213,7 +213,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, * we use the smallest/strictest upper bound possible (56, based on * the current definition of MODULE_NAME_LEN) to prevent overflows. */ - BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128); + BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 512); relas = (Elf_Rela *) relasec->sh_addr; /* For each rela in this klp relocation section */ @@ -227,7 +227,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab, /* Format: .klp.sym.sym_objname.sym_name,sympos */ cnt = sscanf(strtab + sym->st_name, - ".klp.sym.%55[^.].%127[^,],%lu", + ".klp.sym.%55[^.].%511[^,],%lu", sym_objname, sym_name, &sympos); if (cnt != 3) { pr_err("symbol %s has an incorrectly formatted name\n", diff --git a/kernel/module/main.c b/kernel/module/main.c index a4e4d84b6f4e..70c0b2c6fef8 100644 --- a/kernel/module/main.c +++ b/kernel/module/main.c @@ -53,6 +53,7 @@ #include <linux/bsearch.h> #include <linux/dynamic_debug.h> #include <linux/audit.h> +#include <linux/cfi.h> #include <uapi/linux/module.h> #include "internal.h" @@ -1144,8 +1145,6 @@ void __weak module_arch_freeing_init(struct module *mod) { } -static void cfi_cleanup(struct module *mod); - /* Free a module, remove from lists, etc. */ static void free_module(struct module *mod) { @@ -1190,9 +1189,6 @@ static void free_module(struct module *mod) mod->name); mutex_unlock(&module_mutex); - /* Clean up CFI for the module. 
*/ - cfi_cleanup(mod); - /* This may be empty, but that's OK */ module_arch_freeing_init(mod); module_memfree(mod->init_layout.base); @@ -2602,8 +2598,9 @@ static int complete_formation(struct module *mod, struct load_info *info) if (err < 0) goto out; - /* This relies on module_mutex for list integrity. */ + /* These rely on module_mutex for list integrity. */ module_bug_finalize(info->hdr, info->sechdrs, mod); + module_cfi_finalize(info->hdr, info->sechdrs, mod); if (module_check_misalignment(mod)) goto out_misaligned; @@ -2665,8 +2662,6 @@ static int unknown_module_param_cb(char *param, char *val, const char *modname, return 0; } -static void cfi_init(struct module *mod); - /* * Allocate and load the module: note that size of section 0 is always * zero, and we rely on this for optional sections. @@ -2796,9 +2791,6 @@ static int load_module(struct load_info *info, const char __user *uargs, flush_module_icache(mod); - /* Setup CFI for the module. */ - cfi_init(mod); - /* Now copy in args */ mod->args = strndup_user(uargs, ~0UL >> 1); if (IS_ERR(mod->args)) { @@ -2875,7 +2867,6 @@ static int load_module(struct load_info *info, const char __user *uargs, synchronize_rcu(); kfree(mod->args); free_arch_cleanup: - cfi_cleanup(mod); module_arch_cleanup(mod); free_modinfo: free_modinfo(mod); @@ -2961,41 +2952,6 @@ static inline int within(unsigned long addr, void *start, unsigned long size) return ((void *)addr >= start && (void *)addr < start + size); } -static void cfi_init(struct module *mod) -{ -#ifdef CONFIG_CFI_CLANG - initcall_t *init; -#ifdef CONFIG_MODULE_UNLOAD - exitcall_t *exit; -#endif - - rcu_read_lock_sched(); - mod->cfi_check = (cfi_check_fn) - find_kallsyms_symbol_value(mod, "__cfi_check"); - init = (initcall_t *) - find_kallsyms_symbol_value(mod, "__cfi_jt_init_module"); - /* Fix init/exit functions to point to the CFI jump table */ - if (init) - mod->init = *init; -#ifdef CONFIG_MODULE_UNLOAD - exit = (exitcall_t *) - find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module"); - if (exit) - mod->exit = *exit; -#endif - rcu_read_unlock_sched(); - - cfi_module_add(mod, mod_tree.addr_min); -#endif -} - -static void cfi_cleanup(struct module *mod) -{ -#ifdef CONFIG_CFI_CLANG - cfi_module_remove(mod, mod_tree.addr_min); -#endif -} - /* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! 
*/ char *module_flags(struct module *mod, char *buf, bool show_state) { diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index d8e1b270a065..503c2aa845a4 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -84,10 +84,15 @@ torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress test torture_param(bool, fwd_progress_need_resched, 1, "Hide cond_resched() behind need_resched()"); torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives"); torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait primitives"); +torture_param(bool, gp_cond_full, false, "Use conditional/async full-state GP wait primitives"); +torture_param(bool, gp_cond_exp_full, false, + "Use conditional/async full-stateexpedited GP wait primitives"); torture_param(bool, gp_exp, false, "Use expedited GP wait primitives"); torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives"); torture_param(bool, gp_poll, false, "Use polling GP wait primitives"); torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives"); +torture_param(bool, gp_poll_full, false, "Use polling full-state GP wait primitives"); +torture_param(bool, gp_poll_exp_full, false, "Use polling full-state expedited GP wait primitives"); torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives"); torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers"); torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers"); @@ -194,16 +199,24 @@ static int rcu_torture_writer_state; #define RTWS_DEF_FREE 3 #define RTWS_EXP_SYNC 4 #define RTWS_COND_GET 5 -#define RTWS_COND_GET_EXP 6 -#define RTWS_COND_SYNC 7 -#define RTWS_COND_SYNC_EXP 8 -#define RTWS_POLL_GET 9 -#define RTWS_POLL_GET_EXP 10 -#define RTWS_POLL_WAIT 11 -#define RTWS_POLL_WAIT_EXP 12 -#define RTWS_SYNC 13 -#define RTWS_STUTTER 14 -#define RTWS_STOPPING 15 +#define RTWS_COND_GET_FULL 6 +#define RTWS_COND_GET_EXP 7 +#define RTWS_COND_GET_EXP_FULL 8 +#define RTWS_COND_SYNC 9 +#define RTWS_COND_SYNC_FULL 10 +#define RTWS_COND_SYNC_EXP 11 +#define RTWS_COND_SYNC_EXP_FULL 12 +#define RTWS_POLL_GET 13 +#define RTWS_POLL_GET_FULL 14 +#define RTWS_POLL_GET_EXP 15 +#define RTWS_POLL_GET_EXP_FULL 16 +#define RTWS_POLL_WAIT 17 +#define RTWS_POLL_WAIT_FULL 18 +#define RTWS_POLL_WAIT_EXP 19 +#define RTWS_POLL_WAIT_EXP_FULL 20 +#define RTWS_SYNC 21 +#define RTWS_STUTTER 22 +#define RTWS_STOPPING 23 static const char * const rcu_torture_writer_state_names[] = { "RTWS_FIXED_DELAY", "RTWS_DELAY", @@ -211,13 +224,21 @@ static const char * const rcu_torture_writer_state_names[] = { "RTWS_DEF_FREE", "RTWS_EXP_SYNC", "RTWS_COND_GET", + "RTWS_COND_GET_FULL", "RTWS_COND_GET_EXP", + "RTWS_COND_GET_EXP_FULL", "RTWS_COND_SYNC", + "RTWS_COND_SYNC_FULL", "RTWS_COND_SYNC_EXP", + "RTWS_COND_SYNC_EXP_FULL", "RTWS_POLL_GET", + "RTWS_POLL_GET_FULL", "RTWS_POLL_GET_EXP", + "RTWS_POLL_GET_EXP_FULL", "RTWS_POLL_WAIT", + "RTWS_POLL_WAIT_FULL", "RTWS_POLL_WAIT_EXP", + "RTWS_POLL_WAIT_EXP_FULL", "RTWS_SYNC", "RTWS_STUTTER", "RTWS_STOPPING", @@ -332,13 +353,21 @@ struct rcu_torture_ops { void (*exp_sync)(void); unsigned long (*get_gp_state_exp)(void); unsigned long (*start_gp_poll_exp)(void); + void (*start_gp_poll_exp_full)(struct rcu_gp_oldstate *rgosp); bool (*poll_gp_state_exp)(unsigned long oldstate); void (*cond_sync_exp)(unsigned long oldstate); + void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*get_gp_state)(void); + void 
(*get_gp_state_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*get_gp_completed)(void); + void (*get_gp_completed_full)(struct rcu_gp_oldstate *rgosp); unsigned long (*start_gp_poll)(void); + void (*start_gp_poll_full)(struct rcu_gp_oldstate *rgosp); bool (*poll_gp_state)(unsigned long oldstate); + bool (*poll_gp_state_full)(struct rcu_gp_oldstate *rgosp); + bool (*poll_need_2gp)(bool poll, bool poll_full); void (*cond_sync)(unsigned long oldstate); + void (*cond_sync_full)(struct rcu_gp_oldstate *rgosp); call_rcu_func_t call; void (*cb_barrier)(void); void (*fqs)(void); @@ -489,6 +518,11 @@ static void rcu_sync_torture_init(void) INIT_LIST_HEAD(&rcu_torture_removed); } +static bool rcu_poll_need_2gp(bool poll, bool poll_full) +{ + return poll; +} + static struct rcu_torture_ops rcu_ops = { .ttype = RCU_FLAVOR, .init = rcu_sync_torture_init, @@ -502,12 +536,19 @@ static struct rcu_torture_ops rcu_ops = { .sync = synchronize_rcu, .exp_sync = synchronize_rcu_expedited, .get_gp_state = get_state_synchronize_rcu, + .get_gp_state_full = get_state_synchronize_rcu_full, .get_gp_completed = get_completed_synchronize_rcu, + .get_gp_completed_full = get_completed_synchronize_rcu_full, .start_gp_poll = start_poll_synchronize_rcu, + .start_gp_poll_full = start_poll_synchronize_rcu_full, .poll_gp_state = poll_state_synchronize_rcu, + .poll_gp_state_full = poll_state_synchronize_rcu_full, + .poll_need_2gp = rcu_poll_need_2gp, .cond_sync = cond_synchronize_rcu, + .cond_sync_full = cond_synchronize_rcu_full, .get_gp_state_exp = get_state_synchronize_rcu, .start_gp_poll_exp = start_poll_synchronize_rcu_expedited, + .start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full, .poll_gp_state_exp = poll_state_synchronize_rcu, .cond_sync_exp = cond_synchronize_rcu_expedited, .call = call_rcu, @@ -709,6 +750,9 @@ static struct rcu_torture_ops srcud_ops = { .deferred_free = srcu_torture_deferred_free, .sync = srcu_torture_synchronize, .exp_sync = srcu_torture_synchronize_expedited, + .get_gp_state = srcu_torture_get_gp_state, + .start_gp_poll = srcu_torture_start_gp_poll, + .poll_gp_state = srcu_torture_poll_gp_state, .call = srcu_torture_call, .cb_barrier = srcu_torture_barrier, .stats = srcu_torture_stats, @@ -1148,15 +1192,35 @@ static int nsynctypes; */ static void rcu_torture_write_types(void) { - bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_exp1 = gp_exp; - bool gp_poll_exp1 = gp_poll_exp, gp_normal1 = gp_normal, gp_poll1 = gp_poll; - bool gp_sync1 = gp_sync; + bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_cond_full1 = gp_cond_full; + bool gp_cond_exp_full1 = gp_cond_exp_full, gp_exp1 = gp_exp, gp_poll_exp1 = gp_poll_exp; + bool gp_poll_exp_full1 = gp_poll_exp_full, gp_normal1 = gp_normal, gp_poll1 = gp_poll; + bool gp_poll_full1 = gp_poll_full, gp_sync1 = gp_sync; /* Initialize synctype[] array. If none set, take default. 
*/ - if (!gp_cond1 && !gp_cond_exp1 && !gp_exp1 && !gp_poll_exp && - !gp_normal1 && !gp_poll1 && !gp_sync1) - gp_cond1 = gp_cond_exp1 = gp_exp1 = gp_poll_exp1 = - gp_normal1 = gp_poll1 = gp_sync1 = true; + if (!gp_cond1 && + !gp_cond_exp1 && + !gp_cond_full1 && + !gp_cond_exp_full1 && + !gp_exp1 && + !gp_poll_exp1 && + !gp_poll_exp_full1 && + !gp_normal1 && + !gp_poll1 && + !gp_poll_full1 && + !gp_sync1) { + gp_cond1 = true; + gp_cond_exp1 = true; + gp_cond_full1 = true; + gp_cond_exp_full1 = true; + gp_exp1 = true; + gp_poll_exp1 = true; + gp_poll_exp_full1 = true; + gp_normal1 = true; + gp_poll1 = true; + gp_poll_full1 = true; + gp_sync1 = true; + } if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) { synctype[nsynctypes++] = RTWS_COND_GET; pr_info("%s: Testing conditional GPs.\n", __func__); @@ -1169,6 +1233,19 @@ static void rcu_torture_write_types(void) } else if (gp_cond_exp && (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp)) { pr_alert("%s: gp_cond_exp without primitives.\n", __func__); } + if (gp_cond_full1 && cur_ops->get_gp_state && cur_ops->cond_sync_full) { + synctype[nsynctypes++] = RTWS_COND_GET_FULL; + pr_info("%s: Testing conditional full-state GPs.\n", __func__); + } else if (gp_cond_full && (!cur_ops->get_gp_state || !cur_ops->cond_sync_full)) { + pr_alert("%s: gp_cond_full without primitives.\n", __func__); + } + if (gp_cond_exp_full1 && cur_ops->get_gp_state_exp && cur_ops->cond_sync_exp_full) { + synctype[nsynctypes++] = RTWS_COND_GET_EXP_FULL; + pr_info("%s: Testing conditional full-state expedited GPs.\n", __func__); + } else if (gp_cond_exp_full && + (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp_full)) { + pr_alert("%s: gp_cond_exp_full without primitives.\n", __func__); + } if (gp_exp1 && cur_ops->exp_sync) { synctype[nsynctypes++] = RTWS_EXP_SYNC; pr_info("%s: Testing expedited GPs.\n", __func__); @@ -1187,12 +1264,25 @@ static void rcu_torture_write_types(void) } else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) { pr_alert("%s: gp_poll without primitives.\n", __func__); } + if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) { + synctype[nsynctypes++] = RTWS_POLL_GET_FULL; + pr_info("%s: Testing polling full-state GPs.\n", __func__); + } else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) { + pr_alert("%s: gp_poll_full without primitives.\n", __func__); + } if (gp_poll_exp1 && cur_ops->start_gp_poll_exp && cur_ops->poll_gp_state_exp) { synctype[nsynctypes++] = RTWS_POLL_GET_EXP; pr_info("%s: Testing polling expedited GPs.\n", __func__); } else if (gp_poll_exp && (!cur_ops->start_gp_poll_exp || !cur_ops->poll_gp_state_exp)) { pr_alert("%s: gp_poll_exp without primitives.\n", __func__); } + if (gp_poll_exp_full1 && cur_ops->start_gp_poll_exp_full && cur_ops->poll_gp_state_full) { + synctype[nsynctypes++] = RTWS_POLL_GET_EXP_FULL; + pr_info("%s: Testing polling full-state expedited GPs.\n", __func__); + } else if (gp_poll_exp_full && + (!cur_ops->start_gp_poll_exp_full || !cur_ops->poll_gp_state_full)) { + pr_alert("%s: gp_poll_exp_full without primitives.\n", __func__); + } if (gp_sync1 && cur_ops->sync) { synctype[nsynctypes++] = RTWS_SYNC; pr_info("%s: Testing normal GPs.\n", __func__); @@ -1202,6 +1292,40 @@ static void rcu_torture_write_types(void) } /* + * Do the specified rcu_torture_writer() synchronous grace period, + * while also testing out the polled APIs. Note well that the single-CPU + * grace-period optimizations must be accounted for. 
+ */ +static void do_rtws_sync(struct torture_random_state *trsp, void (*sync)(void)) +{ + unsigned long cookie; + struct rcu_gp_oldstate cookie_full; + bool dopoll; + bool dopoll_full; + unsigned long r = torture_random(trsp); + + dopoll = cur_ops->get_gp_state && cur_ops->poll_gp_state && !(r & 0x300); + dopoll_full = cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full && !(r & 0xc00); + if (dopoll || dopoll_full) + cpus_read_lock(); + if (dopoll) + cookie = cur_ops->get_gp_state(); + if (dopoll_full) + cur_ops->get_gp_state_full(&cookie_full); + if (cur_ops->poll_need_2gp && cur_ops->poll_need_2gp(dopoll, dopoll_full)) + sync(); + sync(); + WARN_ONCE(dopoll && !cur_ops->poll_gp_state(cookie), + "%s: Cookie check 3 failed %pS() online %*pbl.", + __func__, sync, cpumask_pr_args(cpu_online_mask)); + WARN_ONCE(dopoll_full && !cur_ops->poll_gp_state_full(&cookie_full), + "%s: Cookie check 4 failed %pS() online %*pbl", + __func__, sync, cpumask_pr_args(cpu_online_mask)); + if (dopoll || dopoll_full) + cpus_read_unlock(); +} + +/* * RCU torture writer kthread. Repeatedly substitutes a new structure * for that pointed to by rcu_torture_current, freeing the old structure * after a series of grace periods (the "pipeline"). @@ -1212,8 +1336,10 @@ rcu_torture_writer(void *arg) bool boot_ended; bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal(); unsigned long cookie; + struct rcu_gp_oldstate cookie_full; int expediting = 0; unsigned long gp_snap; + struct rcu_gp_oldstate gp_snap_full; int i; int idx; int oldnice = task_nice(current); @@ -1261,11 +1387,12 @@ rcu_torture_writer(void *arg) atomic_inc(&rcu_torture_wcount[i]); WRITE_ONCE(old_rp->rtort_pipe_count, old_rp->rtort_pipe_count + 1); + + // Make sure readers block polled grace periods. 
if (cur_ops->get_gp_state && cur_ops->poll_gp_state) { idx = cur_ops->readlock(); cookie = cur_ops->get_gp_state(); - WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE && - cur_ops->poll_gp_state(cookie), + WARN_ONCE(cur_ops->poll_gp_state(cookie), "%s: Cookie check 1 failed %s(%d) %lu->%lu\n", __func__, rcu_torture_writer_state_getname(), @@ -1277,6 +1404,21 @@ rcu_torture_writer(void *arg) } cur_ops->readunlock(idx); } + if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) { + idx = cur_ops->readlock(); + cur_ops->get_gp_state_full(&cookie_full); + WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full), + "%s: Cookie check 5 failed %s(%d) online %*pbl\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cpumask_pr_args(cpu_online_mask)); + if (cur_ops->get_gp_completed_full) { + cur_ops->get_gp_completed_full(&cookie_full); + WARN_ON_ONCE(!cur_ops->poll_gp_state_full(&cookie_full)); + } + cur_ops->readunlock(idx); + } switch (synctype[torture_random(&rand) % nsynctypes]) { case RTWS_DEF_FREE: rcu_torture_writer_state = RTWS_DEF_FREE; @@ -1284,12 +1426,7 @@ rcu_torture_writer(void *arg) break; case RTWS_EXP_SYNC: rcu_torture_writer_state = RTWS_EXP_SYNC; - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - cookie = cur_ops->get_gp_state(); - cur_ops->exp_sync(); - cur_ops->exp_sync(); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); + do_rtws_sync(&rand, cur_ops->exp_sync); rcu_torture_pipe_update(old_rp); break; case RTWS_COND_GET: @@ -1308,6 +1445,22 @@ rcu_torture_writer(void *arg) cur_ops->cond_sync_exp(gp_snap); rcu_torture_pipe_update(old_rp); break; + case RTWS_COND_GET_FULL: + rcu_torture_writer_state = RTWS_COND_GET_FULL; + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + rcu_torture_writer_state = RTWS_COND_SYNC_FULL; + cur_ops->cond_sync_full(&gp_snap_full); + rcu_torture_pipe_update(old_rp); + break; + case RTWS_COND_GET_EXP_FULL: + rcu_torture_writer_state = RTWS_COND_GET_EXP_FULL; + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + rcu_torture_writer_state = RTWS_COND_SYNC_EXP_FULL; + cur_ops->cond_sync_exp_full(&gp_snap_full); + rcu_torture_pipe_update(old_rp); + break; case RTWS_POLL_GET: rcu_torture_writer_state = RTWS_POLL_GET; gp_snap = cur_ops->start_gp_poll(); @@ -1317,6 +1470,15 @@ rcu_torture_writer(void *arg) &rand); rcu_torture_pipe_update(old_rp); break; + case RTWS_POLL_GET_FULL: + rcu_torture_writer_state = RTWS_POLL_GET_FULL; + cur_ops->start_gp_poll_full(&gp_snap_full); + rcu_torture_writer_state = RTWS_POLL_WAIT_FULL; + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + rcu_torture_pipe_update(old_rp); + break; case RTWS_POLL_GET_EXP: rcu_torture_writer_state = RTWS_POLL_GET_EXP; gp_snap = cur_ops->start_gp_poll_exp(); @@ -1326,14 +1488,18 @@ rcu_torture_writer(void *arg) &rand); rcu_torture_pipe_update(old_rp); break; + case RTWS_POLL_GET_EXP_FULL: + rcu_torture_writer_state = RTWS_POLL_GET_EXP_FULL; + cur_ops->start_gp_poll_exp_full(&gp_snap_full); + rcu_torture_writer_state = RTWS_POLL_WAIT_EXP_FULL; + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + rcu_torture_pipe_update(old_rp); + break; case RTWS_SYNC: rcu_torture_writer_state = RTWS_SYNC; - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - 
cookie = cur_ops->get_gp_state(); - cur_ops->sync(); - cur_ops->sync(); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie)); + do_rtws_sync(&rand, cur_ops->sync); rcu_torture_pipe_update(old_rp); break; default: @@ -1400,6 +1566,7 @@ static int rcu_torture_fakewriter(void *arg) { unsigned long gp_snap; + struct rcu_gp_oldstate gp_snap_full; DEFINE_TORTURE_RANDOM(rand); VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started"); @@ -1438,6 +1605,16 @@ rcu_torture_fakewriter(void *arg) torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); cur_ops->cond_sync_exp(gp_snap); break; + case RTWS_COND_GET_FULL: + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + cur_ops->cond_sync_full(&gp_snap_full); + break; + case RTWS_COND_GET_EXP_FULL: + cur_ops->get_gp_state_full(&gp_snap_full); + torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand); + cur_ops->cond_sync_exp_full(&gp_snap_full); + break; case RTWS_POLL_GET: gp_snap = cur_ops->start_gp_poll(); while (!cur_ops->poll_gp_state(gp_snap)) { @@ -1445,6 +1622,13 @@ rcu_torture_fakewriter(void *arg) &rand); } break; + case RTWS_POLL_GET_FULL: + cur_ops->start_gp_poll_full(&gp_snap_full); + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) { + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + } + break; case RTWS_POLL_GET_EXP: gp_snap = cur_ops->start_gp_poll_exp(); while (!cur_ops->poll_gp_state_exp(gp_snap)) { @@ -1452,6 +1636,13 @@ rcu_torture_fakewriter(void *arg) &rand); } break; + case RTWS_POLL_GET_EXP_FULL: + cur_ops->start_gp_poll_exp_full(&gp_snap_full); + while (!cur_ops->poll_gp_state_full(&gp_snap_full)) { + torture_hrtimeout_jiffies(torture_random(&rand) % 16, + &rand); + } + break; case RTWS_SYNC: cur_ops->sync(); break; @@ -1715,7 +1906,9 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp, */ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) { + bool checkpolling = !(torture_random(trsp) & 0xfff); unsigned long cookie; + struct rcu_gp_oldstate cookie_full; int i; unsigned long started; unsigned long completed; @@ -1731,8 +1924,12 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) WARN_ON_ONCE(!rcu_is_watching()); newstate = rcutorture_extend_mask(readstate, trsp); rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - cookie = cur_ops->get_gp_state(); + if (checkpolling) { + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + cookie = cur_ops->get_gp_state(); + if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) + cur_ops->get_gp_state_full(&cookie_full); + } started = cur_ops->get_gp_seq(); ts = rcu_trace_clock_local(); p = rcu_dereference_check(rcu_torture_current, @@ -1766,13 +1963,22 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid) } __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); - if (cur_ops->get_gp_state && cur_ops->poll_gp_state) - WARN_ONCE(cur_ops->poll_gp_state(cookie), - "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", - __func__, - rcu_torture_writer_state_getname(), - rcu_torture_writer_state, - cookie, cur_ops->get_gp_state()); + if (checkpolling) { + if (cur_ops->get_gp_state && cur_ops->poll_gp_state) + WARN_ONCE(cur_ops->poll_gp_state(cookie), + "%s: Cookie check 2 failed %s(%d) %lu->%lu\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + 
cookie, cur_ops->get_gp_state()); + if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) + WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full), + "%s: Cookie check 6 failed %s(%d) online %*pbl\n", + __func__, + rcu_torture_writer_state_getname(), + rcu_torture_writer_state, + cpumask_pr_args(cpu_online_mask)); + } rcutorture_one_extend(&readstate, 0, trsp, rtrsp); WARN_ON_ONCE(readstate); // This next splat is expected behavior if leakpointer, especially @@ -2600,12 +2806,12 @@ static int rcutorture_oom_notify(struct notifier_block *self, for (i = 0; i < fwd_progress; i++) ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); - rcu_barrier(); + cur_ops->cb_barrier(); ncbs = 0; for (i = 0; i < fwd_progress; i++) ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs); - rcu_barrier(); + cur_ops->cb_barrier(); ncbs = 0; for (i = 0; i < fwd_progress; i++) ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]); diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index 92c002d65482..33adafdad261 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -117,7 +117,7 @@ void srcu_drive_gp(struct work_struct *wp) struct srcu_struct *ssp; ssp = container_of(wp, struct srcu_struct, srcu_work); - if (ssp->srcu_gp_running || USHORT_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) + if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ @@ -150,17 +150,17 @@ void srcu_drive_gp(struct work_struct *wp) * straighten that out. */ WRITE_ONCE(ssp->srcu_gp_running, false); - if (USHORT_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) + if (ULONG_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max))) schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); static void srcu_gp_start_if_needed(struct srcu_struct *ssp) { - unsigned short cookie; + unsigned long cookie; cookie = get_state_synchronize_srcu(ssp); - if (USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) + if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie)) return; WRITE_ONCE(ssp->srcu_idx_max, cookie); if (!READ_ONCE(ssp->srcu_gp_running)) { @@ -215,7 +215,7 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp) barrier(); ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1; barrier(); - return ret & USHRT_MAX; + return ret; } EXPORT_SYMBOL_GPL(get_state_synchronize_srcu); @@ -240,10 +240,10 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu); */ bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie) { - bool ret = USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx), cookie); + unsigned long cur_s = READ_ONCE(ssp->srcu_idx); barrier(); - return ret; + return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3); } EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu); diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h index 83c7e6620d40..f5bf6fb430da 100644 --- a/kernel/rcu/tasks.h +++ b/kernel/rcu/tasks.h @@ -560,7 +560,7 @@ static int __noreturn rcu_tasks_kthread(void *arg) static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp) { /* Complain if the scheduler has not started. */ - RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, + WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE, "synchronize_rcu_tasks called too soon"); // If the grace-period kthread is running, use it. 
@@ -1500,6 +1500,7 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop) if (rcu_tasks_trace_pertask_prep(t, true)) trc_add_holdout(t, hop); rcu_read_unlock(); + cond_resched_tasks_rcu_qs(); } // Only after all running tasks have been accounted for is it @@ -1520,6 +1521,7 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop) raw_spin_lock_irqsave_rcu_node(rtpcp, flags); } raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags); + cond_resched_tasks_rcu_qs(); } // Re-enable CPU hotplug now that the holdout list is populated. @@ -1619,6 +1621,7 @@ static void check_all_holdout_tasks_trace(struct list_head *hop, trc_del_holdout(t); else if (needreport) show_stalled_task_trace(t, firstreport); + cond_resched_tasks_rcu_qs(); } // Re-enable CPU hotplug now that the holdout list scan has completed. diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index f0561ee16b9c..a33a8d4942c3 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -158,6 +158,10 @@ void synchronize_rcu(void) } EXPORT_SYMBOL_GPL(synchronize_rcu); +static void tiny_rcu_leak_callback(struct rcu_head *rhp) +{ +} + /* * Post an RCU callback to be invoked after the end of an RCU grace * period. But since we have but one CPU, that would be after any @@ -165,9 +169,20 @@ EXPORT_SYMBOL_GPL(synchronize_rcu); */ void call_rcu(struct rcu_head *head, rcu_callback_t func) { + static atomic_t doublefrees; unsigned long flags; - debug_rcu_head_queue(head); + if (debug_rcu_head_queue(head)) { + if (atomic_inc_return(&doublefrees) < 4) { + pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func); + mem_dump_obj(head); + } + + if (!__is_kvfree_rcu_offset((unsigned long)head->func)) + WRITE_ONCE(head->func, tiny_rcu_leak_callback); + return; + } + head->func = func; head->next = NULL; @@ -184,6 +199,16 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func) EXPORT_SYMBOL_GPL(call_rcu); /* + * Store a grace-period-counter "cookie". For more information, + * see the Tree RCU header comment. + */ +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = RCU_GET_STATE_COMPLETED; +} +EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full); + +/* * Return a grace-period-counter "cookie". For more information, * see the Tree RCU header comment. */ diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 79aea7df4345..6bb8e72bc815 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -76,6 +76,7 @@ /* Data structures. */ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { + .gpwrap = true, #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_RCU_CORE, #endif @@ -1755,6 +1756,8 @@ static noinline void rcu_gp_cleanup(void) dump_blkd_tasks(rnp, 10); WARN_ON_ONCE(rnp->qsmask); WRITE_ONCE(rnp->gp_seq, new_gp_seq); + if (!rnp->parent) + smp_mb(); // Order against failing poll_state_synchronize_rcu_full(). rdp = this_cpu_ptr(&rcu_data); if (rnp == rdp->mynode) needgp = __note_gp_changes(rnp, rdp) || needgp; @@ -2341,8 +2344,8 @@ void rcu_sched_clock_irq(int user) rcu_flavor_sched_clock_irq(user); if (rcu_pending(user)) invoke_rcu_core(); - if (user) - rcu_tasks_classic_qs(current, false); + if (user || rcu_is_cpu_rrupt_from_idle()) + rcu_note_voluntary_context_switch(current); lockdep_assert_irqs_disabled(); trace_rcu_utilization(TPS("End scheduler-tick")); @@ -2832,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu); /* Maximum number of jiffies to wait before draining a batch. 
*/ -#define KFREE_DRAIN_JIFFIES (HZ / 50) +#define KFREE_DRAIN_JIFFIES (5 * HZ) #define KFREE_N_BATCHES 2 #define FREE_N_CHANNELS 2 @@ -3093,6 +3096,21 @@ need_offload_krc(struct kfree_rcu_cpu *krcp) return !!krcp->head; } +static void +schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp) +{ + long delay, delay_left; + + delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1:KFREE_DRAIN_JIFFIES; + if (delayed_work_pending(&krcp->monitor_work)) { + delay_left = krcp->monitor_work.timer.expires - jiffies; + if (delay < delay_left) + mod_delayed_work(system_wq, &krcp->monitor_work, delay); + return; + } + queue_delayed_work(system_wq, &krcp->monitor_work, delay); +} + /* * This function is invoked after the KFREE_DRAIN_JIFFIES timeout. */ @@ -3150,7 +3168,7 @@ static void kfree_rcu_monitor(struct work_struct *work) // work to repeat an attempt. Because previous batches are // still in progress. if (need_offload_krc(krcp)) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } @@ -3183,15 +3201,16 @@ static void fill_page_cache_func(struct work_struct *work) bnode = (struct kvfree_rcu_bulk_data *) __get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); - if (bnode) { - raw_spin_lock_irqsave(&krcp->lock, flags); - pushed = put_cached_bnode(krcp, bnode); - raw_spin_unlock_irqrestore(&krcp->lock, flags); + if (!bnode) + break; - if (!pushed) { - free_page((unsigned long) bnode); - break; - } + raw_spin_lock_irqsave(&krcp->lock, flags); + pushed = put_cached_bnode(krcp, bnode); + raw_spin_unlock_irqrestore(&krcp->lock, flags); + + if (!pushed) { + free_page((unsigned long) bnode); + break; } } @@ -3338,7 +3357,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func) // Set timer to drain after KFREE_DRAIN_JIFFIES. if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING) - schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); unlock_return: krc_this_cpu_unlock(krcp, flags); @@ -3371,7 +3390,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc) atomic_set(&krcp->backoff_page_cache_fill, 1); } - return count; + return count == 0 ? SHRINK_EMPTY : count; } static unsigned long @@ -3414,49 +3433,27 @@ void __init kfree_rcu_scheduler_running(void) raw_spin_lock_irqsave(&krcp->lock, flags); if (need_offload_krc(krcp)) - schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES); + schedule_delayed_monitor_work(krcp); raw_spin_unlock_irqrestore(&krcp->lock, flags); } } /* * During early boot, any blocking grace-period wait automatically - * implies a grace period. Later on, this is never the case for PREEMPTION. + * implies a grace period. * - * However, because a context switch is a grace period for !PREEMPTION, any - * blocking grace-period wait automatically implies a grace period if - * there is only one CPU online at any point time during execution of - * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to - * occasionally incorrectly indicate that there are multiple CPUs online - * when there was in fact only one the whole time, as this just adds some - * overhead: RCU still operates correctly. + * Later on, this could in theory be the case for kernels built with + * CONFIG_SMP=y && CONFIG_PREEMPTION=y running on a single CPU, but this + * is not a common case. 
Furthermore, this optimization would cause + * the rcu_gp_oldstate structure to expand by 50%, so this potential + * grace-period optimization is ignored once the scheduler is running. */ static int rcu_blocking_is_gp(void) { - int ret; - - // Invoking preempt_model_*() too early gets a splat. - if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE || - preempt_model_full() || preempt_model_rt()) - return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE; + if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE) + return false; might_sleep(); /* Check for RCU read-side critical section. */ - preempt_disable(); - /* - * If the rcu_state.n_online_cpus counter is equal to one, - * there is only one CPU, and that CPU sees all prior accesses - * made by any CPU that was online at the time of its access. - * Furthermore, if this counter is equal to one, its value cannot - * change until after the preempt_enable() below. - * - * Furthermore, if rcu_state.n_online_cpus is equal to one here, - * all later CPUs (both this one and any that come online later - * on) are guaranteed to see all accesses prior to this point - * in the code, without the need for additional memory barriers. - * Those memory barriers are provided by CPU-hotplug code. - */ - ret = READ_ONCE(rcu_state.n_online_cpus) <= 1; - preempt_enable(); - return ret; + return true; } /** @@ -3499,30 +3496,59 @@ static int rcu_blocking_is_gp(void) */ void synchronize_rcu(void) { + unsigned long flags; + struct rcu_node *rnp; + RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), "Illegal synchronize_rcu() in RCU read-side critical section"); - if (rcu_blocking_is_gp()) { - // Note well that this code runs with !PREEMPT && !SMP. - // In addition, all code that advances grace periods runs at - // process level. Therefore, this normal GP overlaps with - // other normal GPs only by being fully nested within them, - // which allows reuse of ->gp_seq_polled_snap. - rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap); - rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap); - if (rcu_init_invoked()) - cond_resched_tasks_rcu_qs(); - return; // Context allows vacuous grace periods. + if (!rcu_blocking_is_gp()) { + if (rcu_gp_is_expedited()) + synchronize_rcu_expedited(); + else + wait_rcu_gp(call_rcu); + return; } - if (rcu_gp_is_expedited()) - synchronize_rcu_expedited(); - else - wait_rcu_gp(call_rcu); + + // Context allows vacuous grace periods. + // Note well that this code runs with !PREEMPT && !SMP. + // In addition, all code that advances grace periods runs at + // process level. Therefore, this normal GP overlaps with other + // normal GPs only by being fully nested within them, which allows + // reuse of ->gp_seq_polled_snap. + rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap); + rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap); + + // Update the normal grace-period counters to record + // this grace period, but only those used by the boot CPU. + // The rcu_scheduler_starting() will take care of the rest of + // these counters. 
+ local_irq_save(flags); + WARN_ON_ONCE(num_online_cpus() > 1); + rcu_state.gp_seq += (1 << RCU_SEQ_CTR_SHIFT); + for (rnp = this_cpu_ptr(&rcu_data)->mynode; rnp; rnp = rnp->parent) + rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; + local_irq_restore(flags); } EXPORT_SYMBOL_GPL(synchronize_rcu); /** + * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie + * @rgosp: Place to put state cookie + * + * Stores into @rgosp a value that will always be treated by functions + * like poll_state_synchronize_rcu_full() as a cookie whose grace period + * has already completed. + */ +void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + rgosp->rgos_norm = RCU_GET_STATE_COMPLETED; + rgosp->rgos_exp = RCU_GET_STATE_COMPLETED; +} +EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full); + +/** * get_state_synchronize_rcu - Snapshot current RCU state * * Returns a cookie that is used by a later call to cond_synchronize_rcu() @@ -3541,21 +3567,42 @@ unsigned long get_state_synchronize_rcu(void) EXPORT_SYMBOL_GPL(get_state_synchronize_rcu); /** - * start_poll_synchronize_rcu - Snapshot and start RCU grace period + * get_state_synchronize_rcu_full - Snapshot RCU state, both normal and expedited + * @rgosp: location to place combined normal/expedited grace-period state * - * Returns a cookie that is used by a later call to cond_synchronize_rcu() - * or poll_state_synchronize_rcu() to determine whether or not a full - * grace period has elapsed in the meantime. If the needed grace period - * is not already slated to start, notifies RCU core of the need for that - * grace period. + * Places the normal and expedited grace-period states in @rgosp. This + * state value can be passed to a later call to cond_synchronize_rcu_full() + * or poll_state_synchronize_rcu_full() to determine whether or not a + * grace period (whether normal or expedited) has elapsed in the meantime. + * The rcu_gp_oldstate structure takes up twice the memory of an unsigned + * long, but is guaranteed to see all grace periods. In contrast, the + * combined state occupies less memory, but can sometimes fail to take + * grace periods into account. * - * Interrupts must be enabled for the case where it is necessary to awaken - * the grace-period kthread. + * This does not guarantee that the needed grace period will actually + * start. */ -unsigned long start_poll_synchronize_rcu(void) +void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + struct rcu_node *rnp = rcu_get_root(); + + /* + * Any prior manipulation of RCU-protected data must happen + * before the loads from ->gp_seq and ->expedited_sequence. + */ + smp_mb(); /* ^^^ */ + rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq); + rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence); +} +EXPORT_SYMBOL_GPL(get_state_synchronize_rcu_full); + +/* + * Helper function for start_poll_synchronize_rcu() and + * start_poll_synchronize_rcu_full(). 
+ */ +static void start_poll_synchronize_rcu_common(void) { unsigned long flags; - unsigned long gp_seq = get_state_synchronize_rcu(); bool needwake; struct rcu_data *rdp; struct rcu_node *rnp; @@ -3575,17 +3622,57 @@ unsigned long start_poll_synchronize_rcu(void) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); if (needwake) rcu_gp_kthread_wake(); +} + +/** + * start_poll_synchronize_rcu - Snapshot and start RCU grace period + * + * Returns a cookie that is used by a later call to cond_synchronize_rcu() + * or poll_state_synchronize_rcu() to determine whether or not a full + * grace period has elapsed in the meantime. If the needed grace period + * is not already slated to start, notifies RCU core of the need for that + * grace period. + * + * Interrupts must be enabled for the case where it is necessary to awaken + * the grace-period kthread. + */ +unsigned long start_poll_synchronize_rcu(void) +{ + unsigned long gp_seq = get_state_synchronize_rcu(); + + start_poll_synchronize_rcu_common(); return gp_seq; } EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu); /** - * poll_state_synchronize_rcu - Conditionally wait for an RCU grace period + * start_poll_synchronize_rcu_full - Take a full snapshot and start RCU grace period + * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full() * + * Places the normal and expedited grace-period states in *@rgos. This + * state value can be passed to a later call to cond_synchronize_rcu_full() + * or poll_state_synchronize_rcu_full() to determine whether or not a + * grace period (whether normal or expedited) has elapsed in the meantime. + * If the needed grace period is not already slated to start, notifies + * RCU core of the need for that grace period. + * + * Interrupts must be enabled for the case where it is necessary to awaken + * the grace-period kthread. + */ +void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + get_state_synchronize_rcu_full(rgosp); + + start_poll_synchronize_rcu_common(); +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full); + +/** + * poll_state_synchronize_rcu - Has the specified RCU grace period completed? * @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu() * * If a full RCU grace period has elapsed since the earlier call from - * which oldstate was obtained, return @true, otherwise return @false. + * which @oldstate was obtained, return @true, otherwise return @false. * If @false is returned, it is the caller's responsibility to invoke this * function later on until it does return @true. Alternatively, the caller * can explicitly wait for a grace period, for example, by passing @oldstate @@ -3594,10 +3681,11 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu); * Yes, this function does not take counter wrap into account. * But counter wrap is harmless. If the counter wraps, we have waited for * more than a billion grace periods (and way more on a 64-bit system!). - * Those needing to keep oldstate values for very long time periods - * (many hours even on 32-bit systems) should check them occasionally - * and either refresh them or set a flag indicating that the grace period - * has completed. + * Those needing to keep old state values for very long time periods + * (many hours even on 32-bit systems) should check them occasionally and + * either refresh them or set a flag indicating that the grace period has + * completed. Alternatively, they can use get_completed_synchronize_rcu() + * to get a guaranteed-completed grace-period state. 
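For the plain unsigned-long cookie documented above, the usual pattern is to snapshot (and kick off) a grace period when an object is retired and to reap later, once the cookie reports completion. This is only a sketch with invented bar_* names; the retired list is assumed to be protected by a caller-provided lock.

#include <linux/list.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct bar {
	struct list_head node;
	unsigned long rcu_cookie;	/* from start_poll_synchronize_rcu() */
};

static LIST_HEAD(bar_retired);		/* caller-locked */

static void bar_retire(struct bar *b)
{
	/* Snapshot the GP state and make sure a grace period is under way. */
	b->rcu_cookie = start_poll_synchronize_rcu();
	list_add_tail(&b->node, &bar_retired);
}

static void bar_reap(void)		/* e.g. from a shrinker-like slow path */
{
	struct bar *b, *tmp;

	list_for_each_entry_safe(b, tmp, &bar_retired, node) {
		if (!poll_state_synchronize_rcu(b->rcu_cookie))
			break;		/* later entries are younger still */
		list_del(&b->node);
		kfree(b);
	}
}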
* * This function provides the same memory-ordering guarantees that * would be provided by a synchronize_rcu() that was invoked at the call @@ -3616,8 +3704,56 @@ bool poll_state_synchronize_rcu(unsigned long oldstate) EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu); /** - * cond_synchronize_rcu - Conditionally wait for an RCU grace period + * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed? + * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full() * + * If a full RCU grace period has elapsed since the earlier call from + * which *rgosp was obtained, return @true, otherwise return @false. + * If @false is returned, it is the caller's responsibility to invoke this + * function later on until it does return @true. Alternatively, the caller + * can explicitly wait for a grace period, for example, by passing @rgosp + * to cond_synchronize_rcu() or by directly invoking synchronize_rcu(). + * + * Yes, this function does not take counter wrap into account. + * But counter wrap is harmless. If the counter wraps, we have waited + * for more than a billion grace periods (and way more on a 64-bit + * system!). Those needing to keep rcu_gp_oldstate values for very + * long time periods (many hours even on 32-bit systems) should check + * them occasionally and either refresh them or set a flag indicating + * that the grace period has completed. Alternatively, they can use + * get_completed_synchronize_rcu_full() to get a guaranteed-completed + * grace-period state. + * + * This function provides the same memory-ordering guarantees that would + * be provided by a synchronize_rcu() that was invoked at the call to + * the function that provided @rgosp, and that returned at the end of this + * function. And this guarantee requires that the root rcu_node structure's + * ->gp_seq field be checked instead of that of the rcu_state structure. + * The problem is that the just-ending grace-period's callbacks can be + * invoked between the time that the root rcu_node structure's ->gp_seq + * field is updated and the time that the rcu_state structure's ->gp_seq + * field is updated. Therefore, if a single synchronize_rcu() is to + * cause a subsequent poll_state_synchronize_rcu_full() to return @true, + * then the root rcu_node structure is the one that needs to be polled. + */ +bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + struct rcu_node *rnp = rcu_get_root(); + + smp_mb(); // Order against root rcu_node structure grace-period cleanup. + if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED || + rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) || + rgosp->rgos_exp == RCU_GET_STATE_COMPLETED || + rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp)) { + smp_mb(); /* Ensure GP ends before subsequent accesses. 
*/ + return true; + } + return false; +} +EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu_full); + +/** + * cond_synchronize_rcu - Conditionally wait for an RCU grace period * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited() * * If a full RCU grace period has elapsed since the earlier call to @@ -3641,6 +3777,33 @@ void cond_synchronize_rcu(unsigned long oldstate) } EXPORT_SYMBOL_GPL(cond_synchronize_rcu); +/** + * cond_synchronize_rcu_full - Conditionally wait for an RCU grace period + * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full() + * + * If a full RCU grace period has elapsed since the call to + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was + * obtained, just return. Otherwise, invoke synchronize_rcu() to wait + * for a full grace period. + * + * Yes, this function does not take counter wrap into account. + * But counter wrap is harmless. If the counter wraps, we have waited for + * more than 2 billion grace periods (and way more on a 64-bit system!), + * so waiting for a couple of additional grace periods should be just fine. + * + * This function provides the same memory-ordering guarantees that + * would be provided by a synchronize_rcu() that was invoked at the call + * to the function that provided @rgosp and that returned at the end of + * this function. + */ +void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp) +{ + if (!poll_state_synchronize_rcu_full(rgosp)) + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(cond_synchronize_rcu_full); + /* * Check to see if there is any immediate RCU-related work to be done by * the current CPU, returning 1 if so and zero otherwise. The checks are @@ -4312,9 +4475,20 @@ early_initcall(rcu_spawn_gp_kthread); */ void rcu_scheduler_starting(void) { + unsigned long flags; + struct rcu_node *rnp; + WARN_ON(num_online_cpus() != 1); WARN_ON(nr_context_switches() > 0); rcu_test_sync_prims(); + + // Fix up the ->gp_seq counters. + local_irq_save(flags); + rcu_for_each_node_breadth_first(rnp) + rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq; + local_irq_restore(flags); + + // Switch out of early boot mode. rcu_scheduler_active = RCU_SCHEDULER_INIT; rcu_test_sync_prims(); } diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index be667583a554..18e9b4cd78ef 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -828,11 +828,13 @@ static void rcu_exp_handler(void *unused) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); struct rcu_node *rnp = rdp->mynode; + bool preempt_bh_enabled = !(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK)); if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) || __this_cpu_read(rcu_data.cpu_no_qs.b.exp)) return; - if (rcu_is_cpu_rrupt_from_idle()) { + if (rcu_is_cpu_rrupt_from_idle() || + (IS_ENABLED(CONFIG_PREEMPT_COUNT) && preempt_bh_enabled)) { rcu_report_exp_rdp(this_cpu_ptr(&rcu_data)); return; } @@ -906,6 +908,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp) void synchronize_rcu_expedited(void) { bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT); + unsigned long flags; struct rcu_exp_work rew; struct rcu_node *rnp; unsigned long s; @@ -924,8 +927,11 @@ void synchronize_rcu_expedited(void) // them, which allows reuse of ->gp_seq_polled_exp_snap. 
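cond_synchronize_rcu_full(), added above, turns a poll into a blocking wait only when the recorded grace period has not yet elapsed. A hedged sketch (slot_* names invented, headers assumed) of reinitializing an object whose full-state cookie was taken by start_poll_synchronize_rcu_full() at retirement time:

#include <linux/rcupdate.h>
#include <linux/string.h>

struct slot {
	struct rcu_gp_oldstate gp;	/* filled in when the slot is retired */
	bool in_use;
	char payload[64];
};

static void slot_retire(struct slot *s)
{
	s->in_use = false;
	/* Record the state and get a grace period moving. */
	start_poll_synchronize_rcu_full(&s->gp);
}

static void slot_reinit(struct slot *s)
{
	/* Returns immediately if that grace period has already elapsed. */
	cond_synchronize_rcu_full(&s->gp);
	memset(s->payload, 0, sizeof(s->payload));
	s->in_use = true;
}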
rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap); rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap); - if (rcu_init_invoked()) - cond_resched(); + + local_irq_save(flags); + WARN_ON_ONCE(num_online_cpus() > 1); + rcu_state.expedited_sequence += (1 << RCU_SEQ_CTR_SHIFT); + local_irq_restore(flags); return; // Context allows vacuous grace periods. } @@ -1028,6 +1034,24 @@ unsigned long start_poll_synchronize_rcu_expedited(void) EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited); /** + * start_poll_synchronize_rcu_expedited_full - Take a full snapshot and start expedited grace period + * @rgosp: Place to put snapshot of grace-period state + * + * Places the normal and expedited grace-period states in rgosp. This + * state value can be passed to a later call to cond_synchronize_rcu_full() + * or poll_state_synchronize_rcu_full() to determine whether or not a + * grace period (whether normal or expedited) has elapsed in the meantime. + * If the needed expedited grace period is not already slated to start, + * initiates that grace period. + */ +void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + get_state_synchronize_rcu_full(rgosp); + (void)start_poll_synchronize_rcu_expedited(); +} +EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited_full); + +/** * cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period * * @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited() @@ -1053,3 +1077,30 @@ void cond_synchronize_rcu_expedited(unsigned long oldstate) synchronize_rcu_expedited(); } EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited); + +/** + * cond_synchronize_rcu_expedited_full - Conditionally wait for an expedited RCU grace period + * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full() + * + * If a full RCU grace period has elapsed since the call to + * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), + * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was + * obtained, just return. Otherwise, invoke synchronize_rcu_expedited() + * to wait for a full grace period. + * + * Yes, this function does not take counter wrap into account. + * But counter wrap is harmless. If the counter wraps, we have waited for + * more than 2 billion grace periods (and way more on a 64-bit system!), + * so waiting for a couple of additional grace periods should be just fine. + * + * This function provides the same memory-ordering guarantees that + * would be provided by a synchronize_rcu() that was invoked at the call + * to the function that provided @rgosp and that returned at the end of + * this function. 
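The expedited counterparts follow the same shape: the snapshot also starts an expedited grace period, and the conditional wait, when it does have to wait, uses synchronize_rcu_expedited() (the body of cond_synchronize_rcu_expedited_full() follows just below). A hedged sketch with an invented widget type, for paths that care about latency:

#include <linux/rcupdate.h>

struct widget {
	struct rcu_gp_oldstate gp;
	/* ... payload ... */
};

static void widget_retire(struct widget *w)
{
	/* Snapshot both sequence counters and start an expedited GP. */
	start_poll_synchronize_rcu_expedited_full(&w->gp);
}

static void widget_reuse(struct widget *w)
{
	/* Expedited wait only if that grace period has not yet finished. */
	cond_synchronize_rcu_expedited_full(&w->gp);
	/* ... reinitialize *w here ... */
}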
+ */ +void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp) +{ + if (!poll_state_synchronize_rcu_full(rgosp)) + synchronize_rcu_expedited(); +} +EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited_full); diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index a8f574d8850d..0a5f0ef41484 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -1111,7 +1111,7 @@ int rcu_nocb_cpu_deoffload(int cpu) if (!ret) cpumask_clear_cpu(cpu, rcu_nocb_mask); } else { - pr_info("NOCB: Can't CB-deoffload an offline CPU\n"); + pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n", rdp->cpu); ret = -EINVAL; } } @@ -1196,7 +1196,7 @@ int rcu_nocb_cpu_offload(int cpu) if (!ret) cpumask_set_cpu(cpu, rcu_nocb_mask); } else { - pr_info("NOCB: Can't CB-offload an offline CPU\n"); + pr_info("NOCB: Cannot CB-offload offline CPU %d\n", rdp->cpu); ret = -EINVAL; } } @@ -1452,8 +1452,8 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp) (long)rdp->nocb_gp_seq, rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops), rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.', - rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, - show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); + rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, + show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread)); } /* Dump out nocb kthread state for the specified rcu_data structure. */ @@ -1497,7 +1497,7 @@ static void show_rcu_nocb_state(struct rcu_data *rdp) ".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)], rcu_segcblist_n_cbs(&rdp->cblist), rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.', - rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1, + rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1, show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread)); /* It is OK for GP kthreads to have GP state. */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 438ecae6bd7e..e3142ee35fc6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -641,7 +641,8 @@ static void rcu_read_unlock_special(struct task_struct *t) expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) || (rdp->grpmask & READ_ONCE(rnp->expmask)) || - IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) || + (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) && + ((rdp->grpmask & READ_ONCE(rnp->qsmask)) || t->rcu_blocked_node)) || (IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled && t->rcu_blocked_node); // Need to defer quiescent state until everything is enabled. @@ -718,9 +719,6 @@ static void rcu_flavor_sched_clock_irq(int user) struct task_struct *t = current; lockdep_assert_irqs_disabled(); - if (user || rcu_is_cpu_rrupt_from_idle()) { - rcu_note_voluntary_context_switch(current); - } if (rcu_preempt_depth() > 0 || (preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) { /* No QS, force context switch if deferred. */ @@ -824,6 +822,7 @@ void rcu_read_unlock_strict(void) if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread) return; rdp = this_cpu_ptr(&rcu_data); + rdp->cpu_no_qs.b.norm = false; rcu_report_qs_rdp(rdp); udelay(rcu_unlock_delay); } @@ -869,7 +868,7 @@ void rcu_all_qs(void) if (!raw_cpu_read(rcu_data.rcu_urgent_qs)) return; - preempt_disable(); + preempt_disable(); // For CONFIG_PREEMPT_COUNT=y kernels /* Load rcu_urgent_qs before other flags. 
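Returning to the rcu_nocb_cpu_offload()/rcu_nocb_cpu_deoffload() messages earlier in this hunk: a hedged sketch of a caller that surfaces the -EINVAL those paths now report for offline CPUs (the helper name is invented; CONFIG_RCU_NOCB_CPU and a caller that keeps the CPU from going away are assumed).

#include <linux/printk.h>
#include <linux/rcupdate.h>

static int toggle_nocb_offload(int cpu, bool offload)
{
	int ret;

	ret = offload ? rcu_nocb_cpu_offload(cpu) : rcu_nocb_cpu_deoffload(cpu);
	if (ret)	/* e.g. -EINVAL for the offline-CPU case logged above */
		pr_warn("nocb %s of CPU %d failed: %d\n",
			offload ? "offload" : "deoffload", cpu, ret);
	return ret;
}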
*/ if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) { preempt_enable(); @@ -931,10 +930,13 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t) return false; } -// Except that we do need to respond to a request by an expedited grace -// period for a quiescent state from this CPU. Note that requests from -// tasks are handled when removing the task from the blocked-tasks list -// below. +// Except that we do need to respond to a request by an expedited +// grace period for a quiescent state from this CPU. Note that in +// non-preemptible kernels, there can be no context switches within RCU +// read-side critical sections, which in turn means that the leaf rcu_node +// structure's blocked-tasks list is always empty. is therefore no need to +// actually check it. Instead, a quiescent state from this CPU suffices, +// and this function is only called from such a quiescent state. notrace void rcu_preempt_deferred_qs(struct task_struct *t) { struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -972,7 +974,6 @@ static void rcu_flavor_sched_clock_irq(int user) * neither access nor modify, at least not while the * corresponding CPU is online. */ - rcu_qs(); } } @@ -1238,8 +1239,11 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) cpu != outgoingcpu) cpumask_set_cpu(cpu, cm); cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU)); - if (cpumask_empty(cm)) + if (cpumask_empty(cm)) { cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU)); + if (outgoingcpu >= 0) + cpumask_clear_cpu(outgoingcpu, cm); + } set_cpus_allowed_ptr(t, cm); mutex_unlock(&rnp->boost_kthread_mutex); free_cpumask_var(cm); diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index c3fbbcc09327..5653560573e2 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -368,7 +368,7 @@ static void rcu_dump_cpu_stacks(void) if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) { if (cpu_is_offline(cpu)) pr_err("Offline CPU %d blocking current GP.\n", cpu); - else if (!trigger_single_cpu_backtrace(cpu)) + else dump_cpu_task(cpu); } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); @@ -511,8 +511,7 @@ static void rcu_check_gp_kthread_starvation(void) pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu); } else { pr_err("Stack dump where RCU GP kthread last ran:\n"); - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); + dump_cpu_task(cpu); } } wake_up_process(gpk); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ee28253c9ac0..60fdc0faf1c9 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -73,6 +73,7 @@ #include <uapi/linux/sched/types.h> +#include <asm/irq_regs.h> #include <asm/switch_to.h> #include <asm/tlb.h> @@ -11183,6 +11184,19 @@ struct cgroup_subsys cpu_cgrp_subsys = { void dump_cpu_task(int cpu) { + if (cpu == smp_processor_id() && in_hardirq()) { + struct pt_regs *regs; + + regs = get_irq_regs(); + if (regs) { + show_regs(regs); + return; + } + } + + if (trigger_single_cpu_backtrace(cpu)) + return; + pr_info("Task dump for CPU %d:\n", cpu); sched_show_task(cpu_curr(cpu)); } diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c index 1207c78f85c1..9161d1136d01 100644 --- a/kernel/sched/cpufreq_schedutil.c +++ b/kernel/sched/cpufreq_schedutil.c @@ -25,6 +25,9 @@ struct sugov_policy { unsigned int next_freq; unsigned int cached_raw_freq; + /* max CPU capacity, which is equal for all CPUs in freq. 
domain */ + unsigned long max; + /* The next fields are only needed if fast switch cannot be used: */ struct irq_work irq_work; struct kthread_work work; @@ -48,7 +51,6 @@ struct sugov_cpu { unsigned long util; unsigned long bw_dl; - unsigned long max; /* The field below is for single-CPU policies only: */ #ifdef CONFIG_NO_HZ_COMMON @@ -158,7 +160,6 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu) { struct rq *rq = cpu_rq(sg_cpu->cpu); - sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu); sg_cpu->bw_dl = cpu_bw_dl(rq); sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu), FREQUENCY_UTIL, NULL); @@ -253,6 +254,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, */ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) { + struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long boost; /* No boost currently required */ @@ -280,7 +282,8 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time) * sg_cpu->util is already in capacity scale; convert iowait_boost * into the same scale so we can compare. */ - boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT; + boost = sg_cpu->iowait_boost * sg_policy->max; + boost >>= SCHED_CAPACITY_SHIFT; boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL); if (sg_cpu->util < boost) sg_cpu->util = boost; @@ -337,7 +340,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time, if (!sugov_update_single_common(sg_cpu, time, flags)) return; - next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max); + next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max); /* * Do not reduce the frequency if the CPU has not been idle * recently, as the reduction is likely to be premature then. @@ -373,6 +376,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, unsigned int flags) { struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); + struct sugov_policy *sg_policy = sg_cpu->sg_policy; unsigned long prev_util = sg_cpu->util; /* @@ -399,7 +403,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time, sg_cpu->util = prev_util; cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl), - map_util_perf(sg_cpu->util), sg_cpu->max); + map_util_perf(sg_cpu->util), + sg_policy->max); sg_cpu->sg_policy->last_freq_update_time = time; } @@ -408,25 +413,19 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time) { struct sugov_policy *sg_policy = sg_cpu->sg_policy; struct cpufreq_policy *policy = sg_policy->policy; - unsigned long util = 0, max = 1; + unsigned long util = 0; unsigned int j; for_each_cpu(j, policy->cpus) { struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j); - unsigned long j_util, j_max; sugov_get_util(j_sg_cpu); sugov_iowait_apply(j_sg_cpu, time); - j_util = j_sg_cpu->util; - j_max = j_sg_cpu->max; - if (j_util * max > j_max * util) { - util = j_util; - max = j_max; - } + util = max(j_sg_cpu->util, util); } - return get_next_freq(sg_policy, util, max); + return get_next_freq(sg_policy, util, sg_policy->max); } static void @@ -752,7 +751,7 @@ static int sugov_start(struct cpufreq_policy *policy) { struct sugov_policy *sg_policy = policy->governor_data; void (*uu)(struct update_util_data *data, u64 time, unsigned int flags); - unsigned int cpu; + unsigned int cpu = cpumask_first(policy->cpus); sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC; sg_policy->last_freq_update_time = 0; @@ 
-760,6 +759,7 @@ static int sugov_start(struct cpufreq_policy *policy) sg_policy->work_in_progress = false; sg_policy->limits_changed = false; sg_policy->cached_raw_freq = 0; + sg_policy->max = arch_scale_cpu_capacity(cpu); sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS); diff --git a/kernel/smp.c b/kernel/smp.c index 650810a6f29b..e8cdc025a046 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -370,8 +370,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * if (cpu >= 0) { if (static_branch_unlikely(&csdlock_debug_extended)) csd_lock_print_extended(csd, cpu); - if (!trigger_single_cpu_backtrace(cpu)) - dump_cpu_task(cpu); + dump_cpu_task(cpu); if (!cpu_cur_csd) { pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); arch_send_call_function_single_ipi(cpu); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 5481ba44a8d6..3f464bbda0e9 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -9,6 +9,7 @@ #include <linux/highuid.h> #include <linux/cred.h> #include <linux/securebits.h> +#include <linux/security.h> #include <linux/keyctl.h> #include <linux/key-type.h> #include <keys/user-type.h> @@ -113,6 +114,10 @@ int create_user_ns(struct cred *new) !kgid_has_mapping(parent_ns, group)) goto fail_dec; + ret = security_create_user_ns(new); + if (ret < 0) + goto fail_dec; + ret = -ENOMEM; ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL); if (!ns) diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 39060a5d0905..7cd5f5e7e0a1 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1651,7 +1651,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, struct work_struct *work = &dwork->work; WARN_ON_ONCE(!wq); - WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn); + WARN_ON_ONCE(timer->function != delayed_work_timer_fn); WARN_ON_ONCE(timer_pending(timer)); WARN_ON_ONCE(!list_empty(&work->entry)); |
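On the schedutil side, every CPU in a frequency domain reports the same arch_scale_cpu_capacity(), which is why sugov_start() can now cache a single per-policy max. The sketch below models the resulting frequency selection in user space; the 1.25 headroom factor reflects my reading of map_util_freq() and is an assumption, as is the clamping note.

#include <stdio.h>

/* next_freq ~= 1.25 * max_freq * util / max, with one max per policy
 * (the real governor additionally clamps to the policy's limits). */
static unsigned int next_freq(unsigned int max_freq, unsigned long util,
			      unsigned long max)
{
	unsigned long f = max_freq + (max_freq >> 2);	/* 1.25 * max_freq */

	return (unsigned int)(f * util / max);
}

int main(void)
{
	unsigned long max = 1024;			/* policy-wide capacity */
	unsigned long util[] = { 100, 512, 900 };
	int i;

	for (i = 0; i < 3; i++)
		printf("util %4lu -> %u kHz\n", util[i],
		       next_freq(2000000, util[i], max));
	return 0;
}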
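The new security_create_user_ns() call gives LSMs a veto over user-namespace creation. The sketch below is a guess at how a BPF LSM program might attach to the matching hook; the hook name (userns_create) and its prototype are assumptions on my part, not something this hunk shows, and CONFIG_BPF_LSM with "bpf" in the lsm= list is required.

// SPDX-License-Identifier: GPL-2.0
// Hypothetical BPF LSM program; hook name and prototype are assumed.
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char LICENSE[] SEC("license") = "GPL";

SEC("lsm/userns_create")
int BPF_PROG(deny_userns_create, const struct cred *cred)
{
	/* Any negative return denies; -EPERM would be the conventional value. */
	return -1;
}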
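Finally, the workqueue hunk replaces WARN_ON_FUNCTION_MISMATCH() with a plain WARN_ON_ONCE() on the timer callback, which now fires only if a delayed_work is queued without proper initialization. A short sketch of the initialization that keeps the check quiet (names invented):

#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

static void my_work_fn(struct work_struct *work)
{
	pr_info("delayed work ran\n");
}

/* DECLARE_DELAYED_WORK() sets timer->function = delayed_work_timer_fn. */
static DECLARE_DELAYED_WORK(my_dwork, my_work_fn);

static void kick_it(void)
{
	/* The WARN_ON_ONCE() in __queue_delayed_work() is satisfied here. */
	queue_delayed_work(system_wq, &my_dwork, msecs_to_jiffies(100));
}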