Diffstat (limited to 'kernel')
-rw-r--r--  kernel/audit.c                    |   8
-rw-r--r--  kernel/audit.h                    |   4
-rw-r--r--  kernel/auditsc.c                  |  10
-rw-r--r--  kernel/bpf/bpf_lsm.c              |   1
-rw-r--r--  kernel/cfi.c                      | 352
-rw-r--r--  kernel/configs/rust.config        |   1
-rw-r--r--  kernel/events/core.c              |   9
-rw-r--r--  kernel/kallsyms.c                 |  43
-rw-r--r--  kernel/kthread.c                  |   3
-rw-r--r--  kernel/livepatch/core.c           |   4
-rw-r--r--  kernel/module/main.c              |  50
-rw-r--r--  kernel/rcu/rcutorture.c           | 290
-rw-r--r--  kernel/rcu/srcutiny.c             |  14
-rw-r--r--  kernel/rcu/tasks.h                |   5
-rw-r--r--  kernel/rcu/tiny.c                 |  27
-rw-r--r--  kernel/rcu/tree.c                 | 330
-rw-r--r--  kernel/rcu/tree_exp.h             |  57
-rw-r--r--  kernel/rcu/tree_nocb.h            |  10
-rw-r--r--  kernel/rcu/tree_plugin.h          |  26
-rw-r--r--  kernel/rcu/tree_stall.h           |   5
-rw-r--r--  kernel/sched/core.c               |  14
-rw-r--r--  kernel/sched/cpufreq_schedutil.c  |  30
-rw-r--r--  kernel/smp.c                      |   3
-rw-r--r--  kernel/user_namespace.c           |   5
-rw-r--r--  kernel/workqueue.c                |   2
25 files changed, 754 insertions(+), 549 deletions(-)
diff --git a/kernel/audit.c b/kernel/audit.c
index a75978ae38ad..9bc0b0301198 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -321,7 +321,6 @@ static inline int audit_rate_check(void)
static DEFINE_SPINLOCK(lock);
unsigned long flags;
unsigned long now;
- unsigned long elapsed;
int retval = 0;
if (!audit_rate_limit)
return 1;
@@ -330,9 +329,8 @@ static inline int audit_rate_check(void)
if (++messages < audit_rate_limit) {
retval = 1;
} else {
- now = jiffies;
- elapsed = now - last_check;
- if (elapsed > HZ) {
+ now = jiffies;
+ if (time_after(now, last_check + HZ)) {
last_check = now;
messages = 0;
retval = 1;
@@ -366,7 +364,7 @@ void audit_log_lost(const char *message)
if (!print) {
spin_lock_irqsave(&lock, flags);
now = jiffies;
- if (now - last_msg > HZ) {
+ if (time_after(now, last_msg + HZ)) {
print = 1;
last_msg = now;
}
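
Both audit.c hunks above replace open-coded jiffies arithmetic with time_after(). The macro compares the signed difference of the two counters, so the test stays correct when jiffies wraps around, where a direct "now > deadline" comparison would not. A minimal userspace sketch of the idiom (the macro body mirrors include/linux/jiffies.h; the counter values are illustrative):

#include <stdio.h>

/* Wrap-safe "a is after b" -- the kernel's time_after() idiom. */
#define time_after(a, b) ((long)((b) - (a)) < 0)

int main(void)
{
	unsigned long deadline = 5;        /* deadline computed after the counter wrapped */
	unsigned long now = ~0UL - 100;    /* current counter, just before the wrap */

	printf("naive:      %d\n", now > deadline ? 1 : 0);      /* 1 -- wrong: deadline is still in the future */
	printf("time_after: %d\n", time_after(now, deadline));   /* 0 -- correct */
	return 0;
}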
diff --git a/kernel/audit.h b/kernel/audit.h
index 58b66543b4d5..c57b008b9914 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -133,7 +133,7 @@ struct audit_context {
struct sockaddr_storage *sockaddr;
size_t sockaddr_len;
/* Save things to print about task_struct */
- pid_t pid, ppid;
+ pid_t ppid;
kuid_t uid, euid, suid, fsuid;
kgid_t gid, egid, sgid, fsgid;
unsigned long personality;
@@ -245,8 +245,6 @@ struct audit_netlink_list {
int audit_send_list_thread(void *_dest);
-extern int selinux_audit_rule_update(void);
-
extern struct mutex audit_filter_mutex;
extern int audit_del_rule(struct audit_entry *entry);
extern void audit_free_rule_rcu(struct rcu_head *head);
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 79a5da1bc5bb..9f8c05228d6d 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -965,7 +965,7 @@ static void audit_reset_context(struct audit_context *ctx)
if (!ctx)
return;
- /* if ctx is non-null, reset the "ctx->state" regardless */
+ /* if ctx is non-null, reset the "ctx->context" regardless */
ctx->context = AUDIT_CTX_UNUSED;
if (ctx->dummy)
return;
@@ -1002,7 +1002,7 @@ static void audit_reset_context(struct audit_context *ctx)
kfree(ctx->sockaddr);
ctx->sockaddr = NULL;
ctx->sockaddr_len = 0;
- ctx->pid = ctx->ppid = 0;
+ ctx->ppid = 0;
ctx->uid = ctx->euid = ctx->suid = ctx->fsuid = KUIDT_INIT(0);
ctx->gid = ctx->egid = ctx->sgid = ctx->fsgid = KGIDT_INIT(0);
ctx->personality = 0;
@@ -1016,7 +1016,6 @@ static void audit_reset_context(struct audit_context *ctx)
WARN_ON(!list_empty(&ctx->killed_trees));
audit_free_module(ctx);
ctx->fds[0] = -1;
- audit_proctitle_free(ctx);
ctx->type = 0; /* reset last for audit_free_*() */
}
@@ -1077,6 +1076,7 @@ static inline void audit_free_context(struct audit_context *context)
{
/* resetting is extra work, but it is likely just noise */
audit_reset_context(context);
+ audit_proctitle_free(context);
free_tree_refs(context);
kfree(context->filterkey);
kfree(context);
@@ -1833,7 +1833,7 @@ void __audit_free(struct task_struct *tsk)
/* We are called either by do_exit() or the fork() error handling code;
* in the former case tsk == current and in the latter tsk is a
- * random task_struct that doesn't doesn't have any meaningful data we
+ * random task_struct that doesn't have any meaningful data we
* need to log via audit_log_exit().
*/
if (tsk == current && !context->dummy) {
@@ -2069,7 +2069,7 @@ void __audit_syscall_exit(int success, long return_code)
/* run through both filters to ensure we set the filterkey properly */
audit_filter_syscall(current, context);
audit_filter_inodes(current, context);
- if (context->current_state < AUDIT_STATE_RECORD)
+ if (context->current_state != AUDIT_STATE_RECORD)
goto out;
audit_log_exit();
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index fa71d58b7ded..761998fda762 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -335,6 +335,7 @@ BTF_ID(func, bpf_lsm_task_getsecid_obj)
BTF_ID(func, bpf_lsm_task_prctl)
BTF_ID(func, bpf_lsm_task_setscheduler)
BTF_ID(func, bpf_lsm_task_to_inode)
+BTF_ID(func, bpf_lsm_userns_create)
BTF_SET_END(sleepable_lsm_hooks)
bool bpf_lsm_is_sleepable_hook(u32 btf_id)
diff --git a/kernel/cfi.c b/kernel/cfi.c
index 2046276ee234..08caad776717 100644
--- a/kernel/cfi.c
+++ b/kernel/cfi.c
@@ -1,339 +1,101 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Clang Control Flow Integrity (CFI) error and slowpath handling.
+ * Clang Control Flow Integrity (CFI) error handling.
*
- * Copyright (C) 2021 Google LLC
+ * Copyright (C) 2022 Google LLC
*/
-#include <linux/hardirq.h>
-#include <linux/kallsyms.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/printk.h>
-#include <linux/ratelimit.h>
-#include <linux/rcupdate.h>
-#include <linux/vmalloc.h>
-#include <asm/cacheflush.h>
-#include <asm/set_memory.h>
+#include <linux/cfi.h>
-/* Compiler-defined handler names */
-#ifdef CONFIG_CFI_PERMISSIVE
-#define cfi_failure_handler __ubsan_handle_cfi_check_fail
-#else
-#define cfi_failure_handler __ubsan_handle_cfi_check_fail_abort
-#endif
-
-static inline void handle_cfi_failure(void *ptr)
+enum bug_trap_type report_cfi_failure(struct pt_regs *regs, unsigned long addr,
+ unsigned long *target, u32 type)
{
- if (IS_ENABLED(CONFIG_CFI_PERMISSIVE))
- WARN_RATELIMIT(1, "CFI failure (target: %pS):\n", ptr);
+ if (target)
+ pr_err("CFI failure at %pS (target: %pS; expected type: 0x%08x)\n",
+ (void *)addr, (void *)*target, type);
else
- panic("CFI failure (target: %pS)\n", ptr);
-}
-
-#ifdef CONFIG_MODULES
-#ifdef CONFIG_CFI_CLANG_SHADOW
-/*
- * Index type. A 16-bit index can address at most (2^16)-2 pages (taking
- * into account SHADOW_INVALID), i.e. ~256M with 4k pages.
- */
-typedef u16 shadow_t;
-#define SHADOW_INVALID ((shadow_t)~0UL)
-
-struct cfi_shadow {
- /* Page index for the beginning of the shadow */
- unsigned long base;
- /* An array of __cfi_check locations (as indices to the shadow) */
- shadow_t shadow[1];
-} __packed;
-
-/*
- * The shadow covers ~128M from the beginning of the module region. If
- * the region is larger, we fall back to __module_address for the rest.
- */
-#define __SHADOW_RANGE (_UL(SZ_128M) >> PAGE_SHIFT)
-
-/* The in-memory size of struct cfi_shadow, always at least one page */
-#define __SHADOW_PAGES ((__SHADOW_RANGE * sizeof(shadow_t)) >> PAGE_SHIFT)
-#define SHADOW_PAGES max(1UL, __SHADOW_PAGES)
-#define SHADOW_SIZE (SHADOW_PAGES << PAGE_SHIFT)
-
-/* The actual size of the shadow array, minus metadata */
-#define SHADOW_ARR_SIZE (SHADOW_SIZE - offsetof(struct cfi_shadow, shadow))
-#define SHADOW_ARR_SLOTS (SHADOW_ARR_SIZE / sizeof(shadow_t))
-
-static DEFINE_MUTEX(shadow_update_lock);
-static struct cfi_shadow __rcu *cfi_shadow __read_mostly;
+ pr_err("CFI failure at %pS (no target information)\n",
+ (void *)addr);
-/* Returns the index in the shadow for the given address */
-static inline int ptr_to_shadow(const struct cfi_shadow *s, unsigned long ptr)
-{
- unsigned long index;
- unsigned long page = ptr >> PAGE_SHIFT;
-
- if (unlikely(page < s->base))
- return -1; /* Outside of module area */
-
- index = page - s->base;
-
- if (index >= SHADOW_ARR_SLOTS)
- return -1; /* Cannot be addressed with shadow */
-
- return (int)index;
-}
-
-/* Returns the page address for an index in the shadow */
-static inline unsigned long shadow_to_ptr(const struct cfi_shadow *s,
- int index)
-{
- if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS))
- return 0;
-
- return (s->base + index) << PAGE_SHIFT;
-}
-
-/* Returns the __cfi_check function address for the given shadow location */
-static inline unsigned long shadow_to_check_fn(const struct cfi_shadow *s,
- int index)
-{
- if (unlikely(index < 0 || index >= SHADOW_ARR_SLOTS))
- return 0;
-
- if (unlikely(s->shadow[index] == SHADOW_INVALID))
- return 0;
-
- /* __cfi_check is always page aligned */
- return (s->base + s->shadow[index]) << PAGE_SHIFT;
-}
-
-static void prepare_next_shadow(const struct cfi_shadow __rcu *prev,
- struct cfi_shadow *next)
-{
- int i, index, check;
-
- /* Mark everything invalid */
- memset(next->shadow, 0xFF, SHADOW_ARR_SIZE);
-
- if (!prev)
- return; /* No previous shadow */
-
- /* If the base address didn't change, an update is not needed */
- if (prev->base == next->base) {
- memcpy(next->shadow, prev->shadow, SHADOW_ARR_SIZE);
- return;
- }
-
- /* Convert the previous shadow to the new address range */
- for (i = 0; i < SHADOW_ARR_SLOTS; ++i) {
- if (prev->shadow[i] == SHADOW_INVALID)
- continue;
-
- index = ptr_to_shadow(next, shadow_to_ptr(prev, i));
- if (index < 0)
- continue;
-
- check = ptr_to_shadow(next,
- shadow_to_check_fn(prev, prev->shadow[i]));
- if (check < 0)
- continue;
-
- next->shadow[index] = (shadow_t)check;
- }
-}
-
-static void add_module_to_shadow(struct cfi_shadow *s, struct module *mod,
- unsigned long min_addr, unsigned long max_addr)
-{
- int check_index;
- unsigned long check = (unsigned long)mod->cfi_check;
- unsigned long ptr;
-
- if (unlikely(!PAGE_ALIGNED(check))) {
- pr_warn("cfi: not using shadow for module %s\n", mod->name);
- return;
+ if (IS_ENABLED(CONFIG_CFI_PERMISSIVE)) {
+ __warn(NULL, 0, (void *)addr, 0, regs, NULL);
+ return BUG_TRAP_TYPE_WARN;
}
- check_index = ptr_to_shadow(s, check);
- if (check_index < 0)
- return; /* Module not addressable with shadow */
-
- /* For each page, store the check function index in the shadow */
- for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) {
- int index = ptr_to_shadow(s, ptr);
-
- if (index >= 0) {
- /* Each page must only contain one module */
- WARN_ON_ONCE(s->shadow[index] != SHADOW_INVALID);
- s->shadow[index] = (shadow_t)check_index;
- }
- }
+ return BUG_TRAP_TYPE_BUG;
}
-static void remove_module_from_shadow(struct cfi_shadow *s, struct module *mod,
- unsigned long min_addr, unsigned long max_addr)
+#ifdef CONFIG_ARCH_USES_CFI_TRAPS
+static inline unsigned long trap_address(s32 *p)
{
- unsigned long ptr;
-
- for (ptr = min_addr; ptr <= max_addr; ptr += PAGE_SIZE) {
- int index = ptr_to_shadow(s, ptr);
-
- if (index >= 0)
- s->shadow[index] = SHADOW_INVALID;
- }
+ return (unsigned long)((long)p + (long)*p);
}
-typedef void (*update_shadow_fn)(struct cfi_shadow *, struct module *,
- unsigned long min_addr, unsigned long max_addr);
-
-static void update_shadow(struct module *mod, unsigned long base_addr,
- update_shadow_fn fn)
+static bool is_trap(unsigned long addr, s32 *start, s32 *end)
{
- struct cfi_shadow *prev;
- struct cfi_shadow *next;
- unsigned long min_addr, max_addr;
-
- next = vmalloc(SHADOW_SIZE);
-
- mutex_lock(&shadow_update_lock);
- prev = rcu_dereference_protected(cfi_shadow,
- mutex_is_locked(&shadow_update_lock));
-
- if (next) {
- next->base = base_addr >> PAGE_SHIFT;
- prepare_next_shadow(prev, next);
+ s32 *p;
- min_addr = (unsigned long)mod->core_layout.base;
- max_addr = min_addr + mod->core_layout.text_size;
- fn(next, mod, min_addr & PAGE_MASK, max_addr & PAGE_MASK);
-
- set_memory_ro((unsigned long)next, SHADOW_PAGES);
- }
-
- rcu_assign_pointer(cfi_shadow, next);
- mutex_unlock(&shadow_update_lock);
- synchronize_rcu();
-
- if (prev) {
- set_memory_rw((unsigned long)prev, SHADOW_PAGES);
- vfree(prev);
+ for (p = start; p < end; ++p) {
+ if (trap_address(p) == addr)
+ return true;
}
-}
-void cfi_module_add(struct module *mod, unsigned long base_addr)
-{
- update_shadow(mod, base_addr, add_module_to_shadow);
+ return false;
}
-void cfi_module_remove(struct module *mod, unsigned long base_addr)
-{
- update_shadow(mod, base_addr, remove_module_from_shadow);
-}
-
-static inline cfi_check_fn ptr_to_check_fn(const struct cfi_shadow __rcu *s,
- unsigned long ptr)
+#ifdef CONFIG_MODULES
+/* Populates `kcfi_trap(_end)?` fields in `struct module`. */
+void module_cfi_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
+ struct module *mod)
{
- int index;
-
- if (unlikely(!s))
- return NULL; /* No shadow available */
-
- index = ptr_to_shadow(s, ptr);
- if (index < 0)
- return NULL; /* Cannot be addressed with shadow */
+ char *secstrings;
+ unsigned int i;
- return (cfi_check_fn)shadow_to_check_fn(s, index);
-}
-
-static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr)
-{
- cfi_check_fn fn;
+ mod->kcfi_traps = NULL;
+ mod->kcfi_traps_end = NULL;
- rcu_read_lock_sched_notrace();
- fn = ptr_to_check_fn(rcu_dereference_sched(cfi_shadow), ptr);
- rcu_read_unlock_sched_notrace();
+ secstrings = (char *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
- return fn;
-}
-
-#else /* !CONFIG_CFI_CLANG_SHADOW */
+ for (i = 1; i < hdr->e_shnum; i++) {
+ if (strcmp(secstrings + sechdrs[i].sh_name, "__kcfi_traps"))
+ continue;
-static inline cfi_check_fn find_shadow_check_fn(unsigned long ptr)
-{
- return NULL;
+ mod->kcfi_traps = (s32 *)sechdrs[i].sh_addr;
+ mod->kcfi_traps_end = (s32 *)(sechdrs[i].sh_addr + sechdrs[i].sh_size);
+ break;
+ }
}
-#endif /* CONFIG_CFI_CLANG_SHADOW */
-
-static inline cfi_check_fn find_module_check_fn(unsigned long ptr)
+static bool is_module_cfi_trap(unsigned long addr)
{
- cfi_check_fn fn = NULL;
struct module *mod;
+ bool found = false;
rcu_read_lock_sched_notrace();
- mod = __module_address(ptr);
- if (mod)
- fn = mod->cfi_check;
- rcu_read_unlock_sched_notrace();
-
- return fn;
-}
-
-static inline cfi_check_fn find_check_fn(unsigned long ptr)
-{
- cfi_check_fn fn = NULL;
- unsigned long flags;
- bool rcu_idle;
-
- if (is_kernel_text(ptr))
- return __cfi_check;
- /*
- * Indirect call checks can happen when RCU is not watching. Both
- * the shadow and __module_address use RCU, so we need to wake it
- * up if necessary.
- */
- rcu_idle = !rcu_is_watching();
- if (rcu_idle) {
- local_irq_save(flags);
- ct_irq_enter();
- }
-
- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
- fn = find_shadow_check_fn(ptr);
- if (!fn)
- fn = find_module_check_fn(ptr);
+ mod = __module_address(addr);
+ if (mod)
+ found = is_trap(addr, mod->kcfi_traps, mod->kcfi_traps_end);
- if (rcu_idle) {
- ct_irq_exit();
- local_irq_restore(flags);
- }
+ rcu_read_unlock_sched_notrace();
- return fn;
+ return found;
}
-
-void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag)
+#else /* CONFIG_MODULES */
+static inline bool is_module_cfi_trap(unsigned long addr)
{
- cfi_check_fn fn = find_check_fn((unsigned long)ptr);
-
- if (likely(fn))
- fn(id, ptr, diag);
- else /* Don't allow unchecked modules */
- handle_cfi_failure(ptr);
+ return false;
}
-EXPORT_SYMBOL(__cfi_slowpath_diag);
+#endif /* CONFIG_MODULES */
-#else /* !CONFIG_MODULES */
+extern s32 __start___kcfi_traps[];
+extern s32 __stop___kcfi_traps[];
-void __cfi_slowpath_diag(uint64_t id, void *ptr, void *diag)
+bool is_cfi_trap(unsigned long addr)
{
- handle_cfi_failure(ptr); /* No modules */
-}
-EXPORT_SYMBOL(__cfi_slowpath_diag);
+ if (is_trap(addr, __start___kcfi_traps, __stop___kcfi_traps))
+ return true;
-#endif /* CONFIG_MODULES */
-
-void cfi_failure_handler(void *data, void *ptr, void *vtable)
-{
- handle_cfi_failure(ptr);
+ return is_module_cfi_trap(addr);
}
-EXPORT_SYMBOL(cfi_failure_handler);
+#endif /* CONFIG_ARCH_USES_CFI_TRAPS */
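
The rewritten cfi.c drops the shadow-page machinery entirely: with kCFI, the compiler records each trap location in a __kcfi_traps section of 32-bit self-relative offsets, and trap_address() recovers an absolute address by adding the signed entry to the entry's own location. A hedged userspace sketch of that encoding (the symbol and table here are stand-ins, not the real section):

#include <stdint.h>
#include <stdio.h>

/* Decode one self-relative table entry, as trap_address() does above. */
static uintptr_t trap_address(const int32_t *p)
{
	return (uintptr_t)((intptr_t)p + (intptr_t)*p);
}

static char target;        /* stand-in for a trap instruction's address */
static int32_t table[1];   /* stand-in for one __kcfi_traps entry */

int main(void)
{
	/* The linker emits these offsets at build time; compute one by hand.
	 * (Assumes the distance fits in 32 bits, which holds within one object.) */
	table[0] = (int32_t)((intptr_t)&target - (intptr_t)&table[0]);

	printf("decoded == &target: %d\n",
	       trap_address(&table[0]) == (uintptr_t)&target);
	return 0;
}

Storing offsets rather than absolute pointers keeps the section position-independent and half the size on 64-bit kernels.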
diff --git a/kernel/configs/rust.config b/kernel/configs/rust.config
new file mode 100644
index 000000000000..38a7c5362c9c
--- /dev/null
+++ b/kernel/configs/rust.config
@@ -0,0 +1 @@
+CONFIG_RUST=y
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 2621fd24ad26..ff4bffc502c6 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -6893,9 +6893,16 @@ static void perf_output_read_group(struct perf_output_handle *handle,
{
struct perf_event *leader = event->group_leader, *sub;
u64 read_format = event->attr.read_format;
+ unsigned long flags;
u64 values[6];
int n = 0;
+ /*
+ * Disabling interrupts avoids all counter scheduling
+ * (context switches, timer based rotation and IPIs).
+ */
+ local_irq_save(flags);
+
values[n++] = 1 + leader->nr_siblings;
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
@@ -6931,6 +6938,8 @@ static void perf_output_read_group(struct perf_output_handle *handle,
__output_copy(handle, values, n * sizeof(u64));
}
+
+ local_irq_restore(flags);
}
#define PERF_FORMAT_TOTAL_TIMES (PERF_FORMAT_TOTAL_TIME_ENABLED|\
diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
index 3e7e2c2ad2f7..60c20f301a6b 100644
--- a/kernel/kallsyms.c
+++ b/kernel/kallsyms.c
@@ -50,12 +50,20 @@ static unsigned int kallsyms_expand_symbol(unsigned int off,
data = &kallsyms_names[off];
len = *data;
data++;
+ off++;
+
+ /* If MSB is 1, it is a "big" symbol, so needs an additional byte. */
+ if ((len & 0x80) != 0) {
+ len = (len & 0x7F) | (*data << 7);
+ data++;
+ off++;
+ }
/*
* Update the offset to return the offset for the next symbol on
* the compressed stream.
*/
- off += len + 1;
+ off += len;
/*
* For every byte on the compressed symbol data, copy the table
@@ -108,7 +116,7 @@ static char kallsyms_get_symbol_type(unsigned int off)
static unsigned int get_symbol_offset(unsigned long pos)
{
const u8 *name;
- int i;
+ int i, len;
/*
* Use the closest marker we have. We have markers every 256 positions,
@@ -122,8 +130,18 @@ static unsigned int get_symbol_offset(unsigned long pos)
* so we just need to add the len to the current pointer for every
* symbol we wish to skip.
*/
- for (i = 0; i < (pos & 0xFF); i++)
- name = name + (*name) + 1;
+ for (i = 0; i < (pos & 0xFF); i++) {
+ len = *name;
+
+ /*
+ * If MSB is 1, it is a "big" symbol, so we need to look into
+ * the next byte (and skip it, too).
+ */
+ if ((len & 0x80) != 0)
+ len = ((len & 0x7F) | (name[1] << 7)) + 1;
+
+ name = name + len + 1;
+ }
return name - kallsyms_names;
}
@@ -159,7 +177,6 @@ static bool cleanup_symbol_name(char *s)
* character in an identifier in C. Suffixes observed:
* - foo.llvm.[0-9a-f]+
* - foo.[0-9a-f]+
- * - foo.[0-9a-f]+.cfi_jt
*/
res = strchr(s, '.');
if (res) {
@@ -167,22 +184,6 @@ static bool cleanup_symbol_name(char *s)
return true;
}
- if (!IS_ENABLED(CONFIG_CFI_CLANG) ||
- !IS_ENABLED(CONFIG_LTO_CLANG_THIN) ||
- CONFIG_CLANG_VERSION >= 130000)
- return false;
-
- /*
- * Prior to LLVM 13, the following suffixes were observed when thinLTO
- * and CFI are both enabled:
- * - foo$[0-9]+
- */
- res = strrchr(s, '$');
- if (res) {
- *res = '\0';
- return true;
- }
-
return false;
}
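
Both kallsyms hunks decode the same new length prefix: when the first length byte has its MSB set, the symbol is a "big" one, and the real length is the low 7 bits of that byte combined with the next byte shifted left by 7, raising the limit from 255 to 32767 bytes. A standalone sketch of the decoder (the byte sequences are made up):

#include <stdio.h>

/* Decode the kallsyms length prefix: one byte, or two when the MSB is set. */
static unsigned int decode_len(const unsigned char *data, unsigned int *bytes)
{
	unsigned int len = data[0];

	*bytes = 1;
	if (len & 0x80) {                        /* "big" symbol */
		len = (len & 0x7F) | (data[1] << 7);
		*bytes = 2;
	}
	return len;
}

int main(void)
{
	unsigned char small[] = { 0x05 };        /* plain symbol, length 5 */
	unsigned char big[]   = { 0x8A, 0x01 };  /* (0x0A) | (0x01 << 7) = 138 */
	unsigned int n;

	printf("%u\n", decode_len(small, &n));   /* 5 */
	printf("%u\n", decode_len(big, &n));     /* 138 */
	return 0;
}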
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 3c677918d8f2..28a6b7ab4a0f 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -1050,8 +1050,7 @@ static void __kthread_queue_delayed_work(struct kthread_worker *worker,
struct timer_list *timer = &dwork->timer;
struct kthread_work *work = &dwork->work;
- WARN_ON_FUNCTION_MISMATCH(timer->function,
- kthread_delayed_work_timer_fn);
+ WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn);
/*
* If @delay is 0, queue @dwork->work immediately. This is for
diff --git a/kernel/livepatch/core.c b/kernel/livepatch/core.c
index bc475e62279d..ec06ce59d728 100644
--- a/kernel/livepatch/core.c
+++ b/kernel/livepatch/core.c
@@ -213,7 +213,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab,
* we use the smallest/strictest upper bound possible (56, based on
* the current definition of MODULE_NAME_LEN) to prevent overflows.
*/
- BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 128);
+ BUILD_BUG_ON(MODULE_NAME_LEN < 56 || KSYM_NAME_LEN != 512);
relas = (Elf_Rela *) relasec->sh_addr;
/* For each rela in this klp relocation section */
@@ -227,7 +227,7 @@ static int klp_resolve_symbols(Elf_Shdr *sechdrs, const char *strtab,
/* Format: .klp.sym.sym_objname.sym_name,sympos */
cnt = sscanf(strtab + sym->st_name,
- ".klp.sym.%55[^.].%127[^,],%lu",
+ ".klp.sym.%55[^.].%511[^,],%lu",
sym_objname, sym_name, &sympos);
if (cnt != 3) {
pr_err("symbol %s has an incorrectly formatted name\n",
diff --git a/kernel/module/main.c b/kernel/module/main.c
index a4e4d84b6f4e..70c0b2c6fef8 100644
--- a/kernel/module/main.c
+++ b/kernel/module/main.c
@@ -53,6 +53,7 @@
#include <linux/bsearch.h>
#include <linux/dynamic_debug.h>
#include <linux/audit.h>
+#include <linux/cfi.h>
#include <uapi/linux/module.h>
#include "internal.h"
@@ -1144,8 +1145,6 @@ void __weak module_arch_freeing_init(struct module *mod)
{
}
-static void cfi_cleanup(struct module *mod);
-
/* Free a module, remove from lists, etc. */
static void free_module(struct module *mod)
{
@@ -1190,9 +1189,6 @@ static void free_module(struct module *mod)
mod->name);
mutex_unlock(&module_mutex);
- /* Clean up CFI for the module. */
- cfi_cleanup(mod);
-
/* This may be empty, but that's OK */
module_arch_freeing_init(mod);
module_memfree(mod->init_layout.base);
@@ -2602,8 +2598,9 @@ static int complete_formation(struct module *mod, struct load_info *info)
if (err < 0)
goto out;
- /* This relies on module_mutex for list integrity. */
+ /* These rely on module_mutex for list integrity. */
module_bug_finalize(info->hdr, info->sechdrs, mod);
+ module_cfi_finalize(info->hdr, info->sechdrs, mod);
if (module_check_misalignment(mod))
goto out_misaligned;
@@ -2665,8 +2662,6 @@ static int unknown_module_param_cb(char *param, char *val, const char *modname,
return 0;
}
-static void cfi_init(struct module *mod);
-
/*
* Allocate and load the module: note that size of section 0 is always
* zero, and we rely on this for optional sections.
@@ -2796,9 +2791,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
flush_module_icache(mod);
- /* Setup CFI for the module. */
- cfi_init(mod);
-
/* Now copy in args */
mod->args = strndup_user(uargs, ~0UL >> 1);
if (IS_ERR(mod->args)) {
@@ -2875,7 +2867,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
synchronize_rcu();
kfree(mod->args);
free_arch_cleanup:
- cfi_cleanup(mod);
module_arch_cleanup(mod);
free_modinfo:
free_modinfo(mod);
@@ -2961,41 +2952,6 @@ static inline int within(unsigned long addr, void *start, unsigned long size)
return ((void *)addr >= start && (void *)addr < start + size);
}
-static void cfi_init(struct module *mod)
-{
-#ifdef CONFIG_CFI_CLANG
- initcall_t *init;
-#ifdef CONFIG_MODULE_UNLOAD
- exitcall_t *exit;
-#endif
-
- rcu_read_lock_sched();
- mod->cfi_check = (cfi_check_fn)
- find_kallsyms_symbol_value(mod, "__cfi_check");
- init = (initcall_t *)
- find_kallsyms_symbol_value(mod, "__cfi_jt_init_module");
- /* Fix init/exit functions to point to the CFI jump table */
- if (init)
- mod->init = *init;
-#ifdef CONFIG_MODULE_UNLOAD
- exit = (exitcall_t *)
- find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
- if (exit)
- mod->exit = *exit;
-#endif
- rcu_read_unlock_sched();
-
- cfi_module_add(mod, mod_tree.addr_min);
-#endif
-}
-
-static void cfi_cleanup(struct module *mod)
-{
-#ifdef CONFIG_CFI_CLANG
- cfi_module_remove(mod, mod_tree.addr_min);
-#endif
-}
-
/* Keep in sync with MODULE_FLAGS_BUF_SIZE !!! */
char *module_flags(struct module *mod, char *buf, bool show_state)
{
diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c
index d8e1b270a065..503c2aa845a4 100644
--- a/kernel/rcu/rcutorture.c
+++ b/kernel/rcu/rcutorture.c
@@ -84,10 +84,15 @@ torture_param(int, fwd_progress_holdoff, 60, "Time between forward-progress test
torture_param(bool, fwd_progress_need_resched, 1, "Hide cond_resched() behind need_resched()");
torture_param(bool, gp_cond, false, "Use conditional/async GP wait primitives");
torture_param(bool, gp_cond_exp, false, "Use conditional/async expedited GP wait primitives");
+torture_param(bool, gp_cond_full, false, "Use conditional/async full-state GP wait primitives");
+torture_param(bool, gp_cond_exp_full, false,
+ "Use conditional/async full-stateexpedited GP wait primitives");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(bool, gp_normal, false, "Use normal (non-expedited) GP wait primitives");
torture_param(bool, gp_poll, false, "Use polling GP wait primitives");
torture_param(bool, gp_poll_exp, false, "Use polling expedited GP wait primitives");
+torture_param(bool, gp_poll_full, false, "Use polling full-state GP wait primitives");
+torture_param(bool, gp_poll_exp_full, false, "Use polling full-state expedited GP wait primitives");
torture_param(bool, gp_sync, false, "Use synchronous GP wait primitives");
torture_param(int, irqreader, 1, "Allow RCU readers from irq handlers");
torture_param(int, leakpointer, 0, "Leak pointer dereferences from readers");
@@ -194,16 +199,24 @@ static int rcu_torture_writer_state;
#define RTWS_DEF_FREE 3
#define RTWS_EXP_SYNC 4
#define RTWS_COND_GET 5
-#define RTWS_COND_GET_EXP 6
-#define RTWS_COND_SYNC 7
-#define RTWS_COND_SYNC_EXP 8
-#define RTWS_POLL_GET 9
-#define RTWS_POLL_GET_EXP 10
-#define RTWS_POLL_WAIT 11
-#define RTWS_POLL_WAIT_EXP 12
-#define RTWS_SYNC 13
-#define RTWS_STUTTER 14
-#define RTWS_STOPPING 15
+#define RTWS_COND_GET_FULL 6
+#define RTWS_COND_GET_EXP 7
+#define RTWS_COND_GET_EXP_FULL 8
+#define RTWS_COND_SYNC 9
+#define RTWS_COND_SYNC_FULL 10
+#define RTWS_COND_SYNC_EXP 11
+#define RTWS_COND_SYNC_EXP_FULL 12
+#define RTWS_POLL_GET 13
+#define RTWS_POLL_GET_FULL 14
+#define RTWS_POLL_GET_EXP 15
+#define RTWS_POLL_GET_EXP_FULL 16
+#define RTWS_POLL_WAIT 17
+#define RTWS_POLL_WAIT_FULL 18
+#define RTWS_POLL_WAIT_EXP 19
+#define RTWS_POLL_WAIT_EXP_FULL 20
+#define RTWS_SYNC 21
+#define RTWS_STUTTER 22
+#define RTWS_STOPPING 23
static const char * const rcu_torture_writer_state_names[] = {
"RTWS_FIXED_DELAY",
"RTWS_DELAY",
@@ -211,13 +224,21 @@ static const char * const rcu_torture_writer_state_names[] = {
"RTWS_DEF_FREE",
"RTWS_EXP_SYNC",
"RTWS_COND_GET",
+ "RTWS_COND_GET_FULL",
"RTWS_COND_GET_EXP",
+ "RTWS_COND_GET_EXP_FULL",
"RTWS_COND_SYNC",
+ "RTWS_COND_SYNC_FULL",
"RTWS_COND_SYNC_EXP",
+ "RTWS_COND_SYNC_EXP_FULL",
"RTWS_POLL_GET",
+ "RTWS_POLL_GET_FULL",
"RTWS_POLL_GET_EXP",
+ "RTWS_POLL_GET_EXP_FULL",
"RTWS_POLL_WAIT",
+ "RTWS_POLL_WAIT_FULL",
"RTWS_POLL_WAIT_EXP",
+ "RTWS_POLL_WAIT_EXP_FULL",
"RTWS_SYNC",
"RTWS_STUTTER",
"RTWS_STOPPING",
@@ -332,13 +353,21 @@ struct rcu_torture_ops {
void (*exp_sync)(void);
unsigned long (*get_gp_state_exp)(void);
unsigned long (*start_gp_poll_exp)(void);
+ void (*start_gp_poll_exp_full)(struct rcu_gp_oldstate *rgosp);
bool (*poll_gp_state_exp)(unsigned long oldstate);
void (*cond_sync_exp)(unsigned long oldstate);
+ void (*cond_sync_exp_full)(struct rcu_gp_oldstate *rgosp);
unsigned long (*get_gp_state)(void);
+ void (*get_gp_state_full)(struct rcu_gp_oldstate *rgosp);
unsigned long (*get_gp_completed)(void);
+ void (*get_gp_completed_full)(struct rcu_gp_oldstate *rgosp);
unsigned long (*start_gp_poll)(void);
+ void (*start_gp_poll_full)(struct rcu_gp_oldstate *rgosp);
bool (*poll_gp_state)(unsigned long oldstate);
+ bool (*poll_gp_state_full)(struct rcu_gp_oldstate *rgosp);
+ bool (*poll_need_2gp)(bool poll, bool poll_full);
void (*cond_sync)(unsigned long oldstate);
+ void (*cond_sync_full)(struct rcu_gp_oldstate *rgosp);
call_rcu_func_t call;
void (*cb_barrier)(void);
void (*fqs)(void);
@@ -489,6 +518,11 @@ static void rcu_sync_torture_init(void)
INIT_LIST_HEAD(&rcu_torture_removed);
}
+static bool rcu_poll_need_2gp(bool poll, bool poll_full)
+{
+ return poll;
+}
+
static struct rcu_torture_ops rcu_ops = {
.ttype = RCU_FLAVOR,
.init = rcu_sync_torture_init,
@@ -502,12 +536,19 @@ static struct rcu_torture_ops rcu_ops = {
.sync = synchronize_rcu,
.exp_sync = synchronize_rcu_expedited,
.get_gp_state = get_state_synchronize_rcu,
+ .get_gp_state_full = get_state_synchronize_rcu_full,
.get_gp_completed = get_completed_synchronize_rcu,
+ .get_gp_completed_full = get_completed_synchronize_rcu_full,
.start_gp_poll = start_poll_synchronize_rcu,
+ .start_gp_poll_full = start_poll_synchronize_rcu_full,
.poll_gp_state = poll_state_synchronize_rcu,
+ .poll_gp_state_full = poll_state_synchronize_rcu_full,
+ .poll_need_2gp = rcu_poll_need_2gp,
.cond_sync = cond_synchronize_rcu,
+ .cond_sync_full = cond_synchronize_rcu_full,
.get_gp_state_exp = get_state_synchronize_rcu,
.start_gp_poll_exp = start_poll_synchronize_rcu_expedited,
+ .start_gp_poll_exp_full = start_poll_synchronize_rcu_expedited_full,
.poll_gp_state_exp = poll_state_synchronize_rcu,
.cond_sync_exp = cond_synchronize_rcu_expedited,
.call = call_rcu,
@@ -709,6 +750,9 @@ static struct rcu_torture_ops srcud_ops = {
.deferred_free = srcu_torture_deferred_free,
.sync = srcu_torture_synchronize,
.exp_sync = srcu_torture_synchronize_expedited,
+ .get_gp_state = srcu_torture_get_gp_state,
+ .start_gp_poll = srcu_torture_start_gp_poll,
+ .poll_gp_state = srcu_torture_poll_gp_state,
.call = srcu_torture_call,
.cb_barrier = srcu_torture_barrier,
.stats = srcu_torture_stats,
@@ -1148,15 +1192,35 @@ static int nsynctypes;
*/
static void rcu_torture_write_types(void)
{
- bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_exp1 = gp_exp;
- bool gp_poll_exp1 = gp_poll_exp, gp_normal1 = gp_normal, gp_poll1 = gp_poll;
- bool gp_sync1 = gp_sync;
+ bool gp_cond1 = gp_cond, gp_cond_exp1 = gp_cond_exp, gp_cond_full1 = gp_cond_full;
+ bool gp_cond_exp_full1 = gp_cond_exp_full, gp_exp1 = gp_exp, gp_poll_exp1 = gp_poll_exp;
+ bool gp_poll_exp_full1 = gp_poll_exp_full, gp_normal1 = gp_normal, gp_poll1 = gp_poll;
+ bool gp_poll_full1 = gp_poll_full, gp_sync1 = gp_sync;
/* Initialize synctype[] array. If none set, take default. */
- if (!gp_cond1 && !gp_cond_exp1 && !gp_exp1 && !gp_poll_exp &&
- !gp_normal1 && !gp_poll1 && !gp_sync1)
- gp_cond1 = gp_cond_exp1 = gp_exp1 = gp_poll_exp1 =
- gp_normal1 = gp_poll1 = gp_sync1 = true;
+ if (!gp_cond1 &&
+ !gp_cond_exp1 &&
+ !gp_cond_full1 &&
+ !gp_cond_exp_full1 &&
+ !gp_exp1 &&
+ !gp_poll_exp1 &&
+ !gp_poll_exp_full1 &&
+ !gp_normal1 &&
+ !gp_poll1 &&
+ !gp_poll_full1 &&
+ !gp_sync1) {
+ gp_cond1 = true;
+ gp_cond_exp1 = true;
+ gp_cond_full1 = true;
+ gp_cond_exp_full1 = true;
+ gp_exp1 = true;
+ gp_poll_exp1 = true;
+ gp_poll_exp_full1 = true;
+ gp_normal1 = true;
+ gp_poll1 = true;
+ gp_poll_full1 = true;
+ gp_sync1 = true;
+ }
if (gp_cond1 && cur_ops->get_gp_state && cur_ops->cond_sync) {
synctype[nsynctypes++] = RTWS_COND_GET;
pr_info("%s: Testing conditional GPs.\n", __func__);
@@ -1169,6 +1233,19 @@ static void rcu_torture_write_types(void)
} else if (gp_cond_exp && (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp)) {
pr_alert("%s: gp_cond_exp without primitives.\n", __func__);
}
+ if (gp_cond_full1 && cur_ops->get_gp_state && cur_ops->cond_sync_full) {
+ synctype[nsynctypes++] = RTWS_COND_GET_FULL;
+ pr_info("%s: Testing conditional full-state GPs.\n", __func__);
+ } else if (gp_cond_full && (!cur_ops->get_gp_state || !cur_ops->cond_sync_full)) {
+ pr_alert("%s: gp_cond_full without primitives.\n", __func__);
+ }
+ if (gp_cond_exp_full1 && cur_ops->get_gp_state_exp && cur_ops->cond_sync_exp_full) {
+ synctype[nsynctypes++] = RTWS_COND_GET_EXP_FULL;
+ pr_info("%s: Testing conditional full-state expedited GPs.\n", __func__);
+ } else if (gp_cond_exp_full &&
+ (!cur_ops->get_gp_state_exp || !cur_ops->cond_sync_exp_full)) {
+ pr_alert("%s: gp_cond_exp_full without primitives.\n", __func__);
+ }
if (gp_exp1 && cur_ops->exp_sync) {
synctype[nsynctypes++] = RTWS_EXP_SYNC;
pr_info("%s: Testing expedited GPs.\n", __func__);
@@ -1187,12 +1264,25 @@ static void rcu_torture_write_types(void)
} else if (gp_poll && (!cur_ops->start_gp_poll || !cur_ops->poll_gp_state)) {
pr_alert("%s: gp_poll without primitives.\n", __func__);
}
+ if (gp_poll_full1 && cur_ops->start_gp_poll_full && cur_ops->poll_gp_state_full) {
+ synctype[nsynctypes++] = RTWS_POLL_GET_FULL;
+ pr_info("%s: Testing polling full-state GPs.\n", __func__);
+ } else if (gp_poll_full && (!cur_ops->start_gp_poll_full || !cur_ops->poll_gp_state_full)) {
+ pr_alert("%s: gp_poll_full without primitives.\n", __func__);
+ }
if (gp_poll_exp1 && cur_ops->start_gp_poll_exp && cur_ops->poll_gp_state_exp) {
synctype[nsynctypes++] = RTWS_POLL_GET_EXP;
pr_info("%s: Testing polling expedited GPs.\n", __func__);
} else if (gp_poll_exp && (!cur_ops->start_gp_poll_exp || !cur_ops->poll_gp_state_exp)) {
pr_alert("%s: gp_poll_exp without primitives.\n", __func__);
}
+ if (gp_poll_exp_full1 && cur_ops->start_gp_poll_exp_full && cur_ops->poll_gp_state_full) {
+ synctype[nsynctypes++] = RTWS_POLL_GET_EXP_FULL;
+ pr_info("%s: Testing polling full-state expedited GPs.\n", __func__);
+ } else if (gp_poll_exp_full &&
+ (!cur_ops->start_gp_poll_exp_full || !cur_ops->poll_gp_state_full)) {
+ pr_alert("%s: gp_poll_exp_full without primitives.\n", __func__);
+ }
if (gp_sync1 && cur_ops->sync) {
synctype[nsynctypes++] = RTWS_SYNC;
pr_info("%s: Testing normal GPs.\n", __func__);
@@ -1202,6 +1292,40 @@ static void rcu_torture_write_types(void)
}
/*
+ * Do the specified rcu_torture_writer() synchronous grace period,
+ * while also testing out the polled APIs. Note well that the single-CPU
+ * grace-period optimizations must be accounted for.
+ */
+static void do_rtws_sync(struct torture_random_state *trsp, void (*sync)(void))
+{
+ unsigned long cookie;
+ struct rcu_gp_oldstate cookie_full;
+ bool dopoll;
+ bool dopoll_full;
+ unsigned long r = torture_random(trsp);
+
+ dopoll = cur_ops->get_gp_state && cur_ops->poll_gp_state && !(r & 0x300);
+ dopoll_full = cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full && !(r & 0xc00);
+ if (dopoll || dopoll_full)
+ cpus_read_lock();
+ if (dopoll)
+ cookie = cur_ops->get_gp_state();
+ if (dopoll_full)
+ cur_ops->get_gp_state_full(&cookie_full);
+ if (cur_ops->poll_need_2gp && cur_ops->poll_need_2gp(dopoll, dopoll_full))
+ sync();
+ sync();
+ WARN_ONCE(dopoll && !cur_ops->poll_gp_state(cookie),
+ "%s: Cookie check 3 failed %pS() online %*pbl.",
+ __func__, sync, cpumask_pr_args(cpu_online_mask));
+ WARN_ONCE(dopoll_full && !cur_ops->poll_gp_state_full(&cookie_full),
+ "%s: Cookie check 4 failed %pS() online %*pbl",
+ __func__, sync, cpumask_pr_args(cpu_online_mask));
+ if (dopoll || dopoll_full)
+ cpus_read_unlock();
+}
+
+/*
* RCU torture writer kthread. Repeatedly substitutes a new structure
* for that pointed to by rcu_torture_current, freeing the old structure
* after a series of grace periods (the "pipeline").
@@ -1212,8 +1336,10 @@ rcu_torture_writer(void *arg)
bool boot_ended;
bool can_expedite = !rcu_gp_is_expedited() && !rcu_gp_is_normal();
unsigned long cookie;
+ struct rcu_gp_oldstate cookie_full;
int expediting = 0;
unsigned long gp_snap;
+ struct rcu_gp_oldstate gp_snap_full;
int i;
int idx;
int oldnice = task_nice(current);
@@ -1261,11 +1387,12 @@ rcu_torture_writer(void *arg)
atomic_inc(&rcu_torture_wcount[i]);
WRITE_ONCE(old_rp->rtort_pipe_count,
old_rp->rtort_pipe_count + 1);
+
+ // Make sure readers block polled grace periods.
if (cur_ops->get_gp_state && cur_ops->poll_gp_state) {
idx = cur_ops->readlock();
cookie = cur_ops->get_gp_state();
- WARN_ONCE(rcu_torture_writer_state != RTWS_DEF_FREE &&
- cur_ops->poll_gp_state(cookie),
+ WARN_ONCE(cur_ops->poll_gp_state(cookie),
"%s: Cookie check 1 failed %s(%d) %lu->%lu\n",
__func__,
rcu_torture_writer_state_getname(),
@@ -1277,6 +1404,21 @@ rcu_torture_writer(void *arg)
}
cur_ops->readunlock(idx);
}
+ if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full) {
+ idx = cur_ops->readlock();
+ cur_ops->get_gp_state_full(&cookie_full);
+ WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full),
+ "%s: Cookie check 5 failed %s(%d) online %*pbl\n",
+ __func__,
+ rcu_torture_writer_state_getname(),
+ rcu_torture_writer_state,
+ cpumask_pr_args(cpu_online_mask));
+ if (cur_ops->get_gp_completed_full) {
+ cur_ops->get_gp_completed_full(&cookie_full);
+ WARN_ON_ONCE(!cur_ops->poll_gp_state_full(&cookie_full));
+ }
+ cur_ops->readunlock(idx);
+ }
switch (synctype[torture_random(&rand) % nsynctypes]) {
case RTWS_DEF_FREE:
rcu_torture_writer_state = RTWS_DEF_FREE;
@@ -1284,12 +1426,7 @@ rcu_torture_writer(void *arg)
break;
case RTWS_EXP_SYNC:
rcu_torture_writer_state = RTWS_EXP_SYNC;
- if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- cookie = cur_ops->get_gp_state();
- cur_ops->exp_sync();
- cur_ops->exp_sync();
- if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie));
+ do_rtws_sync(&rand, cur_ops->exp_sync);
rcu_torture_pipe_update(old_rp);
break;
case RTWS_COND_GET:
@@ -1308,6 +1445,22 @@ rcu_torture_writer(void *arg)
cur_ops->cond_sync_exp(gp_snap);
rcu_torture_pipe_update(old_rp);
break;
+ case RTWS_COND_GET_FULL:
+ rcu_torture_writer_state = RTWS_COND_GET_FULL;
+ cur_ops->get_gp_state_full(&gp_snap_full);
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+ rcu_torture_writer_state = RTWS_COND_SYNC_FULL;
+ cur_ops->cond_sync_full(&gp_snap_full);
+ rcu_torture_pipe_update(old_rp);
+ break;
+ case RTWS_COND_GET_EXP_FULL:
+ rcu_torture_writer_state = RTWS_COND_GET_EXP_FULL;
+ cur_ops->get_gp_state_full(&gp_snap_full);
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+ rcu_torture_writer_state = RTWS_COND_SYNC_EXP_FULL;
+ cur_ops->cond_sync_exp_full(&gp_snap_full);
+ rcu_torture_pipe_update(old_rp);
+ break;
case RTWS_POLL_GET:
rcu_torture_writer_state = RTWS_POLL_GET;
gp_snap = cur_ops->start_gp_poll();
@@ -1317,6 +1470,15 @@ rcu_torture_writer(void *arg)
&rand);
rcu_torture_pipe_update(old_rp);
break;
+ case RTWS_POLL_GET_FULL:
+ rcu_torture_writer_state = RTWS_POLL_GET_FULL;
+ cur_ops->start_gp_poll_full(&gp_snap_full);
+ rcu_torture_writer_state = RTWS_POLL_WAIT_FULL;
+ while (!cur_ops->poll_gp_state_full(&gp_snap_full))
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+ &rand);
+ rcu_torture_pipe_update(old_rp);
+ break;
case RTWS_POLL_GET_EXP:
rcu_torture_writer_state = RTWS_POLL_GET_EXP;
gp_snap = cur_ops->start_gp_poll_exp();
@@ -1326,14 +1488,18 @@ rcu_torture_writer(void *arg)
&rand);
rcu_torture_pipe_update(old_rp);
break;
+ case RTWS_POLL_GET_EXP_FULL:
+ rcu_torture_writer_state = RTWS_POLL_GET_EXP_FULL;
+ cur_ops->start_gp_poll_exp_full(&gp_snap_full);
+ rcu_torture_writer_state = RTWS_POLL_WAIT_EXP_FULL;
+ while (!cur_ops->poll_gp_state_full(&gp_snap_full))
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+ &rand);
+ rcu_torture_pipe_update(old_rp);
+ break;
case RTWS_SYNC:
rcu_torture_writer_state = RTWS_SYNC;
- if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- cookie = cur_ops->get_gp_state();
- cur_ops->sync();
- cur_ops->sync();
- if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- WARN_ON_ONCE(!cur_ops->poll_gp_state(cookie));
+ do_rtws_sync(&rand, cur_ops->sync);
rcu_torture_pipe_update(old_rp);
break;
default:
@@ -1400,6 +1566,7 @@ static int
rcu_torture_fakewriter(void *arg)
{
unsigned long gp_snap;
+ struct rcu_gp_oldstate gp_snap_full;
DEFINE_TORTURE_RANDOM(rand);
VERBOSE_TOROUT_STRING("rcu_torture_fakewriter task started");
@@ -1438,6 +1605,16 @@ rcu_torture_fakewriter(void *arg)
torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
cur_ops->cond_sync_exp(gp_snap);
break;
+ case RTWS_COND_GET_FULL:
+ cur_ops->get_gp_state_full(&gp_snap_full);
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+ cur_ops->cond_sync_full(&gp_snap_full);
+ break;
+ case RTWS_COND_GET_EXP_FULL:
+ cur_ops->get_gp_state_full(&gp_snap_full);
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16, &rand);
+ cur_ops->cond_sync_exp_full(&gp_snap_full);
+ break;
case RTWS_POLL_GET:
gp_snap = cur_ops->start_gp_poll();
while (!cur_ops->poll_gp_state(gp_snap)) {
@@ -1445,6 +1622,13 @@ rcu_torture_fakewriter(void *arg)
&rand);
}
break;
+ case RTWS_POLL_GET_FULL:
+ cur_ops->start_gp_poll_full(&gp_snap_full);
+ while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+ &rand);
+ }
+ break;
case RTWS_POLL_GET_EXP:
gp_snap = cur_ops->start_gp_poll_exp();
while (!cur_ops->poll_gp_state_exp(gp_snap)) {
@@ -1452,6 +1636,13 @@ rcu_torture_fakewriter(void *arg)
&rand);
}
break;
+ case RTWS_POLL_GET_EXP_FULL:
+ cur_ops->start_gp_poll_exp_full(&gp_snap_full);
+ while (!cur_ops->poll_gp_state_full(&gp_snap_full)) {
+ torture_hrtimeout_jiffies(torture_random(&rand) % 16,
+ &rand);
+ }
+ break;
case RTWS_SYNC:
cur_ops->sync();
break;
@@ -1715,7 +1906,9 @@ rcutorture_loop_extend(int *readstate, struct torture_random_state *trsp,
*/
static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
{
+ bool checkpolling = !(torture_random(trsp) & 0xfff);
unsigned long cookie;
+ struct rcu_gp_oldstate cookie_full;
int i;
unsigned long started;
unsigned long completed;
@@ -1731,8 +1924,12 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
WARN_ON_ONCE(!rcu_is_watching());
newstate = rcutorture_extend_mask(readstate, trsp);
rcutorture_one_extend(&readstate, newstate, trsp, rtrsp++);
- if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- cookie = cur_ops->get_gp_state();
+ if (checkpolling) {
+ if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+ cookie = cur_ops->get_gp_state();
+ if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
+ cur_ops->get_gp_state_full(&cookie_full);
+ }
started = cur_ops->get_gp_seq();
ts = rcu_trace_clock_local();
p = rcu_dereference_check(rcu_torture_current,
@@ -1766,13 +1963,22 @@ static bool rcu_torture_one_read(struct torture_random_state *trsp, long myid)
}
__this_cpu_inc(rcu_torture_batch[completed]);
preempt_enable();
- if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
- WARN_ONCE(cur_ops->poll_gp_state(cookie),
- "%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
- __func__,
- rcu_torture_writer_state_getname(),
- rcu_torture_writer_state,
- cookie, cur_ops->get_gp_state());
+ if (checkpolling) {
+ if (cur_ops->get_gp_state && cur_ops->poll_gp_state)
+ WARN_ONCE(cur_ops->poll_gp_state(cookie),
+ "%s: Cookie check 2 failed %s(%d) %lu->%lu\n",
+ __func__,
+ rcu_torture_writer_state_getname(),
+ rcu_torture_writer_state,
+ cookie, cur_ops->get_gp_state());
+ if (cur_ops->get_gp_state_full && cur_ops->poll_gp_state_full)
+ WARN_ONCE(cur_ops->poll_gp_state_full(&cookie_full),
+ "%s: Cookie check 6 failed %s(%d) online %*pbl\n",
+ __func__,
+ rcu_torture_writer_state_getname(),
+ rcu_torture_writer_state,
+ cpumask_pr_args(cpu_online_mask));
+ }
rcutorture_one_extend(&readstate, 0, trsp, rtrsp);
WARN_ON_ONCE(readstate);
// This next splat is expected behavior if leakpointer, especially
@@ -2600,12 +2806,12 @@ static int rcutorture_oom_notify(struct notifier_block *self,
for (i = 0; i < fwd_progress; i++)
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
- rcu_barrier();
+ cur_ops->cb_barrier();
ncbs = 0;
for (i = 0; i < fwd_progress; i++)
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
pr_info("%s: Freed %lu RCU callbacks.\n", __func__, ncbs);
- rcu_barrier();
+ cur_ops->cb_barrier();
ncbs = 0;
for (i = 0; i < fwd_progress; i++)
ncbs += rcu_torture_fwd_prog_cbfree(&rfp[i]);
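
The rcutorture additions above exercise the new full-state polled grace-period API whose tree.c implementation appears later in this diff. The pattern under test, as a hedged kernel-context sketch (not a complete module):

#include <linux/rcupdate.h>

static void example_full_state_poll(void)
{
	struct rcu_gp_oldstate snap;

	/* Snapshot both the normal and expedited grace-period counters. */
	get_state_synchronize_rcu_full(&snap);

	/* ... publish an update that readers may still be traversing ... */

	/* Blocks only if neither a normal nor an expedited grace period
	 * has elapsed since the snapshot was taken. */
	cond_synchronize_rcu_full(&snap);
}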
diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c
index 92c002d65482..33adafdad261 100644
--- a/kernel/rcu/srcutiny.c
+++ b/kernel/rcu/srcutiny.c
@@ -117,7 +117,7 @@ void srcu_drive_gp(struct work_struct *wp)
struct srcu_struct *ssp;
ssp = container_of(wp, struct srcu_struct, srcu_work);
- if (ssp->srcu_gp_running || USHORT_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)))
+ if (ssp->srcu_gp_running || ULONG_CMP_GE(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)))
return; /* Already running or nothing to do. */
/* Remove recently arrived callbacks and wait for readers. */
@@ -150,17 +150,17 @@ void srcu_drive_gp(struct work_struct *wp)
* straighten that out.
*/
WRITE_ONCE(ssp->srcu_gp_running, false);
- if (USHORT_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)))
+ if (ULONG_CMP_LT(ssp->srcu_idx, READ_ONCE(ssp->srcu_idx_max)))
schedule_work(&ssp->srcu_work);
}
EXPORT_SYMBOL_GPL(srcu_drive_gp);
static void srcu_gp_start_if_needed(struct srcu_struct *ssp)
{
- unsigned short cookie;
+ unsigned long cookie;
cookie = get_state_synchronize_srcu(ssp);
- if (USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie))
+ if (ULONG_CMP_GE(READ_ONCE(ssp->srcu_idx_max), cookie))
return;
WRITE_ONCE(ssp->srcu_idx_max, cookie);
if (!READ_ONCE(ssp->srcu_gp_running)) {
@@ -215,7 +215,7 @@ unsigned long get_state_synchronize_srcu(struct srcu_struct *ssp)
barrier();
ret = (READ_ONCE(ssp->srcu_idx) + 3) & ~0x1;
barrier();
- return ret & USHRT_MAX;
+ return ret;
}
EXPORT_SYMBOL_GPL(get_state_synchronize_srcu);
@@ -240,10 +240,10 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_srcu);
*/
bool poll_state_synchronize_srcu(struct srcu_struct *ssp, unsigned long cookie)
{
- bool ret = USHORT_CMP_GE(READ_ONCE(ssp->srcu_idx), cookie);
+ unsigned long cur_s = READ_ONCE(ssp->srcu_idx);
barrier();
- return ret;
+ return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
}
EXPORT_SYMBOL_GPL(poll_state_synchronize_srcu);
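
The srcutiny conversion from unsigned short to unsigned long cookies also makes the expiry test two-sided: a cookie is treated as expired either when ->srcu_idx has caught up with it, or when it trails ->srcu_idx by more than the 3 counts a freshly minted cookie can lead, which can only happen if the cookie predates a counter wrap. A userspace sketch of the comparison (the macros mirror the kernel's ULONG_CMP_* helpers):

#include <stdio.h>

#define ULONG_CMP_GE(a, b) ((long)((a) - (b)) >= 0)
#define ULONG_CMP_LT(a, b) ((long)((a) - (b)) < 0)

/* Two-sided expiry check, as in the new poll_state_synchronize_srcu(). */
static int cookie_expired(unsigned long cur_s, unsigned long cookie)
{
	return ULONG_CMP_GE(cur_s, cookie) || ULONG_CMP_LT(cur_s, cookie - 3);
}

int main(void)
{
	printf("%d\n", cookie_expired(10, 12));  /* 0: grace period still pending */
	printf("%d\n", cookie_expired(12, 12));  /* 1: counter reached the cookie */
	printf("%d\n", cookie_expired(2, 12));   /* 1: too far behind -- stale, pre-wrap cookie */
	return 0;
}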
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index 83c7e6620d40..f5bf6fb430da 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -560,7 +560,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
static void synchronize_rcu_tasks_generic(struct rcu_tasks *rtp)
{
/* Complain if the scheduler has not started. */
- RCU_LOCKDEP_WARN(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
+ WARN_ONCE(rcu_scheduler_active == RCU_SCHEDULER_INACTIVE,
"synchronize_rcu_tasks called too soon");
// If the grace-period kthread is running, use it.
@@ -1500,6 +1500,7 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop)
if (rcu_tasks_trace_pertask_prep(t, true))
trc_add_holdout(t, hop);
rcu_read_unlock();
+ cond_resched_tasks_rcu_qs();
}
// Only after all running tasks have been accounted for is it
@@ -1520,6 +1521,7 @@ static void rcu_tasks_trace_pregp_step(struct list_head *hop)
raw_spin_lock_irqsave_rcu_node(rtpcp, flags);
}
raw_spin_unlock_irqrestore_rcu_node(rtpcp, flags);
+ cond_resched_tasks_rcu_qs();
}
// Re-enable CPU hotplug now that the holdout list is populated.
@@ -1619,6 +1621,7 @@ static void check_all_holdout_tasks_trace(struct list_head *hop,
trc_del_holdout(t);
else if (needreport)
show_stalled_task_trace(t, firstreport);
+ cond_resched_tasks_rcu_qs();
}
// Re-enable CPU hotplug now that the holdout list scan has completed.
diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c
index f0561ee16b9c..a33a8d4942c3 100644
--- a/kernel/rcu/tiny.c
+++ b/kernel/rcu/tiny.c
@@ -158,6 +158,10 @@ void synchronize_rcu(void)
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
+static void tiny_rcu_leak_callback(struct rcu_head *rhp)
+{
+}
+
/*
* Post an RCU callback to be invoked after the end of an RCU grace
* period. But since we have but one CPU, that would be after any
@@ -165,9 +169,20 @@ EXPORT_SYMBOL_GPL(synchronize_rcu);
*/
void call_rcu(struct rcu_head *head, rcu_callback_t func)
{
+ static atomic_t doublefrees;
unsigned long flags;
- debug_rcu_head_queue(head);
+ if (debug_rcu_head_queue(head)) {
+ if (atomic_inc_return(&doublefrees) < 4) {
+ pr_err("%s(): Double-freed CB %p->%pS()!!! ", __func__, head, head->func);
+ mem_dump_obj(head);
+ }
+
+ if (!__is_kvfree_rcu_offset((unsigned long)head->func))
+ WRITE_ONCE(head->func, tiny_rcu_leak_callback);
+ return;
+ }
+
head->func = func;
head->next = NULL;
@@ -184,6 +199,16 @@ void call_rcu(struct rcu_head *head, rcu_callback_t func)
EXPORT_SYMBOL_GPL(call_rcu);
/*
+ * Store a grace-period-counter "cookie". For more information,
+ * see the Tree RCU header comment.
+ */
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
+}
+EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
+
+/*
* Return a grace-period-counter "cookie". For more information,
* see the Tree RCU header comment.
*/
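
With the call_rcu() change above, Tiny RCU now diagnoses queuing the same rcu_head twice instead of silently corrupting the callback list: the duplicate is reported (at most three times), dumped via mem_dump_obj(), and its function pointer is redirected to a no-op leak callback so the object is handed back at most once. The bug it catches looks like this hedged sketch (kernel context; the names are illustrative):

struct my_obj {
	int data;
	struct rcu_head rh;
};

static struct my_obj obj;

static void my_cb(struct rcu_head *rhp)
{
	/* reclaim the enclosing object */
}

static void buggy_path(void)
{
	call_rcu(&obj.rh, my_cb);
	call_rcu(&obj.rh, my_cb);  /* double queuing: previously list corruption,
				    * now reported and neutralized */
}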
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 79aea7df4345..6bb8e72bc815 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -76,6 +76,7 @@
/* Data structures. */
static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = {
+ .gpwrap = true,
#ifdef CONFIG_RCU_NOCB_CPU
.cblist.flags = SEGCBLIST_RCU_CORE,
#endif
@@ -1755,6 +1756,8 @@ static noinline void rcu_gp_cleanup(void)
dump_blkd_tasks(rnp, 10);
WARN_ON_ONCE(rnp->qsmask);
WRITE_ONCE(rnp->gp_seq, new_gp_seq);
+ if (!rnp->parent)
+ smp_mb(); // Order against failing poll_state_synchronize_rcu_full().
rdp = this_cpu_ptr(&rcu_data);
if (rnp == rdp->mynode)
needgp = __note_gp_changes(rnp, rdp) || needgp;
@@ -2341,8 +2344,8 @@ void rcu_sched_clock_irq(int user)
rcu_flavor_sched_clock_irq(user);
if (rcu_pending(user))
invoke_rcu_core();
- if (user)
- rcu_tasks_classic_qs(current, false);
+ if (user || rcu_is_cpu_rrupt_from_idle())
+ rcu_note_voluntary_context_switch(current);
lockdep_assert_irqs_disabled();
trace_rcu_utilization(TPS("End scheduler-tick"));
@@ -2832,7 +2835,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
/* Maximum number of jiffies to wait before draining a batch. */
-#define KFREE_DRAIN_JIFFIES (HZ / 50)
+#define KFREE_DRAIN_JIFFIES (5 * HZ)
#define KFREE_N_BATCHES 2
#define FREE_N_CHANNELS 2
@@ -3093,6 +3096,21 @@ need_offload_krc(struct kfree_rcu_cpu *krcp)
return !!krcp->head;
}
+static void
+schedule_delayed_monitor_work(struct kfree_rcu_cpu *krcp)
+{
+ long delay, delay_left;
+
+ delay = READ_ONCE(krcp->count) >= KVFREE_BULK_MAX_ENTR ? 1 : KFREE_DRAIN_JIFFIES;
+ if (delayed_work_pending(&krcp->monitor_work)) {
+ delay_left = krcp->monitor_work.timer.expires - jiffies;
+ if (delay < delay_left)
+ mod_delayed_work(system_wq, &krcp->monitor_work, delay);
+ return;
+ }
+ queue_delayed_work(system_wq, &krcp->monitor_work, delay);
+}
+
/*
* This function is invoked after the KFREE_DRAIN_JIFFIES timeout.
*/
@@ -3150,7 +3168,7 @@ static void kfree_rcu_monitor(struct work_struct *work)
// work to repeat an attempt. Because previous batches are
// still in progress.
if (need_offload_krc(krcp))
- schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ schedule_delayed_monitor_work(krcp);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
@@ -3183,15 +3201,16 @@ static void fill_page_cache_func(struct work_struct *work)
bnode = (struct kvfree_rcu_bulk_data *)
__get_free_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN);
- if (bnode) {
- raw_spin_lock_irqsave(&krcp->lock, flags);
- pushed = put_cached_bnode(krcp, bnode);
- raw_spin_unlock_irqrestore(&krcp->lock, flags);
+ if (!bnode)
+ break;
- if (!pushed) {
- free_page((unsigned long) bnode);
- break;
- }
+ raw_spin_lock_irqsave(&krcp->lock, flags);
+ pushed = put_cached_bnode(krcp, bnode);
+ raw_spin_unlock_irqrestore(&krcp->lock, flags);
+
+ if (!pushed) {
+ free_page((unsigned long) bnode);
+ break;
}
}
@@ -3338,7 +3357,7 @@ void kvfree_call_rcu(struct rcu_head *head, rcu_callback_t func)
// Set timer to drain after KFREE_DRAIN_JIFFIES.
if (rcu_scheduler_active == RCU_SCHEDULER_RUNNING)
- schedule_delayed_work(&krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ schedule_delayed_monitor_work(krcp);
unlock_return:
krc_this_cpu_unlock(krcp, flags);
@@ -3371,7 +3390,7 @@ kfree_rcu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
atomic_set(&krcp->backoff_page_cache_fill, 1);
}
- return count;
+ return count == 0 ? SHRINK_EMPTY : count;
}
static unsigned long
@@ -3414,49 +3433,27 @@ void __init kfree_rcu_scheduler_running(void)
raw_spin_lock_irqsave(&krcp->lock, flags);
if (need_offload_krc(krcp))
- schedule_delayed_work_on(cpu, &krcp->monitor_work, KFREE_DRAIN_JIFFIES);
+ schedule_delayed_monitor_work(krcp);
raw_spin_unlock_irqrestore(&krcp->lock, flags);
}
}
/*
* During early boot, any blocking grace-period wait automatically
- * implies a grace period. Later on, this is never the case for PREEMPTION.
+ * implies a grace period.
*
- * However, because a context switch is a grace period for !PREEMPTION, any
- * blocking grace-period wait automatically implies a grace period if
- * there is only one CPU online at any point time during execution of
- * either synchronize_rcu() or synchronize_rcu_expedited(). It is OK to
- * occasionally incorrectly indicate that there are multiple CPUs online
- * when there was in fact only one the whole time, as this just adds some
- * overhead: RCU still operates correctly.
+ * Later on, this could in theory be the case for kernels built with
+ * CONFIG_SMP=y && CONFIG_PREEMPTION=y running on a single CPU, but this
+ * is not a common case. Furthermore, this optimization would cause
+ * the rcu_gp_oldstate structure to expand by 50%, so this potential
+ * grace-period optimization is ignored once the scheduler is running.
*/
static int rcu_blocking_is_gp(void)
{
- int ret;
-
- // Invoking preempt_model_*() too early gets a splat.
- if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE ||
- preempt_model_full() || preempt_model_rt())
- return rcu_scheduler_active == RCU_SCHEDULER_INACTIVE;
+ if (rcu_scheduler_active != RCU_SCHEDULER_INACTIVE)
+ return false;
might_sleep(); /* Check for RCU read-side critical section. */
- preempt_disable();
- /*
- * If the rcu_state.n_online_cpus counter is equal to one,
- * there is only one CPU, and that CPU sees all prior accesses
- * made by any CPU that was online at the time of its access.
- * Furthermore, if this counter is equal to one, its value cannot
- * change until after the preempt_enable() below.
- *
- * Furthermore, if rcu_state.n_online_cpus is equal to one here,
- * all later CPUs (both this one and any that come online later
- * on) are guaranteed to see all accesses prior to this point
- * in the code, without the need for additional memory barriers.
- * Those memory barriers are provided by CPU-hotplug code.
- */
- ret = READ_ONCE(rcu_state.n_online_cpus) <= 1;
- preempt_enable();
- return ret;
+ return true;
}
/**
@@ -3499,30 +3496,59 @@ static int rcu_blocking_is_gp(void)
*/
void synchronize_rcu(void)
{
+ unsigned long flags;
+ struct rcu_node *rnp;
+
RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) ||
lock_is_held(&rcu_lock_map) ||
lock_is_held(&rcu_sched_lock_map),
"Illegal synchronize_rcu() in RCU read-side critical section");
- if (rcu_blocking_is_gp()) {
- // Note well that this code runs with !PREEMPT && !SMP.
- // In addition, all code that advances grace periods runs at
- // process level. Therefore, this normal GP overlaps with
- // other normal GPs only by being fully nested within them,
- // which allows reuse of ->gp_seq_polled_snap.
- rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
- rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
- if (rcu_init_invoked())
- cond_resched_tasks_rcu_qs();
- return; // Context allows vacuous grace periods.
+ if (!rcu_blocking_is_gp()) {
+ if (rcu_gp_is_expedited())
+ synchronize_rcu_expedited();
+ else
+ wait_rcu_gp(call_rcu);
+ return;
}
- if (rcu_gp_is_expedited())
- synchronize_rcu_expedited();
- else
- wait_rcu_gp(call_rcu);
+
+ // Context allows vacuous grace periods.
+ // Note well that this code runs with !PREEMPT && !SMP.
+ // In addition, all code that advances grace periods runs at
+ // process level. Therefore, this normal GP overlaps with other
+ // normal GPs only by being fully nested within them, which allows
+ // reuse of ->gp_seq_polled_snap.
+ rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_snap);
+ rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_snap);
+
+ // Update the normal grace-period counters to record
+ // this grace period, but only those used by the boot CPU.
+ // The rcu_scheduler_starting() will take care of the rest of
+ // these counters.
+ local_irq_save(flags);
+ WARN_ON_ONCE(num_online_cpus() > 1);
+ rcu_state.gp_seq += (1 << RCU_SEQ_CTR_SHIFT);
+ for (rnp = this_cpu_ptr(&rcu_data)->mynode; rnp; rnp = rnp->parent)
+ rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq;
+ local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(synchronize_rcu);
/**
+ * get_completed_synchronize_rcu_full - Return a full pre-completed polled state cookie
+ * @rgosp: Place to put state cookie
+ *
+ * Stores into @rgosp a value that will always be treated by functions
+ * like poll_state_synchronize_rcu_full() as a cookie whose grace period
+ * has already completed.
+ */
+void get_completed_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ rgosp->rgos_norm = RCU_GET_STATE_COMPLETED;
+ rgosp->rgos_exp = RCU_GET_STATE_COMPLETED;
+}
+EXPORT_SYMBOL_GPL(get_completed_synchronize_rcu_full);
+
+/**
* get_state_synchronize_rcu - Snapshot current RCU state
*
* Returns a cookie that is used by a later call to cond_synchronize_rcu()
@@ -3541,21 +3567,42 @@ unsigned long get_state_synchronize_rcu(void)
EXPORT_SYMBOL_GPL(get_state_synchronize_rcu);
/**
- * start_poll_synchronize_rcu - Snapshot and start RCU grace period
+ * get_state_synchronize_rcu_full - Snapshot RCU state, both normal and expedited
+ * @rgosp: location to place combined normal/expedited grace-period state
*
- * Returns a cookie that is used by a later call to cond_synchronize_rcu()
- * or poll_state_synchronize_rcu() to determine whether or not a full
- * grace period has elapsed in the meantime. If the needed grace period
- * is not already slated to start, notifies RCU core of the need for that
- * grace period.
+ * Places the normal and expedited grace-period states in @rgosp. This
+ * state value can be passed to a later call to cond_synchronize_rcu_full()
+ * or poll_state_synchronize_rcu_full() to determine whether or not a
+ * grace period (whether normal or expedited) has elapsed in the meantime.
+ * The rcu_gp_oldstate structure takes up twice the memory of an unsigned
+ * long, but is guaranteed to see all grace periods. In contrast, the
+ * combined state occupies less memory, but can sometimes fail to take
+ * grace periods into account.
*
- * Interrupts must be enabled for the case where it is necessary to awaken
- * the grace-period kthread.
+ * This does not guarantee that the needed grace period will actually
+ * start.
*/
-unsigned long start_poll_synchronize_rcu(void)
+void get_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ struct rcu_node *rnp = rcu_get_root();
+
+ /*
+ * Any prior manipulation of RCU-protected data must happen
+ * before the loads from ->gp_seq and ->expedited_sequence.
+ */
+ smp_mb(); /* ^^^ */
+ rgosp->rgos_norm = rcu_seq_snap(&rnp->gp_seq);
+ rgosp->rgos_exp = rcu_seq_snap(&rcu_state.expedited_sequence);
+}
+EXPORT_SYMBOL_GPL(get_state_synchronize_rcu_full);
+
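
As a hedged illustration of the snapshot side (names such as `obj`,
`remove_object()`, `obj_free()`, and `defer_free()` are hypothetical), note that
this function only records state and does not itself start a grace period:

	struct rcu_gp_oldstate gos;

	remove_object(obj);			/* unpublish the object */
	get_state_synchronize_rcu_full(&gos);	/* record, but do not start, a GP */
	/* ... other work ... */
	if (poll_state_synchronize_rcu_full(&gos))
		obj_free(obj);			/* a full GP has since elapsed */
	else
		defer_free(obj, &gos);		/* re-poll later */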
+/*
+ * Helper function for start_poll_synchronize_rcu() and
+ * start_poll_synchronize_rcu_full().
+ */
+static void start_poll_synchronize_rcu_common(void)
{
unsigned long flags;
- unsigned long gp_seq = get_state_synchronize_rcu();
bool needwake;
struct rcu_data *rdp;
struct rcu_node *rnp;
@@ -3575,17 +3622,57 @@ unsigned long start_poll_synchronize_rcu(void)
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (needwake)
rcu_gp_kthread_wake();
+}
+
+/**
+ * start_poll_synchronize_rcu - Snapshot and start RCU grace period
+ *
+ * Returns a cookie that is used by a later call to cond_synchronize_rcu()
+ * or poll_state_synchronize_rcu() to determine whether or not a full
+ * grace period has elapsed in the meantime. If the needed grace period
+ * is not already slated to start, notifies RCU core of the need for that
+ * grace period.
+ *
+ * Interrupts must be enabled for the case where it is necessary to awaken
+ * the grace-period kthread.
+ */
+unsigned long start_poll_synchronize_rcu(void)
+{
+ unsigned long gp_seq = get_state_synchronize_rcu();
+
+ start_poll_synchronize_rcu_common();
return gp_seq;
}
EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
/**
- * poll_state_synchronize_rcu - Conditionally wait for an RCU grace period
+ * start_poll_synchronize_rcu_full - Take a full snapshot and start RCU grace period
+ * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full()
*
+ * Places the normal and expedited grace-period states in *@rgosp. This
+ * state value can be passed to a later call to cond_synchronize_rcu_full()
+ * or poll_state_synchronize_rcu_full() to determine whether or not a
+ * grace period (whether normal or expedited) has elapsed in the meantime.
+ * If the needed grace period is not already slated to start, notifies
+ * RCU core of the need for that grace period.
+ *
+ * Interrupts must be enabled for the case where it is necessary to awaken
+ * the grace-period kthread.
+ */
+void start_poll_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ get_state_synchronize_rcu_full(rgosp);
+
+ start_poll_synchronize_rcu_common();
+}
+EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_full);
+
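
Unlike get_state_synchronize_rcu_full(), this variant also notifies the RCU core,
so a later poll is guaranteed to eventually succeed. A short sketch, with `obj`
and its helpers hypothetical:

	struct rcu_gp_oldstate gos;

	remove_object(obj);
	start_poll_synchronize_rcu_full(&gos);	/* snapshot and kick off a GP */
	/* ... later, e.g. from a workqueue handler ... */
	if (poll_state_synchronize_rcu_full(&gos))
		obj_free(obj);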
+/**
+ * poll_state_synchronize_rcu - Has the specified RCU grace period completed?
* @oldstate: value from get_state_synchronize_rcu() or start_poll_synchronize_rcu()
*
* If a full RCU grace period has elapsed since the earlier call from
- * which oldstate was obtained, return @true, otherwise return @false.
+ * which @oldstate was obtained, return @true, otherwise return @false.
* If @false is returned, it is the caller's responsibility to invoke this
* function later on until it does return @true. Alternatively, the caller
* can explicitly wait for a grace period, for example, by passing @oldstate
@@ -3594,10 +3681,11 @@ EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu);
* Yes, this function does not take counter wrap into account.
* But counter wrap is harmless. If the counter wraps, we have waited for
* more than a billion grace periods (and way more on a 64-bit system!).
- * Those needing to keep oldstate values for very long time periods
- * (many hours even on 32-bit systems) should check them occasionally
- * and either refresh them or set a flag indicating that the grace period
- * has completed.
+ * Those needing to keep old state values for very long time periods
+ * (many hours even on 32-bit systems) should check them occasionally and
+ * either refresh them or set a flag indicating that the grace period has
+ * completed. Alternatively, they can use get_completed_synchronize_rcu()
+ * to get a guaranteed-completed grace-period state.
*
* This function provides the same memory-ordering guarantees that
* would be provided by a synchronize_rcu() that was invoked at the call
@@ -3616,8 +3704,56 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
/**
- * cond_synchronize_rcu - Conditionally wait for an RCU grace period
+ * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
+ * @rgosp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full()
*
+ * If a full RCU grace period has elapsed since the earlier call from
+ * which *rgosp was obtained, return @true, otherwise return @false.
+ * If @false is returned, it is the caller's responsibility to invoke this
+ * function later on until it does return @true. Alternatively, the caller
+ * can explicitly wait for a grace period, for example, by passing @rgosp
+ * to cond_synchronize_rcu_full() or by directly invoking synchronize_rcu().
+ *
+ * Yes, this function does not take counter wrap into account.
+ * But counter wrap is harmless. If the counter wraps, we have waited
+ * for more than a billion grace periods (and way more on a 64-bit
+ * system!). Those needing to keep rcu_gp_oldstate values for very
+ * long time periods (many hours even on 32-bit systems) should check
+ * them occasionally and either refresh them or set a flag indicating
+ * that the grace period has completed. Alternatively, they can use
+ * get_completed_synchronize_rcu_full() to get a guaranteed-completed
+ * grace-period state.
+ *
+ * This function provides the same memory-ordering guarantees that would
+ * be provided by a synchronize_rcu() that was invoked at the call to
+ * the function that provided @rgosp, and that returned at the end of this
+ * function. And this guarantee requires that the root rcu_node structure's
+ * ->gp_seq field be checked instead of that of the rcu_state structure.
+ * The problem is that the just-ending grace-period's callbacks can be
+ * invoked between the time that the root rcu_node structure's ->gp_seq
+ * field is updated and the time that the rcu_state structure's ->gp_seq
+ * field is updated. Therefore, if a single synchronize_rcu() is to
+ * cause a subsequent poll_state_synchronize_rcu_full() to return @true,
+ * then the root rcu_node structure is the one that needs to be polled.
+ */
+bool poll_state_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ struct rcu_node *rnp = rcu_get_root();
+
+ smp_mb(); // Order against root rcu_node structure grace-period cleanup.
+ if (rgosp->rgos_norm == RCU_GET_STATE_COMPLETED ||
+ rcu_seq_done_exact(&rnp->gp_seq, rgosp->rgos_norm) ||
+ rgosp->rgos_exp == RCU_GET_STATE_COMPLETED ||
+ rcu_seq_done_exact(&rcu_state.expedited_sequence, rgosp->rgos_exp)) {
+ smp_mb(); /* Ensure GP ends before subsequent accesses. */
+ return true;
+ }
+ return false;
+}
+EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu_full);
+
+/**
+ * cond_synchronize_rcu - Conditionally wait for an RCU grace period
* @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited()
*
* If a full RCU grace period has elapsed since the earlier call to
@@ -3641,6 +3777,33 @@ void cond_synchronize_rcu(unsigned long oldstate)
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu);
+/**
+ * cond_synchronize_rcu_full - Conditionally wait for an RCU grace period
+ * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full()
+ *
+ * If a full RCU grace period has elapsed since the call to
+ * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
+ * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was
+ * obtained, just return. Otherwise, invoke synchronize_rcu() to wait
+ * for a full grace period.
+ *
+ * Yes, this function does not take counter wrap into account.
+ * But counter wrap is harmless. If the counter wraps, we have waited for
+ * more than 2 billion grace periods (and way more on a 64-bit system!),
+ * so waiting for a couple of additional grace periods should be just fine.
+ *
+ * This function provides the same memory-ordering guarantees that
+ * would be provided by a synchronize_rcu() that was invoked at the call
+ * to the function that provided @rgosp and that returned at the end of
+ * this function.
+ */
+void cond_synchronize_rcu_full(struct rcu_gp_oldstate *rgosp)
+{
+ if (!poll_state_synchronize_rcu_full(rgosp))
+ synchronize_rcu();
+}
+EXPORT_SYMBOL_GPL(cond_synchronize_rcu_full);
+
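
A sketch of a teardown path, assuming `fp->fgos` was filled in earlier by one of
the _full() snapshot functions above (the `foo` structure is hypothetical):

	static void foo_destroy(struct foo *fp)
	{
		/* Blocks only if the recorded grace period has not elapsed. */
		cond_synchronize_rcu_full(&fp->fgos);
		kfree(fp);
	}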
/*
* Check to see if there is any immediate RCU-related work to be done by
* the current CPU, returning 1 if so and zero otherwise. The checks are
@@ -4312,9 +4475,20 @@ early_initcall(rcu_spawn_gp_kthread);
*/
void rcu_scheduler_starting(void)
{
+ unsigned long flags;
+ struct rcu_node *rnp;
+
WARN_ON(num_online_cpus() != 1);
WARN_ON(nr_context_switches() > 0);
rcu_test_sync_prims();
+
+ // Fix up the ->gp_seq counters.
+ local_irq_save(flags);
+ rcu_for_each_node_breadth_first(rnp)
+ rnp->gp_seq_needed = rnp->gp_seq = rcu_state.gp_seq;
+ local_irq_restore(flags);
+
+ // Switch out of early boot mode.
rcu_scheduler_active = RCU_SCHEDULER_INIT;
rcu_test_sync_prims();
}
diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index be667583a554..18e9b4cd78ef 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -828,11 +828,13 @@ static void rcu_exp_handler(void *unused)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
struct rcu_node *rnp = rdp->mynode;
+ bool preempt_bh_enabled = !(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK));
if (!(READ_ONCE(rnp->expmask) & rdp->grpmask) ||
__this_cpu_read(rcu_data.cpu_no_qs.b.exp))
return;
- if (rcu_is_cpu_rrupt_from_idle()) {
+ if (rcu_is_cpu_rrupt_from_idle() ||
+ (IS_ENABLED(CONFIG_PREEMPT_COUNT) && preempt_bh_enabled)) {
rcu_report_exp_rdp(this_cpu_ptr(&rcu_data));
return;
}
@@ -906,6 +908,7 @@ static int rcu_print_task_exp_stall(struct rcu_node *rnp)
void synchronize_rcu_expedited(void)
{
bool boottime = (rcu_scheduler_active == RCU_SCHEDULER_INIT);
+ unsigned long flags;
struct rcu_exp_work rew;
struct rcu_node *rnp;
unsigned long s;
@@ -924,8 +927,11 @@ void synchronize_rcu_expedited(void)
// them, which allows reuse of ->gp_seq_polled_exp_snap.
rcu_poll_gp_seq_start_unlocked(&rcu_state.gp_seq_polled_exp_snap);
rcu_poll_gp_seq_end_unlocked(&rcu_state.gp_seq_polled_exp_snap);
- if (rcu_init_invoked())
- cond_resched();
+
+ local_irq_save(flags);
+ WARN_ON_ONCE(num_online_cpus() > 1);
+ rcu_state.expedited_sequence += (1 << RCU_SEQ_CTR_SHIFT);
+ local_irq_restore(flags);
return; // Context allows vacuous grace periods.
}
@@ -1028,6 +1034,24 @@ unsigned long start_poll_synchronize_rcu_expedited(void)
EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited);
/**
+ * start_poll_synchronize_rcu_expedited_full - Take a full snapshot and start expedited grace period
+ * @rgosp: Place to put snapshot of grace-period state
+ *
+ * Places the normal and expedited grace-period states in @rgosp. This
+ * state value can be passed to a later call to cond_synchronize_rcu_full()
+ * or poll_state_synchronize_rcu_full() to determine whether or not a
+ * grace period (whether normal or expedited) has elapsed in the meantime.
+ * If the needed expedited grace period is not already slated to start,
+ * initiates that grace period.
+ */
+void start_poll_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
+{
+ get_state_synchronize_rcu_full(rgosp);
+ (void)start_poll_synchronize_rcu_expedited();
+}
+EXPORT_SYMBOL_GPL(start_poll_synchronize_rcu_expedited_full);
+
+/**
* cond_synchronize_rcu_expedited - Conditionally wait for an expedited RCU grace period
*
* @oldstate: value from get_state_synchronize_rcu(), start_poll_synchronize_rcu(), or start_poll_synchronize_rcu_expedited()
@@ -1053,3 +1077,30 @@ void cond_synchronize_rcu_expedited(unsigned long oldstate)
synchronize_rcu_expedited();
}
EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited);
+
+/**
+ * cond_synchronize_rcu_expedited_full - Conditionally wait for an expedited RCU grace period
+ * @rgosp: value from get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(), or start_poll_synchronize_rcu_expedited_full()
+ *
+ * If a full RCU grace period has elapsed since the call to
+ * get_state_synchronize_rcu_full(), start_poll_synchronize_rcu_full(),
+ * or start_poll_synchronize_rcu_expedited_full() from which @rgosp was
+ * obtained, just return. Otherwise, invoke synchronize_rcu_expedited()
+ * to wait for a full grace period.
+ *
+ * Yes, this function does not take counter wrap into account.
+ * But counter wrap is harmless. If the counter wraps, we have waited for
+ * more than 2 billion grace periods (and way more on a 64-bit system!),
+ * so waiting for a couple of additional grace periods should be just fine.
+ *
+ * This function provides the same memory-ordering guarantees that
+ * would be provided by a synchronize_rcu() that was invoked at the call
+ * to the function that provided @rgosp and that returned at the end of
+ * this function.
+ */
+void cond_synchronize_rcu_expedited_full(struct rcu_gp_oldstate *rgosp)
+{
+ if (!poll_state_synchronize_rcu_full(rgosp))
+ synchronize_rcu_expedited();
+}
+EXPORT_SYMBOL_GPL(cond_synchronize_rcu_expedited_full);
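
Combining the two expedited additions, a hedged caller-side sketch (the object
helpers are hypothetical):

	struct rcu_gp_oldstate gos;

	remove_object(obj);
	start_poll_synchronize_rcu_expedited_full(&gos); /* start expedited GP */
	/* ... later ... */
	cond_synchronize_rcu_expedited_full(&gos);	 /* waits only if needed */
	obj_free(obj);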
diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h
index a8f574d8850d..0a5f0ef41484 100644
--- a/kernel/rcu/tree_nocb.h
+++ b/kernel/rcu/tree_nocb.h
@@ -1111,7 +1111,7 @@ int rcu_nocb_cpu_deoffload(int cpu)
if (!ret)
cpumask_clear_cpu(cpu, rcu_nocb_mask);
} else {
- pr_info("NOCB: Can't CB-deoffload an offline CPU\n");
+ pr_info("NOCB: Cannot CB-deoffload offline CPU %d\n", rdp->cpu);
ret = -EINVAL;
}
}
@@ -1196,7 +1196,7 @@ int rcu_nocb_cpu_offload(int cpu)
if (!ret)
cpumask_set_cpu(cpu, rcu_nocb_mask);
} else {
- pr_info("NOCB: Can't CB-offload an offline CPU\n");
+ pr_info("NOCB: Cannot CB-offload offline CPU %d\n", rdp->cpu);
ret = -EINVAL;
}
}
@@ -1452,8 +1452,8 @@ static void show_rcu_nocb_gp_state(struct rcu_data *rdp)
(long)rdp->nocb_gp_seq,
rnp->grplo, rnp->grphi, READ_ONCE(rdp->nocb_gp_loops),
rdp->nocb_gp_kthread ? task_state_to_char(rdp->nocb_gp_kthread) : '.',
- rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
- show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
+ rdp->nocb_gp_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
+ show_rcu_should_be_on_cpu(rdp->nocb_gp_kthread));
}
/* Dump out nocb kthread state for the specified rcu_data structure. */
@@ -1497,7 +1497,7 @@ static void show_rcu_nocb_state(struct rcu_data *rdp)
".B"[!!rcu_cblist_n_cbs(&rdp->nocb_bypass)],
rcu_segcblist_n_cbs(&rdp->cblist),
rdp->nocb_cb_kthread ? task_state_to_char(rdp->nocb_cb_kthread) : '.',
- rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_gp_kthread) : -1,
+ rdp->nocb_cb_kthread ? (int)task_cpu(rdp->nocb_cb_kthread) : -1,
show_rcu_should_be_on_cpu(rdp->nocb_cb_kthread));
/* It is OK for GP kthreads to have GP state. */
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 438ecae6bd7e..e3142ee35fc6 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -641,7 +641,8 @@ static void rcu_read_unlock_special(struct task_struct *t)
expboost = (t->rcu_blocked_node && READ_ONCE(t->rcu_blocked_node->exp_tasks)) ||
(rdp->grpmask & READ_ONCE(rnp->expmask)) ||
- IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) ||
+ (IS_ENABLED(CONFIG_RCU_STRICT_GRACE_PERIOD) &&
+ ((rdp->grpmask & READ_ONCE(rnp->qsmask)) || t->rcu_blocked_node)) ||
(IS_ENABLED(CONFIG_RCU_BOOST) && irqs_were_disabled &&
t->rcu_blocked_node);
// Need to defer quiescent state until everything is enabled.
@@ -718,9 +719,6 @@ static void rcu_flavor_sched_clock_irq(int user)
struct task_struct *t = current;
lockdep_assert_irqs_disabled();
- if (user || rcu_is_cpu_rrupt_from_idle()) {
- rcu_note_voluntary_context_switch(current);
- }
if (rcu_preempt_depth() > 0 ||
(preempt_count() & (PREEMPT_MASK | SOFTIRQ_MASK))) {
/* No QS, force context switch if deferred. */
@@ -824,6 +822,7 @@ void rcu_read_unlock_strict(void)
if (irqs_disabled() || preempt_count() || !rcu_state.gp_kthread)
return;
rdp = this_cpu_ptr(&rcu_data);
+ rdp->cpu_no_qs.b.norm = false;
rcu_report_qs_rdp(rdp);
udelay(rcu_unlock_delay);
}
@@ -869,7 +868,7 @@ void rcu_all_qs(void)
if (!raw_cpu_read(rcu_data.rcu_urgent_qs))
return;
- preempt_disable();
+ preempt_disable(); // For CONFIG_PREEMPT_COUNT=y kernels
/* Load rcu_urgent_qs before other flags. */
if (!smp_load_acquire(this_cpu_ptr(&rcu_data.rcu_urgent_qs))) {
preempt_enable();
@@ -931,10 +930,13 @@ static notrace bool rcu_preempt_need_deferred_qs(struct task_struct *t)
return false;
}
-// Except that we do need to respond to a request by an expedited grace
-// period for a quiescent state from this CPU. Note that requests from
-// tasks are handled when removing the task from the blocked-tasks list
-// below.
+// Except that we do need to respond to a request by an expedited
+// grace period for a quiescent state from this CPU. Note that in
+// non-preemptible kernels, there can be no context switches within RCU
+// read-side critical sections, which in turn means that the leaf rcu_node
+// structure's blocked-tasks list is always empty. There is therefore no
+// need to actually check it. Instead, a quiescent state from this CPU
+// suffices, and this function is only called from such a quiescent state.
notrace void rcu_preempt_deferred_qs(struct task_struct *t)
{
struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
@@ -972,7 +974,6 @@ static void rcu_flavor_sched_clock_irq(int user)
* neither access nor modify, at least not while the
* corresponding CPU is online.
*/
-
rcu_qs();
}
}
@@ -1238,8 +1239,11 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
cpu != outgoingcpu)
cpumask_set_cpu(cpu, cm);
cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
- if (cpumask_empty(cm))
+ if (cpumask_empty(cm)) {
cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
+ if (outgoingcpu >= 0)
+ cpumask_clear_cpu(outgoingcpu, cm);
+ }
set_cpus_allowed_ptr(t, cm);
mutex_unlock(&rnp->boost_kthread_mutex);
free_cpumask_var(cm);
diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h
index c3fbbcc09327..5653560573e2 100644
--- a/kernel/rcu/tree_stall.h
+++ b/kernel/rcu/tree_stall.h
@@ -368,7 +368,7 @@ static void rcu_dump_cpu_stacks(void)
if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
if (cpu_is_offline(cpu))
pr_err("Offline CPU %d blocking current GP.\n", cpu);
- else if (!trigger_single_cpu_backtrace(cpu))
+ else
dump_cpu_task(cpu);
}
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
@@ -511,8 +511,7 @@ static void rcu_check_gp_kthread_starvation(void)
pr_err("RCU GP kthread last ran on offline CPU %d.\n", cpu);
} else {
pr_err("Stack dump where RCU GP kthread last ran:\n");
- if (!trigger_single_cpu_backtrace(cpu))
- dump_cpu_task(cpu);
+ dump_cpu_task(cpu);
}
}
wake_up_process(gpk);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ee28253c9ac0..60fdc0faf1c9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
#include <uapi/linux/sched/types.h>
+#include <asm/irq_regs.h>
#include <asm/switch_to.h>
#include <asm/tlb.h>
@@ -11183,6 +11184,19 @@ struct cgroup_subsys cpu_cgrp_subsys = {
void dump_cpu_task(int cpu)
{
+	/* When dumping our own state from a hard IRQ, the saved regs are exact. */
+	if (cpu == smp_processor_id() && in_hardirq()) {
+		struct pt_regs *regs;
+
+		regs = get_irq_regs();
+		if (regs) {
+			show_regs(regs);
+			return;
+		}
+	}
+
+	/* Otherwise try an NMI backtrace; fall back to the scheduler's dump. */
+	if (trigger_single_cpu_backtrace(cpu))
+		return;
+
pr_info("Task dump for CPU %d:\n", cpu);
sched_show_task(cpu_curr(cpu));
}
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index 1207c78f85c1..9161d1136d01 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -25,6 +25,9 @@ struct sugov_policy {
unsigned int next_freq;
unsigned int cached_raw_freq;
+ /* max CPU capacity, which is equal for all CPUs in freq. domain */
+ unsigned long max;
+
/* The next fields are only needed if fast switch cannot be used: */
struct irq_work irq_work;
struct kthread_work work;
@@ -48,7 +51,6 @@ struct sugov_cpu {
unsigned long util;
unsigned long bw_dl;
- unsigned long max;
/* The field below is for single-CPU policies only: */
#ifdef CONFIG_NO_HZ_COMMON
@@ -158,7 +160,6 @@ static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
struct rq *rq = cpu_rq(sg_cpu->cpu);
- sg_cpu->max = arch_scale_cpu_capacity(sg_cpu->cpu);
sg_cpu->bw_dl = cpu_bw_dl(rq);
sg_cpu->util = effective_cpu_util(sg_cpu->cpu, cpu_util_cfs(sg_cpu->cpu),
FREQUENCY_UTIL, NULL);
@@ -253,6 +254,7 @@ static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
*/
static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
{
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long boost;
/* No boost currently required */
@@ -280,7 +282,8 @@ static void sugov_iowait_apply(struct sugov_cpu *sg_cpu, u64 time)
* sg_cpu->util is already in capacity scale; convert iowait_boost
* into the same scale so we can compare.
*/
- boost = (sg_cpu->iowait_boost * sg_cpu->max) >> SCHED_CAPACITY_SHIFT;
+ boost = sg_cpu->iowait_boost * sg_policy->max;
+ boost >>= SCHED_CAPACITY_SHIFT;
boost = uclamp_rq_util_with(cpu_rq(sg_cpu->cpu), boost, NULL);
if (sg_cpu->util < boost)
sg_cpu->util = boost;
@@ -337,7 +340,7 @@ static void sugov_update_single_freq(struct update_util_data *hook, u64 time,
if (!sugov_update_single_common(sg_cpu, time, flags))
return;
- next_f = get_next_freq(sg_policy, sg_cpu->util, sg_cpu->max);
+ next_f = get_next_freq(sg_policy, sg_cpu->util, sg_policy->max);
/*
* Do not reduce the frequency if the CPU has not been idle
* recently, as the reduction is likely to be premature then.
@@ -373,6 +376,7 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
unsigned int flags)
{
struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
+ struct sugov_policy *sg_policy = sg_cpu->sg_policy;
unsigned long prev_util = sg_cpu->util;
/*
@@ -399,7 +403,8 @@ static void sugov_update_single_perf(struct update_util_data *hook, u64 time,
sg_cpu->util = prev_util;
cpufreq_driver_adjust_perf(sg_cpu->cpu, map_util_perf(sg_cpu->bw_dl),
- map_util_perf(sg_cpu->util), sg_cpu->max);
+ map_util_perf(sg_cpu->util),
+ sg_policy->max);
sg_cpu->sg_policy->last_freq_update_time = time;
}
@@ -408,25 +413,19 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
struct sugov_policy *sg_policy = sg_cpu->sg_policy;
struct cpufreq_policy *policy = sg_policy->policy;
- unsigned long util = 0, max = 1;
+ unsigned long util = 0;
unsigned int j;
for_each_cpu(j, policy->cpus) {
struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
- unsigned long j_util, j_max;
sugov_get_util(j_sg_cpu);
sugov_iowait_apply(j_sg_cpu, time);
- j_util = j_sg_cpu->util;
- j_max = j_sg_cpu->max;
- if (j_util * max > j_max * util) {
- util = j_util;
- max = j_max;
- }
+ util = max(j_sg_cpu->util, util);
}
- return get_next_freq(sg_policy, util, max);
+ return get_next_freq(sg_policy, util, sg_policy->max);
}
static void
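
Why the cross-multiplication can go away: every CPU in a frequency domain now
shares one capacity M = sg_policy->max, so the old dominance test simplifies:

	j_util * max > j_max * util
	j_util * M   > M * util		/* j_max == max == M */
	j_util       > util

Keeping the running maximum of the per-CPU utilizations therefore selects the
same CPU's utilization that the removed comparison did.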
@@ -752,7 +751,7 @@ static int sugov_start(struct cpufreq_policy *policy)
{
struct sugov_policy *sg_policy = policy->governor_data;
void (*uu)(struct update_util_data *data, u64 time, unsigned int flags);
- unsigned int cpu;
+ unsigned int cpu = cpumask_first(policy->cpus);
sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
sg_policy->last_freq_update_time = 0;
@@ -760,6 +759,7 @@ static int sugov_start(struct cpufreq_policy *policy)
sg_policy->work_in_progress = false;
sg_policy->limits_changed = false;
sg_policy->cached_raw_freq = 0;
+ sg_policy->max = arch_scale_cpu_capacity(cpu);
sg_policy->need_freq_update = cpufreq_driver_test_flags(CPUFREQ_NEED_UPDATE_LIMITS);
diff --git a/kernel/smp.c b/kernel/smp.c
index 650810a6f29b..e8cdc025a046 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -370,8 +370,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *
if (cpu >= 0) {
if (static_branch_unlikely(&csdlock_debug_extended))
csd_lock_print_extended(csd, cpu);
- if (!trigger_single_cpu_backtrace(cpu))
- dump_cpu_task(cpu);
+ dump_cpu_task(cpu);
if (!cpu_cur_csd) {
pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
arch_send_call_function_single_ipi(cpu);
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 5481ba44a8d6..3f464bbda0e9 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -9,6 +9,7 @@
#include <linux/highuid.h>
#include <linux/cred.h>
#include <linux/securebits.h>
+#include <linux/security.h>
#include <linux/keyctl.h>
#include <linux/key-type.h>
#include <keys/user-type.h>
@@ -113,6 +114,10 @@ int create_user_ns(struct cred *new)
!kgid_has_mapping(parent_ns, group))
goto fail_dec;
+ ret = security_create_user_ns(new);
+ if (ret < 0)
+ goto fail_dec;
+
ret = -ENOMEM;
ns = kmem_cache_zalloc(user_ns_cachep, GFP_KERNEL);
if (!ns)
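
The new security_create_user_ns() call is dispatched to LSMs; assuming the hook
is exposed to BPF LSM as userns_create (an assumption about the hook name, not
something shown in this diff), a program could veto unprivileged user-namespace
creation along these lines:

	// SPDX-License-Identifier: GPL-2.0
	// Sketch only: denies every create_user_ns() attempt.
	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char LICENSE[] SEC("license") = "GPL";

	SEC("lsm/userns_create")
	int BPF_PROG(deny_userns, const struct cred *cred)
	{
		return -EPERM;	/* reject before the namespace is allocated */
	}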
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 39060a5d0905..7cd5f5e7e0a1 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -1651,7 +1651,7 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq,
struct work_struct *work = &dwork->work;
WARN_ON_ONCE(!wq);
- WARN_ON_FUNCTION_MISMATCH(timer->function, delayed_work_timer_fn);
+ WARN_ON_ONCE(timer->function != delayed_work_timer_fn);
WARN_ON_ONCE(timer_pending(timer));
WARN_ON_ONCE(!list_empty(&work->entry));