diff options
Diffstat (limited to 'kernel/rcu/tree_plugin.h')
-rw-r--r-- | kernel/rcu/tree_plugin.h | 319 |
1 files changed, 118 insertions, 201 deletions
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 05915e536336..97dba50f6fb2 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1,27 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ /* * Read-Copy Update mechanism for mutual exclusion (tree-based version) * Internal non-public definitions that provide either classic * or preemptible semantics. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, you can access it online at - * http://www.gnu.org/licenses/gpl-2.0.html. - * * Copyright Red Hat, 2009 * Copyright IBM Corporation, 2009 * * Author: Ingo Molnar <mingo@elte.hu> - * Paul E. McKenney <paulmck@linux.vnet.ibm.com> + * Paul E. McKenney <paulmck@linux.ibm.com> */ #include <linux/delay.h> @@ -34,17 +21,7 @@ #include "../time/tick-internal.h" #ifdef CONFIG_RCU_BOOST - #include "../locking/rtmutex_common.h" - -/* - * Control variables for per-CPU and per-rcu_node kthreads. - */ -static DEFINE_PER_CPU(struct task_struct *, rcu_cpu_kthread_task); -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_status); -DEFINE_PER_CPU(unsigned int, rcu_cpu_kthread_loops); -DEFINE_PER_CPU(char, rcu_cpu_has_work); - #else /* #ifdef CONFIG_RCU_BOOST */ /* @@ -307,7 +284,7 @@ static void rcu_qs(void) __this_cpu_read(rcu_data.gp_seq), TPS("cpuqs")); __this_cpu_write(rcu_data.cpu_no_qs.b.norm, false); - barrier(); /* Coordinate with rcu_flavor_check_callbacks(). */ + barrier(); /* Coordinate with rcu_flavor_sched_clock_irq(). */ current->rcu_read_unlock_special.b.need_qs = false; } } @@ -397,6 +374,11 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) return rnp->gp_tasks != NULL; } +/* Bias and limit values for ->rcu_read_lock_nesting. */ +#define RCU_NEST_BIAS INT_MAX +#define RCU_NEST_NMAX (-INT_MAX / 2) +#define RCU_NEST_PMAX (INT_MAX / 2) + /* * Preemptible RCU implementation for rcu_read_lock(). * Just increment ->rcu_read_lock_nesting, shared state will be updated @@ -405,6 +387,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) void __rcu_read_lock(void) { current->rcu_read_lock_nesting++; + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX); barrier(); /* critical section after entry code. */ } EXPORT_SYMBOL_GPL(__rcu_read_lock); @@ -424,20 +408,18 @@ void __rcu_read_unlock(void) --t->rcu_read_lock_nesting; } else { barrier(); /* critical section before exit code. */ - t->rcu_read_lock_nesting = INT_MIN; + t->rcu_read_lock_nesting = -RCU_NEST_BIAS; barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = READ_ONCE(t->rcu_read_lock_nesting); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + int rrln = t->rcu_read_lock_nesting; - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX); } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); @@ -597,7 +579,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (this_cpu_ptr(&rcu_data)->deferred_qs || + return (__this_cpu_read(rcu_data.deferred_qs) || READ_ONCE(t->rcu_read_unlock_special.s)) && t->rcu_read_lock_nesting <= 0; } @@ -617,11 +599,11 @@ static void rcu_preempt_deferred_qs(struct task_struct *t) if (!rcu_preempt_need_deferred_qs(t)) return; if (couldrecurse) - t->rcu_read_lock_nesting -= INT_MIN; + t->rcu_read_lock_nesting -= RCU_NEST_BIAS; local_irq_save(flags); rcu_preempt_deferred_qs_irqrestore(t, flags); if (couldrecurse) - t->rcu_read_lock_nesting += INT_MIN; + t->rcu_read_lock_nesting += RCU_NEST_BIAS; } /* @@ -642,13 +624,21 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); - if ((preempt_bh_were_disabled || irqs_were_disabled) && - t->rcu_read_unlock_special.b.blocked) { + if (preempt_bh_were_disabled || irqs_were_disabled) { + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); /* Need to defer quiescent state until everything is enabled. */ - raise_softirq_irqoff(RCU_SOFTIRQ); + if (irqs_were_disabled) { + /* Enabling irqs does not reschedule, so... */ + raise_softirq_irqoff(RCU_SOFTIRQ); + } else { + /* Enabling BH or preempt does reschedule, so... */ + set_tsk_need_resched(current); + set_preempt_need_resched(); + } local_irq_restore(flags); return; } + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); rcu_preempt_deferred_qs_irqrestore(t, flags); } @@ -775,13 +765,13 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) } /* - * Check for a quiescent state from the current CPU. When a task blocks, - * the task is recorded in the corresponding CPU's rcu_node structure, - * which is checked elsewhere. - * - * Caller must disable hard irqs. + * Check for a quiescent state from the current CPU, including voluntary + * context switches for Tasks RCU. When a task blocks, the task is + * recorded in the corresponding CPU's rcu_node structure, which is checked + * elsewhere, hence this function need only check for quiescent states + * related to the current CPU, not to those related to tasks. */ -static void rcu_flavor_check_callbacks(int user) +static void rcu_flavor_sched_clock_irq(int user) { struct task_struct *t = current; @@ -812,54 +802,6 @@ static void rcu_flavor_check_callbacks(int user) t->rcu_read_unlock_special.b.need_qs = true; } -/** - * synchronize_rcu - wait until a grace period has elapsed. - * - * Control will return to the caller some time after a full grace - * period has elapsed, in other words after all currently executing RCU - * read-side critical sections have completed. Note, however, that - * upon return from synchronize_rcu(), the caller might well be executing - * concurrently with new RCU read-side critical sections that began while - * synchronize_rcu() was waiting. RCU read-side critical sections are - * delimited by rcu_read_lock() and rcu_read_unlock(), and may be nested. - * In addition, regions of code across which interrupts, preemption, or - * softirqs have been disabled also serve as RCU read-side critical - * sections. This includes hardware interrupt handlers, softirq handlers, - * and NMI handlers. - * - * Note that this guarantee implies further memory-ordering guarantees. - * On systems with more than one CPU, when synchronize_rcu() returns, - * each CPU is guaranteed to have executed a full memory barrier since - * the end of its last RCU read-side critical section whose beginning - * preceded the call to synchronize_rcu(). In addition, each CPU having - * an RCU read-side critical section that extends beyond the return from - * synchronize_rcu() is guaranteed to have executed a full memory barrier - * after the beginning of synchronize_rcu() and before the beginning of - * that RCU read-side critical section. Note that these guarantees include - * CPUs that are offline, idle, or executing in user mode, as well as CPUs - * that are executing in the kernel. - * - * Furthermore, if CPU A invoked synchronize_rcu(), which returned - * to its caller on CPU B, then both CPU A and CPU B are guaranteed - * to have executed a full memory barrier during the execution of - * synchronize_rcu() -- even if CPU A and CPU B are the same CPU (but - * again only if the system has more than one CPU). - */ -void synchronize_rcu(void) -{ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || - lock_is_held(&rcu_lock_map) || - lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu() in RCU read-side critical section"); - if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) - return; - if (rcu_gp_is_expedited()) - synchronize_rcu_expedited(); - else - wait_rcu_gp(call_rcu); -} -EXPORT_SYMBOL_GPL(synchronize_rcu); - /* * Check for a task exiting while in a preemptible-RCU read-side * critical section, clean up if so. No need to issue warnings, @@ -1075,14 +1017,10 @@ static void rcu_preempt_check_blocked_tasks(struct rcu_node *rnp) } /* - * Check to see if this CPU is in a non-context-switch quiescent state - * (user mode or idle loop for rcu, non-softirq execution for rcu_bh). - * Also schedule RCU core processing. - * - * This function must be called from hardirq context. It is normally - * invoked from the scheduling-clock interrupt. + * Check to see if this CPU is in a non-context-switch quiescent state, + * namely user mode and idle loop. */ -static void rcu_flavor_check_callbacks(int user) +static void rcu_flavor_sched_clock_irq(int user) { if (user || rcu_is_cpu_rrupt_from_idle()) { @@ -1102,22 +1040,6 @@ static void rcu_flavor_check_callbacks(int user) } } -/* PREEMPT=n implementation of synchronize_rcu(). */ -void synchronize_rcu(void) -{ - RCU_LOCKDEP_WARN(lock_is_held(&rcu_bh_lock_map) || - lock_is_held(&rcu_lock_map) || - lock_is_held(&rcu_sched_lock_map), - "Illegal synchronize_rcu() in RCU read-side critical section"); - if (rcu_blocking_is_gp()) - return; - if (rcu_gp_is_expedited()) - synchronize_rcu_expedited(); - else - wait_rcu_gp(call_rcu); -} -EXPORT_SYMBOL_GPL(synchronize_rcu); - /* * Because preemptible RCU does not exist, tasks cannot possibly exit * while in preemptible RCU read-side critical sections. @@ -1294,11 +1216,11 @@ static void invoke_rcu_callbacks_kthread(void) unsigned long flags; local_irq_save(flags); - __this_cpu_write(rcu_cpu_has_work, 1); - if (__this_cpu_read(rcu_cpu_kthread_task) != NULL && - current != __this_cpu_read(rcu_cpu_kthread_task)) { - rcu_wake_cond(__this_cpu_read(rcu_cpu_kthread_task), - __this_cpu_read(rcu_cpu_kthread_status)); + __this_cpu_write(rcu_data.rcu_cpu_has_work, 1); + if (__this_cpu_read(rcu_data.rcu_cpu_kthread_task) != NULL && + current != __this_cpu_read(rcu_data.rcu_cpu_kthread_task)) { + rcu_wake_cond(__this_cpu_read(rcu_data.rcu_cpu_kthread_task), + __this_cpu_read(rcu_data.rcu_cpu_kthread_status)); } local_irq_restore(flags); } @@ -1309,7 +1231,7 @@ static void invoke_rcu_callbacks_kthread(void) */ static bool rcu_is_callbacks_kthread(void) { - return __this_cpu_read(rcu_cpu_kthread_task) == current; + return __this_cpu_read(rcu_data.rcu_cpu_kthread_task) == current; } #define RCU_BOOST_DELAY_JIFFIES DIV_ROUND_UP(CONFIG_RCU_BOOST_DELAY * HZ, 1000) @@ -1356,11 +1278,6 @@ static int rcu_spawn_one_boost_kthread(struct rcu_node *rnp) return 0; } -static void rcu_kthread_do_work(void) -{ - rcu_do_batch(this_cpu_ptr(&rcu_data)); -} - static void rcu_cpu_kthread_setup(unsigned int cpu) { struct sched_param sp; @@ -1371,12 +1288,12 @@ static void rcu_cpu_kthread_setup(unsigned int cpu) static void rcu_cpu_kthread_park(unsigned int cpu) { - per_cpu(rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; + per_cpu(rcu_data.rcu_cpu_kthread_status, cpu) = RCU_KTHREAD_OFFCPU; } static int rcu_cpu_kthread_should_run(unsigned int cpu) { - return __this_cpu_read(rcu_cpu_has_work); + return __this_cpu_read(rcu_data.rcu_cpu_has_work); } /* @@ -1386,21 +1303,20 @@ static int rcu_cpu_kthread_should_run(unsigned int cpu) */ static void rcu_cpu_kthread(unsigned int cpu) { - unsigned int *statusp = this_cpu_ptr(&rcu_cpu_kthread_status); - char work, *workp = this_cpu_ptr(&rcu_cpu_has_work); + unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); + char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); int spincnt; for (spincnt = 0; spincnt < 10; spincnt++) { trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait")); local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; - this_cpu_inc(rcu_cpu_kthread_loops); local_irq_disable(); work = *workp; *workp = 0; local_irq_enable(); if (work) - rcu_kthread_do_work(); + rcu_do_batch(this_cpu_ptr(&rcu_data)); local_bh_enable(); if (*workp == 0) { trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); @@ -1446,7 +1362,7 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu) } static struct smp_hotplug_thread rcu_cpu_thread_spec = { - .store = &rcu_cpu_kthread_task, + .store = &rcu_data.rcu_cpu_kthread_task, .thread_should_run = rcu_cpu_kthread_should_run, .thread_fn = rcu_cpu_kthread, .thread_comm = "rcuc/%u", @@ -1463,8 +1379,9 @@ static void __init rcu_spawn_boost_kthreads(void) int cpu; for_each_possible_cpu(cpu) - per_cpu(rcu_cpu_has_work, cpu) = 0; - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); + per_cpu(rcu_data.rcu_cpu_has_work, cpu) = 0; + if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__)) + return; rcu_for_each_leaf_node(rnp) (void)rcu_spawn_one_boost_kthread(rnp); } @@ -1529,7 +1446,7 @@ static void rcu_prepare_kthreads(int cpu) int rcu_needs_cpu(u64 basemono, u64 *nextevt) { *nextevt = KTIME_MAX; - return rcu_cpu_has_callbacks(NULL); + return !rcu_segcblist_empty(&this_cpu_ptr(&rcu_data)->cblist); } /* @@ -1548,14 +1465,6 @@ static void rcu_prepare_for_idle(void) { } -/* - * Don't bother keeping a running count of the number of RCU callbacks - * posted because CONFIG_RCU_FAST_NO_HZ=n. - */ -static void rcu_idle_count_callbacks_posted(void) -{ -} - #else /* #if !defined(CONFIG_RCU_FAST_NO_HZ) */ /* @@ -1638,11 +1547,8 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) lockdep_assert_irqs_disabled(); - /* Snapshot to detect later posting of non-lazy callback. */ - rdp->nonlazy_posted_snap = rdp->nonlazy_posted; - /* If no callbacks, RCU doesn't need the CPU. */ - if (!rcu_cpu_has_callbacks(&rdp->all_lazy)) { + if (rcu_segcblist_empty(&rdp->cblist)) { *nextevt = KTIME_MAX; return 0; } @@ -1656,11 +1562,12 @@ int rcu_needs_cpu(u64 basemono, u64 *nextevt) rdp->last_accelerate = jiffies; /* Request timer delay depending on laziness, and round. */ - if (!rdp->all_lazy) { + rdp->all_lazy = !rcu_segcblist_n_nonlazy_cbs(&rdp->cblist); + if (rdp->all_lazy) { + dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; + } else { dj = round_up(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies; - } else { - dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; } *nextevt = basemono + dj * TICK_NSEC; return 0; @@ -1690,7 +1597,7 @@ static void rcu_prepare_for_idle(void) /* Handle nohz enablement switches conservatively. */ tne = READ_ONCE(tick_nohz_active); if (tne != rdp->tick_nohz_enabled_snap) { - if (rcu_cpu_has_callbacks(NULL)) + if (!rcu_segcblist_empty(&rdp->cblist)) invoke_rcu_core(); /* force nohz to see update. */ rdp->tick_nohz_enabled_snap = tne; return; @@ -1703,10 +1610,8 @@ static void rcu_prepare_for_idle(void) * callbacks, invoke RCU core for the side-effect of recalculating * idle duration on re-entry to idle. */ - if (rdp->all_lazy && - rdp->nonlazy_posted != rdp->nonlazy_posted_snap) { + if (rdp->all_lazy && rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)) { rdp->all_lazy = false; - rdp->nonlazy_posted_snap = rdp->nonlazy_posted; invoke_rcu_core(); return; } @@ -1742,19 +1647,6 @@ static void rcu_cleanup_after_idle(void) invoke_rcu_core(); } -/* - * Keep a running count of the number of non-lazy callbacks posted - * on this CPU. This running counter (which is never decremented) allows - * rcu_prepare_for_idle() to detect when something out of the idle loop - * posts a callback, even if an equal number of callbacks are invoked. - * Of course, callbacks should only be posted from within a trace event - * designed to be called from idle or from within RCU_NONIDLE(). - */ -static void rcu_idle_count_callbacks_posted(void) -{ - __this_cpu_add(rcu_data.nonlazy_posted, 1); -} - #endif /* #else #if !defined(CONFIG_RCU_FAST_NO_HZ) */ #ifdef CONFIG_RCU_FAST_NO_HZ @@ -1762,13 +1654,12 @@ static void rcu_idle_count_callbacks_posted(void) static void print_cpu_stall_fast_no_hz(char *cp, int cpu) { struct rcu_data *rdp = &per_cpu(rcu_data, cpu); - unsigned long nlpd = rdp->nonlazy_posted - rdp->nonlazy_posted_snap; - sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", + sprintf(cp, "last_accelerate: %04lx/%04lx, Nonlazy posted: %c%c%c", rdp->last_accelerate & 0xffff, jiffies & 0xffff, - ulong2long(nlpd), - rdp->all_lazy ? 'L' : '.', - rdp->tick_nohz_enabled_snap ? '.' : 'D'); + ".l"[rdp->all_lazy], + ".L"[!rcu_segcblist_n_nonlazy_cbs(&rdp->cblist)], + ".D"[!rdp->tick_nohz_enabled_snap]); } #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ @@ -1854,22 +1745,24 @@ static void zero_cpu_stall_ticks(struct rcu_data *rdp) /* * Offload callback processing from the boot-time-specified set of CPUs - * specified by rcu_nocb_mask. For each CPU in the set, there is a - * kthread created that pulls the callbacks from the corresponding CPU, - * waits for a grace period to elapse, and invokes the callbacks. - * The no-CBs CPUs do a wake_up() on their kthread when they insert - * a callback into any empty list, unless the rcu_nocb_poll boot parameter - * has been specified, in which case each kthread actively polls its - * CPU. (Which isn't so great for energy efficiency, but which does - * reduce RCU's overhead on that CPU.) + * specified by rcu_nocb_mask. For the CPUs in the set, there are kthreads + * created that pull the callbacks from the corresponding CPU, wait for + * a grace period to elapse, and invoke the callbacks. These kthreads + * are organized into leaders, which manage incoming callbacks, wait for + * grace periods, and awaken followers, and the followers, which only + * invoke callbacks. Each leader is its own follower. The no-CBs CPUs + * do a wake_up() on their kthread when they insert a callback into any + * empty list, unless the rcu_nocb_poll boot parameter has been specified, + * in which case each kthread actively polls its CPU. (Which isn't so great + * for energy efficiency, but which does reduce RCU's overhead on that CPU.) * * This is intended to be used in conjunction with Frederic Weisbecker's * adaptive-idle work, which would seriously reduce OS jitter on CPUs * running CPU-bound user-mode computations. * - * Offloading of callback processing could also in theory be used as - * an energy-efficiency measure because CPUs with no RCU callbacks - * queued are more aggressive about entering dyntick-idle mode. + * Offloading of callbacks can also be used as an energy-efficiency + * measure because CPUs with no RCU callbacks queued are more aggressive + * about entering dyntick-idle mode. */ @@ -1973,10 +1866,7 @@ static void wake_nocb_leader_defer(struct rcu_data *rdp, int waketype, raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); } -/* - * Does the specified CPU need an RCU callback for this invocation - * of rcu_barrier()? - */ +/* Does rcu_barrier need to queue an RCU callback on the specified CPU? */ static bool rcu_nocb_cpu_needs_barrier(int cpu) { struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); @@ -1992,12 +1882,12 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu) * callbacks would be posted. In the worst case, the first * barrier in rcu_barrier() suffices (but the caller cannot * necessarily rely on this, not a substitute for the caller - * getting the concurrency design right!). There must also be - * a barrier between the following load an posting of a callback + * getting the concurrency design right!). There must also be a + * barrier between the following load and posting of a callback * (if a callback is in fact needed). This is associated with an * atomic_inc() in the caller. */ - ret = atomic_long_read(&rdp->nocb_q_count); + ret = rcu_get_n_cbs_nocb_cpu(rdp); #ifdef CONFIG_PROVE_RCU rhp = READ_ONCE(rdp->nocb_head); @@ -2052,7 +1942,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, TPS("WakeNotPoll")); return; } - len = atomic_long_read(&rdp->nocb_q_count); + len = rcu_get_n_cbs_nocb_cpu(rdp); if (old_rhpp == &rdp->nocb_head) { if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ @@ -2101,11 +1991,11 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, trace_rcu_kfree_callback(rcu_state.name, rhp, (unsigned long)rhp->func, -atomic_long_read(&rdp->nocb_q_count_lazy), - -atomic_long_read(&rdp->nocb_q_count)); + -rcu_get_n_cbs_nocb_cpu(rdp)); else trace_rcu_callback(rcu_state.name, rhp, -atomic_long_read(&rdp->nocb_q_count_lazy), - -atomic_long_read(&rdp->nocb_q_count)); + -rcu_get_n_cbs_nocb_cpu(rdp)); /* * If called from an extended quiescent state with interrupts @@ -2322,13 +2212,14 @@ static int rcu_nocb_kthread(void *arg) tail = rdp->nocb_follower_tail; rdp->nocb_follower_tail = &rdp->nocb_follower_head; raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); - BUG_ON(!list); + if (WARN_ON_ONCE(!list)) + continue; trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty")); /* Each pass through the following loop invokes a callback. */ trace_rcu_batch_start(rcu_state.name, atomic_long_read(&rdp->nocb_q_count_lazy), - atomic_long_read(&rdp->nocb_q_count), -1); + rcu_get_n_cbs_nocb_cpu(rdp), -1); c = cl = 0; while (list) { next = list->next; @@ -2495,15 +2386,16 @@ static void rcu_spawn_one_nocb_kthread(int cpu) /* Spawn the kthread for this CPU. */ t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rcu_state.abbr, cpu); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo kthread, OOM is now expected behavior\n", __func__)) + return; WRITE_ONCE(rdp_spawn->nocb_kthread, t); } /* * If the specified CPU is a no-CBs CPU that does not already have its - * rcuo kthreads, spawn them. + * rcuo kthread, spawn it. */ -static void rcu_spawn_all_nocb_kthreads(int cpu) +static void rcu_spawn_cpu_nocb_kthread(int cpu) { if (rcu_scheduler_fully_active) rcu_spawn_one_nocb_kthread(cpu); @@ -2520,7 +2412,7 @@ static void __init rcu_spawn_nocb_kthreads(void) int cpu; for_each_online_cpu(cpu) - rcu_spawn_all_nocb_kthreads(cpu); + rcu_spawn_cpu_nocb_kthread(cpu); } /* How many follower CPU IDs per leader? Default of -1 for sqrt(nr_cpu_ids). */ @@ -2587,6 +2479,26 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) return true; } +/* + * Bind the current task to the offloaded CPUs. If there are no offloaded + * CPUs, leave the task unbound. Splat if the bind attempt fails. + */ +void rcu_bind_current_to_nocb(void) +{ + if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask)) + WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); +} +EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); + +/* + * Return the number of RCU callbacks still queued from the specified + * CPU, which must be a nocbs CPU. + */ +static unsigned long rcu_get_n_cbs_nocb_cpu(struct rcu_data *rdp) +{ + return atomic_long_read(&rdp->nocb_q_count); +} + #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static bool rcu_nocb_cpu_needs_barrier(int cpu) @@ -2634,7 +2546,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp) { } -static void rcu_spawn_all_nocb_kthreads(int cpu) +static void rcu_spawn_cpu_nocb_kthread(int cpu) { } @@ -2647,6 +2559,11 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) return false; } +static unsigned long rcu_get_n_cbs_nocb_cpu(struct rcu_data *rdp) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* |