diff options
Diffstat (limited to 'kernel/smp.c')
-rw-r--r-- | kernel/smp.c | 52 |
1 files changed, 36 insertions, 16 deletions
diff --git a/kernel/smp.c b/kernel/smp.c index 385179dae360..f085ebcdf9e7 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -46,6 +46,8 @@ static DEFINE_PER_CPU_ALIGNED(struct call_function_data, cfd_data); static DEFINE_PER_CPU_SHARED_ALIGNED(struct llist_head, call_single_queue); +static DEFINE_PER_CPU(atomic_t, trigger_backtrace) = ATOMIC_INIT(1); + static void __flush_smp_call_function_queue(bool warn_cpu_offline); int smpcfd_prepare_cpu(unsigned int cpu) @@ -125,7 +127,7 @@ send_call_function_ipi_mask(struct cpumask *mask) } static __always_inline void -csd_do_func(smp_call_func_t func, void *info, struct __call_single_data *csd) +csd_do_func(smp_call_func_t func, void *info, call_single_data_t *csd) { trace_csd_function_entry(func, csd); func(info); @@ -168,11 +170,13 @@ static DEFINE_PER_CPU(void *, cur_csd_info); static ulong csd_lock_timeout = 5000; /* CSD lock timeout in milliseconds. */ module_param(csd_lock_timeout, ulong, 0444); +static int panic_on_ipistall; /* CSD panic timeout in milliseconds, 300000 for five minutes. */ +module_param(panic_on_ipistall, int, 0444); static atomic_t csd_bug_count = ATOMIC_INIT(0); /* Record current CSD work for current CPU, NULL to erase. */ -static void __csd_lock_record(struct __call_single_data *csd) +static void __csd_lock_record(call_single_data_t *csd) { if (!csd) { smp_mb(); /* NULL cur_csd after unlock. */ @@ -187,13 +191,13 @@ static void __csd_lock_record(struct __call_single_data *csd) /* Or before unlock, as the case may be. */ } -static __always_inline void csd_lock_record(struct __call_single_data *csd) +static __always_inline void csd_lock_record(call_single_data_t *csd) { if (static_branch_unlikely(&csdlock_debug_enabled)) __csd_lock_record(csd); } -static int csd_lock_wait_getcpu(struct __call_single_data *csd) +static int csd_lock_wait_getcpu(call_single_data_t *csd) { unsigned int csd_type; @@ -208,7 +212,7 @@ static int csd_lock_wait_getcpu(struct __call_single_data *csd) * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU, * so waiting on other types gets much less information. */ -static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 *ts1, int *bug_id) +static bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) { int cpu = -1; int cpux; @@ -228,6 +232,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * } ts2 = sched_clock(); + /* How long since we last checked for a stuck CSD lock.*/ ts_delta = ts2 - *ts1; if (likely(ts_delta <= csd_lock_timeout_ns || csd_lock_timeout_ns == 0)) return false; @@ -241,9 +246,17 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * else cpux = cpu; cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */ + /* How long since this CSD lock was stuck. */ + ts_delta = ts2 - ts0; pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n", - firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0, + firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts_delta, cpu, csd->func, csd->info); + /* + * If the CSD lock is still stuck after 5 minutes, it is unlikely + * to become unstuck. Use a signed comparison to avoid triggering + * on underflows when the TSC is out of sync between sockets. + */ + BUG_ON(panic_on_ipistall > 0 && (s64)ts_delta > ((s64)panic_on_ipistall * NSEC_PER_MSEC)); if (cpu_cur_csd && csd != cpu_cur_csd) { pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), @@ -253,13 +266,15 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); } if (cpu >= 0) { - dump_cpu_task(cpu); + if (atomic_cmpxchg_acquire(&per_cpu(trigger_backtrace, cpu), 1, 0)) + dump_cpu_task(cpu); if (!cpu_cur_csd) { pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); arch_send_call_function_single_ipi(cpu); } } - dump_stack(); + if (firsttime) + dump_stack(); *ts1 = ts2; return false; @@ -272,7 +287,7 @@ static bool csd_lock_wait_toolong(struct __call_single_data *csd, u64 ts0, u64 * * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ -static void __csd_lock_wait(struct __call_single_data *csd) +static void __csd_lock_wait(call_single_data_t *csd) { int bug_id = 0; u64 ts0, ts1; @@ -286,7 +301,7 @@ static void __csd_lock_wait(struct __call_single_data *csd) smp_acquire__after_ctrl_dep(); } -static __always_inline void csd_lock_wait(struct __call_single_data *csd) +static __always_inline void csd_lock_wait(call_single_data_t *csd) { if (static_branch_unlikely(&csdlock_debug_enabled)) { __csd_lock_wait(csd); @@ -296,17 +311,17 @@ static __always_inline void csd_lock_wait(struct __call_single_data *csd) smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } #else -static void csd_lock_record(struct __call_single_data *csd) +static void csd_lock_record(call_single_data_t *csd) { } -static __always_inline void csd_lock_wait(struct __call_single_data *csd) +static __always_inline void csd_lock_wait(call_single_data_t *csd) { smp_cond_load_acquire(&csd->node.u_flags, !(VAL & CSD_FLAG_LOCK)); } #endif -static __always_inline void csd_lock(struct __call_single_data *csd) +static __always_inline void csd_lock(call_single_data_t *csd) { csd_lock_wait(csd); csd->node.u_flags |= CSD_FLAG_LOCK; @@ -319,7 +334,7 @@ static __always_inline void csd_lock(struct __call_single_data *csd) smp_wmb(); } -static __always_inline void csd_unlock(struct __call_single_data *csd) +static __always_inline void csd_unlock(call_single_data_t *csd) { WARN_ON(!(csd->node.u_flags & CSD_FLAG_LOCK)); @@ -372,7 +387,7 @@ void __smp_call_single_queue(int cpu, struct llist_node *node) * for execution on the given CPU. data must already have * ->func, ->info, and ->flags set. */ -static int generic_exec_single(int cpu, struct __call_single_data *csd) +static int generic_exec_single(int cpu, call_single_data_t *csd) { if (cpu == smp_processor_id()) { smp_call_func_t func = csd->func; @@ -433,9 +448,14 @@ static void __flush_smp_call_function_queue(bool warn_cpu_offline) struct llist_node *entry, *prev; struct llist_head *head; static bool warned; + atomic_t *tbt; lockdep_assert_irqs_disabled(); + /* Allow waiters to send backtrace NMI from here onwards */ + tbt = this_cpu_ptr(&trigger_backtrace); + atomic_set_release(tbt, 1); + head = this_cpu_ptr(&call_single_queue); entry = llist_del_all(head); entry = llist_reverse_order(entry); @@ -658,7 +678,7 @@ EXPORT_SYMBOL(smp_call_function_single); * * Return: %0 on success or negative errno value on error */ -int smp_call_function_single_async(int cpu, struct __call_single_data *csd) +int smp_call_function_single_async(int cpu, call_single_data_t *csd) { int err = 0; |