From 073107b39e553a5ef911f019d4e433fd5a8601b7 Mon Sep 17 00:00:00 2001
From: Sangmoon Kim
Date: Tue, 6 Aug 2024 14:12:09 +0900
Subject: workqueue: add cmdline parameter workqueue.panic_on_stall

When we want to debug a workqueue stall, we can trigger a panic
immediately to capture the information we want. On some systems, it may
be necessary to reboot quickly to escape from a workqueue lockup. In
this case, we can control how many stall detections it takes to trigger
the panic.

workqueue.panic_on_stall sets the number of stall detections required
to trigger a panic. 0 disables panic on stall.

Signed-off-by: Sangmoon Kim
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1745ca788ede..80cb0a923046 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -7398,6 +7398,9 @@ static struct timer_list wq_watchdog_timer;
 static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
 static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
 
+static unsigned int wq_panic_on_stall;
+module_param_named(panic_on_stall, wq_panic_on_stall, uint, 0644);
+
 /*
  * Show workers that might prevent the processing of pending work items.
  * The only candidates are CPU-bound workers in the running state.
@@ -7449,6 +7452,16 @@ static void show_cpu_pools_hogs(void)
 	rcu_read_unlock();
 }
 
+static void panic_on_wq_watchdog(void)
+{
+	static unsigned int wq_stall;
+
+	if (wq_panic_on_stall) {
+		wq_stall++;
+		BUG_ON(wq_stall >= wq_panic_on_stall);
+	}
+}
+
 static void wq_watchdog_reset_touched(void)
 {
 	int cpu;
@@ -7521,6 +7534,9 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
 	if (cpu_pool_stall)
 		show_cpu_pools_hogs();
 
+	if (lockup_detected)
+		panic_on_wq_watchdog();
+
 	wq_watchdog_reset_touched();
 	mod_timer(&wq_watchdog_timer, jiffies + thresh);
 }
-- cgit
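Editor's note: because the knob is registered with module_param_named(..., 0644), it can be set at boot as workqueue.panic_on_stall=N and should also be writable at runtime through /sys/module/workqueue/parameters/panic_on_stall. The sketch below is illustrative only and not code from the patch (the demo_* names and the watchdog hook are made up); it mirrors the count-then-panic pattern so the semantics are easy to see: 0 disables the feature, and the Nth stall detection trips BUG_ON(), which oopses and brings the machine down on configurations with panic_on_oops set.

/* Editor's sketch (hypothetical module), not part of the patch above. */
#include <linux/bug.h>
#include <linux/module.h>
#include <linux/moduleparam.h>

static unsigned int demo_panic_on_stall;        /* 0 == feature disabled */
module_param(demo_panic_on_stall, uint, 0644);
MODULE_PARM_DESC(demo_panic_on_stall, "stall detections before panicking");

/* Hypothetical hook: called once per detected stall by a watchdog timer. */
static void demo_handle_stall(void)
{
        static unsigned int stall_count;

        if (!demo_panic_on_stall)
                return;

        stall_count++;
        /* Trips an oops once the configured threshold is reached. */
        BUG_ON(stall_count >= demo_panic_on_stall);
}

MODULE_LICENSE("GPL");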
From b188c57af2b5c17a1e8f71a0358f330446a4f788 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Fri, 9 Aug 2024 15:28:23 -0700
Subject: workqueue: Split alloc_workqueue into internal function and lockdep init

Will help enable user-defined lockdep maps for workqueues.

Cc: Tejun Heo
Cc: Lai Jiangshan
Signed-off-by: Matthew Brost
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 80cb0a923046..3db4703f4652 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5612,9 +5612,9 @@ static void wq_adjust_max_active(struct workqueue_struct *wq)
 }
 
 __printf(1, 4)
-struct workqueue_struct *alloc_workqueue(const char *fmt,
-                                         unsigned int flags,
-                                         int max_active, ...)
+static struct workqueue_struct *__alloc_workqueue(const char *fmt,
+                                                  unsigned int flags,
+                                                  int max_active, ...)
 {
 	va_list args;
 	struct workqueue_struct *wq;
@@ -5680,12 +5680,11 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
 	INIT_LIST_HEAD(&wq->flusher_overflow);
 	INIT_LIST_HEAD(&wq->maydays);
 
-	wq_init_lockdep(wq);
 	INIT_LIST_HEAD(&wq->list);
 
 	if (flags & WQ_UNBOUND) {
 		if (alloc_node_nr_active(wq->node_nr_active) < 0)
-			goto err_unreg_lockdep;
+			goto err_free_wq;
 	}
 
 	/*
@@ -5724,9 +5723,6 @@ err_unlock_free_node_nr_active:
 		kthread_flush_worker(pwq_release_worker);
 		free_node_nr_active(wq->node_nr_active);
 	}
-err_unreg_lockdep:
-	wq_unregister_lockdep(wq);
-	wq_free_lockdep(wq);
 err_free_wq:
 	free_workqueue_attrs(wq->unbound_attrs);
 	kfree(wq);
@@ -5737,6 +5733,25 @@ err_destroy:
 	destroy_workqueue(wq);
 	return NULL;
 }
+
+__printf(1, 4)
+struct workqueue_struct *alloc_workqueue(const char *fmt,
+                                         unsigned int flags,
+                                         int max_active, ...)
+{
+	struct workqueue_struct *wq;
+	va_list args;
+
+	va_start(args, max_active);
+	wq = __alloc_workqueue(fmt, flags, max_active, args);
+	va_end(args);
+	if (!wq)
+		return NULL;
+
+	wq_init_lockdep(wq);
+
+	return wq;
+}
 EXPORT_SYMBOL_GPL(alloc_workqueue);
 
 static bool pwq_busy(struct pool_workqueue *pwq)
-- cgit
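Editor's note, caller-side view of the split (the mydrv_* names below are hypothetical): the exported alloc_workqueue() keeps its printf-style signature and behavior, so existing users are unaffected; only the lockdep setup moves into the thin wrapper, where it runs after __alloc_workqueue() has succeeded and therefore needs no unwinding on the internal error paths.

/* Editor's sketch of an unchanged caller, not part of the patch. */
#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *mydrv_wq;

static int mydrv_init(int id)
{
        /* Same public API as before: format string, flags, max_active, varargs. */
        mydrv_wq = alloc_workqueue("mydrv_wq_%d", WQ_UNBOUND | WQ_MEM_RECLAIM, 0, id);
        if (!mydrv_wq)
                return -ENOMEM;
        return 0;
}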
From 4f022f430e21e456893283036bc2ea78ac6bd2a1 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Fri, 9 Aug 2024 15:28:24 -0700
Subject: workqueue: Change workqueue lockdep map to pointer

Will help enable user-defined lockdep maps for workqueues.

Cc: Tejun Heo
Cc: Lai Jiangshan
Signed-off-by: Matthew Brost
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 3db4703f4652..4c8ba5c4229f 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -364,7 +364,8 @@ struct workqueue_struct {
 #ifdef CONFIG_LOCKDEP
 	char			*lock_name;
 	struct lock_class_key	key;
-	struct lockdep_map	lockdep_map;
+	struct lockdep_map	__lockdep_map;
+	struct lockdep_map	*lockdep_map;
 #endif
 	char			name[WQ_NAME_LEN]; /* I: workqueue name */
 
@@ -3203,7 +3204,7 @@ __acquires(&pool->lock)
 	lockdep_start_depth = lockdep_depth(current);
 	/* see drain_dead_softirq_workfn() */
 	if (!bh_draining)
-		lock_map_acquire(&pwq->wq->lockdep_map);
+		lock_map_acquire(pwq->wq->lockdep_map);
 	lock_map_acquire(&lockdep_map);
 	/*
 	 * Strictly speaking we should mark the invariant state without holding
@@ -3237,7 +3238,7 @@ __acquires(&pool->lock)
 	pwq->stats[PWQ_STAT_COMPLETED]++;
 	lock_map_release(&lockdep_map);
 	if (!bh_draining)
-		lock_map_release(&pwq->wq->lockdep_map);
+		lock_map_release(pwq->wq->lockdep_map);
 
 	if (unlikely((worker->task && in_atomic()) ||
 		     lockdep_depth(current) != lockdep_start_depth ||
@@ -3873,8 +3874,8 @@ static void touch_wq_lockdep_map(struct workqueue_struct *wq)
 	if (wq->flags & WQ_BH)
 		local_bh_disable();
 
-	lock_map_acquire(&wq->lockdep_map);
-	lock_map_release(&wq->lockdep_map);
+	lock_map_acquire(wq->lockdep_map);
+	lock_map_release(wq->lockdep_map);
 
 	if (wq->flags & WQ_BH)
 		local_bh_enable();
@@ -3908,7 +3909,7 @@ void __flush_workqueue(struct workqueue_struct *wq)
 	struct wq_flusher this_flusher = {
 		.list = LIST_HEAD_INIT(this_flusher.list),
 		.flush_color = -1,
-		.done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, wq->lockdep_map),
+		.done = COMPLETION_INITIALIZER_ONSTACK_MAP(this_flusher.done, (*wq->lockdep_map)),
 	};
 	int next_color;
 
@@ -4768,7 +4769,8 @@ static void wq_init_lockdep(struct workqueue_struct *wq)
 		lock_name = wq->name;
 
 	wq->lock_name = lock_name;
-	lockdep_init_map(&wq->lockdep_map, lock_name, &wq->key, 0);
+	wq->lockdep_map = &wq->__lockdep_map;
+	lockdep_init_map(wq->lockdep_map, lock_name, &wq->key, 0);
 }
 
 static void wq_unregister_lockdep(struct workqueue_struct *wq)
-- cgit

From ec0a7d44b358afaaf52856d03c72e20587bc888b Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Fri, 9 Aug 2024 15:28:25 -0700
Subject: workqueue: Add interface for user-defined workqueue lockdep map

Add an interface for a user-defined workqueue lockdep map, which is
helpful when multiple workqueues are created for the same purpose. This
also helps avoid leaking lockdep maps on each workqueue creation.

v2:
 - Add alloc_workqueue_lockdep_map (Tejun)
v3:
 - Drop __WQ_USER_OWNED_LOCKDEP (Tejun)
 - static inline alloc_ordered_workqueue_lockdep_map (Tejun)

Cc: Tejun Heo
Cc: Lai Jiangshan
Signed-off-by: Matthew Brost
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 4c8ba5c4229f..7468ba5e2417 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -4775,11 +4775,17 @@ static void wq_init_lockdep(struct workqueue_struct *wq)
 
 static void wq_unregister_lockdep(struct workqueue_struct *wq)
 {
+	if (wq->lockdep_map != &wq->__lockdep_map)
+		return;
+
 	lockdep_unregister_key(&wq->key);
 }
 
 static void wq_free_lockdep(struct workqueue_struct *wq)
 {
+	if (wq->lockdep_map != &wq->__lockdep_map)
+		return;
+
 	if (wq->lock_name != wq->name)
 		kfree(wq->lock_name);
 }
@@ -5756,6 +5762,28 @@ struct workqueue_struct *alloc_workqueue(const char *fmt,
 }
 EXPORT_SYMBOL_GPL(alloc_workqueue);
 
+#ifdef CONFIG_LOCKDEP
+__printf(1, 5)
+struct workqueue_struct *
+alloc_workqueue_lockdep_map(const char *fmt, unsigned int flags,
+                            int max_active, struct lockdep_map *lockdep_map, ...)
+{
+	struct workqueue_struct *wq;
+	va_list args;
+
+	va_start(args, lockdep_map);
+	wq = __alloc_workqueue(fmt, flags, max_active, args);
+	va_end(args);
+	if (!wq)
+		return NULL;
+
+	wq->lockdep_map = lockdep_map;
+
+	return wq;
+}
+EXPORT_SYMBOL_GPL(alloc_workqueue_lockdep_map);
+#endif
+
 static bool pwq_busy(struct pool_workqueue *pwq)
 {
 	int i;
-- cgit
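Editor's note, a sketch of the intended caller-side usage of the new export (all mydrv_* names are hypothetical, and since alloc_workqueue_lockdep_map() is only built under CONFIG_LOCKDEP a real caller needs a non-lockdep fallback such as plain alloc_workqueue()): every workqueue created for the same purpose shares one caller-owned lockdep map, so lockdep folds them into a single class and no per-workqueue key is registered and later leaked.

/* Editor's sketch, not part of the patch. */
#include <linux/lockdep.h>
#include <linux/workqueue.h>

static struct lock_class_key mydrv_wq_key;
static struct lockdep_map mydrv_wq_lockdep_map;

/* Call once before any of the workqueues are created. */
static void mydrv_wq_lockdep_setup(void)
{
        lockdep_init_map(&mydrv_wq_lockdep_map, "mydrv_wq", &mydrv_wq_key, 0);
}

static struct workqueue_struct *mydrv_create_wq(int id)
{
        /* Every instance reports flush/execution dependencies against one class. */
        return alloc_workqueue_lockdep_map("mydrv_wq_%d", WQ_MEM_RECLAIM, 0,
                                           &mydrv_wq_lockdep_map, id);
}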
From 9b59a85a84dc37ca4f2c54df5e06aff4c1eae5d3 Mon Sep 17 00:00:00 2001
From: Matthew Brost
Date: Tue, 20 Aug 2024 12:38:08 -0700
Subject: workqueue: Don't call va_start / va_end twice

Calling va_start / va_end multiple times is undefined and causes
problems on certain compiler/platform combinations. Change
alloc_ordered_workqueue_lockdep_map to a macro and update
__alloc_workqueue to take a va_list argument.

Cc: Sergey Senozhatsky
Cc: Tejun Heo
Cc: Lai Jiangshan
Signed-off-by: Matthew Brost
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7468ba5e2417..7cc77adb18bb 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5619,12 +5619,10 @@ static void wq_adjust_max_active(struct workqueue_struct *wq)
 	} while (activated);
 }
 
-__printf(1, 4)
 static struct workqueue_struct *__alloc_workqueue(const char *fmt,
                                                   unsigned int flags,
-                                                  int max_active, ...)
+                                                  int max_active, va_list args)
 {
-	va_list args;
 	struct workqueue_struct *wq;
 	size_t wq_size;
 	int name_len;
@@ -5656,9 +5654,7 @@ static struct workqueue_struct *__alloc_workqueue(const char *fmt,
 		goto err_free_wq;
 	}
 
-	va_start(args, max_active);
 	name_len = vsnprintf(wq->name, sizeof(wq->name), fmt, args);
-	va_end(args);
 
 	if (name_len >= WQ_NAME_LEN)
 		pr_warn_once("workqueue: name exceeds WQ_NAME_LEN. Truncating to: %s\n",
-- cgit

From 84c425bef3409d69ddb171c89664f66030bbf9d9 Mon Sep 17 00:00:00 2001
From: Sergey Senozhatsky
Date: Thu, 15 Aug 2024 16:02:58 +0900
Subject: workqueue: fix null-ptr-deref on __alloc_workqueue() error

wq->lockdep_map is set only after __alloc_workqueue() successfully
returns. However, on its error path __alloc_workqueue() may call
destroy_workqueue() which expects wq->lockdep_map to be already set,
which results in a null-ptr-deref in touch_wq_lockdep_map().

Add a simple NULL-check to touch_wq_lockdep_map().

Oops: general protection fault, probably for non-canonical address
KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
RIP: 0010:__lock_acquire+0x81/0x7800
[..]
Call Trace:
 ? __die_body+0x66/0xb0
 ? die_addr+0xb2/0xe0
 ? exc_general_protection+0x300/0x470
 ? asm_exc_general_protection+0x22/0x30
 ? __lock_acquire+0x81/0x7800
 ? mark_lock+0x94/0x330
 ? __lock_acquire+0x12fd/0x7800
 ? __lock_acquire+0x3439/0x7800
 lock_acquire+0x14c/0x3e0
 ? __flush_workqueue+0x167/0x13a0
 ? __init_swait_queue_head+0xaf/0x150
 ? __flush_workqueue+0x167/0x13a0
 __flush_workqueue+0x17d/0x13a0
 ? __flush_workqueue+0x167/0x13a0
 ? lock_release+0x50f/0x830
 ? drain_workqueue+0x94/0x300
 drain_workqueue+0xe3/0x300
 destroy_workqueue+0xac/0xc40
 ? workqueue_sysfs_register+0x159/0x2f0
 __alloc_workqueue+0x1506/0x1760
 alloc_workqueue+0x61/0x150
 ...

Signed-off-by: Sergey Senozhatsky
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 7cc77adb18bb..03dc2c95d62e 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3871,6 +3871,9 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,
 static void touch_wq_lockdep_map(struct workqueue_struct *wq)
 {
 #ifdef CONFIG_LOCKDEP
+	if (unlikely(!wq->lockdep_map))
+		return;
+
 	if (wq->flags & WQ_BH)
 		local_bh_disable();
 
-- cgit
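Editor's note on the rule behind the va_list fix, shown on a small standalone C example rather than kernel code (an illustrative sketch): a va_list is traversed between exactly one va_start()/va_end() pair, so a variadic front end should call va_start() once and hand the va_list to a v*-style helper, the same way alloc_workqueue() and alloc_workqueue_lockdep_map() now hand args to __alloc_workqueue(), which passes it straight through to vsnprintf().

/* Editor's sketch: standalone userspace C, not kernel code. */
#include <stdarg.h>
#include <stdio.h>

/* Plays the role of __alloc_workqueue(): consumes an already-started va_list. */
static void vlog(const char *fmt, va_list args)
{
        vfprintf(stderr, fmt, args);
}

/* Plays the role of the exported wrappers: exactly one va_start/va_end pair. */
static void log_msg(const char *fmt, ...)
{
        va_list args;

        va_start(args, fmt);
        vlog(fmt, args);
        va_end(args);
}

int main(void)
{
        log_msg("hello %s #%d\n", "workqueue", 1);
        return 0;
}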
From b4722b8593b8815785bbadf87f13c88e89a0ebef Mon Sep 17 00:00:00 2001
From: Baoquan He
Date: Wed, 11 Sep 2024 13:07:28 +0800
Subject: kernel/workqueue.c: fix DEFINE_PER_CPU_SHARED_ALIGNED expansion

make tags always produces the annoying warnings below:

ctags: Warning: kernel/workqueue.c:470: null expansion of name pattern "\1"
ctags: Warning: kernel/workqueue.c:474: null expansion of name pattern "\1"
ctags: Warning: kernel/workqueue.c:478: null expansion of name pattern "\1"

In commit 25528213fe9f ("tags: Fix DEFINE_PER_CPU expansions"), code in
several places was adjusted for this, including the cpu_worker_pools
definition. Commit 4cb1ef64609f ("workqueue: Implement BH workqueues to
eventually replace tasklets") then unfolded the cpu_worker_pools
definition again; it is unclear whether that was intentional or an
oversight.

Make the change to silence these specific warnings.

Signed-off-by: Baoquan He
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 03dc2c95d62e..95a7372431f6 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -477,16 +477,13 @@ static bool wq_debug_force_rr_cpu = false;
 module_param_named(debug_force_rr_cpu, wq_debug_force_rr_cpu, bool, 0644);
 
 /* to raise softirq for the BH worker pools on other CPUs */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS],
-				     bh_pool_irq_works);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_work [NR_STD_WORKER_POOLS], bh_pool_irq_works);
 
 /* the BH worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
-				     bh_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], bh_worker_pools);
 
 /* the per-cpu worker pools */
-static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS],
-				     cpu_worker_pools);
+static DEFINE_PER_CPU_SHARED_ALIGNED(struct worker_pool [NR_STD_WORKER_POOLS], cpu_worker_pools);
 
 static DEFINE_IDR(worker_pool_idr);	/* PR: idr of all pools */
-- cgit
From 73613840a8896f4f859eea489cb4a7a656939e70 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan
Date: Thu, 12 Sep 2024 11:23:29 +0800
Subject: workqueue: Clear worker->pool in the worker thread context

Marc Hartmayer reported:

[ 23.133876] Unable to handle kernel pointer dereference in virtual kernel address space
[ 23.133950] Failing address: 0000000000000000 TEID: 0000000000000483
[ 23.133954] Fault in home space mode while using kernel ASCE.
[ 23.133957] AS:000000001b8f0007 R3:0000000056cf4007 S:0000000056cf3800 P:000000000000003d
[ 23.134207] Oops: 0004 ilc:2 [#1] SMP
(snip)
[ 23.134516] Call Trace:
[ 23.134520] [<0000024e326caf28>] worker_thread+0x48/0x430
[ 23.134525] ([<0000024e326caf18>] worker_thread+0x38/0x430)
[ 23.134528] [<0000024e326d3a3e>] kthread+0x11e/0x130
[ 23.134533] [<0000024e3264b0dc>] __ret_from_fork+0x3c/0x60
[ 23.134536] [<0000024e333fb37a>] ret_from_fork+0xa/0x38
[ 23.134552] Last Breaking-Event-Address:
[ 23.134553] [<0000024e333f4c04>] mutex_unlock+0x24/0x30
[ 23.134562] Kernel panic - not syncing: Fatal exception: panic_on_oops

Debugging and analysis show that worker_thread() accesses the nullified
worker->pool when the newly created worker is destroyed before being
woken up, in which case worker_thread() can observe the result of
detach_worker() resetting worker->pool to NULL at the beginning.

Move the code "worker->pool = NULL;" out of detach_worker() to fix the
problem.

worker->pool had been designed to be constant for regular workers and
changeable for the rescuer. To share the attaching/detaching code
between regular and rescuer workers, and to avoid worker->pool being
accessed inadvertently after the worker has been detached, worker->pool
is reset to NULL when detached, no matter whether the worker is a
rescuer or not.

To keep worker->pool reset after detaching, move the code
"worker->pool = NULL;" into the worker thread context after detaching.

It is either in the regular worker thread context after PF_WQ_WORKER is
cleared, or in the rescuer thread context with wq_pool_attach_mutex
held, so it is safe to do so.

Cc: Marc Hartmayer
Link: https://lore.kernel.org/lkml/87wmjj971b.fsf@linux.ibm.com/
Reported-by: Marc Hartmayer
Fixes: f4b7b53c94af ("workqueue: Detach workers directly in idle_cull_fn()")
Cc: stable@vger.kernel.org # v6.11+
Signed-off-by: Lai Jiangshan
Signed-off-by: Tejun Heo
---
 kernel/workqueue.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'kernel/workqueue.c')

diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index e7b005ff3750..6f2545037e57 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -2709,7 +2709,6 @@ static void detach_worker(struct worker *worker)
 
 	unbind_worker(worker);
 	list_del(&worker->node);
-	worker->pool = NULL;
 }
 
 /**
@@ -2729,6 +2728,7 @@ static void worker_detach_from_pool(struct worker *worker)
 
 	mutex_lock(&wq_pool_attach_mutex);
 	detach_worker(worker);
+	worker->pool = NULL;
 	mutex_unlock(&wq_pool_attach_mutex);
 
 	/* clear leftover flags without pool->lock after it is detached */
@@ -3349,7 +3349,11 @@ woke_up:
 	if (unlikely(worker->flags & WORKER_DIE)) {
 		raw_spin_unlock_irq(&pool->lock);
 		set_pf_worker(false);
-
+		/*
+		 * The worker is dead and PF_WQ_WORKER is cleared, worker->pool
+		 * shouldn't be accessed, reset it to NULL in case otherwise.
+		 */
+		worker->pool = NULL;
 		ida_free(&pool->worker_ida, worker->id);
 		return 0;
 	}
-- cgit