Diffstat (limited to 'kernel/workqueue.c')
 kernel/workqueue.c | 130
 1 file changed, 89 insertions(+), 41 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c74bf39ef764..a2dccfe1acec 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -21,7 +21,7 @@
* pools for workqueues which are not bound to any specific CPU - the
* number of these backing pools is dynamic.
*
- * Please read Documentation/workqueue.txt for details.
+ * Please read Documentation/core-api/workqueue.rst for details.
*/
#include <linux/export.h>
@@ -68,6 +68,7 @@ enum {
* attach_mutex to avoid changing binding state while
* worker_attach_to_pool() is in progress.
*/
+ POOL_MANAGER_ACTIVE = 1 << 0, /* being managed */
POOL_DISASSOCIATED = 1 << 2, /* cpu can't serve workers */
/* worker flags */
@@ -165,7 +166,6 @@ struct worker_pool {
/* L: hash of busy workers */
/* see manage_workers() for details on the two manager mutexes */
- struct mutex manager_arb; /* manager arbitration */
struct worker *manager; /* L: purely informational */
struct mutex attach_mutex; /* attach/detach exclusion */
struct list_head workers; /* A: attached workers */
@@ -299,6 +299,7 @@ static struct workqueue_attrs *wq_update_unbound_numa_attrs_buf;
static DEFINE_MUTEX(wq_pool_mutex); /* protects pools and workqueues list */
static DEFINE_SPINLOCK(wq_mayday_lock); /* protects wq->maydays list */
+static DECLARE_WAIT_QUEUE_HEAD(wq_manager_wait); /* wait for manager to go away */
static LIST_HEAD(workqueues); /* PR: list of all workqueues */
static bool workqueue_freezing; /* PL: have wqs started freezing? */
@@ -801,7 +802,7 @@ static bool need_to_create_worker(struct worker_pool *pool)
/* Do we have too many workers and should some go away? */
static bool too_many_workers(struct worker_pool *pool)
{
- bool managing = mutex_is_locked(&pool->manager_arb);
+ bool managing = pool->flags & POOL_MANAGER_ACTIVE;
int nr_idle = pool->nr_idle + managing; /* manager is considered idle */
int nr_busy = pool->nr_workers - nr_idle;
@@ -1980,24 +1981,17 @@ static bool manage_workers(struct worker *worker)
{
struct worker_pool *pool = worker->pool;
- /*
- * Anyone who successfully grabs manager_arb wins the arbitration
- * and becomes the manager. mutex_trylock() on pool->manager_arb
- * failure while holding pool->lock reliably indicates that someone
- * else is managing the pool and the worker which failed trylock
- * can proceed to executing work items. This means that anyone
- * grabbing manager_arb is responsible for actually performing
- * manager duties. If manager_arb is grabbed and released without
- * actual management, the pool may stall indefinitely.
- */
- if (!mutex_trylock(&pool->manager_arb))
+ if (pool->flags & POOL_MANAGER_ACTIVE)
return false;
+
+ pool->flags |= POOL_MANAGER_ACTIVE;
pool->manager = worker;
maybe_create_worker(pool);
pool->manager = NULL;
- mutex_unlock(&pool->manager_arb);
+ pool->flags &= ~POOL_MANAGER_ACTIVE;
+ wake_up(&wq_manager_wait);
return true;
}
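For context, here is a generic, self-contained sketch of the arbitration scheme this hunk adopts: a flag guarded by a spinlock serializes the manager role, and a wait queue lets a tear-down path (see the put_unbound_pool() hunk further down) wait for the flag to clear. All names below (res_lock, res_wait, RES_MANAGER_ACTIVE, try_manage(), destroy_resource()) are hypothetical and not part of the patch.

#include <linux/spinlock.h>
#include <linux/wait.h>

static DEFINE_SPINLOCK(res_lock);
static DECLARE_WAIT_QUEUE_HEAD(res_wait);
static unsigned int res_flags;
#define RES_MANAGER_ACTIVE	(1 << 0)

/* Called with res_lock held, like manage_workers() with pool->lock. */
static bool try_manage(void)
{
	if (res_flags & RES_MANAGER_ACTIVE)
		return false;			/* someone else is managing */
	res_flags |= RES_MANAGER_ACTIVE;

	/* ... management work, possibly dropping and retaking res_lock ... */

	res_flags &= ~RES_MANAGER_ACTIVE;
	wake_up(&res_wait);			/* unblock a waiting destroyer */
	return true;
}

/* Tear-down path: become the final manager, like put_unbound_pool(). */
static void destroy_resource(void)
{
	spin_lock_irq(&res_lock);
	wait_event_lock_irq(res_wait, !(res_flags & RES_MANAGER_ACTIVE),
			    res_lock);
	res_flags |= RES_MANAGER_ACTIVE;	/* never cleared; object goes away */
	/* ... tear everything down under res_lock ... */
	spin_unlock_irq(&res_lock);
}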
@@ -2091,8 +2085,30 @@ __acquires(&pool->lock)
spin_unlock_irq(&pool->lock);
- lock_map_acquire_read(&pwq->wq->lockdep_map);
+ lock_map_acquire(&pwq->wq->lockdep_map);
lock_map_acquire(&lockdep_map);
+ /*
+ * Strictly speaking we should mark the invariant state without holding
+ * any locks, that is, before these two lock_map_acquire()'s.
+ *
+ * However, that would result in:
+ *
+ * A(W1)
+ * WFC(C)
+ * A(W1)
+ * C(C)
+ *
+ * Which would create W1->C->W1 dependencies, even though there is no
+ * actual deadlock possible. There are two solutions, using a
+ * read-recursive acquire on the work(queue) 'locks', but this will then
+ * hit the lockdep limitation on recursive locks, or simply discard
+ * these locks.
+ *
+ * AFAICT there is no possible deadlock scenario between the
+ * flush_work() and complete() primitives (except for single-threaded
+ * workqueues), so hiding them isn't a problem.
+ */
+ lockdep_invariant_state(true);
trace_workqueue_execute_start(work);
worker->current_func(work);
/*
@@ -2247,7 +2263,7 @@ sleep:
* event.
*/
worker_enter_idle(worker);
- __set_current_state(TASK_INTERRUPTIBLE);
+ __set_current_state(TASK_IDLE);
spin_unlock_irq(&pool->lock);
schedule();
goto woke_up;
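The TASK_INTERRUPTIBLE to TASK_IDLE switch here, and in rescuer_thread() below, follows the usual kthread idle idiom; a minimal hypothetical loop using it (example_thread() is not from the patch) would look like:

#include <linux/kthread.h>
#include <linux/sched.h>

static int example_thread(void *unused)
{
	while (!kthread_should_stop()) {
		/* TASK_IDLE: uninterruptible sleep that does not count
		 * towards the load average and needs no signal handling. */
		set_current_state(TASK_IDLE);
		if (kthread_should_stop()) {
			__set_current_state(TASK_RUNNING);
			break;
		}
		schedule();
	}
	return 0;
}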
@@ -2289,7 +2305,7 @@ static int rescuer_thread(void *__rescuer)
*/
rescuer->task->flags |= PF_WQ_WORKER;
repeat:
- set_current_state(TASK_INTERRUPTIBLE);
+ set_current_state(TASK_IDLE);
/*
* By the time the rescuer is requested to stop, the workqueue
@@ -2474,7 +2490,16 @@ static void insert_wq_barrier(struct pool_workqueue *pwq,
*/
INIT_WORK_ONSTACK(&barr->work, wq_barrier_func);
__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
- init_completion(&barr->done);
+
+ /*
+ * Explicitly init the crosslock for wq_barrier::done, make its lock
+ * key a subkey of the corresponding work. As a result we won't
+ * build a dependency between wq_barrier::done and unrelated work.
+ */
+ lockdep_init_map_crosslock((struct lockdep_map *)&barr->done.map,
+ "(complete)wq_barr::done",
+ target->lockdep_map.key, 1);
+ __init_completion(&barr->done);
barr->task = current;
/*
@@ -2815,16 +2840,18 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
spin_unlock_irq(&pool->lock);
/*
- * If @max_active is 1 or rescuer is in use, flushing another work
- * item on the same workqueue may lead to deadlock. Make sure the
- * flusher is not running on the same workqueue by verifying write
- * access.
+ * Force a lock recursion deadlock when using flush_work() inside a
+ * single-threaded or rescuer equipped workqueue.
+ *
+ * For single threaded workqueues the deadlock happens when the work
+ * is after the work issuing the flush_work(). For rescuer equipped
+ * workqueues the deadlock happens when the rescuer stalls, blocking
+ * forward progress.
*/
- if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer)
+ if (pwq->wq->saved_max_active == 1 || pwq->wq->rescuer) {
lock_map_acquire(&pwq->wq->lockdep_map);
- else
- lock_map_acquire_read(&pwq->wq->lockdep_map);
- lock_map_release(&pwq->wq->lockdep_map);
+ lock_map_release(&pwq->wq->lockdep_map);
+ }
return true;
already_gone:
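A hypothetical reproduction (not from the patch) of the single-threaded deadlock described in the comment above; example_wq, first_fn() and second_work are illustrative names, with example_wq assumed to come from alloc_ordered_workqueue():

#include <linux/workqueue.h>

static struct workqueue_struct *example_wq;	/* alloc_ordered_workqueue("example", 0) */

static void second_fn(struct work_struct *work)
{
}
static DECLARE_WORK(second_work, second_fn);

/* Runs on example_wq itself. second_work is queued behind it and cannot
 * start until first_fn() returns, so the flush never completes. The
 * lock_map_acquire()/lock_map_release() pair above makes lockdep report
 * the recursion on the workqueue's lockdep_map. */
static void first_fn(struct work_struct *work)
{
	queue_work(example_wq, &second_work);
	flush_work(&second_work);
}
static DECLARE_WORK(first_work, first_fn);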
@@ -2864,11 +2891,11 @@ bool flush_work(struct work_struct *work)
EXPORT_SYMBOL_GPL(flush_work);
struct cwt_wait {
- wait_queue_t wait;
+ wait_queue_entry_t wait;
struct work_struct *work;
};
-static int cwt_wakefn(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int cwt_wakefn(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
{
struct cwt_wait *cwait = container_of(wait, struct cwt_wait, wait);
@@ -3215,7 +3242,6 @@ static int init_worker_pool(struct worker_pool *pool)
setup_timer(&pool->mayday_timer, pool_mayday_timeout,
(unsigned long)pool);
- mutex_init(&pool->manager_arb);
mutex_init(&pool->attach_mutex);
INIT_LIST_HEAD(&pool->workers);
@@ -3285,13 +3311,15 @@ static void put_unbound_pool(struct worker_pool *pool)
hash_del(&pool->hash_node);
/*
- * Become the manager and destroy all workers. Grabbing
- * manager_arb prevents @pool's workers from blocking on
- * attach_mutex.
+ * Become the manager and destroy all workers. This prevents
+ * @pool's workers from blocking on attach_mutex. We're the last
+ * manager and @pool gets freed with the flag set.
*/
- mutex_lock(&pool->manager_arb);
-
spin_lock_irq(&pool->lock);
+ wait_event_lock_irq(wq_manager_wait,
+ !(pool->flags & POOL_MANAGER_ACTIVE), pool->lock);
+ pool->flags |= POOL_MANAGER_ACTIVE;
+
while ((worker = first_idle_worker(pool)))
destroy_worker(worker);
WARN_ON(pool->nr_workers || pool->nr_idle);
@@ -3305,8 +3333,6 @@ static void put_unbound_pool(struct worker_pool *pool)
if (pool->detach_completion)
wait_for_completion(pool->detach_completion);
- mutex_unlock(&pool->manager_arb);
-
/* shut down the timers */
del_timer_sync(&pool->idle_timer);
del_timer_sync(&pool->mayday_timer);
@@ -3577,6 +3603,13 @@ static bool wq_calc_node_cpumask(const struct workqueue_attrs *attrs, int node,
/* yeap, return possible CPUs in @node that @attrs wants */
cpumask_and(cpumask, attrs->cpumask, wq_numa_possible_cpumask[node]);
+
+ if (cpumask_empty(cpumask)) {
+ pr_warn_once("WARNING: workqueue cpumask: online intersect > "
+ "possible intersect\n");
+ return false;
+ }
+
return !cpumask_equal(cpumask, attrs->cpumask);
use_dfl:
@@ -3744,8 +3777,12 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
return -EINVAL;
/* creating multiple pwqs breaks ordering guarantee */
- if (WARN_ON((wq->flags & __WQ_ORDERED) && !list_empty(&wq->pwqs)))
- return -EINVAL;
+ if (!list_empty(&wq->pwqs)) {
+ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
+ return -EINVAL;
+
+ wq->flags &= ~__WQ_ORDERED;
+ }
ctx = apply_wqattrs_prepare(wq, attrs);
if (!ctx)
@@ -3929,6 +3966,16 @@ struct workqueue_struct *__alloc_workqueue_key(const char *fmt,
struct workqueue_struct *wq;
struct pool_workqueue *pwq;
+ /*
+ * Unbound && max_active == 1 used to imply ordered, which is no
+ * longer the case on NUMA machines due to per-node pools. While
+ * alloc_ordered_workqueue() is the right way to create an ordered
+ * workqueue, keep the previous behavior to avoid subtle breakages
+ * on NUMA.
+ */
+ if ((flags & WQ_UNBOUND) && max_active == 1)
+ flags |= __WQ_ORDERED;
+
/* see the comment above the definition of WQ_POWER_EFFICIENT */
if ((flags & WQ_POWER_EFFICIENT) && wq_power_efficient)
flags |= WQ_UNBOUND;
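A hypothetical allocation example (example_init(), implied_wq and explicit_wq are not from the patch) contrasting the implied ordering preserved by the hunk above with an explicitly ordered workqueue:

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/workqueue.h>

static struct workqueue_struct *implied_wq, *explicit_wq;

static int __init example_init(void)
{
	/* unbound + max_active == 1: __WQ_ORDERED is implied, as above */
	implied_wq = alloc_workqueue("example_implied", WQ_UNBOUND, 1);

	/* the documented way to request ordering; also sets __WQ_ORDERED_EXPLICIT */
	explicit_wq = alloc_ordered_workqueue("example_explicit", 0);

	if (!implied_wq || !explicit_wq)
		return -ENOMEM;
	return 0;
}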
@@ -4119,13 +4166,14 @@ void workqueue_set_max_active(struct workqueue_struct *wq, int max_active)
struct pool_workqueue *pwq;
/* disallow meddling with max_active for ordered workqueues */
- if (WARN_ON(wq->flags & __WQ_ORDERED))
+ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
return;
max_active = wq_clamp_max_active(max_active, wq->flags, wq->name);
mutex_lock(&wq->mutex);
+ wq->flags &= ~__WQ_ORDERED;
wq->saved_max_active = max_active;
for_each_pwq(pwq, wq)
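Continuing that hypothetical example, the relaxed checks in the hunks above mean max_active can now be raised on an implicitly ordered workqueue (at the cost of its ordering), while an explicitly ordered one still trips the WARN_ON:

static void example_tune(void)
{
	workqueue_set_max_active(implied_wq, 16);	/* allowed; __WQ_ORDERED is cleared */
	workqueue_set_max_active(explicit_wq, 16);	/* WARN_ON, request is ignored */
}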
@@ -5253,7 +5301,7 @@ int workqueue_sysfs_register(struct workqueue_struct *wq)
* attributes breaks ordering guarantee. Disallow exposing ordered
* workqueues.
*/
- if (WARN_ON(wq->flags & __WQ_ORDERED))
+ if (WARN_ON(wq->flags & __WQ_ORDERED_EXPLICIT))
return -EINVAL;
wq->wq_dev = wq_dev = kzalloc(sizeof(*wq_dev), GFP_KERNEL);