Diffstat (limited to 'kernel/sched/core.c')
-rw-r--r--	kernel/sched/core.c	302
1 file changed, 205 insertions(+), 97 deletions(-)
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 719e0ed1e976..95e40895a519 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -832,7 +832,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
rq_lock(rq, &rf);
update_rq_clock(rq);
- rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+ rq->donor->sched_class->task_tick(rq, rq->curr, 1);
rq_unlock(rq, &rf);
return HRTIMER_NORESTART;
@@ -941,10 +941,9 @@ static inline void hrtick_rq_init(struct rq *rq)
* this avoids any races wrt polling state changes and thereby avoids
* spurious IPIs.
*/
-static inline bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
{
- struct thread_info *ti = task_thread_info(p);
- return !(fetch_or(&ti->flags, _TIF_NEED_RESCHED) & _TIF_POLLING_NRFLAG);
+ return !(fetch_or(&ti->flags, 1 << tif) & _TIF_POLLING_NRFLAG);
}
/*
@@ -969,9 +968,9 @@ static bool set_nr_if_polling(struct task_struct *p)
}
#else
-static inline bool set_nr_and_not_polling(struct task_struct *p)
+static inline bool set_nr_and_not_polling(struct thread_info *ti, int tif)
{
- set_tsk_need_resched(p);
+ set_ti_thread_flag(ti, tif);
return true;
}
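
The rework above keeps the original trick: fetch_or() atomically ORs the resched bit into thread_info::flags and returns the previous value, so the caller can tell from the old _TIF_POLLING_NRFLAG whether the target CPU is polling on that word (and will notice the new bit on its own) or needs an IPI. A minimal user-space model of that pattern, using C11 atomics and made-up flag values purely for illustration:

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative flag values; the real TIF_* bits are arch-specific. */
    #define POLLING_FLAG    (1u << 0)
    #define RESCHED_FLAG    (1u << 1)

    static _Atomic unsigned int ti_flags = POLLING_FLAG;   /* target is polling */

    /* Model of set_nr_and_not_polling(): returns true if an IPI is needed. */
    static bool set_nr_and_not_polling(unsigned int tif_bit)
    {
        unsigned int old = atomic_fetch_or(&ti_flags, tif_bit);

        return !(old & POLLING_FLAG);
    }

    int main(void)
    {
        /* Target was polling: the bit is set but no IPI has to be sent. */
        printf("need IPI: %d\n", set_nr_and_not_polling(RESCHED_FLAG));
        return 0;
    }
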
@@ -1076,28 +1075,70 @@ void wake_up_q(struct wake_q_head *head)
* might also involve a cross-CPU call to trigger the scheduler on
* the target CPU.
*/
-void resched_curr(struct rq *rq)
+static void __resched_curr(struct rq *rq, int tif)
{
struct task_struct *curr = rq->curr;
+ struct thread_info *cti = task_thread_info(curr);
int cpu;
lockdep_assert_rq_held(rq);
- if (test_tsk_need_resched(curr))
+ /*
+ * Always immediately preempt the idle task; no point in delaying doing
+ * actual work.
+ */
+ if (is_idle_task(curr) && tif == TIF_NEED_RESCHED_LAZY)
+ tif = TIF_NEED_RESCHED;
+
+ if (cti->flags & ((1 << tif) | _TIF_NEED_RESCHED))
return;
cpu = cpu_of(rq);
if (cpu == smp_processor_id()) {
- set_tsk_need_resched(curr);
- set_preempt_need_resched();
+ set_ti_thread_flag(cti, tif);
+ if (tif == TIF_NEED_RESCHED)
+ set_preempt_need_resched();
return;
}
- if (set_nr_and_not_polling(curr))
- smp_send_reschedule(cpu);
- else
+ if (set_nr_and_not_polling(cti, tif)) {
+ if (tif == TIF_NEED_RESCHED)
+ smp_send_reschedule(cpu);
+ } else {
trace_sched_wake_idle_without_ipi(cpu);
+ }
+}
+
+void resched_curr(struct rq *rq)
+{
+ __resched_curr(rq, TIF_NEED_RESCHED);
+}
+
+#ifdef CONFIG_PREEMPT_DYNAMIC
+static DEFINE_STATIC_KEY_FALSE(sk_dynamic_preempt_lazy);
+static __always_inline bool dynamic_preempt_lazy(void)
+{
+ return static_branch_unlikely(&sk_dynamic_preempt_lazy);
+}
+#else
+static __always_inline bool dynamic_preempt_lazy(void)
+{
+ return IS_ENABLED(CONFIG_PREEMPT_LAZY);
+}
+#endif
+
+static __always_inline int get_lazy_tif_bit(void)
+{
+ if (dynamic_preempt_lazy())
+ return TIF_NEED_RESCHED_LAZY;
+
+ return TIF_NEED_RESCHED;
+}
+
+void resched_curr_lazy(struct rq *rq)
+{
+ __resched_curr(rq, get_lazy_tif_bit());
}
void resched_cpu(int cpu)
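
Taken together, __resched_curr() now distinguishes an eager request (TIF_NEED_RESCHED: fold the need-resched state into the preempt count locally, or send an IPI remotely) from a lazy one (TIF_NEED_RESCHED_LAZY: only mark the thread, no IPI; acted upon at the next return to user space or upgraded at the next tick), and the idle task is always treated eagerly. A compact model of that policy, with the TIF values made up for the example:

    #include <stdbool.h>
    #include <stdio.h>

    enum tif { TIF_NEED_RESCHED, TIF_NEED_RESCHED_LAZY };  /* illustrative */

    /* Model of the __resched_curr() policy, not the kernel implementation. */
    static const char *resched_action(enum tif tif, bool target_is_idle,
                                      bool target_is_this_cpu)
    {
        if (target_is_idle)
            tif = TIF_NEED_RESCHED;     /* never delay preempting idle */

        if (tif == TIF_NEED_RESCHED_LAZY)
            return "set lazy bit only; handled at return-to-user or next tick";

        return target_is_this_cpu ? "set bit and fold preempt count"
                                  : "set bit and send reschedule IPI";
    }

    int main(void)
    {
        printf("%s\n", resched_action(TIF_NEED_RESCHED_LAZY, false, true));
        printf("%s\n", resched_action(TIF_NEED_RESCHED_LAZY, true, false));
        printf("%s\n", resched_action(TIF_NEED_RESCHED, false, false));
        return 0;
    }
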
@@ -1192,7 +1233,7 @@ static void wake_up_idle_cpu(int cpu)
* and testing of the above solutions didn't appear to report
* much benefits.
*/
- if (set_nr_and_not_polling(rq->idle))
+ if (set_nr_and_not_polling(task_thread_info(rq->idle), TIF_NEED_RESCHED))
smp_send_reschedule(cpu);
else
trace_sched_wake_idle_without_ipi(cpu);
@@ -1399,7 +1440,7 @@ void set_load_weight(struct task_struct *p, bool update_load)
* requests are serialized using a mutex to reduce the risk of conflicting
* updates or API abuses.
*/
-static DEFINE_MUTEX(uclamp_mutex);
+static __maybe_unused DEFINE_MUTEX(uclamp_mutex);
/* Max allowed minimum utilization */
static unsigned int __maybe_unused sysctl_sched_uclamp_util_min = SCHED_CAPACITY_SCALE;
@@ -2024,10 +2065,10 @@ void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
*/
uclamp_rq_inc(rq, p);
- if (!(flags & ENQUEUE_RESTORE)) {
+ psi_enqueue(p, flags);
+
+ if (!(flags & ENQUEUE_RESTORE))
sched_info_enqueue(rq, p);
- psi_enqueue(p, flags & ENQUEUE_MIGRATED);
- }
if (sched_core_enabled(rq))
sched_core_enqueue(rq, p);
@@ -2044,10 +2085,10 @@ inline bool dequeue_task(struct rq *rq, struct task_struct *p, int flags)
if (!(flags & DEQUEUE_NOCLOCK))
update_rq_clock(rq);
- if (!(flags & DEQUEUE_SAVE)) {
+ if (!(flags & DEQUEUE_SAVE))
sched_info_dequeue(rq, p);
- psi_dequeue(p, !(flags & DEQUEUE_SLEEP));
- }
+
+ psi_dequeue(p, flags);
/*
* Must be before ->dequeue_task() because ->dequeue_task() can 'fail'
@@ -2135,16 +2176,18 @@ void check_class_changed(struct rq *rq, struct task_struct *p,
void wakeup_preempt(struct rq *rq, struct task_struct *p, int flags)
{
- if (p->sched_class == rq->curr->sched_class)
- rq->curr->sched_class->wakeup_preempt(rq, p, flags);
- else if (sched_class_above(p->sched_class, rq->curr->sched_class))
+ struct task_struct *donor = rq->donor;
+
+ if (p->sched_class == donor->sched_class)
+ donor->sched_class->wakeup_preempt(rq, p, flags);
+ else if (sched_class_above(p->sched_class, donor->sched_class))
resched_curr(rq);
/*
* A queue event has occurred, and we're going to schedule. In
* this case, we can save a useless back to back clock update.
*/
- if (task_on_rq_queued(rq->curr) && test_tsk_need_resched(rq->curr))
+ if (task_on_rq_queued(donor) && test_tsk_need_resched(rq->curr))
rq_clock_skip_update(rq);
}
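
The rq->donor and task_current_donor() conversions scattered through this patch prepare for proxy execution: rq->donor is the task whose scheduling context (class, priority, accounting) is in effect, which may differ from rq->curr, the task whose code actually runs, once a lock owner can run on behalf of a boosted waiter. Until proxying is enabled the two are the same task, so the converted sites keep their current behaviour. A hedged sketch of what the accessor presumably looks like (the real helper lives in kernel/sched/sched.h and may differ in detail):

    /* Hypothetical sketch of the accessor used above, not the actual definition. */
    static inline bool task_current_donor(struct rq *rq, struct task_struct *p)
    {
        return rq->donor == p;
    }
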
@@ -2620,9 +2663,7 @@ int push_cpu_stop(void *arg)
// XXX validate p is still the highest prio task
if (task_rq(p) == rq) {
- deactivate_task(rq, p, 0);
- set_task_cpu(p, lowest_rq->cpu);
- activate_task(lowest_rq, p, 0);
+ move_queued_task_locked(rq, lowest_rq, p);
resched_curr(lowest_rq);
}
@@ -2682,7 +2723,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
lockdep_assert_held(&p->pi_lock);
queued = task_on_rq_queued(p);
- running = task_current(rq, p);
+ running = task_current_donor(rq, p);
if (queued) {
/*
@@ -2696,6 +2737,7 @@ __do_set_cpus_allowed(struct task_struct *p, struct affinity_context *ctx)
put_prev_task(rq, p);
p->sched_class->set_cpus_allowed(p, ctx);
+ mm_set_cpus_allowed(p->mm, ctx->new_mask);
if (queued)
enqueue_task(rq, p, ENQUEUE_RESTORE | ENQUEUE_NOCLOCK);
@@ -3308,9 +3350,7 @@ static void __migrate_swap_task(struct task_struct *p, int cpu)
rq_pin_lock(src_rq, &srf);
rq_pin_lock(dst_rq, &drf);
- deactivate_task(src_rq, p, 0);
- set_task_cpu(p, cpu);
- activate_task(dst_rq, p, 0);
+ move_queued_task_locked(src_rq, dst_rq, p);
wakeup_preempt(dst_rq, p, 0);
rq_unpin_lock(dst_rq, &drf);
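
Several open-coded deactivate/set_task_cpu/activate sequences (here in __migrate_swap_task(), earlier in push_cpu_stop() and later in try_steal_cookie()) are folded into move_queued_task_locked(). Roughly, and only as a sketch of what the helper is expected to wrap with both runqueue locks held:

    /*
     * Illustrative sketch; the real helper (kernel/sched/sched.h) may differ,
     * e.g. in how it asserts that both rq locks are held.
     */
    static inline void move_queued_task_locked(struct rq *src_rq, struct rq *dst_rq,
                                               struct task_struct *p)
    {
        lockdep_assert_rq_held(src_rq);
        lockdep_assert_rq_held(dst_rq);

        deactivate_task(src_rq, p, 0);
        set_task_cpu(p, cpu_of(dst_rq));
        activate_task(dst_rq, p, 0);
    }
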
@@ -4424,7 +4464,8 @@ int wake_up_state(struct task_struct *p, unsigned int state)
* Perform scheduler related setup for a newly forked process p.
* p is forked by current.
*
- * __sched_fork() is basic setup used by init_idle() too:
+ * __sched_fork() is basic setup which is also used by sched_init() to
+ * initialize the boot CPU's idle task.
*/
static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
{
@@ -5517,7 +5558,7 @@ unsigned long long task_sched_runtime(struct task_struct *p)
* project cycles that may never be accounted to this
* thread, breaking clock_gettime().
*/
- if (task_current(rq, p) && task_on_rq_queued(p)) {
+ if (task_current_donor(rq, p) && task_on_rq_queued(p)) {
prefetch_curr_exec_start(p);
update_rq_clock(rq);
p->sched_class->update_curr(rq);
@@ -5585,7 +5626,8 @@ void sched_tick(void)
{
int cpu = smp_processor_id();
struct rq *rq = cpu_rq(cpu);
- struct task_struct *curr;
+ /* accounting goes to the donor task */
+ struct task_struct *donor;
struct rq_flags rf;
unsigned long hw_pressure;
u64 resched_latency;
@@ -5596,19 +5638,23 @@ void sched_tick(void)
sched_clock_tick();
rq_lock(rq, &rf);
+ donor = rq->donor;
- curr = rq->curr;
- psi_account_irqtime(rq, curr, NULL);
+ psi_account_irqtime(rq, donor, NULL);
update_rq_clock(rq);
hw_pressure = arch_scale_hw_pressure(cpu_of(rq));
update_hw_load_avg(rq_clock_task(rq), rq, hw_pressure);
- curr->sched_class->task_tick(rq, curr, 0);
+
+ if (dynamic_preempt_lazy() && tif_test_bit(TIF_NEED_RESCHED_LAZY))
+ resched_curr(rq);
+
+ donor->sched_class->task_tick(rq, donor, 0);
if (sched_feat(LATENCY_WARN))
resched_latency = cpu_resched_latency(rq);
calc_global_load_tick(rq);
sched_core_tick(rq);
- task_tick_mm_cid(rq, curr);
+ task_tick_mm_cid(rq, donor);
scx_tick(rq);
rq_unlock(rq, &rf);
@@ -5618,8 +5664,8 @@ void sched_tick(void)
perf_event_task_tick();
- if (curr->flags & PF_WQ_WORKER)
- wq_worker_tick(curr);
+ if (donor->flags & PF_WQ_WORKER)
+ wq_worker_tick(donor);
#ifdef CONFIG_SMP
if (!scx_switched_all()) {
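
The tick is also where a pending lazy request is promoted: if only TIF_NEED_RESCHED_LAZY is set when sched_tick() runs, the resched_curr() call above upgrades it to a full TIF_NEED_RESCHED, so a task that stays in the kernel and never returns to user space is still preempted within roughly one tick. As a worked example, with HZ=1000 that bounds the extra latency of a lazy request at about 1 ms, and with HZ=250 at about 4 ms.
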
@@ -5686,6 +5732,12 @@ static void sched_tick_remote(struct work_struct *work)
struct task_struct *curr = rq->curr;
if (cpu_online(cpu)) {
+ /*
+ * Since this is a remote tick for full dynticks mode,
+ * we are always sure that there is no proxy (only a
+ * single task is running).
+ */
+ SCHED_WARN_ON(rq->curr != rq->donor);
update_rq_clock(rq);
if (!is_idle_task(curr)) {
@@ -5920,12 +5972,15 @@ static void prev_balance(struct rq *rq, struct task_struct *prev,
#ifdef CONFIG_SCHED_CLASS_EXT
/*
- * SCX requires a balance() call before every pick_next_task() including
- * when waking up from SCHED_IDLE. If @start_class is below SCX, start
- * from SCX instead.
+ * SCX requires a balance() call before every pick_task() including when
+ * waking up from SCHED_IDLE. If @start_class is below SCX, start from
+ * SCX instead. Also, set a flag to detect missing balance() call.
*/
- if (scx_enabled() && sched_class_above(&ext_sched_class, start_class))
- start_class = &ext_sched_class;
+ if (scx_enabled()) {
+ rq->scx.flags |= SCX_RQ_BAL_PENDING;
+ if (sched_class_above(&ext_sched_class, start_class))
+ start_class = &ext_sched_class;
+ }
#endif
/*
@@ -6306,10 +6361,7 @@ static bool try_steal_cookie(int this, int that)
if (sched_task_is_throttled(p, this))
goto next;
- deactivate_task(src, p, 0);
- set_task_cpu(p, this);
- activate_task(dst, p, 0);
-
+ move_queued_task_locked(src, dst, p);
resched_curr(dst);
success = true;
@@ -6504,6 +6556,45 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
#define SM_RTLOCK_WAIT 2
/*
+ * Helper function for __schedule()
+ *
+ * If a task does not have signals pending, deactivate it.
+ * Otherwise, mark the task's __state as RUNNING.
+ */
+static bool try_to_block_task(struct rq *rq, struct task_struct *p,
+ unsigned long task_state)
+{
+ int flags = DEQUEUE_NOCLOCK;
+
+ if (signal_pending_state(task_state, p)) {
+ WRITE_ONCE(p->__state, TASK_RUNNING);
+ return false;
+ }
+
+ p->sched_contributes_to_load =
+ (task_state & TASK_UNINTERRUPTIBLE) &&
+ !(task_state & TASK_NOLOAD) &&
+ !(task_state & TASK_FROZEN);
+
+ if (unlikely(is_special_task_state(task_state)))
+ flags |= DEQUEUE_SPECIAL;
+
+ /*
+ * __schedule() ttwu()
+ * prev_state = prev->state; if (p->on_rq && ...)
+ * if (prev_state) goto out;
+ * p->on_rq = 0; smp_acquire__after_ctrl_dep();
+ * p->state = TASK_WAKING
+ *
+ * Where __schedule() and ttwu() have matching control dependencies.
+ *
+ * After this, schedule() must not care about p->state any more.
+ */
+ block_task(rq, p, flags);
+ return true;
+}
+
+/*
* __schedule() is the main scheduler function.
*
* The main means of driving the scheduler and thus entering this function are:
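
The load-average contribution computed in try_to_block_task() above is worth a concrete example: a plain TASK_UNINTERRUPTIBLE sleeper counts towards loadavg, while TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) and frozen tasks do not. A small stand-alone check, with placeholder values for the state bits (the real ones are in include/linux/sched.h):

    #include <stdbool.h>
    #include <stdio.h>

    /* Placeholder state bits purely for the example. */
    #define TASK_UNINTERRUPTIBLE    0x0002
    #define TASK_NOLOAD             0x0400
    #define TASK_FROZEN             0x8000
    #define TASK_IDLE               (TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

    static bool contributes_to_load(unsigned int state)
    {
        return (state & TASK_UNINTERRUPTIBLE) &&
               !(state & TASK_NOLOAD) &&
               !(state & TASK_FROZEN);
    }

    int main(void)
    {
        printf("D sleep: %d\n", contributes_to_load(TASK_UNINTERRUPTIBLE)); /* 1 */
        printf("idle   : %d\n", contributes_to_load(TASK_IDLE));            /* 0 */
        printf("frozen : %d\n", contributes_to_load(TASK_UNINTERRUPTIBLE |
                                                    TASK_FROZEN));          /* 0 */
        return 0;
    }
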
@@ -6611,37 +6702,12 @@ static void __sched notrace __schedule(int sched_mode)
goto picked;
}
} else if (!preempt && prev_state) {
- if (signal_pending_state(prev_state, prev)) {
- WRITE_ONCE(prev->__state, TASK_RUNNING);
- } else {
- int flags = DEQUEUE_NOCLOCK;
-
- prev->sched_contributes_to_load =
- (prev_state & TASK_UNINTERRUPTIBLE) &&
- !(prev_state & TASK_NOLOAD) &&
- !(prev_state & TASK_FROZEN);
-
- if (unlikely(is_special_task_state(prev_state)))
- flags |= DEQUEUE_SPECIAL;
-
- /*
- * __schedule() ttwu()
- * prev_state = prev->state; if (p->on_rq && ...)
- * if (prev_state) goto out;
- * p->on_rq = 0; smp_acquire__after_ctrl_dep();
- * p->state = TASK_WAKING
- *
- * Where __schedule() and ttwu() have matching control dependencies.
- *
- * After this, schedule() must not care about p->state any more.
- */
- block_task(rq, prev, flags);
- block = true;
- }
+ block = try_to_block_task(rq, prev, prev_state);
switch_count = &prev->nvcsw;
}
next = pick_next_task(rq, prev, &rf);
+ rq_set_donor(rq, next);
picked:
clear_tsk_need_resched(prev);
clear_preempt_need_resched();
@@ -7148,7 +7214,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
queued = task_on_rq_queued(p);
- running = task_current(rq, p);
+ running = task_current_donor(rq, p);
if (queued)
dequeue_task(rq, p, queue_flag);
if (running)
@@ -7348,6 +7414,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
* preempt_schedule <- NOP
* preempt_schedule_notrace <- NOP
* irqentry_exit_cond_resched <- NOP
+ * dynamic_preempt_lazy <- false
*
* VOLUNTARY:
* cond_resched <- __cond_resched
@@ -7355,6 +7422,7 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
* preempt_schedule <- NOP
* preempt_schedule_notrace <- NOP
* irqentry_exit_cond_resched <- NOP
+ * dynamic_preempt_lazy <- false
*
* FULL:
* cond_resched <- RET0
@@ -7362,6 +7430,15 @@ EXPORT_SYMBOL(__cond_resched_rwlock_write);
* preempt_schedule <- preempt_schedule
* preempt_schedule_notrace <- preempt_schedule_notrace
* irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ * dynamic_preempt_lazy <- false
+ *
+ * LAZY:
+ * cond_resched <- RET0
+ * might_resched <- RET0
+ * preempt_schedule <- preempt_schedule
+ * preempt_schedule_notrace <- preempt_schedule_notrace
+ * irqentry_exit_cond_resched <- irqentry_exit_cond_resched
+ * dynamic_preempt_lazy <- true
*/
enum {
@@ -7369,30 +7446,41 @@ enum {
preempt_dynamic_none,
preempt_dynamic_voluntary,
preempt_dynamic_full,
+ preempt_dynamic_lazy,
};
int preempt_dynamic_mode = preempt_dynamic_undefined;
int sched_dynamic_mode(const char *str)
{
+#ifndef CONFIG_PREEMPT_RT
if (!strcmp(str, "none"))
return preempt_dynamic_none;
if (!strcmp(str, "voluntary"))
return preempt_dynamic_voluntary;
+#endif
if (!strcmp(str, "full"))
return preempt_dynamic_full;
+#ifdef CONFIG_ARCH_HAS_PREEMPT_LAZY
+ if (!strcmp(str, "lazy"))
+ return preempt_dynamic_lazy;
+#endif
+
return -EINVAL;
}
+#define preempt_dynamic_key_enable(f) static_key_enable(&sk_dynamic_##f.key)
+#define preempt_dynamic_key_disable(f) static_key_disable(&sk_dynamic_##f.key)
+
#if defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL)
#define preempt_dynamic_enable(f) static_call_update(f, f##_dynamic_enabled)
#define preempt_dynamic_disable(f) static_call_update(f, f##_dynamic_disabled)
#elif defined(CONFIG_HAVE_PREEMPT_DYNAMIC_KEY)
-#define preempt_dynamic_enable(f) static_key_enable(&sk_dynamic_##f.key)
-#define preempt_dynamic_disable(f) static_key_disable(&sk_dynamic_##f.key)
+#define preempt_dynamic_enable(f) preempt_dynamic_key_enable(f)
+#define preempt_dynamic_disable(f) preempt_dynamic_key_disable(f)
#else
#error "Unsupported PREEMPT_DYNAMIC mechanism"
#endif
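
With these macros the lazy mode is driven by the same machinery as the other dynamic-preempt knobs: sk_dynamic_preempt_lazy is a static key flipped by __sched_dynamic_update(), and get_lazy_tif_bit() collapses to TIF_NEED_RESCHED whenever the key is off. A minimal model of that selection, using a plain bool where the kernel uses a static branch:

    #include <stdbool.h>
    #include <stdio.h>

    /* Plain bool standing in for the sk_dynamic_preempt_lazy static key. */
    static bool sk_preempt_lazy;

    static void key_enable(bool *key)  { *key = true;  }
    static void key_disable(bool *key) { *key = false; }

    enum { PREEMPT_NONE, PREEMPT_VOLUNTARY, PREEMPT_FULL, PREEMPT_LAZY };

    /* Model of the per-mode key updates done by __sched_dynamic_update(). */
    static void sched_dynamic_update(int mode)
    {
        if (mode == PREEMPT_LAZY)
            key_enable(&sk_preempt_lazy);
        else
            key_disable(&sk_preempt_lazy);
    }

    int main(void)
    {
        sched_dynamic_update(PREEMPT_FULL);
        printf("lazy key: %d\n", sk_preempt_lazy);      /* 0 */
        sched_dynamic_update(PREEMPT_LAZY);
        printf("lazy key: %d\n", sk_preempt_lazy);      /* 1 */
        return 0;
    }

At run time the new mode can presumably be selected the same way as the existing ones, e.g. via the preempt= boot parameter or /sys/kernel/debug/sched/preempt, with "lazy" only offered on architectures selecting CONFIG_ARCH_HAS_PREEMPT_LAZY.
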
@@ -7412,6 +7500,7 @@ static void __sched_dynamic_update(int mode)
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
preempt_dynamic_enable(irqentry_exit_cond_resched);
+ preempt_dynamic_key_disable(preempt_lazy);
switch (mode) {
case preempt_dynamic_none:
@@ -7421,6 +7510,7 @@ static void __sched_dynamic_update(int mode)
preempt_dynamic_disable(preempt_schedule);
preempt_dynamic_disable(preempt_schedule_notrace);
preempt_dynamic_disable(irqentry_exit_cond_resched);
+ preempt_dynamic_key_disable(preempt_lazy);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: none\n");
break;
@@ -7432,6 +7522,7 @@ static void __sched_dynamic_update(int mode)
preempt_dynamic_disable(preempt_schedule);
preempt_dynamic_disable(preempt_schedule_notrace);
preempt_dynamic_disable(irqentry_exit_cond_resched);
+ preempt_dynamic_key_disable(preempt_lazy);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: voluntary\n");
break;
@@ -7443,9 +7534,22 @@ static void __sched_dynamic_update(int mode)
preempt_dynamic_enable(preempt_schedule);
preempt_dynamic_enable(preempt_schedule_notrace);
preempt_dynamic_enable(irqentry_exit_cond_resched);
+ preempt_dynamic_key_disable(preempt_lazy);
if (mode != preempt_dynamic_mode)
pr_info("Dynamic Preempt: full\n");
break;
+
+ case preempt_dynamic_lazy:
+ if (!klp_override)
+ preempt_dynamic_disable(cond_resched);
+ preempt_dynamic_disable(might_resched);
+ preempt_dynamic_enable(preempt_schedule);
+ preempt_dynamic_enable(preempt_schedule_notrace);
+ preempt_dynamic_enable(irqentry_exit_cond_resched);
+ preempt_dynamic_key_enable(preempt_lazy);
+ if (mode != preempt_dynamic_mode)
+ pr_info("Dynamic Preempt: lazy\n");
+ break;
}
preempt_dynamic_mode = mode;
@@ -7508,6 +7612,8 @@ static void __init preempt_dynamic_init(void)
sched_dynamic_update(preempt_dynamic_none);
} else if (IS_ENABLED(CONFIG_PREEMPT_VOLUNTARY)) {
sched_dynamic_update(preempt_dynamic_voluntary);
+ } else if (IS_ENABLED(CONFIG_PREEMPT_LAZY)) {
+ sched_dynamic_update(preempt_dynamic_lazy);
} else {
/* Default static call setting, nothing to do */
WARN_ON_ONCE(!IS_ENABLED(CONFIG_PREEMPT));
@@ -7528,6 +7634,7 @@ static void __init preempt_dynamic_init(void)
PREEMPT_MODEL_ACCESSOR(none);
PREEMPT_MODEL_ACCESSOR(voluntary);
PREEMPT_MODEL_ACCESSOR(full);
+PREEMPT_MODEL_ACCESSOR(lazy);
#else /* !CONFIG_PREEMPT_DYNAMIC: */
@@ -7680,8 +7787,6 @@ void __init init_idle(struct task_struct *idle, int cpu)
struct rq *rq = cpu_rq(cpu);
unsigned long flags;
- __sched_fork(0, idle);
-
raw_spin_lock_irqsave(&idle->pi_lock, flags);
raw_spin_rq_lock(rq);
@@ -7696,10 +7801,8 @@ void __init init_idle(struct task_struct *idle, int cpu)
#ifdef CONFIG_SMP
/*
- * It's possible that init_idle() gets called multiple times on a task,
- * in that case do_set_cpus_allowed() will not do the right thing.
- *
- * And since this is boot we can forgo the serialization.
+ * No validation and serialization required at boot time and for
+ * setting up the idle tasks of not yet online CPUs.
*/
set_cpus_allowed_common(idle, &ac);
#endif
@@ -7718,6 +7821,7 @@ void __init init_idle(struct task_struct *idle, int cpu)
rcu_read_unlock();
rq->idle = idle;
+ rq_set_donor(rq, idle);
rcu_assign_pointer(rq->curr, idle);
idle->on_rq = TASK_ON_RQ_QUEUED;
#ifdef CONFIG_SMP
@@ -7807,7 +7911,7 @@ void sched_setnuma(struct task_struct *p, int nid)
rq = task_rq_lock(p, &rf);
queued = task_on_rq_queued(p);
- running = task_current(rq, p);
+ running = task_current_donor(rq, p);
if (queued)
dequeue_task(rq, p, DEQUEUE_SAVE);
@@ -8543,6 +8647,7 @@ void __init sched_init(void)
* but because we are the idle thread, we just pick up running again
* when this runqueue becomes "idle".
*/
+ __sched_fork(0, current);
init_idle(current, smp_processor_id());
calc_load_update = jiffies + LOAD_FREQ;
@@ -8957,7 +9062,7 @@ void sched_move_task(struct task_struct *tsk)
update_rq_clock(rq);
- running = task_current(rq, tsk);
+ running = task_current_donor(rq, tsk);
queued = task_on_rq_queued(tsk);
if (queued)
@@ -10250,6 +10355,7 @@ int __sched_mm_cid_migrate_from_try_steal_cid(struct rq *src_rq,
*/
if (!try_cmpxchg(&src_pcpu_cid->cid, &lazy_cid, MM_CID_UNSET))
return -1;
+ WRITE_ONCE(src_pcpu_cid->recent_cid, MM_CID_UNSET);
return src_cid;
}
@@ -10262,7 +10368,8 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
{
struct mm_cid *src_pcpu_cid, *dst_pcpu_cid;
struct mm_struct *mm = t->mm;
- int src_cid, dst_cid, src_cpu;
+ int src_cid, src_cpu;
+ bool dst_cid_is_set;
struct rq *src_rq;
lockdep_assert_rq_held(dst_rq);
@@ -10279,9 +10386,9 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
* allocation closest to 0 in cases where few threads migrate around
* many CPUs.
*
- * If destination cid is already set, we may have to just clear
- * the src cid to ensure compactness in frequent migrations
- * scenarios.
+ * If destination cid or recent cid is already set, we may have
+ * to just clear the src cid to ensure compactness in frequent
+ * migration scenarios.
*
* It is not useful to clear the src cid when the number of threads is
 * greater or equal to the number of allowed CPUs, because user-space
 * can expect the number of allowed cids to reach the number of
 * allowed CPUs.
* allowed CPUs.
*/
dst_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, cpu_of(dst_rq));
- dst_cid = READ_ONCE(dst_pcpu_cid->cid);
- if (!mm_cid_is_unset(dst_cid) &&
- atomic_read(&mm->mm_users) >= t->nr_cpus_allowed)
+ dst_cid_is_set = !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->cid)) ||
+ !mm_cid_is_unset(READ_ONCE(dst_pcpu_cid->recent_cid));
+ if (dst_cid_is_set && atomic_read(&mm->mm_users) >= READ_ONCE(mm->nr_cpus_allowed))
return;
src_pcpu_cid = per_cpu_ptr(mm->pcpu_cid, src_cpu);
src_rq = cpu_rq(src_cpu);
@@ -10302,13 +10409,14 @@ void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t)
src_cid);
if (src_cid == -1)
return;
- if (!mm_cid_is_unset(dst_cid)) {
+ if (dst_cid_is_set) {
__mm_cid_put(mm, src_cid);
return;
}
/* Move src_cid to dst cpu. */
mm_cid_snapshot_time(dst_rq, mm);
WRITE_ONCE(dst_pcpu_cid->cid, src_cid);
+ WRITE_ONCE(dst_pcpu_cid->recent_cid, src_cid);
}
static void sched_mm_cid_remote_clear(struct mm_struct *mm, struct mm_cid *pcpu_cid,
@@ -10547,7 +10655,7 @@ void sched_mm_cid_after_execve(struct task_struct *t)
* Matches barrier in sched_mm_cid_remote_clear_old().
*/
smp_mb();
- t->last_mm_cid = t->mm_cid = mm_cid_get(rq, mm);
+ t->last_mm_cid = t->mm_cid = mm_cid_get(rq, t, mm);
}
rseq_set_notify_resume(t);
}