aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/mm.h4
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--kernel/sched/fair.c23
3 files changed, 25 insertions, 5 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 215327daffae..e05a878e186e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1692,8 +1692,8 @@ static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
unsigned int pid_bit;
pid_bit = current->pid % BITS_PER_LONG;
- if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids)) {
- __set_bit(pid_bit, &vma->numab_state->access_pids);
+ if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) {
+ __set_bit(pid_bit, &vma->numab_state->access_pids[1]);
}
}
#else /* !CONFIG_NUMA_BALANCING */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index f8cbd8efc7cb..092f842a854f 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -477,7 +477,8 @@ struct vma_lock {
struct vma_numab_state {
unsigned long next_scan;
- unsigned long access_pids;
+ unsigned long next_pid_reset;
+ unsigned long access_pids[2];
};
/*
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index ef27b5931480..a962d4b60cd7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2930,6 +2930,7 @@ static void reset_ptenuma_scan(struct task_struct *p)
static bool vma_is_accessed(struct vm_area_struct *vma)
{
+ unsigned long pids;
/*
* Allow unconditional access first two times, so that all the (pages)
* of VMAs get prot_none fault introduced irrespective of accesses.
@@ -2939,10 +2940,12 @@ static bool vma_is_accessed(struct vm_area_struct *vma)
if (READ_ONCE(current->mm->numa_scan_seq) < 2)
return true;
- return test_bit(current->pid % BITS_PER_LONG,
- &vma->numab_state->access_pids);
+ pids = vma->numab_state->access_pids[0] | vma->numab_state->access_pids[1];
+ return test_bit(current->pid % BITS_PER_LONG, &pids);
}
+#define VMA_PID_RESET_PERIOD (4 * sysctl_numa_balancing_scan_delay)
+
/*
* The expensive part of numa migration is done from task_work context.
* Triggered from task_tick_numa().
@@ -3051,6 +3054,10 @@ static void task_numa_work(struct callback_head *work)
vma->numab_state->next_scan = now +
msecs_to_jiffies(sysctl_numa_balancing_scan_delay);
+
+ /* The first PID reset is due 4 scan delays after the scan start (next_scan) */
+ vma->numab_state->next_pid_reset = vma->numab_state->next_scan +
+ msecs_to_jiffies(VMA_PID_RESET_PERIOD);
}
/*
@@ -3065,6 +3072,18 @@ static void task_numa_work(struct callback_head *work)
if (!vma_is_accessed(vma))
continue;
+ /*
+ * Reset access PIDs regularly for old VMAs. The reset is done only after
+ * checking the VMA for recent access, to avoid clearing PID info before that check.
+ */
+ if (mm->numa_scan_seq &&
+ time_after(jiffies, vma->numab_state->next_pid_reset)) {
+ vma->numab_state->next_pid_reset = vma->numab_state->next_pid_reset +
+ msecs_to_jiffies(VMA_PID_RESET_PERIOD);
+ vma->numab_state->access_pids[0] = READ_ONCE(vma->numab_state->access_pids[1]);
+ vma->numab_state->access_pids[1] = 0;
+ }
+
do {
start = max(start, vma->vm_start);
end = ALIGN(start + (pages << PAGE_SHIFT), HPAGE_SIZE);