diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 2 | ||||
-rw-r--r-- | mm/madvise.c | 6 | ||||
-rw-r--r-- | mm/memory.c | 26 | ||||
-rw-r--r-- | mm/mmu_notifier.c | 14 | ||||
-rw-r--r-- | mm/page_alloc.c | 9 | ||||
-rw-r--r-- | mm/rmap.c | 77 |
6 files changed, 83 insertions, 51 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 0b41c8cbeabc..65b4b6e7f7bd 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1041,7 +1041,7 @@ void add_page_wait_queue(struct page *page, wait_queue_entry_t *waiter) unsigned long flags; spin_lock_irqsave(&q->lock, flags); - __add_wait_queue(q, waiter); + __add_wait_queue_entry_tail(q, waiter); SetPageWaiters(page); spin_unlock_irqrestore(&q->lock, flags); } diff --git a/mm/madvise.c b/mm/madvise.c index 23ed525bc2bc..4d7d1e5ddba9 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -613,6 +613,7 @@ static int madvise_inject_error(int behavior, unsigned long start, unsigned long end) { struct page *page; + struct zone *zone; if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -646,6 +647,11 @@ static int madvise_inject_error(int behavior, if (ret) return ret; } + + /* Ensure that all poisoned pages are removed from per-cpu lists */ + for_each_populated_zone(zone) + drain_all_pages(zone); + return 0; } #endif diff --git a/mm/memory.c b/mm/memory.c index fe2fba27ded2..56e48e4593cb 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4008,7 +4008,8 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) #endif /* __PAGETABLE_PMD_FOLDED */ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, - pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) + unsigned long *start, unsigned long *end, + pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { pgd_t *pgd; p4d_t *p4d; @@ -4035,17 +4036,29 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, if (!pmdpp) goto out; + if (start && end) { + *start = address & PMD_MASK; + *end = *start + PMD_SIZE; + mmu_notifier_invalidate_range_start(mm, *start, *end); + } *ptlp = pmd_lock(mm, pmd); if (pmd_huge(*pmd)) { *pmdpp = pmd; return 0; } spin_unlock(*ptlp); + if (start && end) + mmu_notifier_invalidate_range_end(mm, *start, *end); } if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto out; + if (start && end) { + *start = address & PAGE_MASK; + *end = *start + PAGE_SIZE; + mmu_notifier_invalidate_range_start(mm, *start, *end); + } ptep = pte_offset_map_lock(mm, pmd, address, ptlp); if (!pte_present(*ptep)) goto unlock; @@ -4053,6 +4066,8 @@ static int __follow_pte_pmd(struct mm_struct *mm, unsigned long address, return 0; unlock: pte_unmap_unlock(ptep, *ptlp); + if (start && end) + mmu_notifier_invalidate_range_end(mm, *start, *end); out: return -EINVAL; } @@ -4064,20 +4079,21 @@ static inline int follow_pte(struct mm_struct *mm, unsigned long address, /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, - !(res = __follow_pte_pmd(mm, address, ptepp, NULL, - ptlp))); + !(res = __follow_pte_pmd(mm, address, NULL, NULL, + ptepp, NULL, ptlp))); return res; } int follow_pte_pmd(struct mm_struct *mm, unsigned long address, + unsigned long *start, unsigned long *end, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp) { int res; /* (void) is needed to make gcc happy */ (void) __cond_lock(*ptlp, - !(res = __follow_pte_pmd(mm, address, ptepp, pmdpp, - ptlp))); + !(res = __follow_pte_pmd(mm, address, start, end, + ptepp, pmdpp, ptlp))); return res; } EXPORT_SYMBOL(follow_pte_pmd); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 54ca54562928..314285284e6e 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -174,20 +174,6 @@ void __mmu_notifier_change_pte(struct mm_struct *mm, unsigned long address, srcu_read_unlock(&srcu, id); } -void __mmu_notifier_invalidate_page(struct mm_struct *mm, - unsigned long address) -{ - struct mmu_notifier *mn; - int id; - - id = srcu_read_lock(&srcu); - hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { - if (mn->ops->invalidate_page) - mn->ops->invalidate_page(mn, mm, address); - } - srcu_read_unlock(&srcu, id); -} - void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end) { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 7a58eb5757e3..1423da8dd16f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3291,10 +3291,13 @@ __alloc_pages_may_oom(gfp_t gfp_mask, unsigned int order, /* * Go through the zonelist yet one more time, keep very high watermark * here, this is only to catch a parallel oom killing, we must fail if - * we're still under heavy pressure. + * we're still under heavy pressure. But make sure that this reclaim + * attempt shall not depend on __GFP_DIRECT_RECLAIM && !__GFP_NORETRY + * allocation which will never fail due to oom_lock already held. */ - page = get_page_from_freelist(gfp_mask | __GFP_HARDWALL, order, - ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac); + page = get_page_from_freelist((gfp_mask | __GFP_HARDWALL) & + ~__GFP_DIRECT_RECLAIM, order, + ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac); if (page) goto out; diff --git a/mm/rmap.c b/mm/rmap.c index c1286d47aa1f..c570f82e6827 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -887,11 +887,21 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, .address = address, .flags = PVMW_SYNC, }; + unsigned long start = address, end; int *cleaned = arg; - bool invalidation_needed = false; + + /* + * We have to assume the worse case ie pmd for invalidation. Note that + * the page can not be free from this function. + */ + end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); + mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); while (page_vma_mapped_walk(&pvmw)) { + unsigned long cstart, cend; int ret = 0; + + cstart = address = pvmw.address; if (pvmw.pte) { pte_t entry; pte_t *pte = pvmw.pte; @@ -899,11 +909,12 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pte_dirty(*pte) && !pte_write(*pte)) continue; - flush_cache_page(vma, pvmw.address, pte_pfn(*pte)); - entry = ptep_clear_flush(vma, pvmw.address, pte); + flush_cache_page(vma, address, pte_pfn(*pte)); + entry = ptep_clear_flush(vma, address, pte); entry = pte_wrprotect(entry); entry = pte_mkclean(entry); - set_pte_at(vma->vm_mm, pvmw.address, pte, entry); + set_pte_at(vma->vm_mm, address, pte, entry); + cend = cstart + PAGE_SIZE; ret = 1; } else { #ifdef CONFIG_TRANSPARENT_HUGE_PAGECACHE @@ -913,11 +924,13 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) continue; - flush_cache_page(vma, pvmw.address, page_to_pfn(page)); - entry = pmdp_huge_clear_flush(vma, pvmw.address, pmd); + flush_cache_page(vma, address, page_to_pfn(page)); + entry = pmdp_huge_clear_flush(vma, address, pmd); entry = pmd_wrprotect(entry); entry = pmd_mkclean(entry); - set_pmd_at(vma->vm_mm, pvmw.address, pmd, entry); + set_pmd_at(vma->vm_mm, address, pmd, entry); + cstart &= PMD_MASK; + cend = cstart + PMD_SIZE; ret = 1; #else /* unexpected pmd-mapped page? */ @@ -926,15 +939,12 @@ static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma, } if (ret) { + mmu_notifier_invalidate_range(vma->vm_mm, cstart, cend); (*cleaned)++; - invalidation_needed = true; } } - if (invalidation_needed) { - mmu_notifier_invalidate_range(vma->vm_mm, address, - address + (1UL << compound_order(page))); - } + mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); return true; } @@ -1328,7 +1338,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, }; pte_t pteval; struct page *subpage; - bool ret = true, invalidation_needed = false; + bool ret = true; + unsigned long start = address, end; enum ttu_flags flags = (enum ttu_flags)arg; /* munlock has nothing to gain from examining un-locked vmas */ @@ -1340,6 +1351,14 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, flags & TTU_MIGRATION, page); } + /* + * We have to assume the worse case ie pmd for invalidation. Note that + * the page can not be free in this function as call of try_to_unmap() + * must hold a reference on the page. + */ + end = min(vma->vm_end, start + (PAGE_SIZE << compound_order(page))); + mmu_notifier_invalidate_range_start(vma->vm_mm, start, end); + while (page_vma_mapped_walk(&pvmw)) { /* * If the page is mlock()d, we cannot swap it out. @@ -1368,9 +1387,11 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, VM_BUG_ON_PAGE(!pvmw.pte, page); subpage = page - page_to_pfn(page) + pte_pfn(*pvmw.pte); + address = pvmw.address; + if (!(flags & TTU_IGNORE_ACCESS)) { - if (ptep_clear_flush_young_notify(vma, pvmw.address, + if (ptep_clear_flush_young_notify(vma, address, pvmw.pte)) { ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1379,7 +1400,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } /* Nuke the page table entry. */ - flush_cache_page(vma, pvmw.address, pte_pfn(*pvmw.pte)); + flush_cache_page(vma, address, pte_pfn(*pvmw.pte)); if (should_defer_flush(mm, flags)) { /* * We clear the PTE but do not flush so potentially @@ -1389,12 +1410,11 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * transition on a cached TLB entry is written through * and traps if the PTE is unmapped. */ - pteval = ptep_get_and_clear(mm, pvmw.address, - pvmw.pte); + pteval = ptep_get_and_clear(mm, address, pvmw.pte); set_tlb_ubc_flush_pending(mm, pte_dirty(pteval)); } else { - pteval = ptep_clear_flush(vma, pvmw.address, pvmw.pte); + pteval = ptep_clear_flush(vma, address, pvmw.pte); } /* Move the dirty bit to the page. Now the pte is gone. */ @@ -1409,12 +1429,12 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (PageHuge(page)) { int nr = 1 << compound_order(page); hugetlb_count_sub(nr, mm); - set_huge_swap_pte_at(mm, pvmw.address, + set_huge_swap_pte_at(mm, address, pvmw.pte, pteval, vma_mmu_pagesize(vma)); } else { dec_mm_counter(mm, mm_counter(page)); - set_pte_at(mm, pvmw.address, pvmw.pte, pteval); + set_pte_at(mm, address, pvmw.pte, pteval); } } else if (pte_unused(pteval)) { @@ -1438,7 +1458,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); + set_pte_at(mm, address, pvmw.pte, swp_pte); } else if (PageAnon(page)) { swp_entry_t entry = { .val = page_private(subpage) }; pte_t swp_pte; @@ -1449,6 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, if (unlikely(PageSwapBacked(page) != PageSwapCache(page))) { WARN_ON_ONCE(1); ret = false; + /* We have to invalidate as we cleared the pte */ page_vma_mapped_walk_done(&pvmw); break; } @@ -1464,7 +1485,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * If the page was redirtied, it cannot be * discarded. Remap the page to page table. */ - set_pte_at(mm, pvmw.address, pvmw.pte, pteval); + set_pte_at(mm, address, pvmw.pte, pteval); SetPageSwapBacked(page); ret = false; page_vma_mapped_walk_done(&pvmw); @@ -1472,7 +1493,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, } if (swap_duplicate(entry) < 0) { - set_pte_at(mm, pvmw.address, pvmw.pte, pteval); + set_pte_at(mm, address, pvmw.pte, pteval); ret = false; page_vma_mapped_walk_done(&pvmw); break; @@ -1488,18 +1509,18 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, swp_pte = swp_entry_to_pte(entry); if (pte_soft_dirty(pteval)) swp_pte = pte_swp_mksoft_dirty(swp_pte); - set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte); + set_pte_at(mm, address, pvmw.pte, swp_pte); } else dec_mm_counter(mm, mm_counter_file(page)); discard: page_remove_rmap(subpage, PageHuge(page)); put_page(page); - invalidation_needed = true; + mmu_notifier_invalidate_range(mm, address, + address + PAGE_SIZE); } - if (invalidation_needed) - mmu_notifier_invalidate_range(mm, address, - address + (1UL << compound_order(page))); + mmu_notifier_invalidate_range_end(vma->vm_mm, start, end); + return ret; } |