diff options
Diffstat (limited to 'mm/vmscan.c')
-rw-r--r-- | mm/vmscan.c | 445 |
1 files changed, 186 insertions, 259 deletions
diff --git a/mm/vmscan.c b/mm/vmscan.c index 090bfb605ecf..1678802e03e7 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -56,6 +56,7 @@ #include <linux/swapops.h> #include <linux/balloon_compaction.h> +#include <linux/sched/sysctl.h> #include "internal.h" @@ -978,47 +979,36 @@ void drop_slab(void) drop_slab_node(nid); } -static inline int is_page_cache_freeable(struct page *page) +static inline int is_page_cache_freeable(struct folio *folio) { /* * A freeable page cache page is referenced only by the caller * that isolated the page, the page cache and optional buffer * heads at page->private. */ - int page_cache_pins = thp_nr_pages(page); - return page_count(page) - page_has_private(page) == 1 + page_cache_pins; -} - -static int may_write_to_inode(struct inode *inode) -{ - if (current->flags & PF_SWAPWRITE) - return 1; - if (!inode_write_congested(inode)) - return 1; - if (inode_to_bdi(inode) == current->backing_dev_info) - return 1; - return 0; + return folio_ref_count(folio) - folio_test_private(folio) == + 1 + folio_nr_pages(folio); } /* - * We detected a synchronous write error writing a page out. Probably + * We detected a synchronous write error writing a folio out. Probably * -ENOSPC. We need to propagate that into the address_space for a subsequent * fsync(), msync() or close(). * * The tricky part is that after writepage we cannot touch the mapping: nothing - * prevents it from being freed up. But we have a ref on the page and once - * that page is locked, the mapping is pinned. + * prevents it from being freed up. But we have a ref on the folio and once + * that folio is locked, the mapping is pinned. * - * We're allowed to run sleeping lock_page() here because we know the caller has + * We're allowed to run sleeping folio_lock() here because we know the caller has * __GFP_FS. */ static void handle_write_error(struct address_space *mapping, - struct page *page, int error) + struct folio *folio, int error) { - lock_page(page); - if (page_mapping(page) == mapping) + folio_lock(folio); + if (folio_mapping(folio) == mapping) mapping_set_error(mapping, error); - unlock_page(page); + folio_unlock(folio); } static bool skip_throttle_noprogress(pg_data_t *pgdat) @@ -1066,8 +1056,10 @@ void reclaim_throttle(pg_data_t *pgdat, enum vmscan_throttle_state reason) * forward progress (e.g. journalling workqueues or kthreads). */ if (!current_is_kswapd() && - current->flags & (PF_IO_WORKER|PF_KTHREAD)) + current->flags & (PF_IO_WORKER|PF_KTHREAD)) { + cond_resched(); return; + } /* * These figures are pulled out of thin air. @@ -1163,35 +1155,35 @@ typedef enum { * pageout is called by shrink_page_list() for each dirty page. * Calls ->writepage(). */ -static pageout_t pageout(struct page *page, struct address_space *mapping) +static pageout_t pageout(struct folio *folio, struct address_space *mapping) { /* - * If the page is dirty, only perform writeback if that write + * If the folio is dirty, only perform writeback if that write * will be non-blocking. To prevent this allocation from being * stalled by pagecache activity. But note that there may be * stalls if we need to run get_block(). We could test * PagePrivate for that. * * If this process is currently in __generic_file_write_iter() against - * this page's queue, we can perform writeback even if that + * this folio's queue, we can perform writeback even if that * will block. * - * If the page is swapcache, write it back even if that would + * If the folio is swapcache, write it back even if that would * block, for some throttling. This happens by accident, because * swap_backing_dev_info is bust: it doesn't reflect the * congestion state of the swapdevs. Easy to fix, if needed. */ - if (!is_page_cache_freeable(page)) + if (!is_page_cache_freeable(folio)) return PAGE_KEEP; if (!mapping) { /* - * Some data journaling orphaned pages can have - * page->mapping == NULL while being dirty with clean buffers. + * Some data journaling orphaned folios can have + * folio->mapping == NULL while being dirty with clean buffers. */ - if (page_has_private(page)) { - if (try_to_free_buffers(page)) { - ClearPageDirty(page); - pr_info("%s: orphaned page\n", __func__); + if (folio_test_private(folio)) { + if (try_to_free_buffers(&folio->page)) { + folio_clear_dirty(folio); + pr_info("%s: orphaned folio\n", __func__); return PAGE_CLEAN; } } @@ -1199,10 +1191,8 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) } if (mapping->a_ops->writepage == NULL) return PAGE_ACTIVATE; - if (!may_write_to_inode(mapping->host)) - return PAGE_KEEP; - if (clear_page_dirty_for_io(page)) { + if (folio_clear_dirty_for_io(folio)) { int res; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, @@ -1212,21 +1202,21 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) .for_reclaim = 1, }; - SetPageReclaim(page); - res = mapping->a_ops->writepage(page, &wbc); + folio_set_reclaim(folio); + res = mapping->a_ops->writepage(&folio->page, &wbc); if (res < 0) - handle_write_error(mapping, page, res); + handle_write_error(mapping, folio, res); if (res == AOP_WRITEPAGE_ACTIVATE) { - ClearPageReclaim(page); + folio_clear_reclaim(folio); return PAGE_ACTIVATE; } - if (!PageWriteback(page)) { + if (!folio_test_writeback(folio)) { /* synchronous write or broken a_ops? */ - ClearPageReclaim(page); + folio_clear_reclaim(folio); } - trace_mm_vmscan_writepage(page); - inc_node_page_state(page, NR_VMSCAN_WRITE); + trace_mm_vmscan_write_folio(folio); + node_stat_add_folio(folio, NR_VMSCAN_WRITE); return PAGE_SUCCESS; } @@ -1237,16 +1227,16 @@ static pageout_t pageout(struct page *page, struct address_space *mapping) * Same as remove_mapping, but if the page is removed from the mapping, it * gets returned with a refcount of 0. */ -static int __remove_mapping(struct address_space *mapping, struct page *page, +static int __remove_mapping(struct address_space *mapping, struct folio *folio, bool reclaimed, struct mem_cgroup *target_memcg) { int refcount; void *shadow = NULL; - BUG_ON(!PageLocked(page)); - BUG_ON(mapping != page_mapping(page)); + BUG_ON(!folio_test_locked(folio)); + BUG_ON(mapping != folio_mapping(folio)); - if (!PageSwapCache(page)) + if (!folio_test_swapcache(folio)) spin_lock(&mapping->host->i_lock); xa_lock_irq(&mapping->i_pages); /* @@ -1274,23 +1264,23 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * Note that if SetPageDirty is always performed via set_page_dirty, * and thus under the i_pages lock, then this ordering is not required. */ - refcount = 1 + compound_nr(page); - if (!page_ref_freeze(page, refcount)) + refcount = 1 + folio_nr_pages(folio); + if (!folio_ref_freeze(folio, refcount)) goto cannot_free; /* note: atomic_cmpxchg in page_ref_freeze provides the smp_rmb */ - if (unlikely(PageDirty(page))) { - page_ref_unfreeze(page, refcount); + if (unlikely(folio_test_dirty(folio))) { + folio_ref_unfreeze(folio, refcount); goto cannot_free; } - if (PageSwapCache(page)) { - swp_entry_t swap = { .val = page_private(page) }; - mem_cgroup_swapout(page, swap); + if (folio_test_swapcache(folio)) { + swp_entry_t swap = folio_swap_entry(folio); + mem_cgroup_swapout(folio, swap); if (reclaimed && !mapping_exiting(mapping)) - shadow = workingset_eviction(page, target_memcg); - __delete_from_swap_cache(page, swap, shadow); + shadow = workingset_eviction(folio, target_memcg); + __delete_from_swap_cache(&folio->page, swap, shadow); xa_unlock_irq(&mapping->i_pages); - put_swap_page(page, swap); + put_swap_page(&folio->page, swap); } else { void (*freepage)(struct page *); @@ -1311,61 +1301,67 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * exceptional entries and shadow exceptional entries in the * same address_space. */ - if (reclaimed && page_is_file_lru(page) && + if (reclaimed && folio_is_file_lru(folio) && !mapping_exiting(mapping) && !dax_mapping(mapping)) - shadow = workingset_eviction(page, target_memcg); - __delete_from_page_cache(page, shadow); + shadow = workingset_eviction(folio, target_memcg); + __filemap_remove_folio(folio, shadow); xa_unlock_irq(&mapping->i_pages); if (mapping_shrinkable(mapping)) inode_add_lru(mapping->host); spin_unlock(&mapping->host->i_lock); if (freepage != NULL) - freepage(page); + freepage(&folio->page); } return 1; cannot_free: xa_unlock_irq(&mapping->i_pages); - if (!PageSwapCache(page)) + if (!folio_test_swapcache(folio)) spin_unlock(&mapping->host->i_lock); return 0; } -/* - * Attempt to detach a locked page from its ->mapping. If it is dirty or if - * someone else has a ref on the page, abort and return 0. If it was - * successfully detached, return 1. Assumes the caller has a single ref on - * this page. +/** + * remove_mapping() - Attempt to remove a folio from its mapping. + * @mapping: The address space. + * @folio: The folio to remove. + * + * If the folio is dirty, under writeback or if someone else has a ref + * on it, removal will fail. + * Return: The number of pages removed from the mapping. 0 if the folio + * could not be removed. + * Context: The caller should have a single refcount on the folio and + * hold its lock. */ -int remove_mapping(struct address_space *mapping, struct page *page) +long remove_mapping(struct address_space *mapping, struct folio *folio) { - if (__remove_mapping(mapping, page, false, NULL)) { + if (__remove_mapping(mapping, folio, false, NULL)) { /* - * Unfreezing the refcount with 1 rather than 2 effectively + * Unfreezing the refcount with 1 effectively * drops the pagecache ref for us without requiring another * atomic operation. */ - page_ref_unfreeze(page, 1); - return 1; + folio_ref_unfreeze(folio, 1); + return folio_nr_pages(folio); } return 0; } /** - * putback_lru_page - put previously isolated page onto appropriate LRU list - * @page: page to be put back to appropriate lru list + * folio_putback_lru - Put previously isolated folio onto appropriate LRU list. + * @folio: Folio to be returned to an LRU list. * - * Add previously isolated @page to appropriate LRU list. - * Page may still be unevictable for other reasons. + * Add previously isolated @folio to appropriate LRU list. + * The folio may still be unevictable for other reasons. * - * lru_lock must not be held, interrupts must be enabled. + * Context: lru_lock must not be held, interrupts must be enabled. */ -void putback_lru_page(struct page *page) +void folio_putback_lru(struct folio *folio) { - lru_cache_add(page); - put_page(page); /* drop ref from isolate */ + folio_add_lru(folio); + folio_put(folio); /* drop ref from isolate */ } enum page_references { @@ -1375,61 +1371,61 @@ enum page_references { PAGEREF_ACTIVATE, }; -static enum page_references page_check_references(struct page *page, +static enum page_references folio_check_references(struct folio *folio, struct scan_control *sc) { - int referenced_ptes, referenced_page; + int referenced_ptes, referenced_folio; unsigned long vm_flags; - referenced_ptes = page_referenced(page, 1, sc->target_mem_cgroup, - &vm_flags); - referenced_page = TestClearPageReferenced(page); + referenced_ptes = folio_referenced(folio, 1, sc->target_mem_cgroup, + &vm_flags); + referenced_folio = folio_test_clear_referenced(folio); /* - * Mlock lost the isolation race with us. Let try_to_unmap() - * move the page to the unevictable list. + * The supposedly reclaimable folio was found to be in a VM_LOCKED vma. + * Let the folio, now marked Mlocked, be moved to the unevictable list. */ if (vm_flags & VM_LOCKED) - return PAGEREF_RECLAIM; + return PAGEREF_ACTIVATE; if (referenced_ptes) { /* - * All mapped pages start out with page table + * All mapped folios start out with page table * references from the instantiating fault, so we need - * to look twice if a mapped file page is used more + * to look twice if a mapped file/anon folio is used more * than once. * * Mark it and spare it for another trip around the * inactive list. Another page table reference will * lead to its activation. * - * Note: the mark is set for activated pages as well - * so that recently deactivated but used pages are + * Note: the mark is set for activated folios as well + * so that recently deactivated but used folios are * quickly recovered. */ - SetPageReferenced(page); + folio_set_referenced(folio); - if (referenced_page || referenced_ptes > 1) + if (referenced_folio || referenced_ptes > 1) return PAGEREF_ACTIVATE; /* - * Activate file-backed executable pages after first usage. + * Activate file-backed executable folios after first usage. */ - if ((vm_flags & VM_EXEC) && !PageSwapBacked(page)) + if ((vm_flags & VM_EXEC) && !folio_test_swapbacked(folio)) return PAGEREF_ACTIVATE; return PAGEREF_KEEP; } - /* Reclaim if clean, defer dirty pages to writeback */ - if (referenced_page && !PageSwapBacked(page)) + /* Reclaim if clean, defer dirty folios to writeback */ + if (referenced_folio && !folio_test_swapbacked(folio)) return PAGEREF_RECLAIM_CLEAN; return PAGEREF_RECLAIM; } /* Check if a page is dirty or under writeback */ -static void page_check_dirty_writeback(struct page *page, +static void folio_check_dirty_writeback(struct folio *folio, bool *dirty, bool *writeback) { struct address_space *mapping; @@ -1438,24 +1434,24 @@ static void page_check_dirty_writeback(struct page *page, * Anonymous pages are not handled by flushers and must be written * from reclaim context. Do not stall reclaim based on them */ - if (!page_is_file_lru(page) || - (PageAnon(page) && !PageSwapBacked(page))) { + if (!folio_is_file_lru(folio) || + (folio_test_anon(folio) && !folio_test_swapbacked(folio))) { *dirty = false; *writeback = false; return; } - /* By default assume that the page flags are accurate */ - *dirty = PageDirty(page); - *writeback = PageWriteback(page); + /* By default assume that the folio flags are accurate */ + *dirty = folio_test_dirty(folio); + *writeback = folio_test_writeback(folio); /* Verify dirty/writeback state if the filesystem supports it */ - if (!page_has_private(page)) + if (!folio_test_private(folio)) return; - mapping = page_mapping(page); + mapping = folio_mapping(folio); if (mapping && mapping->a_ops->is_dirty_writeback) - mapping->a_ops->is_dirty_writeback(page, dirty, writeback); + mapping->a_ops->is_dirty_writeback(&folio->page, dirty, writeback); } static struct page *alloc_demote_page(struct page *page, unsigned long node) @@ -1529,14 +1525,16 @@ retry: while (!list_empty(page_list)) { struct address_space *mapping; struct page *page; + struct folio *folio; enum page_references references = PAGEREF_RECLAIM; bool dirty, writeback, may_enter_fs; unsigned int nr_pages; cond_resched(); - page = lru_to_page(page_list); - list_del(&page->lru); + folio = lru_to_folio(page_list); + list_del(&folio->lru); + page = &folio->page; if (!trylock_page(page)) goto keep; @@ -1562,12 +1560,12 @@ retry: * reclaim_congested. kswapd will stall and start writing * pages if the tail of the LRU is all dirty unqueued pages. */ - page_check_dirty_writeback(page, &dirty, &writeback); + folio_check_dirty_writeback(folio, &dirty, &writeback); if (dirty || writeback) - stat->nr_dirty++; + stat->nr_dirty += nr_pages; if (dirty && !writeback) - stat->nr_unqueued_dirty++; + stat->nr_unqueued_dirty += nr_pages; /* * Treat this page as congested if the underlying BDI is or if @@ -1576,10 +1574,8 @@ retry: * end of the LRU a second time. */ mapping = page_mapping(page); - if (((dirty || writeback) && mapping && - inode_write_congested(mapping->host)) || - (writeback && PageReclaim(page))) - stat->nr_congested++; + if (writeback && PageReclaim(page)) + stat->nr_congested += nr_pages; /* * If a page at the tail of the LRU is under writeback, there @@ -1628,7 +1624,7 @@ retry: if (current_is_kswapd() && PageReclaim(page) && test_bit(PGDAT_WRITEBACK, &pgdat->flags)) { - stat->nr_immediate++; + stat->nr_immediate += nr_pages; goto activate_locked; /* Case 2 above */ @@ -1646,7 +1642,7 @@ retry: * and it's also appropriate in global reclaim. */ SetPageReclaim(page); - stat->nr_writeback++; + stat->nr_writeback += nr_pages; goto activate_locked; /* Case 3 above */ @@ -1660,7 +1656,7 @@ retry: } if (!ignore_references) - references = page_check_references(page, sc); + references = folio_check_references(folio, sc); switch (references) { case PAGEREF_ACTIVATE: @@ -1693,28 +1689,28 @@ retry: if (!PageSwapCache(page)) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; - if (page_maybe_dma_pinned(page)) + if (folio_maybe_dma_pinned(folio)) goto keep_locked; if (PageTransHuge(page)) { /* cannot split THP, skip it */ - if (!can_split_huge_page(page, NULL)) + if (!can_split_folio(folio, NULL)) goto activate_locked; /* * Split pages without a PMD map right * away. Chances are some or all of the * tail pages can be freed without IO. */ - if (!compound_mapcount(page) && - split_huge_page_to_list(page, - page_list)) + if (!folio_entire_mapcount(folio) && + split_folio_to_list(folio, + page_list)) goto activate_locked; } if (!add_to_swap(page)) { if (!PageTransHuge(page)) goto activate_locked_split; /* Fallback to swap normal pages */ - if (split_huge_page_to_list(page, - page_list)) + if (split_folio_to_list(folio, + page_list)) goto activate_locked; #ifdef CONFIG_TRANSPARENT_HUGEPAGE count_vm_event(THP_SWPOUT_FALLBACK); @@ -1728,9 +1724,9 @@ retry: /* Adding to swap updated mapping */ mapping = page_mapping(page); } - } else if (unlikely(PageTransHuge(page))) { - /* Split file THP */ - if (split_huge_page_to_list(page, page_list)) + } else if (PageSwapBacked(page) && PageTransHuge(page)) { + /* Split shmem THP */ + if (split_folio_to_list(folio, page_list)) goto keep_locked; } @@ -1754,10 +1750,11 @@ retry: enum ttu_flags flags = TTU_BATCH_FLUSH; bool was_swapbacked = PageSwapBacked(page); - if (unlikely(PageTransHuge(page))) + if (PageTransHuge(page) && + thp_order(page) >= HPAGE_PMD_ORDER) flags |= TTU_SPLIT_HUGE_PMD; - try_to_unmap(page, flags); + try_to_unmap(folio, flags); if (page_mapped(page)) { stat->nr_unmap_fail += nr_pages; if (!was_swapbacked && PageSwapBacked(page)) @@ -1805,13 +1802,13 @@ retry: * starts and then write it out here. */ try_to_unmap_flush_dirty(); - switch (pageout(page, mapping)) { + switch (pageout(folio, mapping)) { case PAGE_KEEP: goto keep_locked; case PAGE_ACTIVATE: goto activate_locked; case PAGE_SUCCESS: - stat->nr_pageout += thp_nr_pages(page); + stat->nr_pageout += nr_pages; if (PageWriteback(page)) goto keep; @@ -1889,7 +1886,7 @@ retry: */ count_vm_event(PGLAZYFREED); count_memcg_page_event(page, PGLAZYFREED); - } else if (!mapping || !__remove_mapping(mapping, page, true, + } else if (!mapping || !__remove_mapping(mapping, folio, true, sc->target_mem_cgroup)) goto keep_locked; @@ -2012,69 +2009,6 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone, } /* - * Attempt to remove the specified page from its LRU. Only take this page - * if it is of the appropriate PageActive status. Pages which are being - * freed elsewhere are also ignored. - * - * page: page to consider - * mode: one of the LRU isolation modes defined above - * - * returns true on success, false on failure. - */ -bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode) -{ - /* Only take pages on the LRU. */ - if (!PageLRU(page)) - return false; - - /* Compaction should not handle unevictable pages but CMA can do so */ - if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE)) - return false; - - /* - * To minimise LRU disruption, the caller can indicate that it only - * wants to isolate pages it will be able to operate on without - * blocking - clean pages for the most part. - * - * ISOLATE_ASYNC_MIGRATE is used to indicate that it only wants to pages - * that it is possible to migrate without blocking - */ - if (mode & ISOLATE_ASYNC_MIGRATE) { - /* All the caller can do on PageWriteback is block */ - if (PageWriteback(page)) - return false; - - if (PageDirty(page)) { - struct address_space *mapping; - bool migrate_dirty; - - /* - * Only pages without mappings or that have a - * ->migratepage callback are possible to migrate - * without blocking. However, we can be racing with - * truncation so it's necessary to lock the page - * to stabilise the mapping as truncation holds - * the page lock until after the page is removed - * from the page cache. - */ - if (!trylock_page(page)) - return false; - - mapping = page_mapping(page); - migrate_dirty = !mapping || mapping->a_ops->migratepage; - unlock_page(page); - if (!migrate_dirty) - return false; - } - } - - if ((mode & ISOLATE_UNMAPPED) && page_mapped(page)) - return false; - - return true; -} - -/* * Update LRU sizes after isolating pages. The LRU size updates must * be complete before mem_cgroup_update_lru_size due to a sanity check. */ @@ -2125,11 +2059,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, unsigned long skipped = 0; unsigned long scan, total_scan, nr_pages; LIST_HEAD(pages_skipped); - isolate_mode_t mode = (sc->may_unmap ? 0 : ISOLATE_UNMAPPED); total_scan = 0; scan = 0; while (scan < nr_to_scan && !list_empty(src)) { + struct list_head *move_to = src; struct page *page; page = lru_to_page(src); @@ -2139,9 +2073,9 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, total_scan += nr_pages; if (page_zonenum(page) > sc->reclaim_idx) { - list_move(&page->lru, &pages_skipped); nr_skipped[page_zonenum(page)] += nr_pages; - continue; + move_to = &pages_skipped; + goto move; } /* @@ -2149,37 +2083,34 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, * return with no isolated pages if the LRU mostly contains * ineligible pages. This causes the VM to not reclaim any * pages, triggering a premature OOM. - * - * Account all tail pages of THP. This would not cause - * premature OOM since __isolate_lru_page() returns -EBUSY - * only when the page is being freed somewhere else. + * Account all tail pages of THP. */ scan += nr_pages; - if (!__isolate_lru_page_prepare(page, mode)) { - /* It is being freed elsewhere */ - list_move(&page->lru, src); - continue; - } + + if (!PageLRU(page)) + goto move; + if (!sc->may_unmap && page_mapped(page)) + goto move; + /* * Be careful not to clear PageLRU until after we're * sure the page is not being freed elsewhere -- the * page release code relies on it. */ - if (unlikely(!get_page_unless_zero(page))) { - list_move(&page->lru, src); - continue; - } + if (unlikely(!get_page_unless_zero(page))) + goto move; if (!TestClearPageLRU(page)) { /* Another thread is already isolating this page */ put_page(page); - list_move(&page->lru, src); - continue; + goto move; } nr_taken += nr_pages; nr_zone_taken[page_zonenum(page)] += nr_pages; - list_move(&page->lru, dst); + move_to = dst; +move: + list_move(&page->lru, move_to); } /* @@ -2203,51 +2134,47 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, } *nr_scanned = total_scan; trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, - total_scan, skipped, nr_taken, mode, lru); + total_scan, skipped, nr_taken, + sc->may_unmap ? 0 : ISOLATE_UNMAPPED, lru); update_lru_sizes(lruvec, lru, nr_zone_taken); return nr_taken; } /** - * isolate_lru_page - tries to isolate a page from its LRU list - * @page: page to isolate from its LRU list - * - * Isolates a @page from an LRU list, clears PageLRU and adjusts the - * vmstat statistic corresponding to whatever LRU list the page was on. + * folio_isolate_lru() - Try to isolate a folio from its LRU list. + * @folio: Folio to isolate from its LRU list. * - * Returns 0 if the page was removed from an LRU list. - * Returns -EBUSY if the page was not on an LRU list. + * Isolate a @folio from an LRU list and adjust the vmstat statistic + * corresponding to whatever LRU list the folio was on. * - * The returned page will have PageLRU() cleared. If it was found on - * the active list, it will have PageActive set. If it was found on - * the unevictable list, it will have the PageUnevictable bit set. That flag + * The folio will have its LRU flag cleared. If it was found on the + * active list, it will have the Active flag set. If it was found on the + * unevictable list, it will have the Unevictable flag set. These flags * may need to be cleared by the caller before letting the page go. * - * The vmstat statistic corresponding to the list on which the page was - * found will be decremented. - * - * Restrictions: + * Context: * * (1) Must be called with an elevated refcount on the page. This is a - * fundamental difference from isolate_lru_pages (which is called + * fundamental difference from isolate_lru_pages() (which is called * without a stable reference). - * (2) the lru_lock must not be held. - * (3) interrupts must be enabled. + * (2) The lru_lock must not be held. + * (3) Interrupts must be enabled. + * + * Return: 0 if the folio was removed from an LRU list. + * -EBUSY if the folio was not on an LRU list. */ -int isolate_lru_page(struct page *page) +int folio_isolate_lru(struct folio *folio) { - struct folio *folio = page_folio(page); int ret = -EBUSY; - VM_BUG_ON_PAGE(!page_count(page), page); - WARN_RATELIMIT(PageTail(page), "trying to isolate tail page"); + VM_BUG_ON_FOLIO(!folio_ref_count(folio), folio); - if (TestClearPageLRU(page)) { + if (folio_test_clear_lru(folio)) { struct lruvec *lruvec; - get_page(page); + folio_get(folio); lruvec = folio_lruvec_lock_irq(folio); - del_page_from_lru_list(page, lruvec); + lruvec_del_folio(lruvec, folio); unlock_page_lruvec_irq(lruvec); ret = 0; } @@ -2377,9 +2304,7 @@ static unsigned int move_pages_to_lru(struct lruvec *lruvec, */ static int current_may_throttle(void) { - return !(current->flags & PF_LOCAL_THROTTLE) || - current->backing_dev_info == NULL || - bdi_write_congested(current->backing_dev_info); + return !(current->flags & PF_LOCAL_THROTTLE); } /* @@ -2485,7 +2410,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, * * If the pages are mostly unmapped, the processing is fast and it is * appropriate to hold lru_lock across the whole operation. But if - * the pages are mapped, the processing is slow (page_referenced()), so + * the pages are mapped, the processing is slow (folio_referenced()), so * we should drop lru_lock around each page. It's impossible to balance * this, so instead we remove the pages from the LRU while processing them. * It is safe to rely on PG_active against the non-LRU pages in here because @@ -2505,7 +2430,6 @@ static void shrink_active_list(unsigned long nr_to_scan, LIST_HEAD(l_hold); /* The pages which were snipped off */ LIST_HEAD(l_active); LIST_HEAD(l_inactive); - struct page *page; unsigned nr_deactivate, nr_activate; unsigned nr_rotated = 0; int file = is_file_lru(lru); @@ -2527,9 +2451,13 @@ static void shrink_active_list(unsigned long nr_to_scan, spin_unlock_irq(&lruvec->lru_lock); while (!list_empty(&l_hold)) { + struct folio *folio; + struct page *page; + cond_resched(); - page = lru_to_page(&l_hold); - list_del(&page->lru); + folio = lru_to_folio(&l_hold); + list_del(&folio->lru); + page = &folio->page; if (unlikely(!page_evictable(page))) { putback_lru_page(page); @@ -2544,8 +2472,8 @@ static void shrink_active_list(unsigned long nr_to_scan, } } - if (page_referenced(page, 0, sc->target_mem_cgroup, - &vm_flags)) { + if (folio_referenced(folio, 0, sc->target_mem_cgroup, + &vm_flags)) { /* * Identify referenced, file-backed active pages and * give them one more trip around the active list. So @@ -3975,7 +3903,10 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int highest_zoneidx) if (!managed_zone(zone)) continue; - mark = high_wmark_pages(zone); + if (sysctl_numa_balancing_mode & NUMA_BALANCING_MEMORY_TIERING) + mark = wmark_pages(zone, WMARK_PROMO); + else + mark = high_wmark_pages(zone); if (zone_watermark_ok_safe(zone, order, mark, highest_zoneidx)) return true; } @@ -4472,7 +4403,7 @@ static int kswapd(void *p) * us from recursively trying to free more memory as we're * trying to free the first piece of memory in the first place). */ - tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD; + tsk->flags |= PF_MEMALLOC | PF_KSWAPD; set_freezable(); WRITE_ONCE(pgdat->kswapd_order, 0); @@ -4523,7 +4454,7 @@ kswapd_try_sleep: goto kswapd_try_sleep; } - tsk->flags &= ~(PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD); + tsk->flags &= ~(PF_MEMALLOC | PF_KSWAPD); return 0; } @@ -4764,11 +4695,8 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in fs_reclaim_acquire(sc.gfp_mask); /* * We need to be able to allocate from the reserves for RECLAIM_UNMAP - * and we also need to be able to write out pages for RECLAIM_WRITE - * and RECLAIM_UNMAP. */ noreclaim_flag = memalloc_noreclaim_save(); - p->flags |= PF_SWAPWRITE; set_task_reclaim_state(p, &sc.reclaim_state); if (node_pagecache_reclaimable(pgdat) > pgdat->min_unmapped_pages) { @@ -4782,7 +4710,6 @@ static int __node_reclaim(struct pglist_data *pgdat, gfp_t gfp_mask, unsigned in } set_task_reclaim_state(p, NULL); - current->flags &= ~PF_SWAPWRITE; memalloc_noreclaim_restore(noreclaim_flag); fs_reclaim_release(sc.gfp_mask); psi_memstall_leave(&pflags); |