aboutsummaryrefslogtreecommitdiff
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r--mm/page_alloc.c580
1 files changed, 274 insertions, 306 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 28f80daf5c04..119a3a52f96e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -286,9 +286,7 @@ EXPORT_SYMBOL(nr_online_nodes);
#endif
static bool page_contains_unaccepted(struct page *page, unsigned int order);
-static void accept_page(struct page *page, unsigned int order);
-static bool try_to_accept_memory(struct zone *zone, unsigned int order);
-static inline bool has_unaccepted_memory(void);
+static bool cond_accept_memory(struct zone *zone, unsigned int order);
static bool __free_unaccepted(struct page *page);
int page_group_by_mobility_disabled __read_mostly;
@@ -322,6 +320,11 @@ static inline bool deferred_pages_enabled(void)
{
return false;
}
+
+static inline bool _deferred_grow_zone(struct zone *zone, unsigned int order)
+{
+ return false;
+}
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
/* Return a pointer to the bitmap storing bits affecting a block of pages */
@@ -632,6 +635,8 @@ compaction_capture(struct capture_control *capc, struct page *page,
static inline void account_freepages(struct zone *zone, int nr_pages,
int migratetype)
{
+ lockdep_assert_held(&zone->lock);
+
if (is_migrate_isolate(migratetype))
return;
@@ -639,6 +644,9 @@ static inline void account_freepages(struct zone *zone, int nr_pages,
if (is_migrate_cma(migratetype))
__mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages);
+ else if (is_migrate_highatomic(migratetype))
+ WRITE_ONCE(zone->nr_free_highatomic,
+ zone->nr_free_highatomic + nr_pages);
}
/* Used for pages not on another list */
@@ -1040,6 +1048,7 @@ __always_inline bool free_pages_prepare(struct page *page,
bool skip_kasan_poison = should_skip_kasan_poison(page);
bool init = want_init_on_free();
bool compound = PageCompound(page);
+ struct folio *folio = page_folio(page);
VM_BUG_ON_PAGE(PageTail(page), page);
@@ -1049,11 +1058,32 @@ __always_inline bool free_pages_prepare(struct page *page,
if (memcg_kmem_online() && PageMemcgKmem(page))
__memcg_kmem_uncharge_page(page, order);
+ /*
+ * In rare cases, when truncation or holepunching raced with
+ * munlock after VM_LOCKED was cleared, Mlocked may still be
+ * found set here. This does not indicate a problem, unless
+ * "unevictable_pgs_cleared" appears worryingly large.
+ */
+ if (unlikely(folio_test_mlocked(folio))) {
+ long nr_pages = folio_nr_pages(folio);
+
+ __folio_clear_mlocked(folio);
+ zone_stat_mod_folio(folio, NR_MLOCK, -nr_pages);
+ count_vm_events(UNEVICTABLE_PGCLEARED, nr_pages);
+ }
+
if (unlikely(PageHWPoison(page)) && !order) {
/* Do not let hwpoison pages hit pcplists/buddy */
reset_page_owner(page, order);
page_table_check_free(page, order);
pgalloc_tag_sub(page, 1 << order);
+
+ /*
+ * The page is isolated and accounted for.
+ * Mark the codetag as empty to avoid accounting error
+ * when the page is freed by unpoison_memory().
+ */
+ clear_page_tag_ref(page);
return false;
}
@@ -1080,8 +1110,11 @@ __always_inline bool free_pages_prepare(struct page *page,
(page + i)->flags &= ~PAGE_FLAGS_CHECK_AT_PREP;
}
}
- if (PageMappingFlags(page))
+ if (PageMappingFlags(page)) {
+ if (PageAnon(page))
+ mod_mthp_stat(order, MTHP_STAT_NR_ANON, -1);
page->mapping = NULL;
+ }
if (is_check_pages_enabled()) {
if (free_page_is_bad(page))
bad++;
@@ -1192,17 +1225,39 @@ static void free_pcppages_bulk(struct zone *zone, int count,
spin_unlock_irqrestore(&zone->lock, flags);
}
+/* Split a multi-block free page into its individual pageblocks. */
+static void split_large_buddy(struct zone *zone, struct page *page,
+ unsigned long pfn, int order, fpi_t fpi)
+{
+ unsigned long end = pfn + (1 << order);
+
+ VM_WARN_ON_ONCE(!IS_ALIGNED(pfn, 1 << order));
+ /* Caller removed page from freelist, buddy info cleared! */
+ VM_WARN_ON_ONCE(PageBuddy(page));
+
+ if (order > pageblock_order)
+ order = pageblock_order;
+
+ while (pfn != end) {
+ int mt = get_pfnblock_migratetype(page, pfn);
+
+ __free_one_page(page, pfn, zone, order, mt, fpi);
+ pfn += 1 << order;
+ page = pfn_to_page(pfn);
+ }
+}
+
static void free_one_page(struct zone *zone, struct page *page,
unsigned long pfn, unsigned int order,
fpi_t fpi_flags)
{
unsigned long flags;
- int migratetype;
spin_lock_irqsave(&zone->lock, flags);
- migratetype = get_pfnblock_migratetype(page, pfn);
- __free_one_page(page, pfn, zone, order, migratetype, fpi_flags);
+ split_large_buddy(zone, page, pfn, order, fpi_flags);
spin_unlock_irqrestore(&zone->lock, flags);
+
+ __count_vm_events(PGFREE, 1 << order);
}
static void __free_pages_ok(struct page *page, unsigned int order,
@@ -1211,12 +1266,8 @@ static void __free_pages_ok(struct page *page, unsigned int order,
unsigned long pfn = page_to_pfn(page);
struct zone *zone = page_zone(page);
- if (!free_pages_prepare(page, order))
- return;
-
- free_one_page(zone, page, pfn, order, fpi_flags);
-
- __count_vm_events(PGFREE, 1 << order);
+ if (free_pages_prepare(page, order))
+ free_one_page(zone, page, pfn, order, fpi_flags);
}
void __meminit __free_pages_core(struct page *page, unsigned int order,
@@ -1263,7 +1314,7 @@ void __meminit __free_pages_core(struct page *page, unsigned int order,
if (order == MAX_PAGE_ORDER && __free_unaccepted(page))
return;
- accept_page(page, order);
+ accept_memory(page_to_phys(page), PAGE_SIZE << order);
}
/*
@@ -1339,11 +1390,11 @@ struct page *__pageblock_pfn_to_page(unsigned long start_pfn,
*
* -- nyc
*/
-static inline void expand(struct zone *zone, struct page *page,
- int low, int high, int migratetype)
+static inline unsigned int expand(struct zone *zone, struct page *page, int low,
+ int high, int migratetype)
{
- unsigned long size = 1 << high;
- unsigned long nr_added = 0;
+ unsigned int size = 1 << high;
+ unsigned int nr_added = 0;
while (high > low) {
high--;
@@ -1363,7 +1414,19 @@ static inline void expand(struct zone *zone, struct page *page,
set_buddy_order(&page[size], high);
nr_added += size;
}
- account_freepages(zone, nr_added, migratetype);
+
+ return nr_added;
+}
+
+static __always_inline void page_del_and_expand(struct zone *zone,
+ struct page *page, int low,
+ int high, int migratetype)
+{
+ int nr_pages = 1 << high;
+
+ __del_page_from_free_list(page, zone, high, migratetype);
+ nr_pages -= expand(zone, page, low, high, migratetype);
+ account_freepages(zone, -nr_pages, migratetype);
}
static void check_new_page_bad(struct page *page)
@@ -1533,8 +1596,9 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
page = get_page_from_free_area(area, migratetype);
if (!page)
continue;
- del_page_from_free_list(page, zone, current_order, migratetype);
- expand(zone, page, order, current_order, migratetype);
+
+ page_del_and_expand(zone, page, order, current_order,
+ migratetype);
trace_mm_page_alloc_zone_locked(page, order, migratetype,
pcp_allowed_order(order) &&
migratetype < MIGRATE_PCPTYPES);
@@ -1693,27 +1757,6 @@ static unsigned long find_large_buddy(unsigned long start_pfn)
return start_pfn;
}
-/* Split a multi-block free page into its individual pageblocks */
-static void split_large_buddy(struct zone *zone, struct page *page,
- unsigned long pfn, int order)
-{
- unsigned long end_pfn = pfn + (1 << order);
-
- VM_WARN_ON_ONCE(order <= pageblock_order);
- VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1));
-
- /* Caller removed page from freelist, buddy info cleared! */
- VM_WARN_ON_ONCE(PageBuddy(page));
-
- while (pfn != end_pfn) {
- int mt = get_pfnblock_migratetype(page, pfn);
-
- __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE);
- pfn += pageblock_nr_pages;
- page = pfn_to_page(pfn);
- }
-}
-
/**
* move_freepages_block_isolate - move free pages in block for page isolation
* @zone: the zone
@@ -1754,7 +1797,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
del_page_from_free_list(buddy, zone, order,
get_pfnblock_migratetype(buddy, pfn));
set_pageblock_migratetype(page, migratetype);
- split_large_buddy(zone, buddy, pfn, order);
+ split_large_buddy(zone, buddy, pfn, order, FPI_NONE);
return true;
}
@@ -1765,7 +1808,7 @@ bool move_freepages_block_isolate(struct zone *zone, struct page *page,
del_page_from_free_list(page, zone, order,
get_pfnblock_migratetype(page, pfn));
set_pageblock_migratetype(page, migratetype);
- split_large_buddy(zone, page, pfn, order);
+ split_large_buddy(zone, page, pfn, order, FPI_NONE);
return true;
}
move:
@@ -1885,9 +1928,12 @@ steal_suitable_fallback(struct zone *zone, struct page *page,
/* Take ownership for orders >= pageblock_order */
if (current_order >= pageblock_order) {
+ unsigned int nr_added;
+
del_page_from_free_list(page, zone, current_order, block_type);
change_pageblock_range(page, current_order, start_type);
- expand(zone, page, order, current_order, start_type);
+ nr_added = expand(zone, page, order, current_order, start_type);
+ account_freepages(zone, nr_added, start_type);
return page;
}
@@ -1940,8 +1986,7 @@ steal_suitable_fallback(struct zone *zone, struct page *page,
}
single_page:
- del_page_from_free_list(page, zone, current_order, block_type);
- expand(zone, page, order, current_order, block_type);
+ page_del_and_expand(zone, page, order, current_order, block_type);
return page;
}
@@ -2656,7 +2701,6 @@ void free_unref_folios(struct folio_batch *folios)
unsigned long pfn = folio_pfn(folio);
unsigned int order = folio_order(folio);
- folio_undo_large_rmappable(folio);
if (!free_pages_prepare(&folio->page, order))
continue;
/*
@@ -2757,7 +2801,7 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
split_page_owner(page, order, 0);
- pgalloc_tag_split(page, 1 << order);
+ pgalloc_tag_split(page_folio(page), order, 0);
split_page_memcg(page, order, 0);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -2867,12 +2911,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
page = __rmqueue(zone, order, migratetype, alloc_flags);
/*
- * If the allocation fails, allow OOM handling access
- * to HIGHATOMIC reserves as failing now is worse than
- * failing a high-order atomic allocation in the
- * future.
+ * If the allocation fails, allow OOM handling and
+ * order-0 (atomic) allocs access to HIGHATOMIC
+ * reserves as failing now is worse than failing a
+ * high-order atomic allocation in the future.
*/
- if (!page && (alloc_flags & ALLOC_OOM))
+ if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
if (!page) {
@@ -3026,12 +3070,6 @@ struct page *rmqueue(struct zone *preferred_zone,
{
struct page *page;
- /*
- * We most definitely don't want callers attempting to
- * allocate greater than order-1 page units with __GFP_NOFAIL.
- */
- WARN_ON_ONCE((gfp_flags & __GFP_NOFAIL) && (order > 1));
-
if (likely(pcp_allowed_order(order))) {
page = rmqueue_pcplist(preferred_zone, zone, order,
migratetype, alloc_flags);
@@ -3061,20 +3099,16 @@ static inline long __zone_watermark_unusable_free(struct zone *z,
/*
* If the caller does not have rights to reserves below the min
- * watermark then subtract the high-atomic reserves. This will
- * over-estimate the size of the atomic reserve but it avoids a search.
+ * watermark then subtract the free pages reserved for highatomic.
*/
if (likely(!(alloc_flags & ALLOC_RESERVES)))
- unusable_free += z->nr_reserved_highatomic;
+ unusable_free += READ_ONCE(z->nr_free_highatomic);
#ifdef CONFIG_CMA
/* If allocation can't use CMA areas don't use free CMA pages */
if (!(alloc_flags & ALLOC_CMA))
unusable_free += zone_page_state(z, NR_FREE_CMA_PAGES);
#endif
-#ifdef CONFIG_UNACCEPTED_MEMORY
- unusable_free += zone_page_state(z, NR_UNACCEPTED);
-#endif
return unusable_free;
}
@@ -3353,7 +3387,7 @@ retry:
}
if (no_fallback && nr_online_nodes > 1 &&
- zone != ac->preferred_zoneref->zone) {
+ zone != zonelist_zone(ac->preferred_zoneref)) {
int local_nid;
/*
@@ -3361,13 +3395,15 @@ retry:
* fragmenting fallbacks. Locality is more important
* than fragmentation avoidance.
*/
- local_nid = zone_to_nid(ac->preferred_zoneref->zone);
+ local_nid = zonelist_node_idx(ac->preferred_zoneref);
if (zone_to_nid(zone) != local_nid) {
alloc_flags &= ~ALLOC_NOFRAGMENT;
goto retry;
}
}
+ cond_accept_memory(zone, order);
+
/*
* Detect whether the number of free pages is below high
* watermark. If so, we will decrease pcp->high and free
@@ -3393,12 +3429,9 @@ check_alloc_wmark:
gfp_mask)) {
int ret;
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/*
* Watermark failed for this zone, but see if we can
* grow this zone if it contains deferred pages.
@@ -3407,14 +3440,13 @@ check_alloc_wmark:
if (_deferred_grow_zone(zone, order))
goto try_this_zone;
}
-#endif
/* Checked here to keep the fast path fast */
BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;
if (!node_reclaim_enabled() ||
- !zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
+ !zone_allows_reclaim(zonelist_zone(ac->preferred_zoneref), zone))
continue;
ret = node_reclaim(zone->zone_pgdat, gfp_mask, order);
@@ -3436,7 +3468,7 @@ check_alloc_wmark:
}
try_this_zone:
- page = rmqueue(ac->preferred_zoneref->zone, zone, order,
+ page = rmqueue(zonelist_zone(ac->preferred_zoneref), zone, order,
gfp_mask, alloc_flags, ac->migratetype);
if (page) {
prep_new_page(page, order, gfp_mask, alloc_flags);
@@ -3450,18 +3482,14 @@ try_this_zone:
return page;
} else {
- if (has_unaccepted_memory()) {
- if (try_to_accept_memory(zone, order))
- goto try_this_zone;
- }
+ if (cond_accept_memory(zone, order))
+ goto try_this_zone;
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
/* Try again if zone has deferred pages */
if (deferred_pages_enabled()) {
if (_deferred_grow_zone(zone, order))
goto try_this_zone;
}
-#endif
}
}
@@ -4002,7 +4030,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask, unsigned int order)
*/
if (alloc_flags & ALLOC_MIN_RESERVE)
alloc_flags &= ~ALLOC_CPUSET;
- } else if (unlikely(rt_task(current)) && in_task())
+ } else if (unlikely(rt_or_dl_task(current)) && in_task())
alloc_flags |= ALLOC_MIN_RESERVE;
alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
@@ -4098,6 +4126,11 @@ should_reclaim_retry(gfp_t gfp_mask, unsigned order,
unsigned long min_wmark = min_wmark_pages(zone);
bool wmark;
+ if (cpusets_enabled() &&
+ (alloc_flags & ALLOC_CPUSET) &&
+ !__cpuset_zone_allowed(zone, gfp_mask))
+ continue;
+
available = reclaimable = zone_reclaimable_pages(zone);
available += zone_page_state_snapshot(zone, NR_FREE_PAGES);
@@ -4173,6 +4206,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
bool can_compact = gfp_compaction_allowed(gfp_mask);
+ bool nofail = gfp_mask & __GFP_NOFAIL;
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
@@ -4185,6 +4219,25 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
unsigned int zonelist_iter_cookie;
int reserve_flags;
+ if (unlikely(nofail)) {
+ /*
+ * We most definitely don't want callers attempting to
+ * allocate greater than order-1 page units with __GFP_NOFAIL.
+ */
+ WARN_ON_ONCE(order > 1);
+ /*
+ * Also we don't support __GFP_NOFAIL without __GFP_DIRECT_RECLAIM,
+ * otherwise, we may result in lockup.
+ */
+ WARN_ON_ONCE(!can_direct_reclaim);
+ /*
+ * PF_MEMALLOC request from this context is rather bizarre
+ * because we cannot reclaim anything and only can loop waiting
+ * for somebody to do a work for us.
+ */
+ WARN_ON_ONCE(current->flags & PF_MEMALLOC);
+ }
+
restart:
compaction_retries = 0;
no_progress_loops = 0;
@@ -4207,7 +4260,7 @@ restart:
*/
ac->preferred_zoneref = first_zones_zonelist(ac->zonelist,
ac->highest_zoneidx, ac->nodemask);
- if (!ac->preferred_zoneref->zone)
+ if (!zonelist_zone(ac->preferred_zoneref))
goto nopage;
/*
@@ -4219,7 +4272,7 @@ restart:
struct zoneref *z = first_zones_zonelist(ac->zonelist,
ac->highest_zoneidx,
&cpuset_current_mems_allowed);
- if (!z->zone)
+ if (!zonelist_zone(z))
goto nopage;
}
@@ -4402,30 +4455,16 @@ nopage:
* Make sure that __GFP_NOFAIL request doesn't leak out and make sure
* we always retry
*/
- if (gfp_mask & __GFP_NOFAIL) {
+ if (unlikely(nofail)) {
/*
- * All existing users of the __GFP_NOFAIL are blockable, so warn
- * of any new users that actually require GFP_NOWAIT
+ * Lacking direct_reclaim we can't do anything to reclaim memory,
+ * we disregard these unreasonable nofail requests and still
+ * return NULL
*/
- if (WARN_ON_ONCE_GFP(!can_direct_reclaim, gfp_mask))
+ if (!can_direct_reclaim)
goto fail;
/*
- * PF_MEMALLOC request from this context is rather bizarre
- * because we cannot reclaim anything and only can loop waiting
- * for somebody to do a work for us
- */
- WARN_ON_ONCE_GFP(current->flags & PF_MEMALLOC, gfp_mask);
-
- /*
- * non failing costly orders are a hard requirement which we
- * are not prepared for much so let's warn about these users
- * so that we can identify them and convert them to something
- * else.
- */
- WARN_ON_ONCE_GFP(costly_order, gfp_mask);
-
- /*
* Help non-failing allocations by giving some access to memory
* reserves normally used for high priority non-blocking
* allocations but do not use ALLOC_NO_WATERMARKS because this
@@ -4568,7 +4607,8 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
gfp = alloc_gfp;
/* Find an allowed local zone that meets the low watermark. */
- for_each_zone_zonelist_nodemask(zone, z, ac.zonelist, ac.highest_zoneidx, ac.nodemask) {
+ z = ac.preferred_zoneref;
+ for_next_zone_zonelist_nodemask(zone, z, ac.highest_zoneidx, ac.nodemask) {
unsigned long mark;
if (cpusets_enabled() && (alloc_flags & ALLOC_CPUSET) &&
@@ -4576,17 +4616,28 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
continue;
}
- if (nr_online_nodes > 1 && zone != ac.preferred_zoneref->zone &&
- zone_to_nid(zone) != zone_to_nid(ac.preferred_zoneref->zone)) {
+ if (nr_online_nodes > 1 && zone != zonelist_zone(ac.preferred_zoneref) &&
+ zone_to_nid(zone) != zonelist_node_idx(ac.preferred_zoneref)) {
goto failed;
}
+ cond_accept_memory(zone, 0);
+retry_this_zone:
mark = wmark_pages(zone, alloc_flags & ALLOC_WMARK_MASK) + nr_pages;
if (zone_watermark_fast(zone, 0, mark,
zonelist_zone_idx(ac.preferred_zoneref),
alloc_flags, gfp)) {
break;
}
+
+ if (cond_accept_memory(zone, 0))
+ goto retry_this_zone;
+
+ /* Try again if zone has deferred pages */
+ if (deferred_pages_enabled()) {
+ if (_deferred_grow_zone(zone, 0))
+ goto retry_this_zone;
+ }
}
/*
@@ -4636,7 +4687,7 @@ unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid,
pcp_trylock_finish(UP_flags);
__count_zid_vm_events(PGALLOC, zone_idx(zone), nr_account);
- zone_statistics(ac.preferred_zoneref->zone, zone, nr_account);
+ zone_statistics(zonelist_zone(ac.preferred_zoneref), zone, nr_account);
out:
return nr_populated;
@@ -4694,7 +4745,7 @@ struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order,
* Forbid the first pass from falling back to types that fragment
* memory until all local zones are considered.
*/
- alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp);
+ alloc_flags |= alloc_flags_nofragment(zonelist_zone(ac.preferred_zoneref), gfp);
/* First allocation attempt */
page = get_page_from_freelist(alloc_gfp, order, alloc_flags, &ac);
@@ -4803,142 +4854,6 @@ void free_pages(unsigned long addr, unsigned int order)
EXPORT_SYMBOL(free_pages);
-/*
- * Page Fragment:
- * An arbitrary-length arbitrary-offset area of memory which resides
- * within a 0 or higher order page. Multiple fragments within that page
- * are individually refcounted, in the page's reference counter.
- *
- * The page_frag functions below provide a simple allocation framework for
- * page fragments. This is used by the network stack and network device
- * drivers to provide a backing region of memory for use as either an
- * sk_buff->head, or to be used in the "frags" portion of skb_shared_info.
- */
-static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
- gfp_t gfp_mask)
-{
- struct page *page = NULL;
- gfp_t gfp = gfp_mask;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
- __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
- page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
- PAGE_FRAG_CACHE_MAX_ORDER);
- nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
-#endif
- if (unlikely(!page))
- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
-
- nc->va = page ? page_address(page) : NULL;
-
- return page;
-}
-
-void page_frag_cache_drain(struct page_frag_cache *nc)
-{
- if (!nc->va)
- return;
-
- __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
- nc->va = NULL;
-}
-EXPORT_SYMBOL(page_frag_cache_drain);
-
-void __page_frag_cache_drain(struct page *page, unsigned int count)
-{
- VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
-
- if (page_ref_sub_and_test(page, count))
- free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(__page_frag_cache_drain);
-
-void *__page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
-{
- unsigned int size = PAGE_SIZE;
- struct page *page;
- int offset;
-
- if (unlikely(!nc->va)) {
-refill:
- page = __page_frag_cache_refill(nc, gfp_mask);
- if (!page)
- return NULL;
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- /* if size can vary use size else just use PAGE_SIZE */
- size = nc->size;
-#endif
- /* Even if we own the page, we do not use atomic_set().
- * This would break get_page_unless_zero() users.
- */
- page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
-
- /* reset page count bias and offset to start of new frag */
- nc->pfmemalloc = page_is_pfmemalloc(page);
- nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
- nc->offset = size;
- }
-
- offset = nc->offset - fragsz;
- if (unlikely(offset < 0)) {
- page = virt_to_page(nc->va);
-
- if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
- goto refill;
-
- if (unlikely(nc->pfmemalloc)) {
- free_unref_page(page, compound_order(page));
- goto refill;
- }
-
-#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- /* if size can vary use size else just use PAGE_SIZE */
- size = nc->size;
-#endif
- /* OK, page count is 0, we can safely set it */
- set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
-
- /* reset page count bias and offset to start of new frag */
- nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
- offset = size - fragsz;
- if (unlikely(offset < 0)) {
- /*
- * The caller is trying to allocate a fragment
- * with fragsz > PAGE_SIZE but the cache isn't big
- * enough to satisfy the request, this may
- * happen in low memory conditions.
- * We don't release the cache page because
- * it could make memory pressure worse
- * so we simply return NULL here.
- */
- return NULL;
- }
- }
-
- nc->pagecnt_bias--;
- offset &= align_mask;
- nc->offset = offset;
-
- return nc->va + offset;
-}
-EXPORT_SYMBOL(__page_frag_alloc_align);
-
-/*
- * Frees a page fragment allocated out of either a compound or order 0 page.
- */
-void page_frag_free(void *addr)
-{
- struct page *page = virt_to_head_page(addr);
-
- if (unlikely(put_page_testzero(page)))
- free_unref_page(page, compound_order(page));
-}
-EXPORT_SYMBOL(page_frag_free);
-
static void *make_alloc_exact(unsigned long addr, unsigned int order,
size_t size)
{
@@ -4948,7 +4863,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
struct page *last = page + nr;
split_page_owner(page, order, 0);
- pgalloc_tag_split(page, 1 << order);
+ pgalloc_tag_split(page_folio(page), order, 0);
split_page_memcg(page, order, 0);
while (page < --last)
set_page_refcounted(last);
@@ -5299,7 +5214,7 @@ int local_memory_node(int node)
z = first_zones_zonelist(node_zonelist(node, GFP_KERNEL),
gfp_zone(GFP_KERNEL),
NULL);
- return zone_to_nid(z->zone);
+ return zonelist_node_idx(z);
}
#endif
@@ -5755,7 +5670,6 @@ void __init setup_per_cpu_pageset(void)
for_each_online_pgdat(pgdat)
pgdat->per_cpu_nodestats =
alloc_percpu(struct per_cpu_nodestat);
- store_early_perpage_metadata();
}
__meminit void zone_pcp_init(struct zone *zone)
@@ -5821,14 +5735,7 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
void free_reserved_page(struct page *page)
{
- if (mem_alloc_profiling_enabled()) {
- union codetag_ref *ref = get_page_tag_ref(page);
-
- if (ref) {
- set_codetag_empty(ref);
- put_page_tag_ref(ref);
- }
- }
+ clear_page_tag_ref(page);
ClearPageReserved(page);
init_page_count(page);
__free_page(page);
@@ -6439,6 +6346,31 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
return (ret < 0) ? ret : 0;
}
+static void split_free_pages(struct list_head *list)
+{
+ int order;
+
+ for (order = 0; order < NR_PAGE_ORDERS; order++) {
+ struct page *page, *next;
+ int nr_pages = 1 << order;
+
+ list_for_each_entry_safe(page, next, &list[order], lru) {
+ int i;
+
+ post_alloc_hook(page, order, __GFP_MOVABLE);
+ if (!order)
+ continue;
+
+ split_page(page, order);
+
+ /* Add all subpages to the order-0 head, in sequence. */
+ list_del(&page->lru);
+ for (i = 0; i < nr_pages; i++)
+ list_add_tail(&page[i].lru, &list[0]);
+ }
+ }
+}
+
/**
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
@@ -6551,12 +6483,25 @@ int alloc_contig_range_noprof(unsigned long start, unsigned long end,
goto done;
}
- /* Free head and tail (if any) */
- if (start != outer_start)
- free_contig_range(outer_start, start - outer_start);
- if (end != outer_end)
- free_contig_range(end, outer_end - end);
+ if (!(gfp_mask & __GFP_COMP)) {
+ split_free_pages(cc.freepages);
+ /* Free head and tail (if any) */
+ if (start != outer_start)
+ free_contig_range(outer_start, start - outer_start);
+ if (end != outer_end)
+ free_contig_range(end, outer_end - end);
+ } else if (start == outer_start && end == outer_end && is_power_of_2(end - start)) {
+ struct page *head = pfn_to_page(start);
+ int order = ilog2(end - start);
+
+ check_new_pages(head, order);
+ prep_new_page(head, order, gfp_mask, 0);
+ } else {
+ ret = -EINVAL;
+ WARN(true, "PFN range: requested [%lu, %lu), allocated [%lu, %lu)\n",
+ start, end, outer_start, outer_end);
+ }
done:
undo_isolate_page_range(start, end, migratetype);
return ret;
@@ -6665,6 +6610,18 @@ struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask,
void free_contig_range(unsigned long pfn, unsigned long nr_pages)
{
unsigned long count = 0;
+ struct folio *folio = pfn_folio(pfn);
+
+ if (folio_test_large(folio)) {
+ int expected = folio_nr_pages(folio);
+
+ if (nr_pages == expected)
+ folio_put(folio);
+ else
+ WARN(true, "PFN %lu: nr_pages %lu != expected %d\n",
+ pfn, nr_pages, expected);
+ return;
+ }
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
@@ -6933,26 +6890,50 @@ early_param("accept_memory", accept_memory_parse);
static bool page_contains_unaccepted(struct page *page, unsigned int order)
{
phys_addr_t start = page_to_phys(page);
- phys_addr_t end = start + (PAGE_SIZE << order);
- return range_contains_unaccepted_memory(start, end);
+ return range_contains_unaccepted_memory(start, PAGE_SIZE << order);
}
-static void accept_page(struct page *page, unsigned int order)
+static void __accept_page(struct zone *zone, unsigned long *flags,
+ struct page *page)
{
- phys_addr_t start = page_to_phys(page);
+ bool last;
- accept_memory(start, start + (PAGE_SIZE << order));
+ list_del(&page->lru);
+ last = list_empty(&zone->unaccepted_pages);
+
+ account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
+ __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
+ __ClearPageUnaccepted(page);
+ spin_unlock_irqrestore(&zone->lock, *flags);
+
+ accept_memory(page_to_phys(page), PAGE_SIZE << MAX_PAGE_ORDER);
+
+ __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
+
+ if (last)
+ static_branch_dec(&zones_with_unaccepted_pages);
+}
+
+void accept_page(struct page *page)
+{
+ struct zone *zone = page_zone(page);
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ if (!PageUnaccepted(page)) {
+ spin_unlock_irqrestore(&zone->lock, flags);
+ return;
+ }
+
+ /* Unlocks zone->lock */
+ __accept_page(zone, &flags, page);
}
static bool try_to_accept_memory_one(struct zone *zone)
{
unsigned long flags;
struct page *page;
- bool last;
-
- if (list_empty(&zone->unaccepted_pages))
- return false;
spin_lock_irqsave(&zone->lock, flags);
page = list_first_entry_or_null(&zone->unaccepted_pages,
@@ -6962,49 +6943,44 @@ static bool try_to_accept_memory_one(struct zone *zone)
return false;
}
- list_del(&page->lru);
- last = list_empty(&zone->unaccepted_pages);
-
- account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
- __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES);
- spin_unlock_irqrestore(&zone->lock, flags);
-
- accept_page(page, MAX_PAGE_ORDER);
-
- __free_pages_ok(page, MAX_PAGE_ORDER, FPI_TO_TAIL);
-
- if (last)
- static_branch_dec(&zones_with_unaccepted_pages);
+ /* Unlocks zone->lock */
+ __accept_page(zone, &flags, page);
return true;
}
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
+static inline bool has_unaccepted_memory(void)
+{
+ return static_branch_unlikely(&zones_with_unaccepted_pages);
+}
+
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
long to_accept;
- int ret = false;
+ bool ret = false;
+
+ if (!has_unaccepted_memory())
+ return false;
+
+ if (list_empty(&zone->unaccepted_pages))
+ return false;
- /* How much to accept to get to high watermark? */
- to_accept = high_wmark_pages(zone) -
+ /* How much to accept to get to promo watermark? */
+ to_accept = promo_wmark_pages(zone) -
(zone_page_state(zone, NR_FREE_PAGES) -
- __zone_watermark_unusable_free(zone, order, 0));
+ __zone_watermark_unusable_free(zone, order, 0) -
+ zone_page_state(zone, NR_UNACCEPTED));
- /* Accept at least one page */
- do {
+ while (to_accept > 0) {
if (!try_to_accept_memory_one(zone))
break;
ret = true;
to_accept -= MAX_ORDER_NR_PAGES;
- } while (to_accept > 0);
+ }
return ret;
}
-static inline bool has_unaccepted_memory(void)
-{
- return static_branch_unlikely(&zones_with_unaccepted_pages);
-}
-
static bool __free_unaccepted(struct page *page)
{
struct zone *zone = page_zone(page);
@@ -7019,6 +6995,7 @@ static bool __free_unaccepted(struct page *page)
list_add_tail(&page->lru, &zone->unaccepted_pages);
account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE);
__mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES);
+ __SetPageUnaccepted(page);
spin_unlock_irqrestore(&zone->lock, flags);
if (first)
@@ -7034,16 +7011,7 @@ static bool page_contains_unaccepted(struct page *page, unsigned int order)
return false;
}
-static void accept_page(struct page *page, unsigned int order)
-{
-}
-
-static bool try_to_accept_memory(struct zone *zone, unsigned int order)
-{
- return false;
-}
-
-static inline bool has_unaccepted_memory(void)
+static bool cond_accept_memory(struct zone *zone, unsigned int order)
{
return false;
}