diff options
Diffstat (limited to 'include/linux/memcontrol.h')
-rw-r--r-- | include/linux/memcontrol.h | 124 |
1 files changed, 82 insertions, 42 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5818af8eca5a..7bdcf3020d7a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -21,6 +21,7 @@ #include <linux/vmstat.h> #include <linux/writeback.h> #include <linux/page-flags.h> +#include <linux/shrinker.h> struct mem_cgroup; struct obj_cgroup; @@ -61,7 +62,6 @@ struct mem_cgroup_reclaim_cookie { #ifdef CONFIG_MEMCG #define MEM_CGROUP_ID_SHIFT 16 -#define MEM_CGROUP_ID_MAX USHRT_MAX struct mem_cgroup_id { int id; @@ -89,17 +89,6 @@ struct mem_cgroup_reclaim_iter { unsigned int generation; }; -/* - * Bitmap and deferred work of shrinker::id corresponding to memcg-aware - * shrinkers, which have elements charged to this memcg. - */ -struct shrinker_info { - struct rcu_head rcu; - atomic_long_t *nr_deferred; - unsigned long *map; - int map_nr_max; -}; - struct lruvec_stats_percpu { /* Local (CPU and cgroup) state */ long state[NR_VM_NODE_STAT_ITEMS]; @@ -112,6 +101,9 @@ struct lruvec_stats { /* Aggregated (CPU and subtree) state */ long state[NR_VM_NODE_STAT_ITEMS]; + /* Non-hierarchical (CPU aggregated) state */ + long state_local[NR_VM_NODE_STAT_ITEMS]; + /* Pending child counts during tree propagation */ long state_pending[NR_VM_NODE_STAT_ITEMS]; }; @@ -151,7 +143,7 @@ struct mem_cgroup_threshold_ary { /* Size of entries[] */ unsigned int size; /* Array of thresholds */ - struct mem_cgroup_threshold entries[]; + struct mem_cgroup_threshold entries[] __counted_by(size); }; struct mem_cgroup_thresholds { @@ -284,6 +276,11 @@ struct mem_cgroup { atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS]; atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS]; + /* + * Hint of reclaim pressure for socket memroy management. Note + * that this indicator should NOT be used in legacy cgroup mode + * where socket memory is accounted/charged separately. + */ unsigned long socket_pressure; /* Legacy tcp memory accounting */ @@ -292,7 +289,13 @@ struct mem_cgroup { #ifdef CONFIG_MEMCG_KMEM int kmemcg_id; - struct obj_cgroup __rcu *objcg; + /* + * memcg->objcg is wiped out as a part of the objcg repaprenting + * process. memcg->orig_objcg preserves a pointer (and a reference) + * to the original objcg until the end of live of memcg. + */ + struct obj_cgroup __rcu *objcg; + struct obj_cgroup *orig_objcg; /* list of inherited objcgs, protected by objcg_lock */ struct list_head objcg_list; #endif @@ -583,7 +586,7 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, /* * There is no reclaim protection applied to a targeted reclaim. * We are special casing this specific case here because - * mem_cgroup_protected calculation is not robust enough to keep + * mem_cgroup_calculate_protection is not robust enough to keep * the protection invariant for calculated effective values for * parallel reclaimers with different reclaim target. This is * especially a problem for tail memcgs (as they have pages on LRU) @@ -655,6 +658,8 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, page_counter_read(&memcg->memory); } +void mem_cgroup_commit_charge(struct folio *folio, struct mem_cgroup *memcg); + int __mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp); /** @@ -679,6 +684,9 @@ static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, return __mem_cgroup_charge(folio, mm, gfp); } +int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, gfp_t gfp, + long nr_pages); + int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry); void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry); @@ -706,6 +714,10 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) __mem_cgroup_uncharge_list(page_list); } +void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages); + +void mem_cgroup_replace_folio(struct folio *old, struct folio *new); + void mem_cgroup_migrate(struct folio *old, struct folio *new); /** @@ -762,6 +774,8 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); +struct mem_cgroup *get_mem_cgroup_from_current(void); + struct lruvec *folio_lruvec_lock(struct folio *folio); struct lruvec *folio_lruvec_lock_irq(struct folio *folio); struct lruvec *folio_lruvec_lock_irqsave(struct folio *folio, @@ -861,8 +875,7 @@ static inline struct mem_cgroup *lruvec_memcg(struct lruvec *lruvec) * parent_mem_cgroup - find the accounting parent of a memcg * @memcg: memcg whose parent to find * - * Returns the parent memcg, or NULL if this is the root or the memory - * controller is in legacy no-hierarchy mode. + * Returns the parent memcg, or NULL if this is the root. */ static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) { @@ -914,7 +927,7 @@ unsigned long mem_cgroup_get_zone_lru_size(struct lruvec *lruvec, return READ_ONCE(mz->lru_zone_size[zone_idx][lru]); } -void mem_cgroup_handle_over_high(void); +void mem_cgroup_handle_over_high(gfp_t gfp_mask); unsigned long mem_cgroup_get_max(struct mem_cgroup *memcg); @@ -1020,14 +1033,12 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec, { struct mem_cgroup_per_node *pn; long x = 0; - int cpu; if (mem_cgroup_disabled()) return node_page_state(lruvec_pgdat(lruvec), idx); pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec); - for_each_possible_cpu(cpu) - x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu); + x = READ_ONCE(pn->lruvec_stats.state_local[idx]); #ifdef CONFIG_SMP if (x < 0) x = 0; @@ -1076,15 +1087,6 @@ static inline void count_memcg_events(struct mem_cgroup *memcg, local_irq_restore(flags); } -static inline void count_memcg_page_event(struct page *page, - enum vm_event_item idx) -{ - struct mem_cgroup *memcg = page_memcg(page); - - if (memcg) - count_memcg_events(memcg, idx, 1); -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1158,7 +1160,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, #else /* CONFIG_MEMCG */ #define MEM_CGROUP_ID_SHIFT 0 -#define MEM_CGROUP_ID_MAX 0 static inline struct mem_cgroup *folio_memcg(struct folio *folio) { @@ -1246,12 +1247,23 @@ static inline bool mem_cgroup_below_min(struct mem_cgroup *target, return false; } +static inline void mem_cgroup_commit_charge(struct folio *folio, + struct mem_cgroup *memcg) +{ +} + static inline int mem_cgroup_charge(struct folio *folio, struct mm_struct *mm, gfp_t gfp) { return 0; } +static inline int mem_cgroup_hugetlb_try_charge(struct mem_cgroup *memcg, + gfp_t gfp, long nr_pages) +{ + return 0; +} + static inline int mem_cgroup_swapin_charge_folio(struct folio *folio, struct mm_struct *mm, gfp_t gfp, swp_entry_t entry) { @@ -1270,6 +1282,16 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } +static inline void mem_cgroup_cancel_charge(struct mem_cgroup *memcg, + unsigned int nr_pages) +{ +} + +static inline void mem_cgroup_replace_folio(struct folio *old, + struct folio *new) +{ +} + static inline void mem_cgroup_migrate(struct folio *old, struct folio *new) { } @@ -1307,6 +1329,11 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } +static inline struct mem_cgroup *get_mem_cgroup_from_current(void) +{ + return NULL; +} + static inline struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css) { @@ -1455,7 +1482,7 @@ static inline void mem_cgroup_unlock_pages(void) rcu_read_unlock(); } -static inline void mem_cgroup_handle_over_high(void) +static inline void mem_cgroup_handle_over_high(gfp_t gfp_mask) { } @@ -1562,11 +1589,6 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg, { } -static inline void count_memcg_page_event(struct page *page, - int idx) -{ -} - static inline void count_memcg_folio_events(struct folio *folio, enum vm_event_item idx, unsigned long nr) { @@ -1727,8 +1749,8 @@ void mem_cgroup_sk_alloc(struct sock *sk); void mem_cgroup_sk_free(struct sock *sk); static inline bool mem_cgroup_under_socket_pressure(struct mem_cgroup *memcg) { - if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) && memcg->tcpmem_pressure) - return true; + if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return !!memcg->tcpmem_pressure; do { if (time_before(jiffies, READ_ONCE(memcg->socket_pressure))) return true; @@ -1760,8 +1782,26 @@ bool mem_cgroup_kmem_disabled(void); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); -struct obj_cgroup *get_obj_cgroup_from_current(void); -struct obj_cgroup *get_obj_cgroup_from_page(struct page *page); +/* + * The returned objcg pointer is safe to use without additional + * protection within a scope. The scope is defined either by + * the current task (similar to the "current" global variable) + * or by set_active_memcg() pair. + * Please, use obj_cgroup_get() to get a reference if the pointer + * needs to be used outside of the local scope. + */ +struct obj_cgroup *current_obj_cgroup(void); +struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio); + +static inline struct obj_cgroup *get_obj_cgroup_from_current(void) +{ + struct obj_cgroup *objcg = current_obj_cgroup(); + + if (objcg) + obj_cgroup_get(objcg); + + return objcg; +} int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size); void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size); @@ -1845,7 +1885,7 @@ static inline void __memcg_kmem_uncharge_page(struct page *page, int order) { } -static inline struct obj_cgroup *get_obj_cgroup_from_page(struct page *page) +static inline struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio) { return NULL; } |