aboutsummaryrefslogtreecommitdiff
path: root/mm/hugetlb_cgroup.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2022-01-15 20:37:06 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2022-01-15 20:37:06 +0200
commitf56caedaf94f9ced5dbfcdb0060a3e788d2078af (patch)
treee213532d1b3d32f9f0e81948f3b23804baff287d /mm/hugetlb_cgroup.c
parenta33f5c380c4bd3fa5278d690421b72052456d9fe (diff)
parent76fd0285b447991267e838842c0be7395eb454bb (diff)
downloadlinux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.gz
linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.tar.bz2
linux-f56caedaf94f9ced5dbfcdb0060a3e788d2078af.zip
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "146 patches. Subsystems affected by this patch series: kthread, ia64, scripts, ntfs, squashfs, ocfs2, vfs, and mm (slab-generic, slab, kmemleak, dax, kasan, debug, pagecache, gup, shmem, frontswap, memremap, memcg, selftests, pagemap, dma, vmalloc, memory-failure, hugetlb, userfaultfd, vmscan, mempolicy, oom-kill, hugetlbfs, migration, thp, ksm, page-poison, percpu, rmap, zswap, zram, cleanups, hmm, and damon)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (146 commits) mm/damon: hide kernel pointer from tracepoint event mm/damon/vaddr: hide kernel pointer from damon_va_three_regions() failure log mm/damon/vaddr: use pr_debug() for damon_va_three_regions() failure logging mm/damon/dbgfs: remove an unnecessary variable mm/damon: move the implementation of damon_insert_region to damon.h mm/damon: add access checking for hugetlb pages Docs/admin-guide/mm/damon/usage: update for schemes statistics mm/damon/dbgfs: support all DAMOS stats Docs/admin-guide/mm/damon/reclaim: document statistics parameters mm/damon/reclaim: provide reclamation statistics mm/damon/schemes: account how many times quota limit has exceeded mm/damon/schemes: account scheme actions that successfully applied mm/damon: remove a mistakenly added comment for a future feature Docs/admin-guide/mm/damon/usage: update for kdamond_pid and (mk|rm)_contexts Docs/admin-guide/mm/damon/usage: mention tracepoint at the beginning Docs/admin-guide/mm/damon/usage: remove redundant information Docs/admin-guide/mm/damon/usage: update for scheme quotas and watermarks mm/damon: convert macro functions to static inline functions mm/damon: modify damon_rand() macro to static inline function mm/damon: move damon_rand() definition into damon.h ...
Diffstat (limited to 'mm/hugetlb_cgroup.c')
-rw-r--r--mm/hugetlb_cgroup.c133
1 files changed, 123 insertions, 10 deletions
diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
index 79d93534ef1e..f9942841df18 100644
--- a/mm/hugetlb_cgroup.c
+++ b/mm/hugetlb_cgroup.c
@@ -123,29 +123,58 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
}
}
+static void hugetlb_cgroup_free(struct hugetlb_cgroup *h_cgroup)
+{
+ int node;
+
+ for_each_node(node)
+ kfree(h_cgroup->nodeinfo[node]);
+ kfree(h_cgroup);
+}
+
static struct cgroup_subsys_state *
hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
{
struct hugetlb_cgroup *parent_h_cgroup = hugetlb_cgroup_from_css(parent_css);
struct hugetlb_cgroup *h_cgroup;
+ int node;
+
+ h_cgroup = kzalloc(struct_size(h_cgroup, nodeinfo, nr_node_ids),
+ GFP_KERNEL);
- h_cgroup = kzalloc(sizeof(*h_cgroup), GFP_KERNEL);
if (!h_cgroup)
return ERR_PTR(-ENOMEM);
if (!parent_h_cgroup)
root_h_cgroup = h_cgroup;
+ /*
+ * TODO: this routine can waste much memory for nodes which will
+ * never be onlined. It's better to use memory hotplug callback
+ * function.
+ */
+ for_each_node(node) {
+ /* Set node_to_alloc to -1 for offline nodes. */
+ int node_to_alloc =
+ node_state(node, N_NORMAL_MEMORY) ? node : -1;
+ h_cgroup->nodeinfo[node] =
+ kzalloc_node(sizeof(struct hugetlb_cgroup_per_node),
+ GFP_KERNEL, node_to_alloc);
+ if (!h_cgroup->nodeinfo[node])
+ goto fail_alloc_nodeinfo;
+ }
+
hugetlb_cgroup_init(h_cgroup, parent_h_cgroup);
return &h_cgroup->css;
+
+fail_alloc_nodeinfo:
+ hugetlb_cgroup_free(h_cgroup);
+ return ERR_PTR(-ENOMEM);
}
static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
{
- struct hugetlb_cgroup *h_cgroup;
-
- h_cgroup = hugetlb_cgroup_from_css(css);
- kfree(h_cgroup);
+ hugetlb_cgroup_free(hugetlb_cgroup_from_css(css));
}
/*
@@ -289,7 +318,17 @@ static void __hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
return;
__set_hugetlb_cgroup(page, h_cg, rsvd);
- return;
+ if (!rsvd) {
+ unsigned long usage =
+ h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
+ /*
+ * This write is not atomic due to fetching usage and writing
+ * to it, but that's fine because we call this with
+ * hugetlb_lock held anyway.
+ */
+ WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
+ usage + nr_pages);
+ }
}
void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages,
@@ -328,8 +367,17 @@ static void __hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
if (rsvd)
css_put(&h_cg->css);
-
- return;
+ else {
+ unsigned long usage =
+ h_cg->nodeinfo[page_to_nid(page)]->usage[idx];
+ /*
+ * This write is not atomic due to fetching usage and writing
+ * to it, but that's fine because we call this with
+ * hugetlb_lock held anyway.
+ */
+ WRITE_ONCE(h_cg->nodeinfo[page_to_nid(page)]->usage[idx],
+ usage - nr_pages);
+ }
}
void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
@@ -418,6 +466,59 @@ enum {
RES_RSVD_FAILCNT,
};
+static int hugetlb_cgroup_read_numa_stat(struct seq_file *seq, void *dummy)
+{
+ int nid;
+ struct cftype *cft = seq_cft(seq);
+ int idx = MEMFILE_IDX(cft->private);
+ bool legacy = MEMFILE_ATTR(cft->private);
+ struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(seq_css(seq));
+ struct cgroup_subsys_state *css;
+ unsigned long usage;
+
+ if (legacy) {
+ /* Add up usage across all nodes for the non-hierarchical total. */
+ usage = 0;
+ for_each_node_state(nid, N_MEMORY)
+ usage += READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]);
+ seq_printf(seq, "total=%lu", usage * PAGE_SIZE);
+
+ /* Simply print the per-node usage for the non-hierarchical total. */
+ for_each_node_state(nid, N_MEMORY)
+ seq_printf(seq, " N%d=%lu", nid,
+ READ_ONCE(h_cg->nodeinfo[nid]->usage[idx]) *
+ PAGE_SIZE);
+ seq_putc(seq, '\n');
+ }
+
+ /*
+ * The hierarchical total is pretty much the value recorded by the
+ * counter, so use that.
+ */
+ seq_printf(seq, "%stotal=%lu", legacy ? "hierarchical_" : "",
+ page_counter_read(&h_cg->hugepage[idx]) * PAGE_SIZE);
+
+ /*
+ * For each node, transverse the css tree to obtain the hierarchical
+ * node usage.
+ */
+ for_each_node_state(nid, N_MEMORY) {
+ usage = 0;
+ rcu_read_lock();
+ css_for_each_descendant_pre(css, &h_cg->css) {
+ usage += READ_ONCE(hugetlb_cgroup_from_css(css)
+ ->nodeinfo[nid]
+ ->usage[idx]);
+ }
+ rcu_read_unlock();
+ seq_printf(seq, " N%d=%lu", nid, usage * PAGE_SIZE);
+ }
+
+ seq_putc(seq, '\n');
+
+ return 0;
+}
+
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft)
{
@@ -668,8 +769,14 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx)
events_local_file[idx]);
cft->flags = CFTYPE_NOT_ON_ROOT;
- /* NULL terminate the last cft */
+ /* Add the numa stat file */
cft = &h->cgroup_files_dfl[6];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
+ cft->seq_show = hugetlb_cgroup_read_numa_stat;
+ cft->flags = CFTYPE_NOT_ON_ROOT;
+
+ /* NULL terminate the last cft */
+ cft = &h->cgroup_files_dfl[7];
memset(cft, 0, sizeof(*cft));
WARN_ON(cgroup_add_dfl_cftypes(&hugetlb_cgrp_subsys,
@@ -739,8 +846,14 @@ static void __init __hugetlb_cgroup_file_legacy_init(int idx)
cft->write = hugetlb_cgroup_reset;
cft->read_u64 = hugetlb_cgroup_read_u64;
- /* NULL terminate the last cft */
+ /* Add the numa stat file */
cft = &h->cgroup_files_legacy[8];
+ snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
+ cft->private = MEMFILE_PRIVATE(idx, 1);
+ cft->seq_show = hugetlb_cgroup_read_numa_stat;
+
+ /* NULL terminate the last cft */
+ cft = &h->cgroup_files_legacy[9];
memset(cft, 0, sizeof(*cft));
WARN_ON(cgroup_add_legacy_cftypes(&hugetlb_cgrp_subsys,