All of lore.kernel.org
 help / color / mirror / Atom feed
From: Gang Li <ligang.bdlg@bytedance.com>
To: mhocko@suse.com, akpm@linux-foundation.org, surenb@google.com
Cc: hca@linux.ibm.com, gor@linux.ibm.com, agordeev@linux.ibm.com,
	borntraeger@linux.ibm.com, svens@linux.ibm.com,
	viro@zeniv.linux.org.uk, ebiederm@xmission.com,
	keescook@chromium.org, rostedt@goodmis.org, mingo@redhat.com,
	peterz@infradead.org, acme@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	namhyung@kernel.org, david@redhat.com, imbrenda@linux.ibm.com,
	adobriyan@gmail.com, yang.yang29@zte.com.cn, brauner@kernel.org,
	stephen.s.brennan@oracle.com, zhengqi.arch@bytedance.com,
	haolee.swjtu@gmail.com, xu.xin16@zte.com.cn,
	Liam.Howlett@Oracle.com, ohoono.kwon@samsung.com,
	peterx@redhat.com, arnd@arndb.de, shy828301@gmail.com,
	alex.sierra@amd.com, xianting.tian@linux.alibaba.com,
	willy@infradead.org, ccross@google.com, vbabka@suse.cz,
	sujiaxun@uniontech.com, sfr@canb.auug.org.au,
	vasily.averin@linux.dev, mgorman@suse.de, vvghjk1234@gmail.com,
	tglx@linutronix.de, luto@kernel.org, bigeasy@linutronix.de,
	fenghua.yu@intel.com, linux-s390@vger.kernel.org,
	linux-kernel@vger.kernel.org, linux-fsdevel@vger.kernel.org,
	linux-mm@kvack.org, linux-perf-users@vger.kernel.org,
	Gang Li <ligang.bdlg@bytedance.com>
Subject: [PATCH v2 2/5] mm: add numa_count field for rss_stat
Date: Fri,  8 Jul 2022 16:21:26 +0800	[thread overview]
Message-ID: <20220708082129.80115-3-ligang.bdlg@bytedance.com> (raw)
In-Reply-To: <20220708082129.80115-1-ligang.bdlg@bytedance.com>

This patch add new fields `numa_count` for mm_rss_stat and
task_rss_stat.

`numa_count` are in the size of `sizeof(long) * num_possible_numa()`.
To reduce mem consumption, they only contain the sum of rss which is
needed by `oom_badness` instead of recording different kinds of rss
sepratly.

Signed-off-by: Gang Li <ligang.bdlg@bytedance.com>
---
 include/linux/mm_types_task.h |  6 +++
 kernel/fork.c                 | 70 +++++++++++++++++++++++++++++++++--
 2 files changed, 73 insertions(+), 3 deletions(-)

diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h
index 32512af31721..9fd34ab484f4 100644
--- a/include/linux/mm_types_task.h
+++ b/include/linux/mm_types_task.h
@@ -52,11 +52,17 @@ enum {
 struct task_rss_stat {
 	int events;	/* for synchronization threshold */
 	int count[NR_MM_COUNTERS];
+#ifdef CONFIG_NUMA
+	int *numa_count;
+#endif
 };
 #endif /* USE_SPLIT_PTE_PTLOCKS */
 
 struct mm_rss_stat {
 	atomic_long_t count[NR_MM_COUNTERS];
+#ifdef CONFIG_NUMA
+	atomic_long_t *numa_count;
+#endif
 };
 
 struct page_frag {
diff --git a/kernel/fork.c b/kernel/fork.c
index 23f0ba3affe5..f4f93d6fecd5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -140,6 +140,10 @@ DEFINE_PER_CPU(unsigned long, process_counts) = 0;
 
 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
 
+#if (defined SPLIT_RSS_COUNTING) && (defined CONFIG_NUMA)
+#define SPLIT_RSS_NUMA_COUNTING
+#endif
+
 #ifdef CONFIG_PROVE_RCU
 int lockdep_tasklist_lock_is_held(void)
 {
@@ -757,6 +761,16 @@ static void check_mm(struct mm_struct *mm)
 				 mm, resident_page_types[i], x);
 	}
 
+#ifdef CONFIG_NUMA
+	for (i = 0; i < num_possible_nodes(); i++) {
+		long x = atomic_long_read(&mm->rss_stat.numa_count[i]);
+
+		if (unlikely(x))
+			pr_alert("BUG: Bad rss-counter state mm:%p node:%d val:%ld\n",
+				 mm, i, x);
+	}
+#endif
+
 	if (mm_pgtables_bytes(mm))
 		pr_alert("BUG: non-zero pgtables_bytes on freeing mm: %ld\n",
 				mm_pgtables_bytes(mm));
@@ -769,6 +783,29 @@ static void check_mm(struct mm_struct *mm)
 #define allocate_mm()	(kmem_cache_alloc(mm_cachep, GFP_KERNEL))
 #define free_mm(mm)	(kmem_cache_free(mm_cachep, (mm)))
 
+#ifdef CONFIG_NUMA
+static inline void mm_free_rss_stat(struct mm_struct *mm)
+{
+	kfree(mm->rss_stat.numa_count);
+}
+
+static inline int mm_init_rss_stat(struct mm_struct *mm)
+{
+	memset(&mm->rss_stat.count, 0, sizeof(mm->rss_stat.count));
+	mm->rss_stat.numa_count = kcalloc(num_possible_nodes(), sizeof(atomic_long_t), GFP_KERNEL);
+	if (unlikely(!mm->rss_stat.numa_count))
+		return -ENOMEM;
+	return 0;
+}
+#else
+static inline void mm_free_rss_stat(struct mm_struct *mm) {}
+static inline int mm_init_rss_stat(struct mm_struct *mm)
+{
+	memset(&mm->rss_stat.count, 0, sizeof(mm->rss_stat.count));
+	return 0;
+}
+#endif
+
 /*
  * Called when the last reference to the mm
  * is dropped: either by a lazy thread or by
@@ -783,6 +820,7 @@ void __mmdrop(struct mm_struct *mm)
 	destroy_context(mm);
 	mmu_notifier_subscriptions_destroy(mm);
 	check_mm(mm);
+	mm_free_rss_stat(mm);
 	put_user_ns(mm->user_ns);
 	mm_pasid_drop(mm);
 	free_mm(mm);
@@ -824,12 +862,22 @@ static inline void put_signal_struct(struct signal_struct *sig)
 		free_signal_struct(sig);
 }
 
+#ifdef SPLIT_RSS_NUMA_COUNTING
+void rss_stat_free(struct task_struct *p)
+{
+	kfree(p->rss_stat.numa_count);
+}
+#else
+void rss_stat_free(struct task_struct *p) {}
+#endif
+
 void __put_task_struct(struct task_struct *tsk)
 {
 	WARN_ON(!tsk->exit_state);
 	WARN_ON(refcount_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	rss_stat_free(tsk);
 	io_uring_free(tsk);
 	cgroup_free(tsk);
 	task_numa_free(tsk, true);
@@ -956,6 +1004,7 @@ void set_task_stack_end_magic(struct task_struct *tsk)
 static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 {
 	struct task_struct *tsk;
+	int *numa_count __maybe_unused;
 	int err;
 
 	if (node == NUMA_NO_NODE)
@@ -977,9 +1026,16 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 #endif
 	account_kernel_stack(tsk, 1);
 
+#ifdef SPLIT_RSS_NUMA_COUNTING
+	numa_count = kcalloc(num_possible_nodes(), sizeof(int), GFP_KERNEL);
+	if (!numa_count)
+		goto free_stack;
+	tsk->rss_stat.numa_count = numa_count;
+#endif
+
 	err = scs_prepare(tsk, node);
 	if (err)
-		goto free_stack;
+		goto free_rss_stat;
 
 #ifdef CONFIG_SECCOMP
 	/*
@@ -1045,6 +1101,10 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
 
 	return tsk;
 
+free_rss_stat:
+#ifdef SPLIT_RSS_NUMA_COUNTING
+	kfree(numa_count);
+#endif
 free_stack:
 	exit_task_stack_account(tsk);
 	free_thread_stack(tsk);
@@ -1114,7 +1174,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	mm->map_count = 0;
 	mm->locked_vm = 0;
 	atomic64_set(&mm->pinned_vm, 0);
-	memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
 	spin_lock_init(&mm->page_table_lock);
 	spin_lock_init(&mm->arg_lock);
 	mm_init_cpumask(mm);
@@ -1141,6 +1200,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
 	if (mm_alloc_pgd(mm))
 		goto fail_nopgd;
 
+	if (mm_init_rss_stat(mm))
+		goto fail_nocontext;
+
 	if (init_new_context(p, mm))
 		goto fail_nocontext;
 
@@ -2142,7 +2204,9 @@ static __latent_entropy struct task_struct *copy_process(
 	p->io_uring = NULL;
 #endif
 
-#if defined(SPLIT_RSS_COUNTING)
+#ifdef SPLIT_RSS_NUMA_COUNTING
+	memset(&p->rss_stat, 0, sizeof(p->rss_stat) - sizeof(p->rss_stat.numa_count));
+#else
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
 
-- 
2.20.1


  parent reply	other threads:[~2022-07-08  8:23 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-08  8:21 [PATCH v2 0/5] mm, oom: Introduce per numa node oom for CONSTRAINT_{MEMORY_POLICY,CPUSET} Gang Li
2022-07-08  8:21 ` [PATCH v2 1/5] mm: add a new parameter `node` to `get/add/inc/dec_mm_counter` Gang Li
2022-07-12  6:33   ` [mm] c20f7bacef: WARNING:possible_circular_locking_dependency_detected kernel test robot
2022-07-12  6:33     ` kernel test robot
2022-07-08  8:21 ` Gang Li [this message]
2022-07-08 12:22   ` [PATCH v2 2/5] mm: add numa_count field for rss_stat kernel test robot
2022-07-08  8:21 ` [PATCH v2 3/5] mm: add numa fields for tracepoint rss_stat Gang Li
2022-07-08 17:31   ` Steven Rostedt
2022-07-08  8:21 ` [PATCH v2 4/5] mm: enable per numa node rss_stat count Gang Li
2022-07-08  8:21 ` [PATCH v2 5/5] mm, oom: enable per numa node oom for CONSTRAINT_{MEMORY_POLICY,CPUSET} Gang Li
2022-07-08  8:54 ` [PATCH v2 0/5] mm, oom: Introduce " Michal Hocko
2022-07-08  9:25   ` Gang Li
2022-07-08  9:37     ` Michal Hocko
2022-07-12 11:12   ` Abel Wu
2022-07-12 13:35     ` Michal Hocko
2022-07-12 15:00       ` Abel Wu
2022-07-18 12:11         ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220708082129.80115-3-ligang.bdlg@bytedance.com \
    --to=ligang.bdlg@bytedance.com \
    --cc=Liam.Howlett@Oracle.com \
    --cc=acme@kernel.org \
    --cc=adobriyan@gmail.com \
    --cc=agordeev@linux.ibm.com \
    --cc=akpm@linux-foundation.org \
    --cc=alex.sierra@amd.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=arnd@arndb.de \
    --cc=bigeasy@linutronix.de \
    --cc=borntraeger@linux.ibm.com \
    --cc=brauner@kernel.org \
    --cc=ccross@google.com \
    --cc=david@redhat.com \
    --cc=ebiederm@xmission.com \
    --cc=fenghua.yu@intel.com \
    --cc=gor@linux.ibm.com \
    --cc=haolee.swjtu@gmail.com \
    --cc=hca@linux.ibm.com \
    --cc=imbrenda@linux.ibm.com \
    --cc=jolsa@kernel.org \
    --cc=keescook@chromium.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=linux-s390@vger.kernel.org \
    --cc=luto@kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=ohoono.kwon@samsung.com \
    --cc=peterx@redhat.com \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    --cc=sfr@canb.auug.org.au \
    --cc=shy828301@gmail.com \
    --cc=stephen.s.brennan@oracle.com \
    --cc=sujiaxun@uniontech.com \
    --cc=surenb@google.com \
    --cc=svens@linux.ibm.com \
    --cc=tglx@linutronix.de \
    --cc=vasily.averin@linux.dev \
    --cc=vbabka@suse.cz \
    --cc=viro@zeniv.linux.org.uk \
    --cc=vvghjk1234@gmail.com \
    --cc=willy@infradead.org \
    --cc=xianting.tian@linux.alibaba.com \
    --cc=xu.xin16@zte.com.cn \
    --cc=yang.yang29@zte.com.cn \
    --cc=zhengqi.arch@bytedance.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.