linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 0/3] memcg: reduce memory overhead of memory cgroups
@ 2022-09-07  4:35 Shakeel Butt
  2022-09-07  4:35 ` [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup Shakeel Butt
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Shakeel Butt @ 2022-09-07  4:35 UTC (permalink / raw)
  To: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song
  Cc: Andrew Morton, cgroups, linux-mm, linux-kernel, Shakeel Butt

Currently a lot of memory is wasted to maintain the vmevents for memory
cgroups as we have multiple arrays of size NR_VM_EVENT_ITEMS which can
be as large as 110. However, memcg code uses only a small portion of
those entries. This patch series eliminates this overhead by removing
the unneeded vmevent entries from memory cgroup data structures.

Shakeel Butt (3):
  memcg: extract memcg_vmstats from struct mem_cgroup
  memcg: rearrange code
  memcg: reduce size of memcg vmstats structures

 include/linux/memcontrol.h |  37 +---------
 mm/memcontrol.c            | 145 ++++++++++++++++++++++++++++---------
 2 files changed, 113 insertions(+), 69 deletions(-)

-- 
2.37.2.789.g6183377224-goog



^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup
  2022-09-07  4:35 [PATCH 0/3] memcg: reduce memory overhead of memory cgroups Shakeel Butt
@ 2022-09-07  4:35 ` Shakeel Butt
  2022-09-09  0:26   ` Michal Koutný
  2022-09-07  4:35 ` [PATCH 2/3] memcg: rearrange code Shakeel Butt
  2022-09-07  4:35 ` [PATCH 3/3] memcg: reduce size of memcg vmstats structures Shakeel Butt
  2 siblings, 1 reply; 11+ messages in thread
From: Shakeel Butt @ 2022-09-07  4:35 UTC (permalink / raw)
  To: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song
  Cc: Andrew Morton, cgroups, linux-mm, linux-kernel, Shakeel Butt

This is a preparatory patch to reduce the memory overhead of memory
cgroup. The struct memcg_vmstats is the largest object embedded into the
struct mem_cgroup. This patch extracts struct memcg_vmstats from struct
mem_cgroup to ease the following patches in reducing the size of struct
memcg_vmstats.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 include/linux/memcontrol.h | 37 +++----------------------
 mm/memcontrol.c            | 57 ++++++++++++++++++++++++++++++++------
 2 files changed, 52 insertions(+), 42 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ca0df42662ad..dc7d40e575d5 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -80,29 +80,8 @@ enum mem_cgroup_events_target {
 	MEM_CGROUP_NTARGETS,
 };
 
-struct memcg_vmstats_percpu {
-	/* Local (CPU and cgroup) page state & events */
-	long			state[MEMCG_NR_STAT];
-	unsigned long		events[NR_VM_EVENT_ITEMS];
-
-	/* Delta calculation for lockless upward propagation */
-	long			state_prev[MEMCG_NR_STAT];
-	unsigned long		events_prev[NR_VM_EVENT_ITEMS];
-
-	/* Cgroup1: threshold notifications & softlimit tree updates */
-	unsigned long		nr_page_events;
-	unsigned long		targets[MEM_CGROUP_NTARGETS];
-};
-
-struct memcg_vmstats {
-	/* Aggregated (CPU and subtree) page state & events */
-	long			state[MEMCG_NR_STAT];
-	unsigned long		events[NR_VM_EVENT_ITEMS];
-
-	/* Pending child counts during tree propagation */
-	long			state_pending[MEMCG_NR_STAT];
-	unsigned long		events_pending[NR_VM_EVENT_ITEMS];
-};
+struct memcg_vmstats_percpu;
+struct memcg_vmstats;
 
 struct mem_cgroup_reclaim_iter {
 	struct mem_cgroup *position;
@@ -298,7 +277,7 @@ struct mem_cgroup {
 	CACHELINE_PADDING(_pad1_);
 
 	/* memory.stat */
-	struct memcg_vmstats	vmstats;
+	struct memcg_vmstats	*vmstats;
 
 	/* memory.events */
 	atomic_long_t		memory_events[MEMCG_NR_MEMORY_EVENTS];
@@ -1001,15 +980,7 @@ static inline void mod_memcg_page_state(struct page *page,
 	rcu_read_unlock();
 }
 
-static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
-{
-	long x = READ_ONCE(memcg->vmstats.state[idx]);
-#ifdef CONFIG_SMP
-	if (x < 0)
-		x = 0;
-#endif
-	return x;
-}
+unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx);
 
 static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
 					      enum node_stat_item idx)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0a1a8a846870..b195d4ca2a72 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -669,6 +669,40 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
 	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
 }
 
+struct memcg_vmstats_percpu {
+	/* Local (CPU and cgroup) page state & events */
+	long			state[MEMCG_NR_STAT];
+	unsigned long		events[NR_VM_EVENT_ITEMS];
+
+	/* Delta calculation for lockless upward propagation */
+	long			state_prev[MEMCG_NR_STAT];
+	unsigned long		events_prev[NR_VM_EVENT_ITEMS];
+
+	/* Cgroup1: threshold notifications & softlimit tree updates */
+	unsigned long		nr_page_events;
+	unsigned long		targets[MEM_CGROUP_NTARGETS];
+};
+
+struct memcg_vmstats {
+	/* Aggregated (CPU and subtree) page state & events */
+	long			state[MEMCG_NR_STAT];
+	unsigned long		events[NR_VM_EVENT_ITEMS];
+
+	/* Pending child counts during tree propagation */
+	long			state_pending[MEMCG_NR_STAT];
+	unsigned long		events_pending[NR_VM_EVENT_ITEMS];
+};
+
+unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+{
+	long x = READ_ONCE(memcg->vmstats->state[idx]);
+#ifdef CONFIG_SMP
+	if (x < 0)
+		x = 0;
+#endif
+	return x;
+}
+
 /**
  * __mod_memcg_state - update cgroup memory statistics
  * @memcg: the memory cgroup
@@ -827,7 +861,7 @@ void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 
 static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
-	return READ_ONCE(memcg->vmstats.events[event]);
+	return READ_ONCE(memcg->vmstats->events[event]);
 }
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
@@ -5170,6 +5204,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
+	kfree(memcg->vmstats);
 	free_percpu(memcg->vmstats_percpu);
 	kfree(memcg);
 }
@@ -5199,6 +5234,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 		goto fail;
 	}
 
+	memcg->vmstats = kzalloc(sizeof(struct memcg_vmstats), GFP_KERNEL);
+	if (!memcg->vmstats)
+		goto fail;
+
 	memcg->vmstats_percpu = alloc_percpu_gfp(struct memcg_vmstats_percpu,
 						 GFP_KERNEL_ACCOUNT);
 	if (!memcg->vmstats_percpu)
@@ -5418,9 +5457,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 		 * below us. We're in a per-cpu loop here and this is
 		 * a global counter, so the first cycle will get them.
 		 */
-		delta = memcg->vmstats.state_pending[i];
+		delta = memcg->vmstats->state_pending[i];
 		if (delta)
-			memcg->vmstats.state_pending[i] = 0;
+			memcg->vmstats->state_pending[i] = 0;
 
 		/* Add CPU changes on this level since the last flush */
 		v = READ_ONCE(statc->state[i]);
@@ -5433,15 +5472,15 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			continue;
 
 		/* Aggregate counts on this level and propagate upwards */
-		memcg->vmstats.state[i] += delta;
+		memcg->vmstats->state[i] += delta;
 		if (parent)
-			parent->vmstats.state_pending[i] += delta;
+			parent->vmstats->state_pending[i] += delta;
 	}
 
 	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
-		delta = memcg->vmstats.events_pending[i];
+		delta = memcg->vmstats->events_pending[i];
 		if (delta)
-			memcg->vmstats.events_pending[i] = 0;
+			memcg->vmstats->events_pending[i] = 0;
 
 		v = READ_ONCE(statc->events[i]);
 		if (v != statc->events_prev[i]) {
@@ -5452,9 +5491,9 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 		if (!delta)
 			continue;
 
-		memcg->vmstats.events[i] += delta;
+		memcg->vmstats->events[i] += delta;
 		if (parent)
-			parent->vmstats.events_pending[i] += delta;
+			parent->vmstats->events_pending[i] += delta;
 	}
 
 	for_each_node_state(nid, N_MEMORY) {
-- 
2.37.2.789.g6183377224-goog



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 2/3] memcg: rearrange code
  2022-09-07  4:35 [PATCH 0/3] memcg: reduce memory overhead of memory cgroups Shakeel Butt
  2022-09-07  4:35 ` [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup Shakeel Butt
@ 2022-09-07  4:35 ` Shakeel Butt
  2022-09-07  4:35 ` [PATCH 3/3] memcg: reduce size of memcg vmstats structures Shakeel Butt
  2 siblings, 0 replies; 11+ messages in thread
From: Shakeel Butt @ 2022-09-07  4:35 UTC (permalink / raw)
  To: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song
  Cc: Andrew Morton, cgroups, linux-mm, linux-kernel, Shakeel Butt

This is a preparatory patch for easing the review of the follow up patch
which will reduce the memory overhead of memory cgroups.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 mm/memcontrol.c | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b195d4ca2a72..d0ccc16ed416 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -669,6 +669,29 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
 	queue_delayed_work(system_unbound_wq, &stats_flush_dwork, FLUSH_TIME);
 }
 
+/* Subset of vm_event_item to report for memcg event stats */
+static const unsigned int memcg_vm_event_stat[] = {
+	PGSCAN_KSWAPD,
+	PGSCAN_DIRECT,
+	PGSTEAL_KSWAPD,
+	PGSTEAL_DIRECT,
+	PGFAULT,
+	PGMAJFAULT,
+	PGREFILL,
+	PGACTIVATE,
+	PGDEACTIVATE,
+	PGLAZYFREE,
+	PGLAZYFREED,
+#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
+	ZSWPIN,
+	ZSWPOUT,
+#endif
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+	THP_FAULT_ALLOC,
+	THP_COLLAPSE_ALLOC,
+#endif
+};
+
 struct memcg_vmstats_percpu {
 	/* Local (CPU and cgroup) page state & events */
 	long			state[MEMCG_NR_STAT];
@@ -1501,29 +1524,6 @@ static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
 	return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
 }
 
-/* Subset of vm_event_item to report for memcg event stats */
-static const unsigned int memcg_vm_event_stat[] = {
-	PGSCAN_KSWAPD,
-	PGSCAN_DIRECT,
-	PGSTEAL_KSWAPD,
-	PGSTEAL_DIRECT,
-	PGFAULT,
-	PGMAJFAULT,
-	PGREFILL,
-	PGACTIVATE,
-	PGDEACTIVATE,
-	PGLAZYFREE,
-	PGLAZYFREED,
-#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_ZSWAP)
-	ZSWPIN,
-	ZSWPOUT,
-#endif
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-	THP_FAULT_ALLOC,
-	THP_COLLAPSE_ALLOC,
-#endif
-};
-
 static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
 {
 	struct seq_buf s;
-- 
2.37.2.789.g6183377224-goog



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH 3/3] memcg: reduce size of memcg vmstats structures
  2022-09-07  4:35 [PATCH 0/3] memcg: reduce memory overhead of memory cgroups Shakeel Butt
  2022-09-07  4:35 ` [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup Shakeel Butt
  2022-09-07  4:35 ` [PATCH 2/3] memcg: rearrange code Shakeel Butt
@ 2022-09-07  4:35 ` Shakeel Butt
  2022-09-07 23:27   ` Roman Gushchin
                     ` (2 more replies)
  2 siblings, 3 replies; 11+ messages in thread
From: Shakeel Butt @ 2022-09-07  4:35 UTC (permalink / raw)
  To: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song
  Cc: Andrew Morton, cgroups, linux-mm, linux-kernel, Shakeel Butt

The struct memcg_vmstats and struct memcg_vmstats_percpu contain two
arrays each for events of size NR_VM_EVENT_ITEMS which can be as large
as 110. However the memcg v1 only uses 4 of those while memcg v2 uses
15. The union of both is 17. On a 64 bit system, we are wasting
approximately ((110 - 17) * 8 * 2) * (nr_cpus + 1) bytes which is
significant on large machines.

This patch reduces the size of the given structures by adding one
indirection and only stores array of events which are actually used by
the memcg code. With this patch, the size of memcg_vmstats has reduced
from 2544 bytes to 1056 bytes while the size of memcg_vmstats_percpu has
reduced from 2568 bytes to 1080 bytes.

Signed-off-by: Shakeel Butt <shakeelb@google.com>
---
 mm/memcontrol.c | 52 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 43 insertions(+), 9 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index d0ccc16ed416..a60012be6140 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -671,6 +671,8 @@ static void flush_memcg_stats_dwork(struct work_struct *w)
 
 /* Subset of vm_event_item to report for memcg event stats */
 static const unsigned int memcg_vm_event_stat[] = {
+	PGPGIN,
+	PGPGOUT,
 	PGSCAN_KSWAPD,
 	PGSCAN_DIRECT,
 	PGSTEAL_KSWAPD,
@@ -692,14 +694,30 @@ static const unsigned int memcg_vm_event_stat[] = {
 #endif
 };
 
+#define NR_MEMCG_EVENTS ARRAY_SIZE(memcg_vm_event_stat)
+static int mem_cgroup_events_index[NR_VM_EVENT_ITEMS] __read_mostly;
+
+static void init_memcg_events(void)
+{
+	int i;
+
+	for (i = 0; i < NR_MEMCG_EVENTS; ++i)
+		mem_cgroup_events_index[memcg_vm_event_stat[i]] = i + 1;
+}
+
+static inline int memcg_events_index(enum vm_event_item idx)
+{
+	return mem_cgroup_events_index[idx] - 1;
+}
+
 struct memcg_vmstats_percpu {
 	/* Local (CPU and cgroup) page state & events */
 	long			state[MEMCG_NR_STAT];
-	unsigned long		events[NR_VM_EVENT_ITEMS];
+	unsigned long		events[NR_MEMCG_EVENTS];
 
 	/* Delta calculation for lockless upward propagation */
 	long			state_prev[MEMCG_NR_STAT];
-	unsigned long		events_prev[NR_VM_EVENT_ITEMS];
+	unsigned long		events_prev[NR_MEMCG_EVENTS];
 
 	/* Cgroup1: threshold notifications & softlimit tree updates */
 	unsigned long		nr_page_events;
@@ -709,11 +727,11 @@ struct memcg_vmstats_percpu {
 struct memcg_vmstats {
 	/* Aggregated (CPU and subtree) page state & events */
 	long			state[MEMCG_NR_STAT];
-	unsigned long		events[NR_VM_EVENT_ITEMS];
+	unsigned long		events[NR_MEMCG_EVENTS];
 
 	/* Pending child counts during tree propagation */
 	long			state_pending[MEMCG_NR_STAT];
-	unsigned long		events_pending[NR_VM_EVENT_ITEMS];
+	unsigned long		events_pending[NR_MEMCG_EVENTS];
 };
 
 unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
@@ -873,24 +891,34 @@ void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val)
 void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
 			  unsigned long count)
 {
-	if (mem_cgroup_disabled())
+	int index = memcg_events_index(idx);
+
+	if (mem_cgroup_disabled() || index < 0)
 		return;
 
 	memcg_stats_lock();
-	__this_cpu_add(memcg->vmstats_percpu->events[idx], count);
+	__this_cpu_add(memcg->vmstats_percpu->events[index], count);
 	memcg_rstat_updated(memcg, count);
 	memcg_stats_unlock();
 }
 
 static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
 {
-	return READ_ONCE(memcg->vmstats->events[event]);
+	int index = memcg_events_index(event);
+
+	if (index < 0)
+		return 0;
+	return READ_ONCE(memcg->vmstats->events[index]);
 }
 
 static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
 {
 	long x = 0;
 	int cpu;
+	int index = memcg_events_index(event);
+
+	if (index < 0)
+		return 0;
 
 	for_each_possible_cpu(cpu)
 		x += per_cpu(memcg->vmstats_percpu->events[event], cpu);
@@ -1564,10 +1592,15 @@ static void memory_stat_format(struct mem_cgroup *memcg, char *buf, int bufsize)
 		       memcg_events(memcg, PGSTEAL_KSWAPD) +
 		       memcg_events(memcg, PGSTEAL_DIRECT));
 
-	for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++)
+	for (i = 0; i < ARRAY_SIZE(memcg_vm_event_stat); i++) {
+		if (memcg_vm_event_stat[i] == PGPGIN ||
+		    memcg_vm_event_stat[i] == PGPGOUT)
+			continue;
+
 		seq_buf_printf(&s, "%s %lu\n",
 			       vm_event_name(memcg_vm_event_stat[i]),
 			       memcg_events(memcg, memcg_vm_event_stat[i]));
+	}
 
 	/* The above should easily fit into one page */
 	WARN_ON_ONCE(seq_buf_has_overflowed(&s));
@@ -5309,6 +5342,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 		page_counter_init(&memcg->kmem, &parent->kmem);
 		page_counter_init(&memcg->tcpmem, &parent->tcpmem);
 	} else {
+		init_memcg_events();
 		page_counter_init(&memcg->memory, NULL);
 		page_counter_init(&memcg->swap, NULL);
 		page_counter_init(&memcg->kmem, NULL);
@@ -5477,7 +5511,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
 			parent->vmstats->state_pending[i] += delta;
 	}
 
-	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+	for (i = 0; i < NR_MEMCG_EVENTS; i++) {
 		delta = memcg->vmstats->events_pending[i];
 		if (delta)
 			memcg->vmstats->events_pending[i] = 0;
-- 
2.37.2.789.g6183377224-goog



^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memcg: reduce size of memcg vmstats structures
  2022-09-07  4:35 ` [PATCH 3/3] memcg: reduce size of memcg vmstats structures Shakeel Butt
@ 2022-09-07 23:27   ` Roman Gushchin
  2022-09-08  2:35   ` Shakeel Butt
  2022-09-09  0:23   ` Michal Koutný
  2 siblings, 0 replies; 11+ messages in thread
From: Roman Gushchin @ 2022-09-07 23:27 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Muchun Song, Andrew Morton,
	cgroups, linux-mm, linux-kernel

On Wed, Sep 07, 2022 at 04:35:37AM +0000, Shakeel Butt wrote:
> The struct memcg_vmstats and struct memcg_vmstats_percpu contains two
> arrays each for events of size NR_VM_EVENT_ITEMS which can be as large
> as 110. However the memcg v1 only uses 4 of those while memcg v2 uses
> 15. The union of both is 17. On a 64 bit system, we are wasting
> approximately ((110 - 17) * 8 * 2) * (nr_cpus + 1) bytes which is
> significant on large machines.
> 
> This patch reduces the size of the given structures by adding one
> indirection and only stores array of events which are actually used by
> the memcg code. With this patch, the size of memcg_vmstats has reduced
> from 2544 bytes to 1056 bytes while the size of memcg_vmstats_percpu has
> reduced from 2568 bytes to 1080 bytes.

This is pretty impressive!

Thank you, Shakeel!

Acked-by: Roman Gushchin <roman.gushchin@linux.dev>
for the series.

Thanks!


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memcg: reduce size of memcg vmstats structures
  2022-09-07  4:35 ` [PATCH 3/3] memcg: reduce size of memcg vmstats structures Shakeel Butt
  2022-09-07 23:27   ` Roman Gushchin
@ 2022-09-08  2:35   ` Shakeel Butt
  2022-09-08 22:37     ` Andrew Morton
  2022-09-09  0:23   ` Michal Koutný
  2 siblings, 1 reply; 11+ messages in thread
From: Shakeel Butt @ 2022-09-08  2:35 UTC (permalink / raw)
  To: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song
  Cc: Andrew Morton, Cgroups, Linux MM, LKML

On Tue, Sep 6, 2022 at 9:36 PM Shakeel Butt <shakeelb@google.com> wrote:
>
[...]
>
>  static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
>  {
>         long x = 0;
>         int cpu;
> +       int index = memcg_events_index(event);
> +
> +       if (index < 0)
> +               return 0;
>
>         for_each_possible_cpu(cpu)
>                 x += per_cpu(memcg->vmstats_percpu->events[event], cpu);

Andrew, can you please replace 'event' in the above line with 'index'?
I had this correct in the original single patch but messed up while
breaking up that patch into three patches for easier review.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memcg: reduce size of memcg vmstats structures
  2022-09-08  2:35   ` Shakeel Butt
@ 2022-09-08 22:37     ` Andrew Morton
  0 siblings, 0 replies; 11+ messages in thread
From: Andrew Morton @ 2022-09-08 22:37 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
	Cgroups, Linux MM, LKML

On Wed, 7 Sep 2022 19:35:10 -0700 Shakeel Butt <shakeelb@google.com> wrote:

> On Tue, Sep 6, 2022 at 9:36 PM Shakeel Butt <shakeelb@google.com> wrote:
> >
> [...]
> >
> >  static unsigned long memcg_events_local(struct mem_cgroup *memcg, int event)
> >  {
> >         long x = 0;
> >         int cpu;
> > +       int index = memcg_events_index(event);
> > +
> > +       if (index < 0)
> > +               return 0;
> >
> >         for_each_possible_cpu(cpu)
> >                 x += per_cpu(memcg->vmstats_percpu->events[event], cpu);
> 
> Andrew, can you please replace 'event' in the above line with 'index'?
> I had this correct in the original single patch but messed up while
> breaking up that patch into three patches for easier review.

No probs.

From: Andrew Morton <akpm@linux-foundation.org>
Subject: memcg-reduce-size-of-memcg-vmstats-structures-fix
Date: Thu Sep  8 03:35:53 PM PDT 2022

fix memcg_events_local() array index, per Shakeel

Link: https://lkml.kernel.org/r/CALvZod70Mvxr+Nzb6k0yiU2RFYjTD=0NFhKK-Eyp+5ejd1PSFw@mail.gmail.com
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Muchun Song <songmuchun@bytedance.com>
Cc: Roman Gushchin <roman.gushchin@linux.dev>
Cc: Shakeel Butt <shakeelb@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
---

 mm/memcontrol.c |    2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

--- a/mm/memcontrol.c~memcg-reduce-size-of-memcg-vmstats-structures-fix
+++ a/mm/memcontrol.c
@@ -921,7 +921,7 @@ static unsigned long memcg_events_local(
 		return 0;
 
 	for_each_possible_cpu(cpu)
-		x += per_cpu(memcg->vmstats_percpu->events[event], cpu);
+		x += per_cpu(memcg->vmstats_percpu->events[index], cpu);
 	return x;
 }
 
_



^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memcg: reduce size of memcg vmstats structures
  2022-09-07  4:35 ` [PATCH 3/3] memcg: reduce size of memcg vmstats structures Shakeel Butt
  2022-09-07 23:27   ` Roman Gushchin
  2022-09-08  2:35   ` Shakeel Butt
@ 2022-09-09  0:23   ` Michal Koutný
  2022-09-09 17:14     ` Shakeel Butt
  2 siblings, 1 reply; 11+ messages in thread
From: Michal Koutný @ 2022-09-09  0:23 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
	Andrew Morton, cgroups, linux-mm, linux-kernel

Hello.

On Wed, Sep 07, 2022 at 04:35:37AM +0000, Shakeel Butt <shakeelb@google.com> wrote:
>  /* Subset of vm_event_item to report for memcg event stats */
>  static const unsigned int memcg_vm_event_stat[] = {
> +	PGPGIN,
> +	PGPGOUT,
>  	PGSCAN_KSWAPD,
>  	PGSCAN_DIRECT,
>  	PGSTEAL_KSWAPD,

What about adding a dummy entry at the beginning like:

 static const unsigned int memcg_vm_event_stat[] = {
+	NR_VM_EVENT_ITEMS,
+	PGPGIN,
+	PGPGOUT,
 	PGSCAN_KSWAPD,
 	PGSCAN_DIRECT,


> @@ -692,14 +694,30 @@ static const unsigned int memcg_vm_event_stat[] = {
>  #endif
>  };
>  
> +#define NR_MEMCG_EVENTS ARRAY_SIZE(memcg_vm_event_stat)
> +static int mem_cgroup_events_index[NR_VM_EVENT_ITEMS] __read_mostly;
> +
> +static void init_memcg_events(void)
> +{
> +	int i;
> +
> +	for (i = 0; i < NR_MEMCG_EVENTS; ++i)
> +		mem_cgroup_events_index[memcg_vm_event_stat[i]] = i + 1;

Start such loops from i = 1, save i to the table.

> +}
> +
> +static inline int memcg_events_index(enum vm_event_item idx)
> +{
> +	return mem_cgroup_events_index[idx] - 1;
> +}

And then there'd be no need for the reverse transforms -1.

I.e. it might be just a negligible micro-optimization but since the
event updates are on some fast (albeit longer) paths, it may be worth
sacrificing one of the saved 8Bs in favor of no arithmetic.

What do you think about this?

>  static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
>  {
> -	return READ_ONCE(memcg->vmstats->events[event]);
> +	int index = memcg_events_index(event);
> +
> +	if (index < 0)
> +		return 0;

As a bonus these undefined maps could use the zero at the dummy location
without branch (slow paths though).


> @@ -5477,7 +5511,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
>  			parent->vmstats->state_pending[i] += delta;
>  	}
>  
> -	for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
> +	for (i = 0; i < NR_MEMCG_EVENTS; i++) {

I applaud this part :-)


Michal


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup
  2022-09-07  4:35 ` [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup Shakeel Butt
@ 2022-09-09  0:26   ` Michal Koutný
  2022-09-09 16:11     ` Shakeel Butt
  0 siblings, 1 reply; 11+ messages in thread
From: Michal Koutný @ 2022-09-09  0:26 UTC (permalink / raw)
  To: Shakeel Butt
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
	Andrew Morton, cgroups, linux-mm, linux-kernel

Hi.

On Wed, Sep 07, 2022 at 04:35:35AM +0000, Shakeel Butt <shakeelb@google.com> wrote:
> This is a preparatory patch to reduce the memory overhead of memory
> cgroup. The struct memcg_vmstats is the largest object embedded into the
> struct mem_cgroup. 
> This patch extracts struct memcg_vmstats from struct
> mem_cgroup to ease the following patches in reducing the size of struct
> memcg_vmstats.

Is the reason for the extraction just moving things away from the header
file?
Or is the separate allocation+indirection somehow beneficial wrt, e.g.
fragmentation?

Thanks,
Michal


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup
  2022-09-09  0:26   ` Michal Koutný
@ 2022-09-09 16:11     ` Shakeel Butt
  0 siblings, 0 replies; 11+ messages in thread
From: Shakeel Butt @ 2022-09-09 16:11 UTC (permalink / raw)
  To: Michal Koutný
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
	Andrew Morton, Cgroups, Linux MM, LKML

On Thu, Sep 8, 2022 at 5:26 PM Michal Koutný <mkoutny@suse.com> wrote:
>
> Hi.
>
> On Wed, Sep 07, 2022 at 04:35:35AM +0000, Shakeel Butt <shakeelb@google.com> wrote:
> > This is a preparatory patch to reduce the memory overhead of memory
> > cgroup. The struct memcg_vmstats is the largest object embedded into the
> > struct mem_cgroup.
> > This patch extracts struct memcg_vmstats from struct
> > mem_cgroup to ease the following patches in reducing the size of struct
> > memcg_vmstats.
>
> Is the reason for the extraction just moving things away from the header
> file?
> Or is the separate allocation+indirection somehow beneficial wrt, e.g.
> fragmentation?
>

The main reason was to move away from the header file. I have not yet
measured the performance impact of these changes. I am planning to
rearrange struct mem_cgroup and will do some performance tests after
that.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 3/3] memcg: reduce size of memcg vmstats structures
  2022-09-09  0:23   ` Michal Koutný
@ 2022-09-09 17:14     ` Shakeel Butt
  0 siblings, 0 replies; 11+ messages in thread
From: Shakeel Butt @ 2022-09-09 17:14 UTC (permalink / raw)
  To: Michal Koutný
  Cc: Johannes Weiner, Michal Hocko, Roman Gushchin, Muchun Song,
	Andrew Morton, Cgroups, Linux MM, LKML

On Thu, Sep 8, 2022 at 5:23 PM Michal Koutný <mkoutny@suse.com> wrote:
>
> Hello.
>
> On Wed, Sep 07, 2022 at 04:35:37AM +0000, Shakeel Butt <shakeelb@google.com> wrote:
> >  /* Subset of vm_event_item to report for memcg event stats */
> >  static const unsigned int memcg_vm_event_stat[] = {
> > +     PGPGIN,
> > +     PGPGOUT,
> >       PGSCAN_KSWAPD,
> >       PGSCAN_DIRECT,
> >       PGSTEAL_KSWAPD,
>
> What about adding a dummy entry at the beginning like:
>
>  static const unsigned int memcg_vm_event_stat[] = {
> +       NR_VM_EVENT_ITEMS,
> +       PGPGIN,
> +       PGPGOUT,
>         PGSCAN_KSWAPD,
>         PGSCAN_DIRECT,
>
>
> > @@ -692,14 +694,30 @@ static const unsigned int memcg_vm_event_stat[] = {
> >  #endif
> >  };
> >
> > +#define NR_MEMCG_EVENTS ARRAY_SIZE(memcg_vm_event_stat)
> > +static int mem_cgroup_events_index[NR_VM_EVENT_ITEMS] __read_mostly;
> > +
> > +static void init_memcg_events(void)
> > +{
> > +     int i;
> > +
> > +     for (i = 0; i < NR_MEMCG_EVENTS; ++i)
> > +             mem_cgroup_events_index[memcg_vm_event_stat[i]] = i + 1;
>
> Start such loops from i = 1, save i to the table.
>
> > +}
> > +
> > +static inline int memcg_events_index(enum vm_event_item idx)
> > +{
> > +     return mem_cgroup_events_index[idx] - 1;
> > +}
>
> And the there'd be no need for the reverse transforms -1.
>
> I.e. it might be just a negligible micro-optimization but since the
> event updates are on some fast (albeit longer) paths, it may be worth
> sacrificing one of the saved 8Bs in favor of no arithmetics.
>
> What do you think about this?
>
> >  static unsigned long memcg_events(struct mem_cgroup *memcg, int event)
> >  {
> > -     return READ_ONCE(memcg->vmstats->events[event]);
> > +     int index = memcg_events_index(event);
> > +
> > +     if (index < 0)
> > +             return 0;
>
> As a bonus these undefined maps could use the zero at the dummy location
> without branch (slow paths though).
>
>
> > @@ -5477,7 +5511,7 @@ static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
> >                       parent->vmstats->state_pending[i] += delta;
> >       }
> >
> > -     for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
> > +     for (i = 0; i < NR_MEMCG_EVENTS; i++) {
>
> I applaud this part :-)
>
>

Hi Michal,

Thanks for taking a look. Let me get back to you on this later. I am
at the moment rearranging struct mem_cgroup for better packing and
will be running some benchmarks. Later I will see whether your
suggestion has any performance benefit or just makes the code more
readable, and then I will follow up.

Shakeel


^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2022-09-09 17:14 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-09-07  4:35 [PATCH 0/3] memcg: reduce memory overhead of memory cgroups Shakeel Butt
2022-09-07  4:35 ` [PATCH 1/3] memcg: extract memcg_vmstats from struct mem_cgroup Shakeel Butt
2022-09-09  0:26   ` Michal Koutný
2022-09-09 16:11     ` Shakeel Butt
2022-09-07  4:35 ` [PATCH 2/3] memcg: rearrange code Shakeel Butt
2022-09-07  4:35 ` [PATCH 3/3] memcg: reduce size of memcg vmstats structures Shakeel Butt
2022-09-07 23:27   ` Roman Gushchin
2022-09-08  2:35   ` Shakeel Butt
2022-09-08 22:37     ` Andrew Morton
2022-09-09  0:23   ` Michal Koutný
2022-09-09 17:14     ` Shakeel Butt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).