LKML Archive on lore.kernel.org
 help / color / Atom feed
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
To: linux-kernel@vger.kernel.org
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>,
	stable@vger.kernel.org, Johannes Weiner <hannes@cmpxchg.org>,
	Tejun Heo <tj@kernel.org>, Michal Hocko <mhocko@suse.com>,
	Vladimir Davydov <vdavydov.dev@gmail.com>,
	Roman Gushchin <guro@fb.com>, Rik van Riel <riel@surriel.com>,
	Stephen Rothwell <sfr@canb.auug.org.au>,
	Andrew Morton <akpm@linux-foundation.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Sasha Levin <sashal@kernel.org>
Subject: [PATCH 4.14 33/52] mm: memcg: make sure memory.events is uptodate when waking pollers
Date: Mon,  5 Apr 2021 10:53:59 +0200
Message-ID: <20210405085023.076458735@linuxfoundation.org> (raw)
In-Reply-To: <20210405085021.996963957@linuxfoundation.org>

From: Johannes Weiner <hannes@cmpxchg.org>

commit e27be240df53f1a20c659168e722b5d9f16cc7f4 upstream.

Commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in
memory.stat reporting") added per-cpu drift to all memory cgroup stats
and events shown in memory.stat and memory.events.

For memory.stat this is acceptable.  But memory.events issues file
notifications, and somebody polling the file for changes will be
confused when the counters in it are unchanged after a wakeup.

Luckily, the events in memory.events - MEMCG_LOW, MEMCG_HIGH, MEMCG_MAX,
MEMCG_OOM - are sufficiently rare and high-level that we don't need
per-cpu buffering for them: MEMCG_HIGH and MEMCG_MAX would be the most
frequent, but they're counting invocations of reclaim, which is a
complex operation that touches many shared cachelines.

This splits memory.events from the generic VM events and tracks them in
their own, unbuffered atomic counters.  That's also cleaner, as it
eliminates the ugly enum nesting of VM and cgroup events.

[hannes@cmpxchg.org: "array subscript is above array bounds"]
  Link: http://lkml.kernel.org/r/20180406155441.GA20806@cmpxchg.org
Link: http://lkml.kernel.org/r/20180405175507.GA24817@cmpxchg.org
Fixes: a983b5ebee57 ("mm: memcontrol: fix excessive complexity in memory.stat reporting")
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Acked-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 include/linux/memcontrol.h | 35 ++++++++++++++++++-----------------
 mm/memcontrol.c            | 28 ++++++++++++++++------------
 mm/vmscan.c                |  2 +-
 3 files changed, 35 insertions(+), 30 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c46016bb25eb..6503a9ca27c1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -48,13 +48,12 @@ enum memcg_stat_item {
 	MEMCG_NR_STAT,
 };
 
-/* Cgroup-specific events, on top of universal VM events */
-enum memcg_event_item {
-	MEMCG_LOW = NR_VM_EVENT_ITEMS,
+enum memcg_memory_event {
+	MEMCG_LOW,
 	MEMCG_HIGH,
 	MEMCG_MAX,
 	MEMCG_OOM,
-	MEMCG_NR_EVENTS,
+	MEMCG_NR_MEMORY_EVENTS,
 };
 
 struct mem_cgroup_reclaim_cookie {
@@ -88,7 +87,7 @@ enum mem_cgroup_events_target {
 
 struct mem_cgroup_stat_cpu {
 	long count[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	unsigned long nr_page_events;
 	unsigned long targets[MEM_CGROUP_NTARGETS];
 };
@@ -202,7 +201,8 @@ struct mem_cgroup {
 	/* OOM-Killer disable */
 	int		oom_kill_disable;
 
-	/* handle for "memory.events" */
+	/* memory.events */
+	atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
 	struct cgroup_file events_file;
 
 	/* protect arrays of thresholds */
@@ -231,9 +231,10 @@ struct mem_cgroup {
 	struct task_struct	*move_lock_task;
 	unsigned long		move_lock_flags;
 
+	/* memory.stat */
 	struct mem_cgroup_stat_cpu __percpu *stat_cpu;
 	atomic_long_t		stat[MEMCG_NR_STAT];
-	atomic_long_t		events[MEMCG_NR_EVENTS];
+	atomic_long_t		events[NR_VM_EVENT_ITEMS];
 
 	unsigned long		socket_pressure;
 
@@ -645,9 +646,9 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
 						gfp_t gfp_mask,
 						unsigned long *total_scanned);
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void __count_memcg_events(struct mem_cgroup *memcg,
-					int idx, unsigned long count)
+					enum vm_event_item idx,
+					unsigned long count)
 {
 	unsigned long x;
 
@@ -663,7 +664,8 @@ static inline void __count_memcg_events(struct mem_cgroup *memcg,
 }
 
 static inline void count_memcg_events(struct mem_cgroup *memcg,
-				      int idx, unsigned long count)
+				      enum vm_event_item idx,
+				      unsigned long count)
 {
 	unsigned long flags;
 
@@ -672,9 +674,8 @@ static inline void count_memcg_events(struct mem_cgroup *memcg,
 	local_irq_restore(flags);
 }
 
-/* idx can be of type enum memcg_event_item or vm_event_item */
 static inline void count_memcg_page_event(struct page *page,
-					  int idx)
+					  enum vm_event_item idx)
 {
 	if (page->mem_cgroup)
 		count_memcg_events(page->mem_cgroup, idx, 1);
@@ -698,10 +699,10 @@ static inline void count_memcg_event_mm(struct mm_struct *mm,
 	rcu_read_unlock();
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
 {
-	count_memcg_events(memcg, event, 1);
+	atomic_long_inc(&memcg->memory_events[event]);
 	cgroup_file_notify(&memcg->events_file);
 }
 
@@ -721,8 +722,8 @@ static inline bool mem_cgroup_disabled(void)
 	return true;
 }
 
-static inline void mem_cgroup_event(struct mem_cgroup *memcg,
-				    enum memcg_event_item event)
+static inline void memcg_memory_event(struct mem_cgroup *memcg,
+				      enum memcg_memory_event event)
 {
 }
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4e763cdccb33..31972189a827 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1872,7 +1872,7 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 			}
 		}
 
-		for (i = 0; i < MEMCG_NR_EVENTS; i++) {
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
 			long x;
 
 			x = this_cpu_xchg(memcg->stat_cpu->events[i], 0);
@@ -1891,7 +1891,7 @@ static void reclaim_high(struct mem_cgroup *memcg,
 	do {
 		if (page_counter_read(&memcg->memory) <= memcg->high)
 			continue;
-		mem_cgroup_event(memcg, MEMCG_HIGH);
+		memcg_memory_event(memcg, MEMCG_HIGH);
 		try_to_free_mem_cgroup_pages(memcg, nr_pages, gfp_mask, true);
 	} while ((memcg = parent_mem_cgroup(memcg)));
 }
@@ -1982,7 +1982,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (!gfpflags_allow_blocking(gfp_mask))
 		goto nomem;
 
-	mem_cgroup_event(mem_over_limit, MEMCG_MAX);
+	memcg_memory_event(mem_over_limit, MEMCG_MAX);
 
 	nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
 						    gfp_mask, may_swap);
@@ -2025,7 +2025,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	if (fatal_signal_pending(current))
 		goto force;
 
-	mem_cgroup_event(mem_over_limit, MEMCG_OOM);
+	memcg_memory_event(mem_over_limit, MEMCG_OOM);
 
 	mem_cgroup_oom(mem_over_limit, gfp_mask,
 		       get_order(nr_pages * PAGE_SIZE));
@@ -2790,10 +2790,10 @@ static void tree_events(struct mem_cgroup *memcg, unsigned long *events)
 	struct mem_cgroup *iter;
 	int i;
 
-	memset(events, 0, sizeof(*events) * MEMCG_NR_EVENTS);
+	memset(events, 0, sizeof(*events) * NR_VM_EVENT_ITEMS);
 
 	for_each_mem_cgroup_tree(iter, memcg) {
-		for (i = 0; i < MEMCG_NR_EVENTS; i++)
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
 			events[i] += memcg_sum_events(iter, i);
 	}
 }
@@ -5299,7 +5299,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
 			continue;
 		}
 
-		mem_cgroup_event(memcg, MEMCG_OOM);
+		memcg_memory_event(memcg, MEMCG_OOM);
 		if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0))
 			break;
 	}
@@ -5312,10 +5312,14 @@ static int memory_events_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 
-	seq_printf(m, "low %lu\n", memcg_sum_events(memcg, MEMCG_LOW));
-	seq_printf(m, "high %lu\n", memcg_sum_events(memcg, MEMCG_HIGH));
-	seq_printf(m, "max %lu\n", memcg_sum_events(memcg, MEMCG_MAX));
-	seq_printf(m, "oom %lu\n", memcg_sum_events(memcg, MEMCG_OOM));
+	seq_printf(m, "low %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_LOW]));
+	seq_printf(m, "high %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_HIGH]));
+	seq_printf(m, "max %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_MAX]));
+	seq_printf(m, "oom %lu\n",
+		   atomic_long_read(&memcg->memory_events[MEMCG_OOM]));
 	seq_printf(m, "oom_kill %lu\n", memcg_sum_events(memcg, OOM_KILL));
 
 	return 0;
@@ -5325,7 +5329,7 @@ static int memory_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
 	unsigned long stat[MEMCG_NR_STAT];
-	unsigned long events[MEMCG_NR_EVENTS];
+	unsigned long events[NR_VM_EVENT_ITEMS];
 	int i;
 
 	/*
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5ee6fbdec8a8..b37e6dd50925 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2628,7 +2628,7 @@ static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
 					sc->memcg_low_skipped = 1;
 					continue;
 				}
-				mem_cgroup_event(memcg, MEMCG_LOW);
+				memcg_memory_event(memcg, MEMCG_LOW);
 			}
 
 			reclaimed = sc->nr_reclaimed;
-- 
2.30.2




  parent reply index

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-05  8:53 [PATCH 4.14 00/52] 4.14.229-rc1 review Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 01/52] selinux: vsock: Set SID for socket returned by accept() Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 02/52] ipv6: weaken the v4mapped source check Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 03/52] ext4: fix bh ref count on error paths Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 04/52] rpc: fix NULL dereference on kmalloc failure Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 05/52] ASoC: rt5640: Fix dac- and adc- vol-tlv values being off by a factor of 10 Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 06/52] ASoC: rt5651: " Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 07/52] ASoC: sgtl5000: set DAP_AVC_CTRL register to correct default value on probe Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 08/52] ASoC: es8316: Simplify adc_pga_gain_tlv table Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 09/52] ASoC: cs42l42: Fix mixer volume control Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 10/52] ASoC: cs42l42: Always wait at least 3ms after reset Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 11/52] powerpc: Force inlining of cpu_has_feature() to avoid build failure Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 12/52] vhost: Fix vhost_vq_reset() Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 13/52] scsi: st: Fix a use after free in st_open() Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 14/52] scsi: qla2xxx: Fix broken #endif placement Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 15/52] staging: comedi: cb_pcidas: fix request_irq() warn Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 16/52] staging: comedi: cb_pcidas64: " Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 17/52] ASoC: rt5659: Update MCLK rate in set_sysclk() Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 18/52] ext4: do not iput inode under running transaction in ext4_rename() Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 19/52] brcmfmac: clear EAP/association status bits on linkdown events Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 20/52] net: ethernet: aquantia: Handle error cleanup of start on open Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 21/52] appletalk: Fix skb allocation size in loopback case Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 22/52] net: wan/lmc: unregister device when no matching device is found Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 23/52] bpf: Remove MTU check in __bpf_skb_max_len Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 24/52] ALSA: usb-audio: Apply sample rate quirk to Logitech Connect Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 25/52] ALSA: hda/realtek: fix a determine_headset_type issue for a Dell AIO Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 26/52] ALSA: hda/realtek: call alc_update_headset_mode() in hp_automute_hook Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 27/52] tracing: Fix stack trace event size Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 28/52] mm: fix race by making init_zero_pfn() early_initcall Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 29/52] drm/amdgpu: fix offset calculation in amdgpu_vm_bo_clear_mappings() Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 30/52] drm/amdgpu: check alignment on CPU page for bo map Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 31/52] reiserfs: update reiserfs_xattrs_initialized() condition Greg Kroah-Hartman
2021-04-05  8:53 ` [PATCH 4.14 32/52] mm: memcontrol: fix NR_WRITEBACK leak in memcg and system stats Greg Kroah-Hartman
2021-04-05  8:53 ` Greg Kroah-Hartman [this message]
2021-04-05  8:54 ` [PATCH 4.14 34/52] mem_cgroup: make sure moving_account, move_lock_task and stat_cpu in the same cacheline Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 35/52] mm: fix oom_kill event handling Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 36/52] mm: writeback: use exact memcg dirty counts Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 37/52] pinctrl: rockchip: fix restore error in resume Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 38/52] extcon: Add stubs for extcon_register_notifier_all() functions Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 39/52] extcon: Fix error handling in extcon_dev_register Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 40/52] firewire: nosy: Fix a use-after-free bug in nosy_ioctl() Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 41/52] usbip: vhci_hcd fix shift out-of-bounds in vhci_hub_control() Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 42/52] USB: quirks: ignore remote wake-up on Fibocom L850-GL LTE modem Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 43/52] usb: musb: Fix suspend with devices connected for a64 Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 44/52] usb: xhci-mtk: fix broken streams issue on 0.96 xHCI Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 45/52] cdc-acm: fix BREAK rx code path adding necessary calls Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 46/52] USB: cdc-acm: untangle a circular dependency between callback and softint Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 47/52] USB: cdc-acm: downgrade message to debug Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 48/52] USB: cdc-acm: fix use-after-free after probe failure Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 49/52] usb: gadget: udc: amd5536udc_pci fix null-ptr-dereference Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 50/52] staging: rtl8192e: Fix incorrect source in memcpy() Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 51/52] staging: rtl8192e: Change state information from u16 to u8 Greg Kroah-Hartman
2021-04-05  8:54 ` [PATCH 4.14 52/52] drivers: video: fbcon: fix NULL dereference in fbcon_cursor() Greg Kroah-Hartman
2021-04-05 17:57 ` [PATCH 4.14 00/52] 4.14.229-rc1 review Guenter Roeck
2021-04-06  7:14 ` Naresh Kamboju
2021-04-07  0:58 ` Samuel Zou

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210405085023.076458735@linuxfoundation.org \
    --to=gregkh@linuxfoundation.org \
    --cc=akpm@linux-foundation.org \
    --cc=guro@fb.com \
    --cc=hannes@cmpxchg.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mhocko@suse.com \
    --cc=riel@surriel.com \
    --cc=sashal@kernel.org \
    --cc=sfr@canb.auug.org.au \
    --cc=stable@vger.kernel.org \
    --cc=tj@kernel.org \
    --cc=torvalds@linux-foundation.org \
    --cc=vdavydov.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

LKML Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/lkml/0 lkml/git/0.git
	git clone --mirror https://lore.kernel.org/lkml/1 lkml/git/1.git
	git clone --mirror https://lore.kernel.org/lkml/2 lkml/git/2.git
	git clone --mirror https://lore.kernel.org/lkml/3 lkml/git/3.git
	git clone --mirror https://lore.kernel.org/lkml/4 lkml/git/4.git
	git clone --mirror https://lore.kernel.org/lkml/5 lkml/git/5.git
	git clone --mirror https://lore.kernel.org/lkml/6 lkml/git/6.git
	git clone --mirror https://lore.kernel.org/lkml/7 lkml/git/7.git
	git clone --mirror https://lore.kernel.org/lkml/8 lkml/git/8.git
	git clone --mirror https://lore.kernel.org/lkml/9 lkml/git/9.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 lkml lkml/ https://lore.kernel.org/lkml \
		linux-kernel@vger.kernel.org
	public-inbox-index lkml

Example config snippet for mirrors

Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-kernel


AGPL code for this site: git clone https://public-inbox.org/public-inbox.git