From: Wei Wang <weiwan@google.com>
To: netdev@vger.kernel.org, "David S . Miller" <davem@davemloft.net>,
Jakub Kicinski <kuba@kernel.org>,
cgroups@vger.kernel.org, linux-mm@kvack.org
Cc: Roman Gushchin <guro@fb.com>, Eric Dumazet <edumazet@google.com>,
Shakeel Butt <shakeelb@google.com>
Subject: [PATCH net-next] net-memcg: pass in gfp_t mask to mem_cgroup_charge_skmem()
Date: Tue, 17 Aug 2021 12:40:03 -0700 [thread overview]
Message-ID: <20210817194003.2102381-1-weiwan@google.com> (raw)
Add a gfp_t mask as an input parameter to mem_cgroup_charge_skmem()
to give the networking stack more control and enable it to change
memcg charging behavior. In the future, the networking stack may decide
to avoid OOM kills when fallbacks are more appropriate.
One behavior change this patch makes to mem_cgroup_charge_skmem() is
that it no longer force charges by default; instead, the caller decides
when and if force charging is needed through the presence or absence of
__GFP_NOFAIL.
Signed-off-by: Wei Wang <weiwan@google.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
---
include/linux/memcontrol.h | 3 ++-
include/net/sock.h | 5 +++++
mm/memcontrol.c | 24 +++++++++++-------------
net/core/sock.c | 16 ++++++++++++----
net/ipv4/inet_connection_sock.c | 3 ++-
net/ipv4/tcp_output.c | 3 ++-
6 files changed, 34 insertions(+), 20 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index bfe5c486f4ad..f0ee30881ca9 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -1581,7 +1581,8 @@ static inline void mem_cgroup_flush_foreign(struct bdi_writeback *wb)
#endif /* CONFIG_CGROUP_WRITEBACK */
struct sock;
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+ gfp_t gfp_mask);
void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
#ifdef CONFIG_MEMCG
extern struct static_key_false memcg_sockets_enabled_key;
diff --git a/include/net/sock.h b/include/net/sock.h
index 6e761451c927..95b25777b53e 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2400,6 +2400,11 @@ static inline gfp_t gfp_any(void)
return in_softirq() ? GFP_ATOMIC : GFP_KERNEL;
}
+static inline gfp_t gfp_memcg_charge(void)
+{
+ return in_softirq() ? GFP_NOWAIT : GFP_KERNEL;
+}
+
static inline long sock_rcvtimeo(const struct sock *sk, bool noblock)
{
return noblock ? 0 : sk->sk_rcvtimeo;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 8ef06f9e0db1..be585ceaba98 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7048,14 +7048,14 @@ void mem_cgroup_sk_free(struct sock *sk)
* mem_cgroup_charge_skmem - charge socket memory
* @memcg: memcg to charge
* @nr_pages: number of pages to charge
+ * @gfp_mask: reclaim mode
*
* Charges @nr_pages to @memcg. Returns %true if the charge fit within
- * @memcg's configured limit, %false if the charge had to be forced.
+ * @memcg's configured limit, %false if it doesn't.
*/
-bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+ gfp_t gfp_mask)
{
- gfp_t gfp_mask = GFP_KERNEL;
-
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
struct page_counter *fail;
@@ -7063,21 +7063,19 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
memcg->tcpmem_pressure = 0;
return true;
}
- page_counter_charge(&memcg->tcpmem, nr_pages);
memcg->tcpmem_pressure = 1;
+ if (gfp_mask & __GFP_NOFAIL) {
+ page_counter_charge(&memcg->tcpmem, nr_pages);
+ return true;
+ }
return false;
}
- /* Don't block in the packet receive path */
- if (in_softirq())
- gfp_mask = GFP_NOWAIT;
-
- mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
-
- if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+ if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
+ mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
return true;
+ }
- try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
return false;
}
diff --git a/net/core/sock.c b/net/core/sock.c
index aada649e07e8..950f1e70dbf5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2728,10 +2728,12 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
{
struct proto *prot = sk->sk_prot;
long allocated = sk_memory_allocated_add(sk, amt);
+ bool memcg_charge = mem_cgroup_sockets_enabled && sk->sk_memcg;
bool charged = true;
- if (mem_cgroup_sockets_enabled && sk->sk_memcg &&
- !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt)))
+ if (memcg_charge &&
+ !(charged = mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge())))
goto suppress_allocation;
/* Under limit. */
@@ -2785,8 +2787,14 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
/* Fail only if socket is _under_ its sndbuf.
* In this case we cannot block, so that we have to fail.
*/
- if (sk->sk_wmem_queued + size >= sk->sk_sndbuf)
+ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) {
+ /* Force charge with __GFP_NOFAIL */
+ if (memcg_charge && !charged) {
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
+ }
return 1;
+ }
}
if (kind == SK_MEM_SEND || (kind == SK_MEM_RECV && charged))
@@ -2794,7 +2802,7 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind)
sk_memory_allocated_sub(sk, amt);
- if (mem_cgroup_sockets_enabled && sk->sk_memcg)
+ if (memcg_charge && charged)
mem_cgroup_uncharge_skmem(sk->sk_memcg, amt);
return 0;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 754013fa393b..f25d02ad4a8a 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -534,7 +534,8 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
atomic_read(&newsk->sk_rmem_alloc));
mem_cgroup_sk_alloc(newsk);
if (newsk->sk_memcg && amt)
- mem_cgroup_charge_skmem(newsk->sk_memcg, amt);
+ mem_cgroup_charge_skmem(newsk->sk_memcg, amt,
+ GFP_KERNEL | __GFP_NOFAIL);
release_sock(newsk);
}
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 29553fce8502..6d72f3ea48c4 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3373,7 +3373,8 @@ void sk_forced_mem_schedule(struct sock *sk, int size)
sk_memory_allocated_add(sk, amt);
if (mem_cgroup_sockets_enabled && sk->sk_memcg)
- mem_cgroup_charge_skmem(sk->sk_memcg, amt);
+ mem_cgroup_charge_skmem(sk->sk_memcg, amt,
+ gfp_memcg_charge() | __GFP_NOFAIL);
}
/* Send a FIN. The caller locks the socket for us.
--
2.33.0.rc1.237.g0d66db33f3-goog
next reply other threads:[~2021-08-17 19:40 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-08-17 19:40 Wei Wang [this message]
2021-08-18 10:50 ` [PATCH net-next] net-memcg: pass in gfp_t mask to mem_cgroup_charge_skmem() patchwork-bot+netdevbpf
2022-10-12 23:33 ` Jakub Kicinski
2022-10-13 0:17 ` Shakeel Butt
2022-10-13 0:38 ` Jakub Kicinski
2022-10-13 0:54 ` Shakeel Butt
2022-10-13 1:40 ` Jakub Kicinski
2022-10-13 3:16 ` Jakub Kicinski
2022-10-13 3:34 ` Wei Wang
2022-10-13 3:49 ` Jakub Kicinski
2022-10-13 4:04 ` Wei Wang
2022-10-13 4:18 ` Shakeel Butt
2022-10-13 21:49 ` Jakub Kicinski
2022-10-13 22:02 ` Eric Dumazet
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210817194003.2102381-1-weiwan@google.com \
--to=weiwan@google.com \
--cc=cgroups@vger.kernel.org \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=guro@fb.com \
--cc=kuba@kernel.org \
--cc=linux-mm@kvack.org \
--cc=netdev@vger.kernel.org \
--cc=shakeelb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).