All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
@ 2018-02-02 16:57 Roman Gushchin
  2018-02-02 17:59 ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Roman Gushchin @ 2018-02-02 16:57 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: netdev, linux-kernel, kernel-team, Roman Gushchin,
	David S . Miller, Johannes Weiner, Tejun Heo

This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol:
defer call to mem_cgroup_sk_alloc()").

Moving mem_cgroup_sk_alloc() to the inet_csk_accept() completely breaks
memcg socket memory accounting, as packets received before memcg
pointer initialization are not accounted and are causing refcounting
underflow on socket release.

Actually the free-after-use problem was fixed by
commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in
sk_clone_lock()") for the cgroup pointer.

So, let's revert it and call mem_cgroup_sk_alloc() just before
cgroup_sk_alloc(). This is safe, as we hold a reference to the socket
we're cloning, and it holds a reference to the memcg.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
---
 mm/memcontrol.c                 | 14 ++++++++++++++
 net/core/sock.c                 |  5 +----
 net/ipv4/inet_connection_sock.c |  1 -
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0ae2dc3a1748..0937f2c52c7d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5747,6 +5747,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
 	if (!mem_cgroup_sockets_enabled)
 		return;
 
+	/*
+	 * Socket cloning can throw us here with sk_memcg already
+	 * filled. It won't however, necessarily happen from
+	 * process context. So the test for root memcg given
+	 * the current task's memcg won't help us in this case.
+	 *
+	 * Respecting the original socket's memcg is a better
+	 * decision in this case.
+	 */
+	if (sk->sk_memcg) {
+		css_get(&sk->sk_memcg->css);
+		return;
+	}
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
 	if (memcg == root_mem_cgroup)
diff --git a/net/core/sock.c b/net/core/sock.c
index 1033f8ab0547..e50e7b3f2223 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1683,16 +1683,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_dst_pending_confirm = 0;
 		newsk->sk_wmem_queued	= 0;
 		newsk->sk_forward_alloc = 0;
-
-		/* sk->sk_memcg will be populated at accept() time */
-		newsk->sk_memcg = NULL;
-
 		atomic_set(&newsk->sk_drops, 0);
 		newsk->sk_send_head	= NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 		atomic_set(&newsk->sk_zckey, 0);
 
 		sock_reset_flag(newsk, SOCK_DONE);
+		mem_cgroup_sk_alloc(newsk);
 		cgroup_sk_alloc(&newsk->sk_cgrp_data);
 
 		rcu_read_lock();
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 12410ec6f7f7..881ac6d046f2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 		}
 		spin_unlock_bh(&queue->fastopenq.lock);
 	}
-	mem_cgroup_sk_alloc(newsk);
 out:
 	release_sock(sk);
 	if (req)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
  2018-02-02 16:57 [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()" Roman Gushchin
@ 2018-02-02 17:59 ` Eric Dumazet
  2018-02-02 18:06   ` Roman Gushchin
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2018-02-02 17:59 UTC (permalink / raw)
  To: Roman Gushchin, Eric Dumazet
  Cc: netdev, linux-kernel, kernel-team, David S . Miller,
	Johannes Weiner, Tejun Heo

On Fri, 2018-02-02 at 16:57 +0000, Roman Gushchin wrote:
> This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol:
> defer call to mem_cgroup_sk_alloc()").
> 
> Moving mem_cgroup_sk_alloc() to the inet_csk_accept() completely breaks
> memcg socket memory accounting, as packets received before memcg
> pointer initialization are not accounted and are causing refcounting
> underflow on socket release.
> 
> Actually the free-after-use problem was fixed by
> commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in
> sk_clone_lock()") for the cgroup pointer.
> 
> So, let's revert it and call mem_cgroup_sk_alloc() just before
> cgroup_sk_alloc(). This is safe, as we hold a reference to the socket
> we're cloning, and it holds a reference to the memcg.
> 
> Signed-off-by: Roman Gushchin <guro@fb.com>
> Cc: Eric Dumazet <edumazet@google.com>
> Cc: David S. Miller <davem@davemloft.net>
> Cc: Johannes Weiner <hannes@cmpxchg.org>
> Cc: Tejun Heo <tj@kernel.org>
> ---
>  mm/memcontrol.c                 | 14 ++++++++++++++
>  net/core/sock.c                 |  5 +----
>  net/ipv4/inet_connection_sock.c |  1 -
>  3 files changed, 15 insertions(+), 5 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 0ae2dc3a1748..0937f2c52c7d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -5747,6 +5747,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
>  	if (!mem_cgroup_sockets_enabled)
>  		return;
>  
> +	/*
> +	 * Socket cloning can throw us here with sk_memcg already
> +	 * filled. It won't however, necessarily happen from
> +	 * process context. So the test for root memcg given
> +	 * the current task's memcg won't help us in this case.
> +	 *
> +	 * Respecting the original socket's memcg is a better
> +	 * decision in this case.
> +	 */
> +	if (sk->sk_memcg) {

Original commit had a BUG_ON(mem_cgroup_is_root(sk->sk_memcg));

I presume it is no longer useful ?

Thanks

> +		css_get(&sk->sk_memcg->css);
> +		return;
> +	}
> +
>  	rcu_read_lock();

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
  2018-02-02 17:59 ` Eric Dumazet
@ 2018-02-02 18:06   ` Roman Gushchin
  2018-02-02 18:39     ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Roman Gushchin @ 2018-02-02 18:06 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Eric Dumazet, netdev, linux-kernel, kernel-team,
	David S . Miller, Johannes Weiner, Tejun Heo

On Fri, Feb 02, 2018 at 09:59:27AM -0800, Eric Dumazet wrote:
> On Fri, 2018-02-02 at 16:57 +0000, Roman Gushchin wrote:
> > This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol:
> > defer call to mem_cgroup_sk_alloc()").
> > 
> > Moving mem_cgroup_sk_alloc() to the inet_csk_accept() completely breaks
> > memcg socket memory accounting, as packets received before memcg
> > pointer initialization are not accounted and are causing refcounting
> > underflow on socket release.
> > 
> > Actually the free-after-use problem was fixed by
> > commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in
> > sk_clone_lock()") for the cgroup pointer.
> > 
> > So, let's revert it and call mem_cgroup_sk_alloc() just before
> > cgroup_sk_alloc(). This is safe, as we hold a reference to the socket
> > we're cloning, and it holds a reference to the memcg.
> > 
> > Signed-off-by: Roman Gushchin <guro@fb.com>
> > Cc: Eric Dumazet <edumazet@google.com>
> > Cc: David S. Miller <davem@davemloft.net>
> > Cc: Johannes Weiner <hannes@cmpxchg.org>
> > Cc: Tejun Heo <tj@kernel.org>
> > ---
> >  mm/memcontrol.c                 | 14 ++++++++++++++
> >  net/core/sock.c                 |  5 +----
> >  net/ipv4/inet_connection_sock.c |  1 -
> >  3 files changed, 15 insertions(+), 5 deletions(-)
> > 
> > diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> > index 0ae2dc3a1748..0937f2c52c7d 100644
> > --- a/mm/memcontrol.c
> > +++ b/mm/memcontrol.c
> > @@ -5747,6 +5747,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
> >  	if (!mem_cgroup_sockets_enabled)
> >  		return;
> >  
> > +	/*
> > +	 * Socket cloning can throw us here with sk_memcg already
> > +	 * filled. It won't however, necessarily happen from
> > +	 * process context. So the test for root memcg given
> > +	 * the current task's memcg won't help us in this case.
> > +	 *
> > +	 * Respecting the original socket's memcg is a better
> > +	 * decision in this case.
> > +	 */
> > +	if (sk->sk_memcg) {
> 
> Original commit had a BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
> 
> I presume it is no longer useful ?

Idk, how even we can hit it? And if so, what scary will happen?

If you prefer to have it there, I definitely can return it,
but I see no profit so far.

Thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
  2018-02-02 18:06   ` Roman Gushchin
@ 2018-02-02 18:39     ` Eric Dumazet
  2018-02-02 19:04       ` Roman Gushchin
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2018-02-02 18:39 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Eric Dumazet, netdev, linux-kernel, kernel-team,
	David S . Miller, Johannes Weiner, Tejun Heo

On Fri, 2018-02-02 at 18:06 +0000, Roman Gushchin wrote:
> 
> Idk, how even we can hit it? And if so, what scary will happen?
> 
> If you prefer to have it there, I definitely can return it,
> but I see no profit so far.

I was simply curious this was not mentioned in the changelog.

A revert is normally a true revert, modulo the changes needed by
conflicts and possible changes.

I personally do not care of this BUG_ON(), I had not put it in the
first place.

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
  2018-02-02 18:39     ` Eric Dumazet
@ 2018-02-02 19:04       ` Roman Gushchin
  2018-02-02 19:34         ` Eric Dumazet
  0 siblings, 1 reply; 7+ messages in thread
From: Roman Gushchin @ 2018-02-02 19:04 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Eric Dumazet, netdev, linux-kernel, kernel-team,
	David S . Miller, Johannes Weiner, Tejun Heo

On Fri, Feb 02, 2018 at 10:39:04AM -0800, Eric Dumazet wrote:
> On Fri, 2018-02-02 at 18:06 +0000, Roman Gushchin wrote:
> > 
> > Idk, how even we can hit it? And if so, what scary will happen?
> > 
> > If you prefer to have it there, I definitely can return it,
> > but I see no profit so far.
> 
> I was simply curious this was not mentioned in the changelog.
> 
> A revert is normally a true revert, modulo the changes needed by
> conflicts and possible changes.
> 
> I personally do not care of this BUG_ON(), I had not put it in the
> first place.

Technically it's not a true revert, but you're totally right.
Let me add a note to the commit description.

Are you ok with the rest?

Thanks!

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
  2018-02-02 19:04       ` Roman Gushchin
@ 2018-02-02 19:34         ` Eric Dumazet
  2018-02-02 19:54           ` Roman Gushchin
  0 siblings, 1 reply; 7+ messages in thread
From: Eric Dumazet @ 2018-02-02 19:34 UTC (permalink / raw)
  To: Roman Gushchin
  Cc: Eric Dumazet, netdev, linux-kernel, kernel-team,
	David S . Miller, Johannes Weiner, Tejun Heo

On Fri, 2018-02-02 at 19:04 +0000, Roman Gushchin wrote:
> On Fri, Feb 02, 2018 at 10:39:04AM -0800, Eric Dumazet wrote:
> > On Fri, 2018-02-02 at 18:06 +0000, Roman Gushchin wrote:
> > > 
> > > Idk, how even we can hit it? And if so, what scary will happen?
> > > 
> > > If you prefer to have it there, I definitely can return it,
> > > but I see no profit so far.
> > 
> > I was simply curious this was not mentioned in the changelog.
> > 
> > A revert is normally a true revert, modulo the changes needed by
> > conflicts and possible changes.
> > 
> > I personally do not care of this BUG_ON(), I had not put it in the
> > first place.
> 
> Technically it's not a true revert, but you're totally right.
> Let me add a note to the commit description.
> 
> Are you ok with the rest?

Sure !

Thanks.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"
  2018-02-02 19:34         ` Eric Dumazet
@ 2018-02-02 19:54           ` Roman Gushchin
  0 siblings, 0 replies; 7+ messages in thread
From: Roman Gushchin @ 2018-02-02 19:54 UTC (permalink / raw)
  To: David S. Miller
  Cc: Eric Dumazet, netdev, linux-kernel, kernel-team,
	David S . Miller, Johannes Weiner, Tejun Heo

On Fri, Feb 02, 2018 at 11:34:56AM -0800, Eric Dumazet wrote:
> On Fri, 2018-02-02 at 19:04 +0000, Roman Gushchin wrote:
> > On Fri, Feb 02, 2018 at 10:39:04AM -0800, Eric Dumazet wrote:
> > > On Fri, 2018-02-02 at 18:06 +0000, Roman Gushchin wrote:
> > > > 
> > > > Idk, how even we can hit it? And if so, what scary will happen?
> > > > 
> > > > If you prefer to have it there, I definitely can return it,
> > > > but I see no profit so far.
> > > 
> > > I was simply curious this was not mentioned in the changelog.
> > > 
> > > A revert is normally a true revert, modulo the changes needed by
> > > conflicts and possible changes.
> > > 
> > > I personally do not care of this BUG_ON(), I had not put it in the
> > > first place.
> > 
> > Technically it's not a true revert, but you're totally right.
> > Let me add a note to the commit description.
> > 
> > Are you ok with the rest?
> 
> Sure !
> 
> Thanks.

Hello, David!

Can you, please, pull the patch below?
It should be applied for 4.14+.

Thank you!

Roman

--

>From a0a07f65a38105562bf424d7dc072a2bc4f1569e Mon Sep 17 00:00:00 2001
From: Roman Gushchin <guro@fb.com>
Date: Fri, 2 Feb 2018 15:26:57 +0000
Subject: [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()"

This patch effectively reverts commit 9f1c2674b328 ("net: memcontrol:
defer call to mem_cgroup_sk_alloc()").

Moving mem_cgroup_sk_alloc() to the inet_csk_accept() completely breaks
memcg socket memory accounting, as packets received before memcg
pointer initialization are not accounted and are causing refcounting
underflow on socket release.

Actually the free-after-use problem was fixed by
commit c0576e397508 ("net: call cgroup_sk_alloc() earlier in
sk_clone_lock()") for the cgroup pointer.

So, let's revert it and call mem_cgroup_sk_alloc() just before
cgroup_sk_alloc(). This is safe, as we hold a reference to the socket
we're cloning, and it holds a reference to the memcg.

Also, let's drop BUG_ON(mem_cgroup_is_root()) check from
mem_cgroup_sk_alloc(). I see no reasons why bumping the root
memcg counter is a good reason to panic, and there are no realistic
ways to hit it.

Signed-off-by: Roman Gushchin <guro@fb.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Tejun Heo <tj@kernel.org>
---
 mm/memcontrol.c                 | 14 ++++++++++++++
 net/core/sock.c                 |  5 +----
 net/ipv4/inet_connection_sock.c |  1 -
 3 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 0ae2dc3a1748..0937f2c52c7d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -5747,6 +5747,20 @@ void mem_cgroup_sk_alloc(struct sock *sk)
 	if (!mem_cgroup_sockets_enabled)
 		return;
 
+	/*
+	 * Socket cloning can throw us here with sk_memcg already
+	 * filled. It won't however, necessarily happen from
+	 * process context. So the test for root memcg given
+	 * the current task's memcg won't help us in this case.
+	 *
+	 * Respecting the original socket's memcg is a better
+	 * decision in this case.
+	 */
+	if (sk->sk_memcg) {
+		css_get(&sk->sk_memcg->css);
+		return;
+	}
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(current);
 	if (memcg == root_mem_cgroup)
diff --git a/net/core/sock.c b/net/core/sock.c
index 1033f8ab0547..e50e7b3f2223 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1683,16 +1683,13 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
 		newsk->sk_dst_pending_confirm = 0;
 		newsk->sk_wmem_queued	= 0;
 		newsk->sk_forward_alloc = 0;
-
-		/* sk->sk_memcg will be populated at accept() time */
-		newsk->sk_memcg = NULL;
-
 		atomic_set(&newsk->sk_drops, 0);
 		newsk->sk_send_head	= NULL;
 		newsk->sk_userlocks	= sk->sk_userlocks & ~SOCK_BINDPORT_LOCK;
 		atomic_set(&newsk->sk_zckey, 0);
 
 		sock_reset_flag(newsk, SOCK_DONE);
+		mem_cgroup_sk_alloc(newsk);
 		cgroup_sk_alloc(&newsk->sk_cgrp_data);
 
 		rcu_read_lock();
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 12410ec6f7f7..881ac6d046f2 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -475,7 +475,6 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern)
 		}
 		spin_unlock_bh(&queue->fastopenq.lock);
 	}
-	mem_cgroup_sk_alloc(newsk);
 out:
 	release_sock(sk);
 	if (req)
-- 
2.14.3

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-02-02 19:54 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-02-02 16:57 [PATCH net] Revert "defer call to mem_cgroup_sk_alloc()" Roman Gushchin
2018-02-02 17:59 ` Eric Dumazet
2018-02-02 18:06   ` Roman Gushchin
2018-02-02 18:39     ` Eric Dumazet
2018-02-02 19:04       ` Roman Gushchin
2018-02-02 19:34         ` Eric Dumazet
2018-02-02 19:54           ` Roman Gushchin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.