All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 net 0/2] sk->sk_forward_alloc fixes.
@ 2023-02-09  1:33 Kuniyuki Iwashima
  2023-02-09  1:33 ` [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions Kuniyuki Iwashima
  2023-02-09  1:33 ` [PATCH v2 net 2/2] net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues() Kuniyuki Iwashima
  0 siblings, 2 replies; 5+ messages in thread
From: Kuniyuki Iwashima @ 2023-02-09  1:33 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev

The first patch fixes a negative sk_forward_alloc by adding
sk_rmem_schedule() before skb_set_owner_r(), and the second patch
removes an unnecessary WARN_ON_ONCE().

Changes:
  v2:
    * Add the first patch

  v1: https://lore.kernel.org/netdev/20230207183718.54520-1-kuniyu@amazon.com/


Kuniyuki Iwashima (2):
  dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions.
  net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from
    sk_stream_kill_queues().

 net/caif/caif_socket.c |  1 +
 net/core/stream.c      |  1 -
 net/dccp/ipv6.c        | 23 +++++++++++++++++++----
 net/ipv6/tcp_ipv6.c    | 22 ++++++++++++++++++----
 4 files changed, 38 insertions(+), 9 deletions(-)

-- 
2.30.2


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions.
  2023-02-09  1:33 [PATCH v2 net 0/2] sk->sk_forward_alloc fixes Kuniyuki Iwashima
@ 2023-02-09  1:33 ` Kuniyuki Iwashima
  2023-02-09 11:20   ` Eric Dumazet
  2023-02-09  1:33 ` [PATCH v2 net 2/2] net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues() Kuniyuki Iwashima
  1 sibling, 1 reply; 5+ messages in thread
From: Kuniyuki Iwashima @ 2023-02-09  1:33 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, Andrii,
	Arnaldo Carvalho de Melo

Eric Dumazet pointed out [0] that when we call skb_set_owner_r()
for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called,
resulting in a negative sk_forward_alloc.

Note that in (dccp|tcp)_v6_do_rcv(), we call sk_rmem_schedule()
just after skb_clone() instead of after ipv6_opt_accepted().  This is
because tcp_send_synack() can make sk_forward_alloc negative before
ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases.

[0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/

Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()")
Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6")
Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
Cc: Andrii <tulup@mail.ru>
Cc: Arnaldo Carvalho de Melo <acme@mandriva.com>
---
 net/dccp/ipv6.c     | 23 +++++++++++++++++++----
 net/ipv6/tcp_ipv6.c | 22 ++++++++++++++++++----
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4260fe466993..2687e7ef5b5d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -554,8 +554,15 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
 		newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
 		consume_skb(ireq->pktopts);
 		ireq->pktopts = NULL;
-		if (newnp->pktoptions)
-			skb_set_owner_r(newnp->pktoptions, newsk);
+		if (newnp->pktoptions) {
+			if (sk_rmem_schedule(newsk, newnp->pktoptions,
+					     newnp->pktoptions->truesize)) {
+				skb_set_owner_r(newnp->pktoptions, newsk);
+			} else {
+				__kfree_skb(newnp->pktoptions);
+				newnp->pktoptions = NULL;
+			}
+		}
 	}
 
 	return newsk;
@@ -614,8 +621,17 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	   by tcp. Feel free to propose better solution.
 					       --ANK (980728)
 	 */
-	if (np->rxopt.all)
+	if (np->rxopt.all) {
 		opt_skb = skb_clone(skb, GFP_ATOMIC);
+		if (opt_skb) {
+			if (sk_rmem_schedule(sk, opt_skb, opt_skb->truesize)) {
+				skb_set_owner_r(opt_skb, sk);
+			} else {
+				__kfree_skb(opt_skb);
+				opt_skb = NULL;
+			}
+		}
+	}
 
 	if (sk->sk_state == DCCP_OPEN) { /* Fast path */
 		if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
@@ -679,7 +695,6 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb,
 				      &DCCP_SKB_CB(opt_skb)->header.h6)) {
-			skb_set_owner_r(opt_skb, sk);
 			memmove(IP6CB(opt_skb),
 				&DCCP_SKB_CB(opt_skb)->header.h6,
 				sizeof(struct inet6_skb_parm));
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 11b736a76bd7..95c1078aba5a 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1392,8 +1392,14 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
 			consume_skb(ireq->pktopts);
 			ireq->pktopts = NULL;
 			if (newnp->pktoptions) {
-				tcp_v6_restore_cb(newnp->pktoptions);
-				skb_set_owner_r(newnp->pktoptions, newsk);
+				if (sk_rmem_schedule(newsk, newnp->pktoptions,
+						     newnp->pktoptions->truesize)) {
+					tcp_v6_restore_cb(newnp->pktoptions);
+					skb_set_owner_r(newnp->pktoptions, newsk);
+				} else {
+					__kfree_skb(newnp->pktoptions);
+					newnp->pktoptions = NULL;
+				}
 			}
 		}
 	} else {
@@ -1465,8 +1471,17 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 	   by tcp. Feel free to propose better solution.
 					       --ANK (980728)
 	 */
-	if (np->rxopt.all)
+	if (np->rxopt.all) {
 		opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+		if (opt_skb) {
+			if (sk_rmem_schedule(sk, opt_skb, opt_skb->truesize)) {
+				skb_set_owner_r(opt_skb, sk);
+			} else {
+				__kfree_skb(opt_skb);
+				opt_skb = NULL;
+			}
+		}
+	}
 
 	reason = SKB_DROP_REASON_NOT_SPECIFIED;
 	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1552,7 +1567,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
 		if (np->repflow)
 			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
 		if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
-			skb_set_owner_r(opt_skb, sk);
 			tcp_v6_restore_cb(opt_skb);
 			opt_skb = xchg(&np->pktoptions, opt_skb);
 		} else {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* [PATCH v2 net 2/2] net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues().
  2023-02-09  1:33 [PATCH v2 net 0/2] sk->sk_forward_alloc fixes Kuniyuki Iwashima
  2023-02-09  1:33 ` [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions Kuniyuki Iwashima
@ 2023-02-09  1:33 ` Kuniyuki Iwashima
  1 sibling, 0 replies; 5+ messages in thread
From: Kuniyuki Iwashima @ 2023-02-09  1:33 UTC (permalink / raw)
  To: David S. Miller, Eric Dumazet, Jakub Kicinski, Paolo Abeni
  Cc: Kuniyuki Iwashima, Kuniyuki Iwashima, netdev, syzbot,
	Christoph Paasch, Matthieu Baerts

Christoph Paasch reported that commit b5fc29233d28 ("inet6: Remove
inet6_destroy_sock() in sk->sk_prot->destroy().") started triggering
WARN_ON_ONCE(sk->sk_forward_alloc) in sk_stream_kill_queues().  [0 - 2]
Also, we can reproduce it by a program in [3].

In the commit, we delay freeing ipv6_pinfo.pktoptions from sk->destroy()
to sk->sk_destruct(), so sk->sk_forward_alloc is no longer zero in
inet_csk_destroy_sock().

The same check has been in inet_sock_destruct() since at least v2.6,
so we can just remove the WARN_ON_ONCE().  However, among the users of
sk_stream_kill_queues(), only CAIF is not calling inet_sock_destruct().
Thus, we add the same WARN_ON_ONCE() to caif_sock_destructor().

[0]: https://lore.kernel.org/netdev/39725AB4-88F1-41B3-B07F-949C5CAEFF4F@icloud.com/
[1]: https://github.com/multipath-tcp/mptcp_net-next/issues/341
[2]:
WARNING: CPU: 0 PID: 3232 at net/core/stream.c:212 sk_stream_kill_queues+0x2f9/0x3e0
Modules linked in:
CPU: 0 PID: 3232 Comm: syz-executor.0 Not tainted 6.2.0-rc5ab24eb4698afbe147b424149c529e2a43ec24eb5 #2
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014
RIP: 0010:sk_stream_kill_queues+0x2f9/0x3e0
Code: 03 0f b6 04 02 84 c0 74 08 3c 03 0f 8e ec 00 00 00 8b ab 08 01 00 00 e9 60 ff ff ff e8 d0 5f b6 fe 0f 0b eb 97 e8 c7 5f b6 fe <0f> 0b eb a0 e8 be 5f b6 fe 0f 0b e9 6a fe ff ff e8 02 07 e3 fe e9
RSP: 0018:ffff88810570fc68 EFLAGS: 00010293
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 0000000000000000
RDX: ffff888101f38f40 RSI: ffffffff8285e529 RDI: 0000000000000005
RBP: 0000000000000ce0 R08: 0000000000000005 R09: 0000000000000000
R10: 0000000000000ce0 R11: 0000000000000001 R12: ffff8881009e9488
R13: ffffffff84af2cc0 R14: 0000000000000000 R15: ffff8881009e9458
FS:  00007f7fdfbd5800(0000) GS:ffff88811b600000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000001b32923000 CR3: 00000001062fc006 CR4: 0000000000170ef0
Call Trace:
 <TASK>
 inet_csk_destroy_sock+0x1a1/0x320
 __tcp_close+0xab6/0xe90
 tcp_close+0x30/0xc0
 inet_release+0xe9/0x1f0
 inet6_release+0x4c/0x70
 __sock_release+0xd2/0x280
 sock_close+0x15/0x20
 __fput+0x252/0xa20
 task_work_run+0x169/0x250
 exit_to_user_mode_prepare+0x113/0x120
 syscall_exit_to_user_mode+0x1d/0x40
 do_syscall_64+0x48/0x90
 entry_SYSCALL_64_after_hwframe+0x72/0xdc
RIP: 0033:0x7f7fdf7ae28d
Code: c1 20 00 00 75 10 b8 03 00 00 00 0f 05 48 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 ee fb ff ff 48 89 04 24 b8 03 00 00 00 0f 05 <48> 8b 3c 24 48 89 c2 e8 37 fc ff ff 48 89 d0 48 83 c4 08 48 3d 01
RSP: 002b:00000000007dfbb0 EFLAGS: 00000293 ORIG_RAX: 0000000000000003
RAX: 0000000000000000 RBX: 0000000000000004 RCX: 00007f7fdf7ae28d
RDX: 0000000000000000 RSI: ffffffffffffffff RDI: 0000000000000003
RBP: 0000000000000000 R08: 000000007f338e0f R09: 0000000000000e0f
R10: 000000007f338e13 R11: 0000000000000293 R12: 00007f7fdefff000
R13: 00007f7fdefffcd8 R14: 00007f7fdefffce0 R15: 00007f7fdefffcd8
 </TASK>

[3]: https://lore.kernel.org/netdev/20230208004245.83497-1-kuniyu@amazon.com/

Fixes: b5fc29233d28 ("inet6: Remove inet6_destroy_sock() in sk->sk_prot->destroy().")
Reported-by: syzbot <syzkaller@googlegroups.com>
Reported-by: Christoph Paasch <christophpaasch@icloud.com>
Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
---
Cc: Matthieu Baerts <matthieu.baerts@tessares.net>
---
 net/caif/caif_socket.c | 1 +
 net/core/stream.c      | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 748be7253248..78c9729a6057 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1015,6 +1015,7 @@ static void caif_sock_destructor(struct sock *sk)
 		return;
 	}
 	sk_stream_kill_queues(&cf_sk->sk);
+	WARN_ON_ONCE(sk->sk_forward_alloc);
 	caif_free_client(&cf_sk->layer);
 }
 
diff --git a/net/core/stream.c b/net/core/stream.c
index cd06750dd329..434446ab14c5 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk)
 	sk_mem_reclaim_final(sk);
 
 	WARN_ON_ONCE(sk->sk_wmem_queued);
-	WARN_ON_ONCE(sk->sk_forward_alloc);
 
 	/* It is _impossible_ for the backlog to contain anything
 	 * when we get here.  All user references to this socket
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions.
  2023-02-09  1:33 ` [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions Kuniyuki Iwashima
@ 2023-02-09 11:20   ` Eric Dumazet
  2023-02-09 18:09     ` Kuniyuki Iwashima
  0 siblings, 1 reply; 5+ messages in thread
From: Eric Dumazet @ 2023-02-09 11:20 UTC (permalink / raw)
  To: Kuniyuki Iwashima
  Cc: David S. Miller, Jakub Kicinski, Paolo Abeni, Kuniyuki Iwashima,
	netdev, Andrii, Arnaldo Carvalho de Melo

On Thu, Feb 9, 2023 at 2:34 AM Kuniyuki Iwashima <kuniyu@amazon.com> wrote:
>
> Eric Dumazet pointed out [0] that when we call skb_set_owner_r()
> for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called,
> resulting in a negative sk_forward_alloc.
>
> Note that in (dccp|tcp)_v6_do_rcv(), we call sk_rmem_schedule()
> just after skb_clone() instead of after ipv6_opt_accepted().  This is
> because tcp_send_synack() can make sk_forward_alloc negative before
> ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases.
>
> [0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/
>
> Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()")
> Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6")
> Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> ---

Thanks, but I suggest we add a helper to avoid the duplication...

Something like this (this can also be made out-of-line, because this
is not fast path)

Name is probably not well chosen...

diff --git a/include/net/sock.h b/include/net/sock.h
index dcd72e6285b23006051d651630bdd966741cbb01..f5a97aed14345c403b25339fcb86d99bc51233a7 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2977,4 +2977,19 @@ static inline bool sk_is_readable(struct sock *sk)
                return sk->sk_prot->sock_is_readable(sk);
        return false;
 }
+
+static inline struct sk_buff *
+sk_clone_and_charge_skb(struct sock *sk, struct sk_buff *skb)
+{
+       skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+       if (skb) {
+               if (sk_rmem_schedule(sk, skb, skb->truesize)) {
+                       skb_set_owner_r(skb, sk);
+                       return skb;
+               }
+               __kfree_skb(skb);
+       }
+       return NULL;
+}
+
 #endif /* _SOCK_H */

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions.
  2023-02-09 11:20   ` Eric Dumazet
@ 2023-02-09 18:09     ` Kuniyuki Iwashima
  0 siblings, 0 replies; 5+ messages in thread
From: Kuniyuki Iwashima @ 2023-02-09 18:09 UTC (permalink / raw)
  To: edumazet; +Cc: acme, davem, kuba, kuni1840, kuniyu, netdev, pabeni, tulup

From:   Eric Dumazet <edumazet@google.com>
Date:   Thu, 9 Feb 2023 12:20:04 +0100
> >
> > Eric Dumazet pointed out [0] that when we call skb_set_owner_r()
> > for ipv6_pinfo.pktoptions, sk_rmem_schedule() has not been called,
> > resulting in a negative sk_forward_alloc.
> >
> > Note that in (dccp|tcp)_v6_do_rcv(), we call sk_rmem_schedule()
> > just after skb_clone() instead of after ipv6_opt_accepted().  This is
> > because tcp_send_synack() can make sk_forward_alloc negative before
> > ipv6_opt_accepted() in the crossed SYN-ACK or self-connect() cases.
> >
> > [0]: https://lore.kernel.org/netdev/CANn89iK9oc20Jdi_41jb9URdF210r7d1Y-+uypbMSbOfY6jqrg@mail.gmail.com/
> >
> > Fixes: 323fbd0edf3f ("net: dccp: Add handling of IPV6_PKTOPTIONS to dccp_v6_do_rcv()")
> > Fixes: 3df80d9320bc ("[DCCP]: Introduce DCCPv6")
> > Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
> > Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
> > ---
> 
> Thanks, but I suggest we add a helper to avoid the duplication...

This is much cleaner, thank you!

> 
> Something like this (this can also be made out-of-line, because this
> is not fast path)
> 
> Name is probably not well chosen...

or skb_clone_and_charge(skb, sk), skb_clone_and_charge_r(skb, sk), or
skb_clone_and_set_owner_r(skb, sk) ?


> 
> diff --git a/include/net/sock.h b/include/net/sock.h
> index dcd72e6285b23006051d651630bdd966741cbb01..f5a97aed14345c403b25339fcb86d99bc51233a7 100644
> --- a/include/net/sock.h
> +++ b/include/net/sock.h
> @@ -2977,4 +2977,19 @@ static inline bool sk_is_readable(struct sock *sk)
>                 return sk->sk_prot->sock_is_readable(sk);
>         return false;
>  }
> +
> +static inline struct sk_buff *
> +sk_clone_and_charge_skb(struct sock *sk, struct sk_buff *skb)
> +{
> +       skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
> +       if (skb) {
> +               if (sk_rmem_schedule(sk, skb, skb->truesize)) {
> +                       skb_set_owner_r(skb, sk);
> +                       return skb;
> +               }
> +               __kfree_skb(skb);
> +       }
> +       return NULL;
> +}
> +
>  #endif /* _SOCK_H */

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2023-02-09 18:10 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-09  1:33 [PATCH v2 net 0/2] sk->sk_forward_alloc fixes Kuniyuki Iwashima
2023-02-09  1:33 ` [PATCH v2 net 1/2] dccp/tcp: Avoid negative sk_forward_alloc by ipv6_pinfo.pktoptions Kuniyuki Iwashima
2023-02-09 11:20   ` Eric Dumazet
2023-02-09 18:09     ` Kuniyuki Iwashima
2023-02-09  1:33 ` [PATCH v2 net 2/2] net: Remove WARN_ON_ONCE(sk->sk_forward_alloc) from sk_stream_kill_queues() Kuniyuki Iwashima

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.