All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC net v7] net: skb_expand_head() adjust skb->truesize incorrectly
@ 2021-09-17 16:24 Jakub Kicinski
  2021-09-17 18:15 ` Vasily Averin
  2021-09-18 10:05 ` Vasily Averin
  0 siblings, 2 replies; 19+ messages in thread
From: Jakub Kicinski @ 2021-09-17 16:24 UTC (permalink / raw)
  To: eric.dumazet
  Cc: netdev, Vasily Averin, Christoph Paasch, Hao Sun, Jakub Kicinski

From: Vasily Averin <vvs@virtuozzo.com>

Christoph Paasch reports [1] about incorrect skb->truesize
after skb_expand_head() call in ip6_xmit.
This may happen because of two reasons:
 - skb_set_owner_w() for newly cloned skb is called too early,
   before pskb_expand_head() where truesize is adjusted for (!skb-sk) case.
 - pskb_expand_head() does not adjust truesize in (skb->sk) case.
   In this case sk->sk_wmem_alloc should be adjusted too.

Eric cautions us against increasing sk_wmem_alloc if the old
skb did not hold any wmem references.

[1] https://lkml.org/lkml/2021/8/20/1082

Fixes: f1260ff15a71 ("skbuff: introduce skb_expand_head()")
Reported-by: Christoph Paasch <christoph.paasch@gmail.com>
Reported-by: Hao Sun <sunhao.th@gmail.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
v7: - shift more magic into helpers
    - follow Eric's advice and don't inherit non-wmem sks for now

Looks like we stalled here, let me try to push this forward.
This builds, is it possible to repro without syzcaller?
Anyone willing to test?
---
 include/net/sock.h |  2 ++
 net/core/skbuff.c  | 50 +++++++++++++++++++++++++++++++++++-----------
 net/core/sock.c    | 10 ++++++++++
 3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 66a9a90f9558..102e3e1009d1 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1707,6 +1707,8 @@ void sock_pfree(struct sk_buff *skb);
 #define sock_edemux sock_efree
 #endif
 
+bool is_skb_wmem(const struct sk_buff *skb);
+
 int sock_setsockopt(struct socket *sock, int level, int op,
 		    sockptr_t optval, unsigned int optlen);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7c2ab27fcbf9..5093321c2b65 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1786,6 +1786,24 @@ struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom)
 }
 EXPORT_SYMBOL(skb_realloc_headroom);
 
+static void skb_owner_inherit(struct sk_buff *nskb, struct sk_buff *oskb)
+{
+	if (is_skb_wmem(oskb))
+		skb_set_owner_w(nskb, oskb->sk);
+
+	/* handle rmem sock etc. as needed .. */
+}
+
+static void skb_increase_truesize(struct sk_buff *skb, unsigned int add)
+{
+	if (is_skb_wmem(skb))
+		refcount_add(add, &skb->sk->sk_wmem_alloc);
+	/* handle rmem sock etc. as needed .. */
+	WARN_ON(skb->destructor == sock_rfree);
+
+	skb->truesize += add;
+}
+
 /**
  *	skb_expand_head - reallocate header of &sk_buff
  *	@skb: buffer to reallocate
@@ -1801,6 +1819,7 @@ EXPORT_SYMBOL(skb_realloc_headroom);
 struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
 {
 	int delta = headroom - skb_headroom(skb);
+	int osize = skb_end_offset(skb);
 
 	if (WARN_ONCE(delta <= 0,
 		      "%s is expecting an increase in the headroom", __func__))
@@ -1810,21 +1829,28 @@ struct sk_buff *skb_expand_head(struct sk_buff *skb, unsigned int headroom)
 	if (skb_shared(skb)) {
 		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
 
-		if (likely(nskb)) {
-			if (skb->sk)
-				skb_set_owner_w(nskb, skb->sk);
-			consume_skb(skb);
-		} else {
-			kfree_skb(skb);
-		}
+		if (unlikely(!nskb))
+			goto err_free;
+
+		skb_owner_inherit(nskb, skb);
+		consume_skb(skb);
 		skb = nskb;
 	}
-	if (skb &&
-	    pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
-		kfree_skb(skb);
-		skb = NULL;
-	}
+
+	if (pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC))
+		goto err_free;
+	delta = skb_end_offset(skb) - osize;
+
+	/* pskb_expand_head() will adjust truesize itself for non-sk cases
+	 * todo: move the adjustment there at some point?
+	 */
+	if (skb->sk && skb->destructor != sock_edemux)
+		skb_increase_truesize(skb, delta);
+
 	return skb;
+err_free:
+	kfree_skb(skb);
+	return NULL;
 }
 EXPORT_SYMBOL(skb_expand_head);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 62627e868e03..1483b4f755ef 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2227,6 +2227,16 @@ void skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 }
 EXPORT_SYMBOL(skb_set_owner_w);
 
+/* Should clones of this skb count towards skb->sk->sk_wmem_alloc
+ * and use sock_wfree() as their destructor?
+ */
+bool is_skb_wmem(const struct sk_buff *skb)
+{
+	return skb->destructor == sock_wfree ||
+		skb->destructor == __sock_wfree ||
+		(IS_ENABLED(CONFIG_INET) && skb->destructor == tcp_wfree);
+}
+
 static bool can_skb_orphan_partial(const struct sk_buff *skb)
 {
 #ifdef CONFIG_TLS_DEVICE
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2021-10-22 20:50 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-17 16:24 [RFC net v7] net: skb_expand_head() adjust skb->truesize incorrectly Jakub Kicinski
2021-09-17 18:15 ` Vasily Averin
2021-09-18 10:05 ` Vasily Averin
2021-09-20 18:12   ` Jakub Kicinski
2021-09-20 21:41     ` Vasily Averin
2021-09-21  0:39       ` Jakub Kicinski
2021-09-21  6:36         ` Vasily Averin
2021-09-21 21:25           ` Jakub Kicinski
2021-09-20 21:41     ` [PATCH net v8] " Vasily Averin
2021-09-21  5:21       ` Vasily Averin
2021-10-04 13:00         ` [PATCH net v9] " Vasily Averin
2021-10-04 13:14           ` Vasily Averin
2021-10-04 19:26           ` Eric Dumazet
2021-10-05  5:57             ` Vasily Averin
2021-10-20 16:14               ` Eric Dumazet
2021-10-20 16:18               ` Eric Dumazet
2021-10-22 10:28                 ` [PATCH net v10] " Vasily Averin
2021-10-22 19:32                   ` Eric Dumazet
2021-10-22 20:50                   ` patchwork-bot+netdevbpf

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.