All of lore.kernel.org
 help / color / mirror / Atom feed
From: Eric Dumazet <eric.dumazet@gmail.com>
To: Paolo Abeni <pabeni@redhat.com>
Cc: netdev@vger.kernel.org, "David S. Miller" <davem@davemloft.net>,
	James Morris <jmorris@namei.org>,
	Trond Myklebust <trond.myklebust@primarydata.com>,
	Alexander Duyck <alexander.duyck@gmail.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Eric Dumazet <edumazet@google.com>,
	Tom Herbert <tom@herbertland.com>,
	Hannes Frederic Sowa <hannes@stressinduktion.org>,
	linux-nfs@vger.kernel.org
Subject: Re: [PATCH net-next] udp: do fwd memory scheduling on dequeue
Date: Fri, 28 Oct 2016 10:50:30 -0700	[thread overview]
Message-ID: <1477677030.7065.250.camel@edumazet-glaptop3.roam.corp.google.com> (raw)
In-Reply-To: <1477674975.7065.245.camel@edumazet-glaptop3.roam.corp.google.com>

On Fri, 2016-10-28 at 10:16 -0700, Eric Dumazet wrote:
> Nice !
> 
> I was working on this as well and my implementation was somewhat
> different.

This is my WIP

Note this can be split in two parts.

1) One adding struct sock *sk param to ip_cmsg_recv_offset()
 
   This was because I left skb->sk NULL for skbs stored in receive
queue.
   You chose instead to set skb->sk, which is unusual (check
skb_orphan() BUG_ON())

2) Udp changes.

Tell me what you think, thanks again !


diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 601258f6e621..52e70431abc9 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3033,9 +3033,13 @@ int __skb_wait_for_more_packets(struct sock *sk, int *err, long *timeo_p,
 				const struct sk_buff *skb);
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned flags,
 					int *peeked, int *off, int *err,
-					struct sk_buff **last);
+					struct sk_buff **last,
+					void (*destructor)(struct sock *sk,
+							   struct sk_buff *skb));
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
-				    int *peeked, int *off, int *err);
+				    int *peeked, int *off, int *err,
+				    void (*destructor)(struct sock *sk,
+						       struct sk_buff *skb));
 struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
 				  int *err);
 unsigned int datagram_poll(struct file *file, struct socket *sock,
diff --git a/include/net/ip.h b/include/net/ip.h
index 79b102ffbe16..f91fc376f3fb 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -572,7 +572,8 @@ int ip_options_rcv_srr(struct sk_buff *skb);
  */
 
 void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb);
-void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int tlen, int offset);
+void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
+			 struct sk_buff *skb, int tlen, int offset);
 int ip_cmsg_send(struct sock *sk, struct msghdr *msg,
 		 struct ipcm_cookie *ipc, bool allow_ipv6);
 int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
@@ -594,7 +595,7 @@ void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport,
 
 static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb)
 {
-	ip_cmsg_recv_offset(msg, skb, 0, 0);
+	ip_cmsg_recv_offset(msg, skb->sk, skb, 0, 0);
 }
 
 bool icmp_global_allow(void);
diff --git a/include/net/udp.h b/include/net/udp.h
index 18f1e6b91927..0178f4552c4d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -248,6 +248,7 @@ static inline __be16 udp_flow_src_port(struct net *net, struct sk_buff *skb,
 /* net/ipv4/udp.c */
 void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len);
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb);
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb);
 
 void udp_v4_early_demux(struct sk_buff *skb);
 int udp_get_port(struct sock *sk, unsigned short snum,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index bfb973aebb5b..48228d15f33c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -198,7 +198,8 @@ static struct sk_buff *skb_set_peeked(struct sk_buff *skb)
  */
 struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 					int *peeked, int *off, int *err,
-					struct sk_buff **last)
+					struct sk_buff **last,
+					void (*destructor)(struct sock *sk, struct sk_buff *skb))
 {
 	struct sk_buff_head *queue = &sk->sk_receive_queue;
 	struct sk_buff *skb;
@@ -241,9 +242,11 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 				}
 
 				atomic_inc(&skb->users);
-			} else
+			} else {
 				__skb_unlink(skb, queue);
-
+				if (destructor)
+					destructor(sk, skb);
+			}
 			spin_unlock_irqrestore(&queue->lock, cpu_flags);
 			*off = _off;
 			return skb;
@@ -262,7 +265,9 @@ struct sk_buff *__skb_try_recv_datagram(struct sock *sk, unsigned int flags,
 EXPORT_SYMBOL(__skb_try_recv_datagram);
 
 struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
-				    int *peeked, int *off, int *err)
+				    int *peeked, int *off, int *err,
+				    void (*destructor)(struct sock *sk,
+						       struct sk_buff *skb))
 {
 	struct sk_buff *skb, *last;
 	long timeo;
@@ -271,7 +276,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned int flags,
 
 	do {
 		skb = __skb_try_recv_datagram(sk, flags, peeked, off, err,
-					      &last);
+					      &last, destructor);
 		if (skb)
 			return skb;
 
@@ -290,7 +295,7 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned int flags,
 	int peeked, off = 0;
 
 	return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				   &peeked, &off, err);
+				   &peeked, &off, err, NULL);
 }
 EXPORT_SYMBOL(skb_recv_datagram);
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b8a2d63d1fb8..1de70870d0aa 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -153,11 +153,10 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb)
 	put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin);
 }
 
-void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb,
-			 int tlen, int offset)
+void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk,
+			 struct sk_buff *skb, int tlen, int offset)
 {
-	struct inet_sock *inet = inet_sk(skb->sk);
-	unsigned int flags = inet->cmsg_flags;
+	unsigned int flags = inet_sk(sk)->cmsg_flags;
 
 	/* Ordered by supposed usage frequency */
 	if (flags & IP_CMSG_PKTINFO) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index bfa9609ba0f4..8ff7ee74a992 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1178,20 +1178,20 @@ static void udp_rmem_release(struct sock *sk, int size, int partial)
 
 	atomic_sub(size, &sk->sk_rmem_alloc);
 
-	spin_lock_bh(&sk->sk_receive_queue.lock);
 	sk->sk_forward_alloc += size;
 	amt = (sk->sk_forward_alloc - partial) & ~(SK_MEM_QUANTUM - 1);
 	sk->sk_forward_alloc -= amt;
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
 
 	if (amt)
 		__sk_mem_reduce_allocated(sk, amt >> SK_MEM_QUANTUM_SHIFT);
 }
 
-static void udp_rmem_free(struct sk_buff *skb)
+/* Note : called with sk_receive_queue.lock held */
+void udp_skb_destructor(struct sock *sk, struct sk_buff *skb)
 {
-	udp_rmem_release(skb->sk, skb->truesize, 1);
+	udp_rmem_release(sk, skb->truesize, 1);
 }
+EXPORT_SYMBOL(udp_skb_destructor);
 
 int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 {
@@ -1220,17 +1220,14 @@ int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb)
 		if (!__sk_mem_raise_allocated(sk, delta, amt, SK_MEM_RECV)) {
 			err = -ENOBUFS;
 			spin_unlock(&list->lock);
-			goto uncharge_drop;
+			goto drop;
 		}
 
 		sk->sk_forward_alloc += delta;
 	}
-
+	atomic_add(size, &sk->sk_rmem_alloc);
 	sk->sk_forward_alloc -= size;
 
-	/* the skb owner in now the udp socket */
-	skb->sk = sk;
-	skb->destructor = udp_rmem_free;
 	skb->dev = NULL;
 	sock_skb_set_dropcount(sk, skb);
 
@@ -1253,9 +1250,14 @@ EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb);
 
 static void udp_destruct_sock(struct sock *sk)
 {
-	/* reclaim completely the forward allocated memory */
-	__skb_queue_purge(&sk->sk_receive_queue);
-	udp_rmem_release(sk, 0, 0);
+	unsigned int total = 0;
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+		total += skb->truesize;
+		kfree_skb(skb);
+	}
+	udp_rmem_release(sk, total, 0);
 	inet_sock_destruct(sk);
 }
 
@@ -1287,12 +1289,11 @@ EXPORT_SYMBOL_GPL(skb_consume_udp);
  */
 static int first_packet_length(struct sock *sk)
 {
-	struct sk_buff_head list_kill, *rcvq = &sk->sk_receive_queue;
+	struct sk_buff_head *rcvq = &sk->sk_receive_queue;
 	struct sk_buff *skb;
+	int total = 0;
 	int res;
 
-	__skb_queue_head_init(&list_kill);
-
 	spin_lock_bh(&rcvq->lock);
 	while ((skb = skb_peek(rcvq)) != NULL &&
 		udp_lib_checksum_complete(skb)) {
@@ -1302,12 +1303,14 @@ static int first_packet_length(struct sock *sk)
 				IS_UDPLITE(sk));
 		atomic_inc(&sk->sk_drops);
 		__skb_unlink(skb, rcvq);
-		__skb_queue_tail(&list_kill, skb);
+		total += skb->truesize;
+		kfree_skb(skb);
 	}
 	res = skb ? skb->len : -1;
+	if (total)
+		udp_rmem_release(sk, total, 1);
 	spin_unlock_bh(&rcvq->lock);
 
-	__skb_queue_purge(&list_kill);
 	return res;
 }
 
@@ -1363,7 +1366,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 try_again:
 	peeking = off = sk_peek_offset(sk, flags);
 	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				  &peeked, &off, &err);
+				  &peeked, &off, &err, udp_skb_destructor);
 	if (!skb)
 		return err;
 
@@ -1420,7 +1423,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 		*addr_len = sizeof(*sin);
 	}
 	if (inet->cmsg_flags)
-		ip_cmsg_recv_offset(msg, skb, sizeof(struct udphdr), off);
+		ip_cmsg_recv_offset(msg, sk, skb, sizeof(struct udphdr), off);
 
 	err = copied;
 	if (flags & MSG_TRUNC)
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index a7700bbf6788..7e602b89c64d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -344,7 +344,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 try_again:
 	peeking = off = sk_peek_offset(sk, flags);
 	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
-				  &peeked, &off, &err);
+				  &peeked, &off, &err, udp_skb_destructor);
 	if (!skb)
 		return err;
 
@@ -425,7 +425,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 	if (is_udp4) {
 		if (inet->cmsg_flags)
-			ip_cmsg_recv_offset(msg, skb,
+			ip_cmsg_recv_offset(msg, sk, skb,
 					    sizeof(struct udphdr), off);
 	} else {
 		if (np->rxopt.all)
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 145082e2ba36..8b995f88d000 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2114,7 +2114,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
 
 		skip = sk_peek_offset(sk, flags);
 		skb = __skb_try_recv_datagram(sk, flags, &peeked, &skip, &err,
-					      &last);
+					      &last, NULL);
 		if (skb)
 			break;
 

  reply	other threads:[~2016-10-28 17:50 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-28 13:20 [PATCH net-next] udp: do fwd memory scheduling on dequeue Paolo Abeni
2016-10-28 13:20 ` Paolo Abeni
2016-10-28 17:16 ` Eric Dumazet
2016-10-28 17:50   ` Eric Dumazet [this message]
     [not found]     ` <1477677030.7065.250.camel-XN9IlZ5yJG9HTL0Zs8A6p+yfmBU6pStAUsxypvmhUTTZJqsBc5GL+g@public.gmane.org>
2016-10-29  8:17       ` Paolo Abeni
2016-10-29  8:17         ` Paolo Abeni
     [not found]         ` <1477729045.5306.11.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2016-10-29 12:43           ` Eric Dumazet
2016-10-29 12:43             ` Eric Dumazet
2016-10-31 15:02             ` Paolo Abeni
     [not found]               ` <1477926132.6655.10.camel-H+wXaHxf7aLQT0dZR+AlfA@public.gmane.org>
2016-10-31 15:16                 ` Eric Dumazet
2016-10-31 15:16                   ` Eric Dumazet

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1477677030.7065.250.camel@edumazet-glaptop3.roam.corp.google.com \
    --to=eric.dumazet@gmail.com \
    --cc=alexander.duyck@gmail.com \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=hannes@stressinduktion.org \
    --cc=jmorris@namei.org \
    --cc=linux-nfs@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=tom@herbertland.com \
    --cc=trond.myklebust@primarydata.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.