linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2 1/2] udp: UDP socket send queue repair
@ 2021-08-11 15:45 Bui Quang Minh
  2021-08-11 16:14 ` Eric Dumazet
  2021-08-16 14:38 ` Willem de Bruijn
  0 siblings, 2 replies; 9+ messages in thread
From: Bui Quang Minh @ 2021-08-11 15:45 UTC (permalink / raw)
  To: linux-kernel, netdev
  Cc: davem, kuba, yoshfuji, dsahern, willemb, pabeni, avagin,
	alexander, minhquangbui99, lesedorucalin01

In this patch, I implement UDP_REPAIR sockoption and a new path in
udp_recvmsg for dumping the corked packet in UDP socket's send queue.

A userspace program can use recvmsg syscall to get the packet's data and
the msg_name information of the packet. Currently, other related
information in inet_cork that are set in cmsg are not dumped.

While working on this, I was aware of Lese Doru Calin's patch and got some
ideas from it.

Link: https://lore.kernel.org/netdev/20200502082856.GA3152@white/
Signed-off-by: Bui Quang Minh <minhquangbui99@gmail.com>
---
 include/linux/udp.h      |  3 +-
 include/net/udp.h        |  2 +
 include/uapi/linux/udp.h |  1 +
 net/ipv4/udp.c           | 94 +++++++++++++++++++++++++++++++++++++++-
 net/ipv6/udp.c           | 56 +++++++++++++++++++++++-
 5 files changed, 151 insertions(+), 5 deletions(-)

diff --git a/include/linux/udp.h b/include/linux/udp.h
index ae66dadd8543..63df0753966e 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -70,7 +70,8 @@ struct udp_sock {
 #define UDPLITE_SEND_CC  0x2  		/* set via udplite setsockopt         */
 #define UDPLITE_RECV_CC  0x4		/* set via udplite setsocktopt        */
 	__u8		 pcflag;        /* marks socket as UDP-Lite if > 0    */
-	__u8		 unused[3];
+	__u8		 repair;
+	__u8		 unused[2];
 	/*
 	 * For encapsulation sockets.
 	 */
diff --git a/include/net/udp.h b/include/net/udp.h
index 360df454356c..4550e72b9f2a 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -331,6 +331,8 @@ struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
 				 __be16 sport, __be16 dport);
 int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 		  sk_read_actor_t recv_actor);
+int udp_peek_sndq(struct sock *sk, struct msghdr *msg,
+		  size_t len);
 
 /* UDP uses skb->dev_scratch to cache as much information as possible and avoid
  * possibly multiple cache miss on dequeue()
diff --git a/include/uapi/linux/udp.h b/include/uapi/linux/udp.h
index 4828794efcf8..255d056403da 100644
--- a/include/uapi/linux/udp.h
+++ b/include/uapi/linux/udp.h
@@ -29,6 +29,7 @@ struct udphdr {
 
 /* UDP socket options */
 #define UDP_CORK	1	/* Never send partially complete segments */
+#define UDP_REPAIR	2	/* UDP sock is under repair right now */
 #define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
 #define UDP_NO_CHECK6_TX 101	/* Disable sending checksum for UDP6X */
 #define UDP_NO_CHECK6_RX 102	/* Disable accpeting checksum for UDP6 */
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 1a742b710e54..c91148956338 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1826,6 +1826,65 @@ int udp_read_sock(struct sock *sk, read_descriptor_t *desc,
 }
 EXPORT_SYMBOL(udp_read_sock);
 
+static int udp_copy_addr(struct sock *sk, struct msghdr *msg, int *addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct flowi4 *fl4;
+	DECLARE_SOCKADDR(struct sockaddr_in *, sin, msg->msg_name);
+
+	if (udp_sk(sk)->pending != AF_INET)
+		return -EAGAIN;
+
+	if (sin) {
+		fl4 = &inet->cork.fl.u.ip4;
+		sin->sin_family = AF_INET;
+		sin->sin_port = fl4->fl4_dport;
+		sin->sin_addr.s_addr = fl4->daddr;
+		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+		*addr_len = sizeof(*sin);
+	}
+
+	return 0;
+}
+
+int udp_peek_sndq(struct sock *sk, struct msghdr *msg, size_t len)
+{
+	struct sk_buff *skb;
+	int copied = 0, err = 0, peek_off, off, header_off, copy_len;
+
+	peek_off = READ_ONCE(sk->sk_peek_off);
+	if (peek_off < 0)
+		off = 0;
+	else
+		off = peek_off;
+
+	skb_queue_walk(&sk->sk_write_queue, skb) {
+		header_off = skb_transport_offset(skb) + sizeof(struct udphdr);
+		if (off > skb->len - header_off) {
+			off -= skb->len - header_off;
+			continue;
+		}
+
+		if (len > skb->len - off - header_off)
+			copy_len = skb->len - off - header_off;
+		else
+			copy_len = len;
+
+		err = skb_copy_datagram_msg(skb, off + header_off, msg, copy_len);
+		if (err)
+			return err;
+
+		copied += copy_len;
+		len -= copy_len;
+		off = 0;
+	}
+
+	if (peek_off >= 0)
+		sk_peek_offset_bwd(sk, -copied);
+	return copied;
+}
+EXPORT_SYMBOL(udp_peek_sndq);
+
 /*
  * 	This should be easy, if there is something there we
  * 	return it, otherwise we block.
@@ -1841,10 +1900,27 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 	int off, err, peeking = flags & MSG_PEEK;
 	int is_udplite = IS_UDPLITE(sk);
 	bool checksum_valid = false;
+	struct udp_sock *up = udp_sk(sk);
 
 	if (flags & MSG_ERRQUEUE)
 		return ip_recv_error(sk, msg, len, addr_len);
 
+	if (unlikely(up->repair)) {
+		if (!peeking)
+			return -EPERM;
+
+		lock_sock(sk);
+		err = udp_copy_addr(sk, msg, addr_len);
+		if (err) {
+			release_sock(sk);
+			return err;
+		}
+
+		err = udp_peek_sndq(sk, msg, len);
+		release_sock(sk);
+		return err;
+	}
+
 try_again:
 	off = sk_peek_offset(sk, flags);
 	skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
@@ -1912,7 +1988,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 						      (struct sockaddr *)sin);
 	}
 
-	if (udp_sk(sk)->gro_enabled)
+	if (up->gro_enabled)
 		udp_cmsg_recv(msg, sk, skb);
 
 	if (inet->cmsg_flags)
@@ -1926,7 +2002,7 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+	if (!__sk_queue_drop_skb(sk, &up->reader_queue, skb, flags,
 				 udp_skb_destructor)) {
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);
 		UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite);
@@ -2752,6 +2828,16 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 		up->pcflag |= UDPLITE_RECV_CC;
 		break;
 
+	case UDP_REPAIR:
+		if (!sk_net_capable(sk, CAP_NET_ADMIN)) {
+			err = -EPERM;
+			break;
+		}
+
+		up->repair = valbool;
+		sk->sk_peek_off = -1;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -2820,6 +2906,10 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
 		val = up->pcrlen;
 		break;
 
+	case UDP_REPAIR:
+		val = up->repair;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c5e15e94bb00..09b5a489829b 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -313,6 +313,42 @@ static int udp6_skb_len(struct sk_buff *skb)
 	return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
 }
 
+static int udp6_copy_addr(struct sock *sk, struct msghdr *msg, int *addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct flowi4 *fl4;
+	struct flowi6 *fl6;
+	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);
+
+	if (sin6) {
+		switch (udp_sk(sk)->pending) {
+		case AF_INET:
+			fl4 = &inet->cork.fl.u.ip4;
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_port = fl4->fl4_dport;
+			ipv6_addr_set_v4mapped(fl4->daddr,
+					       &sin6->sin6_addr);
+			sin6->sin6_flowinfo = 0;
+			sin6->sin6_scope_id = 0;
+			*addr_len = sizeof(*sin6);
+			break;
+		case AF_INET6:
+			fl6 = &inet->cork.fl.u.ip6;
+			sin6->sin6_family = AF_INET6;
+			sin6->sin6_port = fl6->fl6_dport;
+			sin6->sin6_addr = fl6->daddr;
+			sin6->sin6_flowinfo = fl6->flowlabel & IPV6_FLOWINFO_MASK;
+			sin6->sin6_scope_id = fl6->flowi6_oif;
+			*addr_len = sizeof(*sin6);
+			break;
+		default:
+			return -EAGAIN;
+		}
+	}
+
+	return 0;
+}
+
 /*
  *	This should be easy, if there is something there we
  *	return it, otherwise we block.
@@ -330,6 +366,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	struct udp_mib __percpu *mib;
 	bool checksum_valid = false;
 	int is_udp4;
+	struct udp_sock *up = udp_sk(sk);
 
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len, addr_len);
@@ -337,6 +374,21 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
 		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
 
+	if (unlikely(up->repair)) {
+		if (!peeking)
+			return -EPERM;
+
+		lock_sock(sk);
+		err = udp6_copy_addr(sk, msg, addr_len);
+		if (err) {
+			release_sock(sk);
+			return err;
+		}
+
+		err = udp_peek_sndq(sk, msg, len);
+		release_sock(sk);
+		return err;
+	}
 try_again:
 	off = sk_peek_offset(sk, flags);
 	skb = __skb_recv_udp(sk, flags, noblock, &off, &err);
@@ -413,7 +465,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 						      (struct sockaddr *)sin6);
 	}
 
-	if (udp_sk(sk)->gro_enabled)
+	if (up->gro_enabled)
 		udp_cmsg_recv(msg, sk, skb);
 
 	if (np->rxopt.all)
@@ -436,7 +488,7 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 	return err;
 
 csum_copy_err:
-	if (!__sk_queue_drop_skb(sk, &udp_sk(sk)->reader_queue, skb, flags,
+	if (!__sk_queue_drop_skb(sk, &up->reader_queue, skb, flags,
 				 udp_skb_destructor)) {
 		SNMP_INC_STATS(mib, UDP_MIB_CSUMERRORS);
 		SNMP_INC_STATS(mib, UDP_MIB_INERRORS);
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-11 15:45 [PATCH v2 1/2] udp: UDP socket send queue repair Bui Quang Minh
@ 2021-08-11 16:14 ` Eric Dumazet
  2021-08-12 13:46   ` Bui Quang Minh
  2021-08-16 14:38 ` Willem de Bruijn
  1 sibling, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2021-08-11 16:14 UTC (permalink / raw)
  To: Bui Quang Minh, linux-kernel, netdev
  Cc: davem, kuba, yoshfuji, dsahern, willemb, pabeni, avagin,
	alexander, lesedorucalin01



On 8/11/21 5:45 PM, Bui Quang Minh wrote:
> In this patch, I implement UDP_REPAIR sockoption and a new path in
> udp_recvmsg for dumping the corked packet in UDP socket's send queue.
> 
> A userspace program can use recvmsg syscall to get the packet's data and
> the msg_name information of the packet. Currently, other related
> information in inet_cork that are set in cmsg are not dumped.
> 
> While working on this, I was aware of Lese Doru Calin's patch and got some
> ideas from it.


What is the use case for this feature, adding a test in UDP fast path ?

CORKed UDP is hardly used nowadays.

IMO, TCP_REPAIR hijacking standard system calls was a design error,
we should have added new system calls.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-11 16:14 ` Eric Dumazet
@ 2021-08-12 13:46   ` Bui Quang Minh
  2021-08-12 15:51     ` Eric Dumazet
  0 siblings, 1 reply; 9+ messages in thread
From: Bui Quang Minh @ 2021-08-12 13:46 UTC (permalink / raw)
  To: Eric Dumazet, linux-kernel, netdev
  Cc: davem, kuba, yoshfuji, dsahern, willemb, pabeni, avagin,
	alexander, lesedorucalin01



On 8/11/2021 11:14 PM, Eric Dumazet wrote:
> 
> 
> On 8/11/21 5:45 PM, Bui Quang Minh wrote:
>> In this patch, I implement UDP_REPAIR sockoption and a new path in
>> udp_recvmsg for dumping the corked packet in UDP socket's send queue.
>>
>> A userspace program can use recvmsg syscall to get the packet's data and
>> the msg_name information of the packet. Currently, other related
>> information in inet_cork that are set in cmsg are not dumped.
>>
>> While working on this, I was aware of Lese Doru Calin's patch and got some
>> ideas from it.
> 
> 
> What is the use case for this feature, adding a test in UDP fast path ?

This feature is used to help CRIU to dump CORKed UDP packet in send queue. I'm 
sorry for being not aware of the performance perspective here.

> IMO, TCP_REPAIR hijacking standard system calls was a design error,
> we should have added new system calls.

You are right that adding new system calls is a better approach. What do you 
think about adding a new option in getsockopt approach?

Thanks,
Quang Minh.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-12 13:46   ` Bui Quang Minh
@ 2021-08-12 15:51     ` Eric Dumazet
  2021-08-13 11:08       ` Bui Quang Minh
  0 siblings, 1 reply; 9+ messages in thread
From: Eric Dumazet @ 2021-08-12 15:51 UTC (permalink / raw)
  To: Bui Quang Minh, Eric Dumazet, linux-kernel, netdev
  Cc: davem, kuba, yoshfuji, dsahern, willemb, pabeni, avagin,
	alexander, lesedorucalin01



On 8/12/21 3:46 PM, Bui Quang Minh wrote:
> 
> 
> On 8/11/2021 11:14 PM, Eric Dumazet wrote:
>>
>>
>> On 8/11/21 5:45 PM, Bui Quang Minh wrote:
>>> In this patch, I implement UDP_REPAIR sockoption and a new path in
>>> udp_recvmsg for dumping the corked packet in UDP socket's send queue.
>>>
>>> A userspace program can use recvmsg syscall to get the packet's data and
>>> the msg_name information of the packet. Currently, other related
>>> information in inet_cork that are set in cmsg are not dumped.
>>>
>>> While working on this, I was aware of Lese Doru Calin's patch and got some
>>> ideas from it.
>>
>>
>> What is the use case for this feature, adding a test in UDP fast path ?
> 
> This feature is used to help CRIU to dump CORKed UDP packet in send queue. I'm sorry for being not aware of the performance perspective here.

UDP is not reliable.

I find a bit strange we add so many lines of code
for a feature trying very hard to to drop _one_ packet.

I think a much better changelog would be welcomed.

> 
>> IMO, TCP_REPAIR hijacking standard system calls was a design error,
>> we should have added new system calls.
> 
> You are right that adding new system calls is a better approach. What do you think about adding a new option in getsockopt approach?
> 
> Thanks,
> Quang Minh.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-12 15:51     ` Eric Dumazet
@ 2021-08-13 11:08       ` Bui Quang Minh
  2021-08-13 13:00         ` David Laight
  2021-08-16 14:35         ` Willem de Bruijn
  0 siblings, 2 replies; 9+ messages in thread
From: Bui Quang Minh @ 2021-08-13 11:08 UTC (permalink / raw)
  To: Eric Dumazet, linux-kernel, netdev
  Cc: davem, kuba, yoshfuji, dsahern, willemb, pabeni, avagin,
	alexander, lesedorucalin01



On 8/12/2021 10:51 PM, Eric Dumazet wrote:
> 
> 
> On 8/12/21 3:46 PM, Bui Quang Minh wrote:
>>
>>
>> On 8/11/2021 11:14 PM, Eric Dumazet wrote:
>>>
>>>
>>> On 8/11/21 5:45 PM, Bui Quang Minh wrote:
>>>> In this patch, I implement UDP_REPAIR sockoption and a new path in
>>>> udp_recvmsg for dumping the corked packet in UDP socket's send queue.
>>>>
>>>> A userspace program can use recvmsg syscall to get the packet's data and
>>>> the msg_name information of the packet. Currently, other related
>>>> information in inet_cork that are set in cmsg are not dumped.
>>>>
>>>> While working on this, I was aware of Lese Doru Calin's patch and got some
>>>> ideas from it.
>>>
>>>
>>> What is the use case for this feature, adding a test in UDP fast path ?
>>
>> This feature is used to help CRIU to dump CORKed UDP packet in send queue. I'm sorry for being not aware of the performance perspective here.
> 
> UDP is not reliable.
> 
> I find a bit strange we add so many lines of code
> for a feature trying very hard to to drop _one_ packet.
> 
> I think a much better changelog would be welcomed.

The reason we want to dump the packet in send queue is to make to state of the 
application consistent. The scenario is that when an application sends UDP 
packets via UDP_CORK socket or with MSG_MORE, CRIU comes and checkpoints the 
application. If we drop the data in send queue, when application restores, it 
sends some more data then turns off the cork and actually sends a packet. The 
receiving side may get that packet but it's unusual that the first part of that 
packet is missing because we drop it. So we try to solve this problem with some 
help from the Linux kernel.

Thanks,
Quang Minh.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* RE: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-13 11:08       ` Bui Quang Minh
@ 2021-08-13 13:00         ` David Laight
  2021-08-17 16:22           ` avagin
  2021-08-16 14:35         ` Willem de Bruijn
  1 sibling, 1 reply; 9+ messages in thread
From: David Laight @ 2021-08-13 13:00 UTC (permalink / raw)
  To: 'Bui Quang Minh', Eric Dumazet, linux-kernel, netdev
  Cc: davem, kuba, yoshfuji, dsahern, willemb, pabeni, avagin,
	alexander, lesedorucalin01

From: Bui Quang Minh
> Sent: 13 August 2021 12:08
...
> The reason we want to dump the packet in send queue is to make to state of the
> application consistent. The scenario is that when an application sends UDP
> packets via UDP_CORK socket or with MSG_MORE, CRIU comes and checkpoints the
> application. If we drop the data in send queue, when application restores, it
> sends some more data then turns off the cork and actually sends a packet. The
> receiving side may get that packet but it's unusual that the first part of that
> packet is missing because we drop it. So we try to solve this problem with some
> help from the Linux kernel.

Patient: It hurts if I do xxx.
Doctor: Don't do xxx then.

It has to be more efficient to buffer partial UDP packets
in userspace and only send when all the packet is available.

	David

-
Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-13 11:08       ` Bui Quang Minh
  2021-08-13 13:00         ` David Laight
@ 2021-08-16 14:35         ` Willem de Bruijn
  1 sibling, 0 replies; 9+ messages in thread
From: Willem de Bruijn @ 2021-08-16 14:35 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: Eric Dumazet, LKML, Network Development, David Miller,
	Jakub Kicinski, Hideaki YOSHIFUJI, David Ahern, Willem de Bruijn,
	Paolo Abeni, Andrei Vagin, alexander, Lese Doru Calin

On Fri, Aug 13, 2021 at 4:52 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>
>
>
> On 8/12/2021 10:51 PM, Eric Dumazet wrote:
> >
> >
> > On 8/12/21 3:46 PM, Bui Quang Minh wrote:
> >>
> >>
> >> On 8/11/2021 11:14 PM, Eric Dumazet wrote:
> >>>
> >>>
> >>> On 8/11/21 5:45 PM, Bui Quang Minh wrote:
> >>>> In this patch, I implement UDP_REPAIR sockoption and a new path in
> >>>> udp_recvmsg for dumping the corked packet in UDP socket's send queue.
> >>>>
> >>>> A userspace program can use recvmsg syscall to get the packet's data and
> >>>> the msg_name information of the packet. Currently, other related
> >>>> information in inet_cork that are set in cmsg are not dumped.
> >>>>
> >>>> While working on this, I was aware of Lese Doru Calin's patch and got some
> >>>> ideas from it.
> >>>
> >>>
> >>> What is the use case for this feature, adding a test in UDP fast path ?
> >>
> >> This feature is used to help CRIU to dump CORKed UDP packet in send queue. I'm sorry for being not aware of the performance perspective here.
> >
> > UDP is not reliable.
> >
> > I find a bit strange we add so many lines of code
> > for a feature trying very hard to to drop _one_ packet.
> >
> > I think a much better changelog would be welcomed.
>
> The reason we want to dump the packet in send queue is to make to state of the
> application consistent. The scenario is that when an application sends UDP
> packets via UDP_CORK socket or with MSG_MORE, CRIU comes and checkpoints the
> application. If we drop the data in send queue, when application restores, it
> sends some more data then turns off the cork and actually sends a packet. The
> receiving side may get that packet but it's unusual that the first part of that
> packet is missing because we drop it. So we try to solve this problem with some
> help from the Linux kernel.

Instead of checkpointing the state, how about making the kernel drop
the next packet.

For instance by setting up->pending to something else than AF_UNSPEC,
AF_INET, AF_INET6 from a new setsockopt and testing for this case in
the udp_sendmsg up->pending slowpath.

udp_sendmsg already calls udp_v6_flush_pending_frames on error when
appending to a pending packet, so returning an error on the next call
after restore and have that imply a flush is acceptable. I would
introduce a new error code.

The state can perhaps be inferred in other ways, e.g., from
up->pending && !up->len or up->pending && !skb_peek_tail(queue). But
an explicit up->pending mode will be easier to grasp.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-11 15:45 [PATCH v2 1/2] udp: UDP socket send queue repair Bui Quang Minh
  2021-08-11 16:14 ` Eric Dumazet
@ 2021-08-16 14:38 ` Willem de Bruijn
  1 sibling, 0 replies; 9+ messages in thread
From: Willem de Bruijn @ 2021-08-16 14:38 UTC (permalink / raw)
  To: Bui Quang Minh
  Cc: LKML, Network Development, David Miller, Jakub Kicinski,
	Hideaki YOSHIFUJI, David Ahern, Willem de Bruijn, Paolo Abeni,
	Andrei Vagin, alexander, Lese Doru Calin

On Wed, Aug 11, 2021 at 8:48 AM Bui Quang Minh <minhquangbui99@gmail.com> wrote:
>
> In this patch, I implement UDP_REPAIR sockoption and a new path in
> udp_recvmsg for dumping the corked packet in UDP socket's send queue.
>
> A userspace program can use recvmsg syscall to get the packet's data and
> the msg_name information of the packet. Currently, other related
> information in inet_cork that are set in cmsg are not dumped.

[ intended to include in my previous response ]

What other related information? Fields like transmit_time and gso_size?

This would be another reason to prefer dropping the packet over trying
to restore it incompletely.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] udp: UDP socket send queue repair
  2021-08-13 13:00         ` David Laight
@ 2021-08-17 16:22           ` avagin
  0 siblings, 0 replies; 9+ messages in thread
From: avagin @ 2021-08-17 16:22 UTC (permalink / raw)
  To: David Laight
  Cc: 'Bui Quang Minh',
	Eric Dumazet, linux-kernel, netdev, davem, kuba, yoshfuji,
	dsahern, willemb, pabeni, alexander, lesedorucalin01

On Fri, Aug 13, 2021 at 01:00:12PM +0000, David Laight wrote:
> From: Bui Quang Minh
> > Sent: 13 August 2021 12:08
> ...
> > The reason we want to dump the packet in send queue is to make to state of the
> > application consistent. The scenario is that when an application sends UDP
> > packets via UDP_CORK socket or with MSG_MORE, CRIU comes and checkpoints the
> > application. If we drop the data in send queue, when application restores, it
> > sends some more data then turns off the cork and actually sends a packet. The
> > receiving side may get that packet but it's unusual that the first part of that
> > packet is missing because we drop it. So we try to solve this problem with some
> > help from the Linux kernel.
> 
> Patient: It hurts if I do xxx.
> Doctor: Don't do xxx then.
> 
> It has to be more efficient to buffer partial UDP packets
> in userspace and only send when all the packet is available.

You are right. It can be more efficient, but we don't have controls over
user-space processes, and they can do whatever the kernel allows them to
do.

> 
> 	David
> 
> -
> Registered Address Lakeside, Bramley Road, Mount Farm, Milton Keynes, MK1 1PT, UK
> Registration No: 1397386 (Wales)

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-08-17 16:30 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-11 15:45 [PATCH v2 1/2] udp: UDP socket send queue repair Bui Quang Minh
2021-08-11 16:14 ` Eric Dumazet
2021-08-12 13:46   ` Bui Quang Minh
2021-08-12 15:51     ` Eric Dumazet
2021-08-13 11:08       ` Bui Quang Minh
2021-08-13 13:00         ` David Laight
2021-08-17 16:22           ` avagin
2021-08-16 14:35         ` Willem de Bruijn
2021-08-16 14:38 ` Willem de Bruijn

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).