netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: Willem de Bruijn <willemb@google.com>
Subject: [PATCH RFC net-next 1/6] net: multiple release time SO_TXTIME
Date: Tue,  9 Jun 2020 10:09:29 -0400	[thread overview]
Message-ID: <20200609140934.110785-2-willemdebruijn.kernel@gmail.com> (raw)
In-Reply-To: <20200609140934.110785-1-willemdebruijn.kernel@gmail.com>

From: Willem de Bruijn <willemb@google.com>

Pace transmission of segments in a UDP GSO datagram.

Batching datagram protocol stack traversals with UDP_SEGMENT saves
significant cycles for large data transfers.

But GSO packets are sent at once. Pacing traffic to internet clients
often requires sending just a few MSS per msec pacing interval.

SO_TXTIME allows delivery of packets at a later time. Extend it
to allow pacing the segments in a UDP GSO packet, to be able to build
larger GSO datagrams.

Add SO_TXTIME flag SOF_TXTIME_MULTI_RELEASE. This reinterprets the
lower 8 bits of the 64-bit release timestamp as

  - bits 4..7: release time interval in msec
  - bits 0..3: number of segments sent per period

So a timestamp of 0x148 means

  - 0x100 initial timestamp in Qdisc selected clocksource
  - every 4 msec release N MSS
  - N is 8

A subsequent patch changes the qdisc to pace the individual segments.

Packet transmission can race with the socket option. This is safe.
For predictable behavior, it is up to the caller to not toggle the
feature while packets on a socket are in flight.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/linux/netdevice.h       |  1 +
 include/net/sock.h              |  3 ++-
 include/uapi/linux/net_tstamp.h |  3 ++-
 net/core/dev.c                  | 44 +++++++++++++++++++++++++++++++++
 net/core/sock.c                 |  4 +++
 5 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 1a96e9c4ec36..15ea976dd446 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4528,6 +4528,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 				  netdev_features_t features, bool tx_path);
 struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb,
 				    netdev_features_t features);
+struct sk_buff *skb_gso_segment_txtime(struct sk_buff *skb);
 
 struct netdev_bonding_info {
 	ifslave	slave;
diff --git a/include/net/sock.h b/include/net/sock.h
index c53cc42b5ab9..491e389b3570 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -493,7 +493,8 @@ struct sock {
 	u8			sk_clockid;
 	u8			sk_txtime_deadline_mode : 1,
 				sk_txtime_report_errors : 1,
-				sk_txtime_unused : 6;
+				sk_txtime_multi_release : 1,
+				sk_txtime_unused : 5;
 
 	struct socket		*sk_socket;
 	void			*sk_user_data;
diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h
index 7ed0b3d1c00a..ca1ae3b6f601 100644
--- a/include/uapi/linux/net_tstamp.h
+++ b/include/uapi/linux/net_tstamp.h
@@ -162,8 +162,9 @@ struct scm_ts_pktinfo {
 enum txtime_flags {
 	SOF_TXTIME_DEADLINE_MODE = (1 << 0),
 	SOF_TXTIME_REPORT_ERRORS = (1 << 1),
+	SOF_TXTIME_MULTI_RELEASE = (1 << 2),
 
-	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_REPORT_ERRORS,
+	SOF_TXTIME_FLAGS_LAST = SOF_TXTIME_MULTI_RELEASE,
 	SOF_TXTIME_FLAGS_MASK = (SOF_TXTIME_FLAGS_LAST - 1) |
 				 SOF_TXTIME_FLAGS_LAST
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 061496a1f640..5058083375fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3377,6 +3377,50 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb,
 }
 EXPORT_SYMBOL(__skb_gso_segment);
 
+struct sk_buff *skb_gso_segment_txtime(struct sk_buff *skb)
+{
+	int mss_per_ival, mss_in_cur_ival;
+	struct sk_buff *segs, *seg;
+	struct skb_shared_info *sh;
+	u64 step_ns, tstamp;
+
+	if (!skb->sk || !sk_fullsock(skb->sk) ||
+	    !skb->sk->sk_txtime_multi_release)
+		return NULL;	/* no full socket with multi release set */
+
+	/* tstamp low byte: bits 0..3 segs per interval, bits 4..7 step */
+	mss_per_ival = skb->tstamp & 0xF;
+	step_ns = ((skb->tstamp >> 4) & 0xF) * NSEC_PER_MSEC;
+	tstamp = skb->tstamp;
+
+	if (mss_per_ival == 0)
+		return NULL;	/* encoded segs per interval is zero */
+
+	/* no per-segment timestamps needed if all segs fit in one interval */
+	sh = skb_shinfo(skb);
+	if (sh->gso_segs <= mss_per_ival)
+		return NULL;
+
+	segs = skb_gso_segment(skb, NETIF_F_SG | NETIF_F_HW_CSUM);
+	if (IS_ERR_OR_NULL(segs))
+		return segs;	/* hand error pointer or NULL to the caller */
+
+	mss_in_cur_ival = 0;
+
+	for (seg = segs; seg; seg = seg->next) {
+		seg->tstamp = tstamp & ~0xFF;	/* clear the encoded low byte */
+
+		mss_in_cur_ival++;
+		if (mss_in_cur_ival == mss_per_ival) {
+			tstamp += step_ns;	/* later segs release one step on */
+			mss_in_cur_ival = 0;
+		}
+	}
+
+	return segs;
+}
+EXPORT_SYMBOL_GPL(skb_gso_segment_txtime);
+
 /* Take action when hardware reception checksum errors are detected. */
 #ifdef CONFIG_BUG
 void netdev_rx_csum_fault(struct net_device *dev, struct sk_buff *skb)
diff --git a/net/core/sock.c b/net/core/sock.c
index 6c4acf1f0220..7036b8855154 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1258,6 +1258,8 @@ int sock_setsockopt(struct socket *sock, int level, int optname,
 			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
 		sk->sk_txtime_report_errors =
 			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+		sk->sk_txtime_multi_release =
+			!!(sk_txtime.flags & SOF_TXTIME_MULTI_RELEASE);
 		break;
 
 	case SO_BINDTOIFINDEX:
@@ -1608,6 +1610,8 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
 				  SOF_TXTIME_DEADLINE_MODE : 0;
 		v.txtime.flags |= sk->sk_txtime_report_errors ?
 				  SOF_TXTIME_REPORT_ERRORS : 0;
+		v.txtime.flags |= sk->sk_txtime_multi_release ?
+				  SOF_TXTIME_MULTI_RELEASE : 0;
 		break;
 
 	case SO_BINDTOIFINDEX:
-- 
2.27.0.278.ge193c7cf3a9-goog


  reply	other threads:[~2020-06-09 14:09 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-06-09 14:09 [PATCH RFC net-next 0/6] multi release pacing for UDP GSO Willem de Bruijn
2020-06-09 14:09 ` Willem de Bruijn [this message]
2020-06-09 14:09 ` [PATCH RFC net-next 2/6] net: build gso segs in multi release time SO_TXTIME Willem de Bruijn
2020-06-09 14:09 ` [PATCH RFC net-next 3/6] net_sched: sch_fq: multiple release time support Willem de Bruijn
2020-06-09 15:00   ` Eric Dumazet
2020-06-09 15:10     ` Eric Dumazet
2020-06-09 14:09 ` [PATCH RFC net-next 4/6] selftests/net: so_txtime: support txonly/rxonly modes Willem de Bruijn
2020-06-09 14:09 ` [PATCH RFC net-next 5/6] selftests/net: so_txtime: add gso and multi release pacing Willem de Bruijn
2020-06-09 14:09 ` [PATCH RFC net-next 6/6] selftests/net: upgso bench: add pacing with SO_TXTIME Willem de Bruijn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200609140934.110785-2-willemdebruijn.kernel@gmail.com \
    --to=willemdebruijn.kernel@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=willemb@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).