From mboxrd@z Thu Jan 1 00:00:00 1970 From: Eric Dumazet Subject: [PATCH net-next 5/9] tcp: provide earliest departure time in skb->tstamp Date: Fri, 21 Sep 2018 08:51:50 -0700 Message-ID: <20180921155154.49489-6-edumazet@google.com> References: <20180921155154.49489-1-edumazet@google.com> Mime-Version: 1.0 Content-Transfer-Encoding: 8bit Cc: netdev , Van Jacobson , Neal Cardwell , Yuchung Cheng , Soheil Hassas Yeganeh , Willem de Bruijn , Eric Dumazet , Eric Dumazet To: "David S . Miller" Return-path: Received: from mail-pf1-f194.google.com ([209.85.210.194]:34337 "EHLO mail-pf1-f194.google.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S2390320AbeIUVle (ORCPT ); Fri, 21 Sep 2018 17:41:34 -0400 Received: by mail-pf1-f194.google.com with SMTP id k19-v6so6173703pfi.1 for ; Fri, 21 Sep 2018 08:52:05 -0700 (PDT) In-Reply-To: <20180921155154.49489-1-edumazet@google.com> Sender: netdev-owner@vger.kernel.org List-ID: Switch internal TCP skb->skb_mstamp to skb->skb_mstamp_ns, from usec units to nsec units. Do not clear skb->tstamp before entering IP stacks in TX, so that qdisc or devices can implement pacing based on the earliest departure time instead of socket sk->sk_pacing_rate Packets are fed with tcp_wstamp_ns, and following patch will update tcp_wstamp_ns when both TCP and sch_fq switch to the earliest departure time mechanism. Signed-off-by: Eric Dumazet --- include/linux/skbuff.h | 2 +- include/net/tcp.h | 6 +++--- net/ipv4/syncookies.c | 2 +- net/ipv4/tcp.c | 2 +- net/ipv4/tcp_output.c | 13 ++++++------- net/ipv4/tcp_timer.c | 2 +- 6 files changed, 13 insertions(+), 14 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e3a53ca4a9b51b84b7d75ce87485d4d9109a4cf2..86f337e9a81d5eff360335a19ab09f26ae48fca8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -689,7 +689,7 @@ struct sk_buff { union { ktime_t tstamp; - u64 skb_mstamp; + u64 skb_mstamp_ns; /* earliest departure time */ }; /* * This is the control buffer. It is free to use for every diff --git a/include/net/tcp.h b/include/net/tcp.h index 370198fdc65d3e863104665e20faefd0e5a09b92..ff15d8e0d525715b17671e64f6abdead9df0a8f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -761,13 +761,13 @@ static inline u32 tcp_stamp_us_delta(u64 t1, u64 t0) static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) { - return div_u64(skb->skb_mstamp, USEC_PER_SEC / TCP_TS_HZ); + return div_u64(skb->skb_mstamp_ns, NSEC_PER_SEC / TCP_TS_HZ); } /* provide the departure time in us unit */ static inline u64 tcp_skb_timestamp_us(const struct sk_buff *skb) { - return skb->skb_mstamp; + return div_u64(skb->skb_mstamp_ns, NSEC_PER_USEC); } @@ -813,7 +813,7 @@ struct tcp_skb_cb { #define TCPCB_SACKED_RETRANS 0x02 /* SKB retransmitted */ #define TCPCB_LOST 0x04 /* SKB is lost */ #define TCPCB_TAGBITS 0x07 /* All tag bits */ -#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp) */ +#define TCPCB_REPAIRED 0x10 /* SKB repaired (no skb_mstamp_ns) */ #define TCPCB_EVER_RETRANS 0x80 /* Ever retransmitted frame */ #define TCPCB_RETRANS (TCPCB_SACKED_RETRANS|TCPCB_EVER_RETRANS| \ TCPCB_REPAIRED) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index c3387dfd725bf99bcddefb9fb4f1dc98f5dd7f23..606f868d9f3fde1c3140aa7eecde87d2ec32b5f2 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -88,7 +88,7 @@ u64 cookie_init_timestamp(struct request_sock *req) ts <<= TSBITS; ts |= options; } - return (u64)ts * (USEC_PER_SEC / TCP_TS_HZ); + return (u64)ts * (NSEC_PER_SEC / TCP_TS_HZ); } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 67670fac7c8de510df351fe3a835b554cc4759a9..69c236943f56bd0749e5efb18de97e69898f1bde 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1295,7 +1295,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) copy = size_goal; /* All packets are restored as if they have - * already been sent. skb_mstamp isn't set to + * already been sent. skb_mstamp_ns isn't set to * avoid wrong rtt estimation. */ if (tp->repair) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 5a8105e84f7c1a876bbd15e8050c2574c1fbe162..957f7a0e21c06cae9f0d3bed57017bbc0a36c880 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1014,7 +1014,7 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) { - skb->skb_mstamp = tp->tcp_mstamp; + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); } @@ -1061,7 +1061,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, if (unlikely(!skb)) return -ENOBUFS; } - skb->skb_mstamp = tp->tcp_mstamp; + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); @@ -1165,8 +1165,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb); skb_shinfo(skb)->gso_size = tcp_skb_mss(skb); - /* Our usage of tstamp should remain private */ - skb->tstamp = 0; + /* Leave earliest departure time in skb->tstamp (skb->skb_mstamp_ns) */ /* Cleanup our debris for IP stacks */ memset(skb->cb, 0, max(sizeof(struct inet_skb_parm), @@ -3221,10 +3220,10 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, memset(&opts, 0, sizeof(opts)); #ifdef CONFIG_SYN_COOKIES if (unlikely(req->cookie_ts)) - skb->skb_mstamp = cookie_init_timestamp(req); + skb->skb_mstamp_ns = cookie_init_timestamp(req); else #endif - skb->skb_mstamp = tcp_clock_us(); + skb->skb_mstamp_ns = tcp_clock_ns(); #ifdef CONFIG_TCP_MD5SIG rcu_read_lock(); @@ -3440,7 +3439,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) err = tcp_transmit_skb(sk, syn_data, 1, sk->sk_allocation); - syn->skb_mstamp = syn_data->skb_mstamp; + syn->skb_mstamp_ns = syn_data->skb_mstamp_ns; /* Now full SYN+DATA was cloned and sent (or not), * remove the SYN from the original skb (syn_data) diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 7fdf222a0bdfe9775970082f6b5dcdcc82b2ae1a..61023d50cd604d5e19464a32c33b65d29c75c81e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -360,7 +360,7 @@ static void tcp_probe_timer(struct sock *sk) */ start_ts = tcp_skb_timestamp(skb); if (!start_ts) - skb->skb_mstamp = tp->tcp_mstamp; + skb->skb_mstamp_ns = tp->tcp_wstamp_ns; else if (icsk->icsk_user_timeout && (s32)(tcp_time_stamp(tp) - start_ts) > icsk->icsk_user_timeout) goto abort; -- 2.19.0.444.g18242da7ef-goog