All of lore.kernel.org
 help / color / mirror / Atom feed
From: Abhishek Chauhan <quic_abchauha@quicinc.com>
To: "David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
	Andrew Halaney <ahalaney@redhat.com>,
	Willem de Bruijn <willemdebruijn.kernel@gmail.com>,
	Martin KaFai Lau <martin.lau@kernel.org>,
	Martin KaFai Lau <martin.lau@linux.dev>,
	Daniel Borkmann <daniel@iogearbox.net>, bpf <bpf@vger.kernel.org>
Cc: kernel@quicinc.com
Subject: [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type
Date: Wed, 17 Apr 2024 17:43:08 -0700	[thread overview]
Message-ID: <20240418004308.1009262-3-quic_abchauha@quicinc.com> (raw)
In-Reply-To: <20240418004308.1009262-1-quic_abchauha@quicinc.com>

tstamp_type is now set based on actual clockid_t compressed
into 2 bits.

To make the design scalable for future needs this commit bring in
the change to extend the tstamp_type:1 to tstamp_type:2 to support
other clockid_t timestamp.

We now support CLOCK_TAI as part of tstamp_type as part of this
commit with exisiting support CLOCK_MONOTONIC and CLOCK_REALTIME.

Link: https://lore.kernel.org/netdev/bc037db4-58bb-4861-ac31-a361a93841d3@linux.dev/
Signed-off-by: Abhishek Chauhan <quic_abchauha@quicinc.com>
---
Changes since v3
- Carefully reviewed BPF APIs and made changes in 
  BPF code as well. 
- Re-used actual clockid_t values since skbuff.h 
  indirectly includes uapi/linux/time.h
- Added CLOCK_TAI as part of the skb_set_delivery_time
  handling instead of CLOCK_USER
- Added default in switch for unsupported and invalid 
  timestamp with an WARN_ONCE.
- All of the above comments were given by Willem  
- Made changes in filter.c as per Martin's comments
  to handle invalid cases in bpf code with addition of
  SKB_TAI_DELIVERY_TIME_MASK

Changes since v2
- Minor changes to commit subject

Changes since v1 
- identified additional changes in BPF framework.
- Bit shift in SKB_MONO_DELIVERY_TIME_MASK and TC_AT_INGRESS_MASK.
- Made changes in skb_set_delivery_time to keep changes similar to 
  previous code for mono_delivery_time and just setting tstamp_type
  bit 1 for userspace timestamp.

 include/linux/skbuff.h                        | 21 +++++++---
 include/uapi/linux/bpf.h                      |  1 +
 net/core/filter.c                             | 40 ++++++++++++++++---
 net/ipv4/ip_output.c                          |  2 +-
 net/ipv4/raw.c                                |  2 +-
 net/ipv6/ip6_output.c                         |  2 +-
 net/ipv6/raw.c                                |  2 +-
 net/packet/af_packet.c                        |  7 ++--
 .../selftests/bpf/prog_tests/ctx_rewrite.c    | 13 ++++--
 9 files changed, 68 insertions(+), 22 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2e7811c7e4db..647522ef41cd 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -703,14 +703,17 @@ typedef unsigned char *sk_buff_data_t;
 #endif
 
 /**
- * tstamp_type:1 can take 2 values each
+ * tstamp_type:2 can take 4 values each
  * represented by time base in skb
  * 0x0 => real timestamp_type
  * 0x1 => mono timestamp_type
+ * 0x2 => tai timestamp_type
+ * 0x3 => undefined timestamp_type
  */
 enum skb_tstamp_type {
 	SKB_CLOCK_REAL,	/* Time base is skb is REALTIME */
 	SKB_CLOCK_MONO,	/* Time base is skb is MONOTONIC */
+	SKB_CLOCK_TAI,	/* Time base in skb is TAI */
 };
 
 /**
@@ -833,7 +836,8 @@ enum skb_tstamp_type {
  *	@tstamp_type: When set, skb->tstamp has the
  *		delivery_time in mono clock base (i.e. EDT).  Otherwise, the
  *		skb->tstamp has the (rcv) timestamp at ingress and
- *		delivery_time at egress.
+ *		delivery_time at egress or skb->tstamp defined by skb->sk->sk_clockid
+ *		coming from userspace
  *	@napi_id: id of the NAPI struct this skb came from
  *	@sender_cpu: (aka @napi_id) source CPU in XPS
  *	@alloc_cpu: CPU which did the skb allocation.
@@ -961,7 +965,7 @@ struct sk_buff {
 	/* private: */
 	__u8			__mono_tc_offset[0];
 	/* public: */
-	__u8			tstamp_type:1;	/* See SKB_CLOCK_*_MASK */
+	__u8			tstamp_type:2;	/* See skb_tstamp_type enum */
 #ifdef CONFIG_NET_XGRESS
 	__u8			tc_at_ingress:1;	/* See TC_AT_INGRESS_MASK */
 	__u8			tc_skip_classify:1;
@@ -1096,10 +1100,12 @@ struct sk_buff {
  */
 #ifdef __BIG_ENDIAN_BITFIELD
 #define SKB_MONO_DELIVERY_TIME_MASK	(1 << 7)
-#define TC_AT_INGRESS_MASK		(1 << 6)
+#define SKB_TAI_DELIVERY_TIME_MASK	(1 << 6)
+#define TC_AT_INGRESS_MASK		(1 << 5)
 #else
 #define SKB_MONO_DELIVERY_TIME_MASK	(1 << 0)
-#define TC_AT_INGRESS_MASK		(1 << 1)
+#define SKB_TAI_DELIVERY_TIME_MASK	(1 << 1)
+#define TC_AT_INGRESS_MASK		(1 << 2)
 #endif
 #define SKB_BF_MONO_TC_OFFSET		offsetof(struct sk_buff, __mono_tc_offset)
 
@@ -4206,6 +4212,11 @@ static inline void skb_set_delivery_time(struct sk_buff *skb, ktime_t kt,
 	case CLOCK_MONOTONIC:
 		skb->tstamp_type = SKB_CLOCK_MONO;
 		break;
+	case CLOCK_TAI:
+		skb->tstamp_type = SKB_CLOCK_TAI;
+		break;
+	default:
+		WARN_ONCE(true, "clockid %d not supported", tstamp_type);
 	}
 }
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index cee0a7915c08..1376ed5ece10 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6209,6 +6209,7 @@ union {					\
 enum {
 	BPF_SKB_TSTAMP_UNSPEC,
 	BPF_SKB_TSTAMP_DELIVERY_MONO,	/* tstamp has mono delivery time */
+	BPF_SKB_TSTAMP_DELIVERY_TAI,	/* tstamp has tai delivery time */
 	/* For any BPF_SKB_TSTAMP_* that the bpf prog cannot handle,
 	 * the bpf prog should handle it like BPF_SKB_TSTAMP_UNSPEC
 	 * and try to deduce it by ingress, egress or skb->sk->sk_clockid.
diff --git a/net/core/filter.c b/net/core/filter.c
index b1192e672cbb..de86c42b5859 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7711,6 +7711,12 @@ BPF_CALL_3(bpf_skb_set_tstamp, struct sk_buff *, skb,
 		skb->tstamp = tstamp;
 		skb->tstamp_type = SKB_CLOCK_MONO;
 		break;
+	case BPF_SKB_TSTAMP_DELIVERY_TAI:
+		if (!tstamp)
+			return -EINVAL;
+		skb->tstamp = tstamp;
+		skb->tstamp_type = SKB_CLOCK_TAI;
+		break;
 	case BPF_SKB_TSTAMP_UNSPEC:
 		if (tstamp)
 			return -EINVAL;
@@ -9372,10 +9378,16 @@ static struct bpf_insn *bpf_convert_tstamp_type_read(const struct bpf_insn *si,
 	*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg,
 			      SKB_BF_MONO_TC_OFFSET);
 	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
-				SKB_MONO_DELIVERY_TIME_MASK, 2);
+				SKB_MONO_DELIVERY_TIME_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
+	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+				SKB_MONO_DELIVERY_TIME_MASK, 3);
+	*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg,
+				SKB_TAI_DELIVERY_TIME_MASK, 4);
 	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_UNSPEC);
 	*insn++ = BPF_JMP_A(1);
 	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_MONO);
+	*insn++ = BPF_JMP_A(1);
+	*insn++ = BPF_MOV32_IMM(value_reg, BPF_SKB_TSTAMP_DELIVERY_TAI);
 
 	return insn;
 }
@@ -9418,10 +9430,26 @@ static struct bpf_insn *bpf_convert_tstamp_read(const struct bpf_prog *prog,
 		__u8 tmp_reg = BPF_REG_AX;
 
 		*insn++ = BPF_LDX_MEM(BPF_B, tmp_reg, skb_reg, SKB_BF_MONO_TC_OFFSET);
+		/*check if all three bits are set*/
 		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg,
-					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK);
-		*insn++ = BPF_JMP32_IMM(BPF_JNE, tmp_reg,
-					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 2);
+					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
+					SKB_TAI_DELIVERY_TIME_MASK);
+		/*if all 3 bits are set jump 3 instructions and clear the register */
+		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK |
+					SKB_TAI_DELIVERY_TIME_MASK, 4);
+		/*Now check Mono is set with ingress mask if so clear */
+		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+					TC_AT_INGRESS_MASK | SKB_MONO_DELIVERY_TIME_MASK, 3);
+		/*Now Check tai is set with ingress mask if so clear */
+		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+					TC_AT_INGRESS_MASK | SKB_TAI_DELIVERY_TIME_MASK, 2);
+		/*Now Check tai and mono are set if so clear */
+		*insn++ = BPF_JMP32_IMM(BPF_JEQ, tmp_reg,
+					SKB_MONO_DELIVERY_TIME_MASK |
+					SKB_TAI_DELIVERY_TIME_MASK, 1);
+		/* goto <store> */
+		*insn++ = BPF_JMP_A(2);
 		/* skb->tc_at_ingress && skb->tstamp_type:1,
 		 * read 0 as the (rcv) timestamp.
 		 */
@@ -9456,9 +9484,11 @@ static struct bpf_insn *bpf_convert_tstamp_write(const struct bpf_prog *prog,
 		/* Writing __sk_buff->tstamp as ingress, goto <clear> */
 		*insn++ = BPF_JMP32_IMM(BPF_JSET, tmp_reg, TC_AT_INGRESS_MASK, 1);
 		/* goto <store> */
-		*insn++ = BPF_JMP_A(2);
+		*insn++ = BPF_JMP_A(3);
 		/* <clear>: mono_delivery_time or (skb->tstamp_type:1) */
 		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_MONO_DELIVERY_TIME_MASK);
+		/* <clear>: tai delivery_time or (skb->tstamp_type:2) */
+		*insn++ = BPF_ALU32_IMM(BPF_AND, tmp_reg, ~SKB_TAI_DELIVERY_TIME_MASK);
 		*insn++ = BPF_STX_MEM(BPF_B, skb_reg, tmp_reg, SKB_BF_MONO_TC_OFFSET);
 	}
 #endif
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f29349151203..91e80a3d411f 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1457,7 +1457,7 @@ struct sk_buff *__ip_make_skb(struct sock *sk,
 
 	skb->priority = (cork->tos != -1) ? cork->priority: READ_ONCE(sk->sk_priority);
 	skb->mark = cork->mark;
-	skb->tstamp = cork->transmit_time;
+	skb_set_delivery_time(skb, cork->transmit_time, sk->sk_clockid);
 	/*
 	 * Steal rt from cork.dst to avoid a pair of atomic_inc/atomic_dec
 	 * on dst refcount
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index dcb11f22cbf2..bbc46a40c8b6 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -360,7 +360,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4,
 	skb->protocol = htons(ETH_P_IP);
 	skb->priority = READ_ONCE(sk->sk_priority);
 	skb->mark = sockc->mark;
-	skb->tstamp = sockc->transmit_time;
+	skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
 	skb_dst_set(skb, &rt->dst);
 	*rtp = NULL;
 
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index a9e819115622..0b8193bdd98f 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1924,7 +1924,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk,
 
 	skb->priority = READ_ONCE(sk->sk_priority);
 	skb->mark = cork->base.mark;
-	skb->tstamp = cork->base.transmit_time;
+	skb_set_delivery_time(skb, cork->base.transmit_time, sk->sk_clockid);
 
 	ip6_cork_steal_dst(skb, cork);
 	IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0d896ca7b589..625f3a917e50 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -621,7 +621,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length,
 	skb->protocol = htons(ETH_P_IPV6);
 	skb->priority = READ_ONCE(sk->sk_priority);
 	skb->mark = sockc->mark;
-	skb->tstamp = sockc->transmit_time;
+	skb_set_delivery_time(skb, sockc->transmit_time, sk->sk_clockid);
 
 	skb_put(skb, length);
 	skb_reset_network_header(skb);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8c6d3fbb4ed8..356c96f23370 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2056,8 +2056,7 @@ static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
 	skb->dev = dev;
 	skb->priority = READ_ONCE(sk->sk_priority);
 	skb->mark = READ_ONCE(sk->sk_mark);
-	skb->tstamp = sockc.transmit_time;
-
+	skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
 	skb_setup_tx_timestamp(skb, sockc.tsflags);
 
 	if (unlikely(extra_len == 4))
@@ -2585,7 +2584,7 @@ static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
 	skb->dev = dev;
 	skb->priority = READ_ONCE(po->sk.sk_priority);
 	skb->mark = READ_ONCE(po->sk.sk_mark);
-	skb->tstamp = sockc->transmit_time;
+	skb_set_delivery_time(skb, sockc->transmit_time, po->sk.sk_clockid);
 	skb_setup_tx_timestamp(skb, sockc->tsflags);
 	skb_zcopy_set_nouarg(skb, ph.raw);
 
@@ -3063,7 +3062,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len)
 	skb->dev = dev;
 	skb->priority = READ_ONCE(sk->sk_priority);
 	skb->mark = sockc.mark;
-	skb->tstamp = sockc.transmit_time;
+	skb_set_delivery_time(skb, sockc.transmit_time, sk->sk_clockid);
 
 	if (unlikely(extra_len == 4))
 		skb->no_fcs = 1;
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index 3b7c57fe55a5..9d0c33d7f52e 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -69,15 +69,20 @@ static struct test_case test_cases[] = {
 	{
 		N(SCHED_CLS, struct __sk_buff, tstamp),
 		.read  = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
-			 "w11 &= 3;"
-			 "if w11 != 0x3 goto pc+2;"
+			 "w11 &= 7;"
+			 "if w11 == 0x7 goto pc+4;"
+			 "if w11 == 0x5 goto pc+3;"
+			 "if w11 == 0x6 goto pc+2;"
+			 "if w11 == 0x3 goto pc+1;"
+			 "goto pc+2"
 			 "$dst = 0;"
 			 "goto pc+1;"
 			 "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
 		.write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
-			 "if w11 & 0x2 goto pc+1;"
-			 "goto pc+2;"
+			 "if w11 & 0x4 goto pc+1;"
+			 "goto pc+3;"
 			 "w11 &= -2;"
+			 "w11 &= -3;"
 			 "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
 			 "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
 	},
-- 
2.25.1


  parent reply	other threads:[~2024-04-18  0:43 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-04-18  0:43 [RFC PATCH bpf-next v4 0/2] Replace mono_delivery_time with tstamp_type Abhishek Chauhan
2024-04-18  0:43 ` [RFC PATCH bpf-next v4 1/2] net: Rename mono_delivery_time to tstamp_type for scalabilty Abhishek Chauhan
2024-04-18 18:47   ` Willem de Bruijn
2024-04-18 19:58     ` Abhishek Chauhan (ABC)
2024-04-18 20:11       ` Willem de Bruijn
2024-04-18 20:38         ` Abhishek Chauhan (ABC)
2024-04-18 20:49           ` Willem de Bruijn
2024-04-18 20:51             ` Abhishek Chauhan (ABC)
2024-04-18  0:43 ` Abhishek Chauhan [this message]
2024-04-18 19:06   ` [RFC PATCH bpf-next v4 2/2] net: Add additional bit to support clockid_t timestamp type Willem de Bruijn
2024-04-18 20:10     ` Abhishek Chauhan (ABC)
2024-04-18 21:57       ` Martin KaFai Lau
2024-04-19  0:30         ` Abhishek Chauhan (ABC)
2024-04-20  1:13           ` Abhishek Chauhan (ABC)
2024-04-22 18:46             ` Martin KaFai Lau
2024-04-24 18:03               ` Abhishek Chauhan (ABC)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240418004308.1009262-3-quic_abchauha@quicinc.com \
    --to=quic_abchauha@quicinc.com \
    --cc=ahalaney@redhat.com \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=edumazet@google.com \
    --cc=kernel@quicinc.com \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    --cc=willemdebruijn.kernel@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.