bpf.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
       [not found] <20200109155924.30122-1-mathew.j.martineau@linux.intel.com>
@ 2020-01-09 15:59 ` Mat Martineau
  2020-01-09 19:05   ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Mat Martineau @ 2020-01-09 15:59 UTC (permalink / raw)
  To: netdev, mptcp
  Cc: Mat Martineau, Alexei Starovoitov, Daniel Borkmann, bpf,
	Paolo Abeni, Matthieu Baerts

Match the 16-bit width of skbuff->protocol. Fills an 8-bit hole so
sizeof(struct sock) does not change.

Also take care of BPF field access for sk_type/sk_protocol. Both of them
are now outside the bitfield, so we can use load instructions without
further shifting/masking.

v5 -> v6:
 - update eBPF accessors, too (Intel's kbuild test robot)
v2 -> v3:
 - keep 'sk_type' 2 bytes aligned (Eric)
v1 -> v2:
 - preserve sk_pacing_shift as bit field (Eric)

Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: bpf@vger.kernel.org
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
 include/net/sock.h          | 25 ++++------------
 include/trace/events/sock.h |  2 +-
 net/core/filter.c           | 60 ++++++++++++++-----------------------
 3 files changed, 28 insertions(+), 59 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 091e55428415..8766f9bc3e70 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -436,30 +436,15 @@ struct sock {
 	 * Because of non atomicity rules, all
 	 * changes are protected by socket lock.
 	 */
-	unsigned int		__sk_flags_offset[0];
-#ifdef __BIG_ENDIAN_BITFIELD
-#define SK_FL_PROTO_SHIFT  16
-#define SK_FL_PROTO_MASK   0x00ff0000
-
-#define SK_FL_TYPE_SHIFT   0
-#define SK_FL_TYPE_MASK    0x0000ffff
-#else
-#define SK_FL_PROTO_SHIFT  8
-#define SK_FL_PROTO_MASK   0x0000ff00
-
-#define SK_FL_TYPE_SHIFT   16
-#define SK_FL_TYPE_MASK    0xffff0000
-#endif
-
-	unsigned int		sk_padding : 1,
+	u8			sk_padding : 1,
 				sk_kern_sock : 1,
 				sk_no_check_tx : 1,
 				sk_no_check_rx : 1,
-				sk_userlocks : 4,
-				sk_protocol  : 8,
-				sk_type      : 16;
-	u16			sk_gso_max_segs;
+				sk_userlocks : 4;
 	u8			sk_pacing_shift;
+	u16			sk_type;
+	u16			sk_protocol;
+	u16			sk_gso_max_segs;
 	unsigned long	        sk_lingertime;
 	struct proto		*sk_prot_creator;
 	rwlock_t		sk_callback_lock;
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 51fe9f6719eb..3ff12b90048d 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -147,7 +147,7 @@ TRACE_EVENT(inet_sock_set_state,
 		__field(__u16, sport)
 		__field(__u16, dport)
 		__field(__u16, family)
-		__field(__u8, protocol)
+		__field(__u16, protocol)
 		__array(__u8, saddr, 4)
 		__array(__u8, daddr, 4)
 		__array(__u8, saddr_v6, 16)
diff --git a/net/core/filter.c b/net/core/filter.c
index 42fd17c48c5f..ef01c5599501 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7607,21 +7607,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock, type):
-		BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
-		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, __sk_flags_offset));
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
-		*target_size = 2;
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock, sk_type),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock, sk_type,
+				       sizeof_field(struct sock, sk_type),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, protocol):
-		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
-		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
-				      offsetof(struct sock, __sk_flags_offset));
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
-		*target_size = 1;
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock, sk_protocol),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock, sk_protocol,
+				       sizeof_field(struct sock, sk_protocol),
+				       target_size));
 		break;
 
 	case offsetof(struct bpf_sock, src_ip4):
@@ -7903,20 +7903,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct bpf_sock_addr, type):
-		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
-			struct bpf_sock_addr_kern, struct sock, sk,
-			__sk_flags_offset, BPF_W, 0);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+					    struct sock, sk, sk_type);
 		break;
 
 	case offsetof(struct bpf_sock_addr, protocol):
-		SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
-			struct bpf_sock_addr_kern, struct sock, sk,
-			__sk_flags_offset, BPF_W, 0);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
-					SK_FL_PROTO_SHIFT);
+		SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+					    struct sock, sk, sk_protocol);
 		break;
 
 	case offsetof(struct bpf_sock_addr, msg_src_ip4):
@@ -8835,11 +8828,11 @@ sk_reuseport_is_valid_access(int off, int size,
 				    skb,				\
 				    SKB_FIELD)
 
-#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
-	SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern,	\
-					     struct sock,		\
-					     sk,			\
-					     SK_FIELD, BPF_SIZE, EXTRA_OFF)
+#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD)				\
+	SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern,		\
+				    struct sock,			\
+				    sk,					\
+				    SK_FIELD)
 
 static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 					   const struct bpf_insn *si,
@@ -8863,16 +8856,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
 		break;
 
 	case offsetof(struct sk_reuseport_md, ip_protocol):
-		BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
-		SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
-						    BPF_W, 0);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
-		*insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
-					SK_FL_PROTO_SHIFT);
-		/* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
-		 * aware.  No further narrowing or masking is needed.
-		 */
-		*target_size = 1;
+		SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
 		break;
 
 	case offsetof(struct sk_reuseport_md, data_end):
-- 
2.24.1


^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
  2020-01-09 15:59 ` [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value Mat Martineau
@ 2020-01-09 19:05   ` David Miller
  2020-01-09 21:25     ` [MPTCP] " Florian Westphal
  0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2020-01-09 19:05 UTC (permalink / raw)
  To: mathew.j.martineau
  Cc: netdev, mptcp, ast, daniel, bpf, pabeni, matthieu.baerts

From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Thu,  9 Jan 2020 07:59:15 -0800

> Match the 16-bit width of skbuff->protocol. Fills an 8-bit hole so
> sizeof(struct sock) does not change.
> 
> Also take care of BPF field access for sk_type/sk_protocol. Both of them
> are now outside the bitfield, so we can use load instructions without
> further shifting/masking.
> 
> v5 -> v6:
>  - update eBPF accessors, too (Intel's kbuild test robot)
> v2 -> v3:
>  - keep 'sk_type' 2 bytes aligned (Eric)
> v1 -> v2:
>  - preserve sk_pacing_shift as bit field (Eric)
> 
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: bpf@vger.kernel.org
> Co-developed-by: Paolo Abeni <pabeni@redhat.com>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>

This is worrisome for me.

We have lots of places that now are going to be assigning  sk->sk_protocol
into a u8 somewhere else.  A lot of them are ok because limits are enforced
in various places, but for example:

net/ipv6/udp.c:	fl6.flowi6_proto = sk->sk_protocol;
net/l2tp/l2tp_ip6.c:	fl6.flowi6_proto = sk->sk_protocol;

net/ipv6/inet6_connection_sock.c:	fl6->flowi6_proto = sk->sk_protocol;

net/ipv6/af_inet6.c:		fl6.flowi6_proto = sk->sk_protocol;
net/ipv6/datagram.c:	fl6->flowi6_proto = sk->sk_protocol;

This is one just one small example situation, where flowi6_proto is a u8.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [MPTCP] Re: [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
  2020-01-09 19:05   ` David Miller
@ 2020-01-09 21:25     ` Florian Westphal
  2020-01-10  2:43       ` David Miller
  0 siblings, 1 reply; 4+ messages in thread
From: Florian Westphal @ 2020-01-09 21:25 UTC (permalink / raw)
  To: David Miller; +Cc: mathew.j.martineau, netdev, mptcp, ast, daniel, bpf

David Miller <davem@davemloft.net> wrote:
> From: Mat Martineau <mathew.j.martineau@linux.intel.com>
> Date: Thu,  9 Jan 2020 07:59:15 -0800
> 
> > Match the 16-bit width of skbuff->protocol. Fills an 8-bit hole so
> > sizeof(struct sock) does not change.
> > 
> > Also take care of BPF field access for sk_type/sk_protocol. Both of them
> > are now outside the bitfield, so we can use load instructions without
> > further shifting/masking.
> > 
> > v5 -> v6:
> >  - update eBPF accessors, too (Intel's kbuild test robot)
> > v2 -> v3:
> >  - keep 'sk_type' 2 bytes aligned (Eric)
> > v1 -> v2:
> >  - preserve sk_pacing_shift as bit field (Eric)
> > 
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: bpf@vger.kernel.org
> > Co-developed-by: Paolo Abeni <pabeni@redhat.com>
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> > Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> > Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
> 
> This is worrisome for me.
> 
> We have lots of places that now are going to be assigning  sk->sk_protocol
> into a u8 somewhere else.  A lot of them are ok because limits are enforced
> in various places, but for example:
> 
> net/ipv6/udp.c:	fl6.flowi6_proto = sk->sk_protocol;
> net/l2tp/l2tp_ip6.c:	fl6.flowi6_proto = sk->sk_protocol;
> 
> net/ipv6/inet6_connection_sock.c:	fl6->flowi6_proto = sk->sk_protocol;
> 
> net/ipv6/af_inet6.c:		fl6.flowi6_proto = sk->sk_protocol;
> net/ipv6/datagram.c:	fl6->flowi6_proto = sk->sk_protocol;
> 
> This is one just one small example situation, where flowi6_proto is a u8.

There are parts in the stack (e.g. in setsockopt code paths) that test
sk->sk_protocol vs. IPPROTO_TCP, then call tcp specific code under the sane
assumption that sk is a tcp_sock struct.

With 8bit sk_protocol, mptcp_sock structs (which is what kernel gets via
file descriptor number) would be treated as a tcp socket, because
"IPPROTO_MPTCP & 0xff" yields IPPROTO_TCP.

Changing IPPROTO_MPTCP to a value <= 255 could lead to conflicts with
real inet protocols in the future, so we can't redefine it to a 8bit
value.

If we keep sk_protocol as 8bit field, we will need to make sure that all
places testing sk_protocol == IPPROTO_TCP gain an additional sanity check
to tell tcp and mptcp sockets apart.  Moreover, any further changes to
kernel code would need same extra test, so this is a non-starter to me.

Alternatively we could change the first member of mptcp_sk struct from
inet_connection_sock to a full tcp_sock struct.  Thats roughly 1k increase
of mptcp_sock struct to ~ 3744 bytes, but then we would not have to
worry about mptcp sockets ending up in tcp code paths.

If you think such a size increase is ok I could give that solution a shot
and see what other problems with 8bit sk_protocol might remain.

Mat reported /sys/kernel/debug/tracing/trace lists mptcp sockets as
IPPROTO_TCP in the '8 bit sk_protocol' case, but if thats the only issue
this might have a smaller/acceptable "avoidance fix".

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [MPTCP] Re: [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
  2020-01-09 21:25     ` [MPTCP] " Florian Westphal
@ 2020-01-10  2:43       ` David Miller
  0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2020-01-10  2:43 UTC (permalink / raw)
  To: fw; +Cc: mathew.j.martineau, netdev, mptcp, ast, daniel, bpf

From: Florian Westphal <fw@strlen.de>
Date: Thu, 9 Jan 2020 22:25:28 +0100

> If you think such a size increase is ok I could give that solution a shot
> and see what other problems with 8bit sk_protocol might remain.
> 
> Mat reported /sys/kernel/debug/tracing/trace lists mptcp sockets as
> IPPROTO_TCP in the '8 bit sk_protocol' case, but if thats the only issue
> this might have a smaller/acceptable "avoidance fix".

Ok I'll apply the current series as-is with the 16-bit sk_protocol.  Let's
see how this goes in practice.

Thanks for explaining.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2020-01-10  2:43 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20200109155924.30122-1-mathew.j.martineau@linux.intel.com>
2020-01-09 15:59 ` [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value Mat Martineau
2020-01-09 19:05   ` David Miller
2020-01-09 21:25     ` [MPTCP] " Florian Westphal
2020-01-10  2:43       ` David Miller

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).