* [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
[not found] <20200109155924.30122-1-mathew.j.martineau@linux.intel.com>
@ 2020-01-09 15:59 ` Mat Martineau
2020-01-09 19:05 ` David Miller
0 siblings, 1 reply; 4+ messages in thread
From: Mat Martineau @ 2020-01-09 15:59 UTC (permalink / raw)
To: netdev, mptcp
Cc: Mat Martineau, Alexei Starovoitov, Daniel Borkmann, bpf,
Paolo Abeni, Matthieu Baerts
Match the 16-bit width of skbuff->protocol. Fills an 8-bit hole so
sizeof(struct sock) does not change.
Also take care of BPF field access for sk_type/sk_protocol. Both of them
are now outside the bitfield, so we can use load instructions without
further shifting/masking.
v5 -> v6:
- update eBPF accessors, too (Intel's kbuild test robot)
v2 -> v3:
- keep 'sk_type' 2 bytes aligned (Eric)
v1 -> v2:
- preserve sk_pacing_shift as bit field (Eric)
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: bpf@vger.kernel.org
Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
---
include/net/sock.h | 25 ++++------------
include/trace/events/sock.h | 2 +-
net/core/filter.c | 60 ++++++++++++++-----------------------
3 files changed, 28 insertions(+), 59 deletions(-)
diff --git a/include/net/sock.h b/include/net/sock.h
index 091e55428415..8766f9bc3e70 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -436,30 +436,15 @@ struct sock {
* Because of non atomicity rules, all
* changes are protected by socket lock.
*/
- unsigned int __sk_flags_offset[0];
-#ifdef __BIG_ENDIAN_BITFIELD
-#define SK_FL_PROTO_SHIFT 16
-#define SK_FL_PROTO_MASK 0x00ff0000
-
-#define SK_FL_TYPE_SHIFT 0
-#define SK_FL_TYPE_MASK 0x0000ffff
-#else
-#define SK_FL_PROTO_SHIFT 8
-#define SK_FL_PROTO_MASK 0x0000ff00
-
-#define SK_FL_TYPE_SHIFT 16
-#define SK_FL_TYPE_MASK 0xffff0000
-#endif
-
- unsigned int sk_padding : 1,
+ u8 sk_padding : 1,
sk_kern_sock : 1,
sk_no_check_tx : 1,
sk_no_check_rx : 1,
- sk_userlocks : 4,
- sk_protocol : 8,
- sk_type : 16;
- u16 sk_gso_max_segs;
+ sk_userlocks : 4;
u8 sk_pacing_shift;
+ u16 sk_type;
+ u16 sk_protocol;
+ u16 sk_gso_max_segs;
unsigned long sk_lingertime;
struct proto *sk_prot_creator;
rwlock_t sk_callback_lock;
diff --git a/include/trace/events/sock.h b/include/trace/events/sock.h
index 51fe9f6719eb..3ff12b90048d 100644
--- a/include/trace/events/sock.h
+++ b/include/trace/events/sock.h
@@ -147,7 +147,7 @@ TRACE_EVENT(inet_sock_set_state,
__field(__u16, sport)
__field(__u16, dport)
__field(__u16, family)
- __field(__u8, protocol)
+ __field(__u16, protocol)
__array(__u8, saddr, 4)
__array(__u8, daddr, 4)
__array(__u8, saddr_v6, 16)
diff --git a/net/core/filter.c b/net/core/filter.c
index 42fd17c48c5f..ef01c5599501 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7607,21 +7607,21 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock, type):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_TYPE_MASK) != BITS_PER_BYTE * 2);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
- *target_size = 2;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_type),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_type,
+ sizeof_field(struct sock, sk_type),
+ target_size));
break;
case offsetof(struct bpf_sock, protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg,
- offsetof(struct sock, __sk_flags_offset));
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT);
- *target_size = 1;
+ *insn++ = BPF_LDX_MEM(
+ BPF_FIELD_SIZEOF(struct sock, sk_protocol),
+ si->dst_reg, si->src_reg,
+ bpf_target_off(struct sock, sk_protocol,
+ sizeof_field(struct sock, sk_protocol),
+ target_size));
break;
case offsetof(struct bpf_sock, src_ip4):
@@ -7903,20 +7903,13 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct bpf_sock_addr, type):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_type);
break;
case offsetof(struct bpf_sock_addr, protocol):
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(
- struct bpf_sock_addr_kern, struct sock, sk,
- __sk_flags_offset, BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern,
+ struct sock, sk, sk_protocol);
break;
case offsetof(struct bpf_sock_addr, msg_src_ip4):
@@ -8835,11 +8828,11 @@ sk_reuseport_is_valid_access(int off, int size,
skb, \
SKB_FIELD)
-#define SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(SK_FIELD, BPF_SIZE, EXTRA_OFF) \
- SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(struct sk_reuseport_kern, \
- struct sock, \
- sk, \
- SK_FIELD, BPF_SIZE, EXTRA_OFF)
+#define SK_REUSEPORT_LOAD_SK_FIELD(SK_FIELD) \
+ SOCK_ADDR_LOAD_NESTED_FIELD(struct sk_reuseport_kern, \
+ struct sock, \
+ sk, \
+ SK_FIELD)
static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
const struct bpf_insn *si,
@@ -8863,16 +8856,7 @@ static u32 sk_reuseport_convert_ctx_access(enum bpf_access_type type,
break;
case offsetof(struct sk_reuseport_md, ip_protocol):
- BUILD_BUG_ON(HWEIGHT32(SK_FL_PROTO_MASK) != BITS_PER_BYTE);
- SK_REUSEPORT_LOAD_SK_FIELD_SIZE_OFF(__sk_flags_offset,
- BPF_W, 0);
- *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK);
- *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg,
- SK_FL_PROTO_SHIFT);
- /* SK_FL_PROTO_MASK and SK_FL_PROTO_SHIFT are endian
- * aware. No further narrowing or masking is needed.
- */
- *target_size = 1;
+ SK_REUSEPORT_LOAD_SK_FIELD(sk_protocol);
break;
case offsetof(struct sk_reuseport_md, data_end):
--
2.24.1
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
2020-01-09 15:59 ` [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value Mat Martineau
@ 2020-01-09 19:05 ` David Miller
2020-01-09 21:25 ` [MPTCP] " Florian Westphal
0 siblings, 1 reply; 4+ messages in thread
From: David Miller @ 2020-01-09 19:05 UTC (permalink / raw)
To: mathew.j.martineau
Cc: netdev, mptcp, ast, daniel, bpf, pabeni, matthieu.baerts
From: Mat Martineau <mathew.j.martineau@linux.intel.com>
Date: Thu, 9 Jan 2020 07:59:15 -0800
> Match the 16-bit width of skbuff->protocol. Fills an 8-bit hole so
> sizeof(struct sock) does not change.
>
> Also take care of BPF field access for sk_type/sk_protocol. Both of them
> are now outside the bitfield, so we can use load instructions without
> further shifting/masking.
>
> v5 -> v6:
> - update eBPF accessors, too (Intel's kbuild test robot)
> v2 -> v3:
> - keep 'sk_type' 2 bytes aligned (Eric)
> v1 -> v2:
> - preserve sk_pacing_shift as bit field (Eric)
>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: bpf@vger.kernel.org
> Co-developed-by: Paolo Abeni <pabeni@redhat.com>
> Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
This is worrisome for me.
We have lots of places that now are going to be assigning sk->sk_protocol
into a u8 somewhere else. A lot of them are ok because limits are enforced
in various places, but for example:
net/ipv6/udp.c: fl6.flowi6_proto = sk->sk_protocol;
net/l2tp/l2tp_ip6.c: fl6.flowi6_proto = sk->sk_protocol;
net/ipv6/inet6_connection_sock.c: fl6->flowi6_proto = sk->sk_protocol;
net/ipv6/af_inet6.c: fl6.flowi6_proto = sk->sk_protocol;
net/ipv6/datagram.c: fl6->flowi6_proto = sk->sk_protocol;
This is one just one small example situation, where flowi6_proto is a u8.
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [MPTCP] Re: [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
2020-01-09 19:05 ` David Miller
@ 2020-01-09 21:25 ` Florian Westphal
2020-01-10 2:43 ` David Miller
0 siblings, 1 reply; 4+ messages in thread
From: Florian Westphal @ 2020-01-09 21:25 UTC (permalink / raw)
To: David Miller; +Cc: mathew.j.martineau, netdev, mptcp, ast, daniel, bpf
David Miller <davem@davemloft.net> wrote:
> From: Mat Martineau <mathew.j.martineau@linux.intel.com>
> Date: Thu, 9 Jan 2020 07:59:15 -0800
>
> > Match the 16-bit width of skbuff->protocol. Fills an 8-bit hole so
> > sizeof(struct sock) does not change.
> >
> > Also take care of BPF field access for sk_type/sk_protocol. Both of them
> > are now outside the bitfield, so we can use load instructions without
> > further shifting/masking.
> >
> > v5 -> v6:
> > - update eBPF accessors, too (Intel's kbuild test robot)
> > v2 -> v3:
> > - keep 'sk_type' 2 bytes aligned (Eric)
> > v1 -> v2:
> > - preserve sk_pacing_shift as bit field (Eric)
> >
> > Cc: Alexei Starovoitov <ast@kernel.org>
> > Cc: Daniel Borkmann <daniel@iogearbox.net>
> > Cc: bpf@vger.kernel.org
> > Co-developed-by: Paolo Abeni <pabeni@redhat.com>
> > Signed-off-by: Paolo Abeni <pabeni@redhat.com>
> > Co-developed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> > Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
> > Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
>
> This is worrisome for me.
>
> We have lots of places that now are going to be assigning sk->sk_protocol
> into a u8 somewhere else. A lot of them are ok because limits are enforced
> in various places, but for example:
>
> net/ipv6/udp.c: fl6.flowi6_proto = sk->sk_protocol;
> net/l2tp/l2tp_ip6.c: fl6.flowi6_proto = sk->sk_protocol;
>
> net/ipv6/inet6_connection_sock.c: fl6->flowi6_proto = sk->sk_protocol;
>
> net/ipv6/af_inet6.c: fl6.flowi6_proto = sk->sk_protocol;
> net/ipv6/datagram.c: fl6->flowi6_proto = sk->sk_protocol;
>
> This is one just one small example situation, where flowi6_proto is a u8.
There are parts in the stack (e.g. in setsockopt code paths) that test
sk->sk_protocol vs. IPPROTO_TCP, then call tcp specific code under the sane
assumption that sk is a tcp_sock struct.
With 8bit sk_protocol, mptcp_sock structs (which is what kernel gets via
file descriptor number) would be treated as a tcp socket, because
"IPPROTO_MPTCP & 0xff" yields IPPROTO_TCP.
Changing IPPROTO_MPTCP to a value <= 255 could lead to conflicts with
real inet protocols in the future, so we can't redefine it to a 8bit
value.
If we keep sk_protocol as 8bit field, we will need to make sure that all
places testing sk_protocol == IPPROTO_TCP gain an additional sanity check
to tell tcp and mptcp sockets apart. Moreover, any further changes to
kernel code would need same extra test, so this is a non-starter to me.
Alternatively we could change the first member of mptcp_sk struct from
inet_connection_sock to a full tcp_sock struct. Thats roughly 1k increase
of mptcp_sock struct to ~ 3744 bytes, but then we would not have to
worry about mptcp sockets ending up in tcp code paths.
If you think such a size increase is ok I could give that solution a shot
and see what other problems with 8bit sk_protocol might remain.
Mat reported /sys/kernel/debug/tracing/trace lists mptcp sockets as
IPPROTO_TCP in the '8 bit sk_protocol' case, but if thats the only issue
this might have a smaller/acceptable "avoidance fix".
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [MPTCP] Re: [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value
2020-01-09 21:25 ` [MPTCP] " Florian Westphal
@ 2020-01-10 2:43 ` David Miller
0 siblings, 0 replies; 4+ messages in thread
From: David Miller @ 2020-01-10 2:43 UTC (permalink / raw)
To: fw; +Cc: mathew.j.martineau, netdev, mptcp, ast, daniel, bpf
From: Florian Westphal <fw@strlen.de>
Date: Thu, 9 Jan 2020 22:25:28 +0100
> If you think such a size increase is ok I could give that solution a shot
> and see what other problems with 8bit sk_protocol might remain.
>
> Mat reported /sys/kernel/debug/tracing/trace lists mptcp sockets as
> IPPROTO_TCP in the '8 bit sk_protocol' case, but if thats the only issue
> this might have a smaller/acceptable "avoidance fix".
Ok I'll apply the current series as-is with the 16-bit sk_protocol. Let's
see how this goes in practice.
Thanks for explaining.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2020-01-10 2:43 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
[not found] <20200109155924.30122-1-mathew.j.martineau@linux.intel.com>
2020-01-09 15:59 ` [PATCH net-next v7 02/11] sock: Make sk_protocol a 16-bit value Mat Martineau
2020-01-09 19:05 ` David Miller
2020-01-09 21:25 ` [MPTCP] " Florian Westphal
2020-01-10 2:43 ` David Miller
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).