All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes
@ 2015-08-15  2:31 Willem de Bruijn
  2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
                   ` (4 more replies)
  0 siblings, 5 replies; 11+ messages in thread
From: Willem de Bruijn @ 2015-08-15  2:31 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, daniel, ast, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Allow programmable fanout modes. Support both classical BPF programs
passed directly and extended BPF programs passed by file descriptor.

One use case is packet steering by deep packet inspection, for
instance steering by application layer header fields.

Separate the configuration of the fanout mode and the configuration
of the program, to allow dynamic updates to the latter at runtime.

Changes
  v1 -> v2:
    - follow SO_LOCK_FILTER semantics on filter updates
    - only accept eBPF programs of type BPF_PROG_TYPE_SOCKET_FILTER
    - rename PACKET_FANOUT_BPF to PACKET_FANOUT_CBPF to match
      man 2 bpf usage: "classic" vs. "extended" BPF.

Willem de Bruijn (4):
  packet: add classic BPF fanout mode
  packet: add extended BPF fanout mode
  selftests/net: test classic bpf fanout mode
  selftests/net: test extended BPF fanout mode

 include/uapi/linux/if_packet.h             |   3 +
 net/packet/af_packet.c                     | 130 ++++++++++++++++++++++++++++-
 net/packet/internal.h                      |   5 +-
 tools/testing/selftests/net/psock_fanout.c |  69 ++++++++++++++-
 tools/testing/selftests/net/psock_lib.h    |  29 +++++--
 5 files changed, 222 insertions(+), 14 deletions(-)

-- 
2.5.0.276.gf5e568e

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH net-next v2 1/4] packet: add classic BPF fanout mode
  2015-08-15  2:31 [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes Willem de Bruijn
@ 2015-08-15  2:31 ` Willem de Bruijn
  2015-08-16  4:58   ` Alexei Starovoitov
                     ` (2 more replies)
  2015-08-15  2:31 ` [PATCH net-next v2 2/4] packet: add extended " Willem de Bruijn
                   ` (3 subsequent siblings)
  4 siblings, 3 replies; 11+ messages in thread
From: Willem de Bruijn @ 2015-08-15  2:31 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, daniel, ast, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program
to select a socket.

This avoids having to keep adding special case fanout modes. One
example use case is application layer load balancing. The QUIC
protocol, for instance, encodes a connection ID in UDP payload.

Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data
associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the
only user so far.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/uapi/linux/if_packet.h |  2 +
 net/packet/af_packet.c         | 99 +++++++++++++++++++++++++++++++++++++++++-
 net/packet/internal.h          |  5 ++-
 3 files changed, 104 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index d3d715f8c..a4bb16f 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -55,6 +55,7 @@ struct sockaddr_ll {
 #define PACKET_TX_HAS_OFF		19
 #define PACKET_QDISC_BYPASS		20
 #define PACKET_ROLLOVER_STATS		21
+#define PACKET_FANOUT_DATA		22
 
 #define PACKET_FANOUT_HASH		0
 #define PACKET_FANOUT_LB		1
@@ -62,6 +63,7 @@ struct sockaddr_ll {
 #define PACKET_FANOUT_ROLLOVER		3
 #define PACKET_FANOUT_RND		4
 #define PACKET_FANOUT_QM		5
+#define PACKET_FANOUT_CBPF		6
 #define PACKET_FANOUT_FLAG_ROLLOVER	0x1000
 #define PACKET_FANOUT_FLAG_DEFRAG	0x8000
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5afe53..8869d07 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -92,6 +92,7 @@
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
 #endif
+#include <linux/bpf.h>
 
 #include "internal.h"
 
@@ -1410,6 +1411,22 @@ static unsigned int fanout_demux_qm(struct packet_fanout *f,
 	return skb_get_queue_mapping(skb) % num;
 }
 
+static unsigned int fanout_demux_bpf(struct packet_fanout *f,
+				     struct sk_buff *skb,
+				     unsigned int num)
+{
+	struct bpf_prog *prog;
+	unsigned int ret = 0;
+
+	rcu_read_lock();
+	prog = rcu_dereference(f->bpf_prog);
+	if (prog)
+		ret = BPF_PROG_RUN(prog, skb) % num;
+	rcu_read_unlock();
+
+	return ret;
+}
+
 static bool fanout_has_flag(struct packet_fanout *f, u16 flag)
 {
 	return f->flags & (flag >> 8);
@@ -1454,6 +1471,9 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 	case PACKET_FANOUT_ROLLOVER:
 		idx = fanout_demux_rollover(f, skb, 0, false, num);
 		break;
+	case PACKET_FANOUT_CBPF:
+		idx = fanout_demux_bpf(f, skb, num);
+		break;
 	}
 
 	if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
@@ -1502,6 +1522,74 @@ static bool match_fanout_group(struct packet_type *ptype, struct sock *sk)
 	return false;
 }
 
+static void fanout_init_data(struct packet_fanout *f)
+{
+	switch (f->type) {
+	case PACKET_FANOUT_LB:
+		atomic_set(&f->rr_cur, 0);
+		break;
+	case PACKET_FANOUT_CBPF:
+		RCU_INIT_POINTER(f->bpf_prog, NULL);
+		break;
+	}
+}
+
+static void __fanout_set_data_bpf(struct packet_fanout *f, struct bpf_prog *new)
+{
+	struct bpf_prog *old;
+
+	spin_lock(&f->lock);
+	old = rcu_dereference_protected(f->bpf_prog, lockdep_is_held(&f->lock));
+	rcu_assign_pointer(f->bpf_prog, new);
+	spin_unlock(&f->lock);
+
+	if (old) {
+		synchronize_net();
+		bpf_prog_destroy(old);
+	}
+}
+
+static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
+				unsigned int len)
+{
+	struct bpf_prog *new;
+	struct sock_fprog fprog;
+	int ret;
+
+	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+	if (len != sizeof(fprog))
+		return -EINVAL;
+	if (copy_from_user(&fprog, data, len))
+		return -EFAULT;
+
+	ret = bpf_prog_create_from_user(&new, &fprog, NULL);
+	if (ret)
+		return ret;
+
+	__fanout_set_data_bpf(po->fanout, new);
+	return 0;
+}
+
+static int fanout_set_data(struct packet_sock *po, char __user *data,
+			   unsigned int len)
+{
+	switch (po->fanout->type) {
+	case PACKET_FANOUT_CBPF:
+		return fanout_set_data_cbpf(po, data, len);
+	default:
+		return -EINVAL;
+	};
+}
+
+static void fanout_release_data(struct packet_fanout *f)
+{
+	switch (f->type) {
+	case PACKET_FANOUT_CBPF:
+		__fanout_set_data_bpf(f, NULL);
+	};
+}
+
 static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 {
 	struct packet_sock *po = pkt_sk(sk);
@@ -1519,6 +1607,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 	case PACKET_FANOUT_CPU:
 	case PACKET_FANOUT_RND:
 	case PACKET_FANOUT_QM:
+	case PACKET_FANOUT_CBPF:
 		break;
 	default:
 		return -EINVAL;
@@ -1561,10 +1650,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 		match->id = id;
 		match->type = type;
 		match->flags = flags;
-		atomic_set(&match->rr_cur, 0);
 		INIT_LIST_HEAD(&match->list);
 		spin_lock_init(&match->lock);
 		atomic_set(&match->sk_ref, 0);
+		fanout_init_data(match);
 		match->prot_hook.type = po->prot_hook.type;
 		match->prot_hook.dev = po->prot_hook.dev;
 		match->prot_hook.func = packet_rcv_fanout;
@@ -1610,6 +1699,7 @@ static void fanout_release(struct sock *sk)
 	if (atomic_dec_and_test(&f->sk_ref)) {
 		list_del(&f->list);
 		dev_remove_pack(&f->prot_hook);
+		fanout_release_data(f);
 		kfree(f);
 	}
 	mutex_unlock(&fanout_mutex);
@@ -3529,6 +3619,13 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
 
 		return fanout_add(sk, val & 0xffff, val >> 16);
 	}
+	case PACKET_FANOUT_DATA:
+	{
+		if (!po->fanout)
+			return -EINVAL;
+
+		return fanout_set_data(po, optval, optlen);
+	}
 	case PACKET_TX_HAS_OFF:
 	{
 		unsigned int val;
diff --git a/net/packet/internal.h b/net/packet/internal.h
index e20b3e8..9ee4631 100644
--- a/net/packet/internal.h
+++ b/net/packet/internal.h
@@ -79,7 +79,10 @@ struct packet_fanout {
 	u16			id;
 	u8			type;
 	u8			flags;
-	atomic_t		rr_cur;
+	union {
+		atomic_t		rr_cur;
+		struct bpf_prog __rcu	*bpf_prog;
+	};
 	struct list_head	list;
 	struct sock		*arr[PACKET_FANOUT_MAX];
 	spinlock_t		lock;
-- 
2.5.0.276.gf5e568e

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next v2 2/4] packet: add extended BPF fanout mode
  2015-08-15  2:31 [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes Willem de Bruijn
  2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
@ 2015-08-15  2:31 ` Willem de Bruijn
  2015-08-16  4:58   ` Alexei Starovoitov
  2015-08-16  9:15   ` Daniel Borkmann
  2015-08-15  2:31 ` [PATCH net-next v2 3/4] selftests/net: test classic bpf " Willem de Bruijn
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 11+ messages in thread
From: Willem de Bruijn @ 2015-08-15  2:31 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, daniel, ast, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Add fanout mode PACKET_FANOUT_EBPF that accepts an extended BPF
program to select a socket.

Update the internal eBPF program by passing to socket option
SOL_PACKET/PACKET_FANOUT_DATA a file descriptor returned by bpf().

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 include/uapi/linux/if_packet.h |  1 +
 net/packet/af_packet.c         | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/include/uapi/linux/if_packet.h b/include/uapi/linux/if_packet.h
index a4bb16f..9e7edfd 100644
--- a/include/uapi/linux/if_packet.h
+++ b/include/uapi/linux/if_packet.h
@@ -64,6 +64,7 @@ struct sockaddr_ll {
 #define PACKET_FANOUT_RND		4
 #define PACKET_FANOUT_QM		5
 #define PACKET_FANOUT_CBPF		6
+#define PACKET_FANOUT_EBPF		7
 #define PACKET_FANOUT_FLAG_ROLLOVER	0x1000
 #define PACKET_FANOUT_FLAG_DEFRAG	0x8000
 
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8869d07..7b8e39a 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1472,6 +1472,7 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev,
 		idx = fanout_demux_rollover(f, skb, 0, false, num);
 		break;
 	case PACKET_FANOUT_CBPF:
+	case PACKET_FANOUT_EBPF:
 		idx = fanout_demux_bpf(f, skb, num);
 		break;
 	}
@@ -1529,6 +1530,7 @@ static void fanout_init_data(struct packet_fanout *f)
 		atomic_set(&f->rr_cur, 0);
 		break;
 	case PACKET_FANOUT_CBPF:
+	case PACKET_FANOUT_EBPF:
 		RCU_INIT_POINTER(f->bpf_prog, NULL);
 		break;
 	}
@@ -1571,12 +1573,39 @@ static int fanout_set_data_cbpf(struct packet_sock *po, char __user *data,
 	return 0;
 }
 
+static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data,
+				unsigned int len)
+{
+	struct bpf_prog *new;
+	u32 fd;
+
+	if (sock_flag(&po->sk, SOCK_FILTER_LOCKED))
+		return -EPERM;
+	if (len != sizeof(fd))
+		return -EINVAL;
+	if (copy_from_user(&fd, data, len))
+		return -EFAULT;
+
+	new = bpf_prog_get(fd);
+	if (IS_ERR(new))
+		return PTR_ERR(new);
+	if (new->type != BPF_PROG_TYPE_SOCKET_FILTER) {
+		bpf_prog_put(new);
+		return -EINVAL;
+	}
+
+	__fanout_set_data_bpf(po->fanout, new);
+	return 0;
+}
+
 static int fanout_set_data(struct packet_sock *po, char __user *data,
 			   unsigned int len)
 {
 	switch (po->fanout->type) {
 	case PACKET_FANOUT_CBPF:
 		return fanout_set_data_cbpf(po, data, len);
+	case PACKET_FANOUT_EBPF:
+		return fanout_set_data_ebpf(po, data, len);
 	default:
 		return -EINVAL;
 	};
@@ -1586,6 +1615,7 @@ static void fanout_release_data(struct packet_fanout *f)
 {
 	switch (f->type) {
 	case PACKET_FANOUT_CBPF:
+	case PACKET_FANOUT_EBPF:
 		__fanout_set_data_bpf(f, NULL);
 	};
 }
@@ -1608,6 +1638,7 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags)
 	case PACKET_FANOUT_RND:
 	case PACKET_FANOUT_QM:
 	case PACKET_FANOUT_CBPF:
+	case PACKET_FANOUT_EBPF:
 		break;
 	default:
 		return -EINVAL;
-- 
2.5.0.276.gf5e568e

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next v2 3/4] selftests/net: test classic bpf fanout mode
  2015-08-15  2:31 [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes Willem de Bruijn
  2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
  2015-08-15  2:31 ` [PATCH net-next v2 2/4] packet: add extended " Willem de Bruijn
@ 2015-08-15  2:31 ` Willem de Bruijn
  2015-08-15  2:31 ` [PATCH net-next v2 4/4] selftests/net: test extended BPF " Willem de Bruijn
  2015-08-17 21:23 ` [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes David Miller
  4 siblings, 0 replies; 11+ messages in thread
From: Willem de Bruijn @ 2015-08-15  2:31 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, daniel, ast, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Test PACKET_FANOUT_CBPF by inserting a cBPF program that selects a
socket by payload. Requires modifying the test program to send
packets with multiple payloads.

Also fix a bug in testing the return value of mmap(): compare against MAP_FAILED rather than NULL.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 tools/testing/selftests/net/psock_fanout.c | 16 ++++++++++++----
 tools/testing/selftests/net/psock_lib.h    | 29 +++++++++++++++++++++--------
 2 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 08c2a36..baf46a2 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -19,6 +19,7 @@
  *   - PACKET_FANOUT_LB
  *   - PACKET_FANOUT_CPU
  *   - PACKET_FANOUT_ROLLOVER
+ *   - PACKET_FANOUT_CBPF
  *
  * Todo:
  * - functionality: PACKET_FANOUT_FLAG_DEFRAG
@@ -115,8 +116,8 @@ static char *sock_fanout_open_ring(int fd)
 
 	ring = mmap(0, req.tp_block_size * req.tp_block_nr,
 		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
-	if (!ring) {
-		fprintf(stderr, "packetsock ring mmap\n");
+	if (ring == MAP_FAILED) {
+		perror("packetsock ring mmap");
 		exit(1);
 	}
 
@@ -209,6 +210,7 @@ static int test_datapath(uint16_t typeflags, int port_off,
 {
 	const int expect0[] = { 0, 0 };
 	char *rings[2];
+	uint8_t type = typeflags & 0xFF;
 	int fds[2], fds_udp[2][2], ret;
 
 	fprintf(stderr, "test: datapath 0x%hx\n", typeflags);
@@ -219,6 +221,9 @@ static int test_datapath(uint16_t typeflags, int port_off,
 		fprintf(stderr, "ERROR: failed open\n");
 		exit(1);
 	}
+	if (type == PACKET_FANOUT_CBPF)
+		sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
+
 	rings[0] = sock_fanout_open_ring(fds[0]);
 	rings[1] = sock_fanout_open_ring(fds[1]);
 	pair_udp_open(fds_udp[0], PORT_BASE);
@@ -227,11 +232,11 @@ static int test_datapath(uint16_t typeflags, int port_off,
 
 	/* Send data, but not enough to overflow a queue */
 	pair_udp_send(fds_udp[0], 15);
-	pair_udp_send(fds_udp[1], 5);
+	pair_udp_send_char(fds_udp[1], 5, DATA_CHAR_1);
 	ret = sock_fanout_read(fds, rings, expect1);
 
 	/* Send more data, overflow the queue */
-	pair_udp_send(fds_udp[0], 15);
+	pair_udp_send_char(fds_udp[0], 15, DATA_CHAR_1);
 	/* TODO: ensure consistent order between expect1 and expect2 */
 	ret |= sock_fanout_read(fds, rings, expect2);
 
@@ -275,6 +280,7 @@ int main(int argc, char **argv)
 	const int expect_rb[2][2]	= { { 15, 5 },  { 20, 15 } };
 	const int expect_cpu0[2][2]	= { { 20, 0 },  { 20, 0 } };
 	const int expect_cpu1[2][2]	= { { 0, 20 },  { 0, 20 } };
+	const int expect_bpf[2][2]	= { { 15, 5 },  { 15, 20 } };
 	int port_off = 2, tries = 5, ret;
 
 	test_control_single();
@@ -295,6 +301,8 @@ int main(int argc, char **argv)
 			     port_off, expect_lb[0], expect_lb[1]);
 	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
 			     port_off, expect_rb[0], expect_rb[1]);
+	ret |= test_datapath(PACKET_FANOUT_CBPF,
+			     port_off, expect_bpf[0], expect_bpf[1]);
 
 	set_cpuaffinity(0);
 	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 37da54a..24bc7ec 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -30,6 +30,7 @@
 
 #define DATA_LEN			100
 #define DATA_CHAR			'a'
+#define DATA_CHAR_1			'b'
 
 #define PORT_BASE			8000
 
@@ -37,29 +38,36 @@
 # define __maybe_unused		__attribute__ ((__unused__))
 #endif
 
-static __maybe_unused void pair_udp_setfilter(int fd)
+static __maybe_unused void sock_setfilter(int fd, int lvl, int optnum)
 {
 	struct sock_filter bpf_filter[] = {
 		{ 0x80, 0, 0, 0x00000000 },  /* LD  pktlen		      */
-		{ 0x35, 0, 5, DATA_LEN   },  /* JGE DATA_LEN  [f goto nomatch]*/
+		{ 0x35, 0, 4, DATA_LEN   },  /* JGE DATA_LEN  [f goto nomatch]*/
 		{ 0x30, 0, 0, 0x00000050 },  /* LD  ip[80]		      */
-		{ 0x15, 0, 3, DATA_CHAR  },  /* JEQ DATA_CHAR [f goto nomatch]*/
-		{ 0x30, 0, 0, 0x00000051 },  /* LD  ip[81]		      */
-		{ 0x15, 0, 1, DATA_CHAR  },  /* JEQ DATA_CHAR [f goto nomatch]*/
+		{ 0x15, 1, 0, DATA_CHAR  },  /* JEQ DATA_CHAR   [t goto match]*/
+		{ 0x15, 0, 1, DATA_CHAR_1},  /* JEQ DATA_CHAR_1 [t goto match]*/
 		{ 0x06, 0, 0, 0x00000060 },  /* RET match	              */
 		{ 0x06, 0, 0, 0x00000000 },  /* RET no match		      */
 	};
 	struct sock_fprog bpf_prog;
 
+	if (lvl == SOL_PACKET && optnum == PACKET_FANOUT_DATA)
+		bpf_filter[5].code = 0x16;   /* RET A			      */
+
 	bpf_prog.filter = bpf_filter;
 	bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
-	if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
+	if (setsockopt(fd, lvl, optnum, &bpf_prog,
 		       sizeof(bpf_prog))) {
 		perror("setsockopt SO_ATTACH_FILTER");
 		exit(1);
 	}
 }
 
+static __maybe_unused void pair_udp_setfilter(int fd)
+{
+	sock_setfilter(fd, SOL_SOCKET, SO_ATTACH_FILTER);
+}
+
 static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
 {
 	struct sockaddr_in saddr, daddr;
@@ -96,11 +104,11 @@ static __maybe_unused void pair_udp_open(int fds[], uint16_t port)
 	}
 }
 
-static __maybe_unused void pair_udp_send(int fds[], int num)
+static __maybe_unused void pair_udp_send_char(int fds[], int num, char payload)
 {
 	char buf[DATA_LEN], rbuf[DATA_LEN];
 
-	memset(buf, DATA_CHAR, sizeof(buf));
+	memset(buf, payload, sizeof(buf));
 	while (num--) {
 		/* Should really handle EINTR and EAGAIN */
 		if (write(fds[0], buf, sizeof(buf)) != sizeof(buf)) {
@@ -118,6 +126,11 @@ static __maybe_unused void pair_udp_send(int fds[], int num)
 	}
 }
 
+static __maybe_unused void pair_udp_send(int fds[], int num)
+{
+	return pair_udp_send_char(fds, num, DATA_CHAR);
+}
+
 static __maybe_unused void pair_udp_close(int fds[])
 {
 	close(fds[0]);
-- 
2.5.0.276.gf5e568e

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* [PATCH net-next v2 4/4] selftests/net: test extended BPF fanout mode
  2015-08-15  2:31 [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes Willem de Bruijn
                   ` (2 preceding siblings ...)
  2015-08-15  2:31 ` [PATCH net-next v2 3/4] selftests/net: test classic bpf " Willem de Bruijn
@ 2015-08-15  2:31 ` Willem de Bruijn
  2015-08-17 21:23 ` [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes David Miller
  4 siblings, 0 replies; 11+ messages in thread
From: Willem de Bruijn @ 2015-08-15  2:31 UTC (permalink / raw)
  To: netdev; +Cc: davem, edumazet, daniel, ast, Willem de Bruijn

From: Willem de Bruijn <willemb@google.com>

Test PACKET_FANOUT_EBPF by inserting a program into the kernel
with bpf(), then attaching it to the fanout group. Observe the same
payload-based distribution as in the PACKET_FANOUT_CBPF test.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 tools/testing/selftests/net/psock_fanout.c | 53 ++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)

diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index baf46a2..4124593 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -20,6 +20,7 @@
  *   - PACKET_FANOUT_CPU
  *   - PACKET_FANOUT_ROLLOVER
  *   - PACKET_FANOUT_CBPF
+ *   - PACKET_FANOUT_EBPF
  *
  * Todo:
  * - functionality: PACKET_FANOUT_FLAG_DEFRAG
@@ -45,7 +46,9 @@
 #include <arpa/inet.h>
 #include <errno.h>
 #include <fcntl.h>
+#include <linux/unistd.h>	/* for __NR_bpf */
 #include <linux/filter.h>
+#include <linux/bpf.h>
 #include <linux/if_packet.h>
 #include <net/ethernet.h>
 #include <netinet/ip.h>
@@ -92,6 +95,51 @@ static int sock_fanout_open(uint16_t typeflags, int num_packets)
 	return fd;
 }
 
+static void sock_fanout_set_ebpf(int fd)
+{
+	const int len_off = __builtin_offsetof(struct __sk_buff, len);
+	struct bpf_insn prog[] = {
+		{ BPF_ALU64 | BPF_MOV | BPF_X,   6, 1, 0, 0 },
+		{ BPF_LDX   | BPF_W   | BPF_MEM, 0, 6, len_off, 0 },
+		{ BPF_JMP   | BPF_JGE | BPF_K,   0, 0, 1, DATA_LEN },
+		{ BPF_JMP   | BPF_JA  | BPF_K,   0, 0, 4, 0 },
+		{ BPF_LD    | BPF_B   | BPF_ABS, 0, 0, 0, 0x50 },
+		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 2, DATA_CHAR },
+		{ BPF_JMP   | BPF_JEQ | BPF_K,   0, 0, 1, DATA_CHAR_1 },
+		{ BPF_ALU   | BPF_MOV | BPF_K,   0, 0, 0, 0 },
+		{ BPF_JMP   | BPF_EXIT,          0, 0, 0, 0 }
+	};
+	char log_buf[512];
+	union bpf_attr attr;
+	int pfd;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = (unsigned long) prog;
+	attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+	attr.license = (unsigned long) "GPL";
+	attr.log_buf = (unsigned long) log_buf,
+	attr.log_size = sizeof(log_buf),
+	attr.log_level = 1,
+
+	pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+	if (pfd < 0) {
+		perror("bpf");
+		fprintf(stderr, "bpf verifier:\n%s\n", log_buf);
+		exit(1);
+	}
+
+	if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+		perror("fanout data ebpf");
+		exit(1);
+	}
+
+	if (close(pfd)) {
+		perror("close ebpf");
+		exit(1);
+	}
+}
+
 static char *sock_fanout_open_ring(int fd)
 {
 	struct tpacket_req req = {
@@ -223,6 +271,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
 	}
 	if (type == PACKET_FANOUT_CBPF)
 		sock_setfilter(fds[0], SOL_PACKET, PACKET_FANOUT_DATA);
+	else if (type == PACKET_FANOUT_EBPF)
+		sock_fanout_set_ebpf(fds[0]);
 
 	rings[0] = sock_fanout_open_ring(fds[0]);
 	rings[1] = sock_fanout_open_ring(fds[1]);
@@ -301,8 +351,11 @@ int main(int argc, char **argv)
 			     port_off, expect_lb[0], expect_lb[1]);
 	ret |= test_datapath(PACKET_FANOUT_ROLLOVER,
 			     port_off, expect_rb[0], expect_rb[1]);
+
 	ret |= test_datapath(PACKET_FANOUT_CBPF,
 			     port_off, expect_bpf[0], expect_bpf[1]);
+	ret |= test_datapath(PACKET_FANOUT_EBPF,
+			     port_off, expect_bpf[0], expect_bpf[1]);
 
 	set_cpuaffinity(0);
 	ret |= test_datapath(PACKET_FANOUT_CPU, port_off,
-- 
2.5.0.276.gf5e568e

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v2 1/4] packet: add classic BPF fanout mode
  2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
@ 2015-08-16  4:58   ` Alexei Starovoitov
  2015-08-16  9:15   ` Daniel Borkmann
  2015-08-17 14:29   ` Eric Dumazet
  2 siblings, 0 replies; 11+ messages in thread
From: Alexei Starovoitov @ 2015-08-16  4:58 UTC (permalink / raw)
  To: Willem de Bruijn, netdev; +Cc: davem, edumazet, daniel

On 8/14/15 7:31 PM, Willem de Bruijn wrote:
> From: Willem de Bruijn<willemb@google.com>
>
> Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program
> to select a socket.
>
> This avoids having to keep adding special case fanout modes. One
> example use case is application layer load balancing. The QUIC
> protocol, for instance, encodes a connection ID in UDP payload.
>
> Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data
> associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the
> only user so far.
>
> Signed-off-by: Willem de Bruijn<willemb@google.com>

bpf bits looks good.
Acked-by: Alexei Starovoitov <ast@plumgrid.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v2 2/4] packet: add extended BPF fanout mode
  2015-08-15  2:31 ` [PATCH net-next v2 2/4] packet: add extended " Willem de Bruijn
@ 2015-08-16  4:58   ` Alexei Starovoitov
  2015-08-16  9:15   ` Daniel Borkmann
  1 sibling, 0 replies; 11+ messages in thread
From: Alexei Starovoitov @ 2015-08-16  4:58 UTC (permalink / raw)
  To: Willem de Bruijn, netdev; +Cc: davem, edumazet, daniel

On 8/14/15 7:31 PM, Willem de Bruijn wrote:
> From: Willem de Bruijn<willemb@google.com>
>
> Add fanout mode PACKET_FANOUT_EBPF that accepts an extended BPF
> program to select a socket.
>
> Update the internal eBPF program by passing to socket option
> SOL_PACKET/PACKET_FANOUT_DATA a file descriptor returned by bpf().
>
> Signed-off-by: Willem de Bruijn<willemb@google.com>

Acked-by: Alexei Starovoitov <ast@plumgrid.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v2 1/4] packet: add classic BPF fanout mode
  2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
  2015-08-16  4:58   ` Alexei Starovoitov
@ 2015-08-16  9:15   ` Daniel Borkmann
  2015-08-17 14:29   ` Eric Dumazet
  2 siblings, 0 replies; 11+ messages in thread
From: Daniel Borkmann @ 2015-08-16  9:15 UTC (permalink / raw)
  To: Willem de Bruijn, netdev; +Cc: davem, edumazet, ast

On 08/15/2015 04:31 AM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program
> to select a socket.
>
> This avoids having to keep adding special case fanout modes. One
> example use case is application layer load balancing. The QUIC
> protocol, for instance, encodes a connection ID in UDP payload.
>
> Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data
> associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the
> only user so far.
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>

Patch looks good to me as well (sorry for the lag, on travel).

Thanks Willem!

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v2 2/4] packet: add extended BPF fanout mode
  2015-08-15  2:31 ` [PATCH net-next v2 2/4] packet: add extended " Willem de Bruijn
  2015-08-16  4:58   ` Alexei Starovoitov
@ 2015-08-16  9:15   ` Daniel Borkmann
  1 sibling, 0 replies; 11+ messages in thread
From: Daniel Borkmann @ 2015-08-16  9:15 UTC (permalink / raw)
  To: Willem de Bruijn, netdev; +Cc: davem, edumazet, ast

On 08/15/2015 04:31 AM, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
>
> Add fanout mode PACKET_FANOUT_EBPF that accepts an extended BPF
> program to select a socket.
>
> Update the internal eBPF program by passing to socket option
> SOL_PACKET/PACKET_FANOUT_DATA a file descriptor returned by bpf().
>
> Signed-off-by: Willem de Bruijn <willemb@google.com>

Acked-by: Daniel Borkmann <daniel@iogearbox.net>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v2 1/4] packet: add classic BPF fanout mode
  2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
  2015-08-16  4:58   ` Alexei Starovoitov
  2015-08-16  9:15   ` Daniel Borkmann
@ 2015-08-17 14:29   ` Eric Dumazet
  2 siblings, 0 replies; 11+ messages in thread
From: Eric Dumazet @ 2015-08-17 14:29 UTC (permalink / raw)
  To: Willem de Bruijn; +Cc: netdev, davem, edumazet, daniel, ast

On Fri, 2015-08-14 at 22:31 -0400, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Add fanout mode PACKET_FANOUT_CBPF that accepts a classic BPF program
> to select a socket.
> 
> This avoids having to keep adding special case fanout modes. One
> example use case is application layer load balancing. The QUIC
> protocol, for instance, encodes a connection ID in UDP payload.
> 
> Also add socket option SOL_PACKET/PACKET_FANOUT_DATA that updates data
> associated with the socket group. Fanout mode PACKET_FANOUT_CBPF is the
> only user so far.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---

Oops, this was the patch I meant to approve

Acked-by: Eric Dumazet <edumazet@google.com>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes
  2015-08-15  2:31 [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes Willem de Bruijn
                   ` (3 preceding siblings ...)
  2015-08-15  2:31 ` [PATCH net-next v2 4/4] selftests/net: test extended BPF " Willem de Bruijn
@ 2015-08-17 21:23 ` David Miller
  4 siblings, 0 replies; 11+ messages in thread
From: David Miller @ 2015-08-17 21:23 UTC (permalink / raw)
  To: willemb; +Cc: netdev, edumazet, daniel, ast

From: Willem de Bruijn <willemb@google.com>
Date: Fri, 14 Aug 2015 22:31:33 -0400

> From: Willem de Bruijn <willemb@google.com>
> 
> Allow programmable fanout modes. Support both classical BPF programs
> passed directly and extended BPF programs passed by file descriptor.
> 
> One use case is packet steering by deep packet inspection, for
> instance for packet steering by application layer header fields.
> 
> Separate the configuration of the fanout mode and the configuration
> of the program, to allow dynamic updates to the latter at runtime.
> 
> Changes
>   v1 -> v2:
>     - follow SO_LOCK_FILTER semantics on filter updates
>     - only accept eBPF programs of type BPF_PROG_TYPE_SOCKET_FILTER
>     - rename PACKET_FANOUT_BPF to PACKET_FANOUT_CBPF to match
>       man 2 bpf usage: "classic" vs. "extended" BPF.

Series applied, although I hope that synchronize_net() doesn't become
a scalability issue in some workload in the future.

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2015-08-17 21:23 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-15  2:31 [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes Willem de Bruijn
2015-08-15  2:31 ` [PATCH net-next v2 1/4] packet: add classic BPF fanout mode Willem de Bruijn
2015-08-16  4:58   ` Alexei Starovoitov
2015-08-16  9:15   ` Daniel Borkmann
2015-08-17 14:29   ` Eric Dumazet
2015-08-15  2:31 ` [PATCH net-next v2 2/4] packet: add extended " Willem de Bruijn
2015-08-16  4:58   ` Alexei Starovoitov
2015-08-16  9:15   ` Daniel Borkmann
2015-08-15  2:31 ` [PATCH net-next v2 3/4] selftests/net: test classic bpf " Willem de Bruijn
2015-08-15  2:31 ` [PATCH net-next v2 4/4] selftests/net: test extended BPF " Willem de Bruijn
2015-08-17 21:23 ` [PATCH net-next v2 0/4] packet: add cBPF and eBPF fanout modes David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.