* [bpf-next PATCH v2 0/4] bpf, sockmap BPF_F_INGRESS support
@ 2018-03-27 17:23 John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 1/4] bpf: sockmap redirect ingress support John Fastabend
                   ` (3 more replies)
  0 siblings, 4 replies; 8+ messages in thread
From: John Fastabend @ 2018-03-27 17:23 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, davem

This series adds BPF_F_INGRESS flag support to the sockmap redirect APIs,
bringing them in line with the cls_bpf redirect APIs.

We add it to both variants of sockmap programs: the first patch adds
support for the tx ULP (sk_msg) hooks and the third patch adds support
for the recv skb (stream verdict) hooks. Patches two and four add tests
for the corresponding ingress redirect hooks.
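
As a rough sketch of the intended usage (map definition omitted, the
key value is a placeholder; names follow the sample programs), an
sk_msg program can now pass the flag straight to the redirect helper:

SEC("sk_msg")
int msg_redir_ingress(struct sk_msg_md *msg)
{
	/* Queue the msg data on the peer sock's ingress queue
	 * instead of its transmit path.
	 */
	return bpf_msg_redirect_map(msg, &sock_map_redir, 0, BPF_F_INGRESS);
}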

Follow-on patches can address busy polling support, but my next series
will move the sockmap sample program into selftests.

v2: added static to a function definition, caught by the kbuild bot

---

John Fastabend (4):
      bpf: sockmap redirect ingress support
      bpf: sockmap, add BPF_F_INGRESS tests
      bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
      bpf: sockmap, more BPF_SK_SKB_STREAM_VERDICT tests


 include/linux/filter.h          |    2 
 include/net/sock.h              |    1 
 kernel/bpf/sockmap.c            |  290 ++++++++++++++++++++++++++++++++++++---
 net/core/filter.c               |    4 -
 net/ipv4/tcp.c                  |   10 +
 samples/sockmap/sockmap_kern.c  |   62 +++++++-
 samples/sockmap/sockmap_test.sh |   40 +++++
 samples/sockmap/sockmap_user.c  |   58 ++++++++
 8 files changed, 430 insertions(+), 37 deletions(-)


* [bpf-next PATCH v2 1/4] bpf: sockmap redirect ingress support
  2018-03-27 17:23 [bpf-next PATCH v2 0/4] bpf, sockmap BPF_F_INGRESS support John Fastabend
@ 2018-03-27 17:23 ` John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 2/4] bpf: sockmap, add BPF_F_INGRESS tests John Fastabend
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 8+ messages in thread
From: John Fastabend @ 2018-03-27 17:23 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, davem

Add support for the BPF_F_INGRESS flag in the sk_msg redirect helper.
To do this, add a scatterlist queue that receiving socks check before
calling into the regular recvmsg path. Additionally, because the poll
wakeup logic only checked the skb receive queue, we need a hook in the
TCP stack (similar to the write side) so that we have a way to wake up
polling socks when a scatterlist is redirected to that sock.

After this, all that is needed is for the redirect helper to push the
scatterlist onto the psock ingress queue.
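
Nothing changes from user space: the redirected data is read through
the normal receive path. A minimal sketch of a receiver (socket fd
name is a placeholder, error handling trimmed):

#include <poll.h>
#include <sys/types.h>
#include <sys/socket.h>

/* rx_sock is the sock the BPF program redirected the msg data into */
static ssize_t read_ingress(int rx_sock, char *buf, size_t len)
{
	struct pollfd pfd = { .fd = rx_sock, .events = POLLIN };

	/* tcp_poll() now reports readable when the psock ingress
	 * queue is non-empty, via the stream_memory_read hook.
	 */
	if (poll(&pfd, 1, -1) <= 0)
		return -1;
	return recv(rx_sock, buf, len, 0);
}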

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/linux/filter.h |    1 
 include/net/sock.h     |    1 
 kernel/bpf/sockmap.c   |  198 +++++++++++++++++++++++++++++++++++++++++++++++-
 net/core/filter.c      |    2 
 net/ipv4/tcp.c         |   10 ++
 5 files changed, 207 insertions(+), 5 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 109d05c..d0e207f 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -521,6 +521,7 @@ struct sk_msg_buff {
 	__u32 key;
 	__u32 flags;
 	struct bpf_map *map;
+	struct list_head list;
 };
 
 /* Compute the linear packet data range [data, data_end) which
diff --git a/include/net/sock.h b/include/net/sock.h
index 7093111..b8ff435 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1085,6 +1085,7 @@ struct proto {
 #endif
 
 	bool			(*stream_memory_free)(const struct sock *sk);
+	bool			(*stream_memory_read)(const struct sock *sk);
 	/* Memory pressure */
 	void			(*enter_memory_pressure)(struct sock *sk);
 	void			(*leave_memory_pressure)(struct sock *sk);
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 69c5bcc..b30bada 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,8 @@
 #include <linux/mm.h>
 #include <net/strparser.h>
 #include <net/tcp.h>
+#include <linux/ptr_ring.h>
+#include <net/inet_common.h>
 
 #define SOCK_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
@@ -82,6 +84,7 @@ struct smap_psock {
 	int sg_size;
 	int eval;
 	struct sk_msg_buff *cork;
+	struct list_head ingress;
 
 	struct strparser strp;
 	struct bpf_prog *bpf_tx_msg;
@@ -103,6 +106,8 @@ struct smap_psock {
 };
 
 static void smap_release_sock(struct smap_psock *psock, struct sock *sock);
+static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			   int nonblock, int flags, int *addr_len);
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size);
 static int bpf_tcp_sendpage(struct sock *sk, struct page *page,
 			    int offset, size_t size, int flags);
@@ -112,6 +117,21 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk)
 	return rcu_dereference_sk_user_data(sk);
 }
 
+static bool bpf_tcp_stream_read(const struct sock *sk)
+{
+	struct smap_psock *psock;
+	bool empty = true;
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto out;
+	empty = list_empty(&psock->ingress);
+out:
+	rcu_read_unlock();
+	return !empty;
+}
+
 static struct proto tcp_bpf_proto;
 static int bpf_tcp_init(struct sock *sk)
 {
@@ -135,6 +155,8 @@ static int bpf_tcp_init(struct sock *sk)
 	if (psock->bpf_tx_msg) {
 		tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
 		tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
+		tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
+		tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
 	}
 
 	sk->sk_prot = &tcp_bpf_proto;
@@ -170,6 +192,7 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 {
 	void (*close_fun)(struct sock *sk, long timeout);
 	struct smap_psock_map_entry *e, *tmp;
+	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 	struct sock *osk;
 
@@ -188,6 +211,12 @@ static void bpf_tcp_close(struct sock *sk, long timeout)
 	close_fun = psock->save_close;
 
 	write_lock_bh(&sk->sk_callback_lock);
+	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
+		list_del(&md->list);
+		free_start_sg(psock->sock, md);
+		kfree(md);
+	}
+
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
 		osk = cmpxchg(e->entry, sk, NULL);
 		if (osk == sk) {
@@ -468,6 +497,72 @@ static unsigned int smap_do_tx_msg(struct sock *sk,
 	return _rc;
 }
 
+static int bpf_tcp_ingress(struct sock *sk, int apply_bytes,
+			   struct smap_psock *psock,
+			   struct sk_msg_buff *md, int flags)
+{
+	bool apply = apply_bytes;
+	size_t size, copied = 0;
+	struct sk_msg_buff *r;
+	int err = 0, i;
+
+	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_KERNEL);
+	if (unlikely(!r))
+		return -ENOMEM;
+
+	lock_sock(sk);
+	r->sg_start = md->sg_start;
+	i = md->sg_start;
+
+	do {
+		r->sg_data[i] = md->sg_data[i];
+
+		size = (apply && apply_bytes < md->sg_data[i].length) ?
+			apply_bytes : md->sg_data[i].length;
+
+		if (!sk_wmem_schedule(sk, size)) {
+			if (!copied)
+				err = -ENOMEM;
+			break;
+		}
+
+		sk_mem_charge(sk, size);
+		r->sg_data[i].length = size;
+		md->sg_data[i].length -= size;
+		md->sg_data[i].offset += size;
+		copied += size;
+
+		if (md->sg_data[i].length) {
+			get_page(sg_page(&r->sg_data[i]));
+			r->sg_end = (i + 1) == MAX_SKB_FRAGS ? 0 : i + 1;
+		} else {
+			i++;
+			if (i == MAX_SKB_FRAGS)
+				i = 0;
+			r->sg_end = i;
+		}
+
+		if (apply) {
+			apply_bytes -= size;
+			if (!apply_bytes)
+				break;
+		}
+	} while (i != md->sg_end);
+
+	md->sg_start = i;
+
+	if (!err) {
+		list_add_tail(&r->list, &psock->ingress);
+		sk->sk_data_ready(sk);
+	} else {
+		free_start_sg(sk, r);
+		kfree(r);
+	}
+
+	release_sock(sk);
+	return err;
+}
+
 static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 				       struct sk_msg_buff *md,
 				       int flags)
@@ -475,6 +570,7 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 	struct smap_psock *psock;
 	struct scatterlist *sg;
 	int i, err, free = 0;
+	bool ingress = !!(md->flags & BPF_F_INGRESS);
 
 	sg = md->sg_data;
 
@@ -487,9 +583,14 @@ static int bpf_tcp_sendmsg_do_redirect(struct sock *sk, int send,
 		goto out_rcu;
 
 	rcu_read_unlock();
-	lock_sock(sk);
-	err = bpf_tcp_push(sk, send, md, flags, false);
-	release_sock(sk);
+
+	if (ingress) {
+		err = bpf_tcp_ingress(sk, send, psock, md, flags);
+	} else {
+		lock_sock(sk);
+		err = bpf_tcp_push(sk, send, md, flags, false);
+		release_sock(sk);
+	}
 	smap_release_sock(psock, sk);
 	if (unlikely(err))
 		goto out;
@@ -623,6 +724,89 @@ static int bpf_exec_tx_verdict(struct smap_psock *psock,
 	return err;
 }
 
+static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
+			   int nonblock, int flags, int *addr_len)
+{
+	struct iov_iter *iter = &msg->msg_iter;
+	struct smap_psock *psock;
+	int copied = 0;
+
+	if (unlikely(flags & MSG_ERRQUEUE))
+		return inet_recv_error(sk, msg, len, addr_len);
+
+	rcu_read_lock();
+	psock = smap_psock_sk(sk);
+	if (unlikely(!psock))
+		goto out;
+
+	if (unlikely(!refcount_inc_not_zero(&psock->refcnt)))
+		goto out;
+
+	if (!skb_queue_empty(&sk->sk_receive_queue))
+		goto out;
+	rcu_read_unlock();
+
+	lock_sock(sk);
+	while (copied != len) {
+		struct scatterlist *sg;
+		struct sk_msg_buff *md;
+		int i;
+
+		md = list_first_entry_or_null(&psock->ingress,
+					      struct sk_msg_buff, list);
+		if (unlikely(!md))
+			break;
+		i = md->sg_start;
+		do {
+			struct page *page;
+			int n, copy;
+
+			sg = &md->sg_data[i];
+			copy = sg->length;
+			page = sg_page(sg);
+
+			if (copied + copy > len)
+				copy = len - copied;
+
+			n = copy_page_to_iter(page, sg->offset, copy, iter);
+			if (n != copy) {
+				md->sg_start = i;
+				release_sock(sk);
+				smap_release_sock(psock, sk);
+				return -EFAULT;
+			}
+
+			copied += copy;
+			sg->offset += copy;
+			sg->length -= copy;
+			sk_mem_uncharge(sk, copy);
+
+			if (!sg->length) {
+				i++;
+				if (i == MAX_SKB_FRAGS)
+					i = 0;
+				put_page(page);
+			}
+			if (copied == len)
+				break;
+		} while (i != md->sg_end);
+		md->sg_start = i;
+
+		if (!sg->length && md->sg_start == md->sg_end) {
+			list_del(&md->list);
+			kfree(md);
+		}
+	}
+
+	release_sock(sk);
+	smap_release_sock(psock, sk);
+	return copied;
+out:
+	rcu_read_unlock();
+	return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len);
+}
+
+
 static int bpf_tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
 {
 	int flags = msg->msg_flags | MSG_NO_SHARED_FRAGS;
@@ -1107,6 +1291,7 @@ static void sock_map_remove_complete(struct bpf_stab *stab)
 static void smap_gc_work(struct work_struct *w)
 {
 	struct smap_psock_map_entry *e, *tmp;
+	struct sk_msg_buff *md, *mtmp;
 	struct smap_psock *psock;
 
 	psock = container_of(w, struct smap_psock, gc_work);
@@ -1131,6 +1316,12 @@ static void smap_gc_work(struct work_struct *w)
 		kfree(psock->cork);
 	}
 
+	list_for_each_entry_safe(md, mtmp, &psock->ingress, list) {
+		list_del(&md->list);
+		free_start_sg(psock->sock, md);
+		kfree(md);
+	}
+
 	list_for_each_entry_safe(e, tmp, &psock->maps, list) {
 		list_del(&e->list);
 		kfree(e);
@@ -1160,6 +1351,7 @@ static struct smap_psock *smap_init_psock(struct sock *sock,
 	INIT_WORK(&psock->tx_work, smap_tx_work);
 	INIT_WORK(&psock->gc_work, smap_gc_work);
 	INIT_LIST_HEAD(&psock->maps);
+	INIT_LIST_HEAD(&psock->ingress);
 	refcount_set(&psock->refcnt, 1);
 
 	rcu_assign_sk_user_data(sock, psock);
diff --git a/net/core/filter.c b/net/core/filter.c
index 00c711c..11b1f16 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1894,7 +1894,7 @@ struct sock *do_sk_redirect_map(struct sk_buff *skb)
 	   struct bpf_map *, map, u32, key, u64, flags)
 {
 	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags))
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return SK_DROP;
 
 	msg->key = key;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c31be3..bccc4c2 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -485,6 +485,14 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags)
 	}
 }
 
+static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
+					  int target, struct sock *sk)
+{
+	return (tp->rcv_nxt - tp->copied_seq >= target) ||
+		(sk->sk_prot->stream_memory_read ?
+		sk->sk_prot->stream_memory_read(sk) : false);
+}
+
 /*
  *	Wait for a TCP event.
  *
@@ -554,7 +562,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
 		    tp->urg_data)
 			target++;
 
-		if (tp->rcv_nxt - tp->copied_seq >= target)
+		if (tcp_stream_is_readable(tp, target, sk))
 			mask |= EPOLLIN | EPOLLRDNORM;
 
 		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {


* [bpf-next PATCH v2 2/4] bpf: sockmap, add BPF_F_INGRESS tests
  2018-03-27 17:23 [bpf-next PATCH v2 0/4] bpf, sockmap BPF_F_INGRESS support John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 1/4] bpf: sockmap redirect ingress support John Fastabend
@ 2018-03-27 17:23 ` John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT: John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 4/4] bpf: sockmap, more BPF_SK_SKB_STREAM_VERDICT tests John Fastabend
  3 siblings, 0 replies; 8+ messages in thread
From: John Fastabend @ 2018-03-27 17:23 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, davem

Add a set of tests to verify that the ingress flag in the redirect
helpers works correctly with various msg sizes.
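
For reference, the new invocations take the form below (values are
illustrative; cgroup path as used throughout the sample):

  ./sockmap --cgroup /mnt/cgroup2/ -t sendmsg -r 1 -i 1 -l 1 \
	--txmsg_redir --txmsg_ingress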

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 samples/sockmap/sockmap_kern.c  |   41 +++++++++++++++++++++++++++++----------
 samples/sockmap/sockmap_test.sh |   22 ++++++++++++++++++++-
 samples/sockmap/sockmap_user.c  |   35 +++++++++++++++++++++++++++++++++
 3 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index 9ad5ba7..ca28722 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -54,7 +54,7 @@ struct bpf_map_def SEC("maps") sock_map_redir = {
 	.type = BPF_MAP_TYPE_SOCKMAP,
 	.key_size = sizeof(int),
 	.value_size = sizeof(int),
-	.max_entries = 1,
+	.max_entries = 20,
 };
 
 struct bpf_map_def SEC("maps") sock_apply_bytes = {
@@ -78,6 +78,13 @@ struct bpf_map_def SEC("maps") sock_pull_bytes = {
 	.max_entries = 2
 };
 
+struct bpf_map_def SEC("maps") sock_redir_flags = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
+
 
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
@@ -197,8 +204,9 @@ int bpf_prog5(struct sk_msg_md *msg)
 SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
-	int *bytes, zero = 0, one = 1;
-	int *start, *end;
+	int *bytes, zero = 0, one = 1, key = 0;
+	int *start, *end, *f;
+	__u64 flags = 0;
 
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
@@ -210,15 +218,22 @@ int bpf_prog6(struct sk_msg_md *msg)
 	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
 	if (start && end)
 		bpf_msg_pull_data(msg, *start, *end, 0);
-	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
 }
 
 SEC("sk_msg4")
 int bpf_prog7(struct sk_msg_md *msg)
 {
-	int err1 = 0, err2 = 0, zero = 0, one = 1;
-	int *bytes, *start, *end, len1, len2;
+	int err1 = 0, err2 = 0, zero = 0, one = 1, key = 0;
+	int *f, *bytes, *start, *end, len1, len2;
+	__u64 flags = 0;
 
+		int err;
 	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
 	if (bytes)
 		err1 = bpf_msg_apply_bytes(msg, *bytes);
@@ -229,7 +244,6 @@ int bpf_prog7(struct sk_msg_md *msg)
 	start = bpf_map_lookup_elem(&sock_pull_bytes, &zero);
 	end = bpf_map_lookup_elem(&sock_pull_bytes, &one);
 	if (start && end) {
-		int err;
 
 		bpf_printk("sk_msg2: pull(%i:%i)\n",
 			   start ? *start : 0, end ? *end : 0);
@@ -241,9 +255,16 @@ int bpf_prog7(struct sk_msg_md *msg)
 		bpf_printk("sk_msg2: length update %i->%i\n",
 			   len1, len2);
 	}
-	bpf_printk("sk_msg3: redirect(%iB) err1=%i err2=%i\n",
-		   len1, err1, err2);
-	return bpf_msg_redirect_map(msg, &sock_map_redir, zero, 0);
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+	bpf_printk("sk_msg3: err %i\n", err);
+	return err;
 }
 
 SEC("sk_msg5")
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
index 6d8cc40..13b205f 100755
--- a/samples/sockmap/sockmap_test.sh
+++ b/samples/sockmap/sockmap_test.sh
@@ -1,5 +1,5 @@
 #Test a bunch of positive cases to verify basic functionality
-for prog in "--txmsg" "--txmsg_redir" "--txmsg_drop"; do
+for prog in "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
 for t in "sendmsg" "sendpage"; do
 for r in 1 10 100; do
 	for i in 1 10 100; do
@@ -100,6 +100,16 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
 # Test apply and redirect with larger value than send
 r=1
 i=8
@@ -113,6 +123,16 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_ingress"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
+
 # Test apply and redirect with apply that never reaches limit
 r=1024
 i=1
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index 07aa237..f7503f4 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -64,6 +64,7 @@
 int txmsg_cork;
 int txmsg_start;
 int txmsg_end;
+int txmsg_ingress;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -83,6 +84,7 @@
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
 	{"txmsg_start", required_argument,	NULL, 's'},
 	{"txmsg_end",	required_argument,	NULL, 'e'},
+	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
 	{0, 0, NULL, 0 }
 };
 
@@ -793,6 +795,39 @@ int main(int argc, char **argv)
 				return err;
 			}
 		}
+
+		if (txmsg_ingress) {
+			int in = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[6], &i, &in, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+			i = 1;
+			err = bpf_map_update_elem(map_fd[1], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+			err = bpf_map_update_elem(map_fd[2], &i, &p1, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p1 redir): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 2;
+			err = bpf_map_update_elem(map_fd[2], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (p2 txmsg): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
 	}
 
 	if (txmsg_drop)


* [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
  2018-03-27 17:23 [bpf-next PATCH v2 0/4] bpf, sockmap BPF_F_INGRESS support John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 1/4] bpf: sockmap redirect ingress support John Fastabend
  2018-03-27 17:23 ` [bpf-next PATCH v2 2/4] bpf: sockmap, add BPF_F_INGRESS tests John Fastabend
@ 2018-03-27 17:23 ` John Fastabend
  2018-03-28 14:21   ` Daniel Borkmann
  2018-03-27 17:23 ` [bpf-next PATCH v2 4/4] bpf: sockmap, more BPF_SK_SKB_STREAM_VERDICT tests John Fastabend
  3 siblings, 1 reply; 8+ messages in thread
From: John Fastabend @ 2018-03-27 17:23 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, davem

Add support for the BPF_F_INGRESS flag in the skb redirect helper. To
do this, convert the skb into a scatterlist and push it onto the
ingress queue. This is the same logic used in the sk_msg redirect
helper, so it should feel familiar.
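
A stream verdict program can then request ingress delivery the same
way; a minimal sketch (map definition omitted, the key value is a
placeholder; names follow the sample programs):

SEC("sk_skb")
int skb_redir_ingress(struct __sk_buff *skb)
{
	/* Deliver the skb to the target sock's receive path
	 * rather than its transmit path.
	 */
	return bpf_sk_redirect_map(skb, &sock_map, 0, BPF_F_INGRESS);
}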

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 include/linux/filter.h |    1 +
 kernel/bpf/sockmap.c   |   94 +++++++++++++++++++++++++++++++++++++++---------
 net/core/filter.c      |    2 +
 3 files changed, 78 insertions(+), 19 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index d0e207f..7de778a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -521,6 +521,7 @@ struct sk_msg_buff {
 	__u32 key;
 	__u32 flags;
 	struct bpf_map *map;
+	struct sk_buff *skb;
 	struct list_head list;
 };
 
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index b30bada..b52e1c4 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -785,7 +785,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 				i++;
 				if (i == MAX_SKB_FRAGS)
 					i = 0;
-				put_page(page);
+				if (!md->skb)
+					put_page(page);
 			}
 			if (copied == len)
 				break;
@@ -794,6 +795,8 @@ static int bpf_tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 
 		if (!sg->length && md->sg_start == md->sg_end) {
 			list_del(&md->list);
+			if (md->skb)
+				consume_skb(md->skb);
 			kfree(md);
 		}
 	}
@@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
 		__SK_DROP;
 }
 
+static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
+{
+	struct sock *sk = psock->sock;
+	int copied = 0, num_sg;
+	struct sk_msg_buff *r;
+
+	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
+	if (unlikely(!r))
+		return -EAGAIN;
+
+	if (!sk_rmem_schedule(sk, skb, skb->len)) {
+		kfree(r);
+		return -EAGAIN;
+	}
+	sk_mem_charge(sk, skb->len);
+
+	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
+	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
+	if (unlikely(num_sg < 0)) {
+		kfree(r);
+		return num_sg;
+	}
+	copied = skb->len;
+	r->sg_start = 0;
+	r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
+	r->skb = skb;
+	list_add_tail(&r->list, &psock->ingress);
+	sk->sk_data_ready(sk);
+	return copied;
+}
+
 static void smap_do_verdict(struct smap_psock *psock, struct sk_buff *skb)
 {
+	struct smap_psock *peer;
 	struct sock *sk;
+	__u32 in;
 	int rc;
 
 	rc = smap_verdict_func(psock, skb);
 	switch (rc) {
 	case __SK_REDIRECT:
 		sk = do_sk_redirect_map(skb);
-		if (likely(sk)) {
-			struct smap_psock *peer = smap_psock_sk(sk);
-
-			if (likely(peer &&
-				   test_bit(SMAP_TX_RUNNING, &peer->state) &&
-				   !sock_flag(sk, SOCK_DEAD) &&
-				   sock_writeable(sk))) {
-				skb_set_owner_w(skb, sk);
-				skb_queue_tail(&peer->rxqueue, skb);
-				schedule_work(&peer->tx_work);
-				break;
-			}
+		if (!sk) {
+			kfree_skb(skb);
+			break;
+		}
+
+		peer = smap_psock_sk(sk);
+		in = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
+
+		if (unlikely(!peer || sock_flag(sk, SOCK_DEAD) ||
+			     !test_bit(SMAP_TX_RUNNING, &peer->state))) {
+			kfree_skb(skb);
+			break;
+		}
+
+		if (!in && sock_writeable(sk)) {
+			skb_set_owner_w(skb, sk);
+			skb_queue_tail(&peer->rxqueue, skb);
+			schedule_work(&peer->tx_work);
+			break;
+		} else if (in &&
+			   atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) {
+			skb_queue_tail(&peer->rxqueue, skb);
+			schedule_work(&peer->tx_work);
+			break;
 		}
 	/* Fall through and free skb otherwise */
 	case __SK_DROP:
@@ -1127,15 +1175,23 @@ static void smap_tx_work(struct work_struct *w)
 	}
 
 	while ((skb = skb_dequeue(&psock->rxqueue))) {
+		__u32 flags;
+
 		rem = skb->len;
 		off = 0;
 start:
+		flags = (TCP_SKB_CB(skb)->bpf.flags) & BPF_F_INGRESS;
 		do {
-			if (likely(psock->sock->sk_socket))
-				n = skb_send_sock_locked(psock->sock,
-							 skb, off, rem);
-			else
+			if (likely(psock->sock->sk_socket)) {
+				if (flags)
+					n = smap_do_ingress(psock, skb);
+				else
+					n = skb_send_sock_locked(psock->sock,
+								 skb, off, rem);
+			} else {
 				n = -EINVAL;
+			}
+
 			if (n <= 0) {
 				if (n == -EAGAIN) {
 					/* Retry when space is available */
@@ -1153,7 +1209,9 @@ static void smap_tx_work(struct work_struct *w)
 			rem -= n;
 			off += n;
 		} while (rem);
-		kfree_skb(skb);
+
+		if (!flags)
+			kfree_skb(skb);
 	}
 out:
 	release_sock(psock->sock);
diff --git a/net/core/filter.c b/net/core/filter.c
index 11b1f16..b46916d 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1855,7 +1855,7 @@ int skb_do_redirect(struct sk_buff *skb)
 	struct tcp_skb_cb *tcb = TCP_SKB_CB(skb);
 
 	/* If user passes invalid input drop the packet. */
-	if (unlikely(flags))
+	if (unlikely(flags & ~(BPF_F_INGRESS)))
 		return SK_DROP;
 
 	tcb->bpf.key = key;


* [bpf-next PATCH v2 4/4] bpf: sockmap, more BPF_SK_SKB_STREAM_VERDICT tests
  2018-03-27 17:23 [bpf-next PATCH v2 0/4] bpf, sockmap BPF_F_INGRESS support John Fastabend
                   ` (2 preceding siblings ...)
  2018-03-27 17:23 ` [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT: John Fastabend
@ 2018-03-27 17:23 ` John Fastabend
  3 siblings, 0 replies; 8+ messages in thread
From: John Fastabend @ 2018-03-27 17:23 UTC (permalink / raw)
  To: ast, daniel; +Cc: netdev, davem

Add BPF_SK_SKB_STREAM_VERDICT tests for the ingress hook. While we are
at it, also bring the stream tests in line with the msg-based testing.

A map for skb options is added so userland can push options to the BPF
programs.
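
For example, the new combinations the script runs look like this
(values are illustrative; cgroup path as used throughout the sample):

  ./sockmap --cgroup /mnt/cgroup2/ -t sendpage -r 1 -i 1 -l 1 \
	--txmsg_redir --txmsg_skb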

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
 samples/sockmap/sockmap_kern.c  |   21 ++++++++++++++++++---
 samples/sockmap/sockmap_test.sh |   20 +++++++++++++++++++-
 samples/sockmap/sockmap_user.c  |   23 +++++++++++++++++++++++
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/samples/sockmap/sockmap_kern.c b/samples/sockmap/sockmap_kern.c
index ca28722..9ff8bc5 100644
--- a/samples/sockmap/sockmap_kern.c
+++ b/samples/sockmap/sockmap_kern.c
@@ -85,6 +85,12 @@ struct bpf_map_def SEC("maps") sock_redir_flags = {
 	.max_entries = 1
 };
 
+struct bpf_map_def SEC("maps") sock_skb_opts = {
+	.type = BPF_MAP_TYPE_ARRAY,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 1
+};
 
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
@@ -97,15 +103,24 @@ int bpf_prog2(struct __sk_buff *skb)
 {
 	__u32 lport = skb->local_port;
 	__u32 rport = skb->remote_port;
-	int ret = 0;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
 
 	if (lport == 10000)
 		ret = 10;
 	else
 		ret = 1;
 
-	bpf_printk("sockmap: %d -> %d @ %d\n", lport, bpf_ntohl(rport), ret);
-	return bpf_sk_redirect_map(skb, &sock_map, ret, 0);
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
 }
 
 SEC("sockops")
diff --git a/samples/sockmap/sockmap_test.sh b/samples/sockmap/sockmap_test.sh
index 13b205f..ace75f0 100755
--- a/samples/sockmap/sockmap_test.sh
+++ b/samples/sockmap/sockmap_test.sh
@@ -1,5 +1,5 @@
 #Test a bunch of positive cases to verify basic functionality
-for prog in "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
+for prog in  "--txmsg_redir --txmsg_skb" "--txmsg_redir --txmsg_ingress" "--txmsg" "--txmsg_redir" "--txmsg_redir --txmsg_ingress" "--txmsg_drop"; do
 for t in "sendmsg" "sendpage"; do
 for r in 1 10 100; do
 	for i in 1 10 100; do
@@ -109,6 +109,15 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 1 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
 
 # Test apply and redirect with larger value than send
 r=1
@@ -132,6 +141,15 @@ for t in "sendmsg" "sendpage"; do
 	sleep 2
 done
 
+prog="--txmsg_redir --txmsg_apply 2048 --txmsg_skb"
+
+for t in "sendmsg" "sendpage"; do
+	TEST="./sockmap --cgroup /mnt/cgroup2/ -t $t -r $r -i $i -l $l $prog"
+	echo $TEST
+	$TEST
+	sleep 2
+done
+
 
 # Test apply and redirect with apply that never reaches limit
 r=1024
diff --git a/samples/sockmap/sockmap_user.c b/samples/sockmap/sockmap_user.c
index f7503f4..6f23349 100644
--- a/samples/sockmap/sockmap_user.c
+++ b/samples/sockmap/sockmap_user.c
@@ -65,6 +65,7 @@
 int txmsg_start;
 int txmsg_end;
 int txmsg_ingress;
+int txmsg_skb;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -85,6 +86,7 @@
 	{"txmsg_start", required_argument,	NULL, 's'},
 	{"txmsg_end",	required_argument,	NULL, 'e'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
+	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
 	{0, 0, NULL, 0 }
 };
 
@@ -828,6 +830,27 @@ int main(int argc, char **argv)
 					err, strerror(errno));
 			}
 		}
+
+		if (txmsg_skb) {
+			int skb_fd = (test == SENDMSG || test == SENDPAGE) ? p2 : p1;
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[7], &i, &ingress, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			i = 3;
+			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+		}
 	}
 
 	if (txmsg_drop)


* Re: [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
  2018-03-27 17:23 ` [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT: John Fastabend
@ 2018-03-28 14:21   ` Daniel Borkmann
  2018-03-28 15:45     ` John Fastabend
  0 siblings, 1 reply; 8+ messages in thread
From: Daniel Borkmann @ 2018-03-28 14:21 UTC (permalink / raw)
  To: John Fastabend, ast; +Cc: netdev, davem

On 03/27/2018 07:23 PM, John Fastabend wrote:
> Add support for the BPF_F_INGRESS flag in skb redirect helper. To
> do this convert skb into a scatterlist and push into ingress queue.
> This is the same logic that is used in the sk_msg redirect helper
> so it should feel familiar.
> 
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
> ---
>  include/linux/filter.h |    1 +
>  kernel/bpf/sockmap.c   |   94 +++++++++++++++++++++++++++++++++++++++---------
>  net/core/filter.c      |    2 +
>  3 files changed, 78 insertions(+), 19 deletions(-)
[...]
>  		if (!sg->length && md->sg_start == md->sg_end) {
>  			list_del(&md->list);
> +			if (md->skb)
> +				consume_skb(md->skb);
>  			kfree(md);
>  		}
>  	}
> @@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
>  		__SK_DROP;
>  }
>  
> +static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
> +{
> +	struct sock *sk = psock->sock;
> +	int copied = 0, num_sg;
> +	struct sk_msg_buff *r;
> +
> +	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
> +	if (unlikely(!r))
> +		return -EAGAIN;
> +
> +	if (!sk_rmem_schedule(sk, skb, skb->len)) {
> +		kfree(r);
> +		return -EAGAIN;
> +	}
> +	sk_mem_charge(sk, skb->len);

Usually mem accounting is based on truesize. This is not done here since
you need the exact length of the skb for the sg list later on, right?

> +	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
> +	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
> +	if (unlikely(num_sg < 0)) {
> +		kfree(r);

Don't we need to undo the mem charge here in case of error?

> +		return num_sg;
> +	}
> +	copied = skb->len;
> +	r->sg_start = 0;
> +	r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
> +	r->skb = skb;
> +	list_add_tail(&r->list, &psock->ingress);
> +	sk->sk_data_ready(sk);
> +	return copied;
> +}


* Re: [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
  2018-03-28 14:21   ` Daniel Borkmann
@ 2018-03-28 15:45     ` John Fastabend
  2018-03-28 15:46       ` Daniel Borkmann
  0 siblings, 1 reply; 8+ messages in thread
From: John Fastabend @ 2018-03-28 15:45 UTC (permalink / raw)
  To: Daniel Borkmann, ast; +Cc: netdev, davem

On 03/28/2018 07:21 AM, Daniel Borkmann wrote:
> On 03/27/2018 07:23 PM, John Fastabend wrote:
>> Add support for the BPF_F_INGRESS flag in skb redirect helper. To
>> do this convert skb into a scatterlist and push into ingress queue.
>> This is the same logic that is used in the sk_msg redirect helper
>> so it should feel familiar.
>>
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>> ---
>>  include/linux/filter.h |    1 +
>>  kernel/bpf/sockmap.c   |   94 +++++++++++++++++++++++++++++++++++++++---------
>>  net/core/filter.c      |    2 +
>>  3 files changed, 78 insertions(+), 19 deletions(-)
> [...]
>>  		if (!sg->length && md->sg_start == md->sg_end) {
>>  			list_del(&md->list);
>> +			if (md->skb)
>> +				consume_skb(md->skb);
>>  			kfree(md);
>>  		}
>>  	}
>> @@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
>>  		__SK_DROP;
>>  }
>>  
>> +static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
>> +{
>> +	struct sock *sk = psock->sock;
>> +	int copied = 0, num_sg;
>> +	struct sk_msg_buff *r;
>> +
>> +	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
>> +	if (unlikely(!r))
>> +		return -EAGAIN;
>> +
>> +	if (!sk_rmem_schedule(sk, skb, skb->len)) {
>> +		kfree(r);
>> +		return -EAGAIN;
>> +	}
>> +	sk_mem_charge(sk, skb->len);
> 
> Usually mem accounting is based on truesize. This is not done here since
> you need the exact length of the skb for the sg list later on, right?

Correct.

> 
>> +	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
>> +	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
>> +	if (unlikely(num_sg < 0)) {
>> +		kfree(r);
> 
> Don't we need to undo the mem charge here in case of error?
> 

Actually, I'll just move the sk_mem_charge() down below this error
then we don't need to unwind it.
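
Something along these lines (sketch only, not the final patch):

	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
	if (unlikely(num_sg < 0)) {
		kfree(r);
		return num_sg;
	}
	/* charge only once skb_to_sgvec() can no longer fail */
	sk_mem_charge(sk, skb->len);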

>> +		return num_sg;
>> +	}
>> +	copied = skb->len;
>> +	r->sg_start = 0;
>> +	r->sg_end = num_sg == MAX_SKB_FRAGS ? 0 : num_sg;
>> +	r->skb = skb;
>> +	list_add_tail(&r->list, &psock->ingress);
>> +	sk->sk_data_ready(sk);
>> +	return copied;
>> +}


* Re: [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT:
  2018-03-28 15:45     ` John Fastabend
@ 2018-03-28 15:46       ` Daniel Borkmann
  0 siblings, 0 replies; 8+ messages in thread
From: Daniel Borkmann @ 2018-03-28 15:46 UTC (permalink / raw)
  To: John Fastabend, ast; +Cc: netdev, davem

On 03/28/2018 05:45 PM, John Fastabend wrote:
> On 03/28/2018 07:21 AM, Daniel Borkmann wrote:
>> On 03/27/2018 07:23 PM, John Fastabend wrote:
>>> Add support for the BPF_F_INGRESS flag in skb redirect helper. To
>>> do this convert skb into a scatterlist and push into ingress queue.
>>> This is the same logic that is used in the sk_msg redirect helper
>>> so it should feel familiar.
>>>
>>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>>> ---
>>>  include/linux/filter.h |    1 +
>>>  kernel/bpf/sockmap.c   |   94 +++++++++++++++++++++++++++++++++++++++---------
>>>  net/core/filter.c      |    2 +
>>>  3 files changed, 78 insertions(+), 19 deletions(-)
>> [...]
>>>  		if (!sg->length && md->sg_start == md->sg_end) {
>>>  			list_del(&md->list);
>>> +			if (md->skb)
>>> +				consume_skb(md->skb);
>>>  			kfree(md);
>>>  		}
>>>  	}
>>> @@ -1045,27 +1048,72 @@ static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb)
>>>  		__SK_DROP;
>>>  }
>>>  
>>> +static int smap_do_ingress(struct smap_psock *psock, struct sk_buff *skb)
>>> +{
>>> +	struct sock *sk = psock->sock;
>>> +	int copied = 0, num_sg;
>>> +	struct sk_msg_buff *r;
>>> +
>>> +	r = kzalloc(sizeof(struct sk_msg_buff), __GFP_NOWARN | GFP_ATOMIC);
>>> +	if (unlikely(!r))
>>> +		return -EAGAIN;
>>> +
>>> +	if (!sk_rmem_schedule(sk, skb, skb->len)) {
>>> +		kfree(r);
>>> +		return -EAGAIN;
>>> +	}
>>> +	sk_mem_charge(sk, skb->len);
>>
>> Usually mem accounting is based on truesize. This is not done here since
>> you need the exact length of the skb for the sg list later on, right?
> 
> Correct.
> 
>>
>>> +	sg_init_table(r->sg_data, MAX_SKB_FRAGS);
>>> +	num_sg = skb_to_sgvec(skb, r->sg_data, 0, skb->len);
>>> +	if (unlikely(num_sg < 0)) {
>>> +		kfree(r);
>>
>> Don't we need to undo the mem charge here in case of error?
>>
> 
> Actually, I'll just move the sk_mem_charge() down below this error
> then we don't need to unwind it.

Agree, good point.



Thread overview: 8+ messages
2018-03-27 17:23 [bpf-next PATCH v2 0/4] bpf, sockmap BPF_F_INGRESS support John Fastabend
2018-03-27 17:23 ` [bpf-next PATCH v2 1/4] bpf: sockmap redirect ingress support John Fastabend
2018-03-27 17:23 ` [bpf-next PATCH v2 2/4] bpf: sockmap, add BPF_F_INGRESS tests John Fastabend
2018-03-27 17:23 ` [bpf-next PATCH v2 3/4] bpf: sockmap, BPF_F_INGRESS flag for BPF_SK_SKB_STREAM_VERDICT: John Fastabend
2018-03-28 14:21   ` Daniel Borkmann
2018-03-28 15:45     ` John Fastabend
2018-03-28 15:46       ` Daniel Borkmann
2018-03-27 17:23 ` [bpf-next PATCH v2 4/4] bpf: sockmap, more BPF_SK_SKB_STREAM_VERDICT tests John Fastabend
