BPF Archive on lore.kernel.org
 help / color / Atom feed
From: Toshiaki Makita <toshiaki.makita1@gmail.com>
To: Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	"David S. Miller" <davem@davemloft.net>,
	Jakub Kicinski <jakub.kicinski@netronome.com>,
	Jesper Dangaard Brouer <hawk@kernel.org>,
	John Fastabend <john.fastabend@gmail.com>,
	Jamal Hadi Salim <jhs@mojatatu.com>,
	Cong Wang <xiyou.wangcong@gmail.com>,
	Jiri Pirko <jiri@resnulli.us>
Cc: Toshiaki Makita <toshiaki.makita1@gmail.com>,
	netdev@vger.kernel.org, bpf@vger.kernel.org,
	William Tu <u9012063@gmail.com>
Subject: [RFC PATCH bpf-next 06/14] xdp_flow: Add flow entry insertion/deletion logic in UMH
Date: Tue, 13 Aug 2019 21:05:50 +0900
Message-ID: <20190813120558.6151-7-toshiaki.makita1@gmail.com> (raw)
In-Reply-To: <20190813120558.6151-1-toshiaki.makita1@gmail.com>

This logic will be used when xdp_flow kmod requests flow
insertion/deleteion.

On insertion, find a free entry and populate it, then update next index
pointer of its previous entry. On deletion, set the next index pointer
of the prev entry to the next index of the entry to be deleted.

Signed-off-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
---
 net/xdp_flow/umh_bpf.h      |  15 ++
 net/xdp_flow/xdp_flow_umh.c | 470 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 483 insertions(+), 2 deletions(-)

diff --git a/net/xdp_flow/umh_bpf.h b/net/xdp_flow/umh_bpf.h
index b4fe0c6..4e4633f 100644
--- a/net/xdp_flow/umh_bpf.h
+++ b/net/xdp_flow/umh_bpf.h
@@ -15,4 +15,19 @@ struct xdp_flow_mask_entry {
 	int next;
 };
 
+static inline bool flow_equal(const struct xdp_flow_key *key1,
+			      const struct xdp_flow_key *key2)
+{
+	long *lkey1 = (long *)key1;
+	long *lkey2 = (long *)key2;
+	int i;
+
+	for (i = 0; i < sizeof(*key1); i += sizeof(long)) {
+		if (*lkey1++ != *lkey2++)
+			return false;
+	}
+
+	return true;
+}
+
 #endif
diff --git a/net/xdp_flow/xdp_flow_umh.c b/net/xdp_flow/xdp_flow_umh.c
index e35666a..9a4769b 100644
--- a/net/xdp_flow/xdp_flow_umh.c
+++ b/net/xdp_flow/xdp_flow_umh.c
@@ -19,6 +19,8 @@
 extern char xdp_flow_bpf_end;
 int progfile_fd;
 
+#define zalloc(size) calloc(1, (size))
+
 /* FIXME: syslog is used for easy debugging. As writing /dev/log can be stuck
  * due to reader side, should use another log mechanism like kmsg.
  */
@@ -38,6 +40,8 @@ struct netdev_info {
 	struct netdev_info_key key;
 	struct hlist_node node;
 	struct bpf_object *obj;
+	int free_slot_top;
+	int free_slots[MAX_FLOW_MASKS];
 };
 
 DEFINE_HASHTABLE(netdev_info_table, 16);
@@ -268,6 +272,57 @@ static struct netdev_info *get_netdev_info(const struct mbox_request *req)
 	return netdev_info;
 }
 
+static void init_flow_masks_free_slot(struct netdev_info *netdev_info)
+{
+	int i;
+
+	for (i = 0; i < MAX_FLOW_MASKS; i++)
+		netdev_info->free_slots[MAX_FLOW_MASKS - 1 - i] = i;
+	netdev_info->free_slot_top = MAX_FLOW_MASKS - 1;
+}
+
+static int get_flow_masks_free_slot(const struct netdev_info *netdev_info)
+{
+	if (netdev_info->free_slot_top < 0)
+		return -ENOBUFS;
+
+	return netdev_info->free_slots[netdev_info->free_slot_top];
+}
+
+static int add_flow_masks_free_slot(struct netdev_info *netdev_info, int slot)
+{
+	if (unlikely(netdev_info->free_slot_top >= MAX_FLOW_MASKS - 1)) {
+		pr_warn("BUG: free_slot overflow: top=%d, slot=%d\n",
+			netdev_info->free_slot_top, slot);
+		return -EOVERFLOW;
+	}
+
+	netdev_info->free_slots[++netdev_info->free_slot_top] = slot;
+
+	return 0;
+}
+
+static void delete_flow_masks_free_slot(struct netdev_info *netdev_info,
+					int slot)
+{
+	int top_slot;
+
+	if (unlikely(netdev_info->free_slot_top < 0)) {
+		pr_warn("BUG: free_slot underflow: top=%d, slot=%d\n",
+			netdev_info->free_slot_top, slot);
+		return;
+	}
+
+	top_slot = netdev_info->free_slots[netdev_info->free_slot_top];
+	if (unlikely(top_slot != slot)) {
+		pr_warn("BUG: inconsistent free_slot top: top_slot=%d, slot=%d\n",
+			top_slot, slot);
+		return;
+	}
+
+	netdev_info->free_slot_top--;
+}
+
 static int handle_load(const struct mbox_request *req, __u32 *prog_id)
 {
 	struct netdev_info *netdev_info;
@@ -291,6 +346,8 @@ static int handle_load(const struct mbox_request *req, __u32 *prog_id)
 	}
 	netdev_info->key.ifindex = key.ifindex;
 
+	init_flow_masks_free_slot(netdev_info);
+
 	prog_fd = load_bpf(req->ifindex, &netdev_info->obj);
 	if (prog_fd < 0) {
 		err = prog_fd;
@@ -331,14 +388,423 @@ static int handle_unload(const struct mbox_request *req)
 	return 0;
 }
 
+static int get_table_fd(const struct netdev_info *netdev_info,
+			const char *table_name)
+{
+	char errbuf[ERRBUF_SIZE];
+	struct bpf_map *map;
+	int map_fd;
+	int err;
+
+	map = bpf_object__find_map_by_name(netdev_info->obj, table_name);
+	if (!map) {
+		pr_err("BUG: %s map not found.\n", table_name);
+		return -ENOENT;
+	}
+
+	map_fd = bpf_map__fd(map);
+	if (map_fd < 0) {
+		err = libbpf_err(map_fd, errbuf);
+		pr_err("Invalid map fd: %s\n", errbuf);
+		return err;
+	}
+
+	return map_fd;
+}
+
+static int get_flow_masks_head_fd(const struct netdev_info *netdev_info)
+{
+	return get_table_fd(netdev_info, "flow_masks_head");
+}
+
+static int get_flow_masks_head(int head_fd, int *head)
+{
+	int err, zero = 0;
+
+	if (bpf_map_lookup_elem(head_fd, &zero, head)) {
+		err = -errno;
+		pr_err("Cannot get flow_masks_head: %s\n", strerror(errno));
+		return err;
+	}
+
+	return 0;
+}
+
+static int update_flow_masks_head(int head_fd, int head)
+{
+	int err, zero = 0;
+
+	if (bpf_map_update_elem(head_fd, &zero, &head, 0)) {
+		err = -errno;
+		pr_err("Cannot update flow_masks_head: %s\n", strerror(errno));
+		return err;
+	}
+
+	return 0;
+}
+
+static int get_flow_masks_fd(const struct netdev_info *netdev_info)
+{
+	return get_table_fd(netdev_info, "flow_masks");
+}
+
+static int get_flow_tables_fd(const struct netdev_info *netdev_info)
+{
+	return get_table_fd(netdev_info, "flow_tables");
+}
+
+static int __flow_table_insert_elem(int flow_table_fd,
+				    const struct xdp_flow *flow)
+{
+	int err = 0;
+
+	if (bpf_map_update_elem(flow_table_fd, &flow->key, &flow->actions, 0)) {
+		err = -errno;
+		pr_err("Cannot insert flow entry: %s\n",
+		       strerror(errno));
+	}
+
+	return err;
+}
+
+static void __flow_table_delete_elem(int flow_table_fd,
+				     const struct xdp_flow *flow)
+{
+	bpf_map_delete_elem(flow_table_fd, &flow->key);
+}
+
+static int flow_table_insert_elem(struct netdev_info *netdev_info,
+				  const struct xdp_flow *flow)
+{
+	int masks_fd, head_fd, flow_tables_fd, flow_table_fd, free_slot, head;
+	struct xdp_flow_mask_entry *entry, *pentry;
+	int err, cnt, idx, pidx;
+
+	masks_fd = get_flow_masks_fd(netdev_info);
+	if (masks_fd < 0)
+		return masks_fd;
+
+	head_fd = get_flow_masks_head_fd(netdev_info);
+	if (head_fd < 0)
+		return head_fd;
+
+	err = get_flow_masks_head(head_fd, &head);
+	if (err)
+		return err;
+
+	flow_tables_fd = get_flow_tables_fd(netdev_info);
+	if (flow_tables_fd < 0)
+		return flow_tables_fd;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry) {
+		pr_err("Memory allocation for flow_masks entry failed\n");
+		return -ENOMEM;
+	}
+
+	pentry = zalloc(sizeof(*pentry));
+	if (!pentry) {
+		flow_table_fd = -ENOMEM;
+		pr_err("Memory allocation for flow_masks prev entry failed\n");
+		goto err_entry;
+	}
+
+	idx = head;
+	for (cnt = 0; cnt < MAX_FLOW_MASKS; cnt++) {
+		if (idx == FLOW_MASKS_TAIL)
+			break;
+
+		if (bpf_map_lookup_elem(masks_fd, &idx, entry)) {
+			err = -errno;
+			pr_err("Cannot lookup flow_masks: %s\n",
+			       strerror(errno));
+			goto err;
+		}
+
+		if (entry->priority == flow->priority &&
+		    flow_equal(&entry->mask, &flow->mask)) {
+			__u32 id;
+
+			if (bpf_map_lookup_elem(flow_tables_fd, &idx, &id)) {
+				err = -errno;
+				pr_err("Cannot lookup flow_tables: %s\n",
+				       strerror(errno));
+				goto err;
+			}
+
+			flow_table_fd = bpf_map_get_fd_by_id(id);
+			if (flow_table_fd < 0) {
+				err = -errno;
+				pr_err("Cannot get flow_table fd by id: %s\n",
+				       strerror(errno));
+				goto err;
+			}
+
+			err = __flow_table_insert_elem(flow_table_fd, flow);
+			if (err)
+				goto out;
+
+			entry->count++;
+			if (bpf_map_update_elem(masks_fd, &idx, entry, 0)) {
+				err = -errno;
+				pr_err("Cannot update flow_masks count: %s\n",
+				       strerror(errno));
+				__flow_table_delete_elem(flow_table_fd, flow);
+				goto out;
+			}
+
+			goto out;
+		}
+
+		if (entry->priority > flow->priority)
+			break;
+
+		*pentry = *entry;
+		pidx = idx;
+		idx = entry->next;
+	}
+
+	if (unlikely(cnt == MAX_FLOW_MASKS && idx != FLOW_MASKS_TAIL)) {
+		err = -EINVAL;
+		pr_err("Cannot lookup flow_masks: Broken flow_masks list\n");
+		goto out;
+	}
+
+	/* Flow mask was not found. Create a new one */
+
+	free_slot = get_flow_masks_free_slot(netdev_info);
+	if (free_slot < 0) {
+		err = free_slot;
+		goto err;
+	}
+
+	entry->mask = flow->mask;
+	entry->priority = flow->priority;
+	entry->count = 1;
+	entry->next = idx;
+	if (bpf_map_update_elem(masks_fd, &free_slot, entry, 0)) {
+		err = -errno;
+		pr_err("Cannot update flow_masks: %s\n", strerror(errno));
+		goto err;
+	}
+
+	flow_table_fd = bpf_create_map(BPF_MAP_TYPE_HASH,
+				       sizeof(struct xdp_flow_key),
+				       sizeof(struct xdp_flow_actions),
+				       MAX_FLOWS, 0);
+	if (flow_table_fd < 0) {
+		err = -errno;
+		pr_err("map creation for flow_table failed: %s\n",
+		       strerror(errno));
+		goto err;
+	}
+
+	err = __flow_table_insert_elem(flow_table_fd, flow);
+	if (err)
+		goto out;
+
+	if (bpf_map_update_elem(flow_tables_fd, &free_slot, &flow_table_fd, 0)) {
+		err = -errno;
+		pr_err("Failed to insert flow_table into flow_tables: %s\n",
+		       strerror(errno));
+		goto out;
+	}
+
+	if (cnt == 0) {
+		err = update_flow_masks_head(head_fd, free_slot);
+		if (err)
+			goto err_flow_table;
+	} else {
+		pentry->next = free_slot;
+		/* This effectively only updates one byte of entry->next */
+		if (bpf_map_update_elem(masks_fd, &pidx, pentry, 0)) {
+			err = -errno;
+			pr_err("Cannot update flow_masks prev entry: %s\n",
+			       strerror(errno));
+			goto err_flow_table;
+		}
+	}
+	delete_flow_masks_free_slot(netdev_info, free_slot);
+out:
+	close(flow_table_fd);
+err:
+	free(pentry);
+err_entry:
+	free(entry);
+
+	return err;
+
+err_flow_table:
+	bpf_map_delete_elem(flow_tables_fd, &free_slot);
+
+	goto out;
+}
+
+static int flow_table_delete_elem(struct netdev_info *netdev_info,
+				  const struct xdp_flow *flow)
+{
+	int masks_fd, head_fd, flow_tables_fd, flow_table_fd, head;
+	struct xdp_flow_mask_entry *entry, *pentry;
+	int err, cnt, idx, pidx;
+	__u32 id;
+
+	masks_fd = get_flow_masks_fd(netdev_info);
+	if (masks_fd < 0)
+		return masks_fd;
+
+	head_fd = get_flow_masks_head_fd(netdev_info);
+	if (head_fd < 0)
+		return head_fd;
+
+	err = get_flow_masks_head(head_fd, &head);
+	if (err)
+		return err;
+
+	flow_tables_fd = get_flow_tables_fd(netdev_info);
+	if (flow_tables_fd < 0)
+		return flow_tables_fd;
+
+	entry = zalloc(sizeof(*entry));
+	if (!entry) {
+		pr_err("Memory allocation for flow_masks entry failed\n");
+		return -ENOMEM;
+	}
+
+	pentry = zalloc(sizeof(*pentry));
+	if (!pentry) {
+		err = -ENOMEM;
+		pr_err("Memory allocation for flow_masks prev entry failed\n");
+		goto err_pentry;
+	}
+
+	idx = head;
+	for (cnt = 0; cnt < MAX_FLOW_MASKS; cnt++) {
+		if (idx == FLOW_MASKS_TAIL) {
+			err = -ENOENT;
+			pr_err("Cannot lookup flow_masks: %s\n",
+			       strerror(-err));
+			goto out;
+		}
+
+		if (bpf_map_lookup_elem(masks_fd, &idx, entry)) {
+			err = -errno;
+			pr_err("Cannot lookup flow_masks: %s\n",
+			       strerror(errno));
+			goto out;
+		}
+
+		if (entry->priority > flow->priority) {
+			err = -ENOENT;
+			pr_err("Cannot lookup flow_masks: %s\n",
+			       strerror(-err));
+			goto out;
+		}
+
+		if (entry->priority == flow->priority &&
+		    flow_equal(&entry->mask, &flow->mask))
+			break;
+
+		*pentry = *entry;
+		pidx = idx;
+		idx = entry->next;
+	}
+
+	if (unlikely(cnt == MAX_FLOW_MASKS)) {
+		err = -ENOENT;
+		pr_err("Cannot lookup flow_masks: Broken flow_masks list\n");
+		goto out;
+	}
+
+	if (bpf_map_lookup_elem(flow_tables_fd, &idx, &id)) {
+		err = -errno;
+		pr_err("Cannot lookup flow_tables: %s\n",
+		       strerror(errno));
+		goto out;
+	}
+
+	flow_table_fd = bpf_map_get_fd_by_id(id);
+	if (flow_table_fd < 0) {
+		err = -errno;
+		pr_err("Cannot get flow_table fd by id: %s\n",
+		       strerror(errno));
+		goto out;
+	}
+
+	__flow_table_delete_elem(flow_table_fd, flow);
+	close(flow_table_fd);
+
+	if (--entry->count > 0) {
+		if (bpf_map_update_elem(masks_fd, &idx, entry, 0)) {
+			err = -errno;
+			pr_err("Cannot update flow_masks count: %s\n",
+			       strerror(errno));
+		}
+
+		goto out;
+	}
+
+	if (unlikely(entry->count < 0)) {
+		pr_warn("flow_masks has negative count: %d\n",
+			entry->count);
+	}
+
+	if (cnt == 0) {
+		err = update_flow_masks_head(head_fd, entry->next);
+		if (err)
+			goto out;
+	} else {
+		pentry->next = entry->next;
+		/* This effectively only updates one byte of entry->next */
+		if (bpf_map_update_elem(masks_fd, &pidx, pentry, 0)) {
+			err = -errno;
+			pr_err("Cannot update flow_masks prev entry: %s\n",
+			       strerror(errno));
+			goto out;
+		}
+	}
+
+	bpf_map_delete_elem(flow_tables_fd, &idx);
+	err = add_flow_masks_free_slot(netdev_info, idx);
+	if (err)
+		pr_err("Cannot add flow_masks free slot: %s\n", strerror(-err));
+out:
+	free(pentry);
+err_pentry:
+	free(entry);
+
+	return err;
+}
+
 static int handle_replace(struct mbox_request *req)
 {
-	return -EOPNOTSUPP;
+	struct netdev_info *netdev_info;
+	int err;
+
+	netdev_info = get_netdev_info(req);
+	if (IS_ERR(netdev_info))
+		return PTR_ERR(netdev_info);
+
+	err = flow_table_insert_elem(netdev_info, &req->flow);
+	if (err)
+		return err;
+
+	return 0;
 }
 
 static int handle_delete(const struct mbox_request *req)
 {
-	return -EOPNOTSUPP;
+	struct netdev_info *netdev_info;
+	int err;
+
+	netdev_info = get_netdev_info(req);
+	if (IS_ERR(netdev_info))
+		return PTR_ERR(netdev_info);
+
+	err = flow_table_delete_elem(netdev_info, &req->flow);
+	if (err)
+		return err;
+
+	return 0;
 }
 
 static void loop(void)
-- 
1.8.3.1


  parent reply index

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-13 12:05 [RFC PATCH bpf-next 00/14] xdp_flow: Flow offload to XDP Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 01/14] xdp_flow: Add skeleton of XDP based TC offload driver Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 02/14] xdp_flow: Add skeleton bpf program for XDP Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 03/14] bpf: Add API to get program from id Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 04/14] xdp_flow: Attach bpf prog to XDP in kernel after UMH loaded program Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 05/14] xdp_flow: Prepare flow tables in bpf Toshiaki Makita
2019-08-13 12:05 ` Toshiaki Makita [this message]
2019-08-13 12:05 ` [RFC PATCH bpf-next 07/14] xdp_flow: Add flow handling and basic actions in bpf prog Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 08/14] xdp_flow: Implement flow replacement/deletion logic in xdp_flow kmod Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 09/14] xdp_flow: Add netdev feature for enabling TC flower offload to XDP Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 10/14] xdp_flow: Implement redirect action Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 11/14] xdp_flow: Implement vlan_push action Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 12/14] bpf, selftest: Add test for xdp_flow Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 13/14] i40e: prefetch xdp->data before running XDP prog Toshiaki Makita
2019-08-13 12:05 ` [RFC PATCH bpf-next 14/14] bpf, hashtab: Compare keys in long Toshiaki Makita
2019-08-14  1:44 ` [RFC PATCH bpf-next 00/14] xdp_flow: Flow offload to XDP Alexei Starovoitov
2019-08-14  7:33   ` Toshiaki Makita
2019-08-15 10:59     ` Toshiaki Makita
2019-08-14 17:07 ` Stanislav Fomichev
2019-08-15 10:26   ` Toshiaki Makita
2019-08-15 15:21     ` Stanislav Fomichev
2019-08-15 19:22       ` Jakub Kicinski
2019-08-16  1:28         ` Toshiaki Makita
2019-08-16 18:52           ` Jakub Kicinski
2019-08-17 14:01             ` Toshiaki Makita
2019-08-19 18:15               ` Jakub Kicinski
2019-08-21  8:49                 ` Toshiaki Makita
2019-08-21 18:38                   ` Jakub Kicinski
2019-08-16 15:59         ` Stanislav Fomichev
2019-08-16 16:20           ` Stanislav Fomichev
2019-08-16  1:09       ` Toshiaki Makita
2019-08-16 15:35         ` Stanislav Fomichev
2019-08-17 14:10           ` Toshiaki Makita
2019-08-15 15:46 ` William Tu
2019-08-16  1:38   ` Toshiaki Makita

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190813120558.6151-7-toshiaki.makita1@gmail.com \
    --to=toshiaki.makita1@gmail.com \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=davem@davemloft.net \
    --cc=hawk@kernel.org \
    --cc=jakub.kicinski@netronome.com \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=netdev@vger.kernel.org \
    --cc=songliubraving@fb.com \
    --cc=u9012063@gmail.com \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

BPF Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/bpf/0 bpf/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 bpf bpf/ https://lore.kernel.org/bpf \
		bpf@vger.kernel.org bpf@archiver.kernel.org
	public-inbox-index bpf


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.bpf


AGPL code for this site: git clone https://public-inbox.org/ public-inbox