All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net-next 0/4]: Allow head adjustment in XDP prog
@ 2016-12-02 23:23 Martin KaFai Lau
  2016-12-02 23:23 ` [PATCH net-next 1/4] bpf: xdp: " Martin KaFai Lau
                   ` (4 more replies)
  0 siblings, 5 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-02 23:23 UTC (permalink / raw)
  To: netdev
  Cc: Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

This series adds a helper to allow head adjustment in XDP prog.  mlx4
driver has been modified to support this feature.  An example is written
to encapsulate a packet with an IPv4/v6 header and then XDP_TX it
out.

Thanks,
--Martin

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
  2016-12-02 23:23 [PATCH net-next 0/4]: Allow head adjustment in XDP prog Martin KaFai Lau
@ 2016-12-02 23:23 ` Martin KaFai Lau
  2016-12-03  0:22   ` Daniel Borkmann
  2016-12-03 15:24   ` Jesper Dangaard Brouer
  2016-12-02 23:23 ` [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs Martin KaFai Lau
                   ` (3 subsequent siblings)
  4 siblings, 2 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-02 23:23 UTC (permalink / raw)
  To: netdev
  Cc: Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

This patch allows XDP prog to extend/remove the packet
data at the head (like adding or removing header).  It is
done by adding a new XDP helper bpf_xdp_adjust_head().

It also renames bpf_helper_changes_skb_data() to
bpf_helper_changes_pkt_data() to better reflect
that XDP prog does not work on skb.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 arch/powerpc/net/bpf_jit_comp64.c |  4 ++--
 arch/s390/net/bpf_jit_comp.c      |  2 +-
 arch/x86/net/bpf_jit_comp.c       |  2 +-
 include/linux/filter.h            |  2 +-
 include/uapi/linux/bpf.h          | 11 ++++++++++-
 kernel/bpf/core.c                 |  2 +-
 kernel/bpf/verifier.c             |  2 +-
 net/core/filter.c                 | 34 ++++++++++++++++++++++++++++++++--
 8 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0fe98a567125..73a5cf18fd84 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -766,7 +766,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			func = (u8 *) __bpf_call_base + imm;
 
 			/* Save skb pointer if we need to re-cache skb data */
-			if (bpf_helper_changes_skb_data(func))
+			if (bpf_helper_changes_pkt_data(func))
 				PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
 
 			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -775,7 +775,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			PPC_MR(b2p[BPF_REG_0], 3);
 
 			/* refresh skb cache */
-			if (bpf_helper_changes_skb_data(func)) {
+			if (bpf_helper_changes_pkt_data(func)) {
 				/* reload skb pointer to r3 */
 				PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
 				bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index bee281f3163d..167b31b186c1 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -981,7 +981,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
 		EMIT2(0x0d00, REG_14, REG_W1);
 		/* lgr %b0,%r2: load return value into %b0 */
 		EMIT4(0xb9040000, BPF_REG_0, REG_2);
-		if (bpf_helper_changes_skb_data((void *)func)) {
+		if (bpf_helper_changes_pkt_data((void *)func)) {
 			jit->seen |= SEEN_SKB_CHANGE;
 			/* lg %b1,ST_OFF_SKBP(%r15) */
 			EMIT6_DISP_LH(0xe3000000, 0x0004, BPF_REG_1, REG_0,
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index fe04a04dab8e..e76d1af60f7a 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -853,7 +853,7 @@ xadd:			if (is_imm8(insn->off))
 			func = (u8 *) __bpf_call_base + imm32;
 			jmp_offset = func - (image + addrs[i]);
 			if (seen_ld_abs) {
-				reload_skb_data = bpf_helper_changes_skb_data(func);
+				reload_skb_data = bpf_helper_changes_pkt_data(func);
 				if (reload_skb_data) {
 					EMIT1(0x57); /* push %rdi */
 					jmp_offset += 22; /* pop, mov, sub, mov */
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 97338134398f..3c02de77ad6a 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -590,7 +590,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
-bool bpf_helper_changes_skb_data(void *func);
+bool bpf_helper_changes_pkt_data(void *func);
 
 struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 				       const struct bpf_insn *patch, u32 len);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6123d9b8e828..0eb0e87dbe9f 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -424,6 +424,12 @@ union bpf_attr {
  *     @len: length of header to be pushed in front
  *     @flags: Flags (unused for now)
  *     Return: 0 on success or negative error
+ *
+ * int bpf_xdp_adjust_head(xdp_md, delta)
+ *     Adjust the xdp_md.data by delta
+ *     @xdp_md: pointer to xdp_md
+ *     @delta: An positive/negative integer to be added to xdp_md.data
+ *     Return: 0 on success or negative on error
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -469,7 +475,8 @@ union bpf_attr {
 	FN(csum_update),		\
 	FN(set_hash_invalid),		\
 	FN(get_numa_node_id),		\
-	FN(skb_change_head),
+	FN(skb_change_head),		\
+	FN(xdp_adjust_head),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -576,6 +583,8 @@ struct bpf_sock {
 	__u32 protocol;
 };
 
+#define XDP_PACKET_HEADROOM 256
+
 /* User return codes for XDP prog type.
  * A valid XDP program must return one of these defined values. All other
  * return codes are reserved for future use. Unknown return codes will result
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 82a04143368e..871e2f398cf5 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1078,7 +1078,7 @@ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog)
 	return prog;
 }
 
-bool __weak bpf_helper_changes_skb_data(void *func)
+bool __weak bpf_helper_changes_pkt_data(void *func)
 {
 	return false;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8135cb1077ee..870b9d9ad11c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1216,7 +1216,7 @@ static int check_call(struct bpf_verifier_env *env, int func_id)
 		return -EINVAL;
 	}
 
-	changes_data = bpf_helper_changes_skb_data(fn->func);
+	changes_data = bpf_helper_changes_pkt_data(fn->func);
 
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
diff --git a/net/core/filter.c b/net/core/filter.c
index 56b43587d200..6902e2f73e38 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2234,7 +2234,34 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
 	.arg3_type	= ARG_ANYTHING,
 };
 
-bool bpf_helper_changes_skb_data(void *func)
+BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
+{
+	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
+	 * XDP prog is set.
+	 * If the above is not true for the other drivers to support
+	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
+	 */
+	void *head = (void *)((unsigned long)xdp->data & PAGE_MASK);
+	void *new_data = xdp->data + offset;
+
+	if (new_data < head || new_data >= xdp->data_end)
+		/* The packet length must be >=1 */
+		return -EINVAL;
+
+	xdp->data = new_data;
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
+	.func		= bpf_xdp_adjust_head,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+bool bpf_helper_changes_pkt_data(void *func)
 {
 	if (func == bpf_skb_vlan_push ||
 	    func == bpf_skb_vlan_pop ||
@@ -2244,7 +2271,8 @@ bool bpf_helper_changes_skb_data(void *func)
 	    func == bpf_skb_change_tail ||
 	    func == bpf_skb_pull_data ||
 	    func == bpf_l3_csum_replace ||
-	    func == bpf_l4_csum_replace)
+	    func == bpf_l4_csum_replace ||
+	    func == bpf_xdp_adjust_head)
 		return true;
 
 	return false;
@@ -2670,6 +2698,8 @@ xdp_func_proto(enum bpf_func_id func_id)
 		return &bpf_xdp_event_output_proto;
 	case BPF_FUNC_get_smp_processor_id:
 		return &bpf_get_smp_processor_id_proto;
+	case BPF_FUNC_xdp_adjust_head:
+		return &bpf_xdp_adjust_head_proto;
 	default:
 		return sk_filter_func_proto(func_id);
 	}
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-02 23:23 [PATCH net-next 0/4]: Allow head adjustment in XDP prog Martin KaFai Lau
  2016-12-02 23:23 ` [PATCH net-next 1/4] bpf: xdp: " Martin KaFai Lau
@ 2016-12-02 23:23 ` Martin KaFai Lau
  2016-12-03  0:07   ` Rick Jones
  2016-12-03  0:38   ` Eric Dumazet
  2016-12-02 23:23 ` [PATCH net-next 3/4] mlx4: xdp: Reserve headroom for receiving packet when XDP prog is active Martin KaFai Lau
                   ` (2 subsequent siblings)
  4 siblings, 2 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-02 23:23 UTC (permalink / raw)
  To: netdev
  Cc: Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

When XDP prog is attached, it is currently limiting
MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
in x86.

AFAICT, since mlx4 is doing one page per packet for XDP,
we can at least raise the MTU limitation up to
PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
doing.  It will be useful in the next patch which allows
XDP program to extend the packet by adding new header(s).

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 28 +++++++++++-----
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 46 ++++++++++++++------------
 2 files changed, 44 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 091b904262bc..5df0bbd88d67 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -51,6 +51,8 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
+#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN)))
+
 int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2262,6 +2264,19 @@ void mlx4_en_destroy_netdev(struct net_device *dev)
 		free_netdev(dev);
 }
 
+static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
+{
+	struct mlx4_en_priv *priv = netdev_priv(dev);
+
+	if (mtu > MLX4_EN_MAX_XDP_MTU) {
+		en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
+		       mtu, MLX4_EN_MAX_XDP_MTU);
+		return false;
+	}
+
+	return true;
+}
+
 static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
@@ -2271,11 +2286,10 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu)
 	en_dbg(DRV, priv, "Change MTU called - current:%d new:%d\n",
 		 dev->mtu, new_mtu);
 
-	if (priv->tx_ring_num[TX_XDP] && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) {
-		en_err(priv, "MTU size:%d requires frags but XDP running\n",
-		       new_mtu);
-		return -EOPNOTSUPP;
-	}
+	if (priv->tx_ring_num[TX_XDP] &&
+	    !mlx4_en_check_xdp_mtu(dev, new_mtu))
+		return -ENOTSUPP;
+
 	dev->mtu = new_mtu;
 
 	if (netif_running(dev)) {
@@ -2723,10 +2737,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog)
 		return 0;
 	}
 
-	if (priv->num_frags > 1) {
-		en_err(priv, "Cannot set XDP if MTU requires multiple frags\n");
+	if (!mlx4_en_check_xdp_mtu(dev, dev->mtu))
 		return -EOPNOTSUPP;
-	}
 
 	tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
 	if (!tmp)
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 6562f78b07f4..23e9d04d1ef4 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -1164,37 +1164,39 @@ static const int frag_sizes[] = {
 
 void mlx4_en_calc_rx_buf(struct net_device *dev)
 {
-	enum dma_data_direction dma_dir = PCI_DMA_FROMDEVICE;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu);
-	int order = MLX4_EN_ALLOC_PREFER_ORDER;
-	u32 align = SMP_CACHE_BYTES;
-	int buf_size = 0;
 	int i = 0;
 
 	/* bpf requires buffers to be set up as 1 packet per page.
 	 * This only works when num_frags == 1.
 	 */
 	if (priv->tx_ring_num[TX_XDP]) {
-		dma_dir = PCI_DMA_BIDIRECTIONAL;
-		/* This will gain efficient xdp frame recycling at the expense
-		 * of more costly truesize accounting
+		priv->frag_info[0].order = 0;
+		priv->frag_info[0].frag_size = eff_mtu;
+		priv->frag_info[0].frag_prefix_size = 0;
+		/* This will gain efficient xdp frame recycling at the
+		 * expense of more costly truesize accounting
 		 */
-		align = PAGE_SIZE;
-		order = 0;
-	}
-
-	while (buf_size < eff_mtu) {
-		priv->frag_info[i].order = order;
-		priv->frag_info[i].frag_size =
-			(eff_mtu > buf_size + frag_sizes[i]) ?
-				frag_sizes[i] : eff_mtu - buf_size;
-		priv->frag_info[i].frag_prefix_size = buf_size;
-		priv->frag_info[i].frag_stride =
-				ALIGN(priv->frag_info[i].frag_size, align);
-		priv->frag_info[i].dma_dir = dma_dir;
-		buf_size += priv->frag_info[i].frag_size;
-		i++;
+		priv->frag_info[0].frag_stride = PAGE_SIZE;
+		priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
+		i = 1;
+	} else {
+		int buf_size = 0;
+
+		while (buf_size < eff_mtu) {
+			priv->frag_info[i].order = MLX4_EN_ALLOC_PREFER_ORDER;
+			priv->frag_info[i].frag_size =
+				(eff_mtu > buf_size + frag_sizes[i]) ?
+					frag_sizes[i] : eff_mtu - buf_size;
+			priv->frag_info[i].frag_prefix_size = buf_size;
+			priv->frag_info[i].frag_stride =
+				ALIGN(priv->frag_info[i].frag_size,
+				      SMP_CACHE_BYTES);
+			priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
+			buf_size += priv->frag_info[i].frag_size;
+			i++;
+		}
 	}
 
 	priv->num_frags = i;
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH net-next 3/4] mlx4: xdp: Reserve headroom for receiving packet when XDP prog is active
  2016-12-02 23:23 [PATCH net-next 0/4]: Allow head adjustment in XDP prog Martin KaFai Lau
  2016-12-02 23:23 ` [PATCH net-next 1/4] bpf: xdp: " Martin KaFai Lau
  2016-12-02 23:23 ` [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs Martin KaFai Lau
@ 2016-12-02 23:23 ` Martin KaFai Lau
  2016-12-02 23:23 ` [PATCH net-next 4/4] bpf: xdp: Add XDP example for head adjustment Martin KaFai Lau
  2016-12-03 15:17 ` [PATCH net-next 0/4]: Allow head adjustment in XDP prog Jesper Dangaard Brouer
  4 siblings, 0 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-02 23:23 UTC (permalink / raw)
  To: netdev
  Cc: Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

Reserve XDP_PACKET_HEADROOM when XDP prog is active.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++++++--
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 23 +++++++++++++++++------
 drivers/net/ethernet/mellanox/mlx4/en_tx.c     |  9 +++++----
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h   |  3 ++-
 4 files changed, 39 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 5df0bbd88d67..fb6d87dbc350 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -51,7 +51,8 @@
 #include "mlx4_en.h"
 #include "en_port.h"
 
-#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN)))
+#define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
+				   XDP_PACKET_HEADROOM))
 
 int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 {
@@ -1551,6 +1552,7 @@ int mlx4_en_start_port(struct net_device *dev)
 	struct mlx4_en_tx_ring *tx_ring;
 	int rx_index = 0;
 	int err = 0;
+	int mtu;
 	int i, t;
 	int j;
 	u8 mc_list[16] = {0};
@@ -1684,8 +1686,12 @@ int mlx4_en_start_port(struct net_device *dev)
 	}
 
 	/* Configure port */
+	mtu = priv->rx_skb_size + ETH_FCS_LEN;
+	if (priv->tx_ring_num[TX_XDP])
+		mtu += XDP_PACKET_HEADROOM;
+
 	err = mlx4_SET_PORT_general(mdev->dev, priv->port,
-				    priv->rx_skb_size + ETH_FCS_LEN,
+				    mtu,
 				    priv->prof->tx_pause,
 				    priv->prof->tx_ppp,
 				    priv->prof->rx_pause,
@@ -2268,6 +2274,13 @@ static bool mlx4_en_check_xdp_mtu(struct net_device *dev, int mtu)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 
+	if (mtu + XDP_PACKET_HEADROOM > priv->max_mtu) {
+		en_err(priv,
+		       "Device max mtu:%d does not allow %d bytes reserved headroom for XDP prog\n",
+		       priv->max_mtu, XDP_PACKET_HEADROOM);
+		return false;
+	}
+
 	if (mtu > MLX4_EN_MAX_XDP_MTU) {
 		en_err(priv, "mtu:%d > max:%d when XDP prog is attached\n",
 		       mtu, MLX4_EN_MAX_XDP_MTU);
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 23e9d04d1ef4..324771ac929e 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -96,7 +96,6 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 	struct mlx4_en_rx_alloc page_alloc[MLX4_EN_MAX_RX_FRAGS];
 	const struct mlx4_en_frag_info *frag_info;
 	struct page *page;
-	dma_addr_t dma;
 	int i;
 
 	for (i = 0; i < priv->num_frags; i++) {
@@ -115,9 +114,10 @@ static int mlx4_en_alloc_frags(struct mlx4_en_priv *priv,
 
 	for (i = 0; i < priv->num_frags; i++) {
 		frags[i] = ring_alloc[i];
-		dma = ring_alloc[i].dma + ring_alloc[i].page_offset;
+		frags[i].page_offset += priv->frag_info[i].rx_headroom;
+		rx_desc->data[i].addr = cpu_to_be64(frags[i].dma +
+						    frags[i].page_offset);
 		ring_alloc[i] = page_alloc[i];
-		rx_desc->data[i].addr = cpu_to_be64(dma);
 	}
 
 	return 0;
@@ -250,7 +250,8 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv,
 
 	if (ring->page_cache.index > 0) {
 		frags[0] = ring->page_cache.buf[--ring->page_cache.index];
-		rx_desc->data[0].addr = cpu_to_be64(frags[0].dma);
+		rx_desc->data[0].addr = cpu_to_be64(frags[0].dma +
+						    frags[0].page_offset);
 		return 0;
 	}
 
@@ -889,6 +890,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 		if (xdp_prog) {
 			struct xdp_buff xdp;
 			dma_addr_t dma;
+			void *pg_addr, *orig_data;
 			u32 act;
 
 			dma = be64_to_cpu(rx_desc->data[0].addr);
@@ -896,11 +898,18 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 						priv->frag_info[0].frag_size,
 						DMA_FROM_DEVICE);
 
-			xdp.data = page_address(frags[0].page) +
-							frags[0].page_offset;
+			pg_addr = page_address(frags[0].page);
+			orig_data = pg_addr + frags[0].page_offset;
+			xdp.data = orig_data;
 			xdp.data_end = xdp.data + length;
 
 			act = bpf_prog_run_xdp(xdp_prog, &xdp);
+
+			if (xdp.data != orig_data) {
+				length = xdp.data_end - xdp.data;
+				frags[0].page_offset = xdp.data - pg_addr;
+			}
+
 			switch (act) {
 			case XDP_PASS:
 				break;
@@ -1180,6 +1189,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 		 */
 		priv->frag_info[0].frag_stride = PAGE_SIZE;
 		priv->frag_info[0].dma_dir = PCI_DMA_BIDIRECTIONAL;
+		priv->frag_info[0].rx_headroom = XDP_PACKET_HEADROOM;
 		i = 1;
 	} else {
 		int buf_size = 0;
@@ -1194,6 +1204,7 @@ void mlx4_en_calc_rx_buf(struct net_device *dev)
 				ALIGN(priv->frag_info[i].frag_size,
 				      SMP_CACHE_BYTES);
 			priv->frag_info[i].dma_dir = PCI_DMA_FROMDEVICE;
+			priv->frag_info[i].rx_headroom = 0;
 			buf_size += priv->frag_info[i].frag_size;
 			i++;
 		}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4b597dca5c52..9e5f38cefe5f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -354,7 +354,7 @@ u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv,
 	struct mlx4_en_rx_alloc frame = {
 		.page = tx_info->page,
 		.dma = tx_info->map0_dma,
-		.page_offset = 0,
+		.page_offset = XDP_PACKET_HEADROOM,
 		.page_size = PAGE_SIZE,
 	};
 
@@ -1132,7 +1132,7 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	tx_info->page = frame->page;
 	frame->page = NULL;
 	tx_info->map0_dma = dma;
-	tx_info->map0_byte_count = length;
+	tx_info->map0_byte_count = length + frame->page_offset;
 	tx_info->nr_txbb = nr_txbb;
 	tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN);
 	tx_info->data_offset = (void *)data - (void *)tx_desc;
@@ -1141,9 +1141,10 @@ netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_ring *rx_ring,
 	tx_info->linear = 1;
 	tx_info->inl = 0;
 
-	dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE);
+	dma_sync_single_range_for_device(priv->ddev, dma, frame->page_offset,
+					 length, PCI_DMA_TODEVICE);
 
-	data->addr = cpu_to_be64(dma);
+	data->addr = cpu_to_be64(dma + frame->page_offset);
 	data->lkey = ring->mr_key;
 	dma_wmb();
 	data->byte_count = cpu_to_be32(length);
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 20a936428f4a..ba1c6cd0cc79 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -475,7 +475,8 @@ struct mlx4_en_frag_info {
 	u16 frag_prefix_size;
 	u32 frag_stride;
 	enum dma_data_direction dma_dir;
-	int order;
+	u16 order;
+	u16 rx_headroom;
 };
 
 #ifdef CONFIG_MLX4_EN_DCB
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH net-next 4/4] bpf: xdp: Add XDP example for head adjustment
  2016-12-02 23:23 [PATCH net-next 0/4]: Allow head adjustment in XDP prog Martin KaFai Lau
                   ` (2 preceding siblings ...)
  2016-12-02 23:23 ` [PATCH net-next 3/4] mlx4: xdp: Reserve headroom for receiving packet when XDP prog is active Martin KaFai Lau
@ 2016-12-02 23:23 ` Martin KaFai Lau
  2016-12-03 15:17 ` [PATCH net-next 0/4]: Allow head adjustment in XDP prog Jesper Dangaard Brouer
  4 siblings, 0 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-02 23:23 UTC (permalink / raw)
  To: netdev
  Cc: Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

The XDP prog checks if the incoming packet matches any VIP:PORT
combination in the BPF hashmap.  If it is, it will encapsulate
the packet with a IPv4/v6 header as instructed by the value of
the BPF hashmap and then XDP_TX it out.

The VIP:PORT -> IP-Encap-Info can be specified by the cmd args
of the user prog.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 samples/bpf/Makefile              |   4 +
 samples/bpf/bpf_helpers.h         |   2 +
 samples/bpf/bpf_load.c            |  94 ++++++++++++++
 samples/bpf/bpf_load.h            |   1 +
 samples/bpf/xdp1_user.c           |  93 --------------
 samples/bpf/xdp_tx_iptnl_common.h |  37 ++++++
 samples/bpf/xdp_tx_iptnl_kern.c   | 232 ++++++++++++++++++++++++++++++++++
 samples/bpf/xdp_tx_iptnl_user.c   | 253 ++++++++++++++++++++++++++++++++++++++
 8 files changed, 623 insertions(+), 93 deletions(-)
 create mode 100644 samples/bpf/xdp_tx_iptnl_common.h
 create mode 100644 samples/bpf/xdp_tx_iptnl_kern.c
 create mode 100644 samples/bpf/xdp_tx_iptnl_user.c

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index bfc2cb88a1f7..e4d6be8bd94b 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -32,6 +32,7 @@ hostprogs-y += trace_event
 hostprogs-y += sampleip
 hostprogs-y += tc_l2_redirect
 hostprogs-y += lwt_len_hist
+hostprogs-y += xdp_tx_iptnl
 
 test_lru_dist-objs := test_lru_dist.o libbpf.o
 sock_example-objs := sock_example.o libbpf.o
@@ -65,6 +66,7 @@ trace_event-objs := bpf_load.o libbpf.o trace_event_user.o
 sampleip-objs := bpf_load.o libbpf.o sampleip_user.o
 tc_l2_redirect-objs := bpf_load.o libbpf.o tc_l2_redirect_user.o
 lwt_len_hist-objs := bpf_load.o libbpf.o lwt_len_hist_user.o
+xdp_tx_iptnl-objs := bpf_load.o libbpf.o xdp_tx_iptnl_user.o
 
 # Tell kbuild to always build the programs
 always := $(hostprogs-y)
@@ -97,6 +99,7 @@ always += test_current_task_under_cgroup_kern.o
 always += trace_event_kern.o
 always += sampleip_kern.o
 always += lwt_len_hist_kern.o
+always += xdp_tx_iptnl_kern.o
 
 HOSTCFLAGS += -I$(objtree)/usr/include
 HOSTCFLAGS += -I$(srctree)/tools/testing/selftests/bpf/
@@ -127,6 +130,7 @@ HOSTLOADLIBES_trace_event += -lelf
 HOSTLOADLIBES_sampleip += -lelf
 HOSTLOADLIBES_tc_l2_redirect += -l elf
 HOSTLOADLIBES_lwt_len_hist += -l elf
+HOSTLOADLIBES_xdp_tx_iptnl += -lelf
 
 # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
 #  make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_helpers.h b/samples/bpf/bpf_helpers.h
index a246c6122629..8e9dca50b73a 100644
--- a/samples/bpf/bpf_helpers.h
+++ b/samples/bpf/bpf_helpers.h
@@ -57,6 +57,8 @@ static int (*bpf_skb_set_tunnel_opt)(void *ctx, void *md, int size) =
 	(void *) BPF_FUNC_skb_set_tunnel_opt;
 static unsigned long long (*bpf_get_prandom_u32)(void) =
 	(void *) BPF_FUNC_get_prandom_u32;
+static int (*bpf_xdp_adjust_head)(void *ctx, int offset) =
+	(void *) BPF_FUNC_xdp_adjust_head;
 
 /* llvm builtin functions that eBPF C program may use to
  * emit BPF_LD_ABS and BPF_LD_IND instructions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 49b45ccbe153..e30b6de94f2e 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -12,6 +12,10 @@
 #include <linux/bpf.h>
 #include <linux/filter.h>
 #include <linux/perf_event.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <sys/socket.h>
 #include <sys/syscall.h>
 #include <sys/ioctl.h>
 #include <sys/mman.h>
@@ -450,3 +454,93 @@ struct ksym *ksym_search(long key)
 	/* out of range. return _stext */
 	return &syms[0];
 }
+
+int set_link_xdp_fd(int ifindex, int fd)
+{
+	struct sockaddr_nl sa;
+	int sock, seq = 0, len, ret = -1;
+	char buf[4096];
+	struct nlattr *nla, *nla_xdp;
+	struct {
+		struct nlmsghdr  nh;
+		struct ifinfomsg ifinfo;
+		char             attrbuf[64];
+	} req;
+	struct nlmsghdr *nh;
+	struct nlmsgerr *err;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.nl_family = AF_NETLINK;
+
+	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (sock < 0) {
+		printf("open netlink socket: %s\n", strerror(errno));
+		return -1;
+	}
+
+	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
+		printf("bind to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_type = RTM_SETLINK;
+	req.nh.nlmsg_pid = 0;
+	req.nh.nlmsg_seq = ++seq;
+	req.ifinfo.ifi_family = AF_UNSPEC;
+	req.ifinfo.ifi_index = ifindex;
+	nla = (struct nlattr *)(((char *)&req)
+				+ NLMSG_ALIGN(req.nh.nlmsg_len));
+	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
+
+	nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
+	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
+	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
+	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
+	nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
+
+	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		printf("send to netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	len = recv(sock, buf, sizeof(buf), 0);
+	if (len < 0) {
+		printf("recv from netlink: %s\n", strerror(errno));
+		goto cleanup;
+	}
+
+	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+	     nh = NLMSG_NEXT(nh, len)) {
+		if (nh->nlmsg_pid != getpid()) {
+			printf("Wrong pid %d, expected %d\n",
+			       nh->nlmsg_pid, getpid());
+			goto cleanup;
+		}
+		if (nh->nlmsg_seq != seq) {
+			printf("Wrong seq %d, expected %d\n",
+			       nh->nlmsg_seq, seq);
+			goto cleanup;
+		}
+		switch (nh->nlmsg_type) {
+		case NLMSG_ERROR:
+			err = (struct nlmsgerr *)NLMSG_DATA(nh);
+			if (!err->error)
+				continue;
+			printf("nlmsg error %s\n", strerror(-err->error));
+			goto cleanup;
+		case NLMSG_DONE:
+			break;
+		}
+	}
+
+	ret = 0;
+
+cleanup:
+	close(sock);
+	return ret;
+}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 4adeeef53ad6..fb46a421ab41 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -31,4 +31,5 @@ struct ksym {
 
 int load_kallsyms(void);
 struct ksym *ksym_search(long key);
+int set_link_xdp_fd(int ifindex, int fd);
 #endif
diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c
index 2b2150d6d6f7..5f040a0d7712 100644
--- a/samples/bpf/xdp1_user.c
+++ b/samples/bpf/xdp1_user.c
@@ -5,111 +5,18 @@
  * License as published by the Free Software Foundation.
  */
 #include <linux/bpf.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
 #include <assert.h>
 #include <errno.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <sys/socket.h>
 #include <unistd.h>
 
 #include "bpf_load.h"
 #include "bpf_util.h"
 #include "libbpf.h"
 
-static int set_link_xdp_fd(int ifindex, int fd)
-{
-	struct sockaddr_nl sa;
-	int sock, seq = 0, len, ret = -1;
-	char buf[4096];
-	struct nlattr *nla, *nla_xdp;
-	struct {
-		struct nlmsghdr  nh;
-		struct ifinfomsg ifinfo;
-		char             attrbuf[64];
-	} req;
-	struct nlmsghdr *nh;
-	struct nlmsgerr *err;
-
-	memset(&sa, 0, sizeof(sa));
-	sa.nl_family = AF_NETLINK;
-
-	sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
-	if (sock < 0) {
-		printf("open netlink socket: %s\n", strerror(errno));
-		return -1;
-	}
-
-	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
-		printf("bind to netlink: %s\n", strerror(errno));
-		goto cleanup;
-	}
-
-	memset(&req, 0, sizeof(req));
-	req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
-	req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
-	req.nh.nlmsg_type = RTM_SETLINK;
-	req.nh.nlmsg_pid = 0;
-	req.nh.nlmsg_seq = ++seq;
-	req.ifinfo.ifi_family = AF_UNSPEC;
-	req.ifinfo.ifi_index = ifindex;
-	nla = (struct nlattr *)(((char *)&req)
-				+ NLMSG_ALIGN(req.nh.nlmsg_len));
-	nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/;
-
-	nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN);
-	nla_xdp->nla_type = 1/*IFLA_XDP_FD*/;
-	nla_xdp->nla_len = NLA_HDRLEN + sizeof(int);
-	memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd));
-	nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len;
-
-	req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len);
-
-	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
-		printf("send to netlink: %s\n", strerror(errno));
-		goto cleanup;
-	}
-
-	len = recv(sock, buf, sizeof(buf), 0);
-	if (len < 0) {
-		printf("recv from netlink: %s\n", strerror(errno));
-		goto cleanup;
-	}
-
-	for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
-	     nh = NLMSG_NEXT(nh, len)) {
-		if (nh->nlmsg_pid != getpid()) {
-			printf("Wrong pid %d, expected %d\n",
-			       nh->nlmsg_pid, getpid());
-			goto cleanup;
-		}
-		if (nh->nlmsg_seq != seq) {
-			printf("Wrong seq %d, expected %d\n",
-			       nh->nlmsg_seq, seq);
-			goto cleanup;
-		}
-		switch (nh->nlmsg_type) {
-		case NLMSG_ERROR:
-			err = (struct nlmsgerr *)NLMSG_DATA(nh);
-			if (!err->error)
-				continue;
-			printf("nlmsg error %s\n", strerror(-err->error));
-			goto cleanup;
-		case NLMSG_DONE:
-			break;
-		}
-	}
-
-	ret = 0;
-
-cleanup:
-	close(sock);
-	return ret;
-}
-
 static int ifindex;
 
 static void int_exit(int sig)
diff --git a/samples/bpf/xdp_tx_iptnl_common.h b/samples/bpf/xdp_tx_iptnl_common.h
new file mode 100644
index 000000000000..dd12cc35110f
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptnl_common.h
@@ -0,0 +1,37 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
+#define _SAMPLES_BPF_XDP_TX_IPTNL_COMMON_H
+
+#include <linux/types.h>
+
+#define MAX_IPTNL_ENTRIES 256U
+
+struct vip {
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} daddr;
+	__u16 dport;
+	__u16 family;
+	__u8 protocol;
+};
+
+struct iptnl_info {
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} saddr;
+	union {
+		__u32 v6[4];
+		__u32 v4;
+	} daddr;
+	__u16 family;
+	__u8 dmac[6];
+};
+
+#endif
diff --git a/samples/bpf/xdp_tx_iptnl_kern.c b/samples/bpf/xdp_tx_iptnl_kern.c
new file mode 100644
index 000000000000..a3c583f17f09
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptnl_kern.c
@@ -0,0 +1,232 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <uapi/linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include "bpf_helpers.h"
+#include "xdp_tx_iptnl_common.h"
+
+struct bpf_map_def SEC("maps") rxcnt = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(__u32),
+	.value_size = sizeof(__u64),
+	.max_entries = 256,
+};
+
+struct bpf_map_def SEC("maps") vip2tnl = {
+	.type = BPF_MAP_TYPE_HASH,
+	.key_size = sizeof(struct vip),
+	.value_size = sizeof(struct iptnl_info),
+	.max_entries = MAX_IPTNL_ENTRIES,
+};
+
+static __always_inline void count_tx(u32 protocol)
+{
+	u64 *rxcnt_count;
+
+	rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+	if (rxcnt_count)
+		*rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, void *data_end,
+				     u8 protocol)
+{
+	struct tcphdr *th;
+	struct udphdr *uh;
+
+	switch (protocol) {
+	case IPPROTO_TCP:
+		th = (struct tcphdr *)trans_data;
+		if (th + 1 > data_end)
+			return -1;
+		return th->dest;
+	case IPPROTO_UDP:
+		uh = (struct udphdr *)trans_data;
+		if (uh + 1 > data_end)
+			return -1;
+		return uh->dest;
+	default:
+		return 0;
+	}
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+				       const struct ethhdr *old_eth,
+				       const struct iptnl_info *tnl,
+				       __be16 h_proto)
+{
+	memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+	memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+	new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct iphdr *iph = data + sizeof(struct ethhdr);
+	u16 *next_iph_u16;
+	u16 payload_len;
+	struct vip vip = {};
+	int dport;
+	u32 csum = 0;
+	int i;
+
+	if (iph + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(iph + 1, data_end, iph->protocol);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = iph->protocol;
+	vip.family = AF_INET;
+	vip.daddr.v4 = iph->daddr;
+	vip.dport = dport;
+	payload_len = ntohs(iph->tot_len);
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v4-in-v4 */
+	if (!tnl || tnl->family != AF_INET)
+		return XDP_PASS;
+
+	/* The vip key is found.  Add an IP header and send it out */
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	iph = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*iph);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    iph + 1 > data_end )
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IP));
+
+	iph->version = 4;
+	iph->ihl = sizeof(*iph) >> 2;
+	iph->frag_off =	0;
+	iph->protocol = IPPROTO_IPIP;
+	iph->check = 0;
+	iph->tos = 0;
+	iph->tot_len = htons(payload_len + sizeof(*iph));
+	iph->daddr = tnl->daddr.v4;
+	iph->saddr = tnl->saddr.v4;
+	iph->ttl = 8;
+
+	next_iph_u16 = (u16 *)iph;
+#pragma clang loop unroll(full)
+	for (i = 0; i < sizeof(*iph) >> 1; i++)
+		csum += *next_iph_u16++;
+
+	iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct iptnl_info *tnl;
+	struct ethhdr *new_eth;
+	struct ethhdr *old_eth;
+	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+	__u16 payload_len;
+	struct vip vip = {};
+	int dport;
+
+	if (ip6h + 1 > data_end)
+		return XDP_DROP;
+
+	dport = get_dport(ip6h + 1, data_end, ip6h->nexthdr);
+	if (dport == -1)
+		return XDP_DROP;
+
+	vip.protocol = ip6h->nexthdr;
+	vip.family = AF_INET6;
+	memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+	vip.dport = dport;
+	payload_len = ip6h->payload_len;
+
+	tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+	/* It only does v6-in-v6 */
+	if (!tnl || tnl->family != AF_INET6)
+		return XDP_PASS;
+
+	/* The vip key is found.  Add an IP header and send it out */
+
+	if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
+		return XDP_DROP;
+
+	data = (void *)(long)xdp->data;
+	data_end = (void *)(long)xdp->data_end;
+
+	new_eth = data;
+	ip6h = data + sizeof(*new_eth);
+	old_eth = data + sizeof(*ip6h);
+
+	if (new_eth + 1 > data_end ||
+	    old_eth + 1 > data_end ||
+	    ip6h + 1 > data_end )
+		return XDP_DROP;
+
+	set_ethhdr(new_eth, old_eth, tnl, htons(ETH_P_IPV6));
+
+	ip6h->version = 6;
+	ip6h->priority = 0;
+	memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+	ip6h->payload_len = htons(ntohs(payload_len) + sizeof(*ip6h));
+	ip6h->nexthdr = IPPROTO_IPV6;
+	ip6h->hop_limit = 8;
+	memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+	memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+	count_tx(vip.protocol);
+
+	return XDP_TX;
+}
+
+SEC("xdp_tx_iptnl")
+int _xdp_tx_iptnl(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	struct ethhdr *eth = data;
+	__u16 h_proto;
+
+	if (eth + 1 > data_end)
+		return XDP_DROP;
+
+	h_proto = eth->h_proto;
+
+	if (h_proto == htons(ETH_P_IP))
+		return handle_ipv4(xdp);
+	else if (h_proto == htons(ETH_P_IPV6))
+
+		return handle_ipv6(xdp);
+	else
+		return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/samples/bpf/xdp_tx_iptnl_user.c b/samples/bpf/xdp_tx_iptnl_user.c
new file mode 100644
index 000000000000..9aeef7579af4
--- /dev/null
+++ b/samples/bpf/xdp_tx_iptnl_user.c
@@ -0,0 +1,253 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <arpa/inet.h>
+#include <netinet/ether.h>
+#include <unistd.h>
+#include <time.h>
+#include "bpf_load.h"
+#include "libbpf.h"
+#include "bpf_util.h"
+#include "xdp_tx_iptnl_common.h"
+
+#define STATS_INTERVAL_S 2U
+
+static int ifindex = -1;
+
+static void int_exit(int sig)
+{
+	if (ifindex > -1)
+		set_link_xdp_fd(ifindex, -1);
+	exit(0);
+}
+
+/* simple per-protocol drop counter
+ */
+static void poll_stats(unsigned int kill_after_s)
+{
+	const unsigned int nr_protos = 256;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	time_t started_at = time(NULL);
+	__u64 values[nr_cpus], prev[nr_protos][nr_cpus];
+	__u32 proto;
+	int i;
+
+	memset(prev, 0, sizeof(prev));
+
+	while (!kill_after_s || time(NULL) - started_at <= kill_after_s) {
+		sleep(STATS_INTERVAL_S);
+
+		for (proto = 0; proto < nr_protos; proto++) {
+			__u64 sum = 0;
+
+			assert(bpf_lookup_elem(map_fd[0], &proto, values) == 0);
+			for (i = 0; i < nr_cpus; i++)
+				sum += (values[i] - prev[proto][i]);
+
+			if (sum)
+				printf("proto %u: sum:%10llu pkts, rate:%10llu pkts/s\n",
+				       proto, sum, sum / STATS_INTERVAL_S);
+			memcpy(prev[proto], values, sizeof(values));
+		}
+	}
+}
+
+static void usage(const char *cmd)
+{
+	printf("Usage: %s [...]\n", cmd);
+	printf("    -i <ifindex> Interface Index\n");
+	printf("    -a <vip-service-address> IPv4 or IPv6\n");
+	printf("    -p <vip-service-port> A port range (e.g. 433-444) is also allowed\n");
+	printf("    -s <source-ip> Used in the IPTunnel Header\n");
+	printf("    -d <dest-ip> Used in the IPTunnel header>\n");
+	printf("    -m <dest-MAC> Used in sending the IP Tunneled pkt>\n");
+	printf("    -T <stop-after-X-seconds> Default: 0 (forever)\n");
+	printf("    -P <IP-Protocol> Default is TCP\n");
+	printf("    -h Display this help\n");
+}
+
+static int parse_ipstr(const char *ipstr, unsigned int *addr)
+{
+	if (inet_pton(AF_INET6, ipstr, addr) == 1) {
+		return AF_INET6;
+	} else if (inet_pton(AF_INET, ipstr, addr) == 1) {
+		addr[1] = addr[2] = addr[3] = 0;
+		return AF_INET;
+	}
+
+	fprintf(stderr, "%s is an invalid IP\n", ipstr);
+	return AF_UNSPEC;
+}
+
+static int parse_ports(const char *port_str, int *min_port, int *max_port)
+{
+	char *end;
+	long tmp_min_port;
+	long tmp_max_port;
+
+	tmp_min_port = strtol(optarg, &end, 10);
+	if (tmp_min_port < 1 || tmp_min_port > 65535) {
+		fprintf(stderr, "Invalid port(s):%s\n", optarg);
+		return 1;
+	}
+
+	if (*end == '-') {
+		end++;
+		tmp_max_port = strtol(end, NULL, 10);
+		if (tmp_max_port < 1 || tmp_max_port > 65535) {
+			fprintf(stderr, "Invalid port(s):%s\n", optarg);
+			return 1;
+		}
+	} else {
+		tmp_max_port = tmp_min_port;
+	}
+
+	if (tmp_min_port > tmp_max_port) {
+		fprintf(stderr, "Invalid port(s):%s\n", optarg);
+		return 1;
+	}
+
+	if (tmp_max_port - tmp_min_port + 1 > MAX_IPTNL_ENTRIES) {
+		fprintf(stderr, "Port range (%s) is larger than %u\n",
+			port_str, MAX_IPTNL_ENTRIES);
+		return 1;
+	}
+	*min_port = tmp_min_port;
+	*max_port = tmp_max_port;
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned char opt_flags[256] = {};
+	unsigned int kill_after_s = 0;
+	const char *optstr = "i:a:p:s:d:m:T:P:";
+	int min_port = 0, max_port = 0;
+	struct iptnl_info tnl = {};
+	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct vip vip = {};
+	char filename[256];
+	int opt;
+	int i;
+
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+
+	if (load_bpf_file(filename)) {
+		printf("%s", bpf_log_buf);
+		return 1;
+	}
+
+	if (!prog_fd[0]) {
+		printf("load_bpf_file: %s\n", strerror(errno));
+		return 1;
+	}
+
+	tnl.family = AF_UNSPEC;
+	vip.protocol = IPPROTO_TCP;
+
+	for (i = 0; i < strlen(optstr); i++)
+		if ('a' <= optstr[i] && optstr[i] <= 'z')
+			opt_flags[(unsigned char)optstr[i]] = 1;
+
+	while ((opt = getopt(argc, argv, optstr)) != -1) {
+		unsigned short family;
+		unsigned int *v6;
+
+		switch (opt) {
+		case 'i':
+			ifindex = atoi(optarg);
+			break;
+		case 'a':
+			vip.family = parse_ipstr(optarg, vip.daddr.v6);
+			if (vip.family == AF_UNSPEC)
+				return 1;
+			break;
+		case 'p':
+			if (parse_ports(optarg, &min_port, &max_port))
+				return 1;
+			break;
+		case 'P':
+			vip.protocol = atoi(optarg);
+			break;
+		case 's':
+		case 'd':
+			if (opt == 's')
+				v6 = tnl.saddr.v6;
+			else
+				v6 = tnl.daddr.v6;
+
+			family = parse_ipstr(optarg, v6);
+			if (family == AF_UNSPEC)
+				return 1;
+			if (tnl.family == AF_UNSPEC) {
+				tnl.family = family;
+			} else if (tnl.family != family) {
+				fprintf(stderr,
+					"The IP version of the src and dst addresses used in the IP encapsulation does not match\n");
+				return 1;
+			}
+			break;
+		case 'm':
+			if (!ether_aton_r(optarg,
+					  (struct ether_addr *)tnl.dmac)) {
+				fprintf(stderr, "Invalid mac address:%s\n",
+					optarg);
+				return 1;
+			}
+			break;
+		case 'T':
+			kill_after_s = atoi(optarg);
+			break;
+		default:
+			usage(argv[0]);
+			return 1;
+		}
+		opt_flags[opt] = 0;
+	}
+
+	for (i = 0; i < strlen(optstr); i++) {
+		if (opt_flags[(unsigned int)optstr[i]]) {
+			fprintf(stderr, "Missing argument -%c\n", optstr[i]);
+			usage(argv[0]);
+			return 1;
+		}
+	}
+
+	signal(SIGINT, int_exit);
+
+	while (min_port <= max_port) {
+		vip.dport = htons(min_port++);
+		if (bpf_update_elem(map_fd[1], &vip, &tnl, BPF_NOEXIST)) {
+			perror("bpf_update_elem(&vip2tnl)");
+			return 1;
+		}
+	}
+
+	if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) {
+		printf("link set xdp fd failed\n");
+		return 1;
+	}
+
+	poll_stats(kill_after_s);
+
+	set_link_xdp_fd(ifindex, -1);
+
+	return 0;
+}
-- 
2.5.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-02 23:23 ` [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs Martin KaFai Lau
@ 2016-12-03  0:07   ` Rick Jones
  2016-12-04  3:19     ` Martin KaFai Lau
  2016-12-03  0:38   ` Eric Dumazet
  1 sibling, 1 reply; 21+ messages in thread
From: Rick Jones @ 2016-12-03  0:07 UTC (permalink / raw)
  To: Martin KaFai Lau, netdev
  Cc: Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On 12/02/2016 03:23 PM, Martin KaFai Lau wrote:
> When XDP prog is attached, it is currently limiting
> MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
> in x86.
>
> AFAICT, since mlx4 is doing one page per packet for XDP,
> we can at least raise the MTU limitation up to
> PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
> doing.  It will be useful in the next patch which allows
> XDP program to extend the packet by adding new header(s).

Is mlx4 the only driver doing page-per-packet?

rick jones

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
  2016-12-02 23:23 ` [PATCH net-next 1/4] bpf: xdp: " Martin KaFai Lau
@ 2016-12-03  0:22   ` Daniel Borkmann
  2016-12-03  3:43     ` Martin KaFai Lau
  2016-12-03 15:24   ` Jesper Dangaard Brouer
  1 sibling, 1 reply; 21+ messages in thread
From: Daniel Borkmann @ 2016-12-03  0:22 UTC (permalink / raw)
  To: Martin KaFai Lau, netdev
  Cc: Alexei Starovoitov, Brenden Blanco, David Miller, Saeed Mahameed,
	Tariq Toukan, Kernel Team

On 12/03/2016 12:23 AM, Martin KaFai Lau wrote:
> This patch allows XDP prog to extend/remove the packet
> data at the head (like adding or removing header).  It is
> done by adding a new XDP helper bpf_xdp_adjust_head().
>
> It also renames bpf_helper_changes_skb_data() to
> bpf_helper_changes_pkt_data() to better reflect
> that XDP prog does not work on skb.
>
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
[...]
> diff --git a/net/core/filter.c b/net/core/filter.c
> index 56b43587d200..6902e2f73e38 100644
> --- a/net/core/filter.c
> +++ b/net/core/filter.c
> @@ -2234,7 +2234,34 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
>   	.arg3_type	= ARG_ANYTHING,
>   };
>
> -bool bpf_helper_changes_skb_data(void *func)
> +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
> +{
> +	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
> +	 * XDP prog is set.
> +	 * If the above is not true for the other drivers to support
> +	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
> +	 */
> +	void *head = (void *)((unsigned long)xdp->data & PAGE_MASK);
> +	void *new_data = xdp->data + offset;
> +
> +	if (new_data < head || new_data >= xdp->data_end)
> +		/* The packet length must be >=1 */

Patch looks generally good to me. Should the min pkt len here be
limited to ETH_HLEN instead of 1?

> +		return -EINVAL;
> +
> +	xdp->data = new_data;
> +
> +	return 0;
> +}
> +
> +static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
> +	.func		= bpf_xdp_adjust_head,
> +	.gpl_only	= false,
> +	.ret_type	= RET_INTEGER,
> +	.arg1_type	= ARG_PTR_TO_CTX,
> +	.arg2_type	= ARG_ANYTHING,
> +};
> +
> +bool bpf_helper_changes_pkt_data(void *func)
>   {
>   	if (func == bpf_skb_vlan_push ||
>   	    func == bpf_skb_vlan_pop ||
[...]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-02 23:23 ` [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs Martin KaFai Lau
  2016-12-03  0:07   ` Rick Jones
@ 2016-12-03  0:38   ` Eric Dumazet
  2016-12-03  0:53     ` Alexei Starovoitov
  1 sibling, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2016-12-03  0:38 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: netdev, Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On Fri, 2016-12-02 at 15:23 -0800, Martin KaFai Lau wrote:
> When XDP prog is attached, it is currently limiting
> MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
> in x86.
> 
> AFAICT, since mlx4 is doing one page per packet for XDP,
> we can at least raise the MTU limitation up to
> PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
> doing.  It will be useful in the next patch which allows
> XDP program to extend the packet by adding new header(s).
> 
> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> ---

Have you tested your patch on a host with PAGE_SIZE = 64 KB ?

Looks XDP really kills arches with bigger pages :(

Thanks.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-03  0:38   ` Eric Dumazet
@ 2016-12-03  0:53     ` Alexei Starovoitov
  2016-12-03  2:15       ` Eric Dumazet
  2016-12-04 23:31       ` Saeed Mahameed
  0 siblings, 2 replies; 21+ messages in thread
From: Alexei Starovoitov @ 2016-12-03  0:53 UTC (permalink / raw)
  To: Eric Dumazet, Martin KaFai Lau
  Cc: netdev, Brenden Blanco, Daniel Borkmann, David Miller,
	Saeed Mahameed, Tariq Toukan, Kernel Team

On 12/2/16 4:38 PM, Eric Dumazet wrote:
> On Fri, 2016-12-02 at 15:23 -0800, Martin KaFai Lau wrote:
>> When XDP prog is attached, it is currently limiting
>> MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
>> in x86.
>>
>> AFAICT, since mlx4 is doing one page per packet for XDP,
>> we can at least raise the MTU limitation up to
>> PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
>> doing.  It will be useful in the next patch which allows
>> XDP program to extend the packet by adding new header(s).
>>
>> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
>> ---
>
> Have you tested your patch on a host with PAGE_SIZE = 64 KB ?
>
> Looks XDP really kills arches with bigger pages :(

I'm afraid xdp mlx[45] support was not tested on arches
with 64k pages at all. Not just this patch.
I think people who care about such archs should test?
Note page per packet is not a hard requirement for all drivers
and all archs. For mlx[45] it was the easiest and the most
convenient way to achieve desired performance.
If there are ways to do the same performance differently,
I'm all ears :)

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-03  0:53     ` Alexei Starovoitov
@ 2016-12-03  2:15       ` Eric Dumazet
  2016-12-03  3:42         ` Martin KaFai Lau
  2016-12-04 23:31       ` Saeed Mahameed
  1 sibling, 1 reply; 21+ messages in thread
From: Eric Dumazet @ 2016-12-03  2:15 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Martin KaFai Lau, netdev, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On Fri, 2016-12-02 at 16:53 -0800, Alexei Starovoitov wrote:
> On 12/2/16 4:38 PM, Eric Dumazet wrote:
> > On Fri, 2016-12-02 at 15:23 -0800, Martin KaFai Lau wrote:
> >> When XDP prog is attached, it is currently limiting
> >> MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
> >> in x86.
> >>
> >> AFAICT, since mlx4 is doing one page per packet for XDP,
> >> we can at least raise the MTU limitation up to
> >> PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
> >> doing.  It will be useful in the next patch which allows
> >> XDP program to extend the packet by adding new header(s).
> >>
> >> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> >> ---
> >
> > Have you tested your patch on a host with PAGE_SIZE = 64 KB ?
> >
> > Looks XDP really kills arches with bigger pages :(
> 
> I'm afraid xdp mlx[45] support was not tested on arches
> with 64k pages at all. Not just this patch.
> I think people who care about such archs should test?
> Note page per packet is not a hard requirement for all drivers
> and all archs. For mlx[45] it was the easiest and the most
> convenient way to achieve desired performance.
> If there are ways to do the same performance differently,
> I'm all ears :)
> 

My question was more like :

Can we double check all these patches wont break mlx4 driver (non XDP
path) on arches with PAGE_SIZE=64KB.

I have no plan using XDP before a while, but I certainly know some
customers are using mlx4 on powerpc.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-03  2:15       ` Eric Dumazet
@ 2016-12-03  3:42         ` Martin KaFai Lau
  2016-12-03  3:59           ` Eric Dumazet
  0 siblings, 1 reply; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-03  3:42 UTC (permalink / raw)
  To: Eric Dumazet
  Cc: Alexei Starovoitov, netdev, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On Fri, Dec 02, 2016 at 06:15:26PM -0800, Eric Dumazet wrote:
> On Fri, 2016-12-02 at 16:53 -0800, Alexei Starovoitov wrote:
> > On 12/2/16 4:38 PM, Eric Dumazet wrote:
> > > On Fri, 2016-12-02 at 15:23 -0800, Martin KaFai Lau wrote:
> > >> When XDP prog is attached, it is currently limiting
> > >> MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
> > >> in x86.
> > >>
> > >> AFAICT, since mlx4 is doing one page per packet for XDP,
> > >> we can at least raise the MTU limitation up to
> > >> PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
> > >> doing.  It will be useful in the next patch which allows
> > >> XDP program to extend the packet by adding new header(s).
> > >>
> > >> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> > >> ---
> > >
> > > Have you tested your patch on a host with PAGE_SIZE = 64 KB ?
> > >
> > > Looks XDP really kills arches with bigger pages :(
> >
> > I'm afraid xdp mlx[45] support was not tested on arches
> > with 64k pages at all. Not just this patch.
> > I think people who care about such archs should test?
> > Note page per packet is not a hard requirement for all drivers
> > and all archs. For mlx[45] it was the easiest and the most
> > convenient way to achieve desired performance.
> > If there are ways to do the same performance differently,
> > I'm all ears :)
> >
>
> My question was more like :
>
> Can we double check all these patches wont break mlx4 driver (non XDP
> path) on arches with PAGE_SIZE=64KB.
The page/pkt requirement is not added by this patch.  The earlier
XDP patch series has already ensured this page/pkt requirement
is effective only when XDP prog is attached.

In the earlier XDP patches, MTU is limited to 1514 when
XDP is ative.   This patch is to allow fully use of the
page for a packet (and also only matter when XDP is active).

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
  2016-12-03  0:22   ` Daniel Borkmann
@ 2016-12-03  3:43     ` Martin KaFai Lau
  0 siblings, 0 replies; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-03  3:43 UTC (permalink / raw)
  To: Daniel Borkmann
  Cc: netdev, Alexei Starovoitov, Brenden Blanco, David Miller,
	Saeed Mahameed, Tariq Toukan, Kernel Team

On Sat, Dec 03, 2016 at 01:22:05AM +0100, Daniel Borkmann wrote:
> On 12/03/2016 12:23 AM, Martin KaFai Lau wrote:
> >This patch allows XDP prog to extend/remove the packet
> >data at the head (like adding or removing header).  It is
> >done by adding a new XDP helper bpf_xdp_adjust_head().
> >
> >It also renames bpf_helper_changes_skb_data() to
> >bpf_helper_changes_pkt_data() to better reflect
> >that XDP prog does not work on skb.
> >
> >Signed-off-by: Martin KaFai Lau <kafai@fb.com>
> [...]
> >diff --git a/net/core/filter.c b/net/core/filter.c
> >index 56b43587d200..6902e2f73e38 100644
> >--- a/net/core/filter.c
> >+++ b/net/core/filter.c
> >@@ -2234,7 +2234,34 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = {
> >  	.arg3_type	= ARG_ANYTHING,
> >  };
> >
> >-bool bpf_helper_changes_skb_data(void *func)
> >+BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
> >+{
> >+	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
> >+	 * XDP prog is set.
> >+	 * If the above is not true for the other drivers to support
> >+	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
> >+	 */
> >+	void *head = (void *)((unsigned long)xdp->data & PAGE_MASK);
> >+	void *new_data = xdp->data + offset;
> >+
> >+	if (new_data < head || new_data >= xdp->data_end)
> >+		/* The packet length must be >=1 */
>
> Patch looks generally good to me. Should the min pkt len here be
> limited to ETH_HLEN instead of 1?
Make sense.  Will make the change.

>
> >+		return -EINVAL;
> >+
> >+	xdp->data = new_data;
> >+
> >+	return 0;
> >+}
> >+
> >+static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
> >+	.func		= bpf_xdp_adjust_head,
> >+	.gpl_only	= false,
> >+	.ret_type	= RET_INTEGER,
> >+	.arg1_type	= ARG_PTR_TO_CTX,
> >+	.arg2_type	= ARG_ANYTHING,
> >+};
> >+
> >+bool bpf_helper_changes_pkt_data(void *func)
> >  {
> >  	if (func == bpf_skb_vlan_push ||
> >  	    func == bpf_skb_vlan_pop ||
> [...]

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-03  3:42         ` Martin KaFai Lau
@ 2016-12-03  3:59           ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2016-12-03  3:59 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Alexei Starovoitov, netdev, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On Fri, 2016-12-02 at 19:42 -0800, Martin KaFai Lau wrote:
> On Fri, Dec 02, 2016 at 06:15:26PM -0800, Eric Dumazet wrote:
> > My question was more like :
> >
> > Can we double check all these patches wont break mlx4 driver (non XDP
> > path) on arches with PAGE_SIZE=64KB.
> The page/pkt requirement is not added by this patch.  The earlier
> XDP patch series has already ensured this page/pkt requirement
> is effective only when XDP prog is attached.
> 
> In the earlier XDP patches, MTU is limited to 1514 when
> XDP is ative.   This patch is to allow fully use of the
> page for a packet (and also only matter when XDP is active).

OK, thanks for the clarification.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 0/4]: Allow head adjustment in XDP prog
  2016-12-02 23:23 [PATCH net-next 0/4]: Allow head adjustment in XDP prog Martin KaFai Lau
                   ` (3 preceding siblings ...)
  2016-12-02 23:23 ` [PATCH net-next 4/4] bpf: xdp: Add XDP example for head adjustment Martin KaFai Lau
@ 2016-12-03 15:17 ` Jesper Dangaard Brouer
  4 siblings, 0 replies; 21+ messages in thread
From: Jesper Dangaard Brouer @ 2016-12-03 15:17 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: brouer, netdev, Alexei Starovoitov, Brenden Blanco,
	Daniel Borkmann, David Miller, Saeed Mahameed, Tariq Toukan,
	Kernel Team

On Fri, 2 Dec 2016 15:23:29 -0800
Martin KaFai Lau <kafai@fb.com> wrote:

> This series adds a helper to allow head adjustment in XDP prog.  mlx4
> driver has been modified to support this feature.  An example is written
> to encapsulate a packet with an IPv4/v6 header and then XDP_TX it
> out.

Hi Martin,

It is great to see work in this area.

Push/pop of headers is listed as on of the missing features here:
 https://prototype-kernel.readthedocs.io/en/latest/networking/XDP/implementation/missing_features.html
We can hopefully soon cross that of the list :-)

Header push and pop desc:
 https://prototype-kernel.readthedocs.io/en/latest/networking/XDP/design/requirements.html#header-push-and-pop

Thanks for working on this! :-)))
-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
  2016-12-02 23:23 ` [PATCH net-next 1/4] bpf: xdp: " Martin KaFai Lau
  2016-12-03  0:22   ` Daniel Borkmann
@ 2016-12-03 15:24   ` Jesper Dangaard Brouer
  2016-12-03 19:32     ` Martin KaFai Lau
  1 sibling, 1 reply; 21+ messages in thread
From: Jesper Dangaard Brouer @ 2016-12-03 15:24 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: brouer, netdev, Alexei Starovoitov, Brenden Blanco,
	Daniel Borkmann, David Miller, Saeed Mahameed, Tariq Toukan,
	Kernel Team

On Fri, 2 Dec 2016 15:23:30 -0800
Martin KaFai Lau <kafai@fb.com> wrote:

> -bool bpf_helper_changes_skb_data(void *func)
> +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
> +{
> +	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
> +	 * XDP prog is set.
> +	 * If the above is not true for the other drivers to support
> +	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
> +	 */
> +	void *head = (void *)((unsigned long)xdp->data & PAGE_MASK);
> +	void *new_data = xdp->data + offset;
> +
> +	if (new_data < head || new_data >= xdp->data_end)
> +		/* The packet length must be >=1 */
> +		return -EINVAL;
> +
> +	xdp->data = new_data;
> +
> +	return 0;
> +}

First time I read this code, I was about to complain about you didn't
use XDP_PACKET_HEADROOM in your boundary check.  But then I noticed the
PAGE_MASK.  If you rename "head" to "page_boundary" or "page_start"
then IMHO the code would be more readable.

-- 
Best regards,
  Jesper Dangaard Brouer
  MSc.CS, Principal Kernel Engineer at Red Hat
  LinkedIn: http://www.linkedin.com/in/brouer

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
  2016-12-03 15:24   ` Jesper Dangaard Brouer
@ 2016-12-03 19:32     ` Martin KaFai Lau
  2016-12-03 20:21       ` Daniel Borkmann
  0 siblings, 1 reply; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-03 19:32 UTC (permalink / raw)
  To: Jesper Dangaard Brouer
  Cc: netdev, Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On Sat, Dec 03, 2016 at 04:24:13PM +0100, Jesper Dangaard Brouer wrote:
> On Fri, 2 Dec 2016 15:23:30 -0800
> Martin KaFai Lau <kafai@fb.com> wrote:
>
> > -bool bpf_helper_changes_skb_data(void *func)
> > +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
> > +{
> > +	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
> > +	 * XDP prog is set.
> > +	 * If the above is not true for the other drivers to support
> > +	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
> > +	 */
> > +	void *head = (void *)((unsigned long)xdp->data & PAGE_MASK);
> > +	void *new_data = xdp->data + offset;
> > +
> > +	if (new_data < head || new_data >= xdp->data_end)
> > +		/* The packet length must be >=1 */
> > +		return -EINVAL;
> > +
> > +	xdp->data = new_data;
> > +
> > +	return 0;
> > +}
>
> First time I read this code, I was about to complain about you didn't
> use XDP_PACKET_HEADROOM in your boundary check.  But then I noticed the
> PAGE_MASK.  If you rename "head" to "page_boundary" or "page_start"
> then IMHO the code would be more readable.
bpf_xdp_adjust_head() could be called multiple times.  Hence,
XDP_PACKET_HEADROOM is not used in the boundary check.

My thinking is "head" here can closely resemble the meaning of
skb->head as a boundary.  I think missing the info on
what head it is could be the confusing part.

Instead of skb boundary (there is no skb here) or
page boundary (other future XDP driver may not align like mlx4/5),
I think may be "pkt_head" can give more clarity here and also
for furture XDP-capble driver?

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 1/4] bpf: xdp: Allow head adjustment in XDP prog
  2016-12-03 19:32     ` Martin KaFai Lau
@ 2016-12-03 20:21       ` Daniel Borkmann
  0 siblings, 0 replies; 21+ messages in thread
From: Daniel Borkmann @ 2016-12-03 20:21 UTC (permalink / raw)
  To: Martin KaFai Lau, Jesper Dangaard Brouer
  Cc: netdev, Alexei Starovoitov, Brenden Blanco, David Miller,
	Saeed Mahameed, Tariq Toukan, Kernel Team

On 12/03/2016 08:32 PM, Martin KaFai Lau wrote:
> On Sat, Dec 03, 2016 at 04:24:13PM +0100, Jesper Dangaard Brouer wrote:
>> On Fri, 2 Dec 2016 15:23:30 -0800
>> Martin KaFai Lau <kafai@fb.com> wrote:
>>
>>> -bool bpf_helper_changes_skb_data(void *func)
>>> +BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
>>> +{
>>> +	/* Both mlx4 and mlx5 driver align each packet to PAGE_SIZE when
>>> +	 * XDP prog is set.
>>> +	 * If the above is not true for the other drivers to support
>>> +	 * bpf_xdp_adjust_head, struct xdp_buff can be extended.
>>> +	 */
>>> +	void *head = (void *)((unsigned long)xdp->data & PAGE_MASK);
>>> +	void *new_data = xdp->data + offset;
>>> +
>>> +	if (new_data < head || new_data >= xdp->data_end)
>>> +		/* The packet length must be >=1 */
>>> +		return -EINVAL;
>>> +
>>> +	xdp->data = new_data;
>>> +
>>> +	return 0;
>>> +}
>>
>> First time I read this code, I was about to complain about you didn't
>> use XDP_PACKET_HEADROOM in your boundary check.  But then I noticed the
>> PAGE_MASK.  If you rename "head" to "page_boundary" or "page_start"
>> then IMHO the code would be more readable.
> bpf_xdp_adjust_head() could be called multiple times.  Hence,
> XDP_PACKET_HEADROOM is not used in the boundary check.
>
> My thinking is "head" here can closely resemble the meaning of
> skb->head as a boundary.  I think missing the info on
> what head it is could be the confusing part.
>
> Instead of skb boundary (there is no skb here) or
> page boundary (other future XDP driver may not align like mlx4/5),
> I think may be "pkt_head" can give more clarity here and also
> for furture XDP-capble driver?

I think as-is with head is also fine with me, but if it should be
something better readable (?), perhaps as such (modulo the min len
part):

BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset)
{
	unsigned long addr = (unsigned long)xdp->data & PAGE_MASK;
	void *data_hard_start = (void *)addr;
	void *data = xdp->data + offset;

	if (unlikely(data < data_hard_start || data >= xdp->data_end))
		return -EINVAL;

	xdp->data = data;
	return 0;
}

Thanks,
Daniel

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-03  0:07   ` Rick Jones
@ 2016-12-04  3:19     ` Martin KaFai Lau
  2016-12-05 17:46       ` Jakub Kicinski
  0 siblings, 1 reply; 21+ messages in thread
From: Martin KaFai Lau @ 2016-12-04  3:19 UTC (permalink / raw)
  To: Rick Jones
  Cc: netdev, Alexei Starovoitov, Brenden Blanco, Daniel Borkmann,
	David Miller, Saeed Mahameed, Tariq Toukan, Kernel Team

On Fri, Dec 02, 2016 at 04:07:09PM -0800, Rick Jones wrote:
> On 12/02/2016 03:23 PM, Martin KaFai Lau wrote:
> >When XDP prog is attached, it is currently limiting
> >MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
> >in x86.
> >
> >AFAICT, since mlx4 is doing one page per packet for XDP,
> >we can at least raise the MTU limitation up to
> >PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
> >doing.  It will be useful in the next patch which allows
> >XDP program to extend the packet by adding new header(s).
>
> Is mlx4 the only driver doing page-per-packet?
Sorry for the late reply.  This allocation scheme is only effective
when XDP is active.  AFAIK, only mlx4/5 supports XDP now.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-03  0:53     ` Alexei Starovoitov
  2016-12-03  2:15       ` Eric Dumazet
@ 2016-12-04 23:31       ` Saeed Mahameed
  2016-12-05  0:52         ` Eric Dumazet
  1 sibling, 1 reply; 21+ messages in thread
From: Saeed Mahameed @ 2016-12-04 23:31 UTC (permalink / raw)
  To: Alexei Starovoitov
  Cc: Eric Dumazet, Martin KaFai Lau, Linux Netdev List,
	Brenden Blanco, Daniel Borkmann, David Miller, Saeed Mahameed,
	Tariq Toukan, Kernel Team

On Sat, Dec 3, 2016 at 2:53 AM, Alexei Starovoitov <ast@fb.com> wrote:
> On 12/2/16 4:38 PM, Eric Dumazet wrote:
>>
>> On Fri, 2016-12-02 at 15:23 -0800, Martin KaFai Lau wrote:
>>>
>>> When XDP prog is attached, it is currently limiting
>>> MTU to be FRAG_SZ0 - ETH_HLEN - (2 * VLAN_HLEN) which is 1514
>>> in x86.
>>>
>>> AFAICT, since mlx4 is doing one page per packet for XDP,
>>> we can at least raise the MTU limitation up to
>>> PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) which this patch is
>>> doing.  It will be useful in the next patch which allows
>>> XDP program to extend the packet by adding new header(s).
>>>
>>> Signed-off-by: Martin KaFai Lau <kafai@fb.com>
>>> ---
>>
>>
>> Have you tested your patch on a host with PAGE_SIZE = 64 KB ?
>>
>> Looks XDP really kills arches with bigger pages :(
>
>
> I'm afraid xdp mlx[45] support was not tested on arches
> with 64k pages at all. Not just this patch.

Yep, in mlx5 page per packet became the default, with or without XDP,
unlike mlx4.
currently we allow 64KB pages per packet! which is wrong and need to be fixed.

I will get to this task soon.

> I think people who care about such archs should test?

We do test mlx5 and mlx4 on PPC arch. other than we require more
memory than we need, we don't see any issues. and we don't test XDP on
those archs.

> Note page per packet is not a hard requirement for all drivers
> and all archs. For mlx[45] it was the easiest and the most
> convenient way to achieve desired performance.
> If there are ways to do the same performance differently,
> I'm all ears :)
>

when bigger pages, i.e  PAGE_SIZE > 8K, my current low hanging fruit
options for mlx5 are
1. start sharing pages for multi packets.
2. Go back to the SKB allocator (allocate ring of SKBs on advance
rather than page per packet/s).

this means that default RX memory scheme will be different than XDP's
on such ARCHs (XDP wil still use page per packet)

Alexei, we should start considering PPC archs for XDP use cases,
demanding page per packet on those archs is a little bit heavy
requirement

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-04 23:31       ` Saeed Mahameed
@ 2016-12-05  0:52         ` Eric Dumazet
  0 siblings, 0 replies; 21+ messages in thread
From: Eric Dumazet @ 2016-12-05  0:52 UTC (permalink / raw)
  To: Saeed Mahameed
  Cc: Alexei Starovoitov, Martin KaFai Lau, Linux Netdev List,
	Brenden Blanco, Daniel Borkmann, David Miller, Saeed Mahameed,
	Tariq Toukan, Kernel Team

On Mon, 2016-12-05 at 01:31 +0200, Saeed Mahameed wrote:

> Alexei, we should start considering PPC archs for XDP use cases,
> demanding page per packet on those archs is a little bit heavy
> requirement

Well, 'little' is an understatement ;)

Note that PPC had serious problems before  commit bd68a2a854ad5a85f0
("net: set SK_MEM_QUANTUM to 4096")

So I suspect one page per frame will likely be a huge problem
for hosts dealing with 10^5 or more TCP sockets.

Either skb->truesize is set to 64KB and TCP window must be really tiny,
or skb->truesize is set to ~2KB and OOM is waiting to happen.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs
  2016-12-04  3:19     ` Martin KaFai Lau
@ 2016-12-05 17:46       ` Jakub Kicinski
  0 siblings, 0 replies; 21+ messages in thread
From: Jakub Kicinski @ 2016-12-05 17:46 UTC (permalink / raw)
  To: Martin KaFai Lau
  Cc: Rick Jones, netdev, Alexei Starovoitov, Brenden Blanco,
	Daniel Borkmann, David Miller, Saeed Mahameed, Tariq Toukan,
	Kernel Team

On Sat, 3 Dec 2016 19:19:58 -0800, Martin KaFai Lau wrote:
> AFAIK, only mlx4/5 supports XDP now.

That is, unfortunately, not true.

$ git checkout net-next/master
$ git grep ndo_xdp drivers/
drivers/net/ethernet/mellanox/mlx4/en_netdev.c: .ndo_xdp                = mlx4_xdp,
drivers/net/ethernet/mellanox/mlx4/en_netdev.c: .ndo_xdp                = mlx4_xdp,
drivers/net/ethernet/mellanox/mlx5/core/en_main.c:      .ndo_xdp                 = mlx5e_xdp,
drivers/net/ethernet/mellanox/mlx5/core/en_main.c:      .ndo_xdp                 = mlx5e_xdp,
drivers/net/ethernet/netronome/nfp/nfp_net_common.c:    .ndo_xdp                = nfp_net_xdp,
drivers/net/ethernet/qlogic/qede/qede_main.c:   .ndo_xdp = qede_xdp,

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2016-12-05 17:46 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-12-02 23:23 [PATCH net-next 0/4]: Allow head adjustment in XDP prog Martin KaFai Lau
2016-12-02 23:23 ` [PATCH net-next 1/4] bpf: xdp: " Martin KaFai Lau
2016-12-03  0:22   ` Daniel Borkmann
2016-12-03  3:43     ` Martin KaFai Lau
2016-12-03 15:24   ` Jesper Dangaard Brouer
2016-12-03 19:32     ` Martin KaFai Lau
2016-12-03 20:21       ` Daniel Borkmann
2016-12-02 23:23 ` [PATCH net-next 2/4] mlx4: xdp: Allow raising MTU up to one page minus eth and vlan hdrs Martin KaFai Lau
2016-12-03  0:07   ` Rick Jones
2016-12-04  3:19     ` Martin KaFai Lau
2016-12-05 17:46       ` Jakub Kicinski
2016-12-03  0:38   ` Eric Dumazet
2016-12-03  0:53     ` Alexei Starovoitov
2016-12-03  2:15       ` Eric Dumazet
2016-12-03  3:42         ` Martin KaFai Lau
2016-12-03  3:59           ` Eric Dumazet
2016-12-04 23:31       ` Saeed Mahameed
2016-12-05  0:52         ` Eric Dumazet
2016-12-02 23:23 ` [PATCH net-next 3/4] mlx4: xdp: Reserve headroom for receiving packet when XDP prog is active Martin KaFai Lau
2016-12-02 23:23 ` [PATCH net-next 4/4] bpf: xdp: Add XDP example for head adjustment Martin KaFai Lau
2016-12-03 15:17 ` [PATCH net-next 0/4]: Allow head adjustment in XDP prog Jesper Dangaard Brouer

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.