All of lore.kernel.org
 help / color / mirror / Atom feed
* [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions
@ 2017-03-02  0:54 John Fastabend
  2017-03-02  0:54 ` [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action John Fastabend
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: John Fastabend @ 2017-03-02  0:54 UTC (permalink / raw)
  To: intel-wired-lan

Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
programs instead of rcu primitives as suggested by Daniel Borkmann and
Alex Duyck.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h      |    2 
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  114 ++++++++++++++++++++++++-
 2 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index b812913..2d12c24 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -273,6 +273,7 @@ struct ixgbe_ring {
 	struct ixgbe_ring *next;	/* pointer to next ring in q_vector */
 	struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
 	struct net_device *netdev;	/* netdev ring belongs to */
+	struct bpf_prog *xdp_prog;
 	struct device *dev;		/* device for DMA mapping */
 	struct ixgbe_fwd_adapter *l2_accel_priv;
 	void *desc;			/* descriptor ring memory */
@@ -510,6 +511,7 @@ struct ixgbe_adapter {
 	unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
 	/* OS defined structs */
 	struct net_device *netdev;
+	struct bpf_prog *xdp_prog;
 	struct pci_dev *pdev;
 
 	unsigned long state;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e3da397..0b802b5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -49,6 +49,9 @@
 #include <linux/if_macvlan.h>
 #include <linux/if_bridge.h>
 #include <linux/prefetch.h>
+#include <linux/bpf.h>
+#include <linux/bpf_trace.h>
+#include <linux/atomic.h>
 #include <scsi/fc/fc_fcoe.h>
 #include <net/udp_tunnel.h>
 #include <net/pkt_cls.h>
@@ -2051,7 +2054,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
 		/* hand second half of page back to the ring */
 		ixgbe_reuse_rx_page(rx_ring, rx_buffer);
 	} else {
-		if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
+		if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) {
 			/* the page has been released from the ring */
 			IXGBE_CB(skb)->page_released = true;
 		} else {
@@ -2157,6 +2160,50 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 	return skb;
 }
 
+#define IXGBE_XDP_PASS 0
+#define IXGBE_XDP_CONSUMED 1
+
+static int ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
+			 struct ixgbe_rx_buffer *rx_buffer,
+			 unsigned int size)
+{
+	int result = IXGBE_XDP_PASS;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	void *addr;
+	u32 act;
+
+	rcu_read_lock();
+	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
+
+	if (!xdp_prog)
+		goto xdp_out;
+
+	addr = page_address(rx_buffer->page) + rx_buffer->page_offset;
+	xdp.data_hard_start = addr;
+	xdp.data = addr;
+	xdp.data_end = addr + size;
+
+	act = bpf_prog_run_xdp(xdp_prog, &xdp);
+	switch (act) {
+	case XDP_PASS:
+		goto xdp_out;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+	case XDP_TX:
+	case XDP_ABORTED:
+		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
+		/* fallthrough -- handle aborts by dropping packet */
+	case XDP_DROP:
+		rx_buffer->pagecnt_bias++; /* give page back */
+		result = IXGBE_XDP_CONSUMED;
+		break;
+	}
+xdp_out:
+	rcu_read_unlock();
+	return result;
+}
+
 /**
  * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
  * @q_vector: structure containing interrupt and ring information
@@ -2187,6 +2234,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		struct ixgbe_rx_buffer *rx_buffer;
 		struct sk_buff *skb;
 		unsigned int size;
+		int consumed;
 
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
@@ -2207,6 +2255,18 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 
 		rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
 
+		consumed = ixgbe_run_xdp(rx_ring, rx_buffer, size);
+		rcu_read_unlock();
+
+		if (consumed) {
+			ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
+			cleaned_count++;
+			ixgbe_is_non_eop(rx_ring, rx_desc, skb);
+			total_rx_packets++;
+			total_rx_bytes += size;
+			continue;
+		}
+
 		/* retrieve a buffer from the ring */
 		if (skb)
 			ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
@@ -6121,9 +6181,13 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
 	int i, err = 0;
 
 	for (i = 0; i < adapter->num_rx_queues; i++) {
-		err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
-		if (!err)
+		struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
+
+		err = ixgbe_setup_rx_resources(rx_ring);
+		if (!err) {
+			xchg(&rx_ring->xdp_prog, adapter->xdp_prog);
 			continue;
+		}
 
 		e_err(probe, "Allocation for Rx Queue %u failed\n", i);
 		goto err_setup_rx;
@@ -6191,6 +6255,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
 
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
+	xchg(&rx_ring->xdp_prog, NULL);
 
 	/* if not set, then don't free */
 	if (!rx_ring->desc)
@@ -9455,6 +9520,48 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 	return features;
 }
 
+static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
+{
+	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+	struct bpf_prog *old_adapter_prog;
+
+	/* verify ixgbe ring attributes are sufficient for XDP */
+	for (i = 0; i < adapter->num_rx_queues; i++) {
+		struct ixgbe_ring *ring = adapter->rx_ring[i];
+
+		if (ring_is_rsc_enabled(ring))
+			return -EINVAL;
+
+		if (frame_size > ixgbe_rx_bufsz(ring))
+			return -EINVAL;
+	}
+
+	old_adapter_prog = xchg(&adapter->xdp_prog, prog);
+	for (i = 0; i < adapter->num_rx_queues; i++)
+		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
+
+	if (old_adapter_prog)
+		bpf_prog_put(old_adapter_prog);
+
+	return 0;
+}
+
+static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
+{
+	struct ixgbe_adapter *adapter = netdev_priv(dev);
+
+	switch (xdp->command) {
+	case XDP_SETUP_PROG:
+		return ixgbe_xdp_setup(dev, xdp->prog);
+	case XDP_QUERY_PROG:
+		xdp->prog_attached = !!(adapter->rx_ring[0]->xdp_prog);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
 static const struct net_device_ops ixgbe_netdev_ops = {
 	.ndo_open		= ixgbe_open,
 	.ndo_stop		= ixgbe_close,
@@ -9500,6 +9607,7 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
 	.ndo_udp_tunnel_add	= ixgbe_add_udp_tunnel_port,
 	.ndo_udp_tunnel_del	= ixgbe_del_udp_tunnel_port,
 	.ndo_features_check	= ixgbe_features_check,
+	.ndo_xdp		= ixgbe_xdp,
 };
 
 /**


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action
  2017-03-02  0:54 [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions John Fastabend
@ 2017-03-02  0:54 ` John Fastabend
  2017-03-02 16:39   ` Alexander Duyck
  2017-03-02  0:55 ` [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head John Fastabend
  2017-03-02 16:22 ` [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions Alexander Duyck
  2 siblings, 1 reply; 9+ messages in thread
From: John Fastabend @ 2017-03-02  0:54 UTC (permalink / raw)
  To: intel-wired-lan

Add support for XDP_TX action.

A couple design choices were made here. First I use a new ring
pointer structure xdp_ring[] in the adapter struct instead of
pushing the newly allocated xdp TX rings into the tx_ring[]
structure. This means we have to duplicate loops around rings
in places we want to initialize both TX rings and XDP rings.
But by making it explicit it is obvious when we are using XDP
rings and when we are using TX rings. Further we don't have
to do ring arithmatic which is error prone. As a proof point
for doing this my first patches used only a single ring structure
and introduced bugs in FCoE code and macvlan code paths.

Second I am aware this is not the most optimized version of
this code possible. I want to get baseline support in using
the most readable format possible and then once this series
is included I will optimize the TX path in another series
of patches.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h         |   18 +
 drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   27 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c     |   87 ++++++-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  290 +++++++++++++++++++---
 4 files changed, 367 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 2d12c24..f2a1409 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -190,7 +190,10 @@ struct vf_macvlans {
 struct ixgbe_tx_buffer {
 	union ixgbe_adv_tx_desc *next_to_watch;
 	unsigned long time_stamp;
-	struct sk_buff *skb;
+	union {
+		struct sk_buff *skb;
+		void *data; /* XDP uses addr ptr on irq_clean */
+	};
 	unsigned int bytecount;
 	unsigned short gso_segs;
 	__be16 protocol;
@@ -243,6 +246,7 @@ enum ixgbe_ring_state_t {
 	__IXGBE_TX_XPS_INIT_DONE,
 	__IXGBE_TX_DETECT_HANG,
 	__IXGBE_HANG_CHECK_ARMED,
+	__IXGBE_TX_XDP_RING,
 };
 
 #define ring_uses_build_skb(ring) \
@@ -269,6 +273,12 @@ struct ixgbe_fwd_adapter {
 	set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
 #define clear_ring_rsc_enabled(ring) \
 	clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
+#define ring_is_xdp(ring) \
+	test_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
+#define set_ring_xdp(ring) \
+	set_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
+#define clear_ring_xdp(ring) \
+	clear_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
 struct ixgbe_ring {
 	struct ixgbe_ring *next;	/* pointer to next ring in q_vector */
 	struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
@@ -335,6 +345,7 @@ enum ixgbe_ring_f_enum {
 #define IXGBE_MAX_FCOE_INDICES		8
 #define MAX_RX_QUEUES			(IXGBE_MAX_FDIR_INDICES + 1)
 #define MAX_TX_QUEUES			(IXGBE_MAX_FDIR_INDICES + 1)
+#define MAX_XDP_QUEUES			(IXGBE_MAX_FDIR_INDICES + 1)
 #define IXGBE_MAX_L2A_QUEUES		4
 #define IXGBE_BAD_L2A_QUEUE		3
 #define IXGBE_MAX_MACVLANS		31
@@ -578,6 +589,10 @@ struct ixgbe_adapter {
 	__be16 vxlan_port;
 	__be16 geneve_port;
 
+	/* XDP */
+	int num_xdp_queues;
+	struct ixgbe_ring *xdp_ring[MAX_XDP_QUEUES];
+
 	/* TX */
 	struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
 
@@ -624,6 +639,7 @@ struct ixgbe_adapter {
 
 	u64 tx_busy;
 	unsigned int tx_ring_count;
+	unsigned int xdp_ring_count;
 	unsigned int rx_ring_count;
 
 	u32 link_speed;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
index d3e02ac..93ce2cc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
@@ -1031,7 +1031,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
 {
 	struct ixgbe_adapter *adapter = netdev_priv(netdev);
 	struct ixgbe_ring *temp_ring;
-	int i, err = 0;
+	int i, j, err = 0;
 	u32 new_rx_count, new_tx_count;
 
 	if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
@@ -1057,15 +1057,19 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
 	if (!netif_running(adapter->netdev)) {
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			adapter->tx_ring[i]->count = new_tx_count;
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			adapter->xdp_ring[i]->count = new_tx_count;
 		for (i = 0; i < adapter->num_rx_queues; i++)
 			adapter->rx_ring[i]->count = new_rx_count;
 		adapter->tx_ring_count = new_tx_count;
+		adapter->xdp_ring_count = new_tx_count;
 		adapter->rx_ring_count = new_rx_count;
 		goto clear_reset;
 	}
 
 	/* allocate temporary buffer to store rings in */
 	i = max_t(int, adapter->num_tx_queues, adapter->num_rx_queues);
+	i = max_t(int, i, adapter->num_xdp_queues);
 	temp_ring = vmalloc(i * sizeof(struct ixgbe_ring));
 
 	if (!temp_ring) {
@@ -1097,12 +1101,33 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
 			}
 		}
 
+		for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+			memcpy(&temp_ring[i], adapter->xdp_ring[j],
+			       sizeof(struct ixgbe_ring));
+
+			temp_ring[i].count = new_tx_count;
+			err = ixgbe_setup_tx_resources(&temp_ring[i]);
+			if (err) {
+				while (i) {
+					i--;
+					ixgbe_free_tx_resources(&temp_ring[i]);
+				}
+				goto err_setup;
+			}
+		}
+
 		for (i = 0; i < adapter->num_tx_queues; i++) {
 			ixgbe_free_tx_resources(adapter->tx_ring[i]);
 
 			memcpy(adapter->tx_ring[i], &temp_ring[i],
 			       sizeof(struct ixgbe_ring));
 		}
+		for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
+			ixgbe_free_tx_resources(adapter->xdp_ring[j]);
+
+			memcpy(adapter->xdp_ring[j], &temp_ring[i],
+			       sizeof(struct ixgbe_ring));
+		}
 
 		adapter->tx_ring_count = new_tx_count;
 	}
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
index 1b8be7d..cfaa23c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
@@ -73,6 +73,11 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
 			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
 		adapter->tx_ring[i]->reg_idx = reg_idx;
 	}
+	for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++) {
+		if ((reg_idx & ~vmdq->mask) >= tcs)
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+		adapter->xdp_ring[i]->reg_idx = reg_idx;
+	}
 
 #ifdef IXGBE_FCOE
 	/* nothing to do if FCoE is disabled */
@@ -248,6 +253,12 @@ static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
 		adapter->tx_ring[i]->reg_idx = reg_idx;
 	}
 
+	for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++) {
+		if ((reg_idx & rss->mask) >= rss->indices)
+			reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
+		adapter->xdp_ring[i]->reg_idx = reg_idx;
+	}
+
 #ifdef IXGBE_FCOE
 	/* FCoE uses a linear block of queues so just assigning 1:1 */
 	for (; i < adapter->num_tx_queues; i++, reg_idx++)
@@ -267,12 +278,14 @@ static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
  **/
 static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
 {
-	int i;
+	int i, reg_idx;
 
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		adapter->rx_ring[i]->reg_idx = i;
-	for (i = 0; i < adapter->num_tx_queues; i++)
-		adapter->tx_ring[i]->reg_idx = i;
+	for (i = 0, reg_idx = 0; i < adapter->num_tx_queues; i++, reg_idx++)
+		adapter->tx_ring[i]->reg_idx = reg_idx;
+	for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++)
+		adapter->xdp_ring[i]->reg_idx = reg_idx;
 
 	return true;
 }
@@ -308,6 +321,14 @@ static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
 	ixgbe_cache_ring_rss(adapter);
 }
 
+static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
+{
+	if (nr_cpu_ids > MAX_XDP_QUEUES)
+		return 0;
+
+	return adapter->xdp_prog ? nr_cpu_ids : 0;
+}
+
 #define IXGBE_RSS_64Q_MASK	0x3F
 #define IXGBE_RSS_16Q_MASK	0xF
 #define IXGBE_RSS_8Q_MASK	0x7
@@ -382,6 +403,7 @@ static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
 	adapter->num_rx_queues_per_pool = tcs;
 
 	adapter->num_tx_queues = vmdq_i * tcs;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_queues = vmdq_i * tcs;
 
 #ifdef IXGBE_FCOE
@@ -479,6 +501,7 @@ static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
 		netdev_set_tc_queue(dev, i, rss_i, rss_i * i);
 
 	adapter->num_tx_queues = rss_i * tcs;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_queues = rss_i * tcs;
 
 	return true;
@@ -549,6 +572,7 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
 
 	adapter->num_rx_queues = vmdq_i * rss_i;
 	adapter->num_tx_queues = vmdq_i * rss_i;
+	adapter->num_xdp_queues = 0;
 
 	/* disable ATR as it is not supported when VMDq is enabled */
 	adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
@@ -669,6 +693,7 @@ static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
 #endif /* IXGBE_FCOE */
 	adapter->num_rx_queues = rss_i;
 	adapter->num_tx_queues = rss_i;
+	adapter->num_xdp_queues = ixgbe_xdp_queues(adapter);
 
 	return true;
 }
@@ -689,6 +714,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
 	/* Start with base case */
 	adapter->num_rx_queues = 1;
 	adapter->num_tx_queues = 1;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_pools = adapter->num_rx_queues;
 	adapter->num_rx_queues_per_pool = 1;
 
@@ -719,8 +745,11 @@ static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	int i, vectors, vector_threshold;
 
-	/* We start by asking for one vector per queue pair */
+	/* We start by asking for one vector per queue pair with XDP queues
+	 * being stacked with TX queues.
+	 */
 	vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);
+	vectors = max(vectors, adapter->num_xdp_queues);
 
 	/* It is easy to be greedy for MSI-X vectors. However, it really
 	 * doesn't do much good if we have a lot more vectors than CPUs. We'll
@@ -800,6 +829,8 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
  * @v_idx: index of vector in adapter struct
  * @txr_count: total number of Tx rings to allocate
  * @txr_idx: index of first Tx ring to allocate
+ * @xdp_count: total number of XDP rings to allocate
+ * @xdp_idx: index of first XDP ring to allocate
  * @rxr_count: total number of Rx rings to allocate
  * @rxr_idx: index of first Rx ring to allocate
  *
@@ -808,6 +839,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
 static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
 				int v_count, int v_idx,
 				int txr_count, int txr_idx,
+				int xdp_count, int xdp_idx,
 				int rxr_count, int rxr_idx)
 {
 	struct ixgbe_q_vector *q_vector;
@@ -909,6 +941,33 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
 		ring++;
 	}
 
+	while (xdp_count) {
+		/* assign generic ring traits */
+		ring->dev = &adapter->pdev->dev;
+		ring->netdev = adapter->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		ixgbe_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = adapter->tx_ring_count;
+		ring->queue_index = xdp_idx;
+		set_ring_xdp(ring);
+
+		/* assign ring to adapter */
+		adapter->xdp_ring[xdp_idx] = ring;
+
+		/* update count and index */
+		xdp_count--;
+		xdp_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
 	while (rxr_count) {
 		/* assign generic ring traits */
 		ring->dev = &adapter->pdev->dev;
@@ -1002,17 +1061,18 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
 	int q_vectors = adapter->num_q_vectors;
 	int rxr_remaining = adapter->num_rx_queues;
 	int txr_remaining = adapter->num_tx_queues;
-	int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	int xdp_remaining = adapter->num_xdp_queues;
+	int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
 	int err;
 
 	/* only one q_vector if MSI-X is disabled. */
 	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
 		q_vectors = 1;
 
-	if (q_vectors >= (rxr_remaining + txr_remaining)) {
+	if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
 		for (; rxr_remaining; v_idx++) {
 			err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
-						   0, 0, 1, rxr_idx);
+						   0, 0, 0, 0, 1, rxr_idx);
 
 			if (err)
 				goto err_out;
@@ -1026,8 +1086,11 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
 	for (; v_idx < q_vectors; v_idx++) {
 		int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
 		int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+		int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors - v_idx);
+
 		err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
 					   tqpv, txr_idx,
+					   xqpv, xdp_idx,
 					   rqpv, rxr_idx);
 
 		if (err)
@@ -1036,14 +1099,17 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
 		/* update counts and index */
 		rxr_remaining -= rqpv;
 		txr_remaining -= tqpv;
+		xdp_remaining -= xqpv;
 		rxr_idx++;
 		txr_idx++;
+		xdp_idx++;
 	}
 
 	return 0;
 
 err_out:
 	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_queues = 0;
 	adapter->num_q_vectors = 0;
 
@@ -1066,6 +1132,7 @@ static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
 	int v_idx = adapter->num_q_vectors;
 
 	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_queues = 0;
 	adapter->num_q_vectors = 0;
 
@@ -1172,9 +1239,10 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
 
 	ixgbe_cache_ring_register(adapter);
 
-	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
+	e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
 		   (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
-		   adapter->num_rx_queues, adapter->num_tx_queues);
+		   adapter->num_rx_queues, adapter->num_tx_queues,
+		   adapter->num_xdp_queues);
 
 	set_bit(__IXGBE_DOWN, &adapter->state);
 
@@ -1195,6 +1263,7 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
 void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
 {
 	adapter->num_tx_queues = 0;
+	adapter->num_xdp_queues = 0;
 	adapter->num_rx_queues = 0;
 
 	ixgbe_free_q_vectors(adapter);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0b802b5..e754fe0 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -594,6 +594,19 @@ static void ixgbe_regdump(struct ixgbe_hw *hw, struct ixgbe_reg_info *reginfo)
 
 }
 
+static void ixgbe_print_buffer(struct ixgbe_ring *ring, int n)
+{
+	struct ixgbe_tx_buffer *tx_buffer;
+
+	tx_buffer = &ring->tx_buffer_info[ring->next_to_clean];
+	pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
+		   n, ring->next_to_use, ring->next_to_clean,
+		   (u64)dma_unmap_addr(tx_buffer, dma),
+		   dma_unmap_len(tx_buffer, len),
+		   tx_buffer->next_to_watch,
+		   (u64)tx_buffer->time_stamp);
+}
+
 /*
  * ixgbe_dump - Print registers, tx-rings and rx-rings
  */
@@ -603,7 +616,7 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
 	struct ixgbe_hw *hw = &adapter->hw;
 	struct ixgbe_reg_info *reginfo;
 	int n = 0;
-	struct ixgbe_ring *tx_ring;
+	struct ixgbe_ring *ring;
 	struct ixgbe_tx_buffer *tx_buffer;
 	union ixgbe_adv_tx_desc *tx_desc;
 	struct my_u0 { u64 a; u64 b; } *u0;
@@ -644,14 +657,13 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
 		"Queue [NTU] [NTC] [bi(ntc)->dma  ]",
 		"leng", "ntw", "timestamp");
 	for (n = 0; n < adapter->num_tx_queues; n++) {
-		tx_ring = adapter->tx_ring[n];
-		tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
-		pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
-			   n, tx_ring->next_to_use, tx_ring->next_to_clean,
-			   (u64)dma_unmap_addr(tx_buffer, dma),
-			   dma_unmap_len(tx_buffer, len),
-			   tx_buffer->next_to_watch,
-			   (u64)tx_buffer->time_stamp);
+		ring = adapter->tx_ring[n];
+		ixgbe_print_buffer(ring, n);
+	}
+
+	for (n = 0; n < adapter->num_xdp_queues; n++) {
+		ring = adapter->xdp_ring[n];
+		ixgbe_print_buffer(ring, n);
 	}
 
 	/* Print TX Rings */
@@ -696,28 +708,28 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
 	 */
 
 	for (n = 0; n < adapter->num_tx_queues; n++) {
-		tx_ring = adapter->tx_ring[n];
+		ring = adapter->tx_ring[n];
 		pr_info("------------------------------------\n");
-		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
+		pr_info("TX QUEUE INDEX = %d\n", ring->queue_index);
 		pr_info("------------------------------------\n");
 		pr_info("%s%s    %s              %s        %s          %s\n",
 			"T [desc]     [address 63:0  ] ",
 			"[PlPOIdStDDt Ln] [bi->dma       ] ",
 			"leng", "ntw", "timestamp", "bi->skb");
 
-		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
-			tx_desc = IXGBE_TX_DESC(tx_ring, i);
-			tx_buffer = &tx_ring->tx_buffer_info[i];
+		for (i = 0; ring->desc && (i < ring->count); i++) {
+			tx_desc = IXGBE_TX_DESC(ring, i);
+			tx_buffer = &ring->tx_buffer_info[i];
 			u0 = (struct my_u0 *)tx_desc;
 			if (dma_unmap_len(tx_buffer, len) > 0) {
 				const char *ring_desc;
 
-				if (i == tx_ring->next_to_use &&
-				    i == tx_ring->next_to_clean)
+				if (i == ring->next_to_use &&
+				    i == ring->next_to_clean)
 					ring_desc = " NTC/U";
-				else if (i == tx_ring->next_to_use)
+				else if (i == ring->next_to_use)
 					ring_desc = " NTU";
-				else if (i == tx_ring->next_to_clean)
+				else if (i == ring->next_to_clean)
 					ring_desc = " NTC";
 				else
 					ring_desc = "";
@@ -987,6 +999,10 @@ static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		clear_bit(__IXGBE_HANG_CHECK_ARMED,
 			  &adapter->tx_ring[i]->state);
+
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		clear_bit(__IXGBE_HANG_CHECK_ARMED,
+			  &adapter->xdp_ring[i]->state);
 }
 
 static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
@@ -1031,6 +1047,14 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
 		if (xoff[tc])
 			clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
 	}
+
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
+
+		tc = xdp_ring->dcb_tc;
+		if (xoff[tc])
+			clear_bit(__IXGBE_HANG_CHECK_ARMED, &xdp_ring->state);
+	}
 }
 
 static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
@@ -1182,7 +1206,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 		total_packets += tx_buffer->gso_segs;
 
 		/* free the skb */
-		napi_consume_skb(tx_buffer->skb, napi_budget);
+		if (ring_is_xdp(tx_ring))
+			page_frag_free(tx_buffer->data);
+		else
+			napi_consume_skb(tx_buffer->skb, napi_budget);
 
 		/* unmap skb header data */
 		dma_unmap_single(tx_ring->dev,
@@ -1243,7 +1270,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 	if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
 		/* schedule immediate reset if we believe we hung */
 		struct ixgbe_hw *hw = &adapter->hw;
-		e_err(drv, "Detected Tx Unit Hang\n"
+		e_err(drv, "Detected Tx Unit Hang %s\n"
 			"  Tx Queue             <%d>\n"
 			"  TDH, TDT             <%x>, <%x>\n"
 			"  next_to_use          <%x>\n"
@@ -1251,13 +1278,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 			"tx_buffer_info[next_to_clean]\n"
 			"  time_stamp           <%lx>\n"
 			"  jiffies              <%lx>\n",
+			ring_is_xdp(tx_ring) ? "(XDP)" : "",
 			tx_ring->queue_index,
 			IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
 			IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
 			tx_ring->next_to_use, i,
 			tx_ring->tx_buffer_info[i].time_stamp, jiffies);
 
-		netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+		if (!ring_is_xdp(tx_ring))
+			netif_stop_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
 
 		e_info(probe,
 		       "tx hang %d detected on queue %d, resetting adapter\n",
@@ -1270,6 +1300,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
 		return true;
 	}
 
+	if (ring_is_xdp(tx_ring))
+		return !!budget;
+
 	netdev_tx_completed_queue(txring_txq(tx_ring),
 				  total_packets, total_bytes);
 
@@ -2162,8 +2195,13 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 
 #define IXGBE_XDP_PASS 0
 #define IXGBE_XDP_CONSUMED 1
+#define IXGBE_XDP_TX 2
 
-static int ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
+static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
+			       struct xdp_buff *xdp);
+
+static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
+			 struct ixgbe_ring  *rx_ring,
 			 struct ixgbe_rx_buffer *rx_buffer,
 			 unsigned int size)
 {
@@ -2187,10 +2225,16 @@ static int ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 	switch (act) {
 	case XDP_PASS:
-		goto xdp_out;
+		break;
+	case XDP_TX:
+		result = ixgbe_xmit_xdp_ring(adapter, &xdp);
+		if (result == IXGBE_XDP_TX)
+			break;
+		else
+			rx_buffer->pagecnt_bias++; /* give page back */
+		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
-	case XDP_TX:
 	case XDP_ABORTED:
 		trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
 		/* fallthrough -- handle aborts by dropping packet */
@@ -2222,8 +2266,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 			       const int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
-#ifdef IXGBE_FCOE
 	struct ixgbe_adapter *adapter = q_vector->adapter;
+#ifdef IXGBE_FCOE
 	int ddp_bytes;
 	unsigned int mss = 0;
 #endif /* IXGBE_FCOE */
@@ -2255,8 +2299,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 
 		rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
 
-		consumed = ixgbe_run_xdp(rx_ring, rx_buffer, size);
-		rcu_read_unlock();
+		consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer, size);
 
 		if (consumed) {
 			ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
@@ -2921,6 +2964,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data)
 						       &ring->state))
 					reinit_count++;
 			}
+
 			if (reinit_count) {
 				/* no more flow director interrupts until after init */
 				IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR);
@@ -3436,6 +3480,8 @@ static void ixgbe_configure_tx(struct ixgbe_adapter *adapter)
 	/* Setup the HW Tx Head and Tail descriptor pointers */
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		ixgbe_configure_tx_ring(adapter, adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbe_configure_tx_ring(adapter, adapter->xdp_ring[i]);
 }
 
 static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter,
@@ -5605,7 +5651,8 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
 	}
 
 	/* reset BQL for queue */
-	netdev_tx_reset_queue(txring_txq(tx_ring));
+	if (!ring_is_xdp(tx_ring))
+		netdev_tx_reset_queue(txring_txq(tx_ring));
 
 	/* reset next_to_use and next_to_clean */
 	tx_ring->next_to_use = 0;
@@ -5634,6 +5681,8 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
 
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		ixgbe_clean_tx_ring(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		ixgbe_clean_tx_ring(adapter->xdp_ring[i]);
 }
 
 static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
@@ -5728,6 +5777,11 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
 		u8 reg_idx = adapter->tx_ring[i]->reg_idx;
 		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
 	}
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
+
+		IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
+	}
 
 	/* Disable the Tx DMA engine on 82599 and later MAC */
 	switch (hw->mac.type) {
@@ -6096,7 +6150,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
  **/
 static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
 {
-	int i, err = 0;
+	int i, j = 0, err = 0;
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
@@ -6106,10 +6160,21 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
 		e_err(probe, "Allocation for Tx Queue %u failed\n", i);
 		goto err_setup_tx;
 	}
+	for (j = 0; j < adapter->num_xdp_queues; j++) {
+		err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]);
+		if (!err)
+			continue;
+
+		e_err(probe, "Allocation for Tx Queue %u failed\n", j);
+		goto err_setup_tx;
+	}
+
 
 	return 0;
 err_setup_tx:
 	/* rewind the index freeing the rings as we go */
+	while (j--)
+		ixgbe_free_tx_resources(adapter->xdp_ring[j]);
 	while (i--)
 		ixgbe_free_tx_resources(adapter->tx_ring[i]);
 	return err;
@@ -6241,6 +6306,9 @@ static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		if (adapter->tx_ring[i]->desc)
 			ixgbe_free_tx_resources(adapter->tx_ring[i]);
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		if (adapter->xdp_ring[i]->desc)
+			ixgbe_free_tx_resources(adapter->xdp_ring[i]);
 }
 
 /**
@@ -6660,6 +6728,14 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
 		bytes += tx_ring->stats.bytes;
 		packets += tx_ring->stats.packets;
 	}
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
+
+		restart_queue += xdp_ring->tx_stats.restart_queue;
+		tx_busy += xdp_ring->tx_stats.tx_busy;
+		bytes += xdp_ring->stats.bytes;
+		packets += xdp_ring->stats.packets;
+	}
 	adapter->restart_queue = restart_queue;
 	adapter->tx_busy = tx_busy;
 	netdev->stats.tx_bytes = bytes;
@@ -6853,6 +6929,9 @@ static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter)
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
 				&(adapter->tx_ring[i]->state));
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			set_bit(__IXGBE_TX_FDIR_INIT_DONE,
+				&adapter->xdp_ring[i]->state);
 		/* re-enable flow director interrupts */
 		IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
 	} else {
@@ -6886,6 +6965,8 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
 	if (netif_carrier_ok(adapter->netdev)) {
 		for (i = 0; i < adapter->num_tx_queues; i++)
 			set_check_for_tx_hang(adapter->tx_ring[i]);
+		for (i = 0; i < adapter->num_xdp_queues; i++)
+			set_check_for_tx_hang(adapter->xdp_ring[i]);
 	}
 
 	if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
@@ -7116,6 +7197,13 @@ static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter)
 			return true;
 	}
 
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *ring = adapter->xdp_ring[i];
+
+		if (ring->next_to_use != ring->next_to_clean)
+			return true;
+	}
+
 	return false;
 }
 
@@ -8076,6 +8164,85 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
 #endif
 }
 
+static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
+			       struct xdp_buff *xdp)
+{
+	struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
+	struct ixgbe_tx_buffer *tx_buffer;
+	union ixgbe_adv_tx_desc *tx_desc;
+	u32 len, tx_flags = 0;
+	dma_addr_t dma;
+	u16 count, i;
+	u32 cmd_type;
+
+	len = xdp->data_end - xdp->data;
+	count = TXD_USE_COUNT(len);
+
+	/* record the location of the first descriptor for this packet */
+	tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
+	tx_buffer->skb = NULL;
+	tx_buffer->bytecount = len;
+	tx_buffer->gso_segs = 1;
+	tx_buffer->protocol = 0;
+
+	tx_buffer->tx_flags = tx_flags;
+	i = ring->next_to_use;
+	tx_desc = IXGBE_TX_DESC(ring, i);
+
+	dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
+	if (dma_mapping_error(ring->dev, dma))
+		goto dma_error;
+
+	dma_unmap_len_set(tx_buffer, len, len);
+	dma_unmap_addr_set(tx_buffer, dma, dma);
+	tx_buffer->data = xdp->data;
+	tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
+	/* put descriptor type bits */
+	cmd_type = IXGBE_ADVTXD_DTYP_DATA |
+		   IXGBE_ADVTXD_DCMD_DEXT |
+		   IXGBE_ADVTXD_DCMD_IFCS;
+	cmd_type |= len | IXGBE_TXD_CMD;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+	tx_desc->read.olinfo_status =
+		cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
+
+#ifdef CONFIG_PCI_IOV
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
+		tx_desc->read.olinfo_status |= IXGBE_ADVTXD_CC;
+		ixgbe_tx_ctxtdesc(tx_ring, ETH_HLEN, 0, 0, 0);
+	}
+#endif
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	i++;
+	if (i == ring->count)
+		i = 0;
+
+	tx_buffer->next_to_watch = tx_desc;
+	ring->next_to_use = i;
+
+	writel(i, ring->tail);
+
+	/* we need this if more than one processor can write to our tail
+	 * at a time, it synchronizes IO on IA64/Altix systems
+	 */
+	mmiowb();
+	return IXGBE_XDP_TX;
+dma_error:
+	ring->next_to_use = i;
+	return IXGBE_XDP_CONSUMED;
+}
+
 netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
 			  struct ixgbe_adapter *adapter,
 			  struct ixgbe_ring *tx_ring)
@@ -8367,6 +8534,23 @@ static void ixgbe_netpoll(struct net_device *netdev)
 
 #endif
 
+static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats,
+				   struct ixgbe_ring *ring)
+{
+	u64 bytes, packets;
+	unsigned int start;
+
+	if (ring) {
+		do {
+			start = u64_stats_fetch_begin_irq(&ring->syncp);
+			packets = ring->stats.packets;
+			bytes   = ring->stats.bytes;
+		} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
+		stats->tx_packets += packets;
+		stats->tx_bytes   += bytes;
+	}
+}
+
 static void ixgbe_get_stats64(struct net_device *netdev,
 			      struct rtnl_link_stats64 *stats)
 {
@@ -8392,18 +8576,13 @@ static void ixgbe_get_stats64(struct net_device *netdev,
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]);
-		u64 bytes, packets;
-		unsigned int start;
 
-		if (ring) {
-			do {
-				start = u64_stats_fetch_begin_irq(&ring->syncp);
-				packets = ring->stats.packets;
-				bytes   = ring->stats.bytes;
-			} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
-			stats->tx_packets += packets;
-			stats->tx_bytes   += bytes;
-		}
+		ixgbe_get_ring_stats64(stats, ring);
+	}
+	for (i = 0; i < adapter->num_xdp_queues; i++) {
+		struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]);
+
+		ixgbe_get_ring_stats64(stats, ring);
 	}
 	rcu_read_unlock();
 
@@ -9524,7 +9703,13 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 {
 	int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
 	struct ixgbe_adapter *adapter = netdev_priv(dev);
-	struct bpf_prog *old_adapter_prog;
+	struct bpf_prog *old_prog;
+
+	if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
+		return -EINVAL;
+
+	if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
+		return -EINVAL;
 
 	/* verify ixgbe ring attributes are sufficient for XDP */
 	for (i = 0; i < adapter->num_rx_queues; i++) {
@@ -9537,12 +9722,26 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
 			return -EINVAL;
 	}
 
-	old_adapter_prog = xchg(&adapter->xdp_prog, prog);
+	if (nr_cpu_ids > MAX_XDP_QUEUES)
+		return -ENOMEM;
+
+	old_prog = xchg(&adapter->xdp_prog, prog);
+
+	/* If transitioning XDP modes reconfigure rings */
+	if (!!adapter->xdp_prog != !!old_prog) {
+		int err = ixgbe_setup_tc(dev, netdev_get_num_tc(dev));
+
+		if (err) {
+			rcu_assign_pointer(adapter->xdp_prog, old_prog);
+			return -EINVAL;
+		}
+	}
+
 	for (i = 0; i < adapter->num_rx_queues; i++)
 		xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
 
-	if (old_adapter_prog)
-		bpf_prog_put(old_adapter_prog);
+	if (old_prog)
+		bpf_prog_put(old_prog);
 
 	return 0;
 }
@@ -10048,6 +10247,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	for (i = 0; i < adapter->num_tx_queues; i++)
 		u64_stats_init(&adapter->tx_ring[i]->syncp);
 
+	for (i = 0; i < adapter->num_xdp_queues; i++)
+		u64_stats_init(&adapter->xdp_ring[i]->syncp);
+
 	/* WOL not supported for all devices */
 	adapter->wol = 0;
 	hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head
  2017-03-02  0:54 [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions John Fastabend
  2017-03-02  0:54 ` [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action John Fastabend
@ 2017-03-02  0:55 ` John Fastabend
  2017-03-02 19:43   ` Alexander Duyck
  2017-03-02 16:22 ` [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions Alexander Duyck
  2 siblings, 1 reply; 9+ messages in thread
From: John Fastabend @ 2017-03-02  0:55 UTC (permalink / raw)
  To: intel-wired-lan

Add adjust_head support for XDP however at the moment we are only
adding IXGBE_SKB_PAD bytes of headroom to align with driver paths.

The infrastructure is is such that a follow on patch can extend
headroom up to 196B without changing RX path.

Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   38 +++++++++++++++++--------
 1 file changed, 26 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index e754fe0..c8bf64e 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2026,6 +2026,7 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
 static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
 			      struct ixgbe_rx_buffer *rx_buffer,
 			      struct sk_buff *skb,
+			      unsigned int headroom,
 			      unsigned int size)
 {
 #if (PAGE_SIZE < 8192)
@@ -2036,7 +2037,8 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
 				SKB_DATA_ALIGN(size);
 #endif
 	skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
-			rx_buffer->page_offset, size, truesize);
+			rx_buffer->page_offset - (IXGBE_SKB_PAD - headroom),
+			size, truesize);
 #if (PAGE_SIZE < 8192)
 	rx_buffer->page_offset ^= truesize;
 #else
@@ -2109,6 +2111,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
 static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 					   struct ixgbe_rx_buffer *rx_buffer,
 					   union ixgbe_adv_rx_desc *rx_desc,
+					   unsigned int headroom,
 					   unsigned int size)
 {
 	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
@@ -2117,6 +2120,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 #else
 	unsigned int truesize = SKB_DATA_ALIGN(size);
 #endif
+	unsigned int off_page;
 	struct sk_buff *skb;
 
 	/* prefetch first cache line of first page */
@@ -2130,12 +2134,14 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 	if (unlikely(!skb))
 		return NULL;
 
+	off_page = IXGBE_SKB_PAD - headroom;
+
 	if (size > IXGBE_RX_HDR_SIZE) {
 		if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
 			IXGBE_CB(skb)->dma = rx_buffer->dma;
 
 		skb_add_rx_frag(skb, 0, rx_buffer->page,
-				rx_buffer->page_offset,
+				rx_buffer->page_offset - off_page,
 				size, truesize);
 #if (PAGE_SIZE < 8192)
 		rx_buffer->page_offset ^= truesize;
@@ -2143,7 +2149,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 		rx_buffer->page_offset += truesize;
 #endif
 	} else {
-		memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
+		memcpy(__skb_put(skb, size), va - off_page,
+		       ALIGN(size, sizeof(long)));
 		rx_buffer->pagecnt_bias++;
 	}
 
@@ -2153,6 +2160,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
 static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 				       struct ixgbe_rx_buffer *rx_buffer,
 				       union ixgbe_adv_rx_desc *rx_desc,
+				       unsigned int headroom,
 				       unsigned int size)
 {
 	void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
@@ -2176,7 +2184,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
 		return NULL;
 
 	/* update pointers within the skb to store the data */
-	skb_reserve(skb, IXGBE_SKB_PAD);
+	skb_reserve(skb, headroom);
 	__skb_put(skb, size);
 
 	/* record DMA address if this is the start of a chain of buffers */
@@ -2203,7 +2211,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
 static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 			 struct ixgbe_ring  *rx_ring,
 			 struct ixgbe_rx_buffer *rx_buffer,
-			 unsigned int size)
+			 unsigned int *headroom,
+			 unsigned int *size)
 {
 	int result = IXGBE_XDP_PASS;
 	struct bpf_prog *xdp_prog;
@@ -2218,14 +2227,16 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 		goto xdp_out;
 
 	addr = page_address(rx_buffer->page) + rx_buffer->page_offset;
-	xdp.data_hard_start = addr;
+	xdp.data_hard_start = addr - *headroom;
 	xdp.data = addr;
-	xdp.data_end = addr + size;
+	xdp.data_end = addr + *size;
 
 	act = bpf_prog_run_xdp(xdp_prog, &xdp);
 	switch (act) {
 	case XDP_PASS:
-		break;
+		*headroom = xdp.data - xdp.data_hard_start;
+		*size = xdp.data_end - xdp.data;
+		return IXGBE_XDP_PASS;
 	case XDP_TX:
 		result = ixgbe_xmit_xdp_ring(adapter, &xdp);
 		if (result == IXGBE_XDP_TX)
@@ -2274,6 +2285,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
 
 	while (likely(total_rx_packets < budget)) {
+		unsigned int headroom = ixgbe_rx_offset(rx_ring);
 		union ixgbe_adv_rx_desc *rx_desc;
 		struct ixgbe_rx_buffer *rx_buffer;
 		struct sk_buff *skb;
@@ -2299,7 +2311,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 
 		rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
 
-		consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer, size);
+		consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer,
+					 &headroom, &size);
 
 		if (consumed) {
 			ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
@@ -2312,13 +2325,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 
 		/* retrieve a buffer from the ring */
 		if (skb)
-			ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
+			ixgbe_add_rx_frag(rx_ring, rx_buffer, skb,
+					  headroom, size);
 		else if (ring_uses_build_skb(rx_ring))
 			skb = ixgbe_build_skb(rx_ring, rx_buffer,
-					      rx_desc, size);
+					      rx_desc, headroom, size);
 		else
 			skb = ixgbe_construct_skb(rx_ring, rx_buffer,
-						  rx_desc, size);
+						  rx_desc, headroom, size);
 
 		/* exit if we failed to retrieve a buffer */
 		if (!skb) {


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions
  2017-03-02  0:54 [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions John Fastabend
  2017-03-02  0:54 ` [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action John Fastabend
  2017-03-02  0:55 ` [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head John Fastabend
@ 2017-03-02 16:22 ` Alexander Duyck
  2017-03-03 16:23   ` John Fastabend
  2 siblings, 1 reply; 9+ messages in thread
From: Alexander Duyck @ 2017-03-02 16:22 UTC (permalink / raw)
  To: intel-wired-lan

On Wed, Mar 1, 2017 at 4:54 PM, John Fastabend <john.fastabend@gmail.com> wrote:
> Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
> programs instead of rcu primitives as suggested by Daniel Borkmann and
> Alex Duyck.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>

So it still looks like this is doing a bunch of hacking in the Rx
path.  I suggested a few items below to streamline this.

> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h      |    2
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  114 ++++++++++++++++++++++++-
>  2 files changed, 113 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index b812913..2d12c24 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -273,6 +273,7 @@ struct ixgbe_ring {
>         struct ixgbe_ring *next;        /* pointer to next ring in q_vector */
>         struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
>         struct net_device *netdev;      /* netdev ring belongs to */
> +       struct bpf_prog *xdp_prog;
>         struct device *dev;             /* device for DMA mapping */
>         struct ixgbe_fwd_adapter *l2_accel_priv;
>         void *desc;                     /* descriptor ring memory */
> @@ -510,6 +511,7 @@ struct ixgbe_adapter {
>         unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
>         /* OS defined structs */
>         struct net_device *netdev;
> +       struct bpf_prog *xdp_prog;
>         struct pci_dev *pdev;
>
>         unsigned long state;
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index e3da397..0b802b5 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -49,6 +49,9 @@
>  #include <linux/if_macvlan.h>
>  #include <linux/if_bridge.h>
>  #include <linux/prefetch.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
> +#include <linux/atomic.h>
>  #include <scsi/fc/fc_fcoe.h>
>  #include <net/udp_tunnel.h>
>  #include <net/pkt_cls.h>
> @@ -2051,7 +2054,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>                 /* hand second half of page back to the ring */
>                 ixgbe_reuse_rx_page(rx_ring, rx_buffer);
>         } else {
> -               if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
> +               if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) {
>                         /* the page has been released from the ring */
>                         IXGBE_CB(skb)->page_released = true;
>                 } else {

So you can probably change this to "if (!IS_ERR(skb) &&
IXGBE_CB(skb)->dma == rx_buffer->dma) {" more on why below.

> @@ -2157,6 +2160,50 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>         return skb;
>  }
>
> +#define IXGBE_XDP_PASS 0
> +#define IXGBE_XDP_CONSUMED 1
> +
> +static int ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
> +                        struct ixgbe_rx_buffer *rx_buffer,
> +                        unsigned int size)
> +{

Change the return type here to an sk_buff pointer.

> +       int result = IXGBE_XDP_PASS;
> +       struct bpf_prog *xdp_prog;
> +       struct xdp_buff xdp;
> +       void *addr;
> +       u32 act;
> +
> +       rcu_read_lock();
> +       xdp_prog = READ_ONCE(rx_ring->xdp_prog);
> +
> +       if (!xdp_prog)
> +               goto xdp_out;
> +
> +       addr = page_address(rx_buffer->page) + rx_buffer->page_offset;
> +       xdp.data_hard_start = addr;
> +       xdp.data = addr;
> +       xdp.data_end = addr + size;
> +
> +       act = bpf_prog_run_xdp(xdp_prog, &xdp);
> +       switch (act) {
> +       case XDP_PASS:
> +               goto xdp_out;
> +       default:
> +               bpf_warn_invalid_xdp_action(act);
> +       case XDP_TX:
> +       case XDP_ABORTED:
> +               trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
> +               /* fallthrough -- handle aborts by dropping packet */
> +       case XDP_DROP:
> +               rx_buffer->pagecnt_bias++; /* give page back */
> +               result = IXGBE_XDP_CONSUMED;
> +               break;
> +       }
> +xdp_out:
> +       rcu_read_unlock();
> +       return result;
> +}
> +

Instead of returning result directly I would suggest returning
"ERR_PTR(-result)", or you can cast IXGBE_XDP_CONSUMED as - 1 and just
return "ERR_PTR(result)".

>  /**
>   * ixgbe_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
>   * @q_vector: structure containing interrupt and ring information
> @@ -2187,6 +2234,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>                 struct ixgbe_rx_buffer *rx_buffer;
>                 struct sk_buff *skb;
>                 unsigned int size;
> +               int consumed;
>
>                 /* return some buffers to hardware, one at a time is too slow */
>                 if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {

With the changes above this bit becomes unnecessary.

> @@ -2207,6 +2255,18 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>
>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>
> +               consumed = ixgbe_run_xdp(rx_ring, rx_buffer, size);
> +               rcu_read_unlock();
> +

Did you leave an rcu_read_unlock here?  I'm assuming you meant to drop this.

> +               if (consumed) {
> +                       ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
> +                       cleaned_count++;
> +                       ixgbe_is_non_eop(rx_ring, rx_desc, skb);
> +                       total_rx_packets++;
> +                       total_rx_bytes += size;
> +                       continue;
> +               }
> +

Looks like this wasn't adding one back to the pagecnt_bias.  That
would trigger a memory leak.

This bit will need to change a bit.  Basically it should become:
    /* retrieve a buffer from the ring */
    if (!skb)
        skb = ixgbe_run_xdp(rx_ring, rx_buffer, size);

    if (IS_ERR(skb)) {
        total_rx_packets++;
        total_rx_bytes += size;
        rx_buffer->pagecnt_bias++;
    } else if (skb)

Then the only other change that should be needed is a minor tweak to
ixgbe_cleanup_headers() by adding the following to the start of the
function:
    if (IS_ERR(skb))
        return true;

>                 /* retrieve a buffer from the ring */
>                 if (skb)
>                         ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
> @@ -6121,9 +6181,13 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
>         int i, err = 0;
>
>         for (i = 0; i < adapter->num_rx_queues; i++) {
> -               err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
> -               if (!err)
> +               struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
> +
> +               err = ixgbe_setup_rx_resources(rx_ring);
> +               if (!err) {
> +                       xchg(&rx_ring->xdp_prog, adapter->xdp_prog);
>                         continue;
> +               }
>
>                 e_err(probe, "Allocation for Rx Queue %u failed\n", i);
>                 goto err_setup_rx;

I still think it makes more sense to just place this in
ixgbe_setup_rx_resources.  No point in putting here as it just adds
more complication.

If I am not mistaken all you would have to add is the xchg line and
that would be it.

> @@ -6191,6 +6255,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
>
>         vfree(rx_ring->rx_buffer_info);
>         rx_ring->rx_buffer_info = NULL;
> +       xchg(&rx_ring->xdp_prog, NULL);
>
>         /* if not set, then don't free */
>         if (!rx_ring->desc)

Just for the sake of symmetry I would suggest placing the xchg in
front of the vfree.  Also this code being here makes a stronger
argument for us setting up xdp in setup_rx_resources.

> @@ -9455,6 +9520,48 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
>         return features;
>  }
>
> +static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
> +{
> +       int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
> +       struct ixgbe_adapter *adapter = netdev_priv(dev);
> +       struct bpf_prog *old_adapter_prog;
> +
> +       /* verify ixgbe ring attributes are sufficient for XDP */
> +       for (i = 0; i < adapter->num_rx_queues; i++) {
> +               struct ixgbe_ring *ring = adapter->rx_ring[i];
> +
> +               if (ring_is_rsc_enabled(ring))
> +                       return -EINVAL;
> +
> +               if (frame_size > ixgbe_rx_bufsz(ring))
> +                       return -EINVAL;
> +       }
> +
> +       old_adapter_prog = xchg(&adapter->xdp_prog, prog);
> +       for (i = 0; i < adapter->num_rx_queues; i++)
> +               xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
> +
> +       if (old_adapter_prog)
> +               bpf_prog_put(old_adapter_prog);
> +
> +       return 0;
> +}
> +
> +static int ixgbe_xdp(struct net_device *dev, struct netdev_xdp *xdp)
> +{
> +       struct ixgbe_adapter *adapter = netdev_priv(dev);
> +
> +       switch (xdp->command) {
> +       case XDP_SETUP_PROG:
> +               return ixgbe_xdp_setup(dev, xdp->prog);
> +       case XDP_QUERY_PROG:
> +               xdp->prog_attached = !!(adapter->rx_ring[0]->xdp_prog);
> +               return 0;
> +       default:
> +               return -EINVAL;
> +       }
> +}
> +
>  static const struct net_device_ops ixgbe_netdev_ops = {
>         .ndo_open               = ixgbe_open,
>         .ndo_stop               = ixgbe_close,
> @@ -9500,6 +9607,7 @@ static void ixgbe_fwd_del(struct net_device *pdev, void *priv)
>         .ndo_udp_tunnel_add     = ixgbe_add_udp_tunnel_port,
>         .ndo_udp_tunnel_del     = ixgbe_del_udp_tunnel_port,
>         .ndo_features_check     = ixgbe_features_check,
> +       .ndo_xdp                = ixgbe_xdp,
>  };
>
>  /**
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action
  2017-03-02  0:54 ` [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action John Fastabend
@ 2017-03-02 16:39   ` Alexander Duyck
  2017-03-03 16:40     ` John Fastabend
  0 siblings, 1 reply; 9+ messages in thread
From: Alexander Duyck @ 2017-03-02 16:39 UTC (permalink / raw)
  To: intel-wired-lan

On Wed, Mar 1, 2017 at 4:54 PM, John Fastabend <john.fastabend@gmail.com> wrote:
> Add support for XDP_TX action.
>
> A couple design choices were made here. First I use a new ring
> pointer structure xdp_ring[] in the adapter struct instead of
> pushing the newly allocated xdp TX rings into the tx_ring[]
> structure. This means we have to duplicate loops around rings
> in places we want to initialize both TX rings and XDP rings.
> But by making it explicit it is obvious when we are using XDP
> rings and when we are using TX rings. Further we don't have
> to do ring arithmatic which is error prone. As a proof point
> for doing this my first patches used only a single ring structure
> and introduced bugs in FCoE code and macvlan code paths.
>
> Second I am aware this is not the most optimized version of
> this code possible. I want to get baseline support in using
> the most readable format possible and then once this series
> is included I will optimize the TX path in another series
> of patches.
>
> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe.h         |   18 +
>  drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c |   27 ++
>  drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c     |   87 ++++++-
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  290 +++++++++++++++++++---
>  4 files changed, 367 insertions(+), 55 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> index 2d12c24..f2a1409 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
> @@ -190,7 +190,10 @@ struct vf_macvlans {
>  struct ixgbe_tx_buffer {
>         union ixgbe_adv_tx_desc *next_to_watch;
>         unsigned long time_stamp;
> -       struct sk_buff *skb;
> +       union {
> +               struct sk_buff *skb;
> +               void *data; /* XDP uses addr ptr on irq_clean */
> +       };
>         unsigned int bytecount;
>         unsigned short gso_segs;
>         __be16 protocol;
> @@ -243,6 +246,7 @@ enum ixgbe_ring_state_t {
>         __IXGBE_TX_XPS_INIT_DONE,
>         __IXGBE_TX_DETECT_HANG,
>         __IXGBE_HANG_CHECK_ARMED,
> +       __IXGBE_TX_XDP_RING,
>  };
>
>  #define ring_uses_build_skb(ring) \
> @@ -269,6 +273,12 @@ struct ixgbe_fwd_adapter {
>         set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
>  #define clear_ring_rsc_enabled(ring) \
>         clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state)
> +#define ring_is_xdp(ring) \
> +       test_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
> +#define set_ring_xdp(ring) \
> +       set_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
> +#define clear_ring_xdp(ring) \
> +       clear_bit(__IXGBE_TX_XDP_RING, &(ring)->state)
>  struct ixgbe_ring {
>         struct ixgbe_ring *next;        /* pointer to next ring in q_vector */
>         struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
> @@ -335,6 +345,7 @@ enum ixgbe_ring_f_enum {
>  #define IXGBE_MAX_FCOE_INDICES         8
>  #define MAX_RX_QUEUES                  (IXGBE_MAX_FDIR_INDICES + 1)
>  #define MAX_TX_QUEUES                  (IXGBE_MAX_FDIR_INDICES + 1)
> +#define MAX_XDP_QUEUES                 (IXGBE_MAX_FDIR_INDICES + 1)
>  #define IXGBE_MAX_L2A_QUEUES           4
>  #define IXGBE_BAD_L2A_QUEUE            3
>  #define IXGBE_MAX_MACVLANS             31
> @@ -578,6 +589,10 @@ struct ixgbe_adapter {
>         __be16 vxlan_port;
>         __be16 geneve_port;
>
> +       /* XDP */
> +       int num_xdp_queues;
> +       struct ixgbe_ring *xdp_ring[MAX_XDP_QUEUES];
> +
>         /* TX */
>         struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp;
>
> @@ -624,6 +639,7 @@ struct ixgbe_adapter {
>
>         u64 tx_busy;
>         unsigned int tx_ring_count;
> +       unsigned int xdp_ring_count;
>         unsigned int rx_ring_count;
>
>         u32 link_speed;
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> index d3e02ac..93ce2cc 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c
> @@ -1031,7 +1031,7 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
>  {
>         struct ixgbe_adapter *adapter = netdev_priv(netdev);
>         struct ixgbe_ring *temp_ring;
> -       int i, err = 0;
> +       int i, j, err = 0;
>         u32 new_rx_count, new_tx_count;
>
>         if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
> @@ -1057,15 +1057,19 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
>         if (!netif_running(adapter->netdev)) {
>                 for (i = 0; i < adapter->num_tx_queues; i++)
>                         adapter->tx_ring[i]->count = new_tx_count;
> +               for (i = 0; i < adapter->num_xdp_queues; i++)
> +                       adapter->xdp_ring[i]->count = new_tx_count;
>                 for (i = 0; i < adapter->num_rx_queues; i++)
>                         adapter->rx_ring[i]->count = new_rx_count;
>                 adapter->tx_ring_count = new_tx_count;
> +               adapter->xdp_ring_count = new_tx_count;
>                 adapter->rx_ring_count = new_rx_count;
>                 goto clear_reset;
>         }
>
>         /* allocate temporary buffer to store rings in */
>         i = max_t(int, adapter->num_tx_queues, adapter->num_rx_queues);
> +       i = max_t(int, i, adapter->num_xdp_queues);
>         temp_ring = vmalloc(i * sizeof(struct ixgbe_ring));
>
>         if (!temp_ring) {
> @@ -1097,12 +1101,33 @@ static int ixgbe_set_ringparam(struct net_device *netdev,
>                         }
>                 }
>
> +               for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
> +                       memcpy(&temp_ring[i], adapter->xdp_ring[j],
> +                              sizeof(struct ixgbe_ring));
> +
> +                       temp_ring[i].count = new_tx_count;
> +                       err = ixgbe_setup_tx_resources(&temp_ring[i]);
> +                       if (err) {
> +                               while (i) {
> +                                       i--;
> +                                       ixgbe_free_tx_resources(&temp_ring[i]);
> +                               }
> +                               goto err_setup;
> +                       }
> +               }
> +

It looks like this is broken.  I would say just get rid of j and just
use i for all of this like we did for the Tx and Rx rings.  I don't
think you need J anymore since you aren't really playing with an
offset anyway.

>                 for (i = 0; i < adapter->num_tx_queues; i++) {
>                         ixgbe_free_tx_resources(adapter->tx_ring[i]);
>
>                         memcpy(adapter->tx_ring[i], &temp_ring[i],
>                                sizeof(struct ixgbe_ring));
>                 }
> +               for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
> +                       ixgbe_free_tx_resources(adapter->xdp_ring[j]);
> +
> +                       memcpy(adapter->xdp_ring[j], &temp_ring[i],
> +                              sizeof(struct ixgbe_ring));
> +               }
>

Same here.  Just use "i" everywhere.

>                 adapter->tx_ring_count = new_tx_count;
>         }
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
> index 1b8be7d..cfaa23c 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c
> @@ -73,6 +73,11 @@ static bool ixgbe_cache_ring_dcb_sriov(struct ixgbe_adapter *adapter)
>                         reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
>                 adapter->tx_ring[i]->reg_idx = reg_idx;
>         }
> +       for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++) {
> +               if ((reg_idx & ~vmdq->mask) >= tcs)
> +                       reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
> +               adapter->xdp_ring[i]->reg_idx = reg_idx;
> +       }
>
>  #ifdef IXGBE_FCOE
>         /* nothing to do if FCoE is disabled */
> @@ -248,6 +253,12 @@ static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
>                 adapter->tx_ring[i]->reg_idx = reg_idx;
>         }
>
> +       for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++) {
> +               if ((reg_idx & rss->mask) >= rss->indices)
> +                       reg_idx = __ALIGN_MASK(reg_idx, ~vmdq->mask);
> +               adapter->xdp_ring[i]->reg_idx = reg_idx;
> +       }
> +
>  #ifdef IXGBE_FCOE
>         /* FCoE uses a linear block of queues so just assigning 1:1 */
>         for (; i < adapter->num_tx_queues; i++, reg_idx++)
> @@ -267,12 +278,14 @@ static bool ixgbe_cache_ring_sriov(struct ixgbe_adapter *adapter)
>   **/
>  static bool ixgbe_cache_ring_rss(struct ixgbe_adapter *adapter)
>  {
> -       int i;
> +       int i, reg_idx;
>
>         for (i = 0; i < adapter->num_rx_queues; i++)
>                 adapter->rx_ring[i]->reg_idx = i;
> -       for (i = 0; i < adapter->num_tx_queues; i++)
> -               adapter->tx_ring[i]->reg_idx = i;
> +       for (i = 0, reg_idx = 0; i < adapter->num_tx_queues; i++, reg_idx++)
> +               adapter->tx_ring[i]->reg_idx = reg_idx;
> +       for (i = 0; i < adapter->num_xdp_queues; i++, reg_idx++)
> +               adapter->xdp_ring[i]->reg_idx = reg_idx;
>
>         return true;
>  }
> @@ -308,6 +321,14 @@ static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter)
>         ixgbe_cache_ring_rss(adapter);
>  }
>
> +static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter)
> +{
> +       if (nr_cpu_ids > MAX_XDP_QUEUES)
> +               return 0;
> +
> +       return adapter->xdp_prog ? nr_cpu_ids : 0;
> +}
> +
>  #define IXGBE_RSS_64Q_MASK     0x3F
>  #define IXGBE_RSS_16Q_MASK     0xF
>  #define IXGBE_RSS_8Q_MASK      0x7
> @@ -382,6 +403,7 @@ static bool ixgbe_set_dcb_sriov_queues(struct ixgbe_adapter *adapter)
>         adapter->num_rx_queues_per_pool = tcs;
>
>         adapter->num_tx_queues = vmdq_i * tcs;
> +       adapter->num_xdp_queues = 0;
>         adapter->num_rx_queues = vmdq_i * tcs;
>
>  #ifdef IXGBE_FCOE
> @@ -479,6 +501,7 @@ static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter)
>                 netdev_set_tc_queue(dev, i, rss_i, rss_i * i);
>
>         adapter->num_tx_queues = rss_i * tcs;
> +       adapter->num_xdp_queues = 0;
>         adapter->num_rx_queues = rss_i * tcs;
>
>         return true;
> @@ -549,6 +572,7 @@ static bool ixgbe_set_sriov_queues(struct ixgbe_adapter *adapter)
>
>         adapter->num_rx_queues = vmdq_i * rss_i;
>         adapter->num_tx_queues = vmdq_i * rss_i;
> +       adapter->num_xdp_queues = 0;
>
>         /* disable ATR as it is not supported when VMDq is enabled */
>         adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE;
> @@ -669,6 +693,7 @@ static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter)
>  #endif /* IXGBE_FCOE */
>         adapter->num_rx_queues = rss_i;
>         adapter->num_tx_queues = rss_i;
> +       adapter->num_xdp_queues = ixgbe_xdp_queues(adapter);
>
>         return true;
>  }
> @@ -689,6 +714,7 @@ static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter)
>         /* Start with base case */
>         adapter->num_rx_queues = 1;
>         adapter->num_tx_queues = 1;
> +       adapter->num_xdp_queues = 0;
>         adapter->num_rx_pools = adapter->num_rx_queues;
>         adapter->num_rx_queues_per_pool = 1;
>
> @@ -719,8 +745,11 @@ static int ixgbe_acquire_msix_vectors(struct ixgbe_adapter *adapter)
>         struct ixgbe_hw *hw = &adapter->hw;
>         int i, vectors, vector_threshold;
>
> -       /* We start by asking for one vector per queue pair */
> +       /* We start by asking for one vector per queue pair with XDP queues
> +        * being stacked with TX queues.
> +        */
>         vectors = max(adapter->num_rx_queues, adapter->num_tx_queues);
> +       vectors = max(vectors, adapter->num_xdp_queues);
>
>         /* It is easy to be greedy for MSI-X vectors. However, it really
>          * doesn't do much good if we have a lot more vectors than CPUs. We'll
> @@ -800,6 +829,8 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
>   * @v_idx: index of vector in adapter struct
>   * @txr_count: total number of Tx rings to allocate
>   * @txr_idx: index of first Tx ring to allocate
> + * @xdp_count: total number of XDP rings to allocate
> + * @xdp_idx: index of first XDP ring to allocate
>   * @rxr_count: total number of Rx rings to allocate
>   * @rxr_idx: index of first Rx ring to allocate
>   *
> @@ -808,6 +839,7 @@ static void ixgbe_add_ring(struct ixgbe_ring *ring,
>  static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
>                                 int v_count, int v_idx,
>                                 int txr_count, int txr_idx,
> +                               int xdp_count, int xdp_idx,
>                                 int rxr_count, int rxr_idx)
>  {
>         struct ixgbe_q_vector *q_vector;
> @@ -909,6 +941,33 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
>                 ring++;
>         }
>
> +       while (xdp_count) {
> +               /* assign generic ring traits */
> +               ring->dev = &adapter->pdev->dev;
> +               ring->netdev = adapter->netdev;
> +
> +               /* configure backlink on ring */
> +               ring->q_vector = q_vector;
> +
> +               /* update q_vector Tx values */
> +               ixgbe_add_ring(ring, &q_vector->tx);

We might want to just add XDP queues as a separate ring container in
the q_vector.  Just a thought, though I am not sure what extra
complications it would add.

> +               /* apply Tx specific ring traits */
> +               ring->count = adapter->tx_ring_count;
> +               ring->queue_index = xdp_idx;
> +               set_ring_xdp(ring);
> +
> +               /* assign ring to adapter */
> +               adapter->xdp_ring[xdp_idx] = ring;
> +
> +               /* update count and index */
> +               xdp_count--;
> +               xdp_idx += v_count;
> +
> +               /* push pointer to next ring */
> +               ring++;
> +       }
> +
>         while (rxr_count) {
>                 /* assign generic ring traits */
>                 ring->dev = &adapter->pdev->dev;
> @@ -1002,17 +1061,18 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
>         int q_vectors = adapter->num_q_vectors;
>         int rxr_remaining = adapter->num_rx_queues;
>         int txr_remaining = adapter->num_tx_queues;
> -       int rxr_idx = 0, txr_idx = 0, v_idx = 0;
> +       int xdp_remaining = adapter->num_xdp_queues;
> +       int rxr_idx = 0, txr_idx = 0, xdp_idx = 0, v_idx = 0;
>         int err;
>
>         /* only one q_vector if MSI-X is disabled. */
>         if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED))
>                 q_vectors = 1;
>
> -       if (q_vectors >= (rxr_remaining + txr_remaining)) {
> +       if (q_vectors >= (rxr_remaining + txr_remaining + xdp_remaining)) {
>                 for (; rxr_remaining; v_idx++) {
>                         err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
> -                                                  0, 0, 1, rxr_idx);
> +                                                  0, 0, 0, 0, 1, rxr_idx);
>
>                         if (err)
>                                 goto err_out;
> @@ -1026,8 +1086,11 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
>         for (; v_idx < q_vectors; v_idx++) {
>                 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
>                 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
> +               int xqpv = DIV_ROUND_UP(xdp_remaining, q_vectors - v_idx);
> +
>                 err = ixgbe_alloc_q_vector(adapter, q_vectors, v_idx,
>                                            tqpv, txr_idx,
> +                                          xqpv, xdp_idx,
>                                            rqpv, rxr_idx);
>
>                 if (err)
> @@ -1036,14 +1099,17 @@ static int ixgbe_alloc_q_vectors(struct ixgbe_adapter *adapter)
>                 /* update counts and index */
>                 rxr_remaining -= rqpv;
>                 txr_remaining -= tqpv;
> +               xdp_remaining -= xqpv;
>                 rxr_idx++;
>                 txr_idx++;
> +               xdp_idx++;
>         }
>
>         return 0;
>
>  err_out:
>         adapter->num_tx_queues = 0;
> +       adapter->num_xdp_queues = 0;
>         adapter->num_rx_queues = 0;
>         adapter->num_q_vectors = 0;
>
> @@ -1066,6 +1132,7 @@ static void ixgbe_free_q_vectors(struct ixgbe_adapter *adapter)
>         int v_idx = adapter->num_q_vectors;
>
>         adapter->num_tx_queues = 0;
> +       adapter->num_xdp_queues = 0;
>         adapter->num_rx_queues = 0;
>         adapter->num_q_vectors = 0;
>
> @@ -1172,9 +1239,10 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
>
>         ixgbe_cache_ring_register(adapter);
>
> -       e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u\n",
> +       e_dev_info("Multiqueue %s: Rx Queue count = %u, Tx Queue count = %u XDP Queue count = %u\n",
>                    (adapter->num_rx_queues > 1) ? "Enabled" : "Disabled",
> -                  adapter->num_rx_queues, adapter->num_tx_queues);
> +                  adapter->num_rx_queues, adapter->num_tx_queues,
> +                  adapter->num_xdp_queues);
>
>         set_bit(__IXGBE_DOWN, &adapter->state);
>
> @@ -1195,6 +1263,7 @@ int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter)
>  void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter)
>  {
>         adapter->num_tx_queues = 0;
> +       adapter->num_xdp_queues = 0;
>         adapter->num_rx_queues = 0;
>
>         ixgbe_free_q_vectors(adapter);
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index 0b802b5..e754fe0 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -594,6 +594,19 @@ static void ixgbe_regdump(struct ixgbe_hw *hw, struct ixgbe_reg_info *reginfo)
>
>  }
>
> +static void ixgbe_print_buffer(struct ixgbe_ring *ring, int n)
> +{
> +       struct ixgbe_tx_buffer *tx_buffer;
> +
> +       tx_buffer = &ring->tx_buffer_info[ring->next_to_clean];
> +       pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
> +                  n, ring->next_to_use, ring->next_to_clean,
> +                  (u64)dma_unmap_addr(tx_buffer, dma),
> +                  dma_unmap_len(tx_buffer, len),
> +                  tx_buffer->next_to_watch,
> +                  (u64)tx_buffer->time_stamp);
> +}
> +
>  /*
>   * ixgbe_dump - Print registers, tx-rings and rx-rings
>   */
> @@ -603,7 +616,7 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
>         struct ixgbe_hw *hw = &adapter->hw;
>         struct ixgbe_reg_info *reginfo;
>         int n = 0;
> -       struct ixgbe_ring *tx_ring;
> +       struct ixgbe_ring *ring;
>         struct ixgbe_tx_buffer *tx_buffer;
>         union ixgbe_adv_tx_desc *tx_desc;
>         struct my_u0 { u64 a; u64 b; } *u0;
> @@ -644,14 +657,13 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
>                 "Queue [NTU] [NTC] [bi(ntc)->dma  ]",
>                 "leng", "ntw", "timestamp");
>         for (n = 0; n < adapter->num_tx_queues; n++) {
> -               tx_ring = adapter->tx_ring[n];
> -               tx_buffer = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
> -               pr_info(" %5d %5X %5X %016llX %08X %p %016llX\n",
> -                          n, tx_ring->next_to_use, tx_ring->next_to_clean,
> -                          (u64)dma_unmap_addr(tx_buffer, dma),
> -                          dma_unmap_len(tx_buffer, len),
> -                          tx_buffer->next_to_watch,
> -                          (u64)tx_buffer->time_stamp);
> +               ring = adapter->tx_ring[n];
> +               ixgbe_print_buffer(ring, n);
> +       }
> +
> +       for (n = 0; n < adapter->num_xdp_queues; n++) {
> +               ring = adapter->xdp_ring[n];
> +               ixgbe_print_buffer(ring, n);
>         }
>
>         /* Print TX Rings */
> @@ -696,28 +708,28 @@ static void ixgbe_dump(struct ixgbe_adapter *adapter)
>          */
>
>         for (n = 0; n < adapter->num_tx_queues; n++) {
> -               tx_ring = adapter->tx_ring[n];
> +               ring = adapter->tx_ring[n];
>                 pr_info("------------------------------------\n");
> -               pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
> +               pr_info("TX QUEUE INDEX = %d\n", ring->queue_index);
>                 pr_info("------------------------------------\n");
>                 pr_info("%s%s    %s              %s        %s          %s\n",
>                         "T [desc]     [address 63:0  ] ",
>                         "[PlPOIdStDDt Ln] [bi->dma       ] ",
>                         "leng", "ntw", "timestamp", "bi->skb");
>
> -               for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
> -                       tx_desc = IXGBE_TX_DESC(tx_ring, i);
> -                       tx_buffer = &tx_ring->tx_buffer_info[i];
> +               for (i = 0; ring->desc && (i < ring->count); i++) {
> +                       tx_desc = IXGBE_TX_DESC(ring, i);
> +                       tx_buffer = &ring->tx_buffer_info[i];
>                         u0 = (struct my_u0 *)tx_desc;
>                         if (dma_unmap_len(tx_buffer, len) > 0) {
>                                 const char *ring_desc;
>
> -                               if (i == tx_ring->next_to_use &&
> -                                   i == tx_ring->next_to_clean)
> +                               if (i == ring->next_to_use &&
> +                                   i == ring->next_to_clean)
>                                         ring_desc = " NTC/U";
> -                               else if (i == tx_ring->next_to_use)
> +                               else if (i == ring->next_to_use)
>                                         ring_desc = " NTU";
> -                               else if (i == tx_ring->next_to_clean)
> +                               else if (i == ring->next_to_clean)
>                                         ring_desc = " NTC";
>                                 else
>                                         ring_desc = "";
> @@ -987,6 +999,10 @@ static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter)
>         for (i = 0; i < adapter->num_tx_queues; i++)
>                 clear_bit(__IXGBE_HANG_CHECK_ARMED,
>                           &adapter->tx_ring[i]->state);
> +
> +       for (i = 0; i < adapter->num_xdp_queues; i++)
> +               clear_bit(__IXGBE_HANG_CHECK_ARMED,
> +                         &adapter->xdp_ring[i]->state);
>  }
>
>  static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
> @@ -1031,6 +1047,14 @@ static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter)
>                 if (xoff[tc])
>                         clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state);
>         }
> +
> +       for (i = 0; i < adapter->num_xdp_queues; i++) {
> +               struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
> +
> +               tc = xdp_ring->dcb_tc;
> +               if (xoff[tc])
> +                       clear_bit(__IXGBE_HANG_CHECK_ARMED, &xdp_ring->state);
> +       }
>  }
>
>  static u64 ixgbe_get_tx_completed(struct ixgbe_ring *ring)
> @@ -1182,7 +1206,10 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
>                 total_packets += tx_buffer->gso_segs;
>
>                 /* free the skb */
> -               napi_consume_skb(tx_buffer->skb, napi_budget);
> +               if (ring_is_xdp(tx_ring))
> +                       page_frag_free(tx_buffer->data);
> +               else
> +                       napi_consume_skb(tx_buffer->skb, napi_budget);
>
>                 /* unmap skb header data */
>                 dma_unmap_single(tx_ring->dev,
> @@ -1243,7 +1270,7 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
>         if (check_for_tx_hang(tx_ring) && ixgbe_check_tx_hang(tx_ring)) {
>                 /* schedule immediate reset if we believe we hung */
>                 struct ixgbe_hw *hw = &adapter->hw;
> -               e_err(drv, "Detected Tx Unit Hang\n"
> +               e_err(drv, "Detected Tx Unit Hang %s\n"
>                         "  Tx Queue             <%d>\n"
>                         "  TDH, TDT             <%x>, <%x>\n"
>                         "  next_to_use          <%x>\n"
> @@ -1251,13 +1278,16 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
>                         "tx_buffer_info[next_to_clean]\n"
>                         "  time_stamp           <%lx>\n"
>                         "  jiffies              <%lx>\n",
> +                       ring_is_xdp(tx_ring) ? "(XDP)" : "",
>                         tx_ring->queue_index,
>                         IXGBE_READ_REG(hw, IXGBE_TDH(tx_ring->reg_idx)),
>                         IXGBE_READ_REG(hw, IXGBE_TDT(tx_ring->reg_idx)),
>                         tx_ring->next_to_use, i,
>                         tx_ring->tx_buffer_info[i].time_stamp, jiffies);
>
> -               netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
> +               if (!ring_is_xdp(tx_ring))
> +                       netif_stop_subqueue(tx_ring->netdev,
> +                                           tx_ring->queue_index);
>
>                 e_info(probe,
>                        "tx hang %d detected on queue %d, resetting adapter\n",
> @@ -1270,6 +1300,9 @@ static bool ixgbe_clean_tx_irq(struct ixgbe_q_vector *q_vector,
>                 return true;
>         }
>
> +       if (ring_is_xdp(tx_ring))
> +               return !!budget;
> +
>         netdev_tx_completed_queue(txring_txq(tx_ring),
>                                   total_packets, total_bytes);
>
> @@ -2162,8 +2195,13 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>
>  #define IXGBE_XDP_PASS 0
>  #define IXGBE_XDP_CONSUMED 1
> +#define IXGBE_XDP_TX 2
>
> -static int ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
> +static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
> +                              struct xdp_buff *xdp);
> +
> +static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
> +                        struct ixgbe_ring  *rx_ring,
>                          struct ixgbe_rx_buffer *rx_buffer,
>                          unsigned int size)
>  {
> @@ -2187,10 +2225,16 @@ static int ixgbe_run_xdp(struct ixgbe_ring  *rx_ring,
>         act = bpf_prog_run_xdp(xdp_prog, &xdp);
>         switch (act) {
>         case XDP_PASS:
> -               goto xdp_out;
> +               break;
> +       case XDP_TX:
> +               result = ixgbe_xmit_xdp_ring(adapter, &xdp);
> +               if (result == IXGBE_XDP_TX)
> +                       break;
> +               else
> +                       rx_buffer->pagecnt_bias++; /* give page back */
> +               break;
>         default:
>                 bpf_warn_invalid_xdp_action(act);
> -       case XDP_TX:
>         case XDP_ABORTED:
>                 trace_xdp_exception(rx_ring->netdev, xdp_prog, act);
>                 /* fallthrough -- handle aborts by dropping packet */
> @@ -2222,8 +2266,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>                                const int budget)
>  {
>         unsigned int total_rx_bytes = 0, total_rx_packets = 0;
> -#ifdef IXGBE_FCOE
>         struct ixgbe_adapter *adapter = q_vector->adapter;
> +#ifdef IXGBE_FCOE
>         int ddp_bytes;
>         unsigned int mss = 0;
>  #endif /* IXGBE_FCOE */
> @@ -2255,8 +2299,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>
>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>
> -               consumed = ixgbe_run_xdp(rx_ring, rx_buffer, size);
> -               rcu_read_unlock();
> +               consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer, size);

Okay so this is where you fixed the rcu_read_unlock that you leaked from v1.

>
>                 if (consumed) {
>                         ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);

So if you end up going with my suggestions to patch 1/3 you would need
to make a slight tweak in the if (IS_ERR(skb)) path.

If you are transmitting the buffer you need to update the page_offset
by the truesize of the buffer, otherwise you can update pagecnt_bias.
So something along the lines of:
    if (PTR_ERR(skb) == IXGBE_XDP_TX) {
#if (PAGE_SIZE < 8192)
        rx_buffer->page_offset ^= ixgbe_rx_pg_size(rx_ring) / 2;
#else
        rx_buffer->page_offset += ring_uses_build_skb(rx_ring) ?
                                          SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
                                          SKB_DATA_ALIGN(size);
#endif
    } else {
        rx_buffer->pagecnt_bias++;
    }

> @@ -2921,6 +2964,7 @@ static irqreturn_t ixgbe_msix_other(int irq, void *data)
>                                                        &ring->state))
>                                         reinit_count++;
>                         }
> +
>                         if (reinit_count) {
>                                 /* no more flow director interrupts until after init */
>                                 IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_EIMC_FLOW_DIR);

White space noise.

> @@ -3436,6 +3480,8 @@ static void ixgbe_configure_tx(struct ixgbe_adapter *adapter)
>         /* Setup the HW Tx Head and Tail descriptor pointers */
>         for (i = 0; i < adapter->num_tx_queues; i++)
>                 ixgbe_configure_tx_ring(adapter, adapter->tx_ring[i]);
> +       for (i = 0; i < adapter->num_xdp_queues; i++)
> +               ixgbe_configure_tx_ring(adapter, adapter->xdp_ring[i]);
>  }
>
>  static void ixgbe_enable_rx_drop(struct ixgbe_adapter *adapter,
> @@ -5605,7 +5651,8 @@ static void ixgbe_clean_tx_ring(struct ixgbe_ring *tx_ring)
>         }
>
>         /* reset BQL for queue */
> -       netdev_tx_reset_queue(txring_txq(tx_ring));
> +       if (!ring_is_xdp(tx_ring))
> +               netdev_tx_reset_queue(txring_txq(tx_ring));
>
>         /* reset next_to_use and next_to_clean */
>         tx_ring->next_to_use = 0;
> @@ -5634,6 +5681,8 @@ static void ixgbe_clean_all_tx_rings(struct ixgbe_adapter *adapter)
>
>         for (i = 0; i < adapter->num_tx_queues; i++)
>                 ixgbe_clean_tx_ring(adapter->tx_ring[i]);
> +       for (i = 0; i < adapter->num_xdp_queues; i++)
> +               ixgbe_clean_tx_ring(adapter->xdp_ring[i]);
>  }
>
>  static void ixgbe_fdir_filter_exit(struct ixgbe_adapter *adapter)
> @@ -5728,6 +5777,11 @@ void ixgbe_down(struct ixgbe_adapter *adapter)
>                 u8 reg_idx = adapter->tx_ring[i]->reg_idx;
>                 IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
>         }
> +       for (i = 0; i < adapter->num_xdp_queues; i++) {
> +               u8 reg_idx = adapter->xdp_ring[i]->reg_idx;
> +
> +               IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH);
> +       }
>
>         /* Disable the Tx DMA engine on 82599 and later MAC */
>         switch (hw->mac.type) {
> @@ -6096,7 +6150,7 @@ int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring)
>   **/
>  static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
>  {
> -       int i, err = 0;
> +       int i, j = 0, err = 0;
>
>         for (i = 0; i < adapter->num_tx_queues; i++) {
>                 err = ixgbe_setup_tx_resources(adapter->tx_ring[i]);
> @@ -6106,10 +6160,21 @@ static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter)
>                 e_err(probe, "Allocation for Tx Queue %u failed\n", i);
>                 goto err_setup_tx;
>         }
> +       for (j = 0; j < adapter->num_xdp_queues; j++) {
> +               err = ixgbe_setup_tx_resources(adapter->xdp_ring[j]);
> +               if (!err)
> +                       continue;
> +
> +               e_err(probe, "Allocation for Tx Queue %u failed\n", j);
> +               goto err_setup_tx;
> +       }
> +
>
>         return 0;
>  err_setup_tx:
>         /* rewind the index freeing the rings as we go */
> +       while (j--)
> +               ixgbe_free_tx_resources(adapter->xdp_ring[j]);
>         while (i--)
>                 ixgbe_free_tx_resources(adapter->tx_ring[i]);
>         return err;
> @@ -6241,6 +6306,9 @@ static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter)
>         for (i = 0; i < adapter->num_tx_queues; i++)
>                 if (adapter->tx_ring[i]->desc)
>                         ixgbe_free_tx_resources(adapter->tx_ring[i]);
> +       for (i = 0; i < adapter->num_xdp_queues; i++)
> +               if (adapter->xdp_ring[i]->desc)
> +                       ixgbe_free_tx_resources(adapter->xdp_ring[i]);
>  }
>
>  /**
> @@ -6660,6 +6728,14 @@ void ixgbe_update_stats(struct ixgbe_adapter *adapter)
>                 bytes += tx_ring->stats.bytes;
>                 packets += tx_ring->stats.packets;
>         }
> +       for (i = 0; i < adapter->num_xdp_queues; i++) {
> +               struct ixgbe_ring *xdp_ring = adapter->xdp_ring[i];
> +
> +               restart_queue += xdp_ring->tx_stats.restart_queue;
> +               tx_busy += xdp_ring->tx_stats.tx_busy;
> +               bytes += xdp_ring->stats.bytes;
> +               packets += xdp_ring->stats.packets;
> +       }
>         adapter->restart_queue = restart_queue;
>         adapter->tx_busy = tx_busy;
>         netdev->stats.tx_bytes = bytes;
> @@ -6853,6 +6929,9 @@ static void ixgbe_fdir_reinit_subtask(struct ixgbe_adapter *adapter)
>                 for (i = 0; i < adapter->num_tx_queues; i++)
>                         set_bit(__IXGBE_TX_FDIR_INIT_DONE,
>                                 &(adapter->tx_ring[i]->state));
> +               for (i = 0; i < adapter->num_xdp_queues; i++)
> +                       set_bit(__IXGBE_TX_FDIR_INIT_DONE,
> +                               &adapter->xdp_ring[i]->state);
>                 /* re-enable flow director interrupts */
>                 IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMS_FLOW_DIR);
>         } else {
> @@ -6886,6 +6965,8 @@ static void ixgbe_check_hang_subtask(struct ixgbe_adapter *adapter)
>         if (netif_carrier_ok(adapter->netdev)) {
>                 for (i = 0; i < adapter->num_tx_queues; i++)
>                         set_check_for_tx_hang(adapter->tx_ring[i]);
> +               for (i = 0; i < adapter->num_xdp_queues; i++)
> +                       set_check_for_tx_hang(adapter->xdp_ring[i]);
>         }
>
>         if (!(adapter->flags & IXGBE_FLAG_MSIX_ENABLED)) {
> @@ -7116,6 +7197,13 @@ static bool ixgbe_ring_tx_pending(struct ixgbe_adapter *adapter)
>                         return true;
>         }
>
> +       for (i = 0; i < adapter->num_xdp_queues; i++) {
> +               struct ixgbe_ring *ring = adapter->xdp_ring[i];
> +
> +               if (ring->next_to_use != ring->next_to_clean)
> +                       return true;
> +       }
> +
>         return false;
>  }
>
> @@ -8076,6 +8164,85 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb,
>  #endif
>  }
>
> +static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
> +                              struct xdp_buff *xdp)
> +{
> +       struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()];
> +       struct ixgbe_tx_buffer *tx_buffer;
> +       union ixgbe_adv_tx_desc *tx_desc;
> +       u32 len, tx_flags = 0;
> +       dma_addr_t dma;
> +       u16 count, i;
> +       u32 cmd_type;
> +
> +       len = xdp->data_end - xdp->data;
> +       count = TXD_USE_COUNT(len);
> +
> +       /* record the location of the first descriptor for this packet */
> +       tx_buffer = &ring->tx_buffer_info[ring->next_to_use];
> +       tx_buffer->skb = NULL;
> +       tx_buffer->bytecount = len;
> +       tx_buffer->gso_segs = 1;
> +       tx_buffer->protocol = 0;
> +
> +       tx_buffer->tx_flags = tx_flags;
> +       i = ring->next_to_use;
> +       tx_desc = IXGBE_TX_DESC(ring, i);
> +
> +       dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE);
> +       if (dma_mapping_error(ring->dev, dma))
> +               goto dma_error;
> +
> +       dma_unmap_len_set(tx_buffer, len, len);
> +       dma_unmap_addr_set(tx_buffer, dma, dma);
> +       tx_buffer->data = xdp->data;
> +       tx_desc->read.buffer_addr = cpu_to_le64(dma);
> +
> +       /* put descriptor type bits */
> +       cmd_type = IXGBE_ADVTXD_DTYP_DATA |
> +                  IXGBE_ADVTXD_DCMD_DEXT |
> +                  IXGBE_ADVTXD_DCMD_IFCS;
> +       cmd_type |= len | IXGBE_TXD_CMD;
> +       tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
> +       tx_desc->read.olinfo_status =
> +               cpu_to_le32(len << IXGBE_ADVTXD_PAYLEN_SHIFT);
> +
> +#ifdef CONFIG_PCI_IOV
> +       if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) {
> +               tx_desc->read.olinfo_status |= IXGBE_ADVTXD_CC;
> +               ixgbe_tx_ctxtdesc(tx_ring, ETH_HLEN, 0, 0, 0);
> +       }
> +#endif
> +
> +       /* Force memory writes to complete before letting h/w know there
> +        * are new descriptors to fetch.  (Only applicable for weak-ordered
> +        * memory model archs, such as IA-64).
> +        *
> +        * We also need this memory barrier to make certain all of the
> +        * status bits have been updated before next_to_watch is written.
> +        */
> +       wmb();
> +
> +       /* set next_to_watch value indicating a packet is present */
> +       i++;
> +       if (i == ring->count)
> +               i = 0;
> +
> +       tx_buffer->next_to_watch = tx_desc;
> +       ring->next_to_use = i;
> +
> +       writel(i, ring->tail);
> +
> +       /* we need this if more than one processor can write to our tail
> +        * at a time, it synchronizes IO on IA64/Altix systems
> +        */
> +       mmiowb();
> +       return IXGBE_XDP_TX;
> +dma_error:
> +       ring->next_to_use = i;
> +       return IXGBE_XDP_CONSUMED;
> +}
> +
>  netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb,
>                           struct ixgbe_adapter *adapter,
>                           struct ixgbe_ring *tx_ring)
> @@ -8367,6 +8534,23 @@ static void ixgbe_netpoll(struct net_device *netdev)
>
>  #endif
>
> +static void ixgbe_get_ring_stats64(struct rtnl_link_stats64 *stats,
> +                                  struct ixgbe_ring *ring)
> +{
> +       u64 bytes, packets;
> +       unsigned int start;
> +
> +       if (ring) {
> +               do {
> +                       start = u64_stats_fetch_begin_irq(&ring->syncp);
> +                       packets = ring->stats.packets;
> +                       bytes   = ring->stats.bytes;
> +               } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
> +               stats->tx_packets += packets;
> +               stats->tx_bytes   += bytes;
> +       }
> +}
> +
>  static void ixgbe_get_stats64(struct net_device *netdev,
>                               struct rtnl_link_stats64 *stats)
>  {
> @@ -8392,18 +8576,13 @@ static void ixgbe_get_stats64(struct net_device *netdev,
>
>         for (i = 0; i < adapter->num_tx_queues; i++) {
>                 struct ixgbe_ring *ring = ACCESS_ONCE(adapter->tx_ring[i]);
> -               u64 bytes, packets;
> -               unsigned int start;
>
> -               if (ring) {
> -                       do {
> -                               start = u64_stats_fetch_begin_irq(&ring->syncp);
> -                               packets = ring->stats.packets;
> -                               bytes   = ring->stats.bytes;
> -                       } while (u64_stats_fetch_retry_irq(&ring->syncp, start));
> -                       stats->tx_packets += packets;
> -                       stats->tx_bytes   += bytes;
> -               }
> +               ixgbe_get_ring_stats64(stats, ring);
> +       }
> +       for (i = 0; i < adapter->num_xdp_queues; i++) {
> +               struct ixgbe_ring *ring = ACCESS_ONCE(adapter->xdp_ring[i]);
> +
> +               ixgbe_get_ring_stats64(stats, ring);
>         }
>         rcu_read_unlock();
>
> @@ -9524,7 +9703,13 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
>  {
>         int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
>         struct ixgbe_adapter *adapter = netdev_priv(dev);
> -       struct bpf_prog *old_adapter_prog;
> +       struct bpf_prog *old_prog;
> +
> +       if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED)
> +               return -EINVAL;
> +
> +       if (adapter->flags & IXGBE_FLAG_DCB_ENABLED)
> +               return -EINVAL;
>
>         /* verify ixgbe ring attributes are sufficient for XDP */
>         for (i = 0; i < adapter->num_rx_queues; i++) {
> @@ -9537,12 +9722,26 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog)
>                         return -EINVAL;
>         }
>
> -       old_adapter_prog = xchg(&adapter->xdp_prog, prog);
> +       if (nr_cpu_ids > MAX_XDP_QUEUES)
> +               return -ENOMEM;
> +
> +       old_prog = xchg(&adapter->xdp_prog, prog);
> +
> +       /* If transitioning XDP modes reconfigure rings */
> +       if (!!adapter->xdp_prog != !!old_prog) {
> +               int err = ixgbe_setup_tc(dev, netdev_get_num_tc(dev));
> +
> +               if (err) {
> +                       rcu_assign_pointer(adapter->xdp_prog, old_prog);
> +                       return -EINVAL;
> +               }
> +       }
> +
>         for (i = 0; i < adapter->num_rx_queues; i++)
>                 xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog);
>
> -       if (old_adapter_prog)
> -               bpf_prog_put(old_adapter_prog);
> +       if (old_prog)
> +               bpf_prog_put(old_prog);
>
>         return 0;
>  }
> @@ -10048,6 +10247,9 @@ static int ixgbe_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>         for (i = 0; i < adapter->num_tx_queues; i++)
>                 u64_stats_init(&adapter->tx_ring[i]->syncp);
>
> +       for (i = 0; i < adapter->num_xdp_queues; i++)
> +               u64_stats_init(&adapter->xdp_ring[i]->syncp);
> +
>         /* WOL not supported for all devices */
>         adapter->wol = 0;
>         hw->eeprom.ops.read(hw, 0x2c, &adapter->eeprom_cap);
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head
  2017-03-02  0:55 ` [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head John Fastabend
@ 2017-03-02 19:43   ` Alexander Duyck
  2017-03-03 17:07     ` John Fastabend
  0 siblings, 1 reply; 9+ messages in thread
From: Alexander Duyck @ 2017-03-02 19:43 UTC (permalink / raw)
  To: intel-wired-lan

On Wed, Mar 1, 2017 at 4:55 PM, John Fastabend <john.fastabend@gmail.com> wrote:
> Add adjust_head support for XDP however at the moment we are only
> adding IXGBE_SKB_PAD bytes of headroom to align with driver paths.
>
> The infrastructure is is such that a follow on patch can extend
> headroom up to 196B without changing RX path.

I still owe you the 192B headroom patch.  I will hopefully get to it today.

> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> ---
>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   38 +++++++++++++++++--------
>  1 file changed, 26 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> index e754fe0..c8bf64e 100644
> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
> @@ -2026,6 +2026,7 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
>  static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
>                               struct ixgbe_rx_buffer *rx_buffer,
>                               struct sk_buff *skb,
> +                             unsigned int headroom,
>                               unsigned int size)
>  {
>  #if (PAGE_SIZE < 8192)

You shouldn't need any changes to ixgbe_add_rx_frag since there is no
headroom here as this would be the second buffer in a scatter-gather
list.

> @@ -2036,7 +2037,8 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
>                                 SKB_DATA_ALIGN(size);
>  #endif
>         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
> -                       rx_buffer->page_offset, size, truesize);
> +                       rx_buffer->page_offset - (IXGBE_SKB_PAD - headroom),
> +                       size, truesize);
>  #if (PAGE_SIZE < 8192)
>         rx_buffer->page_offset ^= truesize;
>  #else

So as per comment above this could be dropped.

> @@ -2109,6 +2111,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>  static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>                                            struct ixgbe_rx_buffer *rx_buffer,
>                                            union ixgbe_adv_rx_desc *rx_desc,
> +                                          unsigned int headroom,
>                                            unsigned int size)
>  {
>         void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;

I was thinking. Instead of adding headroom why don't we just replace
rx_buffer for construct and build skb with the xdp_buf structure
pointer.  Then you just allocate it in ixgbe_clean_rx_irq, populate it
in ixgbe_run_xdp, and could pull the needed bits into
ixgbe_construct_skb since we are already having to convert the page
offsets and such anyway.

> @@ -2117,6 +2120,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>  #else
>         unsigned int truesize = SKB_DATA_ALIGN(size);
>  #endif
> +       unsigned int off_page;
>         struct sk_buff *skb;
>
>         /* prefetch first cache line of first page */
> @@ -2130,12 +2134,14 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>         if (unlikely(!skb))
>                 return NULL;
>
> +       off_page = IXGBE_SKB_PAD - headroom;
> +
>         if (size > IXGBE_RX_HDR_SIZE) {
>                 if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
>                         IXGBE_CB(skb)->dma = rx_buffer->dma;
>
>                 skb_add_rx_frag(skb, 0, rx_buffer->page,
> -                               rx_buffer->page_offset,
> +                               rx_buffer->page_offset - off_page,
>                                 size, truesize);
>  #if (PAGE_SIZE < 8192)
>                 rx_buffer->page_offset ^= truesize;

As far as computing the bits needed for skb_add_rx_frag I think you
can refer to the code used in igb for how to handle that since I think
I was having to get the page address from the page for that anyway.

> @@ -2143,7 +2149,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>                 rx_buffer->page_offset += truesize;
>  #endif
>         } else {
> -               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
> +               memcpy(__skb_put(skb, size), va - off_page,
> +                      ALIGN(size, sizeof(long)));
>                 rx_buffer->pagecnt_bias++;
>         }
>
> @@ -2153,6 +2160,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>  static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>                                        struct ixgbe_rx_buffer *rx_buffer,
>                                        union ixgbe_adv_rx_desc *rx_desc,
> +                                      unsigned int headroom,
>                                        unsigned int size)
>  {
>         void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
> @@ -2176,7 +2184,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>                 return NULL;
>
>         /* update pointers within the skb to store the data */
> -       skb_reserve(skb, IXGBE_SKB_PAD);
> +       skb_reserve(skb, headroom);
>         __skb_put(skb, size);
>
>         /* record DMA address if this is the start of a chain of buffers */
> @@ -2203,7 +2211,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
>  static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
>                          struct ixgbe_ring  *rx_ring,
>                          struct ixgbe_rx_buffer *rx_buffer,
> -                        unsigned int size)
> +                        unsigned int *headroom,
> +                        unsigned int *size)
>  {
>         int result = IXGBE_XDP_PASS;
>         struct bpf_prog *xdp_prog;
> @@ -2218,14 +2227,16 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
>                 goto xdp_out;
>
>         addr = page_address(rx_buffer->page) + rx_buffer->page_offset;
> -       xdp.data_hard_start = addr;
> +       xdp.data_hard_start = addr - *headroom;
>         xdp.data = addr;
> -       xdp.data_end = addr + size;
> +       xdp.data_end = addr + *size;
>
>         act = bpf_prog_run_xdp(xdp_prog, &xdp);
>         switch (act) {
>         case XDP_PASS:
> -               break;
> +               *headroom = xdp.data - xdp.data_hard_start;
> +               *size = xdp.data_end - xdp.data;
> +               return IXGBE_XDP_PASS;
>         case XDP_TX:
>                 result = ixgbe_xmit_xdp_ring(adapter, &xdp);
>                 if (result == IXGBE_XDP_TX)
> @@ -2274,6 +2285,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
>
>         while (likely(total_rx_packets < budget)) {
> +               unsigned int headroom = ixgbe_rx_offset(rx_ring);
>                 union ixgbe_adv_rx_desc *rx_desc;
>                 struct ixgbe_rx_buffer *rx_buffer;
>                 struct sk_buff *skb;
> @@ -2299,7 +2311,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>
>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>
> -               consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer, size);
> +               consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer,
> +                                        &headroom, &size);
>
>                 if (consumed) {
>                         ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);

Just looking this over I think passing xdp_buf instead of rx_buffer to
the functions below would likely be a much better way to go.

> @@ -2312,13 +2325,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>
>                 /* retrieve a buffer from the ring */
>                 if (skb)
> -                       ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
> +                       ixgbe_add_rx_frag(rx_ring, rx_buffer, skb,
> +                                         headroom, size);
>                 else if (ring_uses_build_skb(rx_ring))
>                         skb = ixgbe_build_skb(rx_ring, rx_buffer,
> -                                             rx_desc, size);
> +                                             rx_desc, headroom, size);
>                 else
>                         skb = ixgbe_construct_skb(rx_ring, rx_buffer,
> -                                                 rx_desc, size);
> +                                                 rx_desc, headroom, size);
>
>                 /* exit if we failed to retrieve a buffer */
>                 if (!skb) {
>

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions
  2017-03-02 16:22 ` [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions Alexander Duyck
@ 2017-03-03 16:23   ` John Fastabend
  0 siblings, 0 replies; 9+ messages in thread
From: John Fastabend @ 2017-03-03 16:23 UTC (permalink / raw)
  To: intel-wired-lan

On 17-03-02 08:22 AM, Alexander Duyck wrote:
> On Wed, Mar 1, 2017 at 4:54 PM, John Fastabend <john.fastabend@gmail.com> wrote:
>> Basic XDP drop support for ixgbe. Uses READ_ONCE/xchg semantics on XDP
>> programs instead of rcu primitives as suggested by Daniel Borkmann and
>> Alex Duyck.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
> 
> So it still looks like this is doing a bunch of hacking in the Rx
> path.  I suggested a few items below to streamline this.
> 
>> ---
>>  drivers/net/ethernet/intel/ixgbe/ixgbe.h      |    2
>>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |  114 ++++++++++++++++++++++++-
>>  2 files changed, 113 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>> index b812913..2d12c24 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
>> @@ -273,6 +273,7 @@ struct ixgbe_ring {
>>         struct ixgbe_ring *next;        /* pointer to next ring in q_vector */
>>         struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */
>>         struct net_device *netdev;      /* netdev ring belongs to */
>> +       struct bpf_prog *xdp_prog;
>>         struct device *dev;             /* device for DMA mapping */
>>         struct ixgbe_fwd_adapter *l2_accel_priv;
>>         void *desc;                     /* descriptor ring memory */
>> @@ -510,6 +511,7 @@ struct ixgbe_adapter {
>>         unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
>>         /* OS defined structs */
>>         struct net_device *netdev;
>> +       struct bpf_prog *xdp_prog;
>>         struct pci_dev *pdev;
>>
>>         unsigned long state;
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> index e3da397..0b802b5 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> @@ -49,6 +49,9 @@
>>  #include <linux/if_macvlan.h>
>>  #include <linux/if_bridge.h>
>>  #include <linux/prefetch.h>
>> +#include <linux/bpf.h>
>> +#include <linux/bpf_trace.h>
>> +#include <linux/atomic.h>
>>  #include <scsi/fc/fc_fcoe.h>
>>  #include <net/udp_tunnel.h>
>>  #include <net/pkt_cls.h>
>> @@ -2051,7 +2054,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>>                 /* hand second half of page back to the ring */
>>                 ixgbe_reuse_rx_page(rx_ring, rx_buffer);
>>         } else {
>> -               if (IXGBE_CB(skb)->dma == rx_buffer->dma) {
>> +               if (skb && IXGBE_CB(skb)->dma == rx_buffer->dma) {
>>                         /* the page has been released from the ring */
>>                         IXGBE_CB(skb)->page_released = true;
>>                 } else {
> 
> So you can probably change this to "if (!IS_ERR(skb) &&
> IXGBE_CB(skb)->dma == rx_buffer->dma) {" more on why below.
> 

OK I will go with this. I think using 'int consumed' is a bit easier to read for
folks not overly familiar with the driver, but I get the point it removes some
special cases.

should have a v3 shortly.

[...]

>> @@ -2207,6 +2255,18 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>
>>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>>
>> +               consumed = ixgbe_run_xdp(rx_ring, rx_buffer, size);
>> +               rcu_read_unlock();
>> +
> 
> Did you leave an rcu_read_unlock here?  I'm assuming you meant to drop this.
> 

ugh patch sequence error its fixed in v2 as you note.

>> +               if (consumed) {
>> +                       ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
>> +                       cleaned_count++;
>> +                       ixgbe_is_non_eop(rx_ring, rx_desc, skb);
>> +                       total_rx_packets++;
>> +                       total_rx_bytes += size;
>> +                       continue;
>> +               }
>> +
> 
> Looks like this wasn't adding one back to the pagecnt_bias.  That
> would trigger a memory leak.
> 

It is done in the ixgbe_run_xdp path. Perhaps its better to do here.

[...]

>>                         ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
>> @@ -6121,9 +6181,13 @@ static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter)
>>         int i, err = 0;
>>
>>         for (i = 0; i < adapter->num_rx_queues; i++) {
>> -               err = ixgbe_setup_rx_resources(adapter->rx_ring[i]);
>> -               if (!err)
>> +               struct ixgbe_ring *rx_ring = adapter->rx_ring[i];
>> +
>> +               err = ixgbe_setup_rx_resources(rx_ring);
>> +               if (!err) {
>> +                       xchg(&rx_ring->xdp_prog, adapter->xdp_prog);
>>                         continue;
>> +               }
>>

Sure done.

>>                 e_err(probe, "Allocation for Rx Queue %u failed\n", i);
>>                 goto err_setup_rx;
> 
> I still think it makes more sense to just place this in
> ixgbe_setup_rx_resources.  No point in putting here as it just adds
> more complication.
> 
> If I am not mistaken all you would have to add is the xchg line and
> that would be it.
> 
>> @@ -6191,6 +6255,7 @@ void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring)
>>
>>         vfree(rx_ring->rx_buffer_info);
>>         rx_ring->rx_buffer_info = NULL;
>> +       xchg(&rx_ring->xdp_prog, NULL);
>>
>>         /* if not set, then don't free */
>>         if (!rx_ring->desc)
> 
> Just for the sake of symmetry I would suggest placing the xchg in
> front of the vfree.  Also this code being here makes a stronger
> argument for us setting up xdp in setup_rx_resources.
> 

done.


Thanks,
John


^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action
  2017-03-02 16:39   ` Alexander Duyck
@ 2017-03-03 16:40     ` John Fastabend
  0 siblings, 0 replies; 9+ messages in thread
From: John Fastabend @ 2017-03-03 16:40 UTC (permalink / raw)
  To: intel-wired-lan

On 17-03-02 08:39 AM, Alexander Duyck wrote:
> On Wed, Mar 1, 2017 at 4:54 PM, John Fastabend <john.fastabend@gmail.com> wrote:
>> Add support for XDP_TX action.
>>
>> A couple design choices were made here. First I use a new ring
>> pointer structure xdp_ring[] in the adapter struct instead of
>> pushing the newly allocated xdp TX rings into the tx_ring[]
>> structure. This means we have to duplicate loops around rings
>> in places we want to initialize both TX rings and XDP rings.
>> But by making it explicit it is obvious when we are using XDP
>> rings and when we are using TX rings. Further we don't have
>> to do ring arithmatic which is error prone. As a proof point
>> for doing this my first patches used only a single ring structure
>> and introduced bugs in FCoE code and macvlan code paths.
>>
>> Second I am aware this is not the most optimized version of
>> this code possible. I want to get baseline support in using
>> the most readable format possible and then once this series
>> is included I will optimize the TX path in another series
>> of patches.
>>
>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---

[...]

>>
>> +               for (j = 0; j < adapter->num_xdp_queues; i++, j++) {
>> +                       memcpy(&temp_ring[i], adapter->xdp_ring[j],
>> +                              sizeof(struct ixgbe_ring));
>> +
>> +                       temp_ring[i].count = new_tx_count;
>> +                       err = ixgbe_setup_tx_resources(&temp_ring[i]);
>> +                       if (err) {
>> +                               while (i) {
>> +                                       i--;
>> +                                       ixgbe_free_tx_resources(&temp_ring[i]);
>> +                               }
>> +                               goto err_setup;
>> +                       }
>> +               }
>> +
> 
> It looks like this is broken.  I would say just get rid of j and just
> use i for all of this like we did for the Tx and Rx rings.  I don't
> think you need J anymore since you aren't really playing with an
> offset anyway.
> 

Yep fixed up the counter usage throughout. This was a holdout from using the
single tx_ring[] array.

[...]

>>         struct ixgbe_q_vector *q_vector;
>> @@ -909,6 +941,33 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter,
>>                 ring++;
>>         }
>>
>> +       while (xdp_count) {
>> +               /* assign generic ring traits */
>> +               ring->dev = &adapter->pdev->dev;
>> +               ring->netdev = adapter->netdev;
>> +
>> +               /* configure backlink on ring */
>> +               ring->q_vector = q_vector;
>> +
>> +               /* update q_vector Tx values */
>> +               ixgbe_add_ring(ring, &q_vector->tx);
> 
> We might want to just add XDP queues as a separate ring container in
> the q_vector.  Just a thought, though I am not sure what extra
> complications it would add.
> 

I'll keep it as is for now.

[...]

>> @@ -2255,8 +2299,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>
>>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>>
>> -               consumed = ixgbe_run_xdp(rx_ring, rx_buffer, size);
>> -               rcu_read_unlock();
>> +               consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer, size);
> 
> Okay so this is where you fixed the rcu_read_unlock that you leaked from v1.
> 
>>
>>                 if (consumed) {
>>                         ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
> 
> So if you end up going with my suggestions to patch 1/3 you would need
> to make a slight tweak in the if (IS_ERR(skb)) path.
> 
> If you are transmitting the buffer you need to update the page_offset
> by the truesize of the buffer, otherwise you can update pagecnt_bias.
> So something along the lines of:
>     if (PTR_ERR(skb) == IXGBE_XDP_TX) {
> #if (PAGE_SIZE < 8192)
>         rx_buffer->page_offset ^= ixgbe_rx_pg_size(rx_ring) / 2;
> #else
>         rx_buffer->page_offset += ring_uses_build_skb(rx_ring) ?
>                                           SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
>                                           SKB_DATA_ALIGN(size);
> #endif
>     } else {
>         rx_buffer->pagecnt_bias++;
>     }
> 

Or just do it in ixgbe_run_xdp directly.


Thanks,
John

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head
  2017-03-02 19:43   ` Alexander Duyck
@ 2017-03-03 17:07     ` John Fastabend
  0 siblings, 0 replies; 9+ messages in thread
From: John Fastabend @ 2017-03-03 17:07 UTC (permalink / raw)
  To: intel-wired-lan

On 17-03-02 11:43 AM, Alexander Duyck wrote:
> On Wed, Mar 1, 2017 at 4:55 PM, John Fastabend <john.fastabend@gmail.com> wrote:
>> Add adjust_head support for XDP however at the moment we are only
>> adding IXGBE_SKB_PAD bytes of headroom to align with driver paths.
>>
>> The infrastructure is is such that a follow on patch can extend
>> headroom up to 196B without changing RX path.
> 
> I still owe you the 192B headroom patch.  I will hopefully get to it today.
> 

I see the patch on the list thanks.

>> Signed-off-by: John Fastabend <john.r.fastabend@intel.com>
>> ---
>>  drivers/net/ethernet/intel/ixgbe/ixgbe_main.c |   38 +++++++++++++++++--------
>>  1 file changed, 26 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> index e754fe0..c8bf64e 100644
>> --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
>> @@ -2026,6 +2026,7 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer)
>>  static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
>>                               struct ixgbe_rx_buffer *rx_buffer,
>>                               struct sk_buff *skb,
>> +                             unsigned int headroom,
>>                               unsigned int size)
>>  {
>>  #if (PAGE_SIZE < 8192)
> 
> You shouldn't need any changes to ixgbe_add_rx_frag since there is no
> headroom here as this would be the second buffer in a scatter-gather
> list.
> 
>> @@ -2036,7 +2037,8 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring,
>>                                 SKB_DATA_ALIGN(size);
>>  #endif
>>         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
>> -                       rx_buffer->page_offset, size, truesize);
>> +                       rx_buffer->page_offset - (IXGBE_SKB_PAD - headroom),
>> +                       size, truesize);
>>  #if (PAGE_SIZE < 8192)
>>         rx_buffer->page_offset ^= truesize;
>>  #else
> 
> So as per comment above this could be dropped.
> 

Yep thanks.

>> @@ -2109,6 +2111,7 @@ static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring,
>>  static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>>                                            struct ixgbe_rx_buffer *rx_buffer,
>>                                            union ixgbe_adv_rx_desc *rx_desc,
>> +                                          unsigned int headroom,
>>                                            unsigned int size)
>>  {
>>         void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
> 
> I was thinking. Instead of adding headroom why don't we just replace
> rx_buffer for construct and build skb with the xdp_buf structure
> pointer.  Then you just allocate it in ixgbe_clean_rx_irq, populate it
> in ixgbe_run_xdp, and could pull the needed bits into
> ixgbe_construct_skb since we are already having to convert the page
> offsets and such anyway.

Well we use dma pointer and page_offset is handled slightly differently
in both cases.

Its tempting to go this route with a ixgbe_promote_xdp_to_skb() handler. How
about pushing this series with headroom, size and playing with this for a bit.

> 
>> @@ -2117,6 +2120,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>>  #else
>>         unsigned int truesize = SKB_DATA_ALIGN(size);
>>  #endif
>> +       unsigned int off_page;
>>         struct sk_buff *skb;
>>
>>         /* prefetch first cache line of first page */
>> @@ -2130,12 +2134,14 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>>         if (unlikely(!skb))
>>                 return NULL;
>>
>> +       off_page = IXGBE_SKB_PAD - headroom;
>> +
>>         if (size > IXGBE_RX_HDR_SIZE) {
>>                 if (!ixgbe_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))
>>                         IXGBE_CB(skb)->dma = rx_buffer->dma;
>>
>>                 skb_add_rx_frag(skb, 0, rx_buffer->page,
>> -                               rx_buffer->page_offset,
>> +                               rx_buffer->page_offset - off_page,
>>                                 size, truesize);
>>  #if (PAGE_SIZE < 8192)
>>                 rx_buffer->page_offset ^= truesize;
> 
> As far as computing the bits needed for skb_add_rx_frag I think you
> can refer to the code used in igb for how to handle that since I think
> I was having to get the page address from the page for that anyway.
> 

Not sure I'm following this comment. Are you suggesting this is not correct
as is? Or is this related to the xdp and rx_buffer comment.

>> @@ -2143,7 +2149,8 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>>                 rx_buffer->page_offset += truesize;
>>  #endif
>>         } else {
>> -               memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long)));
>> +               memcpy(__skb_put(skb, size), va - off_page,
>> +                      ALIGN(size, sizeof(long)));
>>                 rx_buffer->pagecnt_bias++;
>>         }
>>
>> @@ -2153,6 +2160,7 @@ static struct sk_buff *ixgbe_construct_skb(struct ixgbe_ring *rx_ring,
>>  static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>>                                        struct ixgbe_rx_buffer *rx_buffer,
>>                                        union ixgbe_adv_rx_desc *rx_desc,
>> +                                      unsigned int headroom,
>>                                        unsigned int size)
>>  {
>>         void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
>> @@ -2176,7 +2184,7 @@ static struct sk_buff *ixgbe_build_skb(struct ixgbe_ring *rx_ring,
>>                 return NULL;
>>
>>         /* update pointers within the skb to store the data */
>> -       skb_reserve(skb, IXGBE_SKB_PAD);
>> +       skb_reserve(skb, headroom);
>>         __skb_put(skb, size);
>>
>>         /* record DMA address if this is the start of a chain of buffers */
>> @@ -2203,7 +2211,8 @@ static int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter,
>>  static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
>>                          struct ixgbe_ring  *rx_ring,
>>                          struct ixgbe_rx_buffer *rx_buffer,
>> -                        unsigned int size)
>> +                        unsigned int *headroom,
>> +                        unsigned int *size)
>>  {
>>         int result = IXGBE_XDP_PASS;
>>         struct bpf_prog *xdp_prog;
>> @@ -2218,14 +2227,16 @@ static int ixgbe_run_xdp(struct ixgbe_adapter *adapter,
>>                 goto xdp_out;
>>
>>         addr = page_address(rx_buffer->page) + rx_buffer->page_offset;
>> -       xdp.data_hard_start = addr;
>> +       xdp.data_hard_start = addr - *headroom;
>>         xdp.data = addr;
>> -       xdp.data_end = addr + size;
>> +       xdp.data_end = addr + *size;
>>
>>         act = bpf_prog_run_xdp(xdp_prog, &xdp);
>>         switch (act) {
>>         case XDP_PASS:
>> -               break;
>> +               *headroom = xdp.data - xdp.data_hard_start;
>> +               *size = xdp.data_end - xdp.data;
>> +               return IXGBE_XDP_PASS;
>>         case XDP_TX:
>>                 result = ixgbe_xmit_xdp_ring(adapter, &xdp);
>>                 if (result == IXGBE_XDP_TX)
>> @@ -2274,6 +2285,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>         u16 cleaned_count = ixgbe_desc_unused(rx_ring);
>>
>>         while (likely(total_rx_packets < budget)) {
>> +               unsigned int headroom = ixgbe_rx_offset(rx_ring);
>>                 union ixgbe_adv_rx_desc *rx_desc;
>>                 struct ixgbe_rx_buffer *rx_buffer;
>>                 struct sk_buff *skb;
>> @@ -2299,7 +2311,8 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>
>>                 rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size);
>>
>> -               consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer, size);
>> +               consumed = ixgbe_run_xdp(adapter, rx_ring, rx_buffer,
>> +                                        &headroom, &size);
>>
>>                 if (consumed) {
>>                         ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb);
> 
> Just looking this over I think passing xdp_buf instead of rx_buffer to
> the functions below would likely be a much better way to go.


much better? In the construct_skb case we need to get the rx_buffer->dma address
from somewhere.

> 
>> @@ -2312,13 +2325,14 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
>>
>>                 /* retrieve a buffer from the ring */
>>                 if (skb)
>> -                       ixgbe_add_rx_frag(rx_ring, rx_buffer, skb, size);
>> +                       ixgbe_add_rx_frag(rx_ring, rx_buffer, skb,
>> +                                         headroom, size);
>>                 else if (ring_uses_build_skb(rx_ring))
>>                         skb = ixgbe_build_skb(rx_ring, rx_buffer,
>> -                                             rx_desc, size);
>> +                                             rx_desc, headroom, size);
>>                 else
>>                         skb = ixgbe_construct_skb(rx_ring, rx_buffer,
>> -                                                 rx_desc, size);
>> +                                                 rx_desc, headroom, size);
>>
>>                 /* exit if we failed to retrieve a buffer */
>>                 if (!skb) {
>>


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2017-03-03 17:07 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-03-02  0:54 [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions John Fastabend
2017-03-02  0:54 ` [Intel-wired-lan] [net-next PATCH v2 2/3] ixgbe: add support for XDP_TX action John Fastabend
2017-03-02 16:39   ` Alexander Duyck
2017-03-03 16:40     ` John Fastabend
2017-03-02  0:55 ` [Intel-wired-lan] [net-next PATCH v2 3/3] ixgbe: xdp support for adjust head John Fastabend
2017-03-02 19:43   ` Alexander Duyck
2017-03-03 17:07     ` John Fastabend
2017-03-02 16:22 ` [Intel-wired-lan] [net-next PATCH v2 1/3] ixgbe: add XDP support for pass and drop actions Alexander Duyck
2017-03-03 16:23   ` John Fastabend

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.