* [PATCH net-next 0/9] alx: add multi queue support
@ 2016-10-21 10:49 Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 1/9] alx: refactor descriptor allocation Tobias Regnery
                   ` (9 more replies)
  0 siblings, 10 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

This patchset lays the groundwork for multi queue support in the alx driver
and enables it for the tx path by default. The hardware supports up to
4 tx queues.

The rx path is a little bit harder because, based on the limited information
in the downstream driver, the hardware apparently supports up to 8 RSS queues
but has only one hardware descriptor ring on the rx side. The rx path will
therefore be handled in a separate patchset.
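
A rough sketch of the resulting layout, taken from the later patches in this
series (ALX_MAX_TX_QUEUES being the 4-queue hardware limit): one msi-x vector
is reserved for misc interrupts and each alx_napi instance gets its own ring
vector, with rx limited to a single queue for now:

        num_txq = min_t(int, num_online_cpus(), ALX_MAX_TX_QUEUES);
        num_rxq = 1;
        num_vec = max_t(int, num_txq, num_rxq) + 1;   /* +1 for misc vector */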

This work is based on the downstream driver at github.com/qca/alx

I had a hard time splitting these changes into reasonable parts because this
is my first larger kernel patchset, so please be patient if this is not the
right approach.

Tobias Regnery (9):
  alx: refactor descriptor allocation
  alx: extend data structures for multi queue support
  alx: add ability to allocate and free alx_napi structures
  alx: switch to per queue data structures
  alx: prepare interrupt functions for multiple queues
  alx: prepare resource allocation for multi queue support
  alx: prepare tx path for multi queue support
  alx: enable msi-x interrupts by default
  alx: enable multiple tx queues

 drivers/net/ethernet/atheros/alx/alx.h  |  36 ++-
 drivers/net/ethernet/atheros/alx/main.c | 554 ++++++++++++++++++++++----------
 2 files changed, 420 insertions(+), 170 deletions(-)

-- 
2.7.4

* [PATCH net-next 1/9] alx: refactor descriptor allocation
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-23 15:50   ` David Miller
  2016-10-21 10:49 ` [PATCH net-next 2/9] alx: extend data structures for multi queue support Tobias Regnery
                   ` (8 subsequent siblings)
  9 siblings, 1 reply; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Split the allocation of descriptor memory and the buffer allocation into
separate tx and rx functions. This is in preparation for multiple queues,
where we will need to iterate over the new functions.
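
Roughly, a later patch in this series iterates over the new functions like
this, threading the offset into the shared descriptor block (netdev_err()
messages trimmed):

        int i, offset = 0;

        for (i = 0; i < alx->num_txq; i++) {
                offset = alx_alloc_tx_ring(alx, alx->qnapi[i]->txq, offset);
                if (offset < 0)
                        return -ENOMEM;
        }

        offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
        if (offset < 0)
                return -ENOMEM;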

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 97 ++++++++++++++++++---------------
 1 file changed, 53 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index eccbacd96201..b7e67dd3d995 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -573,19 +573,41 @@ static int alx_set_mac_address(struct net_device *netdev, void *data)
 	return 0;
 }
 
-static int alx_alloc_descriptors(struct alx_priv *alx)
+static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
+			     int offset)
 {
-	alx->txq.bufs = kcalloc(alx->tx_ringsz,
-				sizeof(struct alx_buffer),
-				GFP_KERNEL);
-	if (!alx->txq.bufs)
+	txq->bufs = kcalloc(alx->tx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	if (!txq->bufs)
 		return -ENOMEM;
 
-	alx->rxq.bufs = kcalloc(alx->rx_ringsz,
-				sizeof(struct alx_buffer),
-				GFP_KERNEL);
-	if (!alx->rxq.bufs)
-		goto out_free;
+	txq->tpd = alx->descmem.virt + offset;
+	txq->tpd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_txd) * alx->tx_ringsz;
+
+	return offset;
+}
+
+static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
+			     int offset)
+{
+	rxq->bufs = kcalloc(alx->rx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	if (!rxq->bufs)
+		return -ENOMEM;
+
+	rxq->rrd = (void *)((u8 *)alx->descmem.virt + offset);
+	rxq->rrd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_rrd) * alx->rx_ringsz;
+
+	rxq->rfd = (void *)((u8 *)alx->descmem.virt + offset);
+	rxq->rfd_dma = alx->descmem.dma + offset;
+	offset += sizeof(struct alx_rfd) * alx->rx_ringsz;
+
+	return offset;
+}
+
+static int alx_alloc_rings(struct alx_priv *alx)
+{
+	int offset = 0;
 
 	/* physical tx/rx ring descriptors
 	 *
@@ -601,45 +623,23 @@ static int alx_alloc_descriptors(struct alx_priv *alx)
 						&alx->descmem.dma,
 						GFP_KERNEL);
 	if (!alx->descmem.virt)
-		goto out_free;
-
-	alx->txq.tpd = alx->descmem.virt;
-	alx->txq.tpd_dma = alx->descmem.dma;
+		return -ENOMEM;
 
-	/* alignment requirement for next block */
+	/* alignment requirements */
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
-
-	alx->rxq.rrd =
-		(void *)((u8 *)alx->descmem.virt +
-			 sizeof(struct alx_txd) * alx->tx_ringsz);
-	alx->rxq.rrd_dma = alx->descmem.dma +
-			   sizeof(struct alx_txd) * alx->tx_ringsz;
-
-	/* alignment requirement for next block */
 	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	alx->rxq.rfd =
-		(void *)((u8 *)alx->descmem.virt +
-			 sizeof(struct alx_txd) * alx->tx_ringsz +
-			 sizeof(struct alx_rrd) * alx->rx_ringsz);
-	alx->rxq.rfd_dma = alx->descmem.dma +
-			   sizeof(struct alx_txd) * alx->tx_ringsz +
-			   sizeof(struct alx_rrd) * alx->rx_ringsz;
-
-	return 0;
-out_free:
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
-	return -ENOMEM;
-}
-
-static int alx_alloc_rings(struct alx_priv *alx)
-{
-	int err;
+	offset = alx_alloc_tx_ring(alx, &alx->txq, offset);
+	if (offset < 0) {
+		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
+		goto out_free;
+	}
 
-	err = alx_alloc_descriptors(alx);
-	if (err)
-		return err;
+	offset = alx_alloc_rx_ring(alx, &alx->rxq, offset);
+	if (offset < 0) {
+		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
+		goto out_free;
+	}
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
@@ -647,7 +647,16 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
 
 	alx_reinit_rings(alx);
+
 	return 0;
+out_free:
+	kfree(alx->txq.bufs);
+	kfree(alx->rxq.bufs);
+	dma_free_coherent(&alx->hw.pdev->dev,
+			  alx->descmem.size,
+			  alx->descmem.virt,
+			  alx->descmem.dma);
+	return -ENOMEM;
 }
 
 static void alx_free_rings(struct alx_priv *alx)
-- 
2.7.4

* [PATCH net-next 2/9] alx: extend data structures for multi queue support
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 1/9] alx: refactor descriptor allocation Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 3/9] alx: add ability to allocate and free alx_napi structures Tobias Regnery
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Extend the driver data structures to be able to handle multiple queues.

Based on the downstream driver at github.com/qca/alx
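
To give an idea of how the new fields end up being used (roughly, per the
later patches in this series), per-queue state is reached through the qnapi
array instead of the global ring size and fixed registers:

        struct alx_tx_queue *txq = alx->qnapi[queue_idx]->txq;

        if (alx_tpd_avail(txq) < txq->count / 8)
                netif_tx_stop_queue(netdev_get_tx_queue(txq->netdev,
                                                        txq->queue_idx));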

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/alx.h | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 6cac919272ea..0859053525de 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -50,6 +50,10 @@ struct alx_buffer {
 };
 
 struct alx_rx_queue {
+	struct net_device *netdev;
+	struct device *dev;
+	struct alx_napi *np;
+
 	struct alx_rrd *rrd;
 	dma_addr_t rrd_dma;
 
@@ -58,16 +62,26 @@ struct alx_rx_queue {
 
 	struct alx_buffer *bufs;
 
+	u16 count;
 	u16 write_idx, read_idx;
 	u16 rrd_read_idx;
+	u16 queue_idx;
 };
 #define ALX_RX_ALLOC_THRESH	32
 
 struct alx_tx_queue {
+	struct net_device *netdev;
+	struct device *dev;
+
 	struct alx_txd *tpd;
 	dma_addr_t tpd_dma;
+
 	struct alx_buffer *bufs;
+
+	u16 count;
 	u16 write_idx, read_idx;
+	u16 queue_idx;
+	u16 p_reg, c_reg;
 };
 
 #define ALX_DEFAULT_TX_WORK 128
@@ -76,6 +90,18 @@ enum alx_device_quirks {
 	ALX_DEV_QUIRK_MSI_INTX_DISABLE_BUG = BIT(0),
 };
 
+struct alx_napi {
+	struct napi_struct	napi;
+	struct alx_priv		*alx;
+	struct alx_rx_queue	*rxq;
+	struct alx_tx_queue	*txq;
+	int			vec_idx;
+	u32			vec_mask;
+	char			irq_lbl[IFNAMSIZ + 8];
+};
+
+#define ALX_MAX_NAPIS 8
+
 #define ALX_FLAG_USING_MSIX	BIT(0)
 #define ALX_FLAG_USING_MSI	BIT(1)
 
@@ -96,6 +122,11 @@ struct alx_priv {
 		unsigned int size;
 	} descmem;
 
+	struct alx_napi *qnapi[ALX_MAX_NAPIS];
+	int num_txq;
+	int num_rxq;
+	int num_napi;
+
 	/* protect int_mask updates */
 	spinlock_t irq_lock;
 	u32 int_mask;
-- 
2.7.4

* [PATCH net-next 3/9] alx: add ability to allocate and free alx_napi structures
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 1/9] alx: refactor descriptor allocation Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 2/9] alx: extend data structures for multi queue support Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 4/9] alx: switch to per queue data structures Tobias Regnery
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Add new functions to allocate and free the alx_napi structures and use them
in __alx_open and __alx_stop. We only allocate one of these structures for
now, as the rest of the driver is not yet ready for multiple queues.

We move the setup of the interrupt mask and the call to netif_napi_add into
the new function because these must later be adjusted on a per-queue basis.
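
The resulting ordering in the open and stop paths is roughly:

        /* __alx_open() */
        err = alx_alloc_napis(alx);
        if (err)
                goto out_disable_adv_intr;

        err = alx_alloc_rings(alx);
        if (err)
                goto out_free_rings;

        /* __alx_stop() */
        alx_halt(alx);
        alx_free_irq(alx);
        alx_free_rings(alx);
        alx_free_napis(alx);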

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 99 ++++++++++++++++++++++++++-------
 1 file changed, 78 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index b7e67dd3d995..77b225535600 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -632,45 +632,96 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	offset = alx_alloc_tx_ring(alx, &alx->txq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
-		goto out_free;
+		return -ENOMEM;
 	}
 
 	offset = alx_alloc_rx_ring(alx, &alx->rxq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
-		goto out_free;
+		return -ENOMEM;
 	}
 
-	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
-
-	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
-
 	alx_reinit_rings(alx);
 
 	return 0;
-out_free:
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
-	dma_free_coherent(&alx->hw.pdev->dev,
-			  alx->descmem.size,
-			  alx->descmem.virt,
-			  alx->descmem.dma);
-	return -ENOMEM;
 }
 
 static void alx_free_rings(struct alx_priv *alx)
 {
-	netif_napi_del(&alx->napi);
 	alx_free_buffers(alx);
 
 	kfree(alx->txq.bufs);
 	kfree(alx->rxq.bufs);
 
-	dma_free_coherent(&alx->hw.pdev->dev,
-			  alx->descmem.size,
-			  alx->descmem.virt,
-			  alx->descmem.dma);
+	if (alx->descmem.virt)
+		dma_free_coherent(&alx->hw.pdev->dev,
+				  alx->descmem.size,
+				  alx->descmem.virt,
+				  alx->descmem.dma);
+}
+
+static void alx_free_napis(struct alx_priv *alx)
+{
+	struct alx_napi *np;
+
+	np = alx->qnapi[0];
+	if (!np)
+		return;
+
+	netif_napi_del(&alx->napi);
+	kfree(np->txq);
+	kfree(np->rxq);
+	kfree(np);
+	alx->qnapi[0] = NULL;
+}
+
+static int alx_alloc_napis(struct alx_priv *alx)
+{
+	struct alx_napi *np;
+	struct alx_rx_queue *rxq;
+	struct alx_tx_queue *txq;
+
+	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
+	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
+
+	/* allocate alx_napi structures */
+	np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
+	if (!np)
+		goto err_out;
+
+	np->alx = alx;
+	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
+	alx->qnapi[0] = np;
+
+	/* allocate tx queues */
+	np = alx->qnapi[0];
+	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
+	if (!txq)
+		goto err_out;
+
+	np->txq = txq;
+	txq->count = alx->tx_ringsz;
+	txq->netdev = alx->dev;
+	txq->dev = &alx->hw.pdev->dev;
+
+	/* allocate rx queues */
+	np = alx->qnapi[0];
+	rxq = kzalloc(sizeof(*rxq), GFP_KERNEL);
+	if (!rxq)
+		goto err_out;
+
+	np->rxq = rxq;
+	rxq->np = alx->qnapi[0];
+	rxq->count = alx->rx_ringsz;
+	rxq->netdev = alx->dev;
+	rxq->dev = &alx->hw.pdev->dev;
+
+	return 0;
+
+err_out:
+	netdev_err(alx->dev, "error allocating internal structures\n");
+	alx_free_napis(alx);
+	return -ENOMEM;
 }
 
 static void alx_config_vector_mapping(struct alx_priv *alx)
@@ -1031,10 +1082,14 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 	if (!resume)
 		netif_carrier_off(alx->dev);
 
-	err = alx_alloc_rings(alx);
+	err = alx_alloc_napis(alx);
 	if (err)
 		goto out_disable_adv_intr;
 
+	err = alx_alloc_rings(alx);
+	if (err)
+		goto out_free_rings;
+
 	alx_configure(alx);
 
 	err = alx_request_irq(alx);
@@ -1054,6 +1109,7 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 
 out_free_rings:
 	alx_free_rings(alx);
+	alx_free_napis(alx);
 out_disable_adv_intr:
 	alx_disable_advanced_intr(alx);
 	return err;
@@ -1064,6 +1120,7 @@ static void __alx_stop(struct alx_priv *alx)
 	alx_halt(alx);
 	alx_free_irq(alx);
 	alx_free_rings(alx);
+	alx_free_napis(alx);
 }
 
 static const char *alx_speed_desc(struct alx_hw *hw)
-- 
2.7.4

* [PATCH net-next 4/9] alx: switch to per queue data structures
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
                   ` (2 preceding siblings ...)
  2016-10-21 10:49 ` [PATCH net-next 3/9] alx: add ability to allocate and free alx_napi structures Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 5/9] alx: prepare interrupt functions for multiple queues Tobias Regnery
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Remove the tx and rx queue structures from the alx_priv structure and switch
everything over to the queue pointers in the alx_napi structure.
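
Roughly, the access pattern changes like this; helper functions now take the
queue and recover the alx instance from it where needed:

        /* before: helpers took the alx instance */
        struct alx_tx_queue *txq = &alx->txq;

        /* after: helpers take the queue itself */
        static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
        {
                struct alx_priv *alx = netdev_priv(txq->netdev);
                ...
        }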

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/alx.h  |   5 -
 drivers/net/ethernet/atheros/alx/main.c | 183 ++++++++++++++++----------------
 2 files changed, 93 insertions(+), 95 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h
index 0859053525de..d4a409139ea2 100644
--- a/drivers/net/ethernet/atheros/alx/alx.h
+++ b/drivers/net/ethernet/atheros/alx/alx.h
@@ -113,7 +113,6 @@ struct alx_priv {
 	/* msi-x vectors */
 	int num_vec;
 	struct msix_entry *msix_entries;
-	char irq_lbl[IFNAMSIZ + 8];
 
 	/* all descriptor memory */
 	struct {
@@ -135,10 +134,6 @@ struct alx_priv {
 	unsigned int rx_ringsz;
 	unsigned int rxbuf_size;
 
-	struct napi_struct napi;
-	struct alx_tx_queue txq;
-	struct alx_rx_queue rxq;
-
 	struct work_struct link_check_wk;
 	struct work_struct reset_wk;
 
diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 77b225535600..5d058e075752 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -55,12 +55,12 @@ static bool msix = false;
 module_param(msix, bool, 0);
 MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
 
-static void alx_free_txbuf(struct alx_priv *alx, int entry)
+static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
-	struct alx_buffer *txb = &alx->txq.bufs[entry];
+	struct alx_buffer *txb = &txq->bufs[entry];
 
 	if (dma_unmap_len(txb, size)) {
-		dma_unmap_single(&alx->hw.pdev->dev,
+		dma_unmap_single(txq->dev,
 				 dma_unmap_addr(txb, dma),
 				 dma_unmap_len(txb, size),
 				 DMA_TO_DEVICE);
@@ -75,7 +75,7 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry)
 
 static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
+	struct alx_rx_queue *rxq = alx->qnapi[0]->rxq;
 	struct sk_buff *skb;
 	struct alx_buffer *cur_buf;
 	dma_addr_t dma;
@@ -143,22 +143,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	return count;
 }
 
-static inline int alx_tpd_avail(struct alx_priv *alx)
+static inline int alx_tpd_avail(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
-
 	if (txq->write_idx >= txq->read_idx)
-		return alx->tx_ringsz + txq->read_idx - txq->write_idx - 1;
+		return txq->count + txq->read_idx - txq->write_idx - 1;
 	return txq->read_idx - txq->write_idx - 1;
 }
 
-static bool alx_clean_tx_irq(struct alx_priv *alx)
+static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
+	struct alx_priv *alx;
 	u16 hw_read_idx, sw_read_idx;
 	unsigned int total_bytes = 0, total_packets = 0;
 	int budget = ALX_DEFAULT_TX_WORK;
 
+	alx = netdev_priv(txq->netdev);
+
 	sw_read_idx = txq->read_idx;
 	hw_read_idx = alx_read_mem16(&alx->hw, ALX_TPD_PRI0_CIDX);
 
@@ -173,19 +173,19 @@ static bool alx_clean_tx_irq(struct alx_priv *alx)
 				budget--;
 			}
 
-			alx_free_txbuf(alx, sw_read_idx);
+			alx_free_txbuf(txq, sw_read_idx);
 
-			if (++sw_read_idx == alx->tx_ringsz)
+			if (++sw_read_idx == txq->count)
 				sw_read_idx = 0;
 		}
 		txq->read_idx = sw_read_idx;
 
-		netdev_completed_queue(alx->dev, total_packets, total_bytes);
+		netdev_completed_queue(txq->netdev, total_packets, total_bytes);
 	}
 
-	if (netif_queue_stopped(alx->dev) && netif_carrier_ok(alx->dev) &&
-	    alx_tpd_avail(alx) > alx->tx_ringsz/4)
-		netif_wake_queue(alx->dev);
+	if (netif_queue_stopped(txq->netdev) && netif_carrier_ok(txq->netdev) &&
+	    alx_tpd_avail(txq) > txq->count / 4)
+		netif_wake_queue(txq->netdev);
 
 	return sw_read_idx == hw_read_idx;
 }
@@ -200,15 +200,17 @@ static void alx_schedule_reset(struct alx_priv *alx)
 	schedule_work(&alx->reset_wk);
 }
 
-static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
+static int alx_clean_rx_irq(struct alx_rx_queue *rxq, int budget)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
+	struct alx_priv *alx;
 	struct alx_rrd *rrd;
 	struct alx_buffer *rxb;
 	struct sk_buff *skb;
 	u16 length, rfd_cleaned = 0;
 	int work = 0;
 
+	alx = netdev_priv(rxq->netdev);
+
 	while (work < budget) {
 		rrd = &rxq->rrd[rxq->rrd_read_idx];
 		if (!(rrd->word3 & cpu_to_le32(1 << RRD_UPDATED_SHIFT)))
@@ -224,7 +226,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 		}
 
 		rxb = &rxq->bufs[rxq->read_idx];
-		dma_unmap_single(&alx->hw.pdev->dev,
+		dma_unmap_single(rxq->dev,
 				 dma_unmap_addr(rxb, dma),
 				 dma_unmap_len(rxb, size),
 				 DMA_FROM_DEVICE);
@@ -242,7 +244,7 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 		length = ALX_GET_FIELD(le32_to_cpu(rrd->word3),
 				       RRD_PKTLEN) - ETH_FCS_LEN;
 		skb_put(skb, length);
-		skb->protocol = eth_type_trans(skb, alx->dev);
+		skb->protocol = eth_type_trans(skb, rxq->netdev);
 
 		skb_checksum_none_assert(skb);
 		if (alx->dev->features & NETIF_F_RXCSUM &&
@@ -259,13 +261,13 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 			}
 		}
 
-		napi_gro_receive(&alx->napi, skb);
+		napi_gro_receive(&rxq->np->napi, skb);
 		work++;
 
 next_pkt:
-		if (++rxq->read_idx == alx->rx_ringsz)
+		if (++rxq->read_idx == rxq->count)
 			rxq->read_idx = 0;
-		if (++rxq->rrd_read_idx == alx->rx_ringsz)
+		if (++rxq->rrd_read_idx == rxq->count)
 			rxq->rrd_read_idx = 0;
 
 		if (++rfd_cleaned > ALX_RX_ALLOC_THRESH)
@@ -280,19 +282,20 @@ static int alx_clean_rx_irq(struct alx_priv *alx, int budget)
 
 static int alx_poll(struct napi_struct *napi, int budget)
 {
-	struct alx_priv *alx = container_of(napi, struct alx_priv, napi);
+	struct alx_napi *np = container_of(napi, struct alx_napi, napi);
+	struct alx_priv *alx = np->alx;
 	struct alx_hw *hw = &alx->hw;
 	unsigned long flags;
 	bool tx_complete;
 	int work;
 
-	tx_complete = alx_clean_tx_irq(alx);
-	work = alx_clean_rx_irq(alx, budget);
+	tx_complete = alx_clean_tx_irq(np->txq);
+	work = alx_clean_rx_irq(np->rxq, budget);
 
 	if (!tx_complete || work == budget)
 		return budget;
 
-	napi_complete(&alx->napi);
+	napi_complete(&np->napi);
 
 	/* enable interrupt */
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
@@ -350,7 +353,7 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 		goto out;
 
 	if (intr & (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0)) {
-		napi_schedule(&alx->napi);
+		napi_schedule(&alx->qnapi[0]->napi);
 		/* mask rx/tx interrupt, enable them when napi complete */
 		alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 		alx_write_mem32(hw, ALX_IMR, alx->int_mask);
@@ -365,15 +368,15 @@ static irqreturn_t alx_intr_handle(struct alx_priv *alx, u32 intr)
 
 static irqreturn_t alx_intr_msix_ring(int irq, void *data)
 {
-	struct alx_priv *alx = data;
-	struct alx_hw *hw = &alx->hw;
+	struct alx_napi *np = data;
+	struct alx_hw *hw = &np->alx->hw;
 
 	/* mask interrupt to ACK chip */
 	alx_mask_msix(hw, 1, true);
 	/* clear interrupt status */
 	alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
 
-	napi_schedule(&alx->napi);
+	napi_schedule(&np->napi);
 
 	return IRQ_HANDLED;
 }
@@ -428,59 +431,58 @@ static void alx_init_ring_ptrs(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
 	u32 addr_hi = ((u64)alx->descmem.dma) >> 32;
+	struct alx_napi *np = alx->qnapi[0];
 
-	alx->rxq.read_idx = 0;
-	alx->rxq.write_idx = 0;
-	alx->rxq.rrd_read_idx = 0;
+	np->rxq->read_idx = 0;
+	np->rxq->write_idx = 0;
+	np->rxq->rrd_read_idx = 0;
 	alx_write_mem32(hw, ALX_RX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_RRD_ADDR_LO, alx->rxq.rrd_dma);
+	alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
 	alx_write_mem32(hw, ALX_RRD_RING_SZ, alx->rx_ringsz);
-	alx_write_mem32(hw, ALX_RFD_ADDR_LO, alx->rxq.rfd_dma);
+	alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
 	alx_write_mem32(hw, ALX_RFD_RING_SZ, alx->rx_ringsz);
 	alx_write_mem32(hw, ALX_RFD_BUF_SZ, alx->rxbuf_size);
 
-	alx->txq.read_idx = 0;
-	alx->txq.write_idx = 0;
+	np->txq->read_idx = 0;
+	np->txq->write_idx = 0;
 	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, alx->txq.tpd_dma);
+	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, np->txq->tpd_dma);
 	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
 
 	/* load these pointers into the chip */
 	alx_write_mem32(hw, ALX_SRAM9, ALX_SRAM_LOAD_PTR);
 }
 
-static void alx_free_txring_buf(struct alx_priv *alx)
+static void alx_free_txring_buf(struct alx_tx_queue *txq)
 {
-	struct alx_tx_queue *txq = &alx->txq;
 	int i;
 
 	if (!txq->bufs)
 		return;
 
-	for (i = 0; i < alx->tx_ringsz; i++)
-		alx_free_txbuf(alx, i);
+	for (i = 0; i < txq->count; i++)
+		alx_free_txbuf(txq, i);
 
-	memset(txq->bufs, 0, alx->tx_ringsz * sizeof(struct alx_buffer));
-	memset(txq->tpd, 0, alx->tx_ringsz * sizeof(struct alx_txd));
+	memset(txq->bufs, 0, txq->count * sizeof(struct alx_buffer));
+	memset(txq->tpd, 0, txq->count * sizeof(struct alx_txd));
 	txq->write_idx = 0;
 	txq->read_idx = 0;
 
-	netdev_reset_queue(alx->dev);
+	netdev_reset_queue(txq->netdev);
 }
 
-static void alx_free_rxring_buf(struct alx_priv *alx)
+static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 {
-	struct alx_rx_queue *rxq = &alx->rxq;
 	struct alx_buffer *cur_buf;
 	u16 i;
 
 	if (rxq == NULL)
 		return;
 
-	for (i = 0; i < alx->rx_ringsz; i++) {
+	for (i = 0; i < rxq->count; i++) {
 		cur_buf = rxq->bufs + i;
 		if (cur_buf->skb) {
-			dma_unmap_single(&alx->hw.pdev->dev,
+			dma_unmap_single(rxq->dev,
 					 dma_unmap_addr(cur_buf, dma),
 					 dma_unmap_len(cur_buf, size),
 					 DMA_FROM_DEVICE);
@@ -498,8 +500,8 @@ static void alx_free_rxring_buf(struct alx_priv *alx)
 
 static void alx_free_buffers(struct alx_priv *alx)
 {
-	alx_free_txring_buf(alx);
-	alx_free_rxring_buf(alx);
+	alx_free_txring_buf(alx->qnapi[0]->txq);
+	alx_free_rxring_buf(alx->qnapi[0]->rxq);
 }
 
 static int alx_reinit_rings(struct alx_priv *alx)
@@ -576,13 +578,13 @@ static int alx_set_mac_address(struct net_device *netdev, void *data)
 static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
 			     int offset)
 {
-	txq->bufs = kcalloc(alx->tx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	txq->bufs = kcalloc(txq->count, sizeof(struct alx_buffer), GFP_KERNEL);
 	if (!txq->bufs)
 		return -ENOMEM;
 
 	txq->tpd = alx->descmem.virt + offset;
 	txq->tpd_dma = alx->descmem.dma + offset;
-	offset += sizeof(struct alx_txd) * alx->tx_ringsz;
+	offset += sizeof(struct alx_txd) * txq->count;
 
 	return offset;
 }
@@ -590,17 +592,17 @@ static int alx_alloc_tx_ring(struct alx_priv *alx, struct alx_tx_queue *txq,
 static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
 			     int offset)
 {
-	rxq->bufs = kcalloc(alx->rx_ringsz, sizeof(struct alx_buffer), GFP_KERNEL);
+	rxq->bufs = kcalloc(rxq->count, sizeof(struct alx_buffer), GFP_KERNEL);
 	if (!rxq->bufs)
 		return -ENOMEM;
 
 	rxq->rrd = (void *)((u8 *)alx->descmem.virt + offset);
 	rxq->rrd_dma = alx->descmem.dma + offset;
-	offset += sizeof(struct alx_rrd) * alx->rx_ringsz;
+	offset += sizeof(struct alx_rrd) * rxq->count;
 
 	rxq->rfd = (void *)((u8 *)alx->descmem.virt + offset);
 	rxq->rfd_dma = alx->descmem.dma + offset;
-	offset += sizeof(struct alx_rfd) * alx->rx_ringsz;
+	offset += sizeof(struct alx_rfd) * rxq->count;
 
 	return offset;
 }
@@ -629,13 +631,13 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
 	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	offset = alx_alloc_tx_ring(alx, &alx->txq, offset);
+	offset = alx_alloc_tx_ring(alx, alx->qnapi[0]->txq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
 		return -ENOMEM;
 	}
 
-	offset = alx_alloc_rx_ring(alx, &alx->rxq, offset);
+	offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
 	if (offset < 0) {
 		netdev_err(alx->dev, "Allocation of rx buffer failed!\n");
 		return -ENOMEM;
@@ -648,10 +650,11 @@ static int alx_alloc_rings(struct alx_priv *alx)
 
 static void alx_free_rings(struct alx_priv *alx)
 {
+
 	alx_free_buffers(alx);
 
-	kfree(alx->txq.bufs);
-	kfree(alx->rxq.bufs);
+	kfree(alx->qnapi[0]->txq->bufs);
+	kfree(alx->qnapi[0]->rxq->bufs);
 
 	if (alx->descmem.virt)
 		dma_free_coherent(&alx->hw.pdev->dev,
@@ -668,7 +671,7 @@ static void alx_free_napis(struct alx_priv *alx)
 	if (!np)
 		return;
 
-	netif_napi_del(&alx->napi);
+	netif_napi_del(&np->napi);
 	kfree(np->txq);
 	kfree(np->rxq);
 	kfree(np);
@@ -690,7 +693,7 @@ static int alx_alloc_napis(struct alx_priv *alx)
 		goto err_out;
 
 	np->alx = alx;
-	netif_napi_add(alx->dev, &alx->napi, alx_poll, 64);
+	netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
 	alx->qnapi[0] = np;
 
 	/* allocate tx queues */
@@ -768,6 +771,7 @@ static int alx_request_msix(struct alx_priv *alx)
 {
 	struct net_device *netdev = alx->dev;
 	int i, err, vector = 0, free_vector = 0;
+	struct alx_napi *np = alx->qnapi[0];
 
 	err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc,
 			  0, netdev->name, alx);
@@ -775,10 +779,10 @@ static int alx_request_msix(struct alx_priv *alx)
 		goto out_err;
 
 	vector++;
-	sprintf(alx->irq_lbl, "%s-TxRx-0", netdev->name);
+	sprintf(np->irq_lbl, "%s-TxRx-0", netdev->name);
 
 	err = request_irq(alx->msix_entries[vector].vector,
-			  alx_intr_msix_ring, 0, alx->irq_lbl, alx);
+			  alx_intr_msix_ring, 0, np->irq_lbl, np);
 		if (err)
 			goto out_free;
 
@@ -789,7 +793,7 @@ static int alx_request_msix(struct alx_priv *alx)
 
 	vector--;
 	for (i = 0; i < vector; i++)
-		free_irq(alx->msix_entries[free_vector++].vector, alx);
+		free_irq(alx->msix_entries[free_vector++].vector, alx->qnapi[0]);
 
 out_err:
 	return err;
@@ -905,12 +909,12 @@ static int alx_request_irq(struct alx_priv *alx)
 static void alx_free_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
-	int i;
+	int i, vector = 0;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		/* we have only 2 vectors without multi queue support */
-		for (i = 0; i < 2; i++)
-			free_irq(alx->msix_entries[i].vector, alx);
+		free_irq(alx->msix_entries[vector++].vector, alx);
+		free_irq(alx->msix_entries[vector++].vector, alx->qnapi[0]);
 	} else {
 		free_irq(pdev->irq, alx);
 	}
@@ -999,7 +1003,7 @@ static void alx_netif_stop(struct alx_priv *alx)
 	if (netif_carrier_ok(alx->dev)) {
 		netif_carrier_off(alx->dev);
 		netif_tx_disable(alx->dev);
-		napi_disable(&alx->napi);
+		napi_disable(&alx->qnapi[0]->napi);
 	}
 }
 
@@ -1069,7 +1073,7 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 static void alx_netif_start(struct alx_priv *alx)
 {
 	netif_tx_wake_all_queues(alx->dev);
-	napi_enable(&alx->napi);
+	napi_enable(&alx->qnapi[0]->napi);
 	netif_carrier_on(alx->dev);
 }
 
@@ -1303,9 +1307,8 @@ static int alx_tso(struct sk_buff *skb, struct alx_txd *first)
 	return 1;
 }
 
-static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
+static int alx_map_tx_skb(struct alx_tx_queue *txq, struct sk_buff *skb)
 {
-	struct alx_tx_queue *txq = &alx->txq;
 	struct alx_txd *tpd, *first_tpd;
 	dma_addr_t dma;
 	int maplen, f, first_idx = txq->write_idx;
@@ -1314,7 +1317,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	tpd = first_tpd;
 
 	if (tpd->word1 & (1 << TPD_LSO_V2_SHIFT)) {
-		if (++txq->write_idx == alx->tx_ringsz)
+		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
 
 		tpd = &txq->tpd[txq->write_idx];
@@ -1324,9 +1327,9 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	}
 
 	maplen = skb_headlen(skb);
-	dma = dma_map_single(&alx->hw.pdev->dev, skb->data, maplen,
+	dma = dma_map_single(txq->dev, skb->data, maplen,
 			     DMA_TO_DEVICE);
-	if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+	if (dma_mapping_error(txq->dev, dma))
 		goto err_dma;
 
 	dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
@@ -1340,16 +1343,16 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 
 		frag = &skb_shinfo(skb)->frags[f];
 
-		if (++txq->write_idx == alx->tx_ringsz)
+		if (++txq->write_idx == txq->count)
 			txq->write_idx = 0;
 		tpd = &txq->tpd[txq->write_idx];
 
 		tpd->word1 = first_tpd->word1;
 
 		maplen = skb_frag_size(frag);
-		dma = skb_frag_dma_map(&alx->hw.pdev->dev, frag, 0,
+		dma = skb_frag_dma_map(txq->dev, frag, 0,
 				       maplen, DMA_TO_DEVICE);
-		if (dma_mapping_error(&alx->hw.pdev->dev, dma))
+		if (dma_mapping_error(txq->dev, dma))
 			goto err_dma;
 		dma_unmap_len_set(&txq->bufs[txq->write_idx], size, maplen);
 		dma_unmap_addr_set(&txq->bufs[txq->write_idx], dma, dma);
@@ -1362,7 +1365,7 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 	tpd->word1 |= cpu_to_le32(1 << TPD_EOP_SHIFT);
 	txq->bufs[txq->write_idx].skb = skb;
 
-	if (++txq->write_idx == alx->tx_ringsz)
+	if (++txq->write_idx == txq->count)
 		txq->write_idx = 0;
 
 	return 0;
@@ -1370,8 +1373,8 @@ static int alx_map_tx_skb(struct alx_priv *alx, struct sk_buff *skb)
 err_dma:
 	f = first_idx;
 	while (f != txq->write_idx) {
-		alx_free_txbuf(alx, f);
-		if (++f == alx->tx_ringsz)
+		alx_free_txbuf(txq, f);
+		if (++f == txq->count)
 			f = 0;
 	}
 	return -ENOMEM;
@@ -1381,12 +1384,12 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 				  struct net_device *netdev)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
-	struct alx_tx_queue *txq = &alx->txq;
+	struct alx_tx_queue *txq = alx->qnapi[0]->txq;
 	struct alx_txd *first;
 	int tso;
 
-	if (alx_tpd_avail(alx) < alx_tpd_req(skb)) {
-		netif_stop_queue(alx->dev);
+	if (alx_tpd_avail(txq) < alx_tpd_req(skb)) {
+		netif_stop_queue(txq->netdev);
 		goto drop;
 	}
 
@@ -1399,17 +1402,17 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 	else if (!tso && alx_tx_csum(skb, first))
 		goto drop;
 
-	if (alx_map_tx_skb(alx, skb) < 0)
+	if (alx_map_tx_skb(txq, skb) < 0)
 		goto drop;
 
-	netdev_sent_queue(alx->dev, skb->len);
+	netdev_sent_queue(txq->netdev, skb->len);
 
 	/* flush updates before updating hardware */
 	wmb();
 	alx_write_mem16(&alx->hw, ALX_TPD_PRI0_PIDX, txq->write_idx);
 
-	if (alx_tpd_avail(alx) < alx->tx_ringsz/8)
-		netif_stop_queue(alx->dev);
+	if (alx_tpd_avail(txq) < txq->count / 8)
+		netif_stop_queue(txq->netdev);
 
 	return NETDEV_TX_OK;
 
@@ -1478,7 +1481,7 @@ static void alx_poll_controller(struct net_device *netdev)
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		alx_intr_msix_misc(0, alx);
-		alx_intr_msix_ring(0, alx);
+		alx_intr_msix_ring(0, alx->qnapi[0]);
 	} else if (alx->flags & ALX_FLAG_USING_MSI)
 		alx_intr_msi(0, alx);
 	else
-- 
2.7.4

* [PATCH net-next 5/9] alx: prepare interrupt functions for multiple queues
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
                   ` (3 preceding siblings ...)
  2016-10-21 10:49 ` [PATCH net-next 4/9] alx: switch to per queue data structures Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 6/9] alx: prepare resource allocation for multi queue support Tobias Regnery
                   ` (4 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Extend the interrupt bringup code and the interrupt handler for msi-x
interrupts in order to handle multiple queues.

We must change the poll function because with multiple queues it is possible
that an alx_napi structure has only a tx or only an rx queue pointer.
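
With that change, alx_poll() looks roughly like:

        bool tx_complete = true;
        int work = 0;

        if (np->txq)
                tx_complete = alx_clean_tx_irq(np->txq);
        if (np->rxq)
                work = alx_clean_rx_irq(np->rxq, budget);

        if (!tx_complete || work == budget)
                return budget;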

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 132 +++++++++++++++++++++++++-------
 1 file changed, 105 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 5d058e075752..624419968df5 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -286,11 +286,13 @@ static int alx_poll(struct napi_struct *napi, int budget)
 	struct alx_priv *alx = np->alx;
 	struct alx_hw *hw = &alx->hw;
 	unsigned long flags;
-	bool tx_complete;
-	int work;
+	bool tx_complete = true;
+	int work = 0;
 
-	tx_complete = alx_clean_tx_irq(np->txq);
-	work = alx_clean_rx_irq(np->rxq, budget);
+	if (np->txq)
+		tx_complete = alx_clean_tx_irq(np->txq);
+	if (np->rxq)
+		work = alx_clean_rx_irq(np->rxq, budget);
 
 	if (!tx_complete || work == budget)
 		return budget;
@@ -299,7 +301,7 @@ static int alx_poll(struct napi_struct *napi, int budget)
 
 	/* enable interrupt */
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		alx_mask_msix(hw, 1, false);
+		alx_mask_msix(hw, np->vec_idx, false);
 	} else {
 		spin_lock_irqsave(&alx->irq_lock, flags);
 		alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
@@ -372,9 +374,9 @@ static irqreturn_t alx_intr_msix_ring(int irq, void *data)
 	struct alx_hw *hw = &np->alx->hw;
 
 	/* mask interrupt to ACK chip */
-	alx_mask_msix(hw, 1, true);
+	alx_mask_msix(hw, np->vec_idx, true);
 	/* clear interrupt status */
-	alx_write_mem32(hw, ALX_ISR, (ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0));
+	alx_write_mem32(hw, ALX_ISR, np->vec_mask);
 
 	napi_schedule(&np->napi);
 
@@ -678,6 +680,13 @@ static void alx_free_napis(struct alx_priv *alx)
 	alx->qnapi[0] = NULL;
 }
 
+static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
+				   ALX_ISR_TX_Q2, ALX_ISR_TX_Q3};
+static const u32 rx_vect_mask[] = {ALX_ISR_RX_Q0, ALX_ISR_RX_Q1,
+				   ALX_ISR_RX_Q2, ALX_ISR_RX_Q3,
+				   ALX_ISR_RX_Q4, ALX_ISR_RX_Q5,
+				   ALX_ISR_RX_Q6, ALX_ISR_RX_Q7};
+
 static int alx_alloc_napis(struct alx_priv *alx)
 {
 	struct alx_napi *np;
@@ -685,7 +694,6 @@ static int alx_alloc_napis(struct alx_priv *alx)
 	struct alx_tx_queue *txq;
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
-	alx->int_mask |= ALX_ISR_TX_Q0 | ALX_ISR_RX_Q0;
 
 	/* allocate alx_napi structures */
 	np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
@@ -703,9 +711,12 @@ static int alx_alloc_napis(struct alx_priv *alx)
 		goto err_out;
 
 	np->txq = txq;
+	txq->queue_idx = 0;
 	txq->count = alx->tx_ringsz;
 	txq->netdev = alx->dev;
 	txq->dev = &alx->hw.pdev->dev;
+	np->vec_mask |= tx_vect_mask[0];
+	alx->int_mask |= tx_vect_mask[0];
 
 	/* allocate rx queues */
 	np = alx->qnapi[0];
@@ -715,9 +726,12 @@ static int alx_alloc_napis(struct alx_priv *alx)
 
 	np->rxq = rxq;
 	rxq->np = alx->qnapi[0];
+	rxq->queue_idx = 0;
 	rxq->count = alx->rx_ringsz;
 	rxq->netdev = alx->dev;
 	rxq->dev = &alx->hw.pdev->dev;
+	np->vec_mask |= rx_vect_mask[0];
+	alx->int_mask |= rx_vect_mask[0];
 
 	return 0;
 
@@ -727,24 +741,43 @@ static int alx_alloc_napis(struct alx_priv *alx)
 	return -ENOMEM;
 }
 
+static const int txq_vec_mapping_shift[] = {
+	0, ALX_MSI_MAP_TBL1_TXQ0_SHIFT,
+	0, ALX_MSI_MAP_TBL1_TXQ1_SHIFT,
+	1, ALX_MSI_MAP_TBL2_TXQ2_SHIFT,
+	1, ALX_MSI_MAP_TBL2_TXQ3_SHIFT,
+};
+
 static void alx_config_vector_mapping(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
-	u32 tbl = 0;
+	u32 tbl[2] = {0, 0};
+	int i, vector, idx, shift;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		tbl |= 1 << ALX_MSI_MAP_TBL1_TXQ0_SHIFT;
-		tbl |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
+		/* tx mappings */
+		for (i = 0, vector = 1; i < alx->num_txq; i++, vector++) {
+			idx = txq_vec_mapping_shift[i * 2];
+			shift = txq_vec_mapping_shift[i * 2 + 1];
+			tbl[idx] |= vector << shift;
+		}
+
+		/* rx mapping */
+		tbl[0] |= 1 << ALX_MSI_MAP_TBL1_RXQ0_SHIFT;
 	}
 
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl);
-	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, 0);
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL1, tbl[0]);
+	alx_write_mem32(hw, ALX_MSI_MAP_TBL2, tbl[1]);
 	alx_write_mem32(hw, ALX_MSI_ID_MAP, 0);
 }
 
 static bool alx_enable_msix(struct alx_priv *alx)
 {
-	int i, err, num_vec = 2;
+	int i, err, num_vec, num_txq, num_rxq;
+
+	num_txq = 1;
+	num_rxq = 1;
+	num_vec = max_t(int, num_txq, num_rxq) + 1;
 
 	alx->msix_entries = kcalloc(num_vec, sizeof(struct msix_entry),
 				    GFP_KERNEL);
@@ -764,6 +797,10 @@ static bool alx_enable_msix(struct alx_priv *alx)
 	}
 
 	alx->num_vec = num_vec;
+	alx->num_napi = num_vec - 1;
+	alx->num_txq = num_txq;
+	alx->num_rxq = num_rxq;
+
 	return true;
 }
 
@@ -771,21 +808,35 @@ static int alx_request_msix(struct alx_priv *alx)
 {
 	struct net_device *netdev = alx->dev;
 	int i, err, vector = 0, free_vector = 0;
-	struct alx_napi *np = alx->qnapi[0];
 
 	err = request_irq(alx->msix_entries[0].vector, alx_intr_msix_misc,
 			  0, netdev->name, alx);
 	if (err)
 		goto out_err;
 
-	vector++;
-	sprintf(np->irq_lbl, "%s-TxRx-0", netdev->name);
-
-	err = request_irq(alx->msix_entries[vector].vector,
-			  alx_intr_msix_ring, 0, np->irq_lbl, np);
+	for (i = 0; i < alx->num_napi; i++) {
+		struct alx_napi *np = alx->qnapi[i];
+
+		vector++;
+
+		if (np->txq && np->rxq)
+			sprintf(np->irq_lbl, "%s-TxRx-%u", netdev->name,
+				np->txq->queue_idx);
+		else if (np->txq)
+			sprintf(np->irq_lbl, "%s-tx-%u", netdev->name,
+				np->txq->queue_idx);
+		else if (np->rxq)
+			sprintf(np->irq_lbl, "%s-rx-%u", netdev->name,
+				np->rxq->queue_idx);
+		else
+			sprintf(np->irq_lbl, "%s-unused", netdev->name);
+
+		np->vec_idx = vector;
+		err = request_irq(alx->msix_entries[vector].vector,
+				  alx_intr_msix_ring, 0, np->irq_lbl, np);
 		if (err)
 			goto out_free;
-
+	}
 	return 0;
 
 out_free:
@@ -793,7 +844,8 @@ static int alx_request_msix(struct alx_priv *alx)
 
 	vector--;
 	for (i = 0; i < vector; i++)
-		free_irq(alx->msix_entries[free_vector++].vector, alx->qnapi[0]);
+		free_irq(alx->msix_entries[free_vector++].vector,
+			 alx->qnapi[i]);
 
 out_err:
 	return err;
@@ -808,6 +860,9 @@ static void alx_init_intr(struct alx_priv *alx, bool msix)
 
 	if (!(alx->flags & ALX_FLAG_USING_MSIX)) {
 		alx->num_vec = 1;
+		alx->num_napi = 1;
+		alx->num_txq = 1;
+		alx->num_rxq = 1;
 
 		if (!pci_enable_msi(alx->hw.pdev))
 			alx->flags |= ALX_FLAG_USING_MSI;
@@ -863,6 +918,25 @@ static void alx_irq_disable(struct alx_priv *alx)
 	}
 }
 
+static int alx_realloc_resources(struct alx_priv *alx)
+{
+	int err;
+
+	alx_free_rings(alx);
+	alx_free_napis(alx);
+	alx_disable_advanced_intr(alx);
+
+	err = alx_alloc_napis(alx);
+	if (err)
+		return err;
+
+	err = alx_alloc_rings(alx);
+	if (err)
+		return err;
+
+	return 0;
+}
+
 static int alx_request_irq(struct alx_priv *alx)
 {
 	struct pci_dev *pdev = alx->hw.pdev;
@@ -879,8 +953,9 @@ static int alx_request_irq(struct alx_priv *alx)
 			goto out;
 
 		/* msix request failed, realloc resources */
-		alx_disable_advanced_intr(alx);
-		alx_init_intr(alx, false);
+		err = alx_realloc_resources(alx);
+		if (err)
+			goto out;
 	}
 
 	if (alx->flags & ALX_FLAG_USING_MSI) {
@@ -912,9 +987,10 @@ static void alx_free_irq(struct alx_priv *alx)
 	int i, vector = 0;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
-		/* we have only 2 vectors without multi queue support */
 		free_irq(alx->msix_entries[vector++].vector, alx);
-		free_irq(alx->msix_entries[vector++].vector, alx->qnapi[0]);
+		for (i = 0; i < alx->num_napi; i++)
+			free_irq(alx->msix_entries[vector++].vector,
+				 alx->qnapi[i]);
 	} else {
 		free_irq(pdev->irq, alx);
 	}
@@ -1478,10 +1554,12 @@ static int alx_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 static void alx_poll_controller(struct net_device *netdev)
 {
 	struct alx_priv *alx = netdev_priv(netdev);
+	int i;
 
 	if (alx->flags & ALX_FLAG_USING_MSIX) {
 		alx_intr_msix_misc(0, alx);
-		alx_intr_msix_ring(0, alx->qnapi[0]);
+		for (i = 0; i < alx->num_txq; i++)
+			alx_intr_msix_ring(0, alx->qnapi[i]);
 	} else if (alx->flags & ALX_FLAG_USING_MSI)
 		alx_intr_msi(0, alx);
 	else
-- 
2.7.4

* [PATCH net-next 6/9] alx: prepare resource allocation for multi queue support
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
                   ` (4 preceding siblings ...)
  2016-10-21 10:49 ` [PATCH net-next 5/9] alx: prepare interrupt functions for multiple queues Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 7/9] alx: prepare tx path " Tobias Regnery
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Allocate, initialise and free alx_tx_queue structs based on the number of
alx_napi structures. Also increase the size of the descriptor memory based
on the number of tx queues in use.
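
The descriptor block stays a single DMA allocation; its size is now roughly:

        alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz *
                            alx->num_txq +
                            sizeof(struct alx_rrd) * alx->rx_ringsz +
                            sizeof(struct alx_rfd) * alx->rx_ringsz;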

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 149 +++++++++++++++++++++-----------
 1 file changed, 97 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 624419968df5..19812bdf3e53 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -429,28 +429,45 @@ static irqreturn_t alx_intr_legacy(int irq, void *data)
 	return alx_intr_handle(alx, intr);
 }
 
+static const u16 txring_header_reg[] = {ALX_TPD_PRI0_ADDR_LO,
+					ALX_TPD_PRI1_ADDR_LO,
+					ALX_TPD_PRI2_ADDR_LO,
+					ALX_TPD_PRI3_ADDR_LO};
+
 static void alx_init_ring_ptrs(struct alx_priv *alx)
 {
 	struct alx_hw *hw = &alx->hw;
 	u32 addr_hi = ((u64)alx->descmem.dma) >> 32;
-	struct alx_napi *np = alx->qnapi[0];
+	struct alx_napi *np;
+	int i;
+
+	for (i = 0; i < alx->num_napi; i++) {
+		np = alx->qnapi[i];
+		if (np->txq) {
+			np->txq->read_idx = 0;
+			np->txq->write_idx = 0;
+			alx_write_mem32(hw,
+					txring_header_reg[np->txq->queue_idx],
+					np->txq->tpd_dma);
+		}
+
+		if (np->rxq) {
+			np->rxq->read_idx = 0;
+			np->rxq->write_idx = 0;
+			np->rxq->rrd_read_idx = 0;
+			alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
+			alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
+		}
+	}
+
+	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
+	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
 
-	np->rxq->read_idx = 0;
-	np->rxq->write_idx = 0;
-	np->rxq->rrd_read_idx = 0;
 	alx_write_mem32(hw, ALX_RX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_RRD_ADDR_LO, np->rxq->rrd_dma);
 	alx_write_mem32(hw, ALX_RRD_RING_SZ, alx->rx_ringsz);
-	alx_write_mem32(hw, ALX_RFD_ADDR_LO, np->rxq->rfd_dma);
 	alx_write_mem32(hw, ALX_RFD_RING_SZ, alx->rx_ringsz);
 	alx_write_mem32(hw, ALX_RFD_BUF_SZ, alx->rxbuf_size);
 
-	np->txq->read_idx = 0;
-	np->txq->write_idx = 0;
-	alx_write_mem32(hw, ALX_TX_BASE_ADDR_HI, addr_hi);
-	alx_write_mem32(hw, ALX_TPD_PRI0_ADDR_LO, np->txq->tpd_dma);
-	alx_write_mem32(hw, ALX_TPD_RING_SZ, alx->tx_ringsz);
-
 	/* load these pointers into the chip */
 	alx_write_mem32(hw, ALX_SRAM9, ALX_SRAM_LOAD_PTR);
 }
@@ -478,7 +495,7 @@ static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 	struct alx_buffer *cur_buf;
 	u16 i;
 
-	if (rxq == NULL)
+	if (!rxq->bufs)
 		return;
 
 	for (i = 0; i < rxq->count; i++) {
@@ -502,8 +519,14 @@ static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
 
 static void alx_free_buffers(struct alx_priv *alx)
 {
-	alx_free_txring_buf(alx->qnapi[0]->txq);
-	alx_free_rxring_buf(alx->qnapi[0]->rxq);
+	int i;
+
+	for (i = 0; i < alx->num_txq; i++)
+		if (alx->qnapi[i] && alx->qnapi[i]->txq)
+			alx_free_txring_buf(alx->qnapi[i]->txq);
+
+	if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+		alx_free_rxring_buf(alx->qnapi[0]->rxq);
 }
 
 static int alx_reinit_rings(struct alx_priv *alx)
@@ -611,7 +634,7 @@ static int alx_alloc_rx_ring(struct alx_priv *alx, struct alx_rx_queue *rxq,
 
 static int alx_alloc_rings(struct alx_priv *alx)
 {
-	int offset = 0;
+	int i, offset = 0;
 
 	/* physical tx/rx ring descriptors
 	 *
@@ -619,7 +642,8 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	 * 4G boundary (hardware has a single register for high 32 bits
 	 * of addresses only)
 	 */
-	alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz +
+	alx->descmem.size = sizeof(struct alx_txd) * alx->tx_ringsz *
+			    alx->num_txq +
 			    sizeof(struct alx_rrd) * alx->rx_ringsz +
 			    sizeof(struct alx_rfd) * alx->rx_ringsz;
 	alx->descmem.virt = dma_zalloc_coherent(&alx->hw.pdev->dev,
@@ -633,10 +657,12 @@ static int alx_alloc_rings(struct alx_priv *alx)
 	BUILD_BUG_ON(sizeof(struct alx_txd) % 8);
 	BUILD_BUG_ON(sizeof(struct alx_rrd) % 8);
 
-	offset = alx_alloc_tx_ring(alx, alx->qnapi[0]->txq, offset);
-	if (offset < 0) {
-		netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
-		return -ENOMEM;
+	for (i = 0; i < alx->num_txq; i++) {
+		offset = alx_alloc_tx_ring(alx, alx->qnapi[i]->txq, offset);
+		if (offset < 0) {
+			netdev_err(alx->dev, "Allocation of tx buffer failed!\n");
+			return -ENOMEM;
+		}
 	}
 
 	offset = alx_alloc_rx_ring(alx, alx->qnapi[0]->rxq, offset);
@@ -652,11 +678,16 @@ static int alx_alloc_rings(struct alx_priv *alx)
 
 static void alx_free_rings(struct alx_priv *alx)
 {
+	int i;
 
 	alx_free_buffers(alx);
 
-	kfree(alx->qnapi[0]->txq->bufs);
-	kfree(alx->qnapi[0]->rxq->bufs);
+	for (i = 0; i < alx->num_txq; i++)
+		if (alx->qnapi[i] && alx->qnapi[i]->txq)
+			kfree(alx->qnapi[i]->txq->bufs);
+
+	if (alx->qnapi[0] && alx->qnapi[0]->rxq)
+		kfree(alx->qnapi[0]->rxq->bufs);
 
 	if (alx->descmem.virt)
 		dma_free_coherent(&alx->hw.pdev->dev,
@@ -668,16 +699,19 @@ static void alx_free_rings(struct alx_priv *alx)
 static void alx_free_napis(struct alx_priv *alx)
 {
 	struct alx_napi *np;
+	int i;
 
-	np = alx->qnapi[0];
-	if (!np)
-		return;
-
-	netif_napi_del(&np->napi);
-	kfree(np->txq);
-	kfree(np->rxq);
-	kfree(np);
-	alx->qnapi[0] = NULL;
+	for (i = 0; i < alx->num_napi; i++) {
+		np = alx->qnapi[i];
+		if (!np)
+			continue;
+
+		netif_napi_del(&np->napi);
+		kfree(np->txq);
+		kfree(np->rxq);
+		kfree(np);
+		alx->qnapi[i] = NULL;
+	}
 }
 
 static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
@@ -692,31 +726,36 @@ static int alx_alloc_napis(struct alx_priv *alx)
 	struct alx_napi *np;
 	struct alx_rx_queue *rxq;
 	struct alx_tx_queue *txq;
+	int i;
 
 	alx->int_mask &= ~ALX_ISR_ALL_QUEUES;
 
 	/* allocate alx_napi structures */
-	np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
-	if (!np)
-		goto err_out;
+	for (i = 0; i < alx->num_napi; i++) {
+		np = kzalloc(sizeof(struct alx_napi), GFP_KERNEL);
+		if (!np)
+			goto err_out;
 
-	np->alx = alx;
-	netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
-	alx->qnapi[0] = np;
+		np->alx = alx;
+		netif_napi_add(alx->dev, &np->napi, alx_poll, 64);
+		alx->qnapi[i] = np;
+	}
 
 	/* allocate tx queues */
-	np = alx->qnapi[0];
-	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
-	if (!txq)
-		goto err_out;
-
-	np->txq = txq;
-	txq->queue_idx = 0;
-	txq->count = alx->tx_ringsz;
-	txq->netdev = alx->dev;
-	txq->dev = &alx->hw.pdev->dev;
-	np->vec_mask |= tx_vect_mask[0];
-	alx->int_mask |= tx_vect_mask[0];
+	for (i = 0; i < alx->num_txq; i++) {
+		np = alx->qnapi[i];
+		txq = kzalloc(sizeof(*txq), GFP_KERNEL);
+		if (!txq)
+			goto err_out;
+
+		np->txq = txq;
+		txq->queue_idx = i;
+		txq->count = alx->tx_ringsz;
+		txq->netdev = alx->dev;
+		txq->dev = &alx->hw.pdev->dev;
+		np->vec_mask |= tx_vect_mask[i];
+		alx->int_mask |= tx_vect_mask[i];
+	}
 
 	/* allocate rx queues */
 	np = alx->qnapi[0];
@@ -1075,11 +1114,14 @@ static netdev_features_t alx_fix_features(struct net_device *netdev,
 
 static void alx_netif_stop(struct alx_priv *alx)
 {
+	int i;
+
 	netif_trans_update(alx->dev);
 	if (netif_carrier_ok(alx->dev)) {
 		netif_carrier_off(alx->dev);
 		netif_tx_disable(alx->dev);
-		napi_disable(&alx->qnapi[0]->napi);
+		for (i = 0; i < alx->num_napi; i++)
+			napi_disable(&alx->qnapi[i]->napi);
 	}
 }
 
@@ -1148,8 +1190,11 @@ static int alx_change_mtu(struct net_device *netdev, int mtu)
 
 static void alx_netif_start(struct alx_priv *alx)
 {
+	int i;
+
 	netif_tx_wake_all_queues(alx->dev);
-	napi_enable(&alx->qnapi[0]->napi);
+	for (i = 0; i < alx->num_napi; i++)
+		napi_enable(&alx->qnapi[i]->napi);
 	netif_carrier_on(alx->dev);
 }
 
-- 
2.7.4

* [PATCH net-next 7/9] alx: prepare tx path for multi queue support
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
                   ` (5 preceding siblings ...)
  2016-10-21 10:49 ` [PATCH net-next 6/9] alx: prepare resource allocation for multi queue support Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 8/9] alx: enable msi-x interrupts by default Tobias Regnery
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

This patch prepares the tx path to send data on multiple tx queues. It
introduces per-queue register addresses and uses them in the alx_tx_queue
structs.

There are new helper functions for the queue mapping in the tx path.
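
In the xmit path this boils down to roughly:

        txq = alx_tx_queue_mapping(alx, skb);   /* pick the ring for this skb */
        ...
        /* kick the chosen ring via its own producer index register */
        alx_write_mem16(&alx->hw, txq->p_reg, txq->write_idx);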

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 58 +++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 19812bdf3e53..39f6247cbbd0 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -143,6 +143,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp)
 	return count;
 }
 
+static struct alx_tx_queue *alx_tx_queue_mapping(struct alx_priv *alx,
+						 struct sk_buff *skb)
+{
+	unsigned int r_idx = skb->queue_mapping;
+
+	if (r_idx >= alx->num_txq)
+		r_idx = r_idx % alx->num_txq;
+
+	return alx->qnapi[r_idx]->txq;
+}
+
+static struct netdev_queue *alx_get_tx_queue(const struct alx_tx_queue *txq)
+{
+	return netdev_get_tx_queue(txq->netdev, txq->queue_idx);
+}
+
 static inline int alx_tpd_avail(struct alx_tx_queue *txq)
 {
 	if (txq->write_idx >= txq->read_idx)
@@ -153,14 +169,16 @@ static inline int alx_tpd_avail(struct alx_tx_queue *txq)
 static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 {
 	struct alx_priv *alx;
+	struct netdev_queue *tx_queue;
 	u16 hw_read_idx, sw_read_idx;
 	unsigned int total_bytes = 0, total_packets = 0;
 	int budget = ALX_DEFAULT_TX_WORK;
 
 	alx = netdev_priv(txq->netdev);
+	tx_queue = alx_get_tx_queue(txq);
 
 	sw_read_idx = txq->read_idx;
-	hw_read_idx = alx_read_mem16(&alx->hw, ALX_TPD_PRI0_CIDX);
+	hw_read_idx = alx_read_mem16(&alx->hw, txq->c_reg);
 
 	if (sw_read_idx != hw_read_idx) {
 		while (sw_read_idx != hw_read_idx && budget > 0) {
@@ -180,12 +198,12 @@ static bool alx_clean_tx_irq(struct alx_tx_queue *txq)
 		}
 		txq->read_idx = sw_read_idx;
 
-		netdev_completed_queue(txq->netdev, total_packets, total_bytes);
+		netdev_tx_completed_queue(tx_queue, total_packets, total_bytes);
 	}
 
-	if (netif_queue_stopped(txq->netdev) && netif_carrier_ok(txq->netdev) &&
+	if (netif_tx_queue_stopped(tx_queue) && netif_carrier_ok(alx->dev) &&
 	    alx_tpd_avail(txq) > txq->count / 4)
-		netif_wake_queue(txq->netdev);
+		netif_tx_wake_queue(tx_queue);
 
 	return sw_read_idx == hw_read_idx;
 }
@@ -487,7 +505,7 @@ static void alx_free_txring_buf(struct alx_tx_queue *txq)
 	txq->write_idx = 0;
 	txq->read_idx = 0;
 
-	netdev_reset_queue(txq->netdev);
+	netdev_tx_reset_queue(alx_get_tx_queue(txq));
 }
 
 static void alx_free_rxring_buf(struct alx_rx_queue *rxq)
@@ -714,6 +732,10 @@ static void alx_free_napis(struct alx_priv *alx)
 	}
 }
 
+static const u16 tx_pidx_reg[] = {ALX_TPD_PRI0_PIDX, ALX_TPD_PRI1_PIDX,
+				  ALX_TPD_PRI2_PIDX, ALX_TPD_PRI3_PIDX};
+static const u16 tx_cidx_reg[] = {ALX_TPD_PRI0_CIDX, ALX_TPD_PRI1_CIDX,
+				  ALX_TPD_PRI2_CIDX, ALX_TPD_PRI3_CIDX};
 static const u32 tx_vect_mask[] = {ALX_ISR_TX_Q0, ALX_ISR_TX_Q1,
 				   ALX_ISR_TX_Q2, ALX_ISR_TX_Q3};
 static const u32 rx_vect_mask[] = {ALX_ISR_RX_Q0, ALX_ISR_RX_Q1,
@@ -749,6 +771,8 @@ static int alx_alloc_napis(struct alx_priv *alx)
 			goto err_out;
 
 		np->txq = txq;
+		txq->p_reg = tx_pidx_reg[i];
+		txq->c_reg = tx_cidx_reg[i];
 		txq->queue_idx = i;
 		txq->count = alx->tx_ringsz;
 		txq->netdev = alx->dev;
@@ -1501,16 +1525,17 @@ static int alx_map_tx_skb(struct alx_tx_queue *txq, struct sk_buff *skb)
 	return -ENOMEM;
 }
 
-static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
-				  struct net_device *netdev)
+static netdev_tx_t alx_start_xmit_ring(struct sk_buff *skb,
+				       struct alx_tx_queue *txq)
 {
-	struct alx_priv *alx = netdev_priv(netdev);
-	struct alx_tx_queue *txq = alx->qnapi[0]->txq;
+	struct alx_priv *alx;
 	struct alx_txd *first;
 	int tso;
 
+	alx = netdev_priv(txq->netdev);
+
 	if (alx_tpd_avail(txq) < alx_tpd_req(skb)) {
-		netif_stop_queue(txq->netdev);
+		netif_tx_stop_queue(alx_get_tx_queue(txq));
 		goto drop;
 	}
 
@@ -1526,14 +1551,14 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 	if (alx_map_tx_skb(txq, skb) < 0)
 		goto drop;
 
-	netdev_sent_queue(txq->netdev, skb->len);
+	netdev_tx_sent_queue(alx_get_tx_queue(txq), skb->len);
 
 	/* flush updates before updating hardware */
 	wmb();
-	alx_write_mem16(&alx->hw, ALX_TPD_PRI0_PIDX, txq->write_idx);
+	alx_write_mem16(&alx->hw, txq->p_reg, txq->write_idx);
 
 	if (alx_tpd_avail(txq) < txq->count / 8)
-		netif_stop_queue(txq->netdev);
+		netif_tx_stop_queue(alx_get_tx_queue(txq));
 
 	return NETDEV_TX_OK;
 
@@ -1542,6 +1567,13 @@ static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
+static netdev_tx_t alx_start_xmit(struct sk_buff *skb,
+				  struct net_device *netdev)
+{
+	struct alx_priv *alx = netdev_priv(netdev);
+	return alx_start_xmit_ring(skb, alx_tx_queue_mapping(alx, skb));
+}
+
 static void alx_tx_timeout(struct net_device *dev)
 {
 	struct alx_priv *alx = netdev_priv(dev);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread
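
As a minimal sketch of the per-queue BQL accounting the hunks above switch
to (the example_* names and the ring-occupancy flags are placeholders, not
the driver's code):

#include <linux/netdevice.h>

/* Account bytes handed to the hardware on the xmit side and report
 * completions on the clean side, per netdev_queue, so that only the
 * queue whose ring is full gets stopped and woken.
 */
static void example_xmit_side(struct net_device *dev, int qidx,
			      unsigned int len, bool ring_nearly_full)
{
	struct netdev_queue *nq = netdev_get_tx_queue(dev, qidx);

	netdev_tx_sent_queue(nq, len);
	if (ring_nearly_full)
		netif_tx_stop_queue(nq);
}

static void example_completion_side(struct net_device *dev, int qidx,
				    unsigned int pkts, unsigned int bytes,
				    bool ring_has_room)
{
	struct netdev_queue *nq = netdev_get_tx_queue(dev, qidx);

	netdev_tx_completed_queue(nq, pkts, bytes);
	if (netif_tx_queue_stopped(nq) && netif_carrier_ok(dev) &&
	    ring_has_room)
		netif_tx_wake_queue(nq);
}

Doing the accounting and flow control per netdev_queue is what lets one full
tx ring stop its own queue without stalling the others.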

* [PATCH net-next 8/9] alx: enable msi-x interrupts by default
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
                   ` (6 preceding siblings ...)
  2016-10-21 10:49 ` [PATCH net-next 7/9] alx: prepare tx path " Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
  2016-10-21 10:49 ` [PATCH net-next 9/9] alx: enable multiple tx queues Tobias Regnery
       [not found] ` <CAMXMK6uPw7N+sve+gePm8ToDaoH7dciWq7ixZQOxpQZ5jr=yOg@mail.gmail.com>
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Remove the module parameter that enabled msi-x support and enable msi-x
interrupts unconditionally. This is a preparatory step for enabling multi
queue support by default, because multi queue support only works with msi-x
interrupts.

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 39f6247cbbd0..3b832090ad2e 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -51,10 +51,6 @@
 
 const char alx_drv_name[] = "alx";
 
-static bool msix = false;
-module_param(msix, bool, 0);
-MODULE_PARM_DESC(msix, "Enable msi-x interrupt support");
-
 static void alx_free_txbuf(struct alx_tx_queue *txq, int entry)
 {
 	struct alx_buffer *txb = &txq->bufs[entry];
@@ -1226,7 +1222,7 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 {
 	int err;
 
-	alx_init_intr(alx, msix);
+	alx_init_intr(alx, true);
 
 	if (!resume)
 		netif_carrier_off(alx->dev);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH net-next 9/9] alx: enable multiple tx queues
  2016-10-21 10:49 [PATCH net-next 0/9] alx: add multi queue support Tobias Regnery
                   ` (7 preceding siblings ...)
  2016-10-21 10:49 ` [PATCH net-next 8/9] alx: enable msi-x interrupts by default Tobias Regnery
@ 2016-10-21 10:49 ` Tobias Regnery
       [not found] ` <CAMXMK6uPw7N+sve+gePm8ToDaoH7dciWq7ixZQOxpQZ5jr=yOg@mail.gmail.com>
  9 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-21 10:49 UTC (permalink / raw)
  To: jcliburn, chris.snook, netdev; +Cc: davem, Tobias Regnery

Enable multiple tx queues by default based on the number of online cpus. The
hardware supports up to four tx queues.

Based on the downstream driver at github.com/qca/alx

Signed-off-by: Tobias Regnery <tobias.regnery@gmail.com>
---
 drivers/net/ethernet/atheros/alx/main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c
index 3b832090ad2e..df5bfa4480d7 100644
--- a/drivers/net/ethernet/atheros/alx/main.c
+++ b/drivers/net/ethernet/atheros/alx/main.c
@@ -834,7 +834,7 @@ static bool alx_enable_msix(struct alx_priv *alx)
 {
 	int i, err, num_vec, num_txq, num_rxq;
 
-	num_txq = 1;
+	num_txq = min_t(int, num_online_cpus(), ALX_MAX_TX_QUEUES);
 	num_rxq = 1;
 	num_vec = max_t(int, num_txq, num_rxq) + 1;
 
@@ -1241,6 +1241,9 @@ static int __alx_open(struct alx_priv *alx, bool resume)
 	if (err)
 		goto out_free_rings;
 
+	netif_set_real_num_tx_queues(alx->dev, alx->num_txq);
+	netif_set_real_num_rx_queues(alx->dev, alx->num_rxq);
+
 	/* clear old interrupts */
 	alx_write_mem32(&alx->hw, ALX_ISR, ~(u32)ALX_ISR_DIS);
 
@@ -1749,7 +1752,8 @@ static int alx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto out_pci_release;
 	}
 
-	netdev = alloc_etherdev(sizeof(*alx));
+	netdev = alloc_etherdev_mqs(sizeof(*alx),
+				    ALX_MAX_TX_QUEUES, 1);
 	if (!netdev) {
 		err = -ENOMEM;
 		goto out_pci_release;
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 12+ messages in thread
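
As a minimal sketch of the multi-queue setup pattern used above, with error
handling omitted and the EXAMPLE_* / example_* names standing in for the
driver's own:

#include <linux/kernel.h>
#include <linux/cpumask.h>
#include <linux/etherdevice.h>

#define EXAMPLE_MAX_TX_QUEUES	4

static struct net_device *example_alloc_mq_netdev(int priv_size)
{
	struct net_device *dev;
	unsigned int txqs;

	/* Reserve the maximum queue count at allocation time. */
	dev = alloc_etherdev_mqs(priv_size, EXAMPLE_MAX_TX_QUEUES, 1);
	if (!dev)
		return NULL;

	/* Later (e.g. at open time) tell the stack how many are in use. */
	txqs = min_t(unsigned int, num_online_cpus(), EXAMPLE_MAX_TX_QUEUES);
	netif_set_real_num_tx_queues(dev, txqs);
	netif_set_real_num_rx_queues(dev, 1);

	return dev;
}

Reserving the maximum at allocation time keeps the number of queues actually
used adjustable later without reallocating the net_device.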

* Re: [PATCH net-next 1/9] alx: refactor descriptor allocation
  2016-10-21 10:49 ` [PATCH net-next 1/9] alx: refactor descriptor allocation Tobias Regnery
@ 2016-10-23 15:50   ` David Miller
  0 siblings, 0 replies; 12+ messages in thread
From: David Miller @ 2016-10-23 15:50 UTC (permalink / raw)
  To: tobias.regnery; +Cc: jcliburn, chris.snook, netdev

From: Tobias Regnery <tobias.regnery@gmail.com>
Date: Fri, 21 Oct 2016 12:49:44 +0200

> +	txq->tpd = alx->descmem.virt + offset;
> +	txq->tpd_dma = alx->descmem.dma + offset;

If all the crazy casting isn't necessary here...

> +	rxq->rrd = (void *)((u8 *)alx->descmem.virt + offset);
> +	rxq->rrd_dma = alx->descmem.dma + offset;
> +	offset += sizeof(struct alx_rrd) * alx->rx_ringsz;
> +
> +	rxq->rfd = (void *)((u8 *)alx->descmem.virt + offset);
> +	rxq->rfd_dma = alx->descmem.dma + offset;
> +	offset += sizeof(struct alx_rfd) * alx->rx_ringsz;

Then it certainly isn't necessary here either.

Void pointer arithmetic is very clearly defined as operating on byte
quantities, so the cast is necessary neither for the arithmetic nor for
the final pointer type.
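
A minimal illustration of that point (the function name is made up):

#include <linux/types.h>

/* In the kernel's GNU C dialect, arithmetic on a void pointer steps in
 * single bytes, so both expressions below compute the same address and
 * assign cleanly to a void pointer without any cast.
 */
static void *example_at_offset(void *virt, unsigned long offset)
{
	void *with_cast    = (void *)((u8 *)virt + offset);
	void *without_cast = virt + offset;

	return with_cast == without_cast ? without_cast : NULL; /* never NULL */
}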

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH net-next 0/9] alx: add multi queue support
       [not found] ` <CAMXMK6uPw7N+sve+gePm8ToDaoH7dciWq7ixZQOxpQZ5jr=yOg@mail.gmail.com>
@ 2016-10-24 12:54   ` Tobias Regnery
  0 siblings, 0 replies; 12+ messages in thread
From: Tobias Regnery @ 2016-10-24 12:54 UTC (permalink / raw)
  To: Chris Snook, jcliburn, netdev; +Cc: davem

I tested this patchset with my AR8161 ethernet card in different situations:

  - After two weeks of daily use I observed no regression with this patchset.
  - I manually tested the new error paths in the __alx_open function and in
    the other newly added device bringup functions.

  - iperf udp and tcp throughput are exactly the same with and without this
    patchset, regardless of the number of parallel streams.
  - netperf TCP_RR and UDP_RR tests show a slight performance increase of
    about 1-2% with this patchset.

I don't own any of the other supported cards by the driver, so if someone is
willing to test these patches on one of the other cards, this is highly
appreciated.

The benefits are the separation of the misc interrupt from the tx/rx
interrupts with the new msi-x support, and better multi-core cpu utilization.

Sorry for not providing this information in the patchset; I will add it
in the next revision.

--
Tobias

On 21.10.16, Chris Snook wrote:
> Can you please elaborate on the testing and benefits?
> 
> - Chris
> 
> On Fri, Oct 21, 2016 at 3:50 AM Tobias Regnery <tobias.regnery@gmail.com>
> wrote:
> 
> > This patchset lays the groundwork for multi queue support in the alx driver
> > and enables multi queue support for the tx path by default. The hardware
> > supports up to 4 tx queues.
> >
> > The rx path is a little bit harder because apparently (based on the limited
> > information from the downstream driver) the hardware supports up to 8 rss
> > queues but only has one hardware descriptor ring on the rx side. So the rx
> > path will be part of another patchset.
> >
> > This work is based on the downstream driver at github.com/qca/alx
> >
> > I had a hard time splitting these changes up into reasonable parts because
> > this is my first bigger kernel patchset, so please be patient if this is
> > not
> > the right approach.
> >
> > Tobias Regnery (9):
> >   alx: refactor descriptor allocation
> >   alx: extend data structures for multi queue support
> >   alx: add ability to allocate and free alx_napi structures
> >   alx: switch to per queue data structures
> >   alx: prepare interrupt functions for multiple queues
> >   alx: prepare resource allocation for multi queue support
> >   alx: prepare tx path for multi queue support
> >   alx: enable msi-x interrupts by default
> >   alx: enable multiple tx queues
> >
> >  drivers/net/ethernet/atheros/alx/alx.h  |  36 ++-
> >  drivers/net/ethernet/atheros/alx/main.c | 554
> > ++++++++++++++++++++++----------
> >  2 files changed, 420 insertions(+), 170 deletions(-)
> >
> > --
> > 2.7.4
> >
> >

^ permalink raw reply	[flat|nested] 12+ messages in thread
