* [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support
@ 2023-01-31 10:05 Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 01/10] net: libwx: Add irq flow functions Mengyuan Lou
                   ` (9 more replies)
  0 siblings, 10 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

Configure interrupts, set up Rx/Tx rings, and support receiving and
transmitting packets.

Changes in v2:
- Andrew Lunn: https://lore.kernel.org/netdev/Y86kDphvyHj21IxK@lunn.ch/
- Add a check when allocating DMA memory for descriptors.

Jiawen Wu (5):
  net: txgbe: Add interrupt support
  net: libwx: Configure Rx and Tx unit on hardware
  net: libwx: Allocate Rx and Tx resources
  net: txgbe: Setup Rx and Tx ring
  net: txgbe: Support Rx and Tx process path

Mengyuan Lou (5):
  net: libwx: Add irq flow functions
  net: ngbe: Add irqs request flow
  net: libwx: Support to receive packets in NAPI
  net: libwx: Add tx path to process packets
  net: ngbe: Support Rx and Tx process path

 drivers/net/ethernet/wangxun/Kconfig          |    1 +
 drivers/net/ethernet/wangxun/libwx/Makefile   |    2 +-
 drivers/net/ethernet/wangxun/libwx/wx_hw.c    |  675 +++++-
 drivers/net/ethernet/wangxun/libwx/wx_hw.h    |    5 +
 drivers/net/ethernet/wangxun/libwx/wx_lib.c   | 2009 +++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_lib.h   |   32 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h  |  315 +++
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c |  249 +-
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h |   18 +
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   |  271 ++-
 .../net/ethernet/wangxun/txgbe/txgbe_type.h   |   21 +
 11 files changed, 3580 insertions(+), 18 deletions(-)
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_lib.c
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_lib.h

-- 
2.39.1



* [PATCH net-next v2 01/10] net: libwx: Add irq flow functions
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-02-02  5:39   ` Jakub Kicinski
  2023-02-02  5:45   ` Jakub Kicinski
  2023-01-31 10:05 ` [PATCH net-next v2 02/10] net: ngbe: Add irqs request flow Mengyuan Lou
                   ` (8 subsequent siblings)
  9 siblings, 2 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

Add irq flow functions for ngbe and txgbe.
Allocate PCIe MSI-X irqs for the drivers; otherwise fall back to
MSI/legacy interrupts.
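
A minimal sketch of that strategy using the generic PCI IRQ API (the
example_* name is illustrative, not the driver's actual helper):

  #include <linux/pci.h>

  /* try MSI-X first; otherwise fall back to one MSI or legacy INTx vector */
  static int example_alloc_irq_vectors(struct pci_dev *pdev, int want)
  {
          int nvecs;

          /* one vector per queue plus one for misc/other events */
          nvecs = pci_alloc_irq_vectors(pdev, want, want, PCI_IRQ_MSIX);
          if (nvecs > 0)
                  return nvecs;

          /* a single vector shared by queue and misc handling */
          return pci_alloc_irq_vectors(pdev, 1, 1,
                                       PCI_IRQ_MSI | PCI_IRQ_LEGACY);
  }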

Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
 drivers/net/ethernet/wangxun/libwx/Makefile  |   2 +-
 drivers/net/ethernet/wangxun/libwx/wx_hw.c   |  55 +-
 drivers/net/ethernet/wangxun/libwx/wx_hw.h   |   3 +
 drivers/net/ethernet/wangxun/libwx/wx_lib.c  | 613 +++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_lib.h  |  20 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h | 102 +++
 6 files changed, 793 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_lib.c
 create mode 100644 drivers/net/ethernet/wangxun/libwx/wx_lib.h

diff --git a/drivers/net/ethernet/wangxun/libwx/Makefile b/drivers/net/ethernet/wangxun/libwx/Makefile
index 1ed5e23af944..850d1615cd18 100644
--- a/drivers/net/ethernet/wangxun/libwx/Makefile
+++ b/drivers/net/ethernet/wangxun/libwx/Makefile
@@ -4,4 +4,4 @@
 
 obj-$(CONFIG_LIBWX) += libwx.o
 
-libwx-objs := wx_hw.o
+libwx-objs := wx_hw.o wx_lib.o
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 3d7ba0c0df38..a72b02c72189 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -14,7 +14,7 @@ static void wx_intr_disable(struct wx *wx, u64 qmask)
 {
 	u32 mask;
 
-	mask = (qmask & 0xFFFFFFFF);
+	mask = (qmask & U32_MAX);
 	if (mask)
 		wr32(wx, WX_PX_IMS(0), mask);
 
@@ -25,6 +25,45 @@ static void wx_intr_disable(struct wx *wx, u64 qmask)
 	}
 }
 
+void wx_intr_enable(struct wx *wx, u64 qmask)
+{
+	u32 mask;
+
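+	/* writing IMC clears mask bits, i.e. unmasks (enables) them */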
+	mask = (qmask & U32_MAX);
+	if (mask)
+		wr32(wx, WX_PX_IMC(0), mask);
+	if (wx->mac.type == wx_mac_sp) {
+		mask = (qmask >> 32);
+		if (mask)
+			wr32(wx, WX_PX_IMC(1), mask);
+	}
+}
+EXPORT_SYMBOL(wx_intr_enable);
+
+/**
+ * wx_irq_disable - Mask off interrupt generation on the NIC
+ * @wx: board private structure
+ **/
+void wx_irq_disable(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+
+	wr32(wx, WX_PX_MISC_IEN, 0);
+	wx_intr_disable(wx, WX_INTR_ALL);
+
+	if (pdev->msix_enabled) {
+		int vector;
+
+		for (vector = 0; vector < wx->num_q_vectors; vector++)
+			synchronize_irq(wx->msix_entries[vector].vector);
+
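+		/* vector == num_q_vectors here: the extra misc vector */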
+		synchronize_irq(wx->msix_entries[vector].vector);
+	} else {
+		synchronize_irq(pdev->irq);
+	}
+}
+EXPORT_SYMBOL(wx_irq_disable);
+
 /* cmd_addr is used for some special command:
  * 1. to be sector address, when implemented erase sector command
  * 2. to be flash address when implemented read, write flash address
@@ -844,6 +883,20 @@ void wx_disable_rx(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_disable_rx);
 
+static void wx_configure_isb(struct wx *wx)
+{
+	/* set ISB Address */
+	wr32(wx, WX_PX_ISB_ADDR_L, wx->isb_dma & DMA_BIT_MASK(32));
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
+		wr32(wx, WX_PX_ISB_ADDR_H, wx->isb_dma >> 32);
+}
+
+void wx_configure(struct wx *wx)
+{
+	wx_configure_isb(wx);
+}
+EXPORT_SYMBOL(wx_configure);
+
 /**
  *  wx_disable_pcie_master - Disable PCI-express master access
  *  @wx: pointer to hardware structure
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 803983546f3a..60bda129cfa6 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -4,6 +4,8 @@
 #ifndef _WX_HW_H_
 #define _WX_HW_H_
 
+void wx_intr_enable(struct wx *wx, u64 qmask);
+void wx_irq_disable(struct wx *wx);
 int wx_check_flash_load(struct wx *wx, u32 check_bit);
 void wx_control_hw(struct wx *wx, bool drv);
 int wx_mng_present(struct wx *wx);
@@ -20,6 +22,7 @@ void wx_mac_set_default_filter(struct wx *wx, u8 *addr);
 void wx_flush_sw_mac_table(struct wx *wx);
 int wx_set_mac(struct net_device *netdev, void *p);
 void wx_disable_rx(struct wx *wx);
+void wx_configure(struct wx *wx);
 int wx_disable_pcie_master(struct wx *wx);
 int wx_stop_adapter(struct wx *wx);
 void wx_reset_misc(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
new file mode 100644
index 000000000000..76711087c184
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -0,0 +1,613 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */
+
+#include <linux/etherdevice.h>
+#include <linux/iopoll.h>
+#include <linux/pci.h>
+
+#include "wx_type.h"
+#include "wx_lib.h"
+
+/**
+ * wx_poll - NAPI polling RX/TX cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ **/
+static int wx_poll(struct napi_struct *napi, int budget)
+{
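+	/* placeholder: Rx/Tx cleanup is filled in later in this series */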
+	return 0;
+}
+
+/**
+ * wx_set_rss_queues: Allocate queues for RSS
+ * @wx: board private structure to initialize
+ *
+ * This is our "base" multiqueue mode.  RSS (Receive Side Scaling) will try
+ * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU.
+ *
+ **/
+static void wx_set_rss_queues(struct wx *wx)
+{
+	wx->num_rx_queues = wx->mac.max_rx_queues;
+	wx->num_tx_queues = wx->mac.max_tx_queues;
+}
+
+static void wx_set_num_queues(struct wx *wx)
+{
+	/* Start with base case */
+	wx->num_rx_queues = 1;
+	wx->num_tx_queues = 1;
+	wx->queues_per_pool = 1;
+
+	wx_set_rss_queues(wx);
+}
+
+/**
+ * wx_acquire_msix_vectors - acquire MSI-X vectors
+ * @wx: board private structure
+ *
+ * Attempts to acquire a suitable range of MSI-X vector interrupts. Will
+ * return a negative error code if unable to acquire MSI-X vectors for any
+ * reason.
+ */
+static int wx_acquire_msix_vectors(struct wx *wx)
+{
+	struct irq_affinity affd = {0, };
+	int nvecs, i;
+
+	nvecs = min_t(int, num_online_cpus(), wx->mac.max_msix_vectors);
+
+	wx->msix_entries = kcalloc(nvecs,
+				   sizeof(struct msix_entry),
+				   GFP_KERNEL);
+	if (!wx->msix_entries)
+		return -ENOMEM;
+
+	nvecs = pci_alloc_irq_vectors_affinity(wx->pdev, nvecs,
+					       nvecs,
+					       PCI_IRQ_MSIX | PCI_IRQ_AFFINITY,
+					       &affd);
+	if (nvecs < 0) {
+		wx_err(wx, "Failed to allocate MSI-X interrupts. Err: %d\n", nvecs);
+		kfree(wx->msix_entries);
+		wx->msix_entries = NULL;
+		return nvecs;
+	}
+
+	for (i = 0; i < nvecs; i++) {
+		wx->msix_entries[i].entry = i;
+		wx->msix_entries[i].vector = pci_irq_vector(wx->pdev, i);
+	}
+
+	/* one for msix_other */
+	nvecs -= 1;
+	wx->num_q_vectors = nvecs;
+	wx->num_rx_queues = nvecs;
+	wx->num_tx_queues = nvecs;
+
+	return 0;
+}
+
+/**
+ * wx_set_interrupt_capability - set MSI-X or MSI if supported
+ * @wx: board private structure to initialize
+ *
+ * Attempt to configure the interrupts using the best available
+ * capabilities of the hardware and the kernel.
+ **/
+static int wx_set_interrupt_capability(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+	int nvecs, ret;
+
+	/* We will try to get MSI-X interrupts first */
+	ret = wx_acquire_msix_vectors(wx);
+	if (ret == 0 || (ret == -ENOMEM))
+		return ret;
+
+	wx->num_rx_queues = 1;
+	wx->num_tx_queues = 1;
+	wx->num_q_vectors = 1;
+
+	/* minimum one for queue, one for misc */
+	nvecs = 1;
+	nvecs = pci_alloc_irq_vectors(pdev, nvecs,
+				      nvecs, PCI_IRQ_MSI | PCI_IRQ_LEGACY);
+	if (nvecs == 1) {
+		if (pdev->msi_enabled)
+			wx_err(wx, "Fallback to MSI.\n");
+		else
+			wx_err(wx, "Fallback to LEGACY.\n");
+	} else {
+		wx_err(wx, "Failed to allocate MSI/LEGACY interrupts. Error: %d\n", nvecs);
+		return nvecs;
+	}
+
+	pdev->irq = pci_irq_vector(pdev, 0);
+
+	return 0;
+}
+
+/**
+ * wx_cache_ring_rss - Descriptor ring to register mapping for RSS
+ * @wx: board private structure to initialize
+ *
+ * Cache the descriptor ring offsets for RSS.
+ *
+ **/
+static void wx_cache_ring_rss(struct wx *wx)
+{
+	u16 i;
+
+	for (i = 0; i < wx->num_rx_queues; i++)
+		wx->rx_ring[i]->reg_idx = i;
+
+	for (i = 0; i < wx->num_tx_queues; i++)
+		wx->tx_ring[i]->reg_idx = i;
+}
+
+static void wx_add_ring(struct wx_ring *ring, struct wx_ring_container *head)
+{
+	ring->next = head->ring;
+	head->ring = ring;
+	head->count++;
+}
+
+/**
+ * wx_alloc_q_vector - Allocate memory for a single interrupt vector
+ * @wx: board private structure to initialize
+ * @v_count: q_vectors allocated on wx, used for ring interleaving
+ * @v_idx: index of vector in wx struct
+ * @txr_count: total number of Tx rings to allocate
+ * @txr_idx: index of first Tx ring to allocate
+ * @rxr_count: total number of Rx rings to allocate
+ * @rxr_idx: index of first Rx ring to allocate
+ *
+ * We allocate one q_vector.  If allocation fails we return -ENOMEM.
+ **/
+static int wx_alloc_q_vector(struct wx *wx,
+			     unsigned int v_count, unsigned int v_idx,
+			     unsigned int txr_count, unsigned int txr_idx,
+			     unsigned int rxr_count, unsigned int rxr_idx)
+{
+	struct wx_q_vector *q_vector;
+	int ring_count, default_itr;
+	struct wx_ring *ring;
+
+	/* note this will allocate space for the ring structure as well! */
+	ring_count = txr_count + rxr_count;
+
+	q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
+			   GFP_KERNEL);
+	if (!q_vector)
+		return -ENOMEM;
+
+	/* initialize NAPI */
+	netif_napi_add(wx->netdev, &q_vector->napi,
+		       wx_poll);
+
+	/* tie q_vector and wx together */
+	wx->q_vector[v_idx] = q_vector;
+	q_vector->wx = wx;
+	q_vector->v_idx = v_idx;
+
+	/* initialize work limits */
+	q_vector->tx.work_limit = wx->tx_work_limit;
+	q_vector->rx.work_limit = wx->rx_work_limit;
+
+	/* initialize pointer to rings */
+	ring = q_vector->ring;
+
+	if (wx->mac.type == wx_mac_sp)
+		default_itr = WX_12K_ITR;
+	else
+		default_itr = WX_7K_ITR;
+	/* initialize ITR */
+	if (txr_count && !rxr_count)
+		/* tx only vector */
+		q_vector->itr = wx->tx_itr_setting ?
+				default_itr : wx->tx_itr_setting;
+	else
+		/* rx or rx/tx vector */
+		q_vector->itr = wx->rx_itr_setting ?
+				default_itr : wx->rx_itr_setting;
+
+	while (txr_count) {
+		/* assign generic ring traits */
+		ring->dev = &wx->pdev->dev;
+		ring->netdev = wx->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Tx values */
+		wx_add_ring(ring, &q_vector->tx);
+
+		/* apply Tx specific ring traits */
+		ring->count = wx->tx_ring_count;
+
+		ring->queue_index = txr_idx;
+
+		/* assign ring to wx */
+		wx->tx_ring[txr_idx] = ring;
+
+		/* update count and index */
+		txr_count--;
+		txr_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	while (rxr_count) {
+		/* assign generic ring traits */
+		ring->dev = &wx->pdev->dev;
+		ring->netdev = wx->netdev;
+
+		/* configure backlink on ring */
+		ring->q_vector = q_vector;
+
+		/* update q_vector Rx values */
+		wx_add_ring(ring, &q_vector->rx);
+
+		/* apply Rx specific ring traits */
+		ring->count = wx->rx_ring_count;
+		ring->queue_index = rxr_idx;
+
+		/* assign ring to wx */
+		wx->rx_ring[rxr_idx] = ring;
+
+		/* update count and index */
+		rxr_count--;
+		rxr_idx += v_count;
+
+		/* push pointer to next ring */
+		ring++;
+	}
+
+	return 0;
+}
+
+/**
+ * wx_free_q_vector - Free memory allocated for specific interrupt vector
+ * @wx: board private structure to initialize
+ * @v_idx: Index of vector to be freed
+ *
+ * This function frees the memory allocated to the q_vector.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void wx_free_q_vector(struct wx *wx, int v_idx)
+{
+	struct wx_q_vector *q_vector = wx->q_vector[v_idx];
+	struct wx_ring *ring;
+
+	wx_for_each_ring(ring, q_vector->tx)
+		wx->tx_ring[ring->queue_index] = NULL;
+
+	wx_for_each_ring(ring, q_vector->rx)
+		wx->rx_ring[ring->queue_index] = NULL;
+
+	wx->q_vector[v_idx] = NULL;
+	netif_napi_del(&q_vector->napi);
+	kfree_rcu(q_vector, rcu);
+}
+
+/**
+ * wx_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @wx: board private structure to initialize
+ *
+ * We allocate one q_vector per queue interrupt.  If allocation fails we
+ * return -ENOMEM.
+ **/
+static int wx_alloc_q_vectors(struct wx *wx)
+{
+	unsigned int rxr_idx = 0, txr_idx = 0, v_idx = 0;
+	unsigned int rxr_remaining = wx->num_rx_queues;
+	unsigned int txr_remaining = wx->num_tx_queues;
+	unsigned int q_vectors = wx->num_q_vectors;
+	int rqpv, tqpv;
+	int err;
+
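+	/* spread the rings as evenly as possible across the vectors,
+	 * e.g. four Rx rings over three vectors come out as 2, 1, 1
+	 */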
+	for (; v_idx < q_vectors; v_idx++) {
+		rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
+		tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
+		err = wx_alloc_q_vector(wx, q_vectors, v_idx,
+					tqpv, txr_idx,
+					rqpv, rxr_idx);
+
+		if (err)
+			goto err_out;
+
+		/* update counts and index */
+		rxr_remaining -= rqpv;
+		txr_remaining -= tqpv;
+		rxr_idx++;
+		txr_idx++;
+	}
+
+	return 0;
+
+err_out:
+	wx->num_tx_queues = 0;
+	wx->num_rx_queues = 0;
+	wx->num_q_vectors = 0;
+
+	while (v_idx--)
+		wx_free_q_vector(wx, v_idx);
+
+	return -ENOMEM;
+}
+
+/**
+ * wx_free_q_vectors - Free memory allocated for interrupt vectors
+ * @wx: board private structure to initialize
+ *
+ * This function frees the memory allocated to the q_vectors.  In addition if
+ * NAPI is enabled it will delete any references to the NAPI struct prior
+ * to freeing the q_vector.
+ **/
+static void wx_free_q_vectors(struct wx *wx)
+{
+	int v_idx = wx->num_q_vectors;
+
+	wx->num_tx_queues = 0;
+	wx->num_rx_queues = 0;
+	wx->num_q_vectors = 0;
+
+	while (v_idx--)
+		wx_free_q_vector(wx, v_idx);
+}
+
+void wx_reset_interrupt_capability(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+
+	if (!pdev->msi_enabled && !pdev->msix_enabled)
+		return;
+
+	pci_free_irq_vectors(wx->pdev);
+	if (pdev->msix_enabled) {
+		kfree(wx->msix_entries);
+		wx->msix_entries = NULL;
+	}
+}
+EXPORT_SYMBOL(wx_reset_interrupt_capability);
+
+/**
+ * wx_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @wx: board private structure to clear interrupt scheme on
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+void wx_clear_interrupt_scheme(struct wx *wx)
+{
+	wx_free_q_vectors(wx);
+	wx_reset_interrupt_capability(wx);
+}
+EXPORT_SYMBOL(wx_clear_interrupt_scheme);
+
+int wx_init_interrupt_scheme(struct wx *wx)
+{
+	int ret;
+
+	/* Number of supported queues */
+	wx_set_num_queues(wx);
+
+	/* Set interrupt mode */
+	ret = wx_set_interrupt_capability(wx);
+	if (ret) {
+		wx_err(wx, "Failed to allocate irq vectors.\n");
+		return ret;
+	}
+
+	/* Allocate memory for queues */
+	ret = wx_alloc_q_vectors(wx);
+	if (ret) {
+		wx_err(wx, "Unable to allocate memory for queue vectors.\n");
+		wx_reset_interrupt_capability(wx);
+		return ret;
+	}
+
+	wx_cache_ring_rss(wx);
+
+	return 0;
+}
+EXPORT_SYMBOL(wx_init_interrupt_scheme);
+
+irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data)
+{
+	struct wx_q_vector *q_vector = data;
+
+	/* EIAM disabled interrupts (on this vector) for us */
+	if (q_vector->rx.ring || q_vector->tx.ring)
+		napi_schedule_irqoff(&q_vector->napi);
+
+	return IRQ_HANDLED;
+}
+EXPORT_SYMBOL(wx_msix_clean_rings);
+
+void wx_free_irq(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+	int vector;
+
+	if (!(pdev->msix_enabled)) {
+		free_irq(pdev->irq, wx);
+		return;
+	}
+
+	for (vector = 0; vector < wx->num_q_vectors; vector++) {
+		struct wx_q_vector *q_vector = wx->q_vector[vector];
+		struct msix_entry *entry = &wx->msix_entries[vector];
+
+		/* free only the irqs that were actually requested */
+		if (!q_vector->rx.ring && !q_vector->tx.ring)
+			continue;
+
+		free_irq(entry->vector, q_vector);
+	}
+
+	free_irq(wx->msix_entries[vector].vector, wx);
+}
+EXPORT_SYMBOL(wx_free_irq);
+
+/**
+ * wx_setup_isb_resources - allocate interrupt status resources
+ * @wx: board private structure
+ *
+ * Return 0 on success, negative on failure
+ **/
+int wx_setup_isb_resources(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+
+	wx->isb_mem = dma_alloc_coherent(&pdev->dev,
+					 sizeof(u32) * 4,
+					 &wx->isb_dma,
+					 GFP_KERNEL);
+	if (!wx->isb_mem) {
+		wx_err(wx, "Alloc isb_mem failed\n");
+		return -ENOMEM;
+	}
+	memset(wx->isb_mem, 0, sizeof(u32) * 4);
+	return 0;
+}
+EXPORT_SYMBOL(wx_setup_isb_resources);
+
+/**
+ * wx_free_isb_resources - free interrupt status block resources
+ * @wx: board private structure
+ **/
+void wx_free_isb_resources(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+
+	dma_free_coherent(&pdev->dev, sizeof(u32) * 4,
+			  wx->isb_mem, wx->isb_dma);
+	wx->isb_mem = NULL;
+}
+EXPORT_SYMBOL(wx_free_isb_resources);
+
+u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx)
+{
+	u32 cur_tag = 0;
+
+	cur_tag = wx->isb_mem[WX_ISB_HEADER];
+	wx->isb_tag[idx] = cur_tag;
+
+	return (__force u32)cpu_to_le32(wx->isb_mem[idx]);
+}
+EXPORT_SYMBOL(wx_misc_isb);
+
+/**
+ * wx_set_ivar - set the IVAR registers, mapping interrupt causes to vectors
+ * @wx: pointer to wx struct
+ * @direction: 0 for Rx, 1 for Tx, -1 for other causes
+ * @queue: queue to map the corresponding interrupt to
+ * @msix_vector: the vector to map to the corresponding queue
+ *
+ **/
+static void wx_set_ivar(struct wx *wx, s8 direction,
+			u16 queue, u16 msix_vector)
+{
+	u32 ivar, index;
+
+	if (direction == -1) {
+		/* other causes */
+		msix_vector |= WX_PX_IVAR_ALLOC_VAL;
+		index = 0;
+		ivar = rd32(wx, WX_PX_MISC_IVAR);
+		ivar &= ~(0xFF << index);
+		ivar |= (msix_vector << index);
+		wr32(wx, WX_PX_MISC_IVAR, ivar);
+	} else {
+		/* tx or rx causes */
+		msix_vector |= WX_PX_IVAR_ALLOC_VAL;
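+		/* each 32-bit IVAR covers two queues, one byte per cause:
+		 * byte 0 Rx(even q), byte 1 Tx(even q), byte 2 Rx(odd q),
+		 * byte 3 Tx(odd q)
+		 */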
+		index = ((16 * (queue & 1)) + (8 * direction));
+		ivar = rd32(wx, WX_PX_IVAR(queue >> 1));
+		ivar &= ~(0xFF << index);
+		ivar |= (msix_vector << index);
+		wr32(wx, WX_PX_IVAR(queue >> 1), ivar);
+	}
+}
+
+/**
+ * wx_write_eitr - write EITR register in hardware specific way
+ * @q_vector: structure containing interrupt and ring information
+ *
+ * This function is made to be called by ethtool and by the driver
+ * when it needs to update EITR registers at runtime.  Hardware
+ * specific quirks/differences are taken care of here.
+ */
+static void wx_write_eitr(struct wx_q_vector *q_vector)
+{
+	struct wx *wx = q_vector->wx;
+	int v_idx = q_vector->v_idx;
+	u32 itr_reg;
+
+	if (wx->mac.type == wx_mac_sp)
+		itr_reg = q_vector->itr & WX_SP_MAX_EITR;
+	else
+		itr_reg = q_vector->itr & WX_EM_MAX_EITR;
+
+	itr_reg |= WX_PX_ITR_CNT_WDIS;
+
+	wr32(wx, WX_PX_ITR(v_idx), itr_reg);
+}
+
+/**
+ * wx_configure_vectors - Configure vectors for hardware
+ * @wx: board private structure
+ *
+ * wx_configure_vectors sets up the hardware to properly generate MSI-X/MSI/LEGACY
+ * interrupts.
+ **/
+void wx_configure_vectors(struct wx *wx)
+{
+	struct pci_dev *pdev = wx->pdev;
+	u32 eitrsel = 0;
+	u16 v_idx;
+
+	if (pdev->msix_enabled) {
+		/* Populate MSIX to EITR Select */
+		wr32(wx, WX_PX_ITRSEL, eitrsel);
+		/* use EIAM to auto-mask when MSI-X interrupt is asserted
+		 * this saves a register write for every interrupt
+		 */
+		wr32(wx, WX_PX_GPIE, WX_PX_GPIE_MODEL);
+	} else {
+		/* legacy interrupts, use EIAM to auto-mask when reading EICR,
+		 * specifically only auto mask tx and rx interrupts.
+		 */
+		wr32(wx, WX_PX_GPIE, 0);
+	}
+
+	/* Populate the IVAR table and set the ITR values to the
+	 * corresponding register.
+	 */
+	for (v_idx = 0; v_idx < wx->num_q_vectors; v_idx++) {
+		struct wx_q_vector *q_vector = wx->q_vector[v_idx];
+		struct wx_ring *ring;
+
+		wx_for_each_ring(ring, q_vector->rx)
+			wx_set_ivar(wx, 0, ring->reg_idx, v_idx);
+
+		wx_for_each_ring(ring, q_vector->tx)
+			wx_set_ivar(wx, 1, ring->reg_idx, v_idx);
+
+		wx_write_eitr(q_vector);
+	}
+
+	wx_set_ivar(wx, -1, 0, v_idx);
+	if (pdev->msix_enabled)
+		wr32(wx, WX_PX_ITR(v_idx), 1950);
+}
+EXPORT_SYMBOL(wx_configure_vectors);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
new file mode 100644
index 000000000000..8ae657155f34
--- /dev/null
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * WangXun Gigabit PCI Express Linux driver
+ * Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd.
+ */
+
+#ifndef _WX_LIB_H_
+#define _WX_LIB_H_
+
+void wx_reset_interrupt_capability(struct wx *wx);
+void wx_clear_interrupt_scheme(struct wx *wx);
+int wx_init_interrupt_scheme(struct wx *wx);
+irqreturn_t wx_msix_clean_rings(int __always_unused irq, void *data);
+void wx_free_irq(struct wx *wx);
+int wx_setup_isb_resources(struct wx *wx);
+void wx_free_isb_resources(struct wx *wx);
+u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx);
+void wx_configure_vectors(struct wx *wx);
+
+#endif /* _WX_LIB_H_ */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index c86a37914d43..7368c681221f 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -5,6 +5,7 @@
 #define _WX_TYPE_H_
 
 #include <linux/bitfield.h>
+#include <linux/netdevice.h>
 
 /* Vendor ID */
 #ifndef PCI_VENDOR_ID_WANGXUN
@@ -66,6 +67,22 @@
 #define WX_CFG_PORT_CTL              0x14400
 #define WX_CFG_PORT_CTL_DRV_LOAD     BIT(3)
 
+/* GPIO Registers */
+#define WX_GPIO_DR                   0x14800
+#define WX_GPIO_DR_0                 BIT(0) /* SDP0 Data Value */
+#define WX_GPIO_DR_1                 BIT(1) /* SDP1 Data Value */
+#define WX_GPIO_DDR                  0x14804
+#define WX_GPIO_DDR_0                BIT(0) /* SDP0 IO direction */
+#define WX_GPIO_DDR_1                BIT(1) /* SDP1 IO direction */
+#define WX_GPIO_CTL                  0x14808
+#define WX_GPIO_INTEN                0x14830
+#define WX_GPIO_INTEN_0              BIT(0)
+#define WX_GPIO_INTEN_1              BIT(1)
+#define WX_GPIO_INTMASK              0x14834
+#define WX_GPIO_INTTYPE_LEVEL        0x14838
+#define WX_GPIO_POLARITY             0x1483C
+#define WX_GPIO_EOI                  0x1484C
+
 /*********************** Transmit DMA registers **************************/
 /* transmit global control */
 #define WX_TDM_CTL                   0x18000
@@ -151,8 +168,28 @@
 /* Interrupt Registers */
 #define WX_BME_CTL                   0x12020
 #define WX_PX_MISC_IC                0x100
+#define WX_PX_MISC_ICS               0x104
+#define WX_PX_MISC_IEN               0x108
+#define WX_PX_INTA                   0x110
+#define WX_PX_GPIE                   0x118
+#define WX_PX_GPIE_MODEL             BIT(0)
+#define WX_PX_IC                     0x120
 #define WX_PX_IMS(_i)                (0x140 + (_i) * 4)
+#define WX_PX_IMC(_i)                (0x150 + (_i) * 4)
+#define WX_PX_ISB_ADDR_L             0x160
+#define WX_PX_ISB_ADDR_H             0x164
 #define WX_PX_TRANSACTION_PENDING    0x168
+#define WX_PX_ITRSEL                 0x180
+#define WX_PX_ITR(_i)                (0x200 + (_i) * 4)
+#define WX_PX_ITR_CNT_WDIS           BIT(31)
+#define WX_PX_MISC_IVAR              0x4FC
+#define WX_PX_IVAR(_i)               (0x500 + (_i) * 4)
+
+#define WX_PX_IVAR_ALLOC_VAL         0x80 /* Interrupt Allocation valid */
+#define WX_7K_ITR                    595
+#define WX_12K_ITR                   336
+#define WX_SP_MAX_EITR               0x00000FF8U
+#define WX_EM_MAX_EITR               0x00007FFCU
 
 /* transmit DMA Registers */
 #define WX_PX_TR_CFG(_i)             (0x03010 + ((_i) * 0x40))
@@ -312,6 +349,59 @@ enum wx_reset_type {
 	WX_GLOBAL_RESET
 };
 
+/* iterator for handling rings in ring container */
+#define wx_for_each_ring(posm, headm) \
+	for (posm = (headm).ring; posm; posm = posm->next)
+
+struct wx_ring_container {
+	struct wx_ring *ring;           /* pointer to linked list of rings */
+	u16 work_limit;                 /* total work allowed per interrupt */
+	u8 count;                       /* total number of rings in vector */
+	u8 itr;                         /* current ITR setting for ring */
+};
+
+struct wx_ring {
+	struct wx_ring *next;           /* pointer to next ring in q_vector */
+	struct wx_q_vector *q_vector;   /* backpointer to host q_vector */
+	struct net_device *netdev;      /* netdev ring belongs to */
+	struct device *dev;             /* device for DMA mapping */
+
+	u16 count;                      /* amount of descriptors */
+
+	u8 queue_index; /* needed for multiqueue queue management */
+	u8 reg_idx;                     /* holds the special value that gets
+					 * the hardware register offset
+					 * associated with this ring, which is
+					 * different for DCB and RSS modes
+					 */
+} ____cacheline_internodealigned_in_smp;
+
+struct wx_q_vector {
+	struct wx *wx;
+	int cpu;        /* CPU for DCA */
+	u16 v_idx;      /* index of q_vector within array, also used for
+			 * finding the bit in EICR and friends that
+			 * represents the vector for this ring
+			 */
+	u16 itr;        /* Interrupt throttle rate written to EITR */
+	struct wx_ring_container rx, tx;
+	struct napi_struct napi;
+	struct rcu_head rcu;    /* to avoid race with update stats on free */
+
+	char name[IFNAMSIZ + 17];
+
+	/* for dynamic allocation of rings associated with this q_vector */
+	struct wx_ring ring[0] ____cacheline_internodealigned_in_smp;
+};
+
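+/* indices into the four-word DMA'd interrupt status block (ISB):
+ * a header tag, the misc cause word, and two queue-vector status words
+ */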
+enum wx_isb_idx {
+	WX_ISB_HEADER,
+	WX_ISB_MISC,
+	WX_ISB_VEC0,
+	WX_ISB_VEC1,
+	WX_ISB_MAX
+};
+
 struct wx {
 	u8 __iomem *hw_addr;
 	struct pci_dev *pdev;
@@ -360,6 +450,18 @@ struct wx {
 	u32 tx_ring_count;
 	u32 rx_ring_count;
 
+	struct wx_ring *tx_ring[64] ____cacheline_aligned_in_smp;
+	struct wx_ring *rx_ring[64];
+	struct wx_q_vector *q_vector[64];
+
+	unsigned int queues_per_pool;
+	struct msix_entry *msix_entries;
+
+	/* misc interrupt status block */
+	dma_addr_t isb_dma;
+	u32 *isb_mem;
+	u32 isb_tag[WX_ISB_MAX];
+
 #define WX_MAX_RETA_ENTRIES 128
 	u8 rss_indir_tbl[WX_MAX_RETA_ENTRIES];
 
-- 
2.39.1



* [PATCH net-next v2 02/10] net: ngbe: Add irqs request flow
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 01/10] net: libwx: Add irq flow functions Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 03/10] net: txgbe: Add interrupt support Mengyuan Lou
                   ` (7 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

Add request_irq for Tx/Rx rings and other misc events.
If the requests succeed, configure vectors for the interrupts.
Enable the base interrupt mask in ngbe_irq_enable.
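
The interrupt mask places one bit per queue vector starting at bit 0,
with the misc (other) cause in the next bit up. A sketch assuming eight
queue vectors (the EXAMPLE_* names are hypothetical):

  #include <linux/bits.h>

  #define EXAMPLE_NUM_Q_VECTORS 8
  /* misc cause sits just above the queue bits */
  #define EXAMPLE_INTR_MISC     BIT(EXAMPLE_NUM_Q_VECTORS)        /* 0x100 */
  /* all queue bits plus the misc bit, cf. NGBE_INTR_ALL == 0x1FF */
  #define EXAMPLE_INTR_ALL      GENMASK(EXAMPLE_NUM_Q_VECTORS, 0) /* 0x1ff */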

Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 189 +++++++++++++++++-
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h |  14 ++
 2 files changed, 202 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index ed52f80b5475..a9772f929274 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -13,6 +13,7 @@
 
 #include "../libwx/wx_type.h"
 #include "../libwx/wx_hw.h"
+#include "../libwx/wx_lib.h"
 #include "ngbe_type.h"
 #include "ngbe_mdio.h"
 #include "ngbe_hw.h"
@@ -148,6 +149,161 @@ static int ngbe_sw_init(struct wx *wx)
 	return 0;
 }
 
+/**
+ * ngbe_irq_enable - Enable default interrupt generation settings
+ * @wx: board private structure
+ * @queues: true to also enable the per-queue interrupts
+ **/
+static void ngbe_irq_enable(struct wx *wx, bool queues)
+{
+	u32 mask;
+
+	/* enable misc interrupt */
+	mask = NGBE_PX_MISC_IEN_MASK;
+
+	wr32(wx, WX_GPIO_DDR, WX_GPIO_DDR_0);
+	wr32(wx, WX_GPIO_INTEN, WX_GPIO_INTEN_0 | WX_GPIO_INTEN_1);
+	wr32(wx, WX_GPIO_INTTYPE_LEVEL, 0x0);
+	wr32(wx, WX_GPIO_POLARITY, wx->gpio_ctrl ? 0 : 0x3);
+
+	wr32(wx, WX_PX_MISC_IEN, mask);
+
+	/* unmask interrupt */
+	if (queues)
+		wx_intr_enable(wx, NGBE_INTR_ALL);
+	else
+		wx_intr_enable(wx, NGBE_INTR_MISC(wx));
+}
+
+/**
+ * ngbe_intr - msi/legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t ngbe_intr(int __always_unused irq, void *data)
+{
+	struct wx_q_vector *q_vector;
+	struct wx *wx  = data;
+	struct pci_dev *pdev;
+	u32 eicr;
+
+	q_vector = wx->q_vector[0];
+	pdev = wx->pdev;
+
+	eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+	if (!eicr) {
+		/* shared interrupt alert!
+		 * re-enable the interrupts we masked before the EICR read
+		 */
+		if (netif_running(wx->netdev))
+			ngbe_irq_enable(wx, true);
+		return IRQ_NONE;        /* Not our interrupt */
+	}
+	wx->isb_mem[WX_ISB_VEC0] = 0;
+	if (!(pdev->msi_enabled))
+		wr32(wx, WX_PX_INTA, 1);
+
+	wx->isb_mem[WX_ISB_MISC] = 0;
+	/* would disable interrupts here but it is auto disabled */
+	napi_schedule_irqoff(&q_vector->napi);
+
+	if (netif_running(wx->netdev))
+		ngbe_irq_enable(wx, false);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t ngbe_msix_other(int __always_unused irq, void *data)
+{
+	struct wx *wx = data;
+
+	/* re-enable the original interrupt state, no lsc, no queues */
+	if (netif_running(wx->netdev))
+		ngbe_irq_enable(wx, false);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ngbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @wx: board private structure
+ *
+ * ngbe_request_msix_irqs requests MSI-X interrupts from the kernel
+ * for the vectors that were already allocated.
+ **/
+static int ngbe_request_msix_irqs(struct wx *wx)
+{
+	struct net_device *netdev = wx->netdev;
+	int vector, err;
+
+	for (vector = 0; vector < wx->num_q_vectors; vector++) {
+		struct wx_q_vector *q_vector = wx->q_vector[vector];
+		struct msix_entry *entry = &wx->msix_entries[vector];
+
+		if (q_vector->tx.ring && q_vector->rx.ring)
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-TxRx-%d", netdev->name, entry->entry);
+		else
+			/* skip this unused q_vector */
+			continue;
+
+		err = request_irq(entry->vector, wx_msix_clean_rings, 0,
+				  q_vector->name, q_vector);
+		if (err) {
+			wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n",
+			       q_vector->name, err);
+			goto free_queue_irqs;
+		}
+	}
+
+	err = request_irq(wx->msix_entries[vector].vector,
+			  ngbe_msix_other, 0, netdev->name, wx);
+
+	if (err) {
+		wx_err(wx, "request_irq for msix_other failed: %d\n", err);
+		goto free_queue_irqs;
+	}
+
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		free_irq(wx->msix_entries[vector].vector,
+			 wx->q_vector[vector]);
+	}
+	wx_reset_interrupt_capability(wx);
+	return err;
+}
+
+/**
+ * ngbe_request_irq - initialize interrupts
+ * @wx: board private structure
+ *
+ * Attempts to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int ngbe_request_irq(struct wx *wx)
+{
+	struct net_device *netdev = wx->netdev;
+	struct pci_dev *pdev = wx->pdev;
+	int err;
+
+	if (pdev->msix_enabled)
+		err = ngbe_request_msix_irqs(wx);
+	else if (pdev->msi_enabled)
+		err = request_irq(pdev->irq, ngbe_intr, 0,
+				  netdev->name, wx);
+	else
+		err = request_irq(pdev->irq, ngbe_intr, IRQF_SHARED,
+				  netdev->name, wx);
+
+	if (err)
+		wx_err(wx, "request_irq failed, Error %d\n", err);
+
+	return err;
+}
+
 static void ngbe_disable_device(struct wx *wx)
 {
 	struct net_device *netdev = wx->netdev;
@@ -157,6 +313,7 @@ static void ngbe_disable_device(struct wx *wx)
 	netif_tx_disable(netdev);
 	if (wx->gpio_ctrl)
 		ngbe_sfp_modules_txrx_powerctl(wx, false);
+	wx_irq_disable(wx);
 }
 
 static void ngbe_down(struct wx *wx)
@@ -167,8 +324,15 @@ static void ngbe_down(struct wx *wx)
 
 static void ngbe_up(struct wx *wx)
 {
+	wx_configure_vectors(wx);
+
+	/* clear any pending interrupts, may auto mask */
+	rd32(wx, WX_PX_IC);
+	rd32(wx, WX_PX_MISC_IC);
+	ngbe_irq_enable(wx, true);
 	if (wx->gpio_ctrl)
 		ngbe_sfp_modules_txrx_powerctl(wx, true);
+
 	phy_start(wx->phydev);
 }
 
@@ -187,12 +351,26 @@ static int ngbe_open(struct net_device *netdev)
 	int err;
 
 	wx_control_hw(wx, true);
+
+	err = wx_setup_isb_resources(wx);
+	if (err)
+		return err;
+
+	wx_configure(wx);
+
+	err = ngbe_request_irq(wx);
+	if (err)
+		goto err_req_irq;
+
 	err = ngbe_phy_connect(wx);
 	if (err)
 		return err;
 	ngbe_up(wx);
 
 	return 0;
+err_req_irq:
+	wx_free_isb_resources(wx);
+	return err;
 }
 
 /**
@@ -211,6 +389,8 @@ static int ngbe_close(struct net_device *netdev)
 	struct wx *wx = netdev_priv(netdev);
 
 	ngbe_down(wx);
+	wx_free_irq(wx);
+	wx_free_isb_resources(wx);
 	phy_disconnect(wx->phydev);
 	wx_control_hw(wx, false);
 
@@ -411,10 +591,14 @@ static int ngbe_probe(struct pci_dev *pdev,
 	eth_hw_addr_set(netdev, wx->mac.perm_addr);
 	wx_mac_set_default_filter(wx, wx->mac.perm_addr);
 
+	err = wx_init_interrupt_scheme(wx);
+	if (err)
+		goto err_free_mac_table;
+
 	/* phy Interface Configuration */
 	err = ngbe_mdio_init(wx);
 	if (err)
-		goto err_free_mac_table;
+		goto err_clear_interrupt_scheme;
 
 	err = register_netdev(netdev);
 	if (err)
@@ -431,6 +615,8 @@ static int ngbe_probe(struct pci_dev *pdev,
 
 err_register:
 	wx_control_hw(wx, false);
+err_clear_interrupt_scheme:
+	wx_clear_interrupt_scheme(wx);
 err_free_mac_table:
 	kfree(wx->mac_table);
 err_pci_release_regions:
@@ -462,6 +648,7 @@ static void ngbe_remove(struct pci_dev *pdev)
 				     pci_select_bars(pdev, IORESOURCE_MEM));
 
 	kfree(wx->mac_table);
+	wx_clear_interrupt_scheme(wx);
 	pci_disable_pcie_error_reporting(pdev);
 
 	pci_disable_device(pdev);
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index fd71260f73de..b60f5f0f64fa 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -90,6 +90,20 @@ enum NGBE_MSCA_CMD_value {
 #define NGBE_GPIO_DDR_0				BIT(0) /* SDP0 IO direction */
 #define NGBE_GPIO_DDR_1				BIT(1) /* SDP1 IO direction */
 
+/* Extended Interrupt Enable Set */
+#define NGBE_PX_MISC_IEN_DEV_RST		BIT(10)
+#define NGBE_PX_MISC_IEN_ETH_LK			BIT(18)
+#define NGBE_PX_MISC_IEN_INT_ERR		BIT(20)
+#define NGBE_PX_MISC_IEN_GPIO			BIT(26)
+#define NGBE_PX_MISC_IEN_MASK ( \
+				NGBE_PX_MISC_IEN_DEV_RST | \
+				NGBE_PX_MISC_IEN_ETH_LK | \
+				NGBE_PX_MISC_IEN_INT_ERR | \
+				NGBE_PX_MISC_IEN_GPIO)
+
+#define NGBE_INTR_ALL				0x1FF
+#define NGBE_INTR_MISC(A)			BIT((A)->num_q_vectors)
+
 #define NGBE_PHY_CONFIG(reg_offset)		(0x14000 + ((reg_offset) * 4))
 #define NGBE_CFG_LAN_SPEED			0x14440
 #define NGBE_CFG_PORT_ST			0x14404
-- 
2.39.1



* [PATCH net-next v2 03/10] net: txgbe: Add interrupt support
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 01/10] net: libwx: Add irq flow functions Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 02/10] net: ngbe: Add irqs request flow Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 04/10] net: libwx: Configure Rx and Tx unit on hardware Mengyuan Lou
                   ` (6 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu

From: Jiawen Wu <jiawenwu@trustnetic.com>

Determine the proper interrupt scheme to enable, and handle interrupts.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   | 201 ++++++++++++++++++
 .../net/ethernet/wangxun/txgbe/txgbe_type.h   |  18 ++
 2 files changed, 219 insertions(+)

diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index aa4d09df3b01..48cca2fb54c7 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -11,6 +11,7 @@
 #include <net/ip.h>
 
 #include "../libwx/wx_type.h"
+#include "../libwx/wx_lib.h"
 #include "../libwx/wx_hw.h"
 #include "txgbe_type.h"
 #include "txgbe_hw.h"
@@ -72,9 +73,158 @@ static int txgbe_enumerate_functions(struct wx *wx)
 	return physfns;
 }
 
+/**
+ * txgbe_irq_enable - Enable default interrupt generation settings
+ * @wx: pointer to private structure
+ * @queues: enable irqs for queues
+ **/
+static void txgbe_irq_enable(struct wx *wx, bool queues)
+{
+	/* unmask interrupt */
+	wx_intr_enable(wx, TXGBE_INTR_MISC(wx));
+	if (queues)
+		wx_intr_enable(wx, TXGBE_INTR_QALL(wx));
+}
+
+/**
+ * txgbe_intr - msi/legacy mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a network interface device structure
+ **/
+static irqreturn_t txgbe_intr(int __always_unused irq, void *data)
+{
+	struct wx_q_vector *q_vector;
+	struct wx *wx  = data;
+	struct pci_dev *pdev;
+	u32 eicr;
+
+	q_vector = wx->q_vector[0];
+	pdev = wx->pdev;
+
+	eicr = wx_misc_isb(wx, WX_ISB_VEC0);
+	if (!eicr) {
+		/* shared interrupt alert!
+		 * re-enable the interrupts we masked before the ICR read
+		 */
+		if (netif_running(wx->netdev))
+			txgbe_irq_enable(wx, true);
+		return IRQ_NONE;        /* Not our interrupt */
+	}
+	wx->isb_mem[WX_ISB_VEC0] = 0;
+	if (!(pdev->msi_enabled))
+		wr32(wx, WX_PX_INTA, 1);
+
+	wx->isb_mem[WX_ISB_MISC] = 0;
+	/* would disable interrupts here but it is auto disabled */
+	napi_schedule_irqoff(&q_vector->napi);
+
+	/* re-enable link (maybe) and non-queue interrupts, no flush.
+	 * txgbe_poll will re-enable the queue interrupts
+	 */
+	if (netif_running(wx->netdev))
+		txgbe_irq_enable(wx, false);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t txgbe_msix_other(int __always_unused irq, void *data)
+{
+	struct wx *wx = data;
+
+	/* re-enable the original interrupt state */
+	if (netif_running(wx->netdev))
+		txgbe_irq_enable(wx, false);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * txgbe_request_msix_irqs - Initialize MSI-X interrupts
+ * @wx: board private structure
+ *
+ * Request MSI-X interrupts from the kernel for the vectors that were
+ * already allocated.
+ **/
+static int txgbe_request_msix_irqs(struct wx *wx)
+{
+	struct net_device *netdev = wx->netdev;
+	int vector, err;
+
+	for (vector = 0; vector < wx->num_q_vectors; vector++) {
+		struct wx_q_vector *q_vector = wx->q_vector[vector];
+		struct msix_entry *entry = &wx->msix_entries[vector];
+
+		if (q_vector->tx.ring && q_vector->rx.ring)
+			snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+				 "%s-TxRx-%d", netdev->name, entry->entry);
+		else
+			/* skip this unused q_vector */
+			continue;
+
+		err = request_irq(entry->vector, wx_msix_clean_rings, 0,
+				  q_vector->name, q_vector);
+		if (err) {
+			wx_err(wx, "request_irq failed for MSIX interrupt %s Error: %d\n",
+			       q_vector->name, err);
+			goto free_queue_irqs;
+		}
+	}
+
+	err = request_irq(wx->msix_entries[vector].vector,
+			  txgbe_msix_other, 0, netdev->name, wx);
+	if (err) {
+		wx_err(wx, "request_irq for msix_other failed: %d\n", err);
+		goto free_queue_irqs;
+	}
+
+	return 0;
+
+free_queue_irqs:
+	while (vector) {
+		vector--;
+		free_irq(wx->msix_entries[vector].vector,
+			 wx->q_vector[vector]);
+	}
+	wx_reset_interrupt_capability(wx);
+	return err;
+}
+
+/**
+ * txgbe_request_irq - initialize interrupts
+ * @wx: board private structure
+ *
+ * Attempt to configure interrupts using the best available
+ * capabilities of the hardware and kernel.
+ **/
+static int txgbe_request_irq(struct wx *wx)
+{
+	struct net_device *netdev = wx->netdev;
+	struct pci_dev *pdev = wx->pdev;
+	int err;
+
+	if (pdev->msix_enabled)
+		err = txgbe_request_msix_irqs(wx);
+	else if (pdev->msi_enabled)
+		err = request_irq(wx->pdev->irq, &txgbe_intr, 0,
+				  netdev->name, wx);
+	else
+		err = request_irq(wx->pdev->irq, &txgbe_intr, IRQF_SHARED,
+				  netdev->name, wx);
+
+	if (err)
+		wx_err(wx, "request_irq failed, Error %d\n", err);
+
+	return err;
+}
+
 static void txgbe_up_complete(struct wx *wx)
 {
 	wx_control_hw(wx, true);
+	wx_configure_vectors(wx);
+
+	/* clear any pending interrupts, may auto mask */
+	rd32(wx, WX_PX_IC);
+	rd32(wx, WX_PX_MISC_IC);
+	txgbe_irq_enable(wx, true);
 }
 
 static void txgbe_reset(struct wx *wx)
@@ -104,6 +254,8 @@ static void txgbe_disable_device(struct wx *wx)
 	netif_carrier_off(netdev);
 	netif_tx_disable(netdev);
 
+	wx_irq_disable(wx);
+
 	if (wx->bus.func < 2)
 		wr32m(wx, TXGBE_MIS_PRB_CTL, TXGBE_MIS_PRB_CTL_LAN_UP(wx->bus.func), 0);
 	else
@@ -132,6 +284,7 @@ static void txgbe_down(struct wx *wx)
  **/
 static int txgbe_sw_init(struct wx *wx)
 {
+	u16 msix_count = 0;
 	int err;
 
 	wx->mac.num_rar_entries = TXGBE_SP_RAR_ENTRIES;
@@ -156,6 +309,25 @@ static int txgbe_sw_init(struct wx *wx)
 		break;
 	}
 
+	/* Set common capability flags and settings */
+	wx->max_q_vectors = TXGBE_MAX_MSIX_VECTORS;
+	err = wx_get_pcie_msix_counts(wx, &msix_count, TXGBE_MAX_MSIX_VECTORS);
+	if (err)
+		wx_err(wx, "MSI-X is not supported\n");
+	wx->mac.max_msix_vectors = msix_count;
+
+	/* enable itr by default in dynamic mode */
+	wx->rx_itr_setting = 1;
+	wx->tx_itr_setting = 1;
+
+	/* set default ring sizes */
+	wx->tx_ring_count = TXGBE_DEFAULT_TXD;
+	wx->rx_ring_count = TXGBE_DEFAULT_RXD;
+
+	/* set default work limits */
+	wx->tx_work_limit = TXGBE_DEFAULT_TX_WORK;
+	wx->rx_work_limit = TXGBE_DEFAULT_RX_WORK;
+
 	return 0;
 }
 
@@ -171,10 +343,28 @@ static int txgbe_sw_init(struct wx *wx)
 static int txgbe_open(struct net_device *netdev)
 {
 	struct wx *wx = netdev_priv(netdev);
+	int err;
+
+	err = wx_setup_isb_resources(wx);
+	if (err)
+		goto err_reset;
+
+	wx_configure(wx);
+
+	err = txgbe_request_irq(wx);
+	if (err)
+		goto err_free_isb;
 
 	txgbe_up_complete(wx);
 
 	return 0;
+
+err_free_isb:
+	wx_free_isb_resources(wx);
+err_reset:
+	txgbe_reset(wx);
+
+	return err;
 }
 
 /**
@@ -187,6 +377,9 @@ static int txgbe_open(struct net_device *netdev)
 static void txgbe_close_suspend(struct wx *wx)
 {
 	txgbe_disable_device(wx);
+
+	wx_free_irq(wx);
+	wx_free_isb_resources(wx);
 }
 
 /**
@@ -205,6 +398,8 @@ static int txgbe_close(struct net_device *netdev)
 	struct wx *wx = netdev_priv(netdev);
 
 	txgbe_down(wx);
+	wx_free_irq(wx);
+	wx_free_isb_resources(wx);
 	wx_control_hw(wx, false);
 
 	return 0;
@@ -367,6 +562,10 @@ static int txgbe_probe(struct pci_dev *pdev,
 	eth_hw_addr_set(netdev, wx->mac.perm_addr);
 	wx_mac_set_default_filter(wx, wx->mac.perm_addr);
 
+	err = wx_init_interrupt_scheme(wx);
+	if (err)
+		goto err_free_mac_table;
+
 	/* Save off EEPROM version number and Option Rom version which
 	 * together make a unique identify for the eeprom
 	 */
@@ -435,6 +634,7 @@ static int txgbe_probe(struct pci_dev *pdev,
 	return 0;
 
 err_release_hw:
+	wx_clear_interrupt_scheme(wx);
 	wx_control_hw(wx, false);
 err_free_mac_table:
 	kfree(wx->mac_table);
@@ -468,6 +668,7 @@ static void txgbe_remove(struct pci_dev *pdev)
 				     pci_select_bars(pdev, IORESOURCE_MEM));
 
 	kfree(wx->mac_table);
+	wx_clear_interrupt_scheme(wx);
 
 	pci_disable_pcie_error_reporting(pdev);
 
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index cbd705a9f4bd..ec28cf60c91d 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -67,6 +67,7 @@
 #define TXGBE_PBANUM1_PTR                       0x06
 #define TXGBE_PBANUM_PTR_GUARD                  0xFAFA
 
+#define TXGBE_MAX_MSIX_VECTORS          64
 #define TXGBE_MAX_FDIR_INDICES          63
 
 #define TXGBE_MAX_RX_QUEUES   (TXGBE_MAX_FDIR_INDICES + 1)
@@ -77,6 +78,23 @@
 #define TXGBE_SP_RAR_ENTRIES    128
 #define TXGBE_SP_MC_TBL_SIZE    128
 
+/* TX/RX descriptor defines */
+#define TXGBE_DEFAULT_TXD               512
+#define TXGBE_DEFAULT_TX_WORK           256
+
+#if (PAGE_SIZE < 8192)
+#define TXGBE_DEFAULT_RXD               512
+#define TXGBE_DEFAULT_RX_WORK           256
+#else
+#define TXGBE_DEFAULT_RXD               256
+#define TXGBE_DEFAULT_RX_WORK           128
+#endif
+
+#define TXGBE_INTR_MISC(A)    BIT((A)->num_q_vectors)
+#define TXGBE_INTR_QALL(A)    (TXGBE_INTR_MISC(A) - 1)
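+/* QALL is every bit below the misc bit, i.e. one bit per queue vector */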
+
+#define TXGBE_MAX_EITR        GENMASK(11, 3)
+
 extern char txgbe_driver_name[];
 
 #endif /* _TXGBE_TYPE_H_ */
-- 
2.39.1



* [PATCH net-next v2 04/10] net: libwx: Configure Rx and Tx unit on hardware
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (2 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 03/10] net: txgbe: Add interrupt support Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources Mengyuan Lou
                   ` (5 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu

From: Jiawen Wu <jiawenwu@trustnetic.com>

Configure the hardware in preparation for processing packets, including
the receive and transmit units of the MAC layer, and set up the specific
rings.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 drivers/net/ethernet/wangxun/libwx/wx_hw.c   | 597 +++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_hw.h   |   2 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h |  85 +++
 3 files changed, 684 insertions(+)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index a72b02c72189..554d0875bc62 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -804,6 +804,37 @@ void wx_flush_sw_mac_table(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_flush_sw_mac_table);
 
+static int wx_add_mac_filter(struct wx *wx, u8 *addr, u16 pool)
+{
+	u32 i;
+
+	if (is_zero_ether_addr(addr))
+		return -EINVAL;
+
+	for (i = 0; i < wx->mac.num_rar_entries; i++) {
+		if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE) {
+			if (ether_addr_equal(addr, wx->mac_table[i].addr)) {
+				if (wx->mac_table[i].pools != (1ULL << pool)) {
+					memcpy(wx->mac_table[i].addr, addr, ETH_ALEN);
+					wx->mac_table[i].pools |= (1ULL << pool);
+					wx_sync_mac_table(wx);
+					return i;
+				}
+			}
+		}
+
+		if (wx->mac_table[i].state & WX_MAC_STATE_IN_USE)
+			continue;
+		wx->mac_table[i].state |= (WX_MAC_STATE_MODIFIED |
+					   WX_MAC_STATE_IN_USE);
+		memcpy(wx->mac_table[i].addr, addr, ETH_ALEN);
+		wx->mac_table[i].pools |= (1ULL << pool);
+		wx_sync_mac_table(wx);
+		return i;
+	}
+	return -ENOMEM;
+}
+
 static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool)
 {
 	u32 i;
@@ -828,6 +859,184 @@ static int wx_del_mac_filter(struct wx *wx, u8 *addr, u16 pool)
 	return -ENOMEM;
 }
 
+static int wx_available_rars(struct wx *wx)
+{
+	u32 i, count = 0;
+
+	for (i = 0; i < wx->mac.num_rar_entries; i++) {
+		if (wx->mac_table[i].state == 0)
+			count++;
+	}
+
+	return count;
+}
+
+/**
+ * wx_write_uc_addr_list - write unicast addresses to RAR table
+ * @netdev: network interface device structure
+ * @pool: index for mac table
+ *
+ * Writes unicast address list to the RAR table.
+ * Returns: -ENOMEM on failure/insufficient address space
+ *                0 on no addresses written
+ *                X on writing X addresses to the RAR table
+ **/
+static int wx_write_uc_addr_list(struct net_device *netdev, int pool)
+{
+	struct wx *wx = netdev_priv(netdev);
+	int count = 0;
+
+	/* return ENOMEM indicating insufficient memory for addresses */
+	if (netdev_uc_count(netdev) > wx_available_rars(wx))
+		return -ENOMEM;
+
+	if (!netdev_uc_empty(netdev)) {
+		struct netdev_hw_addr *ha;
+
+		netdev_for_each_uc_addr(ha, netdev) {
+			wx_del_mac_filter(wx, ha->addr, pool);
+			wx_add_mac_filter(wx, ha->addr, pool);
+			count++;
+		}
+	}
+	return count;
+}
+
+/**
+ *  wx_mta_vector - Determines bit-vector in multicast table to set
+ *  @wx: pointer to private structure
+ *  @mc_addr: the multicast address
+ *
+ *  Extracts 12 bits from a multicast address to determine which
+ *  bit-vector to set in the multicast table. The hardware uses 12 bits of
+ *  incoming rx multicast addresses to determine the bit-vector to check in
+ *  the MTA. Which of the 4 combinations of 12 bits the hardware uses is set
+ *  by the MO field of the MCSTCTRL. The MO field is set during initialization
+ *  to mc_filter_type.
+ **/
+static u32 wx_mta_vector(struct wx *wx, u8 *mc_addr)
+{
+	u32 vector = 0;
+
+	switch (wx->mac.mc_filter_type) {
+	case 0:   /* use bits [47:36] of the address */
+		vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4));
+		break;
+	case 1:   /* use bits [46:35] of the address */
+		vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5));
+		break;
+	case 2:   /* use bits [45:34] of the address */
+		vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6));
+		break;
+	case 3:   /* use bits [43:32] of the address */
+		vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8));
+		break;
+	default:  /* Invalid mc_filter_type */
+		wx_err(wx, "MC filter type param set incorrectly\n");
+		break;
+	}
+
+	/* vector can only be 12-bits or boundary will be exceeded */
+	vector &= 0xFFF;
+	return vector;
+}
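+
+/* Worked example, assuming mc_filter_type == 0: for a multicast address
+ * ending in ...:34:56, vector = (0x34 >> 4) | (0x56 << 4) = 0x563, which
+ * selects MTA register 43 (0x563 >> 5) and bit 3 (0x563 & 0x1F).
+ */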
+
+/**
+ *  wx_set_mta - Set bit-vector in multicast table
+ *  @wx: pointer to private structure
+ *  @mc_addr: Multicast address
+ *
+ *  Sets the bit-vector in the multicast table.
+ **/
+static void wx_set_mta(struct wx *wx, u8 *mc_addr)
+{
+	u32 vector, vector_bit, vector_reg;
+
+	wx->addr_ctrl.mta_in_use++;
+
+	vector = wx_mta_vector(wx, mc_addr);
+	wx_dbg(wx, " bit-vector = 0x%03X\n", vector);
+
+	/* The MTA is a register array of 128 32-bit registers. It is treated
+	 * like an array of 4096 bits.  We want to set bit
+	 * BitArray[vector_value]. So we figure out what register the bit is
+	 * in, read it, OR in the new bit, then write back the new value.  The
+	 * register is determined by the upper 7 bits of the vector value and
+	 * the bit within that register are determined by the lower 5 bits of
+	 * the value.
+	 */
+	vector_reg = (vector >> 5) & 0x7F;
+	vector_bit = vector & 0x1F;
+	wx->mac.mta_shadow[vector_reg] |= (1 << vector_bit);
+}
+
+/**
+ *  wx_update_mc_addr_list - Updates MAC list of multicast addresses
+ *  @wx: pointer to private structure
+ *  @netdev: pointer to net device structure
+ *
+ *  The given list replaces any existing list. Clears the MC addrs from receive
+ *  address registers and the multicast table. Uses unused receive address
+ *  registers for the first multicast addresses, and hashes the rest into the
+ *  multicast table.
+ **/
+static void wx_update_mc_addr_list(struct wx *wx, struct net_device *netdev)
+{
+	struct netdev_hw_addr *ha;
+	u32 i, psrctl;
+
+	/* Set the new number of MC addresses that we are being requested to
+	 * use.
+	 */
+	wx->addr_ctrl.num_mc_addrs = netdev_mc_count(netdev);
+	wx->addr_ctrl.mta_in_use = 0;
+
+	/* Clear mta_shadow */
+	wx_dbg(wx, " Clearing MTA\n");
+	memset(&wx->mac.mta_shadow, 0, sizeof(wx->mac.mta_shadow));
+
+	/* Update mta_shadow */
+	netdev_for_each_mc_addr(ha, netdev) {
+		wx_dbg(wx, " Adding the multicast addresses:\n");
+		wx_set_mta(wx, ha->addr);
+	}
+
+	/* Enable mta */
+	for (i = 0; i < wx->mac.mcft_size; i++)
+		wr32a(wx, WX_PSR_MC_TBL(0), i,
+		      wx->mac.mta_shadow[i]);
+
+	if (wx->addr_ctrl.mta_in_use > 0) {
+		psrctl = rd32(wx, WX_PSR_CTL);
+		psrctl &= ~(WX_PSR_CTL_MO | WX_PSR_CTL_MFE);
+		psrctl |= WX_PSR_CTL_MFE |
+			  (wx->mac.mc_filter_type << WX_PSR_CTL_MO_SHIFT);
+		wr32(wx, WX_PSR_CTL, psrctl);
+	}
+
+	wx_dbg(wx, "Update mc addr list Complete\n");
+}
+
+/**
+ * wx_write_mc_addr_list - write multicast addresses to MTA
+ * @netdev: network interface device structure
+ *
+ * Writes multicast address list to the MTA hash table.
+ * Returns: 0 on no addresses written
+ *          X on writing X addresses to MTA
+ **/
+static int wx_write_mc_addr_list(struct net_device *netdev)
+{
+	struct wx *wx = netdev_priv(netdev);
+
+	if (!netif_running(netdev))
+		return 0;
+
+	wx_update_mc_addr_list(wx, netdev);
+
+	return netdev_mc_count(netdev);
+}
+
 /**
  * wx_set_mac - Change the Ethernet Address of the NIC
  * @netdev: network interface device structure
@@ -883,6 +1092,385 @@ void wx_disable_rx(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_disable_rx);
 
+static void wx_enable_rx(struct wx *wx)
+{
+	u32 psrctl;
+
+	/* enable mac receiver */
+	wr32m(wx, WX_MAC_RX_CFG,
+	      WX_MAC_RX_CFG_RE, WX_MAC_RX_CFG_RE);
+
+	wr32m(wx, WX_RDB_PB_CTL,
+	      WX_RDB_PB_CTL_RXEN, WX_RDB_PB_CTL_RXEN);
+
+	if (wx->mac.set_lben) {
+		psrctl = rd32(wx, WX_PSR_CTL);
+		psrctl |= WX_PSR_CTL_SW_EN;
+		wr32(wx, WX_PSR_CTL, psrctl);
+		wx->mac.set_lben = false;
+	}
+}
+
+/**
+ * wx_set_rxpba - Initialize Rx packet buffer
+ * @wx: pointer to private structure
+ **/
+static void wx_set_rxpba(struct wx *wx)
+{
+	u32 rxpktsize, txpktsize, txpbthresh;
+
+	rxpktsize = wx->mac.rx_pb_size << WX_RDB_PB_SZ_SHIFT;
+	wr32(wx, WX_RDB_PB_SZ(0), rxpktsize);
+
+	/* Only support an equally distributed Tx packet buffer strategy. */
+	txpktsize = wx->mac.tx_pb_size;
+	txpbthresh = (txpktsize / 1024) - WX_TXPKT_SIZE_MAX;
+	wr32(wx, WX_TDB_PB_SZ(0), txpktsize);
+	wr32(wx, WX_TDM_PB_THRE(0), txpbthresh);
+}
+
+static void wx_configure_port(struct wx *wx)
+{
+	u32 value, i;
+
+	value = WX_CFG_PORT_CTL_D_VLAN | WX_CFG_PORT_CTL_QINQ;
+	wr32m(wx, WX_CFG_PORT_CTL,
+	      WX_CFG_PORT_CTL_D_VLAN |
+	      WX_CFG_PORT_CTL_QINQ,
+	      value);
+
+	wr32(wx, WX_CFG_TAG_TPID(0),
+	     ETH_P_8021Q | ETH_P_8021AD << 16);
+	wx->tpid[0] = ETH_P_8021Q;
+	wx->tpid[1] = ETH_P_8021AD;
+	for (i = 1; i < 4; i++)
+		wr32(wx, WX_CFG_TAG_TPID(i),
+		     ETH_P_8021Q | ETH_P_8021Q << 16);
+	for (i = 2; i < 8; i++)
+		wx->tpid[i] = ETH_P_8021Q;
+}
+
+/**
+ *  wx_disable_sec_rx_path - Stops the receive data path
+ *  @wx: pointer to private structure
+ *
+ *  Stops the receive data path and waits for the HW to internally empty
+ *  the Rx security block
+ **/
+static int wx_disable_sec_rx_path(struct wx *wx)
+{
+	u32 secrx;
+
+	wr32m(wx, WX_RSC_CTL,
+	      WX_RSC_CTL_RX_DIS, WX_RSC_CTL_RX_DIS);
+
+	return read_poll_timeout(rd32, secrx, secrx & WX_RSC_ST_RSEC_RDY,
+				 1000, 40000, false, wx, WX_RSC_ST);
+}
+
+/**
+ *  wx_enable_sec_rx_path - Enables the receive data path
+ *  @wx: pointer to private structure
+ *
+ *  Enables the receive data path.
+ **/
+static void wx_enable_sec_rx_path(struct wx *wx)
+{
+	wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_RX_DIS, 0);
+	WX_WRITE_FLUSH(wx);
+}
+
+void wx_set_rx_mode(struct net_device *netdev)
+{
+	struct wx *wx = netdev_priv(netdev);
+	u32 fctrl, vmolr, vlnctrl;
+	int count;
+
+	/* Check for Promiscuous and All Multicast modes */
+	fctrl = rd32(wx, WX_PSR_CTL);
+	fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE);
+	vmolr = rd32(wx, WX_PSR_VM_L2CTL(0));
+	vmolr &= ~(WX_PSR_VM_L2CTL_UPE |
+		   WX_PSR_VM_L2CTL_MPE |
+		   WX_PSR_VM_L2CTL_ROPE |
+		   WX_PSR_VM_L2CTL_ROMPE);
+	vlnctrl = rd32(wx, WX_PSR_VLAN_CTL);
+	vlnctrl &= ~(WX_PSR_VLAN_CTL_VFE | WX_PSR_VLAN_CTL_CFIEN);
+
+	/* set all bits that we expect to always be set */
+	fctrl |= WX_PSR_CTL_BAM | WX_PSR_CTL_MFE;
+	vmolr |= WX_PSR_VM_L2CTL_BAM |
+		 WX_PSR_VM_L2CTL_AUPE |
+		 WX_PSR_VM_L2CTL_VACC;
+	vlnctrl |= WX_PSR_VLAN_CTL_VFE;
+
+	wx->addr_ctrl.user_set_promisc = false;
+	if (netdev->flags & IFF_PROMISC) {
+		wx->addr_ctrl.user_set_promisc = true;
+		fctrl |= WX_PSR_CTL_UPE | WX_PSR_CTL_MPE;
+		/* the PF does not want packets routed to the VFs, so clear UPE */
+		vmolr |= WX_PSR_VM_L2CTL_MPE;
+		vlnctrl &= ~WX_PSR_VLAN_CTL_VFE;
+	}
+
+	if (netdev->flags & IFF_ALLMULTI) {
+		fctrl |= WX_PSR_CTL_MPE;
+		vmolr |= WX_PSR_VM_L2CTL_MPE;
+	}
+
+	if (netdev->features & NETIF_F_RXALL) {
+		vmolr |= (WX_PSR_VM_L2CTL_UPE | WX_PSR_VM_L2CTL_MPE);
+		vlnctrl &= ~WX_PSR_VLAN_CTL_VFE;
+		/* receive bad packets */
+		wr32m(wx, WX_RSC_CTL,
+		      WX_RSC_CTL_SAVE_MAC_ERR,
+		      WX_RSC_CTL_SAVE_MAC_ERR);
+	} else {
+		vmolr |= WX_PSR_VM_L2CTL_ROPE | WX_PSR_VM_L2CTL_ROMPE;
+	}
+
+	/* Write addresses to the available RAR registers; if there is not
+	 * sufficient space to store all the addresses, enable unicast
+	 * promiscuous mode.
+	 */
+	count = wx_write_uc_addr_list(netdev, 0);
+	if (count < 0) {
+		vmolr &= ~WX_PSR_VM_L2CTL_ROPE;
+		vmolr |= WX_PSR_VM_L2CTL_UPE;
+	}
+
+	/* Write addresses to the MTA; if the attempt fails, turn on
+	 * multicast promiscuous mode so that we can at least receive
+	 * multicast traffic.
+	 */
+	count = wx_write_mc_addr_list(netdev);
+	if (count < 0) {
+		vmolr &= ~WX_PSR_VM_L2CTL_ROMPE;
+		vmolr |= WX_PSR_VM_L2CTL_MPE;
+	}
+
+	wr32(wx, WX_PSR_VLAN_CTL, vlnctrl);
+	wr32(wx, WX_PSR_CTL, fctrl);
+	wr32(wx, WX_PSR_VM_L2CTL(0), vmolr);
+}
+EXPORT_SYMBOL(wx_set_rx_mode);
+
+static void wx_set_rx_buffer_len(struct wx *wx)
+{
+	struct net_device *netdev = wx->netdev;
+	u32 mhadd, max_frame;
+
+	max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
+	/* adjust max frame to be at least the size of a standard frame */
+	if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
+		max_frame = (ETH_FRAME_LEN + ETH_FCS_LEN);
+
+	mhadd = rd32(wx, WX_PSR_MAX_SZ);
+	if (max_frame != mhadd)
+		wr32(wx, WX_PSR_MAX_SZ, max_frame);
+}
+
+/* Disable the specified rx queue */
+void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring)
+{
+	u8 reg_idx = ring->reg_idx;
+	u32 rxdctl;
+	int ret;
+
+	/* write value back with RRCFG.EN bit cleared */
+	wr32m(wx, WX_PX_RR_CFG(reg_idx),
+	      WX_PX_RR_CFG_RR_EN, 0);
+
+	/* the hardware may take up to 100us to really disable the rx queue */
+	ret = read_poll_timeout(rd32, rxdctl, !(rxdctl & WX_PX_RR_CFG_RR_EN),
+				10, 100, true, wx, WX_PX_RR_CFG(reg_idx));
+
+	if (ret == -ETIMEDOUT) {
+		/* Just for information */
+		wx_err(wx,
+		       "RRCFG.EN on Rx queue %d not cleared within the polling period\n",
+		       reg_idx);
+	}
+}
+EXPORT_SYMBOL(wx_disable_rx_queue);
+
+static void wx_enable_rx_queue(struct wx *wx, struct wx_ring *ring)
+{
+	u8 reg_idx = ring->reg_idx;
+	u32 rxdctl;
+	int ret;
+
+	ret = read_poll_timeout(rd32, rxdctl, rxdctl & WX_PX_RR_CFG_RR_EN,
+				1000, 10000, true, wx, WX_PX_RR_CFG(reg_idx));
+
+	if (ret == -ETIMEDOUT) {
+		/* Just for information */
+		wx_err(wx,
+		       "RRCFG.EN on Rx queue %d not set within the polling period\n",
+		       reg_idx);
+	}
+}
+
+static void wx_configure_srrctl(struct wx *wx,
+				struct wx_ring *rx_ring)
+{
+	u16 reg_idx = rx_ring->reg_idx;
+	u32 srrctl;
+
+	srrctl = rd32(wx, WX_PX_RR_CFG(reg_idx));
+	srrctl &= ~(WX_PX_RR_CFG_RR_HDR_SZ |
+		    WX_PX_RR_CFG_RR_BUF_SZ |
+		    WX_PX_RR_CFG_SPLIT_MODE);
+	/* configure header buffer length, needed for RSC */
+	srrctl |= WX_RXBUFFER_256 << WX_PX_RR_CFG_BHDRSIZE_SHIFT;
+
+	/* configure the packet buffer length */
+	srrctl |= WX_RX_BUFSZ >> WX_PX_RR_CFG_BSIZEPKT_SHIFT;
+
+	wr32(wx, WX_PX_RR_CFG(reg_idx), srrctl);
+}
+
+static void wx_configure_tx_ring(struct wx *wx,
+				 struct wx_ring *ring)
+{
+	u32 txdctl = WX_PX_TR_CFG_ENABLE;
+	u8 reg_idx = ring->reg_idx;
+	int ret;
+
+	/* disable queue to avoid issues while updating state */
+	wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
+	WX_WRITE_FLUSH(wx);
+
+	/* reset head and tail pointers */
+	wr32(wx, WX_PX_TR_RP(reg_idx), 0);
+	wr32(wx, WX_PX_TR_WP(reg_idx), 0);
+	ring->tail = wx->hw_addr + WX_PX_TR_WP(reg_idx);
+
+	if (ring->count < WX_MAX_TXD)
+		txdctl |= (ring->count / 128) << WX_PX_TR_CFG_TR_SIZE_SHIFT;
+	txdctl |= 0x20 << WX_PX_TR_CFG_WTHRESH_SHIFT;
+
+	/* enable queue */
+	wr32(wx, WX_PX_TR_CFG(reg_idx), txdctl);
+
+	/* poll to verify queue is enabled */
+	ret = read_poll_timeout(rd32, txdctl, txdctl & WX_PX_TR_CFG_ENABLE,
+				1000, 10000, true, wx, WX_PX_TR_CFG(reg_idx));
+	if (ret == -ETIMEDOUT)
+		wx_err(wx, "Could not enable Tx Queue %d\n", reg_idx);
+}
+
+static void wx_configure_rx_ring(struct wx *wx,
+				 struct wx_ring *ring)
+{
+	u16 reg_idx = ring->reg_idx;
+	u32 rxdctl;
+
+	/* disable queue to avoid issues while updating state */
+	rxdctl = rd32(wx, WX_PX_RR_CFG(reg_idx));
+	wx_disable_rx_queue(wx, ring);
+
+	if (ring->count == WX_MAX_RXD)
+		rxdctl |= 0 << WX_PX_RR_CFG_RR_SIZE_SHIFT;
+	else
+		rxdctl |= (ring->count / 128) << WX_PX_RR_CFG_RR_SIZE_SHIFT;
+
+	rxdctl |= 0x1 << WX_PX_RR_CFG_RR_THER_SHIFT;
+	wr32(wx, WX_PX_RR_CFG(reg_idx), rxdctl);
+
+	/* reset head and tail pointers */
+	wr32(wx, WX_PX_RR_RP(reg_idx), 0);
+	wr32(wx, WX_PX_RR_WP(reg_idx), 0);
+	ring->tail = wx->hw_addr + WX_PX_RR_WP(reg_idx);
+
+	wx_configure_srrctl(wx, ring);
+
+	/* enable receive descriptor ring */
+	wr32m(wx, WX_PX_RR_CFG(reg_idx),
+	      WX_PX_RR_CFG_RR_EN, WX_PX_RR_CFG_RR_EN);
+
+	wx_enable_rx_queue(wx, ring);
+}
+
+/**
+ * wx_configure_tx - Configure Transmit Unit after Reset
+ * @wx: pointer to private structure
+ *
+ * Configure the Tx unit of the MAC after a reset.
+ **/
+static void wx_configure_tx(struct wx *wx)
+{
+	u32 i;
+
+	/* TDM_CTL.TE must be before Tx queues are enabled */
+	wr32m(wx, WX_TDM_CTL,
+	      WX_TDM_CTL_TE, WX_TDM_CTL_TE);
+
+	/* Setup the HW Tx Head and Tail descriptor pointers */
+	for (i = 0; i < wx->num_tx_queues; i++)
+		wx_configure_tx_ring(wx, wx->tx_ring[i]);
+
+	wr32m(wx, WX_TSC_BUF_AE, WX_TSC_BUF_AE_THR, 0x10);
+
+	if (wx->mac.type == wx_mac_em)
+		wr32m(wx, WX_TSC_CTL, WX_TSC_CTL_TX_DIS | WX_TSC_CTL_TSEC_DIS, 0x1);
+
+	/* enable mac transmitter */
+	wr32m(wx, WX_MAC_TX_CFG,
+	      WX_MAC_TX_CFG_TE, WX_MAC_TX_CFG_TE);
+}
+
+/**
+ * wx_configure_rx - Configure Receive Unit after Reset
+ * @wx: pointer to private structure
+ *
+ * Configure the Rx unit of the MAC after a reset.
+ **/
+static void wx_configure_rx(struct wx *wx)
+{
+	u32 psrtype, i;
+	int ret;
+
+	wx_disable_rx(wx);
+
+	psrtype = WX_RDB_PL_CFG_L4HDR |
+		  WX_RDB_PL_CFG_L3HDR |
+		  WX_RDB_PL_CFG_L2HDR |
+		  WX_RDB_PL_CFG_TUN_TUNHDR;
+	wr32(wx, WX_RDB_PL_CFG(0), psrtype);
+
+	/* enable hw crc stripping */
+	wr32m(wx, WX_RSC_CTL, WX_RSC_CTL_CRC_STRIP, WX_RSC_CTL_CRC_STRIP);
+
+	if (wx->mac.type == wx_mac_sp) {
+		u32 psrctl;
+
+		/* RSC Setup */
+		psrctl = rd32(wx, WX_PSR_CTL);
+		psrctl |= WX_PSR_CTL_RSC_ACK; /* Disable RSC for ACK packets */
+		psrctl |= WX_PSR_CTL_RSC_DIS;
+		wr32(wx, WX_PSR_CTL, psrctl);
+	}
+
+	/* set_rx_buffer_len must be called before ring initialization */
+	wx_set_rx_buffer_len(wx);
+
+	/* Setup the HW Rx Head and Tail Descriptor Pointers and
+	 * the Base and Length of the Rx Descriptor Ring
+	 */
+	for (i = 0; i < wx->num_rx_queues; i++)
+		wx_configure_rx_ring(wx, wx->rx_ring[i]);
+
+	/* Disable the security engine so traffic is blocked while receives are enabled */
+	ret = wx_disable_sec_rx_path(wx);
+	if (ret < 0)
+		wx_err(wx, "The register status is abnormal, please check the device.\n");
+
+	wx_enable_rx(wx);
+	wx_enable_sec_rx_path(wx);
+}
+
 static void wx_configure_isb(struct wx *wx)
 {
 	/* set ISB Address */
@@ -893,6 +1481,15 @@ static void wx_configure_isb(struct wx *wx)
 
 void wx_configure(struct wx *wx)
 {
+	wx_set_rxpba(wx);
+	wx_configure_port(wx);
+
+	wx_set_rx_mode(wx->netdev);
+
+	wx_enable_sec_rx_path(wx);
+
+	wx_configure_tx(wx);
+	wx_configure_rx(wx);
 	wx_configure_isb(wx);
 }
 EXPORT_SYMBOL(wx_configure);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.h b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
index 60bda129cfa6..44dfd6ea442a 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.h
@@ -22,6 +22,8 @@ void wx_mac_set_default_filter(struct wx *wx, u8 *addr);
 void wx_flush_sw_mac_table(struct wx *wx);
 int wx_set_mac(struct net_device *netdev, void *p);
 void wx_disable_rx(struct wx *wx);
+void wx_set_rx_mode(struct net_device *netdev);
+void wx_disable_rx_queue(struct wx *wx, struct wx_ring *ring);
 void wx_configure(struct wx *wx);
 int wx_disable_pcie_master(struct wx *wx);
 int wx_stop_adapter(struct wx *wx);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 7368c681221f..988878ddba47 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -66,6 +66,9 @@
 /* port cfg Registers */
 #define WX_CFG_PORT_CTL              0x14400
 #define WX_CFG_PORT_CTL_DRV_LOAD     BIT(3)
+#define WX_CFG_PORT_CTL_QINQ         BIT(2)
+#define WX_CFG_PORT_CTL_D_VLAN       BIT(0) /* double vlan */
+#define WX_CFG_TAG_TPID(_i)          (0x14430 + ((_i) * 4))
 
 /* GPIO Registers */
 #define WX_GPIO_DR                   0x14800
@@ -88,15 +91,25 @@
 #define WX_TDM_CTL                   0x18000
 /* TDM CTL BIT */
 #define WX_TDM_CTL_TE                BIT(0) /* Transmit Enable */
+#define WX_TDM_PB_THRE(_i)           (0x18020 + ((_i) * 4))
 
 /***************************** RDB registers *********************************/
 /* receive packet buffer */
 #define WX_RDB_PB_CTL                0x19000
 #define WX_RDB_PB_CTL_RXEN           BIT(31) /* Enable Receiver */
 #define WX_RDB_PB_CTL_DISABLED       BIT(0)
+#define WX_RDB_PB_SZ(_i)             (0x19020 + ((_i) * 4))
+#define WX_RDB_PB_SZ_SHIFT           10
 /* statistic */
 #define WX_RDB_PFCMACDAL             0x19210
 #define WX_RDB_PFCMACDAH             0x19214
+/* ring assignment */
+#define WX_RDB_PL_CFG(_i)            (0x19300 + ((_i) * 4))
+#define WX_RDB_PL_CFG_L4HDR          BIT(1)
+#define WX_RDB_PL_CFG_L3HDR          BIT(2)
+#define WX_RDB_PL_CFG_L2HDR          BIT(3)
+#define WX_RDB_PL_CFG_TUN_TUNHDR     BIT(4)
+#define WX_RDB_PL_CFG_TUN_OUTL2HDR   BIT(5)
 
 /******************************* PSR Registers *******************************/
 /* psr control */
@@ -114,10 +127,24 @@
 #define WX_PSR_CTL_MO_SHIFT          5
 #define WX_PSR_CTL_MO                (0x3 << WX_PSR_CTL_MO_SHIFT)
 #define WX_PSR_CTL_TPE               BIT(4)
+#define WX_PSR_MAX_SZ                0x15020
+#define WX_PSR_VLAN_CTL              0x15088
+#define WX_PSR_VLAN_CTL_CFIEN        BIT(29)  /* bit 29 */
+#define WX_PSR_VLAN_CTL_VFE          BIT(30)  /* bit 30 */
 /* mcast/ucast overflow tbl */
 #define WX_PSR_MC_TBL(_i)            (0x15200  + ((_i) * 4))
 #define WX_PSR_UC_TBL(_i)            (0x15400 + ((_i) * 4))
 
+/* VM L2 control */
+#define WX_PSR_VM_L2CTL(_i)          (0x15600 + ((_i) * 4))
+#define WX_PSR_VM_L2CTL_UPE          BIT(4) /* unicast promiscuous */
+#define WX_PSR_VM_L2CTL_VACC         BIT(6) /* accept unmatched vlan */
+#define WX_PSR_VM_L2CTL_AUPE         BIT(8) /* accept untagged packets */
+#define WX_PSR_VM_L2CTL_ROMPE        BIT(9) /* accept packets in MTA tbl */
+#define WX_PSR_VM_L2CTL_ROPE         BIT(10) /* accept packets in UC tbl */
+#define WX_PSR_VM_L2CTL_BAM          BIT(11) /* accept broadcast packets */
+#define WX_PSR_VM_L2CTL_MPE          BIT(12) /* multicast promiscuous */
+
 /* Management */
 #define WX_PSR_MNG_FLEX_SEL          0x1582C
 #define WX_PSR_MNG_FLEX_DW_L(_i)     (0x15A00 + ((_i) * 16))
@@ -139,6 +166,27 @@
 #define WX_PSR_MAC_SWC_IDX           0x16210
 #define WX_CLEAR_VMDQ_ALL            0xFFFFFFFFU
 
+/********************************* RSEC **************************************/
+/* general rsec */
+#define WX_RSC_CTL                   0x17000
+#define WX_RSC_CTL_SAVE_MAC_ERR      BIT(6)
+#define WX_RSC_CTL_CRC_STRIP         BIT(2)
+#define WX_RSC_CTL_RX_DIS            BIT(1)
+#define WX_RSC_ST                    0x17004
+#define WX_RSC_ST_RSEC_RDY           BIT(0)
+
+/****************************** TDB ******************************************/
+#define WX_TDB_PB_SZ(_i)             (0x1CC00 + ((_i) * 4))
+#define WX_TXPKT_SIZE_MAX            0xA /* Max Tx Packet size */
+
+/****************************** TSEC *****************************************/
+/* Security Control Registers */
+#define WX_TSC_CTL                   0x1D000
+#define WX_TSC_CTL_TX_DIS            BIT(1)
+#define WX_TSC_CTL_TSEC_DIS          BIT(0)
+#define WX_TSC_BUF_AE                0x1D00C
+#define WX_TSC_BUF_AE_THR            GENMASK(9, 0)
+
 /************************************** MNG ********************************/
 #define WX_MNG_SWFW_SYNC             0x1E008
 #define WX_MNG_SWFW_SYNC_SW_MB       BIT(2)
@@ -192,6 +240,10 @@
 #define WX_EM_MAX_EITR               0x00007FFCU
 
 /* transmit DMA Registers */
+#define WX_PX_TR_BAL(_i)             (0x03000 + ((_i) * 0x40))
+#define WX_PX_TR_BAH(_i)             (0x03004 + ((_i) * 0x40))
+#define WX_PX_TR_WP(_i)              (0x03008 + ((_i) * 0x40))
+#define WX_PX_TR_RP(_i)              (0x0300C + ((_i) * 0x40))
 #define WX_PX_TR_CFG(_i)             (0x03010 + ((_i) * 0x40))
 /* Transmit Config masks */
 #define WX_PX_TR_CFG_ENABLE          BIT(0) /* Ena specific Tx Queue */
@@ -201,8 +253,22 @@
 #define WX_PX_TR_CFG_THRE_SHIFT      8
 
 /* Receive DMA Registers */
+#define WX_PX_RR_BAL(_i)             (0x01000 + ((_i) * 0x40))
+#define WX_PX_RR_BAH(_i)             (0x01004 + ((_i) * 0x40))
+#define WX_PX_RR_WP(_i)              (0x01008 + ((_i) * 0x40))
+#define WX_PX_RR_RP(_i)              (0x0100C + ((_i) * 0x40))
 #define WX_PX_RR_CFG(_i)             (0x01010 + ((_i) * 0x40))
 /* PX_RR_CFG bit definitions */
+#define WX_PX_RR_CFG_SPLIT_MODE      BIT(26)
+#define WX_PX_RR_CFG_RR_THER_SHIFT   16
+#define WX_PX_RR_CFG_RR_HDR_SZ       GENMASK(15, 12)
+#define WX_PX_RR_CFG_RR_BUF_SZ       GENMASK(11, 8)
+#define WX_PX_RR_CFG_BHDRSIZE_SHIFT  6 /* 64byte resolution (>> 6)
+					* + at bit 8 offset (<< 12)
+					*  = (<< 6)
+					*/
+#define WX_PX_RR_CFG_BSIZEPKT_SHIFT  2 /* so many KBs */
+#define WX_PX_RR_CFG_RR_SIZE_SHIFT   1
 #define WX_PX_RR_CFG_RR_EN           BIT(0)
 
 /* Number of 80 microseconds we wait for PCI Express master disable */
@@ -230,6 +296,20 @@
 #define WX_MAC_STATE_MODIFIED        0x2
 #define WX_MAC_STATE_IN_USE          0x4
 
+#define WX_MAX_RXD                   8192
+#define WX_MAX_TXD                   8192
+
+/* Supported Rx Buffer Sizes */
+#define WX_RXBUFFER_256      256    /* Used for skb receive header */
+#define WX_RXBUFFER_2K       2048
+#define WX_MAX_RXBUFFER      16384  /* largest size for single descriptor */
+
+#if MAX_SKB_FRAGS < 8
+#define WX_RX_BUFSZ      ALIGN(WX_MAX_RXBUFFER / MAX_SKB_FRAGS, 1024)
+#else
+#define WX_RX_BUFSZ      WX_RXBUFFER_2K
+#endif
+
 #define WX_CFG_PORT_ST               0x14404
 
 /* Host Interface Command Structures */
@@ -307,9 +387,12 @@ struct wx_mac_info {
 	bool set_lben;
 	u8 addr[ETH_ALEN];
 	u8 perm_addr[ETH_ALEN];
+	u32 mta_shadow[128];
 	s32 mc_filter_type;
 	u32 mcft_size;
 	u32 num_rar_entries;
+	u32 rx_pb_size;
+	u32 tx_pb_size;
 	u32 max_tx_queues;
 	u32 max_rx_queues;
 
@@ -365,6 +448,7 @@ struct wx_ring {
 	struct wx_q_vector *q_vector;   /* backpointer to host q_vector */
 	struct net_device *netdev;      /* netdev ring belongs to */
 	struct device *dev;             /* device for DMA mapping */
+	u8 __iomem *tail;
 
 	u16 count;                      /* amount of descriptors */
 
@@ -421,6 +505,7 @@ struct wx {
 	u16 oem_svid;
 	u16 msg_enable;
 	bool adapter_stopped;
+	u16 tpid[8];
 	char eeprom_id[32];
 	enum wx_reset_type reset_type;
 
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (3 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 04/10] net: libwx: Configure Rx and Tx unit on hardware Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-02-02  5:51   ` Jakub Kicinski
  2023-01-31 10:05 ` [PATCH net-next v2 06/10] net: txgbe: Setup Rx and Tx ring Mengyuan Lou
                   ` (4 subsequent siblings)
  9 siblings, 1 reply; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

From: Jiawen Wu <jiawenwu@trustnetic.com>

Set up Rx and Tx descriptors for specific rings.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
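A note for reviewers (not part of the commit message):
wx_setup_rx_resources() and wx_setup_tx_resources() below first try to
place the buffer-info arrays and descriptor rings on the NUMA node of
the owning q_vector, and only then fall back to any node. A minimal
sketch of the pattern, with an illustrative helper name that does not
exist in the driver:

static void *alloc_near_queue(size_t size, int numa_node)
{
	void *buf = kvmalloc_node(size, GFP_KERNEL, numa_node);

	/* preferred node exhausted: accept memory from any node */
	if (!buf)
		buf = kvmalloc(size, GFP_KERNEL);
	return buf;
}

The same idea is applied to dma_alloc_coherent() by temporarily
overriding the device node with set_dev_node().
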
 drivers/net/ethernet/wangxun/Kconfig         |   1 +
 drivers/net/ethernet/wangxun/libwx/wx_hw.c   |   8 +
 drivers/net/ethernet/wangxun/libwx/wx_lib.c  | 304 +++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_lib.h  |   2 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h |  74 +++++
 5 files changed, 389 insertions(+)

diff --git a/drivers/net/ethernet/wangxun/Kconfig b/drivers/net/ethernet/wangxun/Kconfig
index 0922beac3ec0..c9d88673d306 100644
--- a/drivers/net/ethernet/wangxun/Kconfig
+++ b/drivers/net/ethernet/wangxun/Kconfig
@@ -18,6 +18,7 @@ if NET_VENDOR_WANGXUN
 
 config LIBWX
 	tristate
+	select PAGE_POOL
 	help
 	Common library for Wangxun(R) Ethernet drivers.
 
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 554d0875bc62..a9a6bfff58ef 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1335,12 +1335,16 @@ static void wx_configure_tx_ring(struct wx *wx,
 {
 	u32 txdctl = WX_PX_TR_CFG_ENABLE;
 	u8 reg_idx = ring->reg_idx;
+	u64 tdba = ring->dma;
 	int ret;
 
 	/* disable queue to avoid issues while updating state */
 	wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
 	WX_WRITE_FLUSH(wx);
 
+	wr32(wx, WX_PX_TR_BAL(reg_idx), tdba & DMA_BIT_MASK(32));
+	wr32(wx, WX_PX_TR_BAH(reg_idx), tdba >> 32);
+
 	/* reset head and tail pointers */
 	wr32(wx, WX_PX_TR_RP(reg_idx), 0);
 	wr32(wx, WX_PX_TR_WP(reg_idx), 0);
@@ -1364,12 +1368,16 @@ static void wx_configure_rx_ring(struct wx *wx,
 				 struct wx_ring *ring)
 {
 	u16 reg_idx = ring->reg_idx;
+	u64 rdba = ring->dma;
 	u32 rxdctl;
 
 	/* disable queue to avoid issues while updating state */
 	rxdctl = rd32(wx, WX_PX_RR_CFG(reg_idx));
 	wx_disable_rx_queue(wx, ring);
 
+	wr32(wx, WX_PX_RR_BAL(reg_idx), rdba & DMA_BIT_MASK(32));
+	wr32(wx, WX_PX_RR_BAH(reg_idx), rdba >> 32);
+
 	if (ring->count == WX_MAX_RXD)
 		rxdctl |= 0 << WX_PX_RR_CFG_RR_SIZE_SHIFT;
 	else
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index 76711087c184..e4d1feaa1c95 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2019 - 2022 Beijing WangXun Technology Co., Ltd. */
 
 #include <linux/etherdevice.h>
+#include <net/page_pool.h>
 #include <linux/iopoll.h>
 #include <linux/pci.h>
 
@@ -192,6 +193,9 @@ static int wx_alloc_q_vector(struct wx *wx,
 	wx->q_vector[v_idx] = q_vector;
 	q_vector->wx = wx;
 	q_vector->v_idx = v_idx;
+	if (cpu_online(v_idx))
+		q_vector->numa_node = cpu_to_node(v_idx);
 
 	/* initialize work limits */
 	q_vector->tx.work_limit = wx->tx_work_limit;
@@ -610,4 +614,304 @@ void wx_configure_vectors(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_configure_vectors);
 
+/**
+ * wx_free_rx_resources - Free Rx Resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+static void wx_free_rx_resources(struct wx_ring *rx_ring)
+{
+	kvfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!rx_ring->desc)
+		return;
+
+	dma_free_coherent(rx_ring->dev, rx_ring->size,
+			  rx_ring->desc, rx_ring->dma);
+
+	rx_ring->desc = NULL;
+
+	if (rx_ring->page_pool) {
+		page_pool_destroy(rx_ring->page_pool);
+		rx_ring->page_pool = NULL;
+	}
+}
+
+/**
+ * wx_free_all_rx_resources - Free Rx Resources for All Queues
+ * @wx: pointer to hardware structure
+ *
+ * Free all receive software resources
+ **/
+static void wx_free_all_rx_resources(struct wx *wx)
+{
+	int i;
+
+	for (i = 0; i < wx->num_rx_queues; i++)
+		wx_free_rx_resources(wx->rx_ring[i]);
+}
+
+/**
+ * wx_free_tx_resources - Free Tx Resources per Queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+static void wx_free_tx_resources(struct wx_ring *tx_ring)
+{
+	kvfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+
+	/* if not set, then don't free */
+	if (!tx_ring->desc)
+		return;
+
+	dma_free_coherent(tx_ring->dev, tx_ring->size,
+			  tx_ring->desc, tx_ring->dma);
+	tx_ring->desc = NULL;
+}
+
+/**
+ * wx_free_all_tx_resources - Free Tx Resources for All Queues
+ * @wx: pointer to hardware structure
+ *
+ * Free all transmit software resources
+ **/
+static void wx_free_all_tx_resources(struct wx *wx)
+{
+	int i;
+
+	for (i = 0; i < wx->num_tx_queues; i++)
+		wx_free_tx_resources(wx->tx_ring[i]);
+}
+
+void wx_free_resources(struct wx *wx)
+{
+	wx_free_isb_resources(wx);
+	wx_free_all_rx_resources(wx);
+	wx_free_all_tx_resources(wx);
+}
+EXPORT_SYMBOL(wx_free_resources);
+
+static int wx_alloc_page_pool(struct wx_ring *rx_ring)
+{
+	int ret = 0;
+
+	struct page_pool_params pp_params = {
+		.flags = PP_FLAG_DMA_MAP | PP_FLAG_DMA_SYNC_DEV,
+		.order = 0,
+		.pool_size = rx_ring->size,
+		.nid = dev_to_node(rx_ring->dev),
+		.dev = rx_ring->dev,
+		.dma_dir = DMA_FROM_DEVICE,
+		.offset = 0,
+		.max_len = PAGE_SIZE,
+	};
+
+	rx_ring->page_pool = page_pool_create(&pp_params);
+	if (IS_ERR(rx_ring->page_pool)) {
+		rx_ring->page_pool = NULL;
+		ret = PTR_ERR(rx_ring->page_pool);
+	}
+
+	return ret;
+}
+
+/**
+ * wx_setup_rx_resources - allocate Rx resources (Descriptors)
+ * @rx_ring: rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int wx_setup_rx_resources(struct wx_ring *rx_ring)
+{
+	struct device *dev = rx_ring->dev;
+	int orig_node = dev_to_node(dev);
+	int numa_node = NUMA_NO_NODE;
+	int size, ret;
+
+	size = sizeof(struct wx_rx_buffer) * rx_ring->count;
+
+	if (rx_ring->q_vector)
+		numa_node = rx_ring->q_vector->numa_node;
+
+	rx_ring->rx_buffer_info = kvmalloc_node(size, GFP_KERNEL, numa_node);
+	if (!rx_ring->rx_buffer_info)
+		rx_ring->rx_buffer_info = kvmalloc(size, GFP_KERNEL);
+	if (!rx_ring->rx_buffer_info)
+		goto err;
+
+	/* Round up to nearest 4K */
+	rx_ring->size = rx_ring->count * sizeof(union wx_rx_desc);
+	rx_ring->size = ALIGN(rx_ring->size, 4096);
+
+	set_dev_node(dev, numa_node);
+	rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+					   &rx_ring->dma, GFP_KERNEL);
+	if (!rx_ring->desc) {
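+		/* the preferred-node allocation failed, retry on the device's home node */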
+		set_dev_node(dev, orig_node);
+		rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+						   &rx_ring->dma, GFP_KERNEL);
+	}
+
+	if (!rx_ring->desc)
+		goto err;
+
+	ret = wx_alloc_page_pool(rx_ring);
+	if (ret < 0) {
+		dev_err(rx_ring->dev, "Page pool creation failed: %d\n", ret);
+		goto err;
+	}
+
+	return 0;
+err:
+	kvfree(rx_ring->rx_buffer_info);
+	rx_ring->rx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * wx_setup_all_rx_resources - allocate all queues Rx resources
+ * @wx: pointer to hardware structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int wx_setup_all_rx_resources(struct wx *wx)
+{
+	int i, err = 0;
+
+	for (i = 0; i < wx->num_rx_queues; i++) {
+		err = wx_setup_rx_resources(wx->rx_ring[i]);
+		if (!err)
+			continue;
+
+		wx_err(wx, "Allocation for Rx Queue %u failed\n", i);
+		goto err_setup_rx;
+	}
+
+	return 0;
+err_setup_rx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		wx_free_rx_resources(wx->rx_ring[i]);
+	return err;
+}
+
+/**
+ * wx_setup_tx_resources - allocate Tx resources (Descriptors)
+ * @tx_ring: tx descriptor ring (for a specific queue) to setup
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int wx_setup_tx_resources(struct wx_ring *tx_ring)
+{
+	struct device *dev = tx_ring->dev;
+	int orig_node = dev_to_node(dev);
+	int numa_node = NUMA_NO_NODE;
+	int size;
+
+	size = sizeof(struct wx_tx_buffer) * tx_ring->count;
+
+	if (tx_ring->q_vector)
+		numa_node = tx_ring->q_vector->numa_node;
+
+	tx_ring->tx_buffer_info = kvmalloc_node(size, GFP_KERNEL, numa_node);
+	if (!tx_ring->tx_buffer_info)
+		tx_ring->tx_buffer_info = kvmalloc(size, GFP_KERNEL);
+	if (!tx_ring->tx_buffer_info)
+		goto err;
+
+	/* round up to nearest 4K */
+	tx_ring->size = tx_ring->count * sizeof(union wx_tx_desc);
+	tx_ring->size = ALIGN(tx_ring->size, 4096);
+
+	set_dev_node(dev, numa_node);
+	tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+					   &tx_ring->dma, GFP_KERNEL);
+	if (!tx_ring->desc) {
+		set_dev_node(dev, orig_node);
+		tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+						   &tx_ring->dma, GFP_KERNEL);
+	}
+
+	if (!tx_ring->desc)
+		goto err;
+
+	return 0;
+
+err:
+	kvfree(tx_ring->tx_buffer_info);
+	tx_ring->tx_buffer_info = NULL;
+	dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n");
+	return -ENOMEM;
+}
+
+/**
+ * wx_setup_all_tx_resources - allocate all queues Tx resources
+ * @wx: pointer to private structure
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * caller's duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int wx_setup_all_tx_resources(struct wx *wx)
+{
+	int i, err = 0;
+
+	for (i = 0; i < wx->num_tx_queues; i++) {
+		err = wx_setup_tx_resources(wx->tx_ring[i]);
+		if (!err)
+			continue;
+
+		wx_err(wx, "Allocation for Tx Queue %u failed\n", i);
+		goto err_setup_tx;
+	}
+
+	return 0;
+err_setup_tx:
+	/* rewind the index freeing the rings as we go */
+	while (i--)
+		wx_free_tx_resources(wx->tx_ring[i]);
+	return err;
+}
+
+int wx_setup_resources(struct wx *wx)
+{
+	int err;
+
+	/* allocate transmit descriptors */
+	err = wx_setup_all_tx_resources(wx);
+	if (err)
+		return err;
+
+	/* allocate receive descriptors */
+	err = wx_setup_all_rx_resources(wx);
+	if (err)
+		goto err_free_tx;
+
+	err = wx_setup_isb_resources(wx);
+	if (err)
+		goto err_free_rx;
+
+	return 0;
+
+err_free_rx:
+	wx_free_all_rx_resources(wx);
+err_free_tx:
+	wx_free_all_tx_resources(wx);
+
+	return err;
+}
+EXPORT_SYMBOL(wx_setup_resources);
+
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
index 8ae657155f34..6fa95752fc42 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
@@ -16,5 +16,7 @@ int wx_setup_isb_resources(struct wx *wx);
 void wx_free_isb_resources(struct wx *wx);
 u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx);
 void wx_configure_vectors(struct wx *wx);
+void wx_free_resources(struct wx *wx);
+int wx_setup_resources(struct wx *wx);
 
 #endif /* _NGBE_LIB_H_ */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 988878ddba47..1863c6cbc6c6 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -432,6 +432,71 @@ enum wx_reset_type {
 	WX_GLOBAL_RESET
 };
 
+/* Transmit Descriptor */
+union wx_tx_desc {
+	struct {
+		__le64 buffer_addr; /* Address of descriptor's data buf */
+		__le32 cmd_type_len;
+		__le32 olinfo_status;
+	} read;
+	struct {
+		__le64 rsvd; /* Reserved */
+		__le32 nxtseq_seed;
+		__le32 status;
+	} wb;
+};
+
+/* Receive Descriptor */
+union wx_rx_desc {
+	struct {
+		__le64 pkt_addr; /* Packet buffer address */
+		__le64 hdr_addr; /* Header buffer address */
+	} read;
+	struct {
+		struct {
+			union {
+				__le32 data;
+				struct {
+					__le16 pkt_info; /* RSS, Pkt type */
+					__le16 hdr_info; /* Splithdr, hdrlen */
+				} hs_rss;
+			} lo_dword;
+			union {
+				__le32 rss; /* RSS Hash */
+				struct {
+					__le16 ip_id; /* IP id */
+					__le16 csum; /* Packet Checksum */
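+	/* each 32-bit TPID register holds two 16-bit TPIDs: register 0
+	 * carries tpid[0]/tpid[1] above, registers 1-3 carry tpid[2..7]
+	 */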
+				} csum_ip;
+			} hi_dword;
+		} lower;
+		struct {
+			__le32 status_error; /* ext status/error */
+			__le16 length; /* Packet length */
+			__le16 vlan; /* VLAN tag */
+		} upper;
+	} wb;  /* writeback */
+};
+
+/* wrapper around a pointer to a socket buffer,
+ * so a DMA handle can be stored along with the buffer
+ */
+struct wx_tx_buffer {
+	union wx_tx_desc *next_to_watch;
+	struct sk_buff *skb;
+	unsigned int bytecount;
+	unsigned short gso_segs;
+	DEFINE_DMA_UNMAP_ADDR(dma);
+	DEFINE_DMA_UNMAP_LEN(len);
+};
+
+struct wx_rx_buffer {
+	struct sk_buff *skb;
+	dma_addr_t dma;
+	dma_addr_t page_dma;
+	struct page *page;
+	unsigned int page_offset;
+};
+
 /* iterator for handling rings in ring container */
 #define wx_for_each_ring(posm, headm) \
 	for (posm = (headm).ring; posm; posm = posm->next)
@@ -448,7 +513,15 @@ struct wx_ring {
 	struct wx_q_vector *q_vector;   /* backpointer to host q_vector */
 	struct net_device *netdev;      /* netdev ring belongs to */
 	struct device *dev;             /* device for DMA mapping */
+	struct page_pool *page_pool;
+	void *desc;                     /* descriptor ring memory */
+	union {
+		struct wx_tx_buffer *tx_buffer_info;
+		struct wx_rx_buffer *rx_buffer_info;
+	};
 	u8 __iomem *tail;
+	dma_addr_t dma;                 /* phys. address of descriptor ring */
+	unsigned int size;              /* length in bytes */
 
 	u16 count;                      /* amount of descriptors */
 
@@ -463,6 +536,7 @@ struct wx_ring {
 struct wx_q_vector {
 	struct wx *wx;
 	int cpu;        /* CPU for DCA */
+	int numa_node;
 	u16 v_idx;      /* index of q_vector within array, also used for
 			 * finding the bit in EICR and friends that
 			 * represents the vector for this ring
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next v2 06/10] net: txgbe: Setup Rx and Tx ring
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (4 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 07/10] net: libwx: Support to receive packets in NAPI Mengyuan Lou
                   ` (3 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu

From: Jiawen Wu <jiawenwu@trustnetic.com>

Improve the configuration of the Rx and Tx rings, set Rx flags and
implement the ndo_set_rx_mode ops.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
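Note (not part of the commit message): this patch wires ndo_set_rx_mode
up to wx_set_rx_mode() from libwx. A minimal sketch of the
netdev-flag-to-filter mapping it applies; the helper is illustrative
only, not driver code:

static u32 rx_flags_to_psr_ctl(unsigned int flags, u32 fctrl)
{
	/* reset the promiscuous bits, keep broadcast and the MTA enabled */
	fctrl &= ~(WX_PSR_CTL_UPE | WX_PSR_CTL_MPE);
	fctrl |= WX_PSR_CTL_BAM | WX_PSR_CTL_MFE;
	if (flags & IFF_PROMISC)
		fctrl |= WX_PSR_CTL_UPE | WX_PSR_CTL_MPE;
	if (flags & IFF_ALLMULTI)
		fctrl |= WX_PSR_CTL_MPE;
	return fctrl;
}
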
 drivers/net/ethernet/wangxun/libwx/wx_type.h  |  1 +
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   | 43 +++++++++++++++++--
 .../net/ethernet/wangxun/txgbe/txgbe_type.h   |  3 ++
 3 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 1863c6cbc6c6..7b247af0ff17 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -200,6 +200,7 @@
 #define WX_MAC_TX_CFG                0x11000
 #define WX_MAC_TX_CFG_TE             BIT(0)
 #define WX_MAC_TX_CFG_SPEED_MASK     GENMASK(30, 29)
+#define WX_MAC_TX_CFG_SPEED_10G      FIELD_PREP(WX_MAC_TX_CFG_SPEED_MASK, 0)
 #define WX_MAC_TX_CFG_SPEED_1G       FIELD_PREP(WX_MAC_TX_CFG_SPEED_MASK, 3)
 #define WX_MAC_RX_CFG                0x11004
 #define WX_MAC_RX_CFG_RE             BIT(0)
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 48cca2fb54c7..3b50acb09699 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -218,6 +218,8 @@ static int txgbe_request_irq(struct wx *wx)
 
 static void txgbe_up_complete(struct wx *wx)
 {
+	u32 reg;
+
 	wx_control_hw(wx, true);
 	wx_configure_vectors(wx);
 
@@ -225,6 +227,15 @@ static void txgbe_up_complete(struct wx *wx)
 	rd32(wx, WX_PX_IC);
 	rd32(wx, WX_PX_MISC_IC);
 	txgbe_irq_enable(wx, true);
+
+	/* Configure MAC Rx and Tx when link is up */
+	reg = rd32(wx, WX_MAC_RX_CFG);
+	wr32(wx, WX_MAC_RX_CFG, reg);
+	wr32(wx, WX_MAC_PKT_FLT, WX_MAC_PKT_FLT_PR);
+	reg = rd32(wx, WX_MAC_WDG_TIMEOUT);
+	wr32(wx, WX_MAC_WDG_TIMEOUT, reg);
+	reg = rd32(wx, WX_MAC_TX_CFG);
+	wr32(wx, WX_MAC_TX_CFG, (reg & ~WX_MAC_TX_CFG_SPEED_MASK) | WX_MAC_TX_CFG_SPEED_10G);
 }
 
 static void txgbe_reset(struct wx *wx)
@@ -246,11 +257,17 @@ static void txgbe_reset(struct wx *wx)
 static void txgbe_disable_device(struct wx *wx)
 {
 	struct net_device *netdev = wx->netdev;
+	u32 i;
 
 	wx_disable_pcie_master(wx);
 	/* disable receives */
 	wx_disable_rx(wx);
 
+	/* disable all enabled rx queues */
+	for (i = 0; i < wx->num_rx_queues; i++)
+		/* this call also flushes the previous write */
+		wx_disable_rx_queue(wx, wx->rx_ring[i]);
+
 	netif_carrier_off(netdev);
 	netif_tx_disable(netdev);
 
@@ -268,6 +285,13 @@ static void txgbe_disable_device(struct wx *wx)
 		wr32m(wx, WX_MAC_TX_CFG, WX_MAC_TX_CFG_TE, 0);
 	}
 
+	/* disable transmits in the hardware now that interrupts are off */
+	for (i = 0; i < wx->num_tx_queues; i++) {
+		u8 reg_idx = wx->tx_ring[i]->reg_idx;
+
+		wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
+	}
+
 	/* Disable the Tx DMA engine */
 	wr32m(wx, WX_TDM_CTL, WX_TDM_CTL_TE, 0);
 }
@@ -291,6 +315,8 @@ static int txgbe_sw_init(struct wx *wx)
 	wx->mac.max_tx_queues = TXGBE_SP_MAX_TX_QUEUES;
 	wx->mac.max_rx_queues = TXGBE_SP_MAX_RX_QUEUES;
 	wx->mac.mcft_size = TXGBE_SP_MC_TBL_SIZE;
+	wx->mac.rx_pb_size = TXGBE_SP_RX_PB_SIZE;
+	wx->mac.tx_pb_size = TXGBE_SP_TDB_PB_SZ;
 
 	/* PCI config space info */
 	err = wx_sw_init(wx);
@@ -345,7 +371,7 @@ static int txgbe_open(struct net_device *netdev)
 	struct wx *wx = netdev_priv(netdev);
 	int err;
 
-	err = wx_setup_isb_resources(wx);
+	err = wx_setup_resources(wx);
 	if (err)
 		goto err_reset;
 
@@ -379,7 +405,7 @@ static void txgbe_close_suspend(struct wx *wx)
 	txgbe_disable_device(wx);
 
 	wx_free_irq(wx);
-	wx_free_isb_resources(wx);
+	wx_free_resources(wx);
 }
 
 /**
@@ -399,7 +425,7 @@ static int txgbe_close(struct net_device *netdev)
 
 	txgbe_down(wx);
 	wx_free_irq(wx);
-	wx_free_isb_resources(wx);
+	wx_free_resources(wx);
 	wx_control_hw(wx, false);
 
 	return 0;
@@ -445,6 +471,7 @@ static const struct net_device_ops txgbe_netdev_ops = {
 	.ndo_open               = txgbe_open,
 	.ndo_stop               = txgbe_close,
 	.ndo_start_xmit         = txgbe_xmit_frame,
+	.ndo_set_rx_mode        = wx_set_rx_mode,
 	.ndo_validate_addr      = eth_validate_addr,
 	.ndo_set_mac_address    = wx_set_mac,
 };
@@ -549,6 +576,16 @@ static int txgbe_probe(struct pci_dev *pdev,
 	}
 
 	netdev->features |= NETIF_F_HIGHDMA;
+	netdev->features |= NETIF_F_SG;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features | NETIF_F_RXALL;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = TXGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
 
 	/* make sure the EEPROM is good */
 	err = txgbe_validate_eeprom_checksum(wx, NULL);
diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
index ec28cf60c91d..563ea51deca6 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_type.h
@@ -77,6 +77,9 @@
 #define TXGBE_SP_MAX_RX_QUEUES  128
 #define TXGBE_SP_RAR_ENTRIES    128
 #define TXGBE_SP_MC_TBL_SIZE    128
+#define TXGBE_SP_RX_PB_SIZE     512
+#define TXGBE_SP_TDB_PB_SZ      (160 * 1024) /* 160KB Packet Buffer */
+#define TXGBE_MAX_JUMBO_FRAME_SIZE      9432 /* max payload 9414 */
 
 /* TX/RX descriptor defines */
 #define TXGBE_DEFAULT_TXD               512
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next v2 07/10] net: libwx: Support to receive packets in NAPI
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (5 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 06/10] net: txgbe: Setup Rx and Tx ring Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 08/10] net: libwx: Add tx path to process packets Mengyuan Lou
                   ` (2 subsequent siblings)
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

Clean all queues associated with a q_vector, to support simple packet
reception without hardware offload features.

Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
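Aside for reviewers: when one MSI-X vector serves several Rx rings,
wx_poll() splits the NAPI budget fairly across them. A minimal sketch
of the rule (illustrative helper, not driver code):

static int per_ring_budget(int budget, u16 nr_rx_rings)
{
	/* never let the share drop below 1, or polling would exit early */
	if (nr_rx_rings > 1)
		return max(budget / nr_rx_rings, 1);
	return budget;
}

With budget = 64 and three rings, each ring may clean up to 21 packets
per poll; if any ring consumes its full share, wx_poll() returns the
whole budget so NAPI keeps polling.
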
 drivers/net/ethernet/wangxun/libwx/wx_hw.c   |  11 +
 drivers/net/ethernet/wangxun/libwx/wx_lib.c  | 655 ++++++++++++++++++-
 drivers/net/ethernet/wangxun/libwx/wx_lib.h  |   7 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h |  34 +
 4 files changed, 706 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index a9a6bfff58ef..2d8b5b9323d7 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -8,6 +8,7 @@
 #include <linux/pci.h>
 
 #include "wx_type.h"
+#include "wx_lib.h"
 #include "wx_hw.h"
 
 static void wx_intr_disable(struct wx *wx, u64 qmask)
@@ -1368,6 +1369,7 @@ static void wx_configure_rx_ring(struct wx *wx,
 				 struct wx_ring *ring)
 {
 	u16 reg_idx = ring->reg_idx;
+	union wx_rx_desc *rx_desc;
 	u64 rdba = ring->dma;
 	u32 rxdctl;
 
@@ -1393,11 +1395,20 @@ static void wx_configure_rx_ring(struct wx *wx,
 
 	wx_configure_srrctl(wx, ring);
 
+	/* initialize rx_buffer_info */
+	memset(ring->rx_buffer_info, 0,
+	       sizeof(struct wx_rx_buffer) * ring->count);
+
+	/* initialize Rx descriptor 0 */
+	rx_desc = WX_RX_DESC(ring, 0);
+	rx_desc->wb.upper.length = 0;
+
 	/* enable receive descriptor ring */
 	wr32m(wx, WX_PX_RR_CFG(reg_idx),
 	      WX_PX_RR_CFG_RR_EN, WX_PX_RR_CFG_RR_EN);
 
 	wx_enable_rx_queue(wx, ring);
+	wx_alloc_rx_buffers(ring, wx_desc_unused(ring));
 }
 
 /**
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index e4d1feaa1c95..fc72b0222327 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -8,6 +8,501 @@
 
 #include "wx_type.h"
 #include "wx_lib.h"
+#include "wx_hw.h"
+
+/* wx_test_staterr - tests bits in Rx descriptor status and error fields */
+static __le32 wx_test_staterr(union wx_rx_desc *rx_desc,
+			      const u32 stat_err_bits)
+{
+	return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits);
+}
+
+static bool wx_can_reuse_rx_page(struct wx_rx_buffer *rx_buffer,
+				 int rx_buffer_pgcnt)
+{
+	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
+	struct page *page = rx_buffer->page;
+
+	/* avoid re-using remote and pfmemalloc pages */
+	if (!dev_page_is_reusable(page))
+		return false;
+
+#if (PAGE_SIZE < 8192)
+	/* if we are only owner of page we can reuse it */
+	if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
+		return false;
+#endif
+
+	/* If we have drained the page fragment pool we need to update
+	 * the pagecnt_bias and page count so that we fully restock the
+	 * number of references the driver holds.
+	 */
+	if (unlikely(pagecnt_bias == 1)) {
+		page_ref_add(page, USHRT_MAX - 1);
+		rx_buffer->pagecnt_bias = USHRT_MAX;
+	}
+
+	return true;
+}
+
+/**
+ * wx_reuse_rx_page - page flip buffer and store it back on the ring
+ * @rx_ring: rx descriptor ring to store buffers on
+ * @old_buff: donor buffer to have page reused
+ *
+ * Synchronizes page for reuse by the adapter
+ **/
+static void wx_reuse_rx_page(struct wx_ring *rx_ring,
+			     struct wx_rx_buffer *old_buff)
+{
+	u16 nta = rx_ring->next_to_alloc;
+	struct wx_rx_buffer *new_buff;
+
+	new_buff = &rx_ring->rx_buffer_info[nta];
+
+	/* update, and store next to alloc */
+	nta++;
+	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
+
+	/* transfer page from old buffer to new buffer */
+	new_buff->page = old_buff->page;
+	new_buff->page_dma = old_buff->page_dma;
+	new_buff->page_offset = old_buff->page_offset;
+	new_buff->pagecnt_bias	= old_buff->pagecnt_bias;
+}
+
+static void wx_dma_sync_frag(struct wx_ring *rx_ring,
+			     struct wx_rx_buffer *rx_buffer)
+{
+	struct sk_buff *skb = rx_buffer->skb;
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      WX_CB(skb)->dma,
+				      skb_frag_off(frag),
+				      skb_frag_size(frag),
+				      DMA_FROM_DEVICE);
+
+	/* If the page was released, just unmap it. */
+	if (unlikely(WX_CB(skb)->page_released))
+		page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+}
+
+static struct wx_rx_buffer *wx_get_rx_buffer(struct wx_ring *rx_ring,
+					     union wx_rx_desc *rx_desc,
+					     struct sk_buff **skb,
+					     int *rx_buffer_pgcnt)
+{
+	struct wx_rx_buffer *rx_buffer;
+	unsigned int size;
+
+	rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+	size = le16_to_cpu(rx_desc->wb.upper.length);
+
+#if (PAGE_SIZE < 8192)
+	*rx_buffer_pgcnt = page_count(rx_buffer->page);
+#else
+	*rx_buffer_pgcnt = 0;
+#endif
+
+	prefetchw(rx_buffer->page);
+	*skb = rx_buffer->skb;
+
+	/* Delay unmapping of the first packet. It carries the header
+	 * information, HW may still access the header after the writeback.
+	 * Only unmap it when EOP is reached
+	 */
+	if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)) {
+		if (!*skb)
+			goto skip_sync;
+	} else {
+		if (*skb)
+			wx_dma_sync_frag(rx_ring, rx_buffer);
+	}
+
+	/* we are reusing so sync this buffer for CPU use */
+	dma_sync_single_range_for_cpu(rx_ring->dev,
+				      rx_buffer->dma,
+				      rx_buffer->page_offset,
+				      size,
+				      DMA_FROM_DEVICE);
+skip_sync:
+	rx_buffer->pagecnt_bias--;
+
+	return rx_buffer;
+}
+
+static void wx_put_rx_buffer(struct wx_ring *rx_ring,
+			     struct wx_rx_buffer *rx_buffer,
+			     struct sk_buff *skb,
+			     int rx_buffer_pgcnt)
+{
+	if (wx_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
+		/* hand second half of page back to the ring */
+		wx_reuse_rx_page(rx_ring, rx_buffer);
+	} else {
+		if (!IS_ERR(skb) && WX_CB(skb)->dma == rx_buffer->dma)
+			/* the page has been released from the ring */
+			WX_CB(skb)->page_released = true;
+		else
+			page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+	}
+
+	/* clear contents of rx_buffer */
+	rx_buffer->page = NULL;
+	rx_buffer->skb = NULL;
+}
+
+static struct sk_buff *wx_build_skb(struct wx_ring *rx_ring,
+				    struct wx_rx_buffer *rx_buffer,
+				    union wx_rx_desc *rx_desc)
+{
+	unsigned int size = le16_to_cpu(rx_desc->wb.upper.length);
+#if (PAGE_SIZE < 8192)
+	unsigned int truesize = WX_RX_BUFSZ;
+#else
+	unsigned int truesize = ALIGN(size, L1_CACHE_BYTES);
+#endif
+	struct sk_buff *skb = rx_buffer->skb;
+
+	if (!skb) {
+		void *page_addr = page_address(rx_buffer->page) +
+				  rx_buffer->page_offset;
+
+		/* prefetch first cache line of first page */
+		prefetch(page_addr);
+#if L1_CACHE_BYTES < 128
+		prefetch(page_addr + L1_CACHE_BYTES);
+#endif
+
+		/* allocate a skb to store the frags */
+		skb = napi_alloc_skb(&rx_ring->q_vector->napi, WX_RXBUFFER_256);
+		if (unlikely(!skb))
+			return NULL;
+
+		/* we will be copying header into skb->data in
+		 * pskb_may_pull so it is in our interest to prefetch
+		 * it now to avoid a possible cache miss
+		 */
+		prefetchw(skb->data);
+
+		if (size <= WX_RXBUFFER_256) {
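+			/* the whole frame fits in the skb head: copy it and
+			 * give back the page reference taken in wx_get_rx_buffer()
+			 */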
+			memcpy(__skb_put(skb, size), page_addr,
+			       ALIGN(size, sizeof(long)));
+			rx_buffer->pagecnt_bias++;
+
+			return skb;
+		}
+
+		if (!wx_test_staterr(rx_desc, WX_RXD_STAT_EOP))
+			WX_CB(skb)->dma = rx_buffer->dma;
+
+		skb_add_rx_frag(skb, 0, rx_buffer->page,
+				rx_buffer->page_offset,
+				size, truesize);
+		goto out;
+	} else {
+		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
+				rx_buffer->page_offset, size, truesize);
+	}
+
+out:
+#if (PAGE_SIZE < 8192)
+	/* flip page offset to other buffer */
+	rx_buffer->page_offset ^= truesize;
+#else
+	/* move offset up to the next cache line */
+	rx_buffer->page_offset += truesize;
+#endif
+
+	return skb;
+}
+
+static bool wx_alloc_mapped_page(struct wx_ring *rx_ring,
+				 struct wx_rx_buffer *bi)
+{
+	struct page *page = bi->page;
+	dma_addr_t dma;
+
+	/* since we are recycling buffers we should seldom need to alloc */
+	if (likely(page))
+		return true;
+
+	page = page_pool_dev_alloc_pages(rx_ring->page_pool);
+	if (WARN_ON(!page))
+		return false;
+	dma = page_pool_get_dma_addr(page);
+
+	bi->page_dma = dma;
+	bi->page = page;
+	bi->page_offset = 0;
+	page_ref_add(page, USHRT_MAX - 1);
+	bi->pagecnt_bias = USHRT_MAX;
+
+	return true;
+}
+
+/**
+ * wx_alloc_rx_buffers - Replace used receive buffers
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count)
+{
+	u16 i = rx_ring->next_to_use;
+	union wx_rx_desc *rx_desc;
+	struct wx_rx_buffer *bi;
+
+	/* nothing to do */
+	if (!cleaned_count)
+		return;
+
+	rx_desc = WX_RX_DESC(rx_ring, i);
+	bi = &rx_ring->rx_buffer_info[i];
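+	/* bias the index by -count so "!i" below cheaply detects the wrap */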
+	i -= rx_ring->count;
+
+	do {
+		if (!wx_alloc_mapped_page(rx_ring, bi))
+			break;
+
+		/* sync the buffer for use by the device */
+		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
+						 bi->page_offset,
+						 WX_RX_BUFSZ,
+						 DMA_FROM_DEVICE);
+
+		rx_desc->read.pkt_addr =
+			cpu_to_le64(bi->page_dma + bi->page_offset);
+
+		rx_desc++;
+		bi++;
+		i++;
+		if (unlikely(!i)) {
+			rx_desc = WX_RX_DESC(rx_ring, 0);
+			bi = rx_ring->rx_buffer_info;
+			i -= rx_ring->count;
+		}
+
+		/* clear the status bits for the next_to_use descriptor */
+		rx_desc->wb.upper.status_error = 0;
+
+		cleaned_count--;
+	} while (cleaned_count);
+
+	i += rx_ring->count;
+
+	if (rx_ring->next_to_use != i) {
+		rx_ring->next_to_use = i;
+		/* update next to alloc since we have filled the ring */
+		rx_ring->next_to_alloc = i;
+
+		/* Force memory writes to complete before letting h/w
+		 * know there are new descriptors to fetch.  (Only
+		 * applicable for weak-ordered memory model archs,
+		 * such as IA-64).
+		 */
+		wmb();
+		writel(i, rx_ring->tail);
+	}
+}
+
+u16 wx_desc_unused(struct wx_ring *ring)
+{
+	u16 ntc = ring->next_to_clean;
+	u16 ntu = ring->next_to_use;
+
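+	/* one slot stays empty to tell full from empty, e.g. with count = 512,
+	 * ntc = 5, ntu = 10 -> 512 + 5 - 10 - 1 = 506 descriptors unused
+	 */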
+	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
+}
+
+/**
+ * wx_is_non_eop - process handling of non-EOP buffers
+ * @rx_ring: Rx ring being processed
+ * @rx_desc: Rx descriptor for current buffer
+ * @skb: Current socket buffer containing buffer in progress
+ *
+ * This function updates next to clean. If the buffer is an EOP buffer
+ * this function exits returning false, otherwise it will place the
+ * sk_buff in the next buffer to be chained and return true indicating
+ * that this is in fact a non-EOP buffer.
+ **/
+static bool wx_is_non_eop(struct wx_ring *rx_ring,
+			  union wx_rx_desc *rx_desc,
+			  struct sk_buff *skb)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	/* fetch, update, and store next to clean */
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+
+	prefetch(WX_RX_DESC(rx_ring, ntc));
+
+	/* if we are the last buffer then there is nothing else to do */
+	if (likely(wx_test_staterr(rx_desc, WX_RXD_STAT_EOP)))
+		return false;
+
+	rx_ring->rx_buffer_info[ntc].skb = skb;
+
+	return true;
+}
+
+static void wx_pull_tail(struct sk_buff *skb)
+{
+	skb_frag_t *frag = &skb_shinfo(skb)->frags[0];
+	unsigned int pull_len;
+	unsigned char *va;
+
+	/* it is valid to use page_address instead of kmap since we are
+	 * working with pages allocated out of the lomem pool per
+	 * alloc_page(GFP_ATOMIC)
+	 */
+	va = skb_frag_address(frag);
+
+	/* we need the header to contain the greater of either ETH_HLEN or
+	 * 60 bytes if the skb->len is less than 60 for skb_pad.
+	 */
+	pull_len = eth_get_headlen(skb->dev, va, WX_RXBUFFER_256);
+
+	/* align pull length to size of long to optimize memcpy performance */
+	skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long)));
+
+	/* update all of the pointers */
+	skb_frag_size_sub(frag, pull_len);
+	skb_frag_off_add(frag, pull_len);
+	skb->data_len -= pull_len;
+	skb->tail += pull_len;
+}
+
+/**
+ * wx_cleanup_headers - Correct corrupted or empty headers
+ * @rx_ring: rx descriptor ring packet is being transacted on
+ * @rx_desc: pointer to the EOP Rx descriptor
+ * @skb: pointer to current skb being fixed
+ *
+ * Check for corrupted packet headers caused by senders on the local L2
+ * embedded NIC switch not setting up their Tx Descriptors right.  These
+ * should be very rare.
+ *
+ * Also address the case where we are pulling data in on pages only
+ * and as such no data is present in the skb header.
+ *
+ * In addition if skb is not at least 60 bytes we need to pad it so that
+ * it is large enough to qualify as a valid Ethernet frame.
+ *
+ * Returns true if an error was encountered and skb was freed.
+ **/
+static bool wx_cleanup_headers(struct wx_ring *rx_ring,
+			       union wx_rx_desc *rx_desc,
+			       struct sk_buff *skb)
+{
+	struct net_device *netdev = rx_ring->netdev;
+
+	/* verify that the packet does not have any known errors */
+	if (!netdev ||
+	    unlikely(wx_test_staterr(rx_desc, WX_RXD_ERR_RXE) &&
+		     !(netdev->features & NETIF_F_RXALL))) {
+		dev_kfree_skb_any(skb);
+		return true;
+	}
+
+	/* place header in linear portion of buffer */
+	if (!skb_headlen(skb))
+		wx_pull_tail(skb);
+
+	/* if eth_skb_pad returns an error the skb was freed */
+	if (eth_skb_pad(skb))
+		return true;
+
+	return false;
+}
+
+/**
+ * wx_clean_rx_irq - Clean completed descriptors from Rx ring - bounce buf
+ * @q_vector: structure containing interrupt and ring information
+ * @rx_ring: rx descriptor ring to transact packets on
+ * @budget: Total limit on number of packets to process
+ *
+ * This function provides a "bounce buffer" approach to Rx interrupt
+ * processing.  The advantage to this is that on systems that have
+ * expensive overhead for IOMMU access this provides a means of avoiding
+ * it by maintaining the mapping of the page to the system.
+ *
+ * Returns amount of work completed.
+ **/
+static int wx_clean_rx_irq(struct wx_q_vector *q_vector,
+			   struct wx_ring *rx_ring,
+			   int budget)
+{
+	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+	u16 cleaned_count = wx_desc_unused(rx_ring);
+
+	do {
+		struct wx_rx_buffer *rx_buffer;
+		union wx_rx_desc *rx_desc;
+		struct sk_buff *skb;
+		int rx_buffer_pgcnt;
+
+		/* return some buffers to hardware, one at a time is too slow */
+		if (cleaned_count >= WX_RX_BUFFER_WRITE) {
+			wx_alloc_rx_buffers(rx_ring, cleaned_count);
+			cleaned_count = 0;
+		}
+
+		rx_desc = WX_RX_DESC(rx_ring, rx_ring->next_to_clean);
+		if (!wx_test_staterr(rx_desc, WX_RXD_STAT_DD))
+			break;
+
+		/* This memory barrier is needed to keep us from reading
+		 * any other fields out of the rx_desc until we know the
+		 * descriptor has been written back
+		 */
+		dma_rmb();
+
+		rx_buffer = wx_get_rx_buffer(rx_ring, rx_desc, &skb, &rx_buffer_pgcnt);
+
+		/* retrieve a buffer from the ring */
+		skb = wx_build_skb(rx_ring, rx_buffer, rx_desc);
+
+		/* exit if we failed to retrieve a buffer */
+		if (!skb) {
+			rx_buffer->pagecnt_bias++;
+			break;
+		}
+
+		wx_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt);
+		cleaned_count++;
+
+		/* place incomplete frames back on ring for completion */
+		if (wx_is_non_eop(rx_ring, rx_desc, skb))
+			continue;
+
+		/* verify the packet layout is correct */
+		if (wx_cleanup_headers(rx_ring, rx_desc, skb))
+			continue;
+
+		/* probably a little skewed due to removing CRC */
+		total_rx_bytes += skb->len;
+
+		skb_record_rx_queue(skb, rx_ring->queue_index);
+		skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+		napi_gro_receive(&q_vector->napi, skb);
+
+		/* update budget accounting */
+		total_rx_packets++;
+	} while (likely(total_rx_packets < budget));
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
+
+	return total_rx_packets;
+}
 
 /**
  * wx_poll - NAPI polling RX/TX cleanup routine
@@ -18,8 +513,68 @@
  **/
 static int wx_poll(struct napi_struct *napi, int budget)
 {
-	return 0;
+	struct wx_q_vector *q_vector = container_of(napi, struct wx_q_vector, napi);
+	int per_ring_budget, work_done = 0;
+	struct wx *wx = q_vector->wx;
+	bool clean_complete = true;
+	struct wx_ring *ring;
+
+	/* Exit if we are called by netpoll */
+	if (budget <= 0)
+		return budget;
+
+	/* attempt to distribute budget to each queue fairly, but don't allow
+	 * the budget to go below 1 because we'll exit polling
+	 */
+	if (q_vector->rx.count > 1)
+		per_ring_budget = max(budget / q_vector->rx.count, 1);
+	else
+		per_ring_budget = budget;
+
+	wx_for_each_ring(ring, q_vector->rx) {
+		int cleaned = wx_clean_rx_irq(q_vector, ring, per_ring_budget);
+
+		work_done += cleaned;
+		if (cleaned >= per_ring_budget)
+			clean_complete = false;
+	}
+
+	/* If all work not completed, return budget and keep polling */
+	if (!clean_complete)
+		return budget;
+
+	/* all work done, exit the polling mode */
+	if (likely(napi_complete_done(napi, work_done))) {
+		if (netif_running(wx->netdev))
+			wx_intr_enable(wx, WX_INTR_Q(q_vector->v_idx));
+	}
+
+	return min(work_done, budget - 1);
+}
+
+void wx_napi_enable_all(struct wx *wx)
+{
+	struct wx_q_vector *q_vector;
+	int q_idx;
+
+	for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
+		q_vector = wx->q_vector[q_idx];
+		napi_enable(&q_vector->napi);
+	}
+}
+EXPORT_SYMBOL(wx_napi_enable_all);
+
+void wx_napi_disable_all(struct wx *wx)
+{
+	struct wx_q_vector *q_vector;
+	int q_idx;
+
+	for (q_idx = 0; q_idx < wx->num_q_vectors; q_idx++) {
+		q_vector = wx->q_vector[q_idx];
+		napi_disable(&q_vector->napi);
+	}
 }
+EXPORT_SYMBOL(wx_napi_disable_all);
 
 /**
  * wx_set_rss_queues: Allocate queues for RSS
@@ -614,6 +1169,68 @@ void wx_configure_vectors(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_configure_vectors);
 
+/**
+ * wx_clean_rx_ring - Free Rx Buffers per Queue
+ * @rx_ring: ring to free buffers from
+ **/
+static void wx_clean_rx_ring(struct wx_ring *rx_ring)
+{
+	struct wx_rx_buffer *rx_buffer;
+	u16 i = rx_ring->next_to_clean;
+
+	rx_buffer = &rx_ring->rx_buffer_info[i];
+
+	/* Free all the Rx ring sk_buffs */
+	while (i != rx_ring->next_to_alloc) {
+		if (rx_buffer->skb) {
+			struct sk_buff *skb = rx_buffer->skb;
+
+			if (WX_CB(skb)->page_released)
+				page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+
+			dev_kfree_skb(skb);
+		}
+
+		/* Invalidate cache lines that may have been written to by
+		 * device so that we avoid corrupting memory.
+		 */
+		dma_sync_single_range_for_cpu(rx_ring->dev,
+					      rx_buffer->dma,
+					      rx_buffer->page_offset,
+					      WX_RX_BUFSZ,
+					      DMA_FROM_DEVICE);
+
+		/* free resources associated with mapping */
+		page_pool_put_full_page(rx_ring->page_pool, rx_buffer->page, false);
+		__page_frag_cache_drain(rx_buffer->page,
+					rx_buffer->pagecnt_bias);
+
+		i++;
+		rx_buffer++;
+		if (i == rx_ring->count) {
+			i = 0;
+			rx_buffer = rx_ring->rx_buffer_info;
+		}
+	}
+
+	rx_ring->next_to_alloc = 0;
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+}
+
+/**
+ * wx_clean_all_rx_rings - Free Rx Buffers for all queues
+ * @wx: board private structure
+ **/
+void wx_clean_all_rx_rings(struct wx *wx)
+{
+	int i;
+
+	for (i = 0; i < wx->num_rx_queues; i++)
+		wx_clean_rx_ring(wx->rx_ring[i]);
+}
+EXPORT_SYMBOL(wx_clean_all_rx_rings);
+
 /**
  * wx_free_rx_resources - Free Rx Resources
  * @rx_ring: ring to clean the resources from
@@ -622,6 +1239,7 @@ EXPORT_SYMBOL(wx_configure_vectors);
  **/
 static void wx_free_rx_resources(struct wx_ring *rx_ring)
 {
+	wx_clean_rx_ring(rx_ring);
 	kvfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
 
@@ -760,6 +1378,9 @@ static int wx_setup_rx_resources(struct wx_ring *rx_ring)
 	if (!rx_ring->desc)
 		goto err;
 
+	rx_ring->next_to_clean = 0;
+	rx_ring->next_to_use = 0;
+
 	ret = wx_alloc_page_pool(rx_ring);
 	if (ret < 0) {
 		dev_err(rx_ring->dev, "Page pool creation failed: %d\n", ret);
@@ -914,4 +1535,36 @@ int wx_setup_resources(struct wx *wx)
 }
 EXPORT_SYMBOL(wx_setup_resources);
 
+/**
+ * wx_get_stats64 - Get System Network Statistics
+ * @netdev: network interface device structure
+ * @stats: storage space for 64bit statistics
+ */
+void wx_get_stats64(struct net_device *netdev,
+		    struct rtnl_link_stats64 *stats)
+{
+	struct wx *wx = netdev_priv(netdev);
+	int i;
+
+	rcu_read_lock();
+	for (i = 0; i < wx->num_rx_queues; i++) {
+		struct wx_ring *ring = READ_ONCE(wx->rx_ring[i]);
+		u64 bytes, packets;
+		unsigned int start;
+
+		if (ring) {
+			do {
+				start = u64_stats_fetch_begin(&ring->syncp);
+				packets = ring->stats.packets;
+				bytes   = ring->stats.bytes;
+			} while (u64_stats_fetch_retry(&ring->syncp, start));
+			stats->rx_packets += packets;
+			stats->rx_bytes   += bytes;
+		}
+	}
+
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL(wx_get_stats64);
+
 MODULE_LICENSE("GPL");
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
index 6fa95752fc42..8fc7f6f3d40e 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
@@ -7,6 +7,10 @@
 #ifndef _WX_LIB_H_
 #define _WX_LIB_H_
 
+void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count);
+u16 wx_desc_unused(struct wx_ring *ring);
+void wx_napi_enable_all(struct wx *wx);
+void wx_napi_disable_all(struct wx *wx);
 void wx_reset_interrupt_capability(struct wx *wx);
 void wx_clear_interrupt_scheme(struct wx *wx);
 int wx_init_interrupt_scheme(struct wx *wx);
@@ -16,7 +20,10 @@ int wx_setup_isb_resources(struct wx *wx);
 void wx_free_isb_resources(struct wx *wx);
 u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx);
 void wx_configure_vectors(struct wx *wx);
+void wx_clean_all_rx_rings(struct wx *wx);
 void wx_free_resources(struct wx *wx);
 int wx_setup_resources(struct wx *wx);
+void wx_get_stats64(struct net_device *netdev,
+		    struct rtnl_link_stats64 *stats);
 
 #endif /* _WX_LIB_H_ */
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 7b247af0ff17..fd05c0597d94 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -311,8 +311,15 @@
 #define WX_RX_BUFSZ      WX_RXBUFFER_2K
 #endif
 
+#define WX_RX_BUFFER_WRITE   16      /* Must be power of 2 */
 #define WX_CFG_PORT_ST               0x14404
 
+/******************* Receive Descriptor bit definitions **********************/
+#define WX_RXD_STAT_DD               BIT(0) /* Done */
+#define WX_RXD_STAT_EOP              BIT(1) /* End of Packet */
+
+#define WX_RXD_ERR_RXE               BIT(29) /* Any MAC Error */
+
 /* Host Interface Command Structures */
 struct wx_hic_hdr {
 	u8 cmd;
@@ -433,6 +440,15 @@ enum wx_reset_type {
 	WX_GLOBAL_RESET
 };
 
+struct wx_cb {
+	dma_addr_t dma;
+	u16     append_cnt;      /* number of skb's appended */
+	bool    page_released;
+	bool    dma_released;
+};
+
+#define WX_CB(skb) ((struct wx_cb *)(skb)->cb)
+
 /* Transmit Descriptor */
 union wx_tx_desc {
 	struct {
@@ -478,6 +494,9 @@ union wx_rx_desc {
 	} wb;  /* writeback */
 };
 
+#define WX_RX_DESC(R, i)     \
+	(&(((union wx_rx_desc *)((R)->desc))[i]))
+
 /* wrapper around a pointer to a socket buffer,
  * so a DMA handle can be stored along with the buffer
  */
@@ -496,6 +515,12 @@ struct wx_rx_buffer {
 	dma_addr_t page_dma;
 	struct page *page;
 	unsigned int page_offset;
+	u16 pagecnt_bias;
+};
+
+struct wx_queue_stats {
+	u64 packets;
+	u64 bytes;
 };
 
 /* iterator for handling rings in ring container */
@@ -505,6 +530,8 @@ struct wx_rx_buffer {
 struct wx_ring_container {
 	struct wx_ring *ring;           /* pointer to linked list of rings */
 	u16 work_limit;                 /* total work allowed per interrupt */
+	unsigned int total_bytes;       /* total bytes processed this int */
+	unsigned int total_packets;     /* total packets processed this int */
 	u8 count;                       /* total number of rings in vector */
 	u8 itr;                         /* current ITR setting for ring */
 };
@@ -532,6 +559,12 @@ struct wx_ring {
 					 * associated with this ring, which is
 					 * different for DCB and RSS modes
 					 */
+	u16 next_to_use;
+	u16 next_to_clean;
+	u16 next_to_alloc;
+
+	struct wx_queue_stats stats;
+	struct u64_stats_sync syncp;
 } ____cacheline_internodealigned_in_smp;
 
 struct wx_q_vector {
@@ -633,6 +666,7 @@ struct wx {
 };
 
 #define WX_INTR_ALL (~0ULL)
+#define WX_INTR_Q(i) BIT(i)
 
 /* register operations */
 #define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next v2 08/10] net: libwx: Add tx path to process packets
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (6 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 07/10] net: libwx: Support to receive packets in NAPI Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 09/10] net: txgbe: Support Rx and Tx process path Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 10/10] net: ngbe: " Mengyuan Lou
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

Add support for transmitting packets; hardware offload features are not
yet enabled.

Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
 drivers/net/ethernet/wangxun/libwx/wx_hw.c   |   4 +
 drivers/net/ethernet/wangxun/libwx/wx_lib.c  | 439 +++++++++++++++++++
 drivers/net/ethernet/wangxun/libwx/wx_lib.h  |   3 +
 drivers/net/ethernet/wangxun/libwx/wx_type.h |  19 +
 4 files changed, 465 insertions(+)

diff --git a/drivers/net/ethernet/wangxun/libwx/wx_hw.c b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
index 2d8b5b9323d7..c68dcec218d3 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_hw.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_hw.c
@@ -1355,6 +1355,10 @@ static void wx_configure_tx_ring(struct wx *wx,
 		txdctl |= ring->count / 128 << WX_PX_TR_CFG_TR_SIZE_SHIFT;
 	txdctl |= 0x20 << WX_PX_TR_CFG_WTHRESH_SHIFT;
 
+	/* reinitialize tx_buffer_info */
+	memset(ring->tx_buffer_info, 0,
+	       sizeof(struct wx_tx_buffer) * ring->count);
+
 	/* enable queue */
 	wr32(wx, WX_PX_TR_CFG(reg_idx), txdctl);
 
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.c b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
index fc72b0222327..4f9102e6f725 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.c
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.c
@@ -504,6 +504,134 @@ static int wx_clean_rx_irq(struct wx_q_vector *q_vector,
 	return total_rx_packets;
 }
 
+static struct netdev_queue *wx_txring_txq(const struct wx_ring *ring)
+{
+	return netdev_get_tx_queue(ring->netdev, ring->queue_index);
+}
+
+/**
+ * wx_clean_tx_irq - Reclaim resources after transmit completes
+ * @q_vector: structure containing interrupt and ring information
+ * @tx_ring: tx ring to clean
+ * @napi_budget: Used to determine if we are in netpoll
+ **/
+static bool wx_clean_tx_irq(struct wx_q_vector *q_vector,
+			    struct wx_ring *tx_ring, int napi_budget)
+{
+	unsigned int total_bytes = 0, total_packets = 0;
+	unsigned int budget = q_vector->tx.work_limit;
+	unsigned int i = tx_ring->next_to_clean;
+	struct wx_tx_buffer *tx_buffer;
+	union wx_tx_desc *tx_desc;
+
+	if (!netif_carrier_ok(tx_ring->netdev))
+		return true;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+	tx_desc = WX_TX_DESC(tx_ring, i);
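+	/* bias i negative (modulo 2^32) so the "!i" tests below catch the
+	 * ring wrap without comparing against tx_ring->count each pass;
+	 * the bias is removed again before writing next_to_clean
+	 */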
+	i -= tx_ring->count;
+
+	do {
+		union wx_tx_desc *eop_desc = tx_buffer->next_to_watch;
+
+		/* if next_to_watch is not set then there is no work pending */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if DD is not set pending work has not been completed */
+		if (!(eop_desc->wb.status & cpu_to_le32(WX_TXD_STAT_DD)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buffer->next_to_watch = NULL;
+
+		/* update the statistics for this packet */
+		total_bytes += tx_buffer->bytecount;
+		total_packets += tx_buffer->gso_segs;
+
+		/* free the skb */
+		napi_consume_skb(tx_buffer->skb, napi_budget);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* clear tx_buffer data */
+		dma_unmap_len_set(tx_buffer, len, 0);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(!i)) {
+				i -= tx_ring->count;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = WX_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len)) {
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+				dma_unmap_len_set(tx_buffer, len, 0);
+			}
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buffer = tx_ring->tx_buffer_info;
+			tx_desc = WX_TX_DESC(tx_ring, 0);
+		}
+
+		/* issue prefetch for next Tx descriptor */
+		prefetch(tx_desc);
+
+		/* update budget accounting */
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->stats.bytes += total_bytes;
+	tx_ring->stats.packets += total_packets;
+	u64_stats_update_end(&tx_ring->syncp);
+	q_vector->tx.total_bytes += total_bytes;
+	q_vector->tx.total_packets += total_packets;
+
+	netdev_tx_completed_queue(wx_txring_txq(tx_ring),
+				  total_packets, total_bytes);
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+	if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+		     (wx_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) {
+		/* Make sure that anybody stopping the queue after this
+		 * sees the new next_to_clean.
+		 */
+		smp_mb();
+
+		if (__netif_subqueue_stopped(tx_ring->netdev,
+					     tx_ring->queue_index) &&
+		    netif_running(tx_ring->netdev))
+			netif_wake_subqueue(tx_ring->netdev,
+					    tx_ring->queue_index);
+	}
+
+	return !!budget;
+}
+
 /**
  * wx_poll - NAPI polling RX/TX cleanup routine
  * @napi: napi struct with our devices info in it
@@ -519,6 +647,11 @@ static int wx_poll(struct napi_struct *napi, int budget)
 	bool clean_complete = true;
 	struct wx_ring *ring;
 
+	wx_for_each_ring(ring, q_vector->tx) {
+		if (!wx_clean_tx_irq(q_vector, ring, budget))
+			clean_complete = false;
+	}
+
 	/* Exit if we are called by netpoll */
 	if (budget <= 0)
 		return budget;
@@ -552,6 +685,216 @@ static int wx_poll(struct napi_struct *napi, int budget)
 	return min(work_done, budget - 1);
 }
 
+static int wx_maybe_stop_tx(struct wx_ring *tx_ring, u16 size)
+{
+	if (likely(wx_desc_unused(tx_ring) >= size))
+		return 0;
+
+	netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	/* For the next check */
+	smp_mb();
+
+	/* We need to check again in a case another CPU has just
+	 * made room available.
+	 */
+	if (likely(wx_desc_unused(tx_ring) < size))
+		return -EBUSY;
+
+	/* A reprieve! - use start_queue because it doesn't call schedule */
+	netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+	return 0;
+}
+
+static void wx_tx_map(struct wx_ring *tx_ring,
+		      struct wx_tx_buffer *first)
+{
+	struct sk_buff *skb = first->skb;
+	struct wx_tx_buffer *tx_buffer;
+	u16 i = tx_ring->next_to_use;
+	unsigned int data_len, size;
+	union wx_tx_desc *tx_desc;
+	skb_frag_t *frag;
+	dma_addr_t dma;
+	u32 cmd_type;
+
+	cmd_type = WX_TXD_DTYP_DATA | WX_TXD_IFCS;
+	tx_desc = WX_TX_DESC(tx_ring, i);
+
+	tx_desc->read.olinfo_status = cpu_to_le32(skb->len << WX_TXD_PAYLEN_SHIFT);
+
+	size = skb_headlen(skb);
+	data_len = skb->data_len;
+	dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
+
+	tx_buffer = first;
+
+	for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
+		if (dma_mapping_error(tx_ring->dev, dma))
+			goto dma_error;
+
+		/* record length, and DMA address */
+		dma_unmap_len_set(tx_buffer, len, size);
+		dma_unmap_addr_set(tx_buffer, dma, dma);
+
+		tx_desc->read.buffer_addr = cpu_to_le64(dma);
+
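+		/* cmd_type has no bits set in the length field, so the
+		 * XOR with the size below behaves exactly like an OR
+		 */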
+		while (unlikely(size > WX_MAX_DATA_PER_TXD)) {
+			tx_desc->read.cmd_type_len =
+				cpu_to_le32(cmd_type ^ WX_MAX_DATA_PER_TXD);
+
+			i++;
+			tx_desc++;
+			if (i == tx_ring->count) {
+				tx_desc = WX_TX_DESC(tx_ring, 0);
+				i = 0;
+			}
+			tx_desc->read.olinfo_status = 0;
+
+			dma += WX_MAX_DATA_PER_TXD;
+			size -= WX_MAX_DATA_PER_TXD;
+
+			tx_desc->read.buffer_addr = cpu_to_le64(dma);
+		}
+
+		if (likely(!data_len))
+			break;
+
+		tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
+
+		i++;
+		tx_desc++;
+		if (i == tx_ring->count) {
+			tx_desc = WX_TX_DESC(tx_ring, 0);
+			i = 0;
+		}
+		tx_desc->read.olinfo_status = 0;
+
+		size = skb_frag_size(frag);
+
+		data_len -= size;
+
+		dma = skb_frag_dma_map(tx_ring->dev, frag, 0, size,
+				       DMA_TO_DEVICE);
+
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+	}
+
+	/* write last descriptor with RS and EOP bits */
+	cmd_type |= size | WX_TXD_EOP | WX_TXD_RS;
+	tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
+
+	netdev_tx_sent_queue(wx_txring_txq(tx_ring), first->bytecount);
+
+	skb_tx_timestamp(skb);
+
+	/* Force memory writes to complete before letting h/w know there
+	 * are new descriptors to fetch.  (Only applicable for weak-ordered
+	 * memory model archs, such as IA-64).
+	 *
+	 * We also need this memory barrier to make certain all of the
+	 * status bits have been updated before next_to_watch is written.
+	 */
+	wmb();
+
+	/* set next_to_watch value indicating a packet is present */
+	first->next_to_watch = tx_desc;
+
+	i++;
+	if (i == tx_ring->count)
+		i = 0;
+
+	tx_ring->next_to_use = i;
+
+	wx_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+	if (netif_xmit_stopped(wx_txring_txq(tx_ring)) || !netdev_xmit_more())
+		writel(i, tx_ring->tail);
+
+	return;
+dma_error:
+	dev_err(tx_ring->dev, "TX DMA map failed\n");
+
+	/* clear dma mappings for failed tx_buffer_info map */
+	for (;;) {
+		tx_buffer = &tx_ring->tx_buffer_info[i];
+		if (dma_unmap_len(tx_buffer, len))
+			dma_unmap_page(tx_ring->dev,
+				       dma_unmap_addr(tx_buffer, dma),
+				       dma_unmap_len(tx_buffer, len),
+				       DMA_TO_DEVICE);
+		dma_unmap_len_set(tx_buffer, len, 0);
+		if (tx_buffer == first)
+			break;
+		if (i == 0)
+			i += tx_ring->count;
+		i--;
+	}
+
+	dev_kfree_skb_any(first->skb);
+	first->skb = NULL;
+
+	tx_ring->next_to_use = i;
+}
+
+static netdev_tx_t wx_xmit_frame_ring(struct sk_buff *skb,
+				      struct wx_ring *tx_ring)
+{
+	u16 count = TXD_USE_COUNT(skb_headlen(skb));
+	struct wx_tx_buffer *first;
+	unsigned short f;
+
+	/* need: 1 descriptor per page * PAGE_SIZE/WX_MAX_DATA_PER_TXD,
+	 *       + 1 desc for skb_headlen/WX_MAX_DATA_PER_TXD,
+	 *       + 2 desc gap to keep tail from touching head,
+	 *       + 1 desc for context descriptor,
+	 * otherwise try next time
+	 */
+	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+		count += TXD_USE_COUNT(skb_frag_size(&skb_shinfo(skb)->
+						     frags[f]));
+
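+	/* count already covers the frags and the linear head, so the
+	 * extra 3 are the two-descriptor gap plus the one context
+	 * descriptor listed in the comment above
+	 */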
+	if (wx_maybe_stop_tx(tx_ring, count + 3))
+		return NETDEV_TX_BUSY;
+
+	/* record the location of the first descriptor for this packet */
+	first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
+	first->skb = skb;
+	first->bytecount = skb->len;
+	first->gso_segs = 1;
+
+	wx_tx_map(tx_ring, first);
+
+	return NETDEV_TX_OK;
+}
+
+netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
+			  struct net_device *netdev)
+{
+	unsigned int r_idx = skb->queue_mapping;
+	struct wx *wx = netdev_priv(netdev);
+	struct wx_ring *tx_ring;
+
+	if (!netif_carrier_ok(netdev)) {
+		dev_kfree_skb_any(skb);
+		return NETDEV_TX_OK;
+	}
+
+	/* The minimum packet size for olinfo paylen is 17 so pad the skb
+	 * in order to meet this minimum size requirement.
+	 */
+	if (skb_put_padto(skb, 17))
+		return NETDEV_TX_OK;
+
+	if (r_idx >= wx->num_tx_queues)
+		r_idx = r_idx % wx->num_tx_queues;
+	tx_ring = wx->tx_ring[r_idx];
+
+	return wx_xmit_frame_ring(skb, tx_ring);
+}
+EXPORT_SYMBOL(wx_xmit_frame);
+
 void wx_napi_enable_all(struct wx *wx)
 {
 	struct wx_q_vector *q_vector;
@@ -1272,6 +1615,81 @@ static void wx_free_all_rx_resources(struct wx *wx)
 		wx_free_rx_resources(wx->rx_ring[i]);
 }
 
+/**
+ * wx_clean_tx_ring - Free Tx Buffers
+ * @tx_ring: ring to be cleaned
+ **/
+static void wx_clean_tx_ring(struct wx_ring *tx_ring)
+{
+	struct wx_tx_buffer *tx_buffer;
+	u16 i = tx_ring->next_to_clean;
+
+	tx_buffer = &tx_ring->tx_buffer_info[i];
+
+	while (i != tx_ring->next_to_use) {
+		union wx_tx_desc *eop_desc, *tx_desc;
+
+		/* Free all the Tx ring sk_buffs */
+		dev_kfree_skb_any(tx_buffer->skb);
+
+		/* unmap skb header data */
+		dma_unmap_single(tx_ring->dev,
+				 dma_unmap_addr(tx_buffer, dma),
+				 dma_unmap_len(tx_buffer, len),
+				 DMA_TO_DEVICE);
+
+		/* check for eop_desc to determine the end of the packet */
+		eop_desc = tx_buffer->next_to_watch;
+		tx_desc = WX_TX_DESC(tx_ring, i);
+
+		/* unmap remaining buffers */
+		while (tx_desc != eop_desc) {
+			tx_buffer++;
+			tx_desc++;
+			i++;
+			if (unlikely(i == tx_ring->count)) {
+				i = 0;
+				tx_buffer = tx_ring->tx_buffer_info;
+				tx_desc = WX_TX_DESC(tx_ring, 0);
+			}
+
+			/* unmap any remaining paged data */
+			if (dma_unmap_len(tx_buffer, len))
+				dma_unmap_page(tx_ring->dev,
+					       dma_unmap_addr(tx_buffer, dma),
+					       dma_unmap_len(tx_buffer, len),
+					       DMA_TO_DEVICE);
+		}
+
+		/* move us one more past the eop_desc for start of next pkt */
+		tx_buffer++;
+		i++;
+		if (unlikely(i == tx_ring->count)) {
+			i = 0;
+			tx_buffer = tx_ring->tx_buffer_info;
+		}
+	}
+
+	netdev_tx_reset_queue(wx_txring_txq(tx_ring));
+
+	/* reset next_to_use and next_to_clean */
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+}
+
+/**
+ * wx_clean_all_tx_rings - Free Tx Buffers for all queues
+ * @wx: board private structure
+ **/
+void wx_clean_all_tx_rings(struct wx *wx)
+{
+	int i;
+
+	for (i = 0; i < wx->num_tx_queues; i++)
+		wx_clean_tx_ring(wx->tx_ring[i]);
+}
+EXPORT_SYMBOL(wx_clean_all_tx_rings);
+
 /**
  * wx_free_tx_resources - Free Tx Resources per Queue
  * @tx_ring: Tx descriptor ring for a specific queue
@@ -1280,6 +1698,7 @@ static void wx_free_all_rx_resources(struct wx *wx)
  **/
 static void wx_free_tx_resources(struct wx_ring *tx_ring)
 {
+	wx_clean_tx_ring(tx_ring);
 	kvfree(tx_ring->tx_buffer_info);
 	tx_ring->tx_buffer_info = NULL;
 
@@ -1466,6 +1885,9 @@ static int wx_setup_tx_resources(struct wx_ring *tx_ring)
 	if (!tx_ring->desc)
 		goto err;
 
+	tx_ring->next_to_use = 0;
+	tx_ring->next_to_clean = 0;
+
 	return 0;
 
 err:
@@ -1563,6 +1985,23 @@ void wx_get_stats64(struct net_device *netdev,
 		}
 	}
 
+	for (i = 0; i < wx->num_tx_queues; i++) {
+		struct wx_ring *ring = READ_ONCE(wx->tx_ring[i]);
+		u64 bytes, packets;
+		unsigned int start;
+
+		if (ring) {
+			do {
+				start = u64_stats_fetch_begin(&ring->syncp);
+				packets = ring->stats.packets;
+				bytes   = ring->stats.bytes;
+			} while (u64_stats_fetch_retry(&ring->syncp, start));
+			stats->tx_packets += packets;
+			stats->tx_bytes   += bytes;
+		}
+	}
+
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(wx_get_stats64);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_lib.h b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
index 8fc7f6f3d40e..50ee41f1fa10 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_lib.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_lib.h
@@ -9,6 +9,8 @@
 
 void wx_alloc_rx_buffers(struct wx_ring *rx_ring, u16 cleaned_count);
 u16 wx_desc_unused(struct wx_ring *ring);
+netdev_tx_t wx_xmit_frame(struct sk_buff *skb,
+			  struct net_device *netdev);
 void wx_napi_enable_all(struct wx *wx);
 void wx_napi_disable_all(struct wx *wx);
 void wx_reset_interrupt_capability(struct wx *wx);
@@ -21,6 +23,7 @@ void wx_free_isb_resources(struct wx *wx);
 u32 wx_misc_isb(struct wx *wx, enum wx_isb_idx idx);
 void wx_configure_vectors(struct wx *wx);
 void wx_clean_all_rx_rings(struct wx *wx);
+void wx_clean_all_tx_rings(struct wx *wx);
 void wx_free_resources(struct wx *wx);
 int wx_setup_resources(struct wx *wx);
 void wx_get_stats64(struct net_device *netdev,
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index fd05c0597d94..4e988896ad97 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -312,6 +312,15 @@
 #endif
 
 #define WX_RX_BUFFER_WRITE   16      /* Must be power of 2 */
+
+#define WX_MAX_DATA_PER_TXD  BIT(14)
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S)     DIV_ROUND_UP((S), WX_MAX_DATA_PER_TXD)
+#define DESC_NEEDED          (MAX_SKB_FRAGS + 4)
+
+/* Ether Types */
+#define WX_ETH_P_CNM                 0x22E7
+
 #define WX_CFG_PORT_ST               0x14404
 
 /******************* Receive Descriptor bit definitions **********************/
@@ -320,6 +329,14 @@
 
 #define WX_RXD_ERR_RXE               BIT(29) /* Any MAC Error */
 
+/*********************** Transmit Descriptor Config Masks ****************/
+#define WX_TXD_STAT_DD               BIT(0)  /* Descriptor Done */
+#define WX_TXD_DTYP_DATA             0       /* Adv Data Descriptor */
+#define WX_TXD_PAYLEN_SHIFT          13      /* Desc PAYLEN shift */
+#define WX_TXD_EOP                   BIT(24) /* End of Packet */
+#define WX_TXD_IFCS                  BIT(25) /* Insert FCS */
+#define WX_TXD_RS                    BIT(27) /* Report Status */
+
 /* Host Interface Command Structures */
 struct wx_hic_hdr {
 	u8 cmd;
@@ -496,6 +513,8 @@ union wx_rx_desc {
 
 #define WX_RX_DESC(R, i)     \
 	(&(((union wx_rx_desc *)((R)->desc))[i]))
+#define WX_TX_DESC(R, i)     \
+	(&(((union wx_tx_desc *)((R)->desc))[i]))
 
 /* wrapper around a pointer to a socket buffer,
  * so a DMA handle can be stored along with the buffer
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next v2 09/10] net: txgbe: Support Rx and Tx process path
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (7 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 08/10] net: libwx: Add tx path to process packets Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  2023-01-31 10:05 ` [PATCH net-next v2 10/10] net: ngbe: " Mengyuan Lou
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu

From: Jiawen Wu <jiawenwu@trustnetic.com>

Service Rx and Tx ring interrupts, and process packets in the data path.

Signed-off-by: Jiawen Wu <jiawenwu@trustnetic.com>
---
 .../net/ethernet/wangxun/txgbe/txgbe_main.c   | 37 ++++++++++++++-----
 1 file changed, 28 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
index 3b50acb09699..094df377726b 100644
--- a/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
+++ b/drivers/net/ethernet/wangxun/txgbe/txgbe_main.c
@@ -223,6 +223,10 @@ static void txgbe_up_complete(struct wx *wx)
 	wx_control_hw(wx, true);
 	wx_configure_vectors(wx);
 
+	/* make sure to complete pre-operations */
+	smp_mb__before_atomic();
+	wx_napi_enable_all(wx);
+
 	/* clear any pending interrupts, may auto mask */
 	rd32(wx, WX_PX_IC);
 	rd32(wx, WX_PX_MISC_IC);
@@ -236,6 +240,10 @@ static void txgbe_up_complete(struct wx *wx)
 	wr32(wx, WX_MAC_WDG_TIMEOUT, reg);
 	reg = rd32(wx, WX_MAC_TX_CFG);
 	wr32(wx, WX_MAC_TX_CFG, (reg & ~WX_MAC_TX_CFG_SPEED_MASK) | WX_MAC_TX_CFG_SPEED_10G);
+
+	/* enable transmits */
+	netif_tx_start_all_queues(wx->netdev);
+	netif_carrier_on(wx->netdev);
 }
 
 static void txgbe_reset(struct wx *wx)
@@ -268,10 +276,12 @@ static void txgbe_disable_device(struct wx *wx)
 		/* this call also flushes the previous write */
 		wx_disable_rx_queue(wx, wx->rx_ring[i]);
 
+	netif_tx_stop_all_queues(netdev);
 	netif_carrier_off(netdev);
 	netif_tx_disable(netdev);
 
 	wx_irq_disable(wx);
+	wx_napi_disable_all(wx);
 
 	if (wx->bus.func < 2)
 		wr32m(wx, TXGBE_MIS_PRB_CTL, TXGBE_MIS_PRB_CTL_LAN_UP(wx->bus.func), 0);
@@ -300,6 +310,9 @@ static void txgbe_down(struct wx *wx)
 {
 	txgbe_disable_device(wx);
 	txgbe_reset(wx);
+
+	wx_clean_all_tx_rings(wx);
+	wx_clean_all_rx_rings(wx);
 }
 
 /**
@@ -381,10 +394,21 @@ static int txgbe_open(struct net_device *netdev)
 	if (err)
 		goto err_free_isb;
 
+	/* Notify the stack of the actual queue counts. */
+	err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
+	if (err)
+		goto err_free_irq;
+
+	err = netif_set_real_num_rx_queues(netdev, wx->num_rx_queues);
+	if (err)
+		goto err_free_irq;
+
 	txgbe_up_complete(wx);
 
 	return 0;
 
+err_free_irq:
+	wx_free_irq(wx);
 err_free_isb:
 	wx_free_isb_resources(wx);
 err_reset:
@@ -403,8 +427,6 @@ static int txgbe_open(struct net_device *netdev)
 static void txgbe_close_suspend(struct wx *wx)
 {
 	txgbe_disable_device(wx);
-
-	wx_free_irq(wx);
 	wx_free_resources(wx);
 }
 
@@ -461,19 +483,14 @@ static void txgbe_shutdown(struct pci_dev *pdev)
 	}
 }
 
-static netdev_tx_t txgbe_xmit_frame(struct sk_buff *skb,
-				    struct net_device *netdev)
-{
-	return NETDEV_TX_OK;
-}
-
 static const struct net_device_ops txgbe_netdev_ops = {
 	.ndo_open               = txgbe_open,
 	.ndo_stop               = txgbe_close,
-	.ndo_start_xmit         = txgbe_xmit_frame,
+	.ndo_start_xmit         = wx_xmit_frame,
 	.ndo_set_rx_mode        = wx_set_rx_mode,
 	.ndo_validate_addr      = eth_validate_addr,
 	.ndo_set_mac_address    = wx_set_mac,
+	.ndo_get_stats64        = wx_get_stats64,
 };
 
 /**
@@ -647,6 +664,8 @@ static int txgbe_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, wx);
 
+	netif_tx_stop_all_queues(netdev);
+
 	/* calculate the expected PCIe bandwidth required for optimal
 	 * performance. Note that some older parts will never have enough
 	 * bandwidth due to being older generation PCIe parts. We clamp these
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* [PATCH net-next v2 10/10] net: ngbe: Support Rx and Tx process path
  2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
                   ` (8 preceding siblings ...)
  2023-01-31 10:05 ` [PATCH net-next v2 09/10] net: txgbe: Support Rx and Tx process path Mengyuan Lou
@ 2023-01-31 10:05 ` Mengyuan Lou
  9 siblings, 0 replies; 15+ messages in thread
From: Mengyuan Lou @ 2023-01-31 10:05 UTC (permalink / raw)
  To: netdev; +Cc: jiawenwu, Mengyuan Lou

Add the enable and disable flows for ngbe open and close.
Service Rx and Tx ring interrupts, and process packets in the data path.

Signed-off-by: Mengyuan Lou <mengyuanlou@net-swift.com>
---
 drivers/net/ethernet/wangxun/ngbe/ngbe_main.c | 70 +++++++++++++++----
 drivers/net/ethernet/wangxun/ngbe/ngbe_type.h |  4 ++
 2 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
index a9772f929274..f94d415daf3c 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c
@@ -113,6 +113,9 @@ static int ngbe_sw_init(struct wx *wx)
 	wx->mac.num_rar_entries = NGBE_RAR_ENTRIES;
 	wx->mac.max_rx_queues = NGBE_MAX_RX_QUEUES;
 	wx->mac.max_tx_queues = NGBE_MAX_TX_QUEUES;
+	wx->mac.mcft_size = NGBE_MC_TBL_SIZE;
+	wx->mac.rx_pb_size = NGBE_RX_PB_SIZE;
+	wx->mac.tx_pb_size = NGBE_TDB_PB_SZ;
 
 	/* PCI config space info */
 	err = wx_sw_init(wx);
@@ -307,25 +310,46 @@ static int ngbe_request_irq(struct wx *wx)
 static void ngbe_disable_device(struct wx *wx)
 {
 	struct net_device *netdev = wx->netdev;
+	u32 i;
 
+	/* disable all enabled rx queues */
+	for (i = 0; i < wx->num_rx_queues; i++)
+		/* this call also flushes the previous write */
+		wx_disable_rx_queue(wx, wx->rx_ring[i]);
 	/* disable receives */
 	wx_disable_rx(wx);
+	wx_napi_disable_all(wx);
+	netif_tx_stop_all_queues(netdev);
 	netif_tx_disable(netdev);
 	if (wx->gpio_ctrl)
 		ngbe_sfp_modules_txrx_powerctl(wx, false);
 	wx_irq_disable(wx);
+	/* disable transmits in the hardware now that interrupts are off */
+	for (i = 0; i < wx->num_tx_queues; i++) {
+		u8 reg_idx = wx->tx_ring[i]->reg_idx;
+
+		wr32(wx, WX_PX_TR_CFG(reg_idx), WX_PX_TR_CFG_SWFLSH);
+	}
 }
 
 static void ngbe_down(struct wx *wx)
 {
 	phy_stop(wx->phydev);
 	ngbe_disable_device(wx);
+	wx_clean_all_tx_rings(wx);
+	wx_clean_all_rx_rings(wx);
 }
 
 static void ngbe_up(struct wx *wx)
 {
 	wx_configure_vectors(wx);
 
+	/* make sure to complete pre-operations */
+	smp_mb__before_atomic();
+	wx_napi_enable_all(wx);
+	/* enable transmits */
+	netif_tx_start_all_queues(wx->netdev);
+
 	/* clear any pending interrupts, may auto mask */
 	rd32(wx, WX_PX_IC);
 	rd32(wx, WX_PX_MISC_IC);
@@ -352,7 +376,7 @@ static int ngbe_open(struct net_device *netdev)
 
 	wx_control_hw(wx, true);
 
-	err = wx_setup_isb_resources(wx);
+	err = wx_setup_resources(wx);
 	if (err)
 		return err;
 
@@ -360,16 +384,29 @@ static int ngbe_open(struct net_device *netdev)
 
 	err = ngbe_request_irq(wx);
 	if (err)
-		goto err_req_irq;
+		goto err_free_resources;
 
 	err = ngbe_phy_connect(wx);
 	if (err)
-		return err;
+		goto err_free_irq;
+
+	err = netif_set_real_num_tx_queues(netdev, wx->num_tx_queues);
+	if (err)
+		goto err_dis_phy;
+
+	err = netif_set_real_num_rx_queues(netdev, wx->num_rx_queues);
+	if (err)
+		goto err_dis_phy;
+
 	ngbe_up(wx);
 
 	return 0;
-err_req_irq:
-	wx_free_isb_resources(wx);
+err_dis_phy:
+	phy_disconnect(wx->phydev);
+err_free_irq:
+	wx_free_irq(wx);
+err_free_resources:
+	wx_free_resources(wx);
 	return err;
 }
 
@@ -390,19 +427,13 @@ static int ngbe_close(struct net_device *netdev)
 
 	ngbe_down(wx);
 	wx_free_irq(wx);
-	wx_free_isb_resources(wx);
+	wx_free_resources(wx);
 	phy_disconnect(wx->phydev);
 	wx_control_hw(wx, false);
 
 	return 0;
 }
 
-static netdev_tx_t ngbe_xmit_frame(struct sk_buff *skb,
-				   struct net_device *netdev)
-{
-	return NETDEV_TX_OK;
-}
-
 static void ngbe_dev_shutdown(struct pci_dev *pdev, bool *enable_wake)
 {
 	struct wx *wx = pci_get_drvdata(pdev);
@@ -438,9 +469,11 @@ static void ngbe_shutdown(struct pci_dev *pdev)
 static const struct net_device_ops ngbe_netdev_ops = {
 	.ndo_open               = ngbe_open,
 	.ndo_stop               = ngbe_close,
-	.ndo_start_xmit         = ngbe_xmit_frame,
+	.ndo_start_xmit         = wx_xmit_frame,
+	.ndo_set_rx_mode        = wx_set_rx_mode,
 	.ndo_validate_addr      = eth_validate_addr,
 	.ndo_set_mac_address    = wx_set_mac,
+	.ndo_get_stats64        = wx_get_stats64,
 };
 
 /**
@@ -516,6 +549,17 @@ static int ngbe_probe(struct pci_dev *pdev,
 	netdev->netdev_ops = &ngbe_netdev_ops;
 
 	netdev->features |= NETIF_F_HIGHDMA;
+	netdev->features |= NETIF_F_SG;
+
+	/* copy netdev features into list of user selectable features */
+	netdev->hw_features |= netdev->features |
+			       NETIF_F_RXALL;
+
+	netdev->priv_flags |= IFF_UNICAST_FLT;
+	netdev->priv_flags |= IFF_SUPP_NOFCS;
+
+	netdev->min_mtu = ETH_MIN_MTU;
+	netdev->max_mtu = NGBE_MAX_JUMBO_FRAME_SIZE - (ETH_HLEN + ETH_FCS_LEN);
 
 	wx->bd_number = func_nums;
 	/* setup the private structure */
diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
index b60f5f0f64fa..a2351349785e 100644
--- a/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
+++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_type.h
@@ -134,6 +134,10 @@ enum NGBE_MSCA_CMD_value {
 #define NGBE_ETH_LENGTH_OF_ADDRESS		6
 #define NGBE_MAX_MSIX_VECTORS			0x09
 #define NGBE_RAR_ENTRIES			32
+#define NGBE_RX_PB_SIZE				42
+#define NGBE_MC_TBL_SIZE			128
+#define NGBE_TDB_PB_SZ				(20 * 1024) /* 20KB Packet Buffer */
+#define NGBE_MAX_JUMBO_FRAME_SIZE		9432 /* max payload 9414 */
 
 /* TX/RX descriptor defines */
 #define NGBE_DEFAULT_TXD			512 /* default ring size */
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next v2 01/10] net: libwx: Add irq flow functions
  2023-01-31 10:05 ` [PATCH net-next v2 01/10] net: libwx: Add irq flow functions Mengyuan Lou
@ 2023-02-02  5:39   ` Jakub Kicinski
  2023-02-02  5:45   ` Jakub Kicinski
  1 sibling, 0 replies; 15+ messages in thread
From: Jakub Kicinski @ 2023-02-02  5:39 UTC (permalink / raw)
  To: Mengyuan Lou; +Cc: netdev, jiawenwu

On Tue, 31 Jan 2023 18:05:32 +0800 Mengyuan Lou wrote:
> +	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
> +		wr32(wx, WX_PX_ISB_ADDR_H, wx->isb_dma >> 32);

The compilers may still warn about this on 32bit systems,
please use upper_32_bits().
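
Something along these lines (untested), keeping the existing wr32()
helper and register define:

	/* upper_32_bits() expands to a double 16-bit shift, so it does
	 * not trigger the shift-count warning when dma_addr_t is only
	 * 32 bits wide
	 */
	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT))
		wr32(wx, WX_PX_ISB_ADDR_H, upper_32_bits(wx->isb_dma));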

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next v2 01/10] net: libwx: Add irq flow functions
  2023-01-31 10:05 ` [PATCH net-next v2 01/10] net: libwx: Add irq flow functions Mengyuan Lou
  2023-02-02  5:39   ` Jakub Kicinski
@ 2023-02-02  5:45   ` Jakub Kicinski
  1 sibling, 0 replies; 15+ messages in thread
From: Jakub Kicinski @ 2023-02-02  5:45 UTC (permalink / raw)
  To: Mengyuan Lou; +Cc: netdev, jiawenwu

On Tue, 31 Jan 2023 18:05:32 +0800 Mengyuan Lou wrote:
> +	/* initialize work limits */
> +	q_vector->tx.work_limit = wx->tx_work_limit;
> +	q_vector->rx.work_limit = wx->rx_work_limit;

How is the work limit used? 

> +	wx->isb_mem = dma_alloc_coherent(&pdev->dev,
> +					 sizeof(u32) * 4,
> +					 &wx->isb_dma,
> +					 GFP_KERNEL);
> +	if (!wx->isb_mem) {
> +		wx_err(wx, "Alloc isb_mem failed\n");
> +		return -ENOMEM;
> +	}
> +	memset(wx->isb_mem, 0, sizeof(u32) * 4);

unnecessary, dma_alloc_coherent() clears the memory.
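
The allocation can then shrink to something like (sketch):

	wx->isb_mem = dma_alloc_coherent(&pdev->dev, sizeof(u32) * 4,
					 &wx->isb_dma, GFP_KERNEL);
	if (!wx->isb_mem) {
		wx_err(wx, "Alloc isb_mem failed\n");
		return -ENOMEM;
	}
	/* no memset here: the returned buffer is already zeroed */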

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources
  2023-01-31 10:05 ` [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources Mengyuan Lou
@ 2023-02-02  5:51   ` Jakub Kicinski
  2023-02-02 14:32     ` mengyuanlou
  0 siblings, 1 reply; 15+ messages in thread
From: Jakub Kicinski @ 2023-02-02  5:51 UTC (permalink / raw)
  To: Mengyuan Lou; +Cc: netdev, jiawenwu

On Tue, 31 Jan 2023 18:05:36 +0800 Mengyuan Lou wrote:
> +/* Transmit Descriptor */
> +union wx_tx_desc {
> +	struct {
> +		__le64 buffer_addr; /* Address of descriptor's data buf */
> +		__le32 cmd_type_len;
> +		__le32 olinfo_status;
> +	} read;
> +	struct {
> +		__le64 rsvd; /* Reserved */
> +		__le32 nxtseq_seed;
> +		__le32 status;
> +	} wb;
> +};
> +
> +/* Receive Descriptor */
> +union wx_rx_desc {
> +	struct {
> +		__le64 pkt_addr; /* Packet buffer address */
> +		__le64 hdr_addr; /* Header buffer address */
> +	} read;
> +	struct {
> +		struct {
> +			union {
> +				__le32 data;
> +				struct {
> +					__le16 pkt_info; /* RSS, Pkt type */
> +					__le16 hdr_info; /* Splithdr, hdrlen */
> +				} hs_rss;
> +			} lo_dword;
> +			union {
> +				__le32 rss; /* RSS Hash */
> +				struct {
> +					__le16 ip_id; /* IP id */
> +					__le16 csum; /* Packet Checksum */
> +				} csum_ip;
> +			} hi_dword;
> +		} lower;
> +		struct {
> +			__le32 status_error; /* ext status/error */
> +			__le16 length; /* Packet length */
> +			__le16 vlan; /* VLAN tag */
> +		} upper;
> +	} wb;  /* writeback */
> +};

How close of a copy of Intel Niantic is your device?

^ permalink raw reply	[flat|nested] 15+ messages in thread

* Re: [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources
  2023-02-02  5:51   ` Jakub Kicinski
@ 2023-02-02 14:32     ` mengyuanlou
  0 siblings, 0 replies; 15+ messages in thread
From: mengyuanlou @ 2023-02-02 14:32 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: netdev, Jiawen Wu, 张宇弘, 张玲玲

Our device is a different design from Intel's products, but the
receive/transmit queue protocol between the driver and the ASIC is
similar to the Intel 82599. If you are familiar with the Intel design,
that knowledge can save you a lot of time in understanding the
operation flows. For example, receive/transmit descriptors are
submitted to the queues, the ASIC fetches the descriptors from host
memory and processes them, and the Done bit is set in the writeback
descriptor of the same queue.

There are, however, many differences in the receive/transmit
descriptor definitions and in the register implementation which may
need your attention. For example, we define a packet type field in the
receive/transmit descriptors; the driver uses it to read the ASIC's
parse result and to deliver host info to the ASIC. Compared with the
82599, we also support a few different features, such as QCN, QinQ,
VXLAN, etc.

> 2023年2月2日 13:51,Jakub Kicinski <kuba@kernel.org> 写道:
> 
> On Tue, 31 Jan 2023 18:05:36 +0800 Mengyuan Lou wrote:
>> +/* Transmit Descriptor */
>> +union wx_tx_desc {
>> +	struct {
>> +		__le64 buffer_addr; /* Address of descriptor's data buf */
>> +		__le32 cmd_type_len;
>> +		__le32 olinfo_status;
>> +	} read;
>> +	struct {
>> +		__le64 rsvd; /* Reserved */
>> +		__le32 nxtseq_seed;
>> +		__le32 status;
>> +	} wb;
>> +};
>> +
>> +/* Receive Descriptor */
>> +union wx_rx_desc {
>> +	struct {
>> +		__le64 pkt_addr; /* Packet buffer address */
>> +		__le64 hdr_addr; /* Header buffer address */
>> +	} read;
>> +	struct {
>> +		struct {
>> +			union {
>> +				__le32 data;
>> +				struct {
>> +					__le16 pkt_info; /* RSS, Pkt type */
>> +					__le16 hdr_info; /* Splithdr, hdrlen */
>> +				} hs_rss;
>> +			} lo_dword;
>> +			union {
>> +				__le32 rss; /* RSS Hash */
>> +				struct {
>> +					__le16 ip_id; /* IP id */
>> +					__le16 csum; /* Packet Checksum */
>> +				} csum_ip;
>> +			} hi_dword;
>> +		} lower;
>> +		struct {
>> +			__le32 status_error; /* ext status/error */
>> +			__le16 length; /* Packet length */
>> +			__le16 vlan; /* VLAN tag */
>> +		} upper;
>> +	} wb;  /* writeback */
>> +};
> 
> How close of a copy of Intel Niantic is your device?
> 


^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-02-02 14:33 UTC | newest]

Thread overview: 15+ messages
2023-01-31 10:05 [PATCH net-next v2 00/10] Wangxun interrupt and RxTx support Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 01/10] net: libwx: Add irq flow functions Mengyuan Lou
2023-02-02  5:39   ` Jakub Kicinski
2023-02-02  5:45   ` Jakub Kicinski
2023-01-31 10:05 ` [PATCH net-next v2 02/10] net: ngbe: Add irqs request flow Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 03/10] net: txgbe: Add interrupt support Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 04/10] net: libwx: Configure Rx and Tx unit on hardware Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 05/10] net: libwx: Allocate Rx and Tx resources Mengyuan Lou
2023-02-02  5:51   ` Jakub Kicinski
2023-02-02 14:32     ` mengyuanlou
2023-01-31 10:05 ` [PATCH net-next v2 06/10] net: txgbe: Setup Rx and Tx ring Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 07/10] net: libwx: Support to receive packets in NAPI Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 08/10] net: libwx: Add tx path to process packets Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 09/10] net: txgbe: Support Rx and Tx process path Mengyuan Lou
2023-01-31 10:05 ` [PATCH net-next v2 10/10] net: ngbe: " Mengyuan Lou
