linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] net-next/hinic:replace multiply and division operators
@ 2018-11-20  5:47 Xue Chaojing
  2018-11-20  5:47 ` [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC Xue Chaojing
                   ` (3 more replies)
  0 siblings, 4 replies; 9+ messages in thread
From: Xue Chaojing @ 2018-11-20  5:47 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, xuechaojing, netdev, wulike1, chiqijun, fy.wang,
	tony.qu, luoshaokai

To improve performance, this patch uses bit operations to replace
multiply and division operators.

Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
---
 .../net/ethernet/huawei/hinic/hinic_hw_wq.c   | 55 ++++++++++++-------
 .../net/ethernet/huawei/hinic/hinic_hw_wq.h   |  3 +-
 2 files changed, 38 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index f92f1bf3901a..1dfa7eb05c10 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -74,12 +74,6 @@
 			((void *)((cmdq_pages)->shadow_page_vaddr) \
 				+ (wq)->block_idx * CMDQ_BLOCK_SIZE)
 
-#define WQE_PAGE_OFF(wq, idx)   (((idx) & ((wq)->num_wqebbs_per_page - 1)) * \
-					(wq)->wqebb_size)
-
-#define WQE_PAGE_NUM(wq, idx)   (((idx) / ((wq)->num_wqebbs_per_page)) \
-					& ((wq)->num_q_pages - 1))
-
 #define WQ_PAGE_ADDR(wq, idx)           \
 			((wq)->shadow_block_vaddr[WQE_PAGE_NUM(wq, idx)])
 
@@ -93,6 +87,17 @@
 		(((unsigned long)(wqe) - (unsigned long)(wq)->shadow_wqe) \
 			/ (wq)->max_wqe_size)
 
+static inline int WQE_PAGE_OFF(struct hinic_wq *wq, u16 idx)
+{
+	return (((idx) & ((wq)->num_wqebbs_per_page - 1))
+		<< (wq)->wqebb_size_shift);
+}
+
+static inline int WQE_PAGE_NUM(struct hinic_wq *wq, u16 idx)
+{
+	return (((idx) >> ((wq)->wqebbs_per_page_shift))
+		& ((wq)->num_q_pages - 1));
+}
 /**
  * queue_alloc_page - allocate page for Queue
  * @hwif: HW interface for allocating DMA
@@ -513,10 +518,11 @@ int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
 	struct hinic_hwif *hwif = wqs->hwif;
 	struct pci_dev *pdev = hwif->pdev;
 	u16 num_wqebbs_per_page;
+	u16 wqebb_size_shift;
 	int err;
 
-	if (wqebb_size == 0) {
-		dev_err(&pdev->dev, "wqebb_size must be > 0\n");
+	if (!is_power_of_2(wqebb_size)) {
+		dev_err(&pdev->dev, "wqebb_size must be power of 2\n");
 		return -EINVAL;
 	}
 
@@ -530,9 +536,11 @@ int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
 		return -EINVAL;
 	}
 
-	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size) / wqebb_size;
+	wqebb_size_shift = ilog2(wqebb_size);
+	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size)
+				>> wqebb_size_shift;
 
-	if (num_wqebbs_per_page & (num_wqebbs_per_page - 1)) {
+	if (!is_power_of_2(num_wqebbs_per_page)) {
 		dev_err(&pdev->dev, "num wqebbs per page must be power of 2\n");
 		return -EINVAL;
 	}
@@ -550,7 +558,8 @@ int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
 	wq->q_depth = q_depth;
 	wq->max_wqe_size = max_wqe_size;
 	wq->num_wqebbs_per_page = num_wqebbs_per_page;
-
+	wq->wqebbs_per_page_shift = ilog2(num_wqebbs_per_page);
+	wq->wqebb_size_shift = wqebb_size_shift;
 	wq->block_vaddr = WQ_BASE_VADDR(wqs, wq);
 	wq->shadow_block_vaddr = WQ_BASE_ADDR(wqs, wq);
 	wq->block_paddr = WQ_BASE_PADDR(wqs, wq);
@@ -604,11 +613,13 @@ int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
 			 u16 q_depth, u16 max_wqe_size)
 {
 	struct pci_dev *pdev = hwif->pdev;
+	u16 num_wqebbs_per_page_shift;
 	u16 num_wqebbs_per_page;
+	u16 wqebb_size_shift;
 	int i, j, err = -ENOMEM;
 
-	if (wqebb_size == 0) {
-		dev_err(&pdev->dev, "wqebb_size must be > 0\n");
+	if (!is_power_of_2(wqebb_size)) {
+		dev_err(&pdev->dev, "wqebb_size must be power of 2\n");
 		return -EINVAL;
 	}
 
@@ -622,9 +633,11 @@ int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
 		return -EINVAL;
 	}
 
-	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size) / wqebb_size;
+	wqebb_size_shift = ilog2(wqebb_size);
+	num_wqebbs_per_page = ALIGN(wq_page_size, wqebb_size)
+				>> wqebb_size_shift;
 
-	if (num_wqebbs_per_page & (num_wqebbs_per_page - 1)) {
+	if (!is_power_of_2(num_wqebbs_per_page)) {
 		dev_err(&pdev->dev, "num wqebbs per page must be power of 2\n");
 		return -EINVAL;
 	}
@@ -636,6 +649,7 @@ int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
 		dev_err(&pdev->dev, "Failed to allocate CMDQ page\n");
 		return err;
 	}
+	num_wqebbs_per_page_shift = ilog2(num_wqebbs_per_page);
 
 	for (i = 0; i < cmdq_blocks; i++) {
 		wq[i].hwif = hwif;
@@ -647,7 +661,8 @@ int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
 		wq[i].q_depth = q_depth;
 		wq[i].max_wqe_size = max_wqe_size;
 		wq[i].num_wqebbs_per_page = num_wqebbs_per_page;
-
+		wq[i].wqebbs_per_page_shift = num_wqebbs_per_page_shift;
+		wq[i].wqebb_size_shift = wqebb_size_shift;
 		wq[i].block_vaddr = CMDQ_BASE_VADDR(cmdq_pages, &wq[i]);
 		wq[i].shadow_block_vaddr = CMDQ_BASE_ADDR(cmdq_pages, &wq[i]);
 		wq[i].block_paddr = CMDQ_BASE_PADDR(cmdq_pages, &wq[i]);
@@ -741,7 +756,7 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 
 	*prod_idx = MASKED_WQE_IDX(wq, atomic_read(&wq->prod_idx));
 
-	num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) >> wq->wqebb_size_shift;
 
 	if (atomic_sub_return(num_wqebbs, &wq->delta) <= 0) {
 		atomic_add(num_wqebbs, &wq->delta);
@@ -795,7 +810,8 @@ void hinic_return_wqe(struct hinic_wq *wq, unsigned int wqe_size)
  **/
 void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size)
 {
-	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size)
+			>> wq->wqebb_size_shift;
 
 	atomic_add(num_wqebbs, &wq->cons_idx);
 
@@ -813,7 +829,8 @@ void hinic_put_wqe(struct hinic_wq *wq, unsigned int wqe_size)
 struct hinic_hw_wqe *hinic_read_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 				    u16 *cons_idx)
 {
-	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	int num_wqebbs = ALIGN(wqe_size, wq->wqebb_size)
+			>> wq->wqebb_size_shift;
 	u16 curr_cons_idx, end_cons_idx;
 	int curr_pg, end_pg;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
index 9b66545ba563..0a936cd6709b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
@@ -39,7 +39,8 @@ struct hinic_wq {
 	u16             q_depth;
 	u16             max_wqe_size;
 	u16             num_wqebbs_per_page;
-
+	u16		wqebbs_per_page_shift;
+	u16		wqebb_size_shift;
 	/* The addresses are 64 bit in the HW */
 	u64             block_paddr;
 	void            **shadow_block_vaddr;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC
  2018-11-20  5:47 [PATCH 1/4] net-next/hinic:replace multiply and division operators Xue Chaojing
@ 2018-11-20  5:47 ` Xue Chaojing
  2018-11-20 18:38   ` David Miller
  2018-11-20  5:47 ` [PATCH 3/4] net-next/hinic:fix a bug in set mac address Xue Chaojing
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 9+ messages in thread
From: Xue Chaojing @ 2018-11-20  5:47 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, xuechaojing, netdev, wulike1, chiqijun, fy.wang,
	tony.qu, luoshaokai

In order to improve performance, this patch adds rx checksum offload
for the HiNIC driver. Performance test(Iperf) shows more than 80%
improvement in TCP streams.

Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
---
 .../net/ethernet/huawei/hinic/hinic_hw_dev.h  |  2 ++
 .../net/ethernet/huawei/hinic/hinic_hw_wqe.h  |  4 +++
 .../net/ethernet/huawei/hinic/hinic_main.c    |  7 ++++-
 .../net/ethernet/huawei/hinic/hinic_port.c    | 28 +++++++++++++++++++
 .../net/ethernet/huawei/hinic/hinic_port.h    | 10 +++++++
 drivers/net/ethernet/huawei/hinic/hinic_rx.c  | 24 ++++++++++++++++
 drivers/net/ethernet/huawei/hinic/hinic_rx.h  |  4 +++
 7 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 097b5502603f..d1a7d2522d82 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -50,6 +50,8 @@ enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_GET_LINK_STATE   = 24,
 
+	HINIC_PORT_CMD_SET_RX_CSUM	= 26,
+
 	HINIC_PORT_CMD_SET_PORT_STATE   = 41,
 
 	HINIC_PORT_CMD_FWCTXT_INIT      = 69,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
index 9754d6ed5f4a..138941527872 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -170,6 +170,10 @@
 
 #define HINIC_RQ_CQE_STATUS_RXDONE_MASK         0x1
 
+#define HINIC_RQ_CQE_STATUS_CSUM_ERR_SHIFT	0
+
+#define HINIC_RQ_CQE_STATUS_CSUM_ERR_MASK	0xFFFFU
+
 #define HINIC_RQ_CQE_STATUS_GET(val, member)    \
 		(((val) >> HINIC_RQ_CQE_STATUS_##member##_SHIFT) & \
 		 HINIC_RQ_CQE_STATUS_##member##_MASK)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index fdf2bdb6b0d0..c675077f9774 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -806,7 +806,8 @@ static const struct net_device_ops hinic_netdev_ops = {
 static void netdev_features_init(struct net_device *netdev)
 {
 	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
-			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |
+			      NETIF_F_RXCSUM;
 
 	netdev->vlan_features = netdev->hw_features;
 
@@ -869,12 +870,16 @@ static int set_features(struct hinic_dev *nic_dev,
 			netdev_features_t features, bool force_change)
 {
 	netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
+	u32 csum_en = HINIC_RX_CSUM_OFFLOAD_EN;
 	int err = 0;
 
 	if (changed & NETIF_F_TSO)
 		err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
 					 HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
 
+	if (changed & NETIF_F_RXCSUM)
+		err = hinic_set_rx_csum_offload(nic_dev, csum_en);
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 7575a7d3bd9f..e9f76e904610 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -409,3 +409,31 @@ int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
 
 	return 0;
 }
+
+int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en)
+{
+	struct hinic_checksum_offload rx_csum_cfg = {0};
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	rx_csum_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rx_csum_cfg.rx_csum_offload = en;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_CSUM,
+				 &rx_csum_cfg, sizeof(rx_csum_cfg),
+				 &rx_csum_cfg, &out_size);
+	if (err || !out_size || rx_csum_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set rx csum offload, ret = %d\n",
+			rx_csum_cfg.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index f6e3220fe28f..02d896eed455 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -183,6 +183,15 @@ struct hinic_tso_config {
 	u8	resv2[3];
 };
 
+struct hinic_checksum_offload {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	rx_csum_offload;
+};
 int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		       u16 vlan_id);
 
@@ -213,4 +222,5 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
 
 int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
 
+int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en);
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 4c0f7eda1166..93e8f207f6da 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -89,6 +89,28 @@ static void rxq_stats_init(struct hinic_rxq *rxq)
 	hinic_rxq_clean_stats(rxq);
 }
 
+static void rx_csum(struct hinic_rxq *rxq, u16 cons_idx,
+		    struct sk_buff *skb)
+{
+	struct net_device *netdev = rxq->netdev;
+	struct hinic_rq_cqe *cqe;
+	struct hinic_rq *rq;
+	u32 csum_err;
+	u32 status;
+
+	rq = rxq->rq;
+	cqe = rq->cqe[cons_idx];
+	status = be32_to_cpu(cqe->status);
+	csum_err = HINIC_RQ_CQE_STATUS_GET(status, CSUM_ERR);
+
+	if (!(netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	if (!csum_err)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb->ip_summed = CHECKSUM_NONE;
+}
 /**
  * rx_alloc_skb - allocate skb and map it to dma address
  * @rxq: rx queue
@@ -328,6 +350,8 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
 
 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
 
+		rx_csum(rxq, ci, skb);
+
 		prefetch(skb->data);
 
 		pkt_len = sge.len;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.h b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
index 27c9af4b1c12..ab3fabab91b2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
@@ -23,6 +23,10 @@
 
 #include "hinic_hw_qp.h"
 
+#define HINIC_RX_CSUM_OFFLOAD_EN	0xFFF
+#define HINIC_RX_CSUM_HW_CHECK_NONE	BIT(7)
+#define HINIC_RX_CSUM_IPSU_OTHER_ERR	BIT(8)
+
 struct hinic_rxq_stats {
 	u64                     pkts;
 	u64                     bytes;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 3/4] net-next/hinic:fix a bug in set mac address
  2018-11-20  5:47 [PATCH 1/4] net-next/hinic:replace multiply and division operators Xue Chaojing
  2018-11-20  5:47 ` [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC Xue Chaojing
@ 2018-11-20  5:47 ` Xue Chaojing
  2018-11-20 18:38   ` David Miller
  2018-11-20  5:47 ` [PATCH 4/4] net-next/hinic: fix a bug in rx data flow Xue Chaojing
  2018-11-20 18:38 ` [PATCH 1/4] net-next/hinic:replace multiply and division operators David Miller
  3 siblings, 1 reply; 9+ messages in thread
From: Xue Chaojing @ 2018-11-20  5:47 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, xuechaojing, netdev, wulike1, chiqijun, fy.wang,
	tony.qu, luoshaokai

In add_mac_addr(), if the MAC address is a muliticast address,
it will not be set, which causes the network card fail to receive
the multicast packet. This patch fixes this bug.

Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
---
 drivers/net/ethernet/huawei/hinic/hinic_main.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index c675077f9774..6d48dc62a44b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -600,9 +600,6 @@ static int add_mac_addr(struct net_device *netdev, const u8 *addr)
 	u16 vid = 0;
 	int err;
 
-	if (!is_valid_ether_addr(addr))
-		return -EADDRNOTAVAIL;
-
 	netif_info(nic_dev, drv, netdev, "set mac addr = %02x %02x %02x %02x %02x %02x\n",
 		   addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]);
 
@@ -726,6 +723,7 @@ static void set_rx_mode(struct work_struct *work)
 {
 	struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work);
 	struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work);
+	struct netdev_hw_addr *ha;
 
 	netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n");
 
@@ -733,6 +731,9 @@ static void set_rx_mode(struct work_struct *work)
 
 	__dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
 	__dev_mc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
+
+	netdev_for_each_mc_addr(ha, nic_dev->netdev)
+		add_mac_addr(nic_dev->netdev, ha->addr);
 }
 
 static void hinic_set_rx_mode(struct net_device *netdev)
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH 4/4] net-next/hinic: fix a bug in rx data flow
  2018-11-20  5:47 [PATCH 1/4] net-next/hinic:replace multiply and division operators Xue Chaojing
  2018-11-20  5:47 ` [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC Xue Chaojing
  2018-11-20  5:47 ` [PATCH 3/4] net-next/hinic:fix a bug in set mac address Xue Chaojing
@ 2018-11-20  5:47 ` Xue Chaojing
  2018-11-20 18:38   ` David Miller
  2018-11-20 18:38 ` [PATCH 1/4] net-next/hinic:replace multiply and division operators David Miller
  3 siblings, 1 reply; 9+ messages in thread
From: Xue Chaojing @ 2018-11-20  5:47 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, xuechaojing, netdev, wulike1, chiqijun, fy.wang,
	tony.qu, luoshaokai

In rx_alloc_pkts(), there is a loop call of tasklet, which causes
100% cpu utilization, even no packets are being received. This patch
fixes this bug.

Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
---
 drivers/net/ethernet/huawei/hinic/hinic_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 93e8f207f6da..f86f2e693224 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -229,9 +229,9 @@ static int rx_alloc_pkts(struct hinic_rxq *rxq)
 		wmb();  /* write all the wqes before update PI */
 
 		hinic_rq_update(rxq->rq, prod_idx);
+		tasklet_schedule(&rxq->rx_task);
 	}
 
-	tasklet_schedule(&rxq->rx_task);
 	return i;
 }
 
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH 1/4] net-next/hinic:replace multiply and division operators
  2018-11-20  5:47 [PATCH 1/4] net-next/hinic:replace multiply and division operators Xue Chaojing
                   ` (2 preceding siblings ...)
  2018-11-20  5:47 ` [PATCH 4/4] net-next/hinic: fix a bug in rx data flow Xue Chaojing
@ 2018-11-20 18:38 ` David Miller
  3 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2018-11-20 18:38 UTC (permalink / raw)
  To: xuechaojing
  Cc: linux-kernel, netdev, wulike1, chiqijun, fy.wang, tony.qu, luoshaokai

From: Xue Chaojing <xuechaojing@huawei.com>
Date: Tue, 20 Nov 2018 05:47:31 +0000

> To improve performance, this patch uses bit operations to replace
> multiply and division operators.
> 
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>

Applied.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 3/4] net-next/hinic:fix a bug in set mac address
  2018-11-20  5:47 ` [PATCH 3/4] net-next/hinic:fix a bug in set mac address Xue Chaojing
@ 2018-11-20 18:38   ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2018-11-20 18:38 UTC (permalink / raw)
  To: xuechaojing
  Cc: linux-kernel, netdev, wulike1, chiqijun, fy.wang, tony.qu, luoshaokai

From: Xue Chaojing <xuechaojing@huawei.com>
Date: Tue, 20 Nov 2018 05:47:33 +0000

> In add_mac_addr(), if the MAC address is a muliticast address,
> it will not be set, which causes the network card fail to receive
> the multicast packet. This patch fixes this bug.
> 
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>

Applied.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC
  2018-11-20  5:47 ` [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC Xue Chaojing
@ 2018-11-20 18:38   ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2018-11-20 18:38 UTC (permalink / raw)
  To: xuechaojing
  Cc: linux-kernel, netdev, wulike1, chiqijun, fy.wang, tony.qu, luoshaokai

From: Xue Chaojing <xuechaojing@huawei.com>
Date: Tue, 20 Nov 2018 05:47:32 +0000

> In order to improve performance, this patch adds rx checksum offload
> for the HiNIC driver. Performance test(Iperf) shows more than 80%
> improvement in TCP streams.
> 
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>

Applied.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH 4/4] net-next/hinic: fix a bug in rx data flow
  2018-11-20  5:47 ` [PATCH 4/4] net-next/hinic: fix a bug in rx data flow Xue Chaojing
@ 2018-11-20 18:38   ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2018-11-20 18:38 UTC (permalink / raw)
  To: xuechaojing
  Cc: linux-kernel, netdev, wulike1, chiqijun, fy.wang, tony.qu, luoshaokai

From: Xue Chaojing <xuechaojing@huawei.com>
Date: Tue, 20 Nov 2018 05:47:34 +0000

> In rx_alloc_pkts(), there is a loop call of tasklet, which causes
> 100% cpu utilization, even no packets are being received. This patch
> fixes this bug.
> 
> Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>

Applied.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC
  2018-11-19  6:12 Xue Chaojing
@ 2018-11-19  6:12 ` Xue Chaojing
  0 siblings, 0 replies; 9+ messages in thread
From: Xue Chaojing @ 2018-11-19  6:12 UTC (permalink / raw)
  To: davem
  Cc: linux-kernel, xuechaojing, netdev, wulike1, chiqijun, fy.wang,
	tony.qu, luoshaokai

In order to improve performance, this patch adds rx checksum offload
for the HiNIC driver. Performance test(Iperf) shows more than 80%
improvement in TCP streams.

Signed-off-by: Xue Chaojing <xuechaojing@huawei.com>
---
 .../net/ethernet/huawei/hinic/hinic_hw_dev.h  |  2 ++
 .../net/ethernet/huawei/hinic/hinic_hw_wqe.h  |  4 +++
 .../net/ethernet/huawei/hinic/hinic_main.c    |  8 +++++-
 .../net/ethernet/huawei/hinic/hinic_port.c    | 28 +++++++++++++++++++
 .../net/ethernet/huawei/hinic/hinic_port.h    | 10 +++++++
 drivers/net/ethernet/huawei/hinic/hinic_rx.c  | 24 ++++++++++++++++
 drivers/net/ethernet/huawei/hinic/hinic_rx.h  |  4 +++
 7 files changed, 79 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 097b5502603f..d1a7d2522d82 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -50,6 +50,8 @@ enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_GET_LINK_STATE   = 24,
 
+	HINIC_PORT_CMD_SET_RX_CSUM	= 26,
+
 	HINIC_PORT_CMD_SET_PORT_STATE   = 41,
 
 	HINIC_PORT_CMD_FWCTXT_INIT      = 69,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
index 9754d6ed5f4a..138941527872 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wqe.h
@@ -170,6 +170,10 @@
 
 #define HINIC_RQ_CQE_STATUS_RXDONE_MASK         0x1
 
+#define HINIC_RQ_CQE_STATUS_CSUM_ERR_SHIFT	0
+
+#define HINIC_RQ_CQE_STATUS_CSUM_ERR_MASK	0xFFFFU
+
 #define HINIC_RQ_CQE_STATUS_GET(val, member)    \
 		(((val) >> HINIC_RQ_CQE_STATUS_##member##_SHIFT) & \
 		 HINIC_RQ_CQE_STATUS_##member##_MASK)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index fdf2bdb6b0d0..0179b6bf124c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -726,6 +726,7 @@ static void set_rx_mode(struct work_struct *work)
 {
 	struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work);
 	struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work);
+	struct netdev_hw_addr *ha;
 
 	netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n");
 
@@ -806,7 +807,8 @@ static const struct net_device_ops hinic_netdev_ops = {
 static void netdev_features_init(struct net_device *netdev)
 {
 	netdev->hw_features = NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_IP_CSUM |
-			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6;
+			      NETIF_F_IPV6_CSUM | NETIF_F_TSO | NETIF_F_TSO6 |
+			      NETIF_F_RXCSUM;
 
 	netdev->vlan_features = netdev->hw_features;
 
@@ -869,12 +871,16 @@ static int set_features(struct hinic_dev *nic_dev,
 			netdev_features_t features, bool force_change)
 {
 	netdev_features_t changed = force_change ? ~0 : pre_features ^ features;
+	u32 csum_en = HINIC_RX_CSUM_OFFLOAD_EN;
 	int err = 0;
 
 	if (changed & NETIF_F_TSO)
 		err = hinic_port_set_tso(nic_dev, (features & NETIF_F_TSO) ?
 					 HINIC_TSO_ENABLE : HINIC_TSO_DISABLE);
 
+	if (changed & NETIF_F_RXCSUM)
+		err = hinic_set_rx_csum_offload(nic_dev, csum_en);
+
 	return err;
 }
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 7575a7d3bd9f..e9f76e904610 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -409,3 +409,31 @@ int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
 
 	return 0;
 }
+
+int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en)
+{
+	struct hinic_checksum_offload rx_csum_cfg = {0};
+	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	struct hinic_hwif *hwif = hwdev->hwif;
+	struct pci_dev *pdev = hwif->pdev;
+	u16 out_size;
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	rx_csum_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
+	rx_csum_cfg.rx_csum_offload = en;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RX_CSUM,
+				 &rx_csum_cfg, sizeof(rx_csum_cfg),
+				 &rx_csum_cfg, &out_size);
+	if (err || !out_size || rx_csum_cfg.status) {
+		dev_err(&pdev->dev,
+			"Failed to set rx csum offload, ret = %d\n",
+			rx_csum_cfg.status);
+		return -EINVAL;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index f6e3220fe28f..02d896eed455 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -183,6 +183,15 @@ struct hinic_tso_config {
 	u8	resv2[3];
 };
 
+struct hinic_checksum_offload {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	rx_csum_offload;
+};
 int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		       u16 vlan_id);
 
@@ -213,4 +222,5 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
 
 int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state);
 
+int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en);
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 4c0f7eda1166..93e8f207f6da 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -89,6 +89,28 @@ static void rxq_stats_init(struct hinic_rxq *rxq)
 	hinic_rxq_clean_stats(rxq);
 }
 
+static void rx_csum(struct hinic_rxq *rxq, u16 cons_idx,
+		    struct sk_buff *skb)
+{
+	struct net_device *netdev = rxq->netdev;
+	struct hinic_rq_cqe *cqe;
+	struct hinic_rq *rq;
+	u32 csum_err;
+	u32 status;
+
+	rq = rxq->rq;
+	cqe = rq->cqe[cons_idx];
+	status = be32_to_cpu(cqe->status);
+	csum_err = HINIC_RQ_CQE_STATUS_GET(status, CSUM_ERR);
+
+	if (!(netdev->features & NETIF_F_RXCSUM))
+		return;
+
+	if (!csum_err)
+		skb->ip_summed = CHECKSUM_UNNECESSARY;
+	else
+		skb->ip_summed = CHECKSUM_NONE;
+}
 /**
  * rx_alloc_skb - allocate skb and map it to dma address
  * @rxq: rx queue
@@ -328,6 +350,8 @@ static int rxq_recv(struct hinic_rxq *rxq, int budget)
 
 		rx_unmap_skb(rxq, hinic_sge_to_dma(&sge));
 
+		rx_csum(rxq, ci, skb);
+
 		prefetch(skb->data);
 
 		pkt_len = sge.len;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.h b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
index 27c9af4b1c12..ab3fabab91b2 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.h
@@ -23,6 +23,10 @@
 
 #include "hinic_hw_qp.h"
 
+#define HINIC_RX_CSUM_OFFLOAD_EN	0xFFF
+#define HINIC_RX_CSUM_HW_CHECK_NONE	BIT(7)
+#define HINIC_RX_CSUM_IPSU_OTHER_ERR	BIT(8)
+
 struct hinic_rxq_stats {
 	u64                     pkts;
 	u64                     bytes;
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2018-11-20 18:38 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-11-20  5:47 [PATCH 1/4] net-next/hinic:replace multiply and division operators Xue Chaojing
2018-11-20  5:47 ` [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC Xue Chaojing
2018-11-20 18:38   ` David Miller
2018-11-20  5:47 ` [PATCH 3/4] net-next/hinic:fix a bug in set mac address Xue Chaojing
2018-11-20 18:38   ` David Miller
2018-11-20  5:47 ` [PATCH 4/4] net-next/hinic: fix a bug in rx data flow Xue Chaojing
2018-11-20 18:38   ` David Miller
2018-11-20 18:38 ` [PATCH 1/4] net-next/hinic:replace multiply and division operators David Miller
  -- strict thread matches above, loose matches on Subject: below --
2018-11-19  6:12 Xue Chaojing
2018-11-19  6:12 ` [PATCH 2/4] net-next/hinic:add rx checksum offload for HiNIC Xue Chaojing

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).