* [PATCH net-next 0/6] ionic Rx updates
@ 2021-03-10 19:26 Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 1/6] ionic: move rx_page_alloc and free Shannon Nelson
                   ` (6 more replies)
  0 siblings, 7 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

The ionic driver's Rx path is due for an overhaul in order to
better use memory buffers and to clean up the data structures.

The first two patches convert the driver to using page sharing
between buffers so as to lessen the page alloc and free overhead.

The remaining patches clean up the structs and fastpath code for
better efficiency.

Shannon Nelson (6):
  ionic: move rx_page_alloc and free
  ionic: implement Rx page reuse
  ionic: optimize fastpath struct usage
  ionic: simplify rx skb alloc
  ionic: rebuild debugfs on qcq swap
  ionic: simplify use of completion types

 .../net/ethernet/pensando/ionic/ionic_dev.c   |   4 +-
 .../net/ethernet/pensando/ionic/ionic_dev.h   |  19 +-
 .../net/ethernet/pensando/ionic/ionic_lif.c   |   6 +-
 .../net/ethernet/pensando/ionic/ionic_lif.h   |  22 +-
 .../net/ethernet/pensando/ionic/ionic_main.c  |   4 +-
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 374 +++++++++---------
 6 files changed, 226 insertions(+), 203 deletions(-)

-- 
2.17.1



* [PATCH net-next 1/6] ionic: move rx_page_alloc and free
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
@ 2021-03-10 19:26 ` Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 2/6] ionic: implement Rx page reuse Shannon Nelson
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

Move ionic_rx_page_alloc() and ionic_rx_page_free() to earlier
in the file to make the next patch easier to review.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 140 +++++++++---------
 1 file changed, 70 insertions(+), 70 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 162a1ff1e9d2..70b997f302ac 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -66,6 +66,76 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
 	return skb;
 }
 
+static int ionic_rx_page_alloc(struct ionic_queue *q,
+			       struct ionic_page_info *page_info)
+{
+	struct ionic_lif *lif = q->lif;
+	struct ionic_rx_stats *stats;
+	struct net_device *netdev;
+	struct device *dev;
+
+	netdev = lif->netdev;
+	dev = lif->ionic->dev;
+	stats = q_to_rx_stats(q);
+
+	if (unlikely(!page_info)) {
+		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
+				    netdev->name, q->name);
+		return -EINVAL;
+	}
+
+	page_info->page = dev_alloc_page();
+	if (unlikely(!page_info->page)) {
+		net_err_ratelimited("%s: %s page alloc failed\n",
+				    netdev->name, q->name);
+		stats->alloc_err++;
+		return -ENOMEM;
+	}
+
+	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
+					   DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
+		put_page(page_info->page);
+		page_info->dma_addr = 0;
+		page_info->page = NULL;
+		net_err_ratelimited("%s: %s dma map failed\n",
+				    netdev->name, q->name);
+		stats->dma_map_err++;
+		return -EIO;
+	}
+
+	return 0;
+}
+
+static void ionic_rx_page_free(struct ionic_queue *q,
+			       struct ionic_page_info *page_info)
+{
+	struct ionic_lif *lif = q->lif;
+	struct net_device *netdev;
+	struct device *dev;
+
+	netdev = lif->netdev;
+	dev = lif->ionic->dev;
+
+	if (unlikely(!page_info)) {
+		net_err_ratelimited("%s: %s invalid page_info in free\n",
+				    netdev->name, q->name);
+		return;
+	}
+
+	if (unlikely(!page_info->page)) {
+		net_err_ratelimited("%s: %s invalid page in free\n",
+				    netdev->name, q->name);
+		return;
+	}
+
+	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+
+	put_page(page_info->page);
+	page_info->dma_addr = 0;
+	page_info->page = NULL;
+}
+
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_desc_info *desc_info,
 				      struct ionic_cq_info *cq_info)
@@ -253,76 +323,6 @@ static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 	return true;
 }
 
-static int ionic_rx_page_alloc(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
-{
-	struct ionic_lif *lif = q->lif;
-	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
-	stats = q_to_rx_stats(q);
-
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
-				    netdev->name, q->name);
-		return -EINVAL;
-	}
-
-	page_info->page = dev_alloc_page();
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s page alloc failed\n",
-				    netdev->name, q->name);
-		stats->alloc_err++;
-		return -ENOMEM;
-	}
-
-	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
-					   DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
-		put_page(page_info->page);
-		page_info->dma_addr = 0;
-		page_info->page = NULL;
-		net_err_ratelimited("%s: %s dma map failed\n",
-				    netdev->name, q->name);
-		stats->dma_map_err++;
-		return -EIO;
-	}
-
-	return 0;
-}
-
-static void ionic_rx_page_free(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
-{
-	struct ionic_lif *lif = q->lif;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
-
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in free\n",
-				    netdev->name, q->name);
-		return;
-	}
-
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s invalid page in free\n",
-				    netdev->name, q->name);
-		return;
-	}
-
-	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
-
-	put_page(page_info->page);
-	page_info->dma_addr = 0;
-	page_info->page = NULL;
-}
-
 void ionic_rx_fill(struct ionic_queue *q)
 {
 	struct net_device *netdev = q->lif->netdev;
-- 
2.17.1



* [PATCH net-next 2/6] ionic: implement Rx page reuse
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 1/6] ionic: move rx_page_alloc and free Shannon Nelson
@ 2021-03-10 19:26 ` Shannon Nelson
  2021-03-11  2:14   ` Alexander Duyck
  2021-03-10 19:26 ` [PATCH net-next 3/6] ionic: optimize fastpath struct usage Shannon Nelson
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

Rework the Rx buffer allocations to use pages twice when using
normal MTU in order to cut down on buffer allocation and mapping
overhead.

Instead of tracking individual pages, in which we may have
wasted half the space when using standard 1500 MTU, we track
buffers which use half pages, so we can use the second half
of the page rather than allocate and map a new page once the
first buffer has been used.
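
For the standard-MTU case the numbers work out roughly as follows (a
sketch of the reuse decision on a 4K-page system, using the macros and
fields added below):

	used = 1518;                             /* 1500 MTU + ETH_HLEN + VLAN_HLEN */
	size = ALIGN(used, IONIC_PAGE_SPLIT_SZ); /* rounds up to 2048 */
	buf_info->page_offset += size;           /* 0 -> 2048: second half still free */
	                                         /* 2048 -> 4096: page used up, drop it */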

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 .../net/ethernet/pensando/ionic/ionic_dev.h   |  12 +-
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 215 +++++++++++-------
 2 files changed, 138 insertions(+), 89 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 690768ff0143..0f877c86eba6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -170,9 +170,15 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
 			      struct ionic_desc_info *desc_info,
 			      struct ionic_cq_info *cq_info, void *cb_arg);
 
-struct ionic_page_info {
+#define IONIC_PAGE_SIZE				PAGE_SIZE
+#define IONIC_PAGE_SPLIT_SZ			(PAGE_SIZE / 2)
+#define IONIC_PAGE_GFP_MASK			(GFP_ATOMIC | __GFP_NOWARN |\
+						 __GFP_COMP | __GFP_MEMALLOC)
+
+struct ionic_buf_info {
 	struct page *page;
 	dma_addr_t dma_addr;
+	u32 page_offset;
 };
 
 struct ionic_desc_info {
@@ -187,8 +193,8 @@ struct ionic_desc_info {
 		struct ionic_txq_sg_desc *txq_sg_desc;
 		struct ionic_rxq_sg_desc *rxq_sgl_desc;
 	};
-	unsigned int npages;
-	struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
+	unsigned int nbufs;
+	struct ionic_buf_info bufs[IONIC_RX_MAX_SG_ELEMS + 1];
 	ionic_desc_cb cb;
 	void *cb_arg;
 };
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 70b997f302ac..3e13cfee9ecd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -54,7 +54,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
 	if (frags)
 		skb = napi_get_frags(&q_to_qcq(q)->napi);
 	else
-		skb = netdev_alloc_skb_ip_align(netdev, len);
+		skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
 
 	if (unlikely(!skb)) {
 		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
@@ -66,8 +66,15 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
 	return skb;
 }
 
+static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
+{
+	buf_info->page = NULL;
+	buf_info->page_offset = 0;
+	buf_info->dma_addr = 0;
+}
+
 static int ionic_rx_page_alloc(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
+			       struct ionic_buf_info *buf_info)
 {
 	struct ionic_lif *lif = q->lif;
 	struct ionic_rx_stats *stats;
@@ -78,26 +85,26 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 	dev = lif->ionic->dev;
 	stats = q_to_rx_stats(q);
 
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in alloc\n",
+	if (unlikely(!buf_info)) {
+		net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
 				    netdev->name, q->name);
 		return -EINVAL;
 	}
 
-	page_info->page = dev_alloc_page();
-	if (unlikely(!page_info->page)) {
+	buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
+	if (unlikely(!buf_info->page)) {
 		net_err_ratelimited("%s: %s page alloc failed\n",
 				    netdev->name, q->name);
 		stats->alloc_err++;
 		return -ENOMEM;
 	}
+	buf_info->page_offset = 0;
 
-	page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
-					   DMA_FROM_DEVICE);
-	if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
-		put_page(page_info->page);
-		page_info->dma_addr = 0;
-		page_info->page = NULL;
+	buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
+					  IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+	if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
+		__free_pages(buf_info->page, 0);
+		ionic_rx_buf_reset(buf_info);
 		net_err_ratelimited("%s: %s dma map failed\n",
 				    netdev->name, q->name);
 		stats->dma_map_err++;
@@ -108,32 +115,46 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 }
 
 static void ionic_rx_page_free(struct ionic_queue *q,
-			       struct ionic_page_info *page_info)
+			       struct ionic_buf_info *buf_info)
 {
-	struct ionic_lif *lif = q->lif;
-	struct net_device *netdev;
-	struct device *dev;
-
-	netdev = lif->netdev;
-	dev = lif->ionic->dev;
+	struct net_device *netdev = q->lif->netdev;
+	struct device *dev = q->lif->ionic->dev;
 
-	if (unlikely(!page_info)) {
-		net_err_ratelimited("%s: %s invalid page_info in free\n",
+	if (unlikely(!buf_info)) {
+		net_err_ratelimited("%s: %s invalid buf_info in free\n",
 				    netdev->name, q->name);
 		return;
 	}
 
-	if (unlikely(!page_info->page)) {
-		net_err_ratelimited("%s: %s invalid page in free\n",
-				    netdev->name, q->name);
+	if (!buf_info->page)
 		return;
-	}
 
-	dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
+	dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+	__free_pages(buf_info->page, 0);
+	ionic_rx_buf_reset(buf_info);
+}
+
+static bool ionic_rx_buf_recycle(struct ionic_queue *q,
+				 struct ionic_buf_info *buf_info, u32 used)
+{
+	u32 size;
+
+	/* don't re-use pages allocated in low-mem condition */
+	if (page_is_pfmemalloc(buf_info->page))
+		return false;
+
+	/* don't re-use buffers from non-local numa nodes */
+	if (page_to_nid(buf_info->page) != numa_mem_id())
+		return false;
+
+	size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
+	buf_info->page_offset += size;
+	if (buf_info->page_offset >= IONIC_PAGE_SIZE)
+		return false;
+
+	get_page(buf_info->page);
 
-	put_page(page_info->page);
-	page_info->dma_addr = 0;
-	page_info->page = NULL;
+	return true;
 }
 
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
@@ -142,16 +163,16 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct device *dev = q->lif->ionic->dev;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
 	u16 len;
 
-	page_info = &desc_info->pages[0];
+	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
-	prefetch(page_address(page_info->page) + NET_IP_ALIGN);
+	prefetch(buf_info->page);
 
 	skb = ionic_rx_skb_alloc(q, len, true);
 	if (unlikely(!skb))
@@ -159,7 +180,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 
 	i = comp->num_sg_elems + 1;
 	do {
-		if (unlikely(!page_info->page)) {
+		if (unlikely(!buf_info->page)) {
 			struct napi_struct *napi = &q_to_qcq(q)->napi;
 
 			napi->skb = NULL;
@@ -167,15 +188,25 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 			return NULL;
 		}
 
-		frag_len = min(len, (u16)PAGE_SIZE);
+		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
 		len -= frag_len;
 
-		dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
-			       PAGE_SIZE, DMA_FROM_DEVICE);
+		dma_sync_single_for_cpu(dev,
+					buf_info->dma_addr + buf_info->page_offset,
+					frag_len, DMA_FROM_DEVICE);
+
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
-				page_info->page, 0, frag_len, PAGE_SIZE);
-		page_info->page = NULL;
-		page_info++;
+				buf_info->page, buf_info->page_offset, frag_len,
+				IONIC_PAGE_SIZE);
+
+		if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
+			dma_unmap_page(dev, buf_info->dma_addr,
+				       IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
+			ionic_rx_buf_reset(buf_info);
+		}
+
+		buf_info++;
+
 		i--;
 	} while (i > 0);
 
@@ -188,26 +219,26 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct device *dev = q->lif->ionic->dev;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	struct sk_buff *skb;
 	u16 len;
 
-	page_info = &desc_info->pages[0];
+	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
 	skb = ionic_rx_skb_alloc(q, len, false);
 	if (unlikely(!skb))
 		return NULL;
 
-	if (unlikely(!page_info->page)) {
+	if (unlikely(!buf_info->page)) {
 		dev_kfree_skb(skb);
 		return NULL;
 	}
 
-	dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
+	dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
 				len, DMA_FROM_DEVICE);
-	skb_copy_to_linear_data(skb, page_address(page_info->page), len);
-	dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
+	skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
+	dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
 				   len, DMA_FROM_DEVICE);
 
 	skb_put(skb, len);
@@ -327,64 +358,73 @@ void ionic_rx_fill(struct ionic_queue *q)
 {
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_desc_info *desc_info;
-	struct ionic_page_info *page_info;
 	struct ionic_rxq_sg_desc *sg_desc;
 	struct ionic_rxq_sg_elem *sg_elem;
+	struct ionic_buf_info *buf_info;
 	struct ionic_rxq_desc *desc;
+	unsigned int max_sg_elems;
 	unsigned int remain_len;
-	unsigned int seg_len;
+	unsigned int frag_len;
 	unsigned int nfrags;
 	unsigned int i, j;
 	unsigned int len;
 
 	len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
-	nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
+	max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
 
 	for (i = ionic_q_space_avail(q); i; i--) {
+		nfrags = 0;
 		remain_len = len;
 		desc_info = &q->info[q->head_idx];
 		desc = desc_info->desc;
-		sg_desc = desc_info->sg_desc;
-		page_info = &desc_info->pages[0];
+		buf_info = &desc_info->bufs[0];
 
-		if (page_info->page) { /* recycle the buffer */
-			ionic_rxq_post(q, false, ionic_rx_clean, NULL);
-			continue;
-		}
-
-		/* fill main descriptor - pages[0] */
-		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
-					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
-		desc_info->npages = nfrags;
-		if (unlikely(ionic_rx_page_alloc(q, page_info))) {
-			desc->addr = 0;
-			desc->len = 0;
-			return;
+		if (!buf_info->page) { /* alloc a new buffer? */
+			if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
+				desc->addr = 0;
+				desc->len = 0;
+				return;
+			}
 		}
-		desc->addr = cpu_to_le64(page_info->dma_addr);
-		seg_len = min_t(unsigned int, PAGE_SIZE, len);
-		desc->len = cpu_to_le16(seg_len);
-		remain_len -= seg_len;
-		page_info++;
 
-		/* fill sg descriptors - pages[1..n] */
-		for (j = 0; j < nfrags - 1; j++) {
-			if (page_info->page) /* recycle the sg buffer */
-				continue;
+		/* fill main descriptor - buf[0] */
+		desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
+		frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
+		desc->len = cpu_to_le16(frag_len);
+		remain_len -= frag_len;
+		buf_info++;
+		nfrags++;
 
+		/* fill sg descriptors - buf[1..n] */
+		sg_desc = desc_info->sg_desc;
+		for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
 			sg_elem = &sg_desc->elems[j];
-			if (unlikely(ionic_rx_page_alloc(q, page_info))) {
-				sg_elem->addr = 0;
-				sg_elem->len = 0;
-				return;
+			if (!buf_info->page) { /* alloc a new sg buffer? */
+				if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
+					sg_elem->addr = 0;
+					sg_elem->len = 0;
+					return;
+				}
 			}
-			sg_elem->addr = cpu_to_le64(page_info->dma_addr);
-			seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
-			sg_elem->len = cpu_to_le16(seg_len);
-			remain_len -= seg_len;
-			page_info++;
+
+			sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
+			frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
+			sg_elem->len = cpu_to_le16(frag_len);
+			remain_len -= frag_len;
+			buf_info++;
+			nfrags++;
 		}
 
+		/* clear end sg element as a sentinel */
+		if (j < max_sg_elems) {
+			sg_elem = &sg_desc->elems[j];
+			memset(sg_elem, 0, sizeof(*sg_elem));
+		}
+
+		desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
+					      IONIC_RXQ_DESC_OPCODE_SIMPLE;
+		desc_info->nbufs = nfrags;
+
 		ionic_rxq_post(q, false, ionic_rx_clean, NULL);
 	}
 
@@ -395,21 +435,24 @@ void ionic_rx_fill(struct ionic_queue *q)
 void ionic_rx_empty(struct ionic_queue *q)
 {
 	struct ionic_desc_info *desc_info;
-	struct ionic_page_info *page_info;
+	struct ionic_buf_info *buf_info;
 	unsigned int i, j;
 
 	for (i = 0; i < q->num_descs; i++) {
 		desc_info = &q->info[i];
 		for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
-			page_info = &desc_info->pages[j];
-			if (page_info->page)
-				ionic_rx_page_free(q, page_info);
+			buf_info = &desc_info->bufs[j];
+			if (buf_info->page)
+				ionic_rx_page_free(q, buf_info);
 		}
 
-		desc_info->npages = 0;
+		desc_info->nbufs = 0;
 		desc_info->cb = NULL;
 		desc_info->cb_arg = NULL;
 	}
+
+	q->head_idx = 0;
+	q->tail_idx = 0;
 }
 
 static void ionic_dim_update(struct ionic_qcq *qcq)
-- 
2.17.1



* [PATCH net-next 3/6] ionic: optimize fastpath struct usage
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 1/6] ionic: move rx_page_alloc and free Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 2/6] ionic: implement Rx page reuse Shannon Nelson
@ 2021-03-10 19:26 ` Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 4/6] ionic: simplify rx skb alloc Shannon Nelson
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

Clean up a couple of struct uses to make for better fast path
access.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 .../net/ethernet/pensando/ionic/ionic_dev.c   |  4 +--
 .../net/ethernet/pensando/ionic/ionic_dev.h   |  7 ++--
 .../net/ethernet/pensando/ionic/ionic_lif.c   |  3 +-
 .../net/ethernet/pensando/ionic/ionic_lif.h   | 22 ++++++-------
 .../net/ethernet/pensando/ionic/ionic_main.c  |  4 +--
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 33 +++++++++----------
 6 files changed, 34 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index fb2b5bf179d7..b951bf5bbdc4 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -585,9 +585,9 @@ void ionic_q_sg_map(struct ionic_queue *q, void *base, dma_addr_t base_pa)
 void ionic_q_post(struct ionic_queue *q, bool ring_doorbell, ionic_desc_cb cb,
 		  void *cb_arg)
 {
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_desc_info *desc_info;
 	struct ionic_lif *lif = q->lif;
+	struct device *dev = q->dev;
 
 	desc_info = &q->info[q->head_idx];
 	desc_info->cb = cb;
@@ -629,7 +629,7 @@ void ionic_q_service(struct ionic_queue *q, struct ionic_cq_info *cq_info,
 
 	/* stop index must be for a descriptor that is not yet completed */
 	if (unlikely(!ionic_q_is_posted(q, stop_index)))
-		dev_err(q->lif->ionic->dev,
+		dev_err(q->dev,
 			"ionic stop is not posted %s stop %u tail %u head %u\n",
 			q->name, stop_index, q->tail_idx, q->head_idx);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 0f877c86eba6..339824cfd618 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -205,10 +205,12 @@ struct ionic_queue {
 	struct device *dev;
 	struct ionic_lif *lif;
 	struct ionic_desc_info *info;
+	u64 dbval;
 	u16 head_idx;
 	u16 tail_idx;
 	unsigned int index;
 	unsigned int num_descs;
+	unsigned int max_sg_elems;
 	u64 dbell_count;
 	u64 stop;
 	u64 wake;
@@ -217,7 +219,6 @@ struct ionic_queue {
 	unsigned int type;
 	unsigned int hw_index;
 	unsigned int hw_type;
-	u64 dbval;
 	union {
 		void *base;
 		struct ionic_txq_desc *txq;
@@ -235,7 +236,7 @@ struct ionic_queue {
 	unsigned int sg_desc_size;
 	unsigned int pid;
 	char name[IONIC_QUEUE_NAME_MAX_SZ];
-};
+} ____cacheline_aligned_in_smp;
 
 #define IONIC_INTR_INDEX_NOT_ASSIGNED	-1
 #define IONIC_INTR_NAME_MAX_SZ		32
@@ -262,7 +263,7 @@ struct ionic_cq {
 	u64 compl_count;
 	void *base;
 	dma_addr_t base_pa;
-};
+} ____cacheline_aligned_in_smp;
 
 struct ionic;
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 11140915c2da..6f8a5daaadfa 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -495,6 +495,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 		goto err_out;
 	}
 
+	new->q.dev = dev;
 	new->flags = flags;
 
 	new->q.info = devm_kcalloc(dev, num_descs, sizeof(*new->q.info),
@@ -506,6 +507,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 	}
 
 	new->q.type = type;
+	new->q.max_sg_elems = lif->qtype_info[type].max_sg_elems;
 
 	err = ionic_q_init(lif, idev, &new->q, index, name, num_descs,
 			   desc_size, sg_desc_size, pid);
@@ -2450,7 +2452,6 @@ int ionic_lif_alloc(struct ionic *ionic)
 	lif->index = 0;
 	lif->ntxq_descs = IONIC_DEF_TXRX_DESC;
 	lif->nrxq_descs = IONIC_DEF_TXRX_DESC;
-	lif->tx_budget = IONIC_TX_BUDGET_DEFAULT;
 
 	/* Convert the default coalesce value to actual hw resolution */
 	lif->rx_coalesce_usecs = IONIC_ITR_COAL_USEC_DEFAULT;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 563dba384a53..8ffda32a0a7d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -159,16 +159,11 @@ struct ionic_qtype_info {
 
 #define IONIC_LIF_NAME_MAX_SZ		32
 struct ionic_lif {
-	char name[IONIC_LIF_NAME_MAX_SZ];
-	struct list_head list;
 	struct net_device *netdev;
 	DECLARE_BITMAP(state, IONIC_LIF_F_STATE_SIZE);
 	struct ionic *ionic;
-	bool registered;
 	unsigned int index;
 	unsigned int hw_index;
-	unsigned int kern_pid;
-	u64 __iomem *kern_dbpage;
 	struct mutex queue_lock;	/* lock for queue structures */
 	spinlock_t adminq_lock;		/* lock for AdminQ operations */
 	struct ionic_qcq *adminqcq;
@@ -177,20 +172,25 @@ struct ionic_lif {
 	struct ionic_tx_stats *txqstats;
 	struct ionic_qcq **rxqcqs;
 	struct ionic_rx_stats *rxqstats;
+	struct ionic_deferred deferred;
+	struct work_struct tx_timeout_work;
 	u64 last_eid;
+	unsigned int kern_pid;
+	u64 __iomem *kern_dbpage;
 	unsigned int neqs;
 	unsigned int nxqs;
 	unsigned int ntxq_descs;
 	unsigned int nrxq_descs;
 	u32 rx_copybreak;
-	u32 tx_budget;
 	unsigned int rx_mode;
 	u64 hw_features;
+	bool registered;
 	bool mc_overflow;
-	unsigned int nmcast;
 	bool uc_overflow;
 	u16 lif_type;
+	unsigned int nmcast;
 	unsigned int nucast;
+	char name[IONIC_LIF_NAME_MAX_SZ];
 
 	union ionic_lif_identity *identity;
 	struct ionic_lif_info *info;
@@ -205,16 +205,14 @@ struct ionic_lif {
 	u32 rss_ind_tbl_sz;
 
 	struct ionic_rx_filters rx_filters;
-	struct ionic_deferred deferred;
-	unsigned long *dbid_inuse;
-	unsigned int dbid_count;
-	struct dentry *dentry;
 	u32 rx_coalesce_usecs;		/* what the user asked for */
 	u32 rx_coalesce_hw;		/* what the hw is using */
 	u32 tx_coalesce_usecs;		/* what the user asked for */
 	u32 tx_coalesce_hw;		/* what the hw is using */
+	unsigned long *dbid_inuse;
+	unsigned int dbid_count;
 
-	struct work_struct tx_timeout_work;
+	struct dentry *dentry;
 };
 
 struct ionic_queue_params {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index fbc57de6683e..14ece909a451 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -234,17 +234,15 @@ static void ionic_adminq_cb(struct ionic_queue *q,
 {
 	struct ionic_admin_ctx *ctx = cb_arg;
 	struct ionic_admin_comp *comp;
-	struct device *dev;
 
 	if (!ctx)
 		return;
 
 	comp = cq_info->cq_desc;
-	dev = &q->lif->netdev->dev;
 
 	memcpy(&ctx->comp, comp, sizeof(*comp));
 
-	dev_dbg(dev, "comp admin queue command:\n");
+	dev_dbg(q->dev, "comp admin queue command:\n");
 	dynamic_hex_dump("comp ", DUMP_PREFIX_OFFSET, 16, 1,
 			 &ctx->comp, sizeof(ctx->comp), true);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 3e13cfee9ecd..c472c14b3a80 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -82,7 +82,7 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
 	struct device *dev;
 
 	netdev = lif->netdev;
-	dev = lif->ionic->dev;
+	dev = q->dev;
 	stats = q_to_rx_stats(q);
 
 	if (unlikely(!buf_info)) {
@@ -118,7 +118,7 @@ static void ionic_rx_page_free(struct ionic_queue *q,
 			       struct ionic_buf_info *buf_info)
 {
 	struct net_device *netdev = q->lif->netdev;
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 
 	if (unlikely(!buf_info)) {
 		net_err_ratelimited("%s: %s invalid buf_info in free\n",
@@ -162,8 +162,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_buf_info *buf_info;
+	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
@@ -218,8 +218,8 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_buf_info *buf_info;
+	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	u16 len;
 
@@ -362,7 +362,6 @@ void ionic_rx_fill(struct ionic_queue *q)
 	struct ionic_rxq_sg_elem *sg_elem;
 	struct ionic_buf_info *buf_info;
 	struct ionic_rxq_desc *desc;
-	unsigned int max_sg_elems;
 	unsigned int remain_len;
 	unsigned int frag_len;
 	unsigned int nfrags;
@@ -370,7 +369,6 @@ void ionic_rx_fill(struct ionic_queue *q)
 	unsigned int len;
 
 	len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
-	max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
 
 	for (i = ionic_q_space_avail(q); i; i--) {
 		nfrags = 0;
@@ -397,7 +395,7 @@ void ionic_rx_fill(struct ionic_queue *q)
 
 		/* fill sg descriptors - buf[1..n] */
 		sg_desc = desc_info->sg_desc;
-		for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
+		for (j = 0; remain_len > 0 && j < q->max_sg_elems; j++) {
 			sg_elem = &sg_desc->elems[j];
 			if (!buf_info->page) { /* alloc a new sg buffer? */
 				if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
@@ -416,7 +414,7 @@ void ionic_rx_fill(struct ionic_queue *q)
 		}
 
 		/* clear end sg element as a sentinel */
-		if (j < max_sg_elems) {
+		if (j < q->max_sg_elems) {
 			sg_elem = &sg_desc->elems[j];
 			memset(sg_elem, 0, sizeof(*sg_elem));
 		}
@@ -568,7 +566,7 @@ int ionic_txrx_napi(struct napi_struct *napi, int budget)
 	idev = &lif->ionic->idev;
 	txcq = &lif->txqcqs[qi]->cq;
 
-	tx_work_done = ionic_cq_service(txcq, lif->tx_budget,
+	tx_work_done = ionic_cq_service(txcq, IONIC_TX_BUDGET_DEFAULT,
 					ionic_tx_service, NULL, NULL);
 
 	rx_work_done = ionic_cq_service(rxcq, budget,
@@ -601,7 +599,7 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
 				      void *data, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 
 	dma_addr = dma_map_single(dev, data, len, DMA_TO_DEVICE);
@@ -619,7 +617,7 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
 				    size_t offset, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 
 	dma_addr = skb_frag_dma_map(dev, frag, offset, len, DMA_TO_DEVICE);
@@ -640,7 +638,7 @@ static void ionic_tx_clean(struct ionic_queue *q,
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct ionic_txq_desc *desc = desc_info->desc;
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	u8 opcode, flags, nsge;
 	u16 queue_index;
 	unsigned int i;
@@ -822,8 +820,8 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct ionic_desc_info *rewind_desc_info;
-	struct device *dev = q->lif->ionic->dev;
 	struct ionic_txq_sg_elem *elem;
+	struct device *dev = q->dev;
 	struct ionic_txq_desc *desc;
 	unsigned int frag_left = 0;
 	unsigned int offset = 0;
@@ -994,7 +992,7 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
@@ -1034,7 +1032,7 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
 {
 	struct ionic_txq_desc *desc = q->info[q->head_idx].txq_desc;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	bool has_vlan;
 	u8 flags = 0;
@@ -1071,7 +1069,7 @@ static int ionic_tx_skb_frags(struct ionic_queue *q, struct sk_buff *skb)
 	unsigned int len_left = skb->len - skb_headlen(skb);
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
-	struct device *dev = q->lif->ionic->dev;
+	struct device *dev = q->dev;
 	dma_addr_t dma_addr;
 	skb_frag_t *frag;
 	u16 len;
@@ -1120,7 +1118,6 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 
 static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 {
-	int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	int err;
 
@@ -1129,7 +1126,7 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 		return (skb->len / skb_shinfo(skb)->gso_size) + 1;
 
 	/* If non-TSO, just need 1 desc and nr_frags sg elems */
-	if (skb_shinfo(skb)->nr_frags <= sg_elems)
+	if (skb_shinfo(skb)->nr_frags <= q->max_sg_elems)
 		return 1;
 
 	/* Too many frags, so linearize */
-- 
2.17.1



* [PATCH net-next 4/6] ionic: simplify rx skb alloc
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
                   ` (2 preceding siblings ...)
  2021-03-10 19:26 ` [PATCH net-next 3/6] ionic: optimize fastpath struct usage Shannon Nelson
@ 2021-03-10 19:26 ` Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 5/6] ionic: rebuild debugfs on qcq swap Shannon Nelson
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

Remove an unnecessary layer over rx skb allocation.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 63 +++++++------------
 1 file changed, 22 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index c472c14b3a80..cd2540ff9251 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -10,12 +10,6 @@
 #include "ionic_lif.h"
 #include "ionic_txrx.h"
 
-static void ionic_rx_clean(struct ionic_queue *q,
-			   struct ionic_desc_info *desc_info,
-			   struct ionic_cq_info *cq_info,
-			   void *cb_arg);
-
-static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info);
 
@@ -40,32 +34,6 @@ static inline struct netdev_queue *q_to_ndq(struct ionic_queue *q)
 	return netdev_get_tx_queue(q->lif->netdev, q->index);
 }
 
-static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
-					  unsigned int len, bool frags)
-{
-	struct ionic_lif *lif = q->lif;
-	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
-	struct sk_buff *skb;
-
-	netdev = lif->netdev;
-	stats = &q->lif->rxqstats[q->index];
-
-	if (frags)
-		skb = napi_get_frags(&q_to_qcq(q)->napi);
-	else
-		skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
-
-	if (unlikely(!skb)) {
-		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
-				     netdev->name, q->name);
-		stats->alloc_err++;
-		return NULL;
-	}
-
-	return skb;
-}
-
 static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
 {
 	buf_info->page = NULL;
@@ -76,12 +44,10 @@ static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
 static int ionic_rx_page_alloc(struct ionic_queue *q,
 			       struct ionic_buf_info *buf_info)
 {
-	struct ionic_lif *lif = q->lif;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
 	struct device *dev;
 
-	netdev = lif->netdev;
 	dev = q->dev;
 	stats = q_to_rx_stats(q);
 
@@ -162,21 +128,29 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
+	struct ionic_rx_stats *stats;
 	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	unsigned int i;
 	u16 frag_len;
 	u16 len;
 
+	stats = q_to_rx_stats(q);
+
 	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
 	prefetch(buf_info->page);
 
-	skb = ionic_rx_skb_alloc(q, len, true);
-	if (unlikely(!skb))
+	skb = napi_get_frags(&q_to_qcq(q)->napi);
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
 		return NULL;
+	}
 
 	i = comp->num_sg_elems + 1;
 	do {
@@ -218,17 +192,25 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_cq_info *cq_info)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
+	struct ionic_rx_stats *stats;
 	struct device *dev = q->dev;
 	struct sk_buff *skb;
 	u16 len;
 
+	stats = q_to_rx_stats(q);
+
 	buf_info = &desc_info->bufs[0];
 	len = le16_to_cpu(comp->len);
 
-	skb = ionic_rx_skb_alloc(q, len, false);
-	if (unlikely(!skb))
+	skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
+	if (unlikely(!skb)) {
+		net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
+				     netdev->name, q->name);
+		stats->alloc_err++;
 		return NULL;
+	}
 
 	if (unlikely(!buf_info->page)) {
 		dev_kfree_skb(skb);
@@ -253,13 +235,12 @@ static void ionic_rx_clean(struct ionic_queue *q,
 			   void *cb_arg)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct net_device *netdev = q->lif->netdev;
 	struct ionic_qcq *qcq = q_to_qcq(q);
 	struct ionic_rx_stats *stats;
-	struct net_device *netdev;
 	struct sk_buff *skb;
 
 	stats = q_to_rx_stats(q);
-	netdev = q->lif->netdev;
 
 	if (comp->status) {
 		stats->dropped++;
-- 
2.17.1



* [PATCH net-next 5/6] ionic: rebuild debugfs on qcq swap
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
                   ` (3 preceding siblings ...)
  2021-03-10 19:26 ` [PATCH net-next 4/6] ionic: simplify rx skb alloc Shannon Nelson
@ 2021-03-10 19:26 ` Shannon Nelson
  2021-03-10 19:26 ` [PATCH net-next 6/6] ionic: simplify use of completion types Shannon Nelson
  2021-03-10 23:40 ` [PATCH net-next 0/6] ionic Rx updates patchwork-bot+netdevbpf
  6 siblings, 0 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

Each queue reconfigure needs a rebuild of the matching
debugfs information.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 6f8a5daaadfa..48d3c7685b6c 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -2204,6 +2204,9 @@ static void ionic_swap_queues(struct ionic_qcq *a, struct ionic_qcq *b)
 	swap(a->cq_base,      b->cq_base);
 	swap(a->cq_base_pa,   b->cq_base_pa);
 	swap(a->cq_size,      b->cq_size);
+
+	ionic_debugfs_del_qcq(a);
+	ionic_debugfs_add_qcq(a->q.lif, a);
 }
 
 int ionic_reconfigure_queues(struct ionic_lif *lif,
-- 
2.17.1



* [PATCH net-next 6/6] ionic: simplify use of completion types
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
                   ` (4 preceding siblings ...)
  2021-03-10 19:26 ` [PATCH net-next 5/6] ionic: rebuild debugfs on qcq swap Shannon Nelson
@ 2021-03-10 19:26 ` Shannon Nelson
  2021-03-10 23:40 ` [PATCH net-next 0/6] ionic Rx updates patchwork-bot+netdevbpf
  6 siblings, 0 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-10 19:26 UTC (permalink / raw)
  To: netdev, davem, kuba; +Cc: drivers, Shannon Nelson

Make better use of our struct types and type checking by passing
the actual Rx or Tx completion type rather than a generic void
pointer type.
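
The typed pointers rely on the completion union in struct
ionic_cq_info, presumably along these lines (a sketch; the real
definition lives in ionic_dev.h):

	struct ionic_cq_info {
		union {
			void *cq_desc;
			struct ionic_txq_comp *txcq;
			struct ionic_rxq_comp *rxcq;
		};
		...
	};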

Signed-off-by: Shannon Nelson <snelson@pensando.io>
---
 .../net/ethernet/pensando/ionic/ionic_txrx.c  | 19 +++++++------------
 1 file changed, 7 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index cd2540ff9251..c63e6e7aa47b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -125,9 +125,8 @@ static bool ionic_rx_buf_recycle(struct ionic_queue *q,
 
 static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 				      struct ionic_desc_info *desc_info,
-				      struct ionic_cq_info *cq_info)
+				      struct ionic_rxq_comp *comp)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
 	struct ionic_rx_stats *stats;
@@ -155,9 +154,6 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 	i = comp->num_sg_elems + 1;
 	do {
 		if (unlikely(!buf_info->page)) {
-			struct napi_struct *napi = &q_to_qcq(q)->napi;
-
-			napi->skb = NULL;
 			dev_kfree_skb(skb);
 			return NULL;
 		}
@@ -189,9 +185,8 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
 
 static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 					  struct ionic_desc_info *desc_info,
-					  struct ionic_cq_info *cq_info)
+					  struct ionic_rxq_comp *comp)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_buf_info *buf_info;
 	struct ionic_rx_stats *stats;
@@ -234,7 +229,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 			   struct ionic_cq_info *cq_info,
 			   void *cb_arg)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct net_device *netdev = q->lif->netdev;
 	struct ionic_qcq *qcq = q_to_qcq(q);
 	struct ionic_rx_stats *stats;
@@ -251,9 +246,9 @@ static void ionic_rx_clean(struct ionic_queue *q,
 	stats->bytes += le16_to_cpu(comp->len);
 
 	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
-		skb = ionic_rx_copybreak(q, desc_info, cq_info);
+		skb = ionic_rx_copybreak(q, desc_info, comp);
 	else
-		skb = ionic_rx_frags(q, desc_info, cq_info);
+		skb = ionic_rx_frags(q, desc_info, comp);
 
 	if (unlikely(!skb)) {
 		stats->dropped++;
@@ -309,7 +304,7 @@ static void ionic_rx_clean(struct ionic_queue *q,
 
 static bool ionic_rx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_rxq_comp *comp = cq_info->cq_desc;
+	struct ionic_rxq_comp *comp = cq_info->rxcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
 
@@ -661,7 +656,7 @@ static void ionic_tx_clean(struct ionic_queue *q,
 
 static bool ionic_tx_service(struct ionic_cq *cq, struct ionic_cq_info *cq_info)
 {
-	struct ionic_txq_comp *comp = cq_info->cq_desc;
+	struct ionic_txq_comp *comp = cq_info->txcq;
 	struct ionic_queue *q = cq->bound_q;
 	struct ionic_desc_info *desc_info;
 	u16 index;
-- 
2.17.1



* Re: [PATCH net-next 0/6] ionic Rx updates
  2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
                   ` (5 preceding siblings ...)
  2021-03-10 19:26 ` [PATCH net-next 6/6] ionic: simplify use of completion types Shannon Nelson
@ 2021-03-10 23:40 ` patchwork-bot+netdevbpf
  6 siblings, 0 replies; 10+ messages in thread
From: patchwork-bot+netdevbpf @ 2021-03-10 23:40 UTC (permalink / raw)
  To: Shannon Nelson; +Cc: netdev, davem, kuba, drivers

Hello:

This series was applied to netdev/net-next.git (refs/heads/master):

On Wed, 10 Mar 2021 11:26:25 -0800 you wrote:
> The ionic driver's Rx path is due for an overhaul in order to
> better use memory buffers and to clean up the data structures.
> 
> The first two patches convert the driver to using page sharing
> between buffers so as to lessen the page alloc and free overhead.
> 
> The remaining patches clean up the structs and fastpath code for
> better efficiency.
> 
> [...]

Here is the summary with links:
  - [net-next,1/6] ionic: move rx_page_alloc and free
    https://git.kernel.org/netdev/net-next/c/2b5720f26908
  - [net-next,2/6] ionic: implement Rx page reuse
    https://git.kernel.org/netdev/net-next/c/4b0a7539a372
  - [net-next,3/6] ionic: optimize fastpath struct usage
    https://git.kernel.org/netdev/net-next/c/f37bc3462e80
  - [net-next,4/6] ionic: simplify rx skb alloc
    https://git.kernel.org/netdev/net-next/c/89e572e7369f
  - [net-next,5/6] ionic: rebuild debugfs on qcq swap
    https://git.kernel.org/netdev/net-next/c/55eda6bbe0c8
  - [net-next,6/6] ionic: simplify use of completion types
    https://git.kernel.org/netdev/net-next/c/a25edab93b28

You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html




* Re: [PATCH net-next 2/6] ionic: implement Rx page reuse
  2021-03-10 19:26 ` [PATCH net-next 2/6] ionic: implement Rx page reuse Shannon Nelson
@ 2021-03-11  2:14   ` Alexander Duyck
  2021-03-12 20:01     ` Shannon Nelson
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Duyck @ 2021-03-11  2:14 UTC (permalink / raw)
  To: Shannon Nelson; +Cc: Netdev, David Miller, Jakub Kicinski, Pensando Drivers

On Wed, Mar 10, 2021 at 11:28 AM Shannon Nelson <snelson@pensando.io> wrote:
>
> Rework the Rx buffer allocations to use pages twice when using
> normal MTU in order to cut down on buffer allocation and mapping
> overhead.
>
> Instead of tracking individual pages, in which we may have
> wasted half the space when using standard 1500 MTU, we track
> buffers which use half pages, so we can use the second half
> of the page rather than allocate and map a new page once the
> first buffer has been used.
>
> Signed-off-by: Shannon Nelson <snelson@pensando.io>

So looking at the approach taken here, it seems like you are doing a
linear walk and getting 2 uses per 4K page. If you are
taking that route it might make more sense to just split the page and
use both pieces immediately to populate 2 entries instead of waiting
on the next loop through the ring. Then you could just split the page
into multiple buffers and fill your sg list using less total pages
rather than having 2K gaps between your entries. An added advantage
would be that you could simply merge the page fragments in the event
that you have something writing to the full 2K buffers and you cannot
use copybreak.
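
Roughly something like this hypothetical helper, reusing the names
from this patch:

	static int ionic_rx_page_alloc_split(struct ionic_queue *q,
					     struct ionic_buf_info *buf_a,
					     struct ionic_buf_info *buf_b)
	{
		struct device *dev = q->lif->ionic->dev;
		dma_addr_t dma_addr;
		struct page *page;

		page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
		if (unlikely(!page))
			return -ENOMEM;

		dma_addr = dma_map_page(dev, page, 0, IONIC_PAGE_SIZE,
					DMA_FROM_DEVICE);
		if (unlikely(dma_mapping_error(dev, dma_addr))) {
			__free_pages(page, 0);
			return -EIO;
		}

		get_page(page);		/* second reference for the second half */

		buf_a->page_offset = 0;
		buf_a->dma_addr = dma_addr;
		buf_a->page = page;

		buf_b->page_offset = IONIC_PAGE_SPLIT_SZ;
		buf_b->dma_addr = dma_addr;
		buf_b->page = page;

		return 0;
	}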

> ---
>  .../net/ethernet/pensando/ionic/ionic_dev.h   |  12 +-
>  .../net/ethernet/pensando/ionic/ionic_txrx.c  | 215 +++++++++++-------
>  2 files changed, 138 insertions(+), 89 deletions(-)
>
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
> index 690768ff0143..0f877c86eba6 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
> @@ -170,9 +170,15 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
>                               struct ionic_desc_info *desc_info,
>                               struct ionic_cq_info *cq_info, void *cb_arg);
>
> -struct ionic_page_info {
> +#define IONIC_PAGE_SIZE                                PAGE_SIZE
> +#define IONIC_PAGE_SPLIT_SZ                    (PAGE_SIZE / 2)

This probably doesn't work out too well when the page size gets up to
64K. I don't know of too many networks that support a 32K MTU.. :)
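(At 64K PAGE_SIZE the split size here becomes 32K, so a 1518-byte
buffer would still strand most of the page.)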

> +#define IONIC_PAGE_GFP_MASK                    (GFP_ATOMIC | __GFP_NOWARN |\
> +                                                __GFP_COMP | __GFP_MEMALLOC)
> +
> +struct ionic_buf_info {
>         struct page *page;
>         dma_addr_t dma_addr;
> +       u32 page_offset;
>  };

I'm not really sure the rename was needed. You are still just working
with a page aren't you? It would actually reduce the complexity of
this patch a bunch as you could drop the renaming changes.

>  struct ionic_desc_info {
> @@ -187,8 +193,8 @@ struct ionic_desc_info {
>                 struct ionic_txq_sg_desc *txq_sg_desc;
>                 struct ionic_rxq_sg_desc *rxq_sgl_desc;
>         };
> -       unsigned int npages;
> -       struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
> +       unsigned int nbufs;
> +       struct ionic_buf_info bufs[IONIC_RX_MAX_SG_ELEMS + 1];
>         ionic_desc_cb cb;
>         void *cb_arg;
>  };
> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
> index 70b997f302ac..3e13cfee9ecd 100644
> --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
> +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
> @@ -54,7 +54,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
>         if (frags)
>                 skb = napi_get_frags(&q_to_qcq(q)->napi);
>         else
> -               skb = netdev_alloc_skb_ip_align(netdev, len);
> +               skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
>
>         if (unlikely(!skb)) {
>                 net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
> @@ -66,8 +66,15 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
>         return skb;
>  }
>
> +static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
> +{
> +       buf_info->page = NULL;
> +       buf_info->page_offset = 0;
> +       buf_info->dma_addr = 0;
> +}
> +

Technically speaking you probably only need to reset the page value.
You could hold off on resetting the page_offset and dma_addr until you
actually are populating the page.
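
i.e. the reset could arguably shrink to just:

	static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
	{
		buf_info->page = NULL;	/* offset/addr rewritten at next alloc */
	}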

>  static int ionic_rx_page_alloc(struct ionic_queue *q,
> -                              struct ionic_page_info *page_info)
> +                              struct ionic_buf_info *buf_info)
>  {
>         struct ionic_lif *lif = q->lif;
>         struct ionic_rx_stats *stats;
> @@ -78,26 +85,26 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
>         dev = lif->ionic->dev;
>         stats = q_to_rx_stats(q);
>
> -       if (unlikely(!page_info)) {
> -               net_err_ratelimited("%s: %s invalid page_info in alloc\n",
> +       if (unlikely(!buf_info)) {
> +               net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
>                                     netdev->name, q->name);
>                 return -EINVAL;
>         }
>
> -       page_info->page = dev_alloc_page();
> -       if (unlikely(!page_info->page)) {
> +       buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
> +       if (unlikely(!buf_info->page)) {
>                 net_err_ratelimited("%s: %s page alloc failed\n",
>                                     netdev->name, q->name);
>                 stats->alloc_err++;
>                 return -ENOMEM;
>         }
> +       buf_info->page_offset = 0;
>
> -       page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
> -                                          DMA_FROM_DEVICE);
> -       if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
> -               put_page(page_info->page);
> -               page_info->dma_addr = 0;
> -               page_info->page = NULL;
> +       buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
> +                                         IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
> +       if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
> +               __free_pages(buf_info->page, 0);
> +               ionic_rx_buf_reset(buf_info);
>                 net_err_ratelimited("%s: %s dma map failed\n",
>                                     netdev->name, q->name);
>                 stats->dma_map_err++;

So one thing I would change about the setup here is that I would not
store anything in buf_info->page until after you have stored the
page_offset and dma_addr. That way you know that the other two are
only valid because a page is present. In addition you can avoid having
to do the extra cleanup as they should only be read if info->page is
set.
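
Something like this, keeping the fresh page in a local until the
mapping has succeeded (sketch):

	page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
	...
	buf_info->page_offset = 0;
	buf_info->dma_addr = dma_map_page(dev, page, 0, IONIC_PAGE_SIZE,
					  DMA_FROM_DEVICE);
	if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
		__free_pages(page, 0);
		stats->dma_map_err++;
		return -EIO;
	}
	buf_info->page = page;	/* publish last, once the rest is valid */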

> @@ -108,32 +115,46 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
>  }
>
>  static void ionic_rx_page_free(struct ionic_queue *q,
> -                              struct ionic_page_info *page_info)
> +                              struct ionic_buf_info *buf_info)
>  {
> -       struct ionic_lif *lif = q->lif;
> -       struct net_device *netdev;
> -       struct device *dev;
> -
> -       netdev = lif->netdev;
> -       dev = lif->ionic->dev;
> +       struct net_device *netdev = q->lif->netdev;
> +       struct device *dev = q->lif->ionic->dev;
>
> -       if (unlikely(!page_info)) {
> -               net_err_ratelimited("%s: %s invalid page_info in free\n",
> +       if (unlikely(!buf_info)) {
> +               net_err_ratelimited("%s: %s invalid buf_info in free\n",
>                                     netdev->name, q->name);
>                 return;
>         }
>
> -       if (unlikely(!page_info->page)) {
> -               net_err_ratelimited("%s: %s invalid page in free\n",
> -                                   netdev->name, q->name);
> +       if (!buf_info->page)
>                 return;
> -       }
>
> -       dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
> +       dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
> +       __free_pages(buf_info->page, 0);
> +       ionic_rx_buf_reset(buf_info);
> +}
> +
> +static bool ionic_rx_buf_recycle(struct ionic_queue *q,
> +                                struct ionic_buf_info *buf_info, u32 used)
> +{
> +       u32 size;
> +
> +       /* don't re-use pages allocated in low-mem condition */
> +       if (page_is_pfmemalloc(buf_info->page))
> +               return false;
> +
> +       /* don't re-use buffers from non-local numa nodes */
> +       if (page_to_nid(buf_info->page) != numa_mem_id())
> +               return false;
> +

So I am not sure if either of these is really needed if you are just
going to be freeing the page after all 4K is consumed. With the Intel
drivers we were bouncing back and forth between the upper and lower
halves. With this it looks like you do a linear walk and then exit
when you have reached the end of the page. With that being the case
the memory locality check is kind of moot since you will flush the
page after two uses anyway. In addition the pfmemalloc check might
also be moot since it may actually be more efficient to reuse the page
rather than use a full 4K and attempt to allocate yet another page.

> +       size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
> +       buf_info->page_offset += size;
> +       if (buf_info->page_offset >= IONIC_PAGE_SIZE)
> +               return false;
> +
> +       get_page(buf_info->page);

The get_page per 2K section will add up in terms of cost as it is an
expensive atomic operation. You might see if you can get away with
batching it to do one atomic add per allocation instead of one per
use.
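
The usual batching trick looks something like this, where pagecnt_bias
would be a new field in ionic_buf_info:

	/* at page alloc: one atomic add covers many future uses */
	page_ref_add(page, USHRT_MAX - 1);
	buf_info->pagecnt_bias = USHRT_MAX;

	/* per rx use: consume a prepaid reference, no atomic needed */
	buf_info->pagecnt_bias--;

	/* when finally dropping the buffer: release whatever is left */
	__page_frag_cache_drain(page, buf_info->pagecnt_bias);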

> -       put_page(page_info->page);
> -       page_info->dma_addr = 0;
> -       page_info->page = NULL;
> +       return true;
>  }
>
>  static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
> @@ -142,16 +163,16 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>  {
>         struct ionic_rxq_comp *comp = cq_info->cq_desc;
>         struct device *dev = q->lif->ionic->dev;
> -       struct ionic_page_info *page_info;
> +       struct ionic_buf_info *buf_info;
>         struct sk_buff *skb;
>         unsigned int i;
>         u16 frag_len;
>         u16 len;
>
> -       page_info = &desc_info->pages[0];
> +       buf_info = &desc_info->bufs[0];
>         len = le16_to_cpu(comp->len);
>
> -       prefetch(page_address(page_info->page) + NET_IP_ALIGN);
> +       prefetch(buf_info->page);

Just want to confirm this is what you meant to do. The old code was
prefetching the first line of the data. The new code is just
prefetching the page struct. You may want to change this to a
prefetchw if you are expecting to use this to improve the performance
for the get_page call.
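
e.g. something like:

	prefetchw(buf_info->page);	/* struct page is about to be written */
	prefetch(page_address(buf_info->page) + buf_info->page_offset);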

>
>         skb = ionic_rx_skb_alloc(q, len, true);
>         if (unlikely(!skb))
> @@ -159,7 +180,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>
>         i = comp->num_sg_elems + 1;
>         do {
> -               if (unlikely(!page_info->page)) {
> +               if (unlikely(!buf_info->page)) {
>                         struct napi_struct *napi = &q_to_qcq(q)->napi;
>
>                         napi->skb = NULL;
> @@ -167,15 +188,25 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>                         return NULL;
>                 }
>
> -               frag_len = min(len, (u16)PAGE_SIZE);
> +               frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
>                 len -= frag_len;
>
> -               dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
> -                              PAGE_SIZE, DMA_FROM_DEVICE);
> +               dma_sync_single_for_cpu(dev,
> +                                       buf_info->dma_addr + buf_info->page_offset,
> +                                       frag_len, DMA_FROM_DEVICE);
> +
>                 skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
> -                               page_info->page, 0, frag_len, PAGE_SIZE);
> -               page_info->page = NULL;
> -               page_info++;
> +                               buf_info->page, buf_info->page_offset, frag_len,
> +                               IONIC_PAGE_SIZE);
> +
> +               if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
> +                       dma_unmap_page(dev, buf_info->dma_addr,
> +                                      IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
> +                       ionic_rx_buf_reset(buf_info);
> +               }
> +

If you don't unmap it, don't you need to sync the remaining portion
of the page back for use by the device?
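
i.e. something along these lines (untested; at this point page_offset
has already been advanced past the just-used fragment by the recycle
call):

	if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
		dma_unmap_page(dev, buf_info->dma_addr,
			       IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
		ionic_rx_buf_reset(buf_info);
	} else {
		/* give the still-mapped remainder of the page back
		 * to the device for the next fill
		 */
		dma_sync_single_for_device(dev,
					   buf_info->dma_addr +
					   buf_info->page_offset,
					   IONIC_PAGE_SIZE -
					   buf_info->page_offset,
					   DMA_FROM_DEVICE);
	}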

> +               buf_info++;
> +
>                 i--;
>         } while (i > 0);
>
> @@ -188,26 +219,26 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
>  {
>         struct ionic_rxq_comp *comp = cq_info->cq_desc;
>         struct device *dev = q->lif->ionic->dev;
> -       struct ionic_page_info *page_info;
> +       struct ionic_buf_info *buf_info;
>         struct sk_buff *skb;
>         u16 len;
>
> -       page_info = &desc_info->pages[0];
> +       buf_info = &desc_info->bufs[0];
>         len = le16_to_cpu(comp->len);
>
>         skb = ionic_rx_skb_alloc(q, len, false);
>         if (unlikely(!skb))
>                 return NULL;
>
> -       if (unlikely(!page_info->page)) {
> +       if (unlikely(!buf_info->page)) {
>                 dev_kfree_skb(skb);
>                 return NULL;
>         }
>
> -       dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
> +       dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
>                                 len, DMA_FROM_DEVICE);
> -       skb_copy_to_linear_data(skb, page_address(page_info->page), len);
> -       dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
> +       skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
> +       dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
>                                    len, DMA_FROM_DEVICE);
>
>         skb_put(skb, len);
> @@ -327,64 +358,73 @@ void ionic_rx_fill(struct ionic_queue *q)
>  {
>         struct net_device *netdev = q->lif->netdev;
>         struct ionic_desc_info *desc_info;
> -       struct ionic_page_info *page_info;
>         struct ionic_rxq_sg_desc *sg_desc;
>         struct ionic_rxq_sg_elem *sg_elem;
> +       struct ionic_buf_info *buf_info;
>         struct ionic_rxq_desc *desc;
> +       unsigned int max_sg_elems;
>         unsigned int remain_len;
> -       unsigned int seg_len;
> +       unsigned int frag_len;
>         unsigned int nfrags;
>         unsigned int i, j;
>         unsigned int len;
>
>         len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
> -       nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
> +       max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
>
>         for (i = ionic_q_space_avail(q); i; i--) {
> +               nfrags = 0;
>                 remain_len = len;
>                 desc_info = &q->info[q->head_idx];
>                 desc = desc_info->desc;
> -               sg_desc = desc_info->sg_desc;
> -               page_info = &desc_info->pages[0];
> +               buf_info = &desc_info->bufs[0];
>
> -               if (page_info->page) { /* recycle the buffer */
> -                       ionic_rxq_post(q, false, ionic_rx_clean, NULL);
> -                       continue;
> -               }
> -
> -               /* fill main descriptor - pages[0] */
> -               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
> -                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
> -               desc_info->npages = nfrags;
> -               if (unlikely(ionic_rx_page_alloc(q, page_info))) {
> -                       desc->addr = 0;
> -                       desc->len = 0;
> -                       return;
> +               if (!buf_info->page) { /* alloc a new buffer? */
> +                       if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
> +                               desc->addr = 0;
> +                               desc->len = 0;
> +                               return;
> +                       }
>                 }
> -               desc->addr = cpu_to_le64(page_info->dma_addr);
> -               seg_len = min_t(unsigned int, PAGE_SIZE, len);
> -               desc->len = cpu_to_le16(seg_len);
> -               remain_len -= seg_len;
> -               page_info++;
>
> -               /* fill sg descriptors - pages[1..n] */
> -               for (j = 0; j < nfrags - 1; j++) {
> -                       if (page_info->page) /* recycle the sg buffer */
> -                               continue;
> +               /* fill main descriptor - buf[0] */
> +               desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
> +               frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
> +               desc->len = cpu_to_le16(frag_len);
> +               remain_len -= frag_len;
> +               buf_info++;
> +               nfrags++;
>
> +               /* fill sg descriptors - buf[1..n] */
> +               sg_desc = desc_info->sg_desc;
> +               for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
>                         sg_elem = &sg_desc->elems[j];
> -                       if (unlikely(ionic_rx_page_alloc(q, page_info))) {
> -                               sg_elem->addr = 0;
> -                               sg_elem->len = 0;
> -                               return;
> +                       if (!buf_info->page) { /* alloc a new sg buffer? */
> +                               if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
> +                                       sg_elem->addr = 0;
> +                                       sg_elem->len = 0;
> +                                       return;
> +                               }
>                         }
> -                       sg_elem->addr = cpu_to_le64(page_info->dma_addr);
> -                       seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
> -                       sg_elem->len = cpu_to_le16(seg_len);
> -                       remain_len -= seg_len;
> -                       page_info++;
> +
> +                       sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
> +                       frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
> +                       sg_elem->len = cpu_to_le16(frag_len);
> +                       remain_len -= frag_len;
> +                       buf_info++;
> +                       nfrags++;
>                 }
>
> +               /* clear end sg element as a sentinel */
> +               if (j < max_sg_elems) {
> +                       sg_elem = &sg_desc->elems[j];
> +                       memset(sg_elem, 0, sizeof(*sg_elem));
> +               }
> +
> +               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
> +                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
> +               desc_info->nbufs = nfrags;
> +
>                 ionic_rxq_post(q, false, ionic_rx_clean, NULL);
>         }
>
> @@ -395,21 +435,24 @@ void ionic_rx_fill(struct ionic_queue *q)
>  void ionic_rx_empty(struct ionic_queue *q)
>  {
>         struct ionic_desc_info *desc_info;
> -       struct ionic_page_info *page_info;
> +       struct ionic_buf_info *buf_info;
>         unsigned int i, j;
>
>         for (i = 0; i < q->num_descs; i++) {
>                 desc_info = &q->info[i];
>                 for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
> -                       page_info = &desc_info->pages[j];
> -                       if (page_info->page)
> -                               ionic_rx_page_free(q, page_info);
> +                       buf_info = &desc_info->bufs[j];
> +                       if (buf_info->page)
> +                               ionic_rx_page_free(q, buf_info);
>                 }
>
> -               desc_info->npages = 0;
> +               desc_info->nbufs = 0;
>                 desc_info->cb = NULL;
>                 desc_info->cb_arg = NULL;
>         }
> +
> +       q->head_idx = 0;
> +       q->tail_idx = 0;
>  }
>
>  static void ionic_dim_update(struct ionic_qcq *qcq)
> --
> 2.17.1
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net-next 2/6] ionic: implement Rx page reuse
  2021-03-11  2:14   ` Alexander Duyck
@ 2021-03-12 20:01     ` Shannon Nelson
  0 siblings, 0 replies; 10+ messages in thread
From: Shannon Nelson @ 2021-03-12 20:01 UTC (permalink / raw)
  To: Alexander Duyck; +Cc: Netdev, David Miller, Jakub Kicinski, Pensando Drivers

On 3/10/21 6:14 PM, Alexander Duyck wrote:
> On Wed, Mar 10, 2021 at 11:28 AM Shannon Nelson <snelson@pensando.io> wrote:
>> Rework the Rx buffer allocations to use pages twice when using
>> normal MTU in order to cut down on buffer allocation and mapping
>> overhead.
>>
>> Instead of tracking individual pages, in which we may have
>> wasted half the space when using standard 1500 MTU, we track
>> buffers which use half pages, so we can use the second half
>> of the page rather than allocate and map a new page once the
>> first buffer has been used.
>>
>> Signed-off-by: Shannon Nelson <snelson@pensando.io>
> So looking at the approach taken here, it seems like you are doing a
> linear walk and getting 2 uses per 4K page. If you are taking that
> route it might make more sense to split the page and use both pieces
> immediately to populate 2 entries, instead of waiting for the next
> loop through the ring. Then you could split the page into multiple
> buffers and fill your sg list using fewer total pages, rather than
> having 2K gaps between your entries. An added advantage would be that
> you could simply merge the page fragments in the event that something
> writes to the full 2K buffers and you cannot use copybreak.

Thanks, Alex, for the detailed review.  I'll work with our internal 
performance guy to fold these comments into an update.
sln


>
>> ---
>>   .../net/ethernet/pensando/ionic/ionic_dev.h   |  12 +-
>>   .../net/ethernet/pensando/ionic/ionic_txrx.c  | 215 +++++++++++-------
>>   2 files changed, 138 insertions(+), 89 deletions(-)
>>
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
>> index 690768ff0143..0f877c86eba6 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
>> @@ -170,9 +170,15 @@ typedef void (*ionic_desc_cb)(struct ionic_queue *q,
>>                                struct ionic_desc_info *desc_info,
>>                                struct ionic_cq_info *cq_info, void *cb_arg);
>>
>> -struct ionic_page_info {
>> +#define IONIC_PAGE_SIZE                                PAGE_SIZE
>> +#define IONIC_PAGE_SPLIT_SZ                    (PAGE_SIZE / 2)
> This probably doesn't work out too well when the page size gets up to
> 64K. I don't know of too many networks that support a 32K MTU.. :)
>
>> +#define IONIC_PAGE_GFP_MASK                    (GFP_ATOMIC | __GFP_NOWARN |\
>> +                                                __GFP_COMP | __GFP_MEMALLOC)
>> +
>> +struct ionic_buf_info {
>>          struct page *page;
>>          dma_addr_t dma_addr;
>> +       u32 page_offset;
>>   };
> I'm not really sure the rename was needed. You are still just working
> with a page aren't you? It would actually reduce the complexity of
> this patch a bunch as you could drop the renaming changes.
>
>>   struct ionic_desc_info {
>> @@ -187,8 +193,8 @@ struct ionic_desc_info {
>>                  struct ionic_txq_sg_desc *txq_sg_desc;
>>                  struct ionic_rxq_sg_desc *rxq_sgl_desc;
>>          };
>> -       unsigned int npages;
>> -       struct ionic_page_info pages[IONIC_RX_MAX_SG_ELEMS + 1];
>> +       unsigned int nbufs;
>> +       struct ionic_buf_info bufs[IONIC_RX_MAX_SG_ELEMS + 1];
>>          ionic_desc_cb cb;
>>          void *cb_arg;
>>   };
>> diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>> index 70b997f302ac..3e13cfee9ecd 100644
>> --- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>> +++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
>> @@ -54,7 +54,7 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
>>          if (frags)
>>                  skb = napi_get_frags(&q_to_qcq(q)->napi);
>>          else
>> -               skb = netdev_alloc_skb_ip_align(netdev, len);
>> +               skb = napi_alloc_skb(&q_to_qcq(q)->napi, len);
>>
>>          if (unlikely(!skb)) {
>>                  net_warn_ratelimited("%s: SKB alloc failed on %s!\n",
>> @@ -66,8 +66,15 @@ static struct sk_buff *ionic_rx_skb_alloc(struct ionic_queue *q,
>>          return skb;
>>   }
>>
>> +static void ionic_rx_buf_reset(struct ionic_buf_info *buf_info)
>> +{
>> +       buf_info->page = NULL;
>> +       buf_info->page_offset = 0;
>> +       buf_info->dma_addr = 0;
>> +}
>> +
> Technically speaking you probably only need to reset the page value.
> You could hold off on resetting the page_offset and dma_addr until you
> actually are populating the page.
>
>>   static int ionic_rx_page_alloc(struct ionic_queue *q,
>> -                              struct ionic_page_info *page_info)
>> +                              struct ionic_buf_info *buf_info)
>>   {
>>          struct ionic_lif *lif = q->lif;
>>          struct ionic_rx_stats *stats;
>> @@ -78,26 +85,26 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
>>          dev = lif->ionic->dev;
>>          stats = q_to_rx_stats(q);
>>
>> -       if (unlikely(!page_info)) {
>> -               net_err_ratelimited("%s: %s invalid page_info in alloc\n",
>> +       if (unlikely(!buf_info)) {
>> +               net_err_ratelimited("%s: %s invalid buf_info in alloc\n",
>>                                      netdev->name, q->name);
>>                  return -EINVAL;
>>          }
>>
>> -       page_info->page = dev_alloc_page();
>> -       if (unlikely(!page_info->page)) {
>> +       buf_info->page = alloc_pages(IONIC_PAGE_GFP_MASK, 0);
>> +       if (unlikely(!buf_info->page)) {
>>                  net_err_ratelimited("%s: %s page alloc failed\n",
>>                                      netdev->name, q->name);
>>                  stats->alloc_err++;
>>                  return -ENOMEM;
>>          }
>> +       buf_info->page_offset = 0;
>>
>> -       page_info->dma_addr = dma_map_page(dev, page_info->page, 0, PAGE_SIZE,
>> -                                          DMA_FROM_DEVICE);
>> -       if (unlikely(dma_mapping_error(dev, page_info->dma_addr))) {
>> -               put_page(page_info->page);
>> -               page_info->dma_addr = 0;
>> -               page_info->page = NULL;
>> +       buf_info->dma_addr = dma_map_page(dev, buf_info->page, buf_info->page_offset,
>> +                                         IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
>> +       if (unlikely(dma_mapping_error(dev, buf_info->dma_addr))) {
>> +               __free_pages(buf_info->page, 0);
>> +               ionic_rx_buf_reset(buf_info);
>>                  net_err_ratelimited("%s: %s dma map failed\n",
>>                                      netdev->name, q->name);
>>                  stats->dma_map_err++;
> So one thing I would change about the setup here is that I would not
> store anything in buf_info page until after you have stored the
> page_offset and dma_addr. That way you know that the other two are
> only valid because a page is present. In addition you can avoid having
> to do the extra cleanup as they should only be read if info->page is
> set.
>
>> @@ -108,32 +115,46 @@ static int ionic_rx_page_alloc(struct ionic_queue *q,
>>   }
>>
>>   static void ionic_rx_page_free(struct ionic_queue *q,
>> -                              struct ionic_page_info *page_info)
>> +                              struct ionic_buf_info *buf_info)
>>   {
>> -       struct ionic_lif *lif = q->lif;
>> -       struct net_device *netdev;
>> -       struct device *dev;
>> -
>> -       netdev = lif->netdev;
>> -       dev = lif->ionic->dev;
>> +       struct net_device *netdev = q->lif->netdev;
>> +       struct device *dev = q->lif->ionic->dev;
>>
>> -       if (unlikely(!page_info)) {
>> -               net_err_ratelimited("%s: %s invalid page_info in free\n",
>> +       if (unlikely(!buf_info)) {
>> +               net_err_ratelimited("%s: %s invalid buf_info in free\n",
>>                                      netdev->name, q->name);
>>                  return;
>>          }
>>
>> -       if (unlikely(!page_info->page)) {
>> -               net_err_ratelimited("%s: %s invalid page in free\n",
>> -                                   netdev->name, q->name);
>> +       if (!buf_info->page)
>>                  return;
>> -       }
>>
>> -       dma_unmap_page(dev, page_info->dma_addr, PAGE_SIZE, DMA_FROM_DEVICE);
>> +       dma_unmap_page(dev, buf_info->dma_addr, IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
>> +       __free_pages(buf_info->page, 0);
>> +       ionic_rx_buf_reset(buf_info);
>> +}
>> +
>> +static bool ionic_rx_buf_recycle(struct ionic_queue *q,
>> +                                struct ionic_buf_info *buf_info, u32 used)
>> +{
>> +       u32 size;
>> +
>> +       /* don't re-use pages allocated in low-mem condition */
>> +       if (page_is_pfmemalloc(buf_info->page))
>> +               return false;
>> +
>> +       /* don't re-use buffers from non-local numa nodes */
>> +       if (page_to_nid(buf_info->page) != numa_mem_id())
>> +               return false;
>> +
> So I am not sure either of these checks is really needed if you are
> just going to free the page after all 4K is consumed. With the Intel
> drivers we were bouncing back and forth between the upper and lower
> halves of the page. With this it looks like you do a linear walk and
> then exit when you reach the end of the page. In that case the memory
> locality check is kind of moot, since you will flush the page after
> two uses anyway. The pfmemalloc check might also be moot, since it
> may actually be more efficient to reuse the page than to consume a
> full 4K and attempt to allocate yet another page.
>
>> +       size = ALIGN(used, IONIC_PAGE_SPLIT_SZ);
>> +       buf_info->page_offset += size;
>> +       if (buf_info->page_offset >= IONIC_PAGE_SIZE)
>> +               return false;
>> +
>> +       get_page(buf_info->page);
> The get_page call per 2K section will add up in terms of cost, as it
> is an expensive atomic operation. You might see if you can get away
> with batching it, doing one atomic add per page allocation instead of
> one per use.
>
>> -       put_page(page_info->page);
>> -       page_info->dma_addr = 0;
>> -       page_info->page = NULL;
>> +       return true;
>>   }
>>
>>   static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>> @@ -142,16 +163,16 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>>   {
>>          struct ionic_rxq_comp *comp = cq_info->cq_desc;
>>          struct device *dev = q->lif->ionic->dev;
>> -       struct ionic_page_info *page_info;
>> +       struct ionic_buf_info *buf_info;
>>          struct sk_buff *skb;
>>          unsigned int i;
>>          u16 frag_len;
>>          u16 len;
>>
>> -       page_info = &desc_info->pages[0];
>> +       buf_info = &desc_info->bufs[0];
>>          len = le16_to_cpu(comp->len);
>>
>> -       prefetch(page_address(page_info->page) + NET_IP_ALIGN);
>> +       prefetch(buf_info->page);
> Just want to confirm this is what you meant to do. The old code was
> prefetching the first cacheline of the packet data; the new code is
> prefetching only the struct page. You may want to change this to a
> prefetchw if you are expecting it to improve the performance of the
> get_page call.
>
>>          skb = ionic_rx_skb_alloc(q, len, true);
>>          if (unlikely(!skb))
>> @@ -159,7 +180,7 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>>
>>          i = comp->num_sg_elems + 1;
>>          do {
>> -               if (unlikely(!page_info->page)) {
>> +               if (unlikely(!buf_info->page)) {
>>                          struct napi_struct *napi = &q_to_qcq(q)->napi;
>>
>>                          napi->skb = NULL;
>> @@ -167,15 +188,25 @@ static struct sk_buff *ionic_rx_frags(struct ionic_queue *q,
>>                          return NULL;
>>                  }
>>
>> -               frag_len = min(len, (u16)PAGE_SIZE);
>> +               frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
>>                  len -= frag_len;
>>
>> -               dma_unmap_page(dev, dma_unmap_addr(page_info, dma_addr),
>> -                              PAGE_SIZE, DMA_FROM_DEVICE);
>> +               dma_sync_single_for_cpu(dev,
>> +                                       buf_info->dma_addr + buf_info->page_offset,
>> +                                       frag_len, DMA_FROM_DEVICE);
>> +
>>                  skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
>> -                               page_info->page, 0, frag_len, PAGE_SIZE);
>> -               page_info->page = NULL;
>> -               page_info++;
>> +                               buf_info->page, buf_info->page_offset, frag_len,
>> +                               IONIC_PAGE_SIZE);
>> +
>> +               if (!ionic_rx_buf_recycle(q, buf_info, frag_len)) {
>> +                       dma_unmap_page(dev, buf_info->dma_addr,
>> +                                      IONIC_PAGE_SIZE, DMA_FROM_DEVICE);
>> +                       ionic_rx_buf_reset(buf_info);
>> +               }
>> +
> If you don't unmap it, don't you need to sync the remaining portion
> of the page back for use by the device?
>
>> +               buf_info++;
>> +
>>                  i--;
>>          } while (i > 0);
>>
>> @@ -188,26 +219,26 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
>>   {
>>          struct ionic_rxq_comp *comp = cq_info->cq_desc;
>>          struct device *dev = q->lif->ionic->dev;
>> -       struct ionic_page_info *page_info;
>> +       struct ionic_buf_info *buf_info;
>>          struct sk_buff *skb;
>>          u16 len;
>>
>> -       page_info = &desc_info->pages[0];
>> +       buf_info = &desc_info->bufs[0];
>>          len = le16_to_cpu(comp->len);
>>
>>          skb = ionic_rx_skb_alloc(q, len, false);
>>          if (unlikely(!skb))
>>                  return NULL;
>>
>> -       if (unlikely(!page_info->page)) {
>> +       if (unlikely(!buf_info->page)) {
>>                  dev_kfree_skb(skb);
>>                  return NULL;
>>          }
>>
>> -       dma_sync_single_for_cpu(dev, dma_unmap_addr(page_info, dma_addr),
>> +       dma_sync_single_for_cpu(dev, buf_info->dma_addr + buf_info->page_offset,
>>                                  len, DMA_FROM_DEVICE);
>> -       skb_copy_to_linear_data(skb, page_address(page_info->page), len);
>> -       dma_sync_single_for_device(dev, dma_unmap_addr(page_info, dma_addr),
>> +       skb_copy_to_linear_data(skb, page_address(buf_info->page) + buf_info->page_offset, len);
>> +       dma_sync_single_for_device(dev, buf_info->dma_addr + buf_info->page_offset,
>>                                     len, DMA_FROM_DEVICE);
>>
>>          skb_put(skb, len);
>> @@ -327,64 +358,73 @@ void ionic_rx_fill(struct ionic_queue *q)
>>   {
>>          struct net_device *netdev = q->lif->netdev;
>>          struct ionic_desc_info *desc_info;
>> -       struct ionic_page_info *page_info;
>>          struct ionic_rxq_sg_desc *sg_desc;
>>          struct ionic_rxq_sg_elem *sg_elem;
>> +       struct ionic_buf_info *buf_info;
>>          struct ionic_rxq_desc *desc;
>> +       unsigned int max_sg_elems;
>>          unsigned int remain_len;
>> -       unsigned int seg_len;
>> +       unsigned int frag_len;
>>          unsigned int nfrags;
>>          unsigned int i, j;
>>          unsigned int len;
>>
>>          len = netdev->mtu + ETH_HLEN + VLAN_HLEN;
>> -       nfrags = round_up(len, PAGE_SIZE) / PAGE_SIZE;
>> +       max_sg_elems = q->lif->qtype_info[IONIC_QTYPE_RXQ].max_sg_elems;
>>
>>          for (i = ionic_q_space_avail(q); i; i--) {
>> +               nfrags = 0;
>>                  remain_len = len;
>>                  desc_info = &q->info[q->head_idx];
>>                  desc = desc_info->desc;
>> -               sg_desc = desc_info->sg_desc;
>> -               page_info = &desc_info->pages[0];
>> +               buf_info = &desc_info->bufs[0];
>>
>> -               if (page_info->page) { /* recycle the buffer */
>> -                       ionic_rxq_post(q, false, ionic_rx_clean, NULL);
>> -                       continue;
>> -               }
>> -
>> -               /* fill main descriptor - pages[0] */
>> -               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
>> -                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
>> -               desc_info->npages = nfrags;
>> -               if (unlikely(ionic_rx_page_alloc(q, page_info))) {
>> -                       desc->addr = 0;
>> -                       desc->len = 0;
>> -                       return;
>> +               if (!buf_info->page) { /* alloc a new buffer? */
>> +                       if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
>> +                               desc->addr = 0;
>> +                               desc->len = 0;
>> +                               return;
>> +                       }
>>                  }
>> -               desc->addr = cpu_to_le64(page_info->dma_addr);
>> -               seg_len = min_t(unsigned int, PAGE_SIZE, len);
>> -               desc->len = cpu_to_le16(seg_len);
>> -               remain_len -= seg_len;
>> -               page_info++;
>>
>> -               /* fill sg descriptors - pages[1..n] */
>> -               for (j = 0; j < nfrags - 1; j++) {
>> -                       if (page_info->page) /* recycle the sg buffer */
>> -                               continue;
>> +               /* fill main descriptor - buf[0] */
>> +               desc->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
>> +               frag_len = min_t(u16, len, IONIC_PAGE_SIZE - buf_info->page_offset);
>> +               desc->len = cpu_to_le16(frag_len);
>> +               remain_len -= frag_len;
>> +               buf_info++;
>> +               nfrags++;
>>
>> +               /* fill sg descriptors - buf[1..n] */
>> +               sg_desc = desc_info->sg_desc;
>> +               for (j = 0; remain_len > 0 && j < max_sg_elems; j++) {
>>                          sg_elem = &sg_desc->elems[j];
>> -                       if (unlikely(ionic_rx_page_alloc(q, page_info))) {
>> -                               sg_elem->addr = 0;
>> -                               sg_elem->len = 0;
>> -                               return;
>> +                       if (!buf_info->page) { /* alloc a new sg buffer? */
>> +                               if (unlikely(ionic_rx_page_alloc(q, buf_info))) {
>> +                                       sg_elem->addr = 0;
>> +                                       sg_elem->len = 0;
>> +                                       return;
>> +                               }
>>                          }
>> -                       sg_elem->addr = cpu_to_le64(page_info->dma_addr);
>> -                       seg_len = min_t(unsigned int, PAGE_SIZE, remain_len);
>> -                       sg_elem->len = cpu_to_le16(seg_len);
>> -                       remain_len -= seg_len;
>> -                       page_info++;
>> +
>> +                       sg_elem->addr = cpu_to_le64(buf_info->dma_addr + buf_info->page_offset);
>> +                       frag_len = min_t(u16, remain_len, IONIC_PAGE_SIZE - buf_info->page_offset);
>> +                       sg_elem->len = cpu_to_le16(frag_len);
>> +                       remain_len -= frag_len;
>> +                       buf_info++;
>> +                       nfrags++;
>>                  }
>>
>> +               /* clear end sg element as a sentinel */
>> +               if (j < max_sg_elems) {
>> +                       sg_elem = &sg_desc->elems[j];
>> +                       memset(sg_elem, 0, sizeof(*sg_elem));
>> +               }
>> +
>> +               desc->opcode = (nfrags > 1) ? IONIC_RXQ_DESC_OPCODE_SG :
>> +                                             IONIC_RXQ_DESC_OPCODE_SIMPLE;
>> +               desc_info->nbufs = nfrags;
>> +
>>                  ionic_rxq_post(q, false, ionic_rx_clean, NULL);
>>          }
>>
>> @@ -395,21 +435,24 @@ void ionic_rx_fill(struct ionic_queue *q)
>>   void ionic_rx_empty(struct ionic_queue *q)
>>   {
>>          struct ionic_desc_info *desc_info;
>> -       struct ionic_page_info *page_info;
>> +       struct ionic_buf_info *buf_info;
>>          unsigned int i, j;
>>
>>          for (i = 0; i < q->num_descs; i++) {
>>                  desc_info = &q->info[i];
>>                  for (j = 0; j < IONIC_RX_MAX_SG_ELEMS + 1; j++) {
>> -                       page_info = &desc_info->pages[j];
>> -                       if (page_info->page)
>> -                               ionic_rx_page_free(q, page_info);
>> +                       buf_info = &desc_info->bufs[j];
>> +                       if (buf_info->page)
>> +                               ionic_rx_page_free(q, buf_info);
>>                  }
>>
>> -               desc_info->npages = 0;
>> +               desc_info->nbufs = 0;
>>                  desc_info->cb = NULL;
>>                  desc_info->cb_arg = NULL;
>>          }
>> +
>> +       q->head_idx = 0;
>> +       q->tail_idx = 0;
>>   }
>>
>>   static void ionic_dim_update(struct ionic_qcq *qcq)
>> --
>> 2.17.1
>>


^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2021-03-12 20:02 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-10 19:26 [PATCH net-next 0/6] ionic Rx updates Shannon Nelson
2021-03-10 19:26 ` [PATCH net-next 1/6] ionic: move rx_page_alloc and free Shannon Nelson
2021-03-10 19:26 ` [PATCH net-next 2/6] ionic: implement Rx page reuse Shannon Nelson
2021-03-11  2:14   ` Alexander Duyck
2021-03-12 20:01     ` Shannon Nelson
2021-03-10 19:26 ` [PATCH net-next 3/6] ionic: optimize fastpath struct usage Shannon Nelson
2021-03-10 19:26 ` [PATCH net-next 4/6] ionic: simplify rx skb alloc Shannon Nelson
2021-03-10 19:26 ` [PATCH net-next 5/6] ionic: rebuild debugfs on qcq swap Shannon Nelson
2021-03-10 19:26 ` [PATCH net-next 6/6] ionic: simplify use of completion types Shannon Nelson
2021-03-10 23:40 ` [PATCH net-next 0/6] ionic Rx updates patchwork-bot+netdevbpf
