All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-26  2:25 [PATCH net v6 0/2] net/ps3_gelic_net: DMA related fixes Geoff Levand
@ 2023-02-26  2:25 ` Geoff Levand
  2023-02-28  2:20   ` Jakub Kicinski
  2023-02-28 16:12   ` Alexander Lobakin
  2023-02-26  2:25 ` [PATCH net v6 2/2] net/ps3_gelic_net: Use dma_mapping_error Geoff Levand
  1 sibling, 2 replies; 10+ messages in thread
From: Geoff Levand @ 2023-02-26  2:25 UTC (permalink / raw)
  To: netdev, Jakub Kicinski, David S. Miller, Alexander Lobakin,
	Alexander Duyck, Paolo Abeni

The Gelic Ethernet device needs to have the RX sk_buffs aligned to
GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a
multiple of GELIC_NET_RXBUF_ALIGN.

The current Gelic Ethernet driver was not allocating sk_buffs large
enough to allow for this alignment.

Fixes various randomly occurring runtime network errors.

Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3")
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 66 +++++++++++---------
 drivers/net/ethernet/toshiba/ps3_gelic_net.h |  2 +-
 2 files changed, 39 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index cf8de8a7a8a1..7e12e041acd3 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -365,51 +365,61 @@ static int gelic_card_init_chain(struct gelic_card *card,
  *
  * allocates a new rx skb, iommu-maps it and attaches it to the descriptor.
  * Activate the descriptor state-wise
+ *
+ * Gelic RX sk_buffs must be aligned to GELIC_NET_RXBUF_ALIGN and the length
+ * must be a multiple of GELIC_NET_RXBUF_ALIGN.
  */
 static int gelic_descr_prepare_rx(struct gelic_card *card,
 				  struct gelic_descr *descr)
 {
-	int offset;
-	unsigned int bufsize;
+	struct device *dev = ctodev(card);
+	void *napi_buff;
 
 	if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
 		dev_info(ctodev(card), "%s: ERROR status\n", __func__);
-	/* we need to round up the buffer size to a multiple of 128 */
-	bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
-
-	/* and we need to have it 128 byte aligned, therefore we allocate a
-	 * bit more */
-	descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
-	if (!descr->skb) {
-		descr->buf_addr = 0; /* tell DMAC don't touch memory */
-		return -ENOMEM;
-	}
-	descr->buf_size = cpu_to_be32(bufsize);
+
 	descr->dmac_cmd_status = 0;
 	descr->result_size = 0;
 	descr->valid_size = 0;
 	descr->data_error = 0;
+	descr->buf_size = cpu_to_be32(GELIC_NET_MAX_MTU);
+
+	napi_buff = napi_alloc_frag_align(GELIC_NET_MAX_MTU,
+		GELIC_NET_RXBUF_ALIGN);
+
+	if (unlikely(!napi_buff)) {
+		descr->skb = NULL;
+		descr->buf_addr = 0;
+		descr->buf_size = 0;
+		return -ENOMEM;
+	}
+
+	descr->skb = napi_build_skb(napi_buff, GELIC_NET_MAX_MTU);
+
+	if (unlikely(!descr->skb)) {
+		skb_free_frag(napi_buff);
+		descr->skb = NULL;
+		descr->buf_addr = 0;
+		descr->buf_size = 0;
+		return -ENOMEM;
+	}
+
+	descr->buf_addr = cpu_to_be32(dma_map_single(dev, napi_buff,
+		GELIC_NET_MAX_MTU, DMA_FROM_DEVICE));
 
-	offset = ((unsigned long)descr->skb->data) &
-		(GELIC_NET_RXBUF_ALIGN - 1);
-	if (offset)
-		skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);
-	/* io-mmu-map the skb */
-	descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card),
-						     descr->skb->data,
-						     GELIC_NET_MAX_MTU,
-						     DMA_FROM_DEVICE));
 	if (!descr->buf_addr) {
-		dev_kfree_skb_any(descr->skb);
+		skb_free_frag(napi_buff);
 		descr->skb = NULL;
-		dev_info(ctodev(card),
-			 "%s:Could not iommu-map rx buffer\n", __func__);
+		descr->buf_addr = 0;
+		descr->buf_size = 0;
+		dev_err_once(dev, "%s:Could not iommu-map rx buffer\n",
+			__func__);
 		gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
 		return -ENOMEM;
-	} else {
-		gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
-		return 0;
 	}
+
+	gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
+	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
index 68f324ed4eaf..d3868eb7234c 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
@@ -19,7 +19,7 @@
 #define GELIC_NET_RX_DESCRIPTORS        128 /* num of descriptors */
 #define GELIC_NET_TX_DESCRIPTORS        128 /* num of descriptors */
 
-#define GELIC_NET_MAX_MTU               VLAN_ETH_FRAME_LEN
+#define GELIC_NET_MAX_MTU               1920
 #define GELIC_NET_MIN_MTU               VLAN_ETH_ZLEN
 #define GELIC_NET_RXBUF_ALIGN           128
 #define GELIC_CARD_RX_CSUM_DEFAULT      1 /* hw chksum */
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH net v6 0/2] net/ps3_gelic_net: DMA related fixes
@ 2023-02-26  2:25 Geoff Levand
  2023-02-26  2:25 ` [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length Geoff Levand
  2023-02-26  2:25 ` [PATCH net v6 2/2] net/ps3_gelic_net: Use dma_mapping_error Geoff Levand
  0 siblings, 2 replies; 10+ messages in thread
From: Geoff Levand @ 2023-02-26  2:25 UTC (permalink / raw)
  To: netdev, Jakub Kicinski, David S. Miller, Alexander Lobakin,
	Alexander Duyck, Paolo Abeni

v6: Reworked and cleaned up patches.
v5: Some additional patch cleanups.
v4: More patch cleanups.
v3: Cleaned up patches as requested.

Geoff Levand (2):
  net/ps3_gelic_net: Fix RX sk_buff length
  net/ps3_gelic_net: Use dma_mapping_error

 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 103 ++++++++++---------
 drivers/net/ethernet/toshiba/ps3_gelic_net.h |   2 +-
 2 files changed, 58 insertions(+), 47 deletions(-)

-- 
2.34.1


^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH net v6 2/2] net/ps3_gelic_net: Use dma_mapping_error
  2023-02-26  2:25 [PATCH net v6 0/2] net/ps3_gelic_net: DMA related fixes Geoff Levand
  2023-02-26  2:25 ` [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length Geoff Levand
@ 2023-02-26  2:25 ` Geoff Levand
  2023-02-28 16:14   ` Alexander Lobakin
  1 sibling, 1 reply; 10+ messages in thread
From: Geoff Levand @ 2023-02-26  2:25 UTC (permalink / raw)
  To: netdev, Jakub Kicinski, David S. Miller, Alexander Lobakin,
	Alexander Duyck, Paolo Abeni

The current Gelic Etherenet driver was checking the return value of its
dma_map_single call, and not using the dma_mapping_error() routine.

Fixes runtime problems like these:

  DMA-API: ps3_gelic_driver sb_05: device driver failed to check map error
  WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:1027 .check_unmap+0x888/0x8dc

Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3")
Signed-off-by: Geoff Levand <geoff@infradead.org>
---
 drivers/net/ethernet/toshiba/ps3_gelic_net.c | 37 ++++++++++----------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 7e12e041acd3..2f7505609447 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -309,23 +309,31 @@ static int gelic_card_init_chain(struct gelic_card *card,
 				 struct gelic_descr_chain *chain,
 				 struct gelic_descr *start_descr, int no)
 {
-	int i;
+	struct device *dev = ctodev(card);
 	struct gelic_descr *descr;
+	int i;
 
-	descr = start_descr;
-	memset(descr, 0, sizeof(*descr) * no);
+	memset(start_descr, 0, no * sizeof(*start_descr));
 
 	/* set up the hardware pointers in each descriptor */
-	for (i = 0; i < no; i++, descr++) {
+	for (i = 0, descr = start_descr; i < no; i++, descr++) {
 		gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
 		descr->bus_addr =
 			dma_map_single(ctodev(card), descr,
 				       GELIC_DESCR_SIZE,
 				       DMA_BIDIRECTIONAL);
 
-		if (!descr->bus_addr)
-			goto iommu_error;
+		if (dma_mapping_error(dev, descr->bus_addr)) {
+			dev_err_once(dev, "%s:%d: dma_mapping_error\n",
+				__func__, __LINE__);
 
+			for (i--, descr--; i >= 0; i--, descr--) {
+				dma_unmap_single(ctodev(card), descr->bus_addr,
+					GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL);
+			}
+			return -ENOMEM;
+		}
+ 
 		descr->next = descr + 1;
 		descr->prev = descr - 1;
 	}
@@ -346,14 +354,6 @@ static int gelic_card_init_chain(struct gelic_card *card,
 	(descr - 1)->next_descr_addr = 0;
 
 	return 0;
-
-iommu_error:
-	for (i--, descr--; 0 <= i; i--, descr--)
-		if (descr->bus_addr)
-			dma_unmap_single(ctodev(card), descr->bus_addr,
-					 GELIC_DESCR_SIZE,
-					 DMA_BIDIRECTIONAL);
-	return -ENOMEM;
 }
 
 /**
@@ -407,7 +407,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card,
 	descr->buf_addr = cpu_to_be32(dma_map_single(dev, napi_buff,
 		GELIC_NET_MAX_MTU, DMA_FROM_DEVICE));
 
-	if (!descr->buf_addr) {
+	if (dma_mapping_error(dev, descr->buf_addr)) {
 		skb_free_frag(napi_buff);
 		descr->skb = NULL;
 		descr->buf_addr = 0;
@@ -773,6 +773,7 @@ static int gelic_descr_prepare_tx(struct gelic_card *card,
 				  struct gelic_descr *descr,
 				  struct sk_buff *skb)
 {
+	struct device *dev = ctodev(card);
 	dma_addr_t buf;
 
 	if (card->vlan_required) {
@@ -787,10 +788,10 @@ static int gelic_descr_prepare_tx(struct gelic_card *card,
 		skb = skb_tmp;
 	}
 
-	buf = dma_map_single(ctodev(card), skb->data, skb->len, DMA_TO_DEVICE);
+	buf = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE);
 
-	if (!buf) {
-		dev_err(ctodev(card),
+	if (dma_mapping_error(dev, buf)) {
+		dev_err_once(dev,
 			"dma map 2 failed (%p, %i). Dropping packet\n",
 			skb->data, skb->len);
 		return -ENOMEM;
-- 
2.34.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-26  2:25 ` [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length Geoff Levand
@ 2023-02-28  2:20   ` Jakub Kicinski
  2023-02-28 15:47     ` Alexander Lobakin
  2023-02-28 16:12   ` Alexander Lobakin
  1 sibling, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2023-02-28  2:20 UTC (permalink / raw)
  To: Geoff Levand
  Cc: netdev, David S. Miller, Alexander Lobakin, Alexander Duyck, Paolo Abeni

On Sun, 26 Feb 2023 02:25:42 +0000 Geoff Levand wrote:
> +	napi_buff = napi_alloc_frag_align(GELIC_NET_MAX_MTU,
> +		GELIC_NET_RXBUF_ALIGN);

You're changing how the buffers are allocated.

> +	if (unlikely(!napi_buff)) {
> +		descr->skb = NULL;
> +		descr->buf_addr = 0;
> +		descr->buf_size = 0;

Wiping the descriptors on failure.

> +		return -ENOMEM;
> +	}

And generally reshuffling the code.

Once again - please don't do any of that in a bug fix.
Describe precisely what the problem is and fix that problem,
Once the fix is accepted you can send separate patches with 
other improvements.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-28  2:20   ` Jakub Kicinski
@ 2023-02-28 15:47     ` Alexander Lobakin
  2023-02-28 20:31       ` Jakub Kicinski
  0 siblings, 1 reply; 10+ messages in thread
From: Alexander Lobakin @ 2023-02-28 15:47 UTC (permalink / raw)
  To: Jakub Kicinski
  Cc: Geoff Levand, netdev, David S. Miller, Alexander Lobakin,
	Alexander Duyck, Paolo Abeni

From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Feb 2023 18:20:40 -0800

> On Sun, 26 Feb 2023 02:25:42 +0000 Geoff Levand wrote:
>> +	napi_buff = napi_alloc_frag_align(GELIC_NET_MAX_MTU,
>> +		GELIC_NET_RXBUF_ALIGN);
> 
> You're changing how the buffers are allocated.
> 
>> +	if (unlikely(!napi_buff)) {
>> +		descr->skb = NULL;
>> +		descr->buf_addr = 0;
>> +		descr->buf_size = 0;
> 
> Wiping the descriptors on failure.
> 
>> +		return -ENOMEM;
>> +	}
> 
> And generally reshuffling the code.
> 
> Once again - please don't do any of that in a bug fix.
> Describe precisely what the problem is and fix that problem,

IIRC the original problem is that the skb linear parts are not always
aligned to a boundary which this particular HW requires. So initially
there was something like "allocate len + alignment - 1, then
PTR_ALIGN()", but I said that it's a waste of memory and we shouldn't do
that, using napi_alloc_frag_align() instead.
I guess if that would've been described, this could go as a fix? I don't
think wasting memory is a good fix, even if we need to change the
allocation scheme...

> Once the fix is accepted you can send separate patches with 
> other improvements.

Thanks,
Olek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-26  2:25 ` [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length Geoff Levand
  2023-02-28  2:20   ` Jakub Kicinski
@ 2023-02-28 16:12   ` Alexander Lobakin
  2023-03-26 20:38     ` Geoff Levand
  1 sibling, 1 reply; 10+ messages in thread
From: Alexander Lobakin @ 2023-02-28 16:12 UTC (permalink / raw)
  To: Geoff Levand
  Cc: Jakub Kicinski, netdev, David S. Miller, Alexander Duyck, Paolo Abeni

From: Geoff Levand <geoff@infradead.org>
Date: Sun, 26 Feb 2023 02:25:42 +0000

> The Gelic Ethernet device needs to have the RX sk_buffs aligned to
> GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a
> multiple of GELIC_NET_RXBUF_ALIGN.

[...]

>  static int gelic_descr_prepare_rx(struct gelic_card *card,
>  				  struct gelic_descr *descr)
>  {
> -	int offset;
> -	unsigned int bufsize;
> +	struct device *dev = ctodev(card);
> +	void *napi_buff;
>  
>  	if (gelic_descr_get_status(descr) !=  GELIC_DESCR_DMA_NOT_IN_USE)
>  		dev_info(ctodev(card), "%s: ERROR status\n", __func__);
> -	/* we need to round up the buffer size to a multiple of 128 */
> -	bufsize = ALIGN(GELIC_NET_MAX_MTU, GELIC_NET_RXBUF_ALIGN);
> -
> -	/* and we need to have it 128 byte aligned, therefore we allocate a
> -	 * bit more */
> -	descr->skb = dev_alloc_skb(bufsize + GELIC_NET_RXBUF_ALIGN - 1);
> -	if (!descr->skb) {
> -		descr->buf_addr = 0; /* tell DMAC don't touch memory */
> -		return -ENOMEM;
> -	}
> -	descr->buf_size = cpu_to_be32(bufsize);
> +
>  	descr->dmac_cmd_status = 0;
>  	descr->result_size = 0;
>  	descr->valid_size = 0;
>  	descr->data_error = 0;
> +	descr->buf_size = cpu_to_be32(GELIC_NET_MAX_MTU);
> +
> +	napi_buff = napi_alloc_frag_align(GELIC_NET_MAX_MTU,
> +		GELIC_NET_RXBUF_ALIGN);

Must be aligned to the opening brace.

> +

Redundant newline.

> +	if (unlikely(!napi_buff)) {
> +		descr->skb = NULL;
> +		descr->buf_addr = 0;
> +		descr->buf_size = 0;
> +		return -ENOMEM;
> +	}
> +
> +	descr->skb = napi_build_skb(napi_buff, GELIC_NET_MAX_MTU);

You're mixing two, no, three things here.

1. MTU. I.e. max L3+ payload len. It doesn't include Eth header, VLAN
   and FCS.
2. Max frame size. It is MTU + the abovementioned. Usually it's
   something like `some power of two - 1`.
3. skb truesize.
   It is: frame size (i.e. 2) + headroom (usually %NET_SKB_PAD when
   !XDP, %XDP_PACKET_HEADROOM otherwise, plus %NET_IP_ALIGN) + tailroom
   (SKB_DATA_ALIGN(sizeof(struct skb_shared_info), see
    SKB_WITH_OVERHEAD() and adjacent macros).

I'm not sure whether your definition is the first or the second... or
maybe third? You had 1522, but then changed to 1920? You must pass the
third to napi_build_skb().
So you should calculate the truesize first, then allocate a frag and
build an skb. Then skb->data will point to the free space with the
length of your max frame size.
And the truesize calculation might be not just a hardcoded value, but an
expression where you add all those head- and tailrooms, so that they
will be calculated depending on the platform's configuration.

Your current don't reserve any space as headroom, so that frames / HW
visible part starts at the very beginning of a frag. It's okay, I mean,
there will be reallocations when the stack needs more headroom, but
definitely not something to kill the system. You could leave it to an
improvement series in the future*.
But it either way *must* include tailroom, e.g.
SKB_DATA_ALIGN(see_above), otherwise your HW might overwrite kernel
structures.

* given that the required HW alignment is 128, I'd just allocate 128
bytes more and then do `skb_reserve(skb, RXBUF_HW_ALIGN` right after
napi_build_skb() to avoid reallocations.

> +

This is also redundant.

> +	if (unlikely(!descr->skb)) {
> +		skb_free_frag(napi_buff);
> +		descr->skb = NULL;
> +		descr->buf_addr = 0;
> +		descr->buf_size = 0;
> +		return -ENOMEM;
> +	}
> +
> +	descr->buf_addr = cpu_to_be32(dma_map_single(dev, napi_buff,
> +		GELIC_NET_MAX_MTU, DMA_FROM_DEVICE));
>  
> -	offset = ((unsigned long)descr->skb->data) &
> -		(GELIC_NET_RXBUF_ALIGN - 1);
> -	if (offset)
> -		skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);
> -	/* io-mmu-map the skb */
> -	descr->buf_addr = cpu_to_be32(dma_map_single(ctodev(card),
> -						     descr->skb->data,
> -						     GELIC_NET_MAX_MTU,
> -						     DMA_FROM_DEVICE));
>  	if (!descr->buf_addr) {
> -		dev_kfree_skb_any(descr->skb);
> +		skb_free_frag(napi_buff);
>  		descr->skb = NULL;
> -		dev_info(ctodev(card),
> -			 "%s:Could not iommu-map rx buffer\n", __func__);
> +		descr->buf_addr = 0;
> +		descr->buf_size = 0;
> +		dev_err_once(dev, "%s:Could not iommu-map rx buffer\n",
> +			__func__);
>  		gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
>  		return -ENOMEM;
> -	} else {
> -		gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
> -		return 0;
>  	}
> +
> +	gelic_descr_set_status(descr, GELIC_DESCR_DMA_CARDOWNED);
> +	return 0;

An empty newline is preferred before any `return`.

>  }
>  
>  /**
> diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.h b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
> index 68f324ed4eaf..d3868eb7234c 100644
> --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.h
> +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.h
> @@ -19,7 +19,7 @@
>  #define GELIC_NET_RX_DESCRIPTORS        128 /* num of descriptors */
>  #define GELIC_NET_TX_DESCRIPTORS        128 /* num of descriptors */
>  
> -#define GELIC_NET_MAX_MTU               VLAN_ETH_FRAME_LEN
> +#define GELIC_NET_MAX_MTU               1920
>  #define GELIC_NET_MIN_MTU               VLAN_ETH_ZLEN
>  #define GELIC_NET_RXBUF_ALIGN           128
>  #define GELIC_CARD_RX_CSUM_DEFAULT      1 /* hw chksum */

Thanks,
Olek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 2/2] net/ps3_gelic_net: Use dma_mapping_error
  2023-02-26  2:25 ` [PATCH net v6 2/2] net/ps3_gelic_net: Use dma_mapping_error Geoff Levand
@ 2023-02-28 16:14   ` Alexander Lobakin
  0 siblings, 0 replies; 10+ messages in thread
From: Alexander Lobakin @ 2023-02-28 16:14 UTC (permalink / raw)
  To: Geoff Levand
  Cc: netdev, Jakub Kicinski, David S. Miller, Alexander Duyck, Paolo Abeni

From: Geoff Levand <geoff@infradead.org>
Date: Sun, 26 Feb 2023 02:25:43 +0000

> The current Gelic Etherenet driver was checking the return value of its
> dma_map_single call, and not using the dma_mapping_error() routine.
> 
> Fixes runtime problems like these:
> 
>   DMA-API: ps3_gelic_driver sb_05: device driver failed to check map error
>   WARNING: CPU: 0 PID: 0 at kernel/dma/debug.c:1027 .check_unmap+0x888/0x8dc
> 
> Fixes: 02c1889166b4 ("ps3: gigabit ethernet driver for PS3, take3")
> Signed-off-by: Geoff Levand <geoff@infradead.org>
> ---
>  drivers/net/ethernet/toshiba/ps3_gelic_net.c | 37 ++++++++++----------
>  1 file changed, 19 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
> index 7e12e041acd3..2f7505609447 100644
> --- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
> +++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
> @@ -309,23 +309,31 @@ static int gelic_card_init_chain(struct gelic_card *card,
>  				 struct gelic_descr_chain *chain,
>  				 struct gelic_descr *start_descr, int no)
>  {
> -	int i;
> +	struct device *dev = ctodev(card);
>  	struct gelic_descr *descr;
> +	int i;
>  
> -	descr = start_descr;
> -	memset(descr, 0, sizeof(*descr) * no);
> +	memset(start_descr, 0, no * sizeof(*start_descr));

If you allocated the descriptors using dma_alloc_coherent(), they're
already cleared, memset() is redundant.

>  
>  	/* set up the hardware pointers in each descriptor */
> -	for (i = 0; i < no; i++, descr++) {
> +	for (i = 0, descr = start_descr; i < no; i++, descr++) {
>  		gelic_descr_set_status(descr, GELIC_DESCR_DMA_NOT_IN_USE);
>  		descr->bus_addr =
>  			dma_map_single(ctodev(card), descr,
>  				       GELIC_DESCR_SIZE,
>  				       DMA_BIDIRECTIONAL);
>  

Redundant newline as well.

> -		if (!descr->bus_addr)
> -			goto iommu_error;
> +		if (dma_mapping_error(dev, descr->bus_addr)) {
> +			dev_err_once(dev, "%s:%d: dma_mapping_error\n",
> +				__func__, __LINE__);
>  
> +			for (i--, descr--; i >= 0; i--, descr--) {
> +				dma_unmap_single(ctodev(card), descr->bus_addr,
> +					GELIC_DESCR_SIZE, DMA_BIDIRECTIONAL);
> +			}
> +			return -ENOMEM;
> +		}
> + 
>  		descr->next = descr + 1;
>  		descr->prev = descr - 1;
>  	}
> @@ -346,14 +354,6 @@ static int gelic_card_init_chain(struct gelic_card *card,
>  	(descr - 1)->next_descr_addr = 0;
>  
>  	return 0;
> -
> -iommu_error:
> -	for (i--, descr--; 0 <= i; i--, descr--)
> -		if (descr->bus_addr)
> -			dma_unmap_single(ctodev(card), descr->bus_addr,
> -					 GELIC_DESCR_SIZE,
> -					 DMA_BIDIRECTIONAL);
> -	return -ENOMEM;
>  }
>  
>  /**
> @@ -407,7 +407,7 @@ static int gelic_descr_prepare_rx(struct gelic_card *card,
>  	descr->buf_addr = cpu_to_be32(dma_map_single(dev, napi_buff,
>  		GELIC_NET_MAX_MTU, DMA_FROM_DEVICE));
>  
> -	if (!descr->buf_addr) {
> +	if (dma_mapping_error(dev, descr->buf_addr)) {
>  		skb_free_frag(napi_buff);
>  		descr->skb = NULL;
>  		descr->buf_addr = 0;
> @@ -773,6 +773,7 @@ static int gelic_descr_prepare_tx(struct gelic_card *card,
>  				  struct gelic_descr *descr,
>  				  struct sk_buff *skb)
>  {
> +	struct device *dev = ctodev(card);
>  	dma_addr_t buf;
>  
>  	if (card->vlan_required) {
> @@ -787,10 +788,10 @@ static int gelic_descr_prepare_tx(struct gelic_card *card,
>  		skb = skb_tmp;
>  	}
>  
> -	buf = dma_map_single(ctodev(card), skb->data, skb->len, DMA_TO_DEVICE);
> +	buf = dma_map_single(dev, skb->data, skb->len, DMA_TO_DEVICE);
>  

(same)

> -	if (!buf) {
> -		dev_err(ctodev(card),
> +	if (dma_mapping_error(dev, buf)) {
> +		dev_err_once(dev,
>  			"dma map 2 failed (%p, %i). Dropping packet\n",
>  			skb->data, skb->len);
>  		return -ENOMEM;
Thanks,
Olek

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-28 15:47     ` Alexander Lobakin
@ 2023-02-28 20:31       ` Jakub Kicinski
  2023-03-05  2:07         ` Geoff Levand
  0 siblings, 1 reply; 10+ messages in thread
From: Jakub Kicinski @ 2023-02-28 20:31 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: Geoff Levand, netdev, David S. Miller, Alexander Lobakin,
	Alexander Duyck, Paolo Abeni

On Tue, 28 Feb 2023 16:47:25 +0100 Alexander Lobakin wrote:
> >> +		return -ENOMEM;
> >> +	}  
> > 
> > And generally reshuffling the code.
> > 
> > Once again - please don't do any of that in a bug fix.
> > Describe precisely what the problem is and fix that problem,  
> 
> IIRC the original problem is that the skb linear parts are not always
> aligned to a boundary which this particular HW requires. So initially
> there was something like "allocate len + alignment - 1, then
> PTR_ALIGN()",

Let's focus on where the bug is. At a quick look I'm guessing that 
the bug is that we align the CPU buffer instead of the DMA buffer.
We should pass the entire allocate len + alignment - 1 as length 
to dma_map() and then align the dma_addr. dma_addr is what the device
sees. Not the virtual address of skb->data.

If I'm right the bug is not in fact directly addressed by the patch.
I'm guessing the patch helps because at least the patch passes the
aligned length to the dma_map(), rather than GELIC_NET_MAX_MTU (which
is not a multiple of 128).

> but I said that it's a waste of memory and we shouldn't do
> that, using napi_alloc_frag_align() instead.
> I guess if that would've been described, this could go as a fix? I don't
> think wasting memory is a good fix, even if we need to change the
> allocation scheme...

In general doing such a rewrite may be fine, if the author is an expert
and the commit message is very precise. Here we are at v6 already and
IMHO the problem is not even well understood.

Let's focus on understanding the problem and writing a _minimal_ fix.

The waste of memory claim can't be taken seriously when the MTU define
is bumped by 500 with no mention in the commit msg, as you also noticed.

Minimal fix, please.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-28 20:31       ` Jakub Kicinski
@ 2023-03-05  2:07         ` Geoff Levand
  0 siblings, 0 replies; 10+ messages in thread
From: Geoff Levand @ 2023-03-05  2:07 UTC (permalink / raw)
  To: Jakub Kicinski, Alexander Lobakin
  Cc: netdev, David S. Miller, Alexander Lobakin, Alexander Duyck, Paolo Abeni

Hi,

On 2/28/23 12:31, Jakub Kicinski wrote:
>>
>> IIRC the original problem is that the skb linear parts are not always
>> aligned to a boundary which this particular HW requires. So initially
>> there was something like "allocate len + alignment - 1, then
>> PTR_ALIGN()",
> 
> Let's focus on where the bug is. At a quick look I'm guessing that 
> the bug is that we align the CPU buffer instead of the DMA buffer.
> We should pass the entire allocate len + alignment - 1 as length 
> to dma_map() and then align the dma_addr. dma_addr is what the device
> sees. Not the virtual address of skb->data.
> 
> If I'm right the bug is not in fact directly addressed by the patch.
> I'm guessing the patch helps because at least the patch passes the
> aligned length to the dma_map(), rather than GELIC_NET_MAX_MTU (which
> is not a multiple of 128).

I found some old notes for the gelic network device.  It had values
for the max frame size, and the max MTU size.  These values are the
same as what is in the 'spider net' driver.  For patch set v7 I just
took what the spider net driver was doing for its RX DMA buffer
allocation and applied that to the gelic driver.

I think your comment about aligning the DMA buffer is addressed by
the lines:

    offset = ((unsigned long)descr->skb->data) &
		(GELIC_NET_RXBUF_ALIGN - 1);
    if (offset)
        skb_reserve(descr->skb, GELIC_NET_RXBUF_ALIGN - offset);

I tried to do some thorough testing of v7, and I couldn't get it to
error.

-Geoff




^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length
  2023-02-28 16:12   ` Alexander Lobakin
@ 2023-03-26 20:38     ` Geoff Levand
  0 siblings, 0 replies; 10+ messages in thread
From: Geoff Levand @ 2023-03-26 20:38 UTC (permalink / raw)
  To: Alexander Lobakin
  Cc: Jakub Kicinski, netdev, David S. Miller, Alexander Duyck, Paolo Abeni

Hi,

I've gone back to setting up the napi routines.

On 2/28/23 08:12, Alexander Lobakin wrote:
> From: Geoff Levand <geoff@infradead.org>
> Date: Sun, 26 Feb 2023 02:25:42 +0000
> 
>> The Gelic Ethernet device needs to have the RX sk_buffs aligned to
>> GELIC_NET_RXBUF_ALIGN and the length of the RX sk_buffs must be a
>> multiple of GELIC_NET_RXBUF_ALIGN.

>> +
>> +	napi_buff = napi_alloc_frag_align(GELIC_NET_MAX_MTU,
>> +		GELIC_NET_RXBUF_ALIGN);
>> +
>> +	descr->skb = napi_build_skb(napi_buff, GELIC_NET_MAX_MTU);
> 
> You're mixing two, no, three things here.
> 
> 1. MTU. I.e. max L3+ payload len. It doesn't include Eth header, VLAN
>    and FCS.
> 2. Max frame size. It is MTU + the abovementioned. Usually it's
>    something like `some power of two - 1`.
> 3. skb truesize.
>    It is: frame size (i.e. 2) + headroom (usually %NET_SKB_PAD when
>    !XDP, %XDP_PACKET_HEADROOM otherwise, plus %NET_IP_ALIGN) + tailroom
>    (SKB_DATA_ALIGN(sizeof(struct skb_shared_info), see
>     SKB_WITH_OVERHEAD() and adjacent macros).
> 
> I'm not sure whether your definition is the first or the second... or
> maybe third? You had 1522, but then changed to 1920? You must pass the
> third to napi_build_skb().
> So you should calculate the truesize first, then allocate a frag and
> build an skb. Then skb->data will point to the free space with the
> length of your max frame size.
> And the truesize calculation might be not just a hardcoded value, but an
> expression where you add all those head- and tailrooms, so that they
> will be calculated depending on the platform's configuration.
> 
> Your current don't reserve any space as headroom, so that frames / HW
> visible part starts at the very beginning of a frag. It's okay, I mean,
> there will be reallocations when the stack needs more headroom, but
> definitely not something to kill the system. You could leave it to an
> improvement series in the future*.
> But it either way *must* include tailroom, e.g.
> SKB_DATA_ALIGN(see_above), otherwise your HW might overwrite kernel
> structures.
> 
> * given that the required HW alignment is 128, I'd just allocate 128
> bytes more and then do `skb_reserve(skb, RXBUF_HW_ALIGN` right after
> napi_build_skb() to avoid reallocations.

Looking at the docs for the PS3's gelic network device I found
that the DMA buffer it uses has a fixed layout:

  VLAN Data   2 bytes
  Dest MAC    6 bytes
  Source MAC  6 bytes
  Type/Length 2 bytes
  DATA        46-2294 bytes

So, the max DMA buffer size is 2310, which I guess is the same
as the MAX_FRAME size, which is given as 2312.  That's about
18.05*128.  So if the napi_buff size is 19*128 = 2432 and the
start aligned to 128, that should give me what I need:

  #define GELIC_NET_RXBUF_ALIGN 128
  static const unsigned int napi_buff_size = 19 * GELIC_NET_RXBUF_ALIGN;

  napi_buff = napi_alloc_frag_align(napi_buff_size, GELIC_NET_RXBUF_ALIGN);

  descr->skb = napi_build_skb(napi_buff, napi_buff_size);

  cpu_addr = dma_map_single(dev, napi_buff, napi_buff_size, DMA_FROM_DEVICE);

You can find the actual patch here:

  https://git.kernel.org/pub/scm/linux/kernel/git/geoff/ps3-linux.git/commit/?h=ps3-queue-v6.3--gelic-work&id=629de5a5d2875354c5d48fca7f5c1d24f4bf3a8e

I did some rigorous testing with this and didn't have any
problems.

-Geoff




 






^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2023-03-26 20:38 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-26  2:25 [PATCH net v6 0/2] net/ps3_gelic_net: DMA related fixes Geoff Levand
2023-02-26  2:25 ` [PATCH net v6 1/2] net/ps3_gelic_net: Fix RX sk_buff length Geoff Levand
2023-02-28  2:20   ` Jakub Kicinski
2023-02-28 15:47     ` Alexander Lobakin
2023-02-28 20:31       ` Jakub Kicinski
2023-03-05  2:07         ` Geoff Levand
2023-02-28 16:12   ` Alexander Lobakin
2023-03-26 20:38     ` Geoff Levand
2023-02-26  2:25 ` [PATCH net v6 2/2] net/ps3_gelic_net: Use dma_mapping_error Geoff Levand
2023-02-28 16:14   ` Alexander Lobakin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.