All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/4] bgmac: simplify tx ring index handling
@ 2015-04-12 10:08 Felix Fietkau
  2015-04-12 10:08 ` [PATCH 2/4] bgmac: leave interrupts disabled as long as there is work to do Felix Fietkau
                   ` (2 more replies)
  0 siblings, 3 replies; 10+ messages in thread
From: Felix Fietkau @ 2015-04-12 10:08 UTC (permalink / raw)
  To: netdev; +Cc: zajec5, hauke

Keep incrementing ring->start and ring->end instead of pointing it to
the actual ring slot entry. This simplifies the calculation of the
number of free slots.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 44 ++++++++++++++---------------------
 drivers/net/ethernet/broadcom/bgmac.h |  6 ++---
 2 files changed, 20 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index fa8f9e1..73374b4 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -142,11 +142,10 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 {
 	struct device *dma_dev = bgmac->core->dma_dev;
 	struct net_device *net_dev = bgmac->net_dev;
-	struct bgmac_slot_info *slot = &ring->slots[ring->end];
-	int free_slots;
+	int index = ring->end % BGMAC_TX_RING_SLOTS;
+	struct bgmac_slot_info *slot = &ring->slots[index];
 	int nr_frags;
 	u32 flags;
-	int index = ring->end;
 	int i;
 
 	if (skb->len > BGMAC_DESC_CTL1_LEN) {
@@ -158,13 +157,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 		skb_checksum_help(skb);
 
 	nr_frags = skb_shinfo(skb)->nr_frags;
-
-	if (ring->start <= ring->end)
-		free_slots = ring->start - ring->end + BGMAC_TX_RING_SLOTS;
-	else
-		free_slots = ring->start - ring->end;
-
-	if (free_slots <= nr_frags + 1) {
+	if (ring->end - ring->start + nr_frags + 1 >= BGMAC_TX_RING_SLOTS) {
 		bgmac_err(bgmac, "TX ring is full, queue should be stopped!\n");
 		netif_stop_queue(net_dev);
 		return NETDEV_TX_BUSY;
@@ -200,7 +193,7 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	}
 
 	slot->skb = skb;
-
+	ring->end += nr_frags + 1;
 	netdev_sent_queue(net_dev, skb->len);
 
 	wmb();
@@ -208,13 +201,12 @@ static netdev_tx_t bgmac_dma_tx_add(struct bgmac *bgmac,
 	/* Increase ring->end to point empty slot. We tell hardware the first
 	 * slot it should *not* read.
 	 */
-	ring->end = (index + 1) % BGMAC_TX_RING_SLOTS;
 	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_TX_INDEX,
 		    ring->index_base +
-		    ring->end * sizeof(struct bgmac_dma_desc));
+		    (ring->end % BGMAC_TX_RING_SLOTS) *
+		    sizeof(struct bgmac_dma_desc));
 
-	free_slots -= nr_frags + 1;
-	if (free_slots < 8)
+	if (ring->end - ring->start >= BGMAC_TX_RING_SLOTS - 8)
 		netif_stop_queue(net_dev);
 
 	return NETDEV_TX_OK;
@@ -256,17 +248,17 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 	empty_slot &= BGMAC_DMA_TX_STATDPTR;
 	empty_slot /= sizeof(struct bgmac_dma_desc);
 
-	while (ring->start != empty_slot) {
-		struct bgmac_slot_info *slot = &ring->slots[ring->start];
-		u32 ctl1 = le32_to_cpu(ring->cpu_base[ring->start].ctl1);
-		int len = ctl1 & BGMAC_DESC_CTL1_LEN;
+	while (ring->start != ring->end) {
+		int slot_idx = ring->start % BGMAC_TX_RING_SLOTS;
+		struct bgmac_slot_info *slot = &ring->slots[slot_idx];
+		u32 ctl1;
+		int len;
 
-		if (!slot->dma_addr) {
-			bgmac_err(bgmac, "Hardware reported transmission for empty TX ring slot %d! End of ring: %d\n",
-				  ring->start, ring->end);
-			goto next;
-		}
+		if (slot_idx == empty_slot)
+			break;
 
+		ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1);
+		len = ctl1 & BGMAC_DESC_CTL1_LEN;
 		if (ctl1 & BGMAC_DESC_CTL0_SOF)
 			/* Unmap no longer used buffer */
 			dma_unmap_single(dma_dev, slot->dma_addr, len,
@@ -284,10 +276,8 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring)
 			slot->skb = NULL;
 		}
 
-next:
 		slot->dma_addr = 0;
-		if (++ring->start >= BGMAC_TX_RING_SLOTS)
-			ring->start = 0;
+		ring->start++;
 		freed = true;
 	}
 
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 3ad965f..5a198d5 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -414,10 +414,10 @@ enum bgmac_dma_ring_type {
  * empty.
  */
 struct bgmac_dma_ring {
-	u16 num_slots;
-	u16 start;
-	u16 end;
+	u32 start;
+	u32 end;
 
+	u16 num_slots;
 	u16 mmio_base;
 	struct bgmac_dma_desc *cpu_base;
 	dma_addr_t dma_base;
-- 
2.2.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 2/4] bgmac: leave interrupts disabled as long as there is work to do
  2015-04-12 10:08 [PATCH 1/4] bgmac: simplify tx ring index handling Felix Fietkau
@ 2015-04-12 10:08 ` Felix Fietkau
  2015-04-12 10:08 ` [PATCH 3/4] bgmac: set received skb headroom to NET_SKB_PAD Felix Fietkau
  2015-04-12 10:08 ` [PATCH 4/4] bgmac: fix DMA rx corruption Felix Fietkau
  2 siblings, 0 replies; 10+ messages in thread
From: Felix Fietkau @ 2015-04-12 10:08 UTC (permalink / raw)
  To: netdev; +Cc: zajec5, hauke

Always poll rx and tx during NAPI poll instead of relying on the status
of the first interrupt. This prevents bgmac_poll from leaving unfinished
work around until the next IRQ.
In my tests this makes bridging/routing throughput under heavy load more
stable and ensures that no new IRQs arrive as long as bgmac_poll uses up
the entire budget.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 31 ++++++++++---------------------
 drivers/net/ethernet/broadcom/bgmac.h |  1 -
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 73374b4..66876c5 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -1105,8 +1105,6 @@ static void bgmac_chip_reset(struct bgmac *bgmac)
 	bgmac_phy_init(bgmac);
 
 	netdev_reset_queue(bgmac->net_dev);
-
-	bgmac->int_status = 0;
 }
 
 static void bgmac_chip_intrs_on(struct bgmac *bgmac)
@@ -1221,14 +1219,13 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
 	if (!int_status)
 		return IRQ_NONE;
 
-	/* Ack */
-	bgmac_write(bgmac, BGMAC_INT_STATUS, int_status);
+	int_status &= ~(BGMAC_IS_TX0 | BGMAC_IS_RX);
+	if (int_status)
+		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", int_status);
 
 	/* Disable new interrupts until handling existing ones */
 	bgmac_chip_intrs_off(bgmac);
 
-	bgmac->int_status = int_status;
-
 	napi_schedule(&bgmac->napi);
 
 	return IRQ_HANDLED;
@@ -1237,25 +1234,17 @@ static irqreturn_t bgmac_interrupt(int irq, void *dev_id)
 static int bgmac_poll(struct napi_struct *napi, int weight)
 {
 	struct bgmac *bgmac = container_of(napi, struct bgmac, napi);
-	struct bgmac_dma_ring *ring;
 	int handled = 0;
 
-	if (bgmac->int_status & BGMAC_IS_TX0) {
-		ring = &bgmac->tx_ring[0];
-		bgmac_dma_tx_free(bgmac, ring);
-		bgmac->int_status &= ~BGMAC_IS_TX0;
-	}
+	/* Ack */
+	bgmac_write(bgmac, BGMAC_INT_STATUS, ~0);
 
-	if (bgmac->int_status & BGMAC_IS_RX) {
-		ring = &bgmac->rx_ring[0];
-		handled += bgmac_dma_rx_read(bgmac, ring, weight);
-		bgmac->int_status &= ~BGMAC_IS_RX;
-	}
+	bgmac_dma_tx_free(bgmac, &bgmac->tx_ring[0]);
+	handled += bgmac_dma_rx_read(bgmac, &bgmac->rx_ring[0], weight);
 
-	if (bgmac->int_status) {
-		bgmac_err(bgmac, "Unknown IRQs: 0x%08X\n", bgmac->int_status);
-		bgmac->int_status = 0;
-	}
+	/* poll again if more events arrived in the mean time */
+	if (bgmac_read(bgmac, BGMAC_INT_STATUS) & (BGMAC_IS_TX0 | BGMAC_IS_RX))
+		return handled;
 
 	if (handled < weight) {
 		napi_complete(napi);
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index 5a198d5..abd50d1 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -452,7 +452,6 @@ struct bgmac {
 
 	/* Int */
 	u32 int_mask;
-	u32 int_status;
 
 	/* Current MAC state */
 	int mac_speed;
-- 
2.2.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 3/4] bgmac: set received skb headroom to NET_SKB_PAD
  2015-04-12 10:08 [PATCH 1/4] bgmac: simplify tx ring index handling Felix Fietkau
  2015-04-12 10:08 ` [PATCH 2/4] bgmac: leave interrupts disabled as long as there is work to do Felix Fietkau
@ 2015-04-12 10:08 ` Felix Fietkau
  2015-04-12 10:08 ` [PATCH 4/4] bgmac: fix DMA rx corruption Felix Fietkau
  2 siblings, 0 replies; 10+ messages in thread
From: Felix Fietkau @ 2015-04-12 10:08 UTC (permalink / raw)
  To: netdev; +Cc: zajec5, hauke

A packet buffer offset of 30 bytes is inefficient, because the first 2
bytes end up in a different cacheline.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 14 ++++++++------
 drivers/net/ethernet/broadcom/bgmac.h |  4 +++-
 2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index 66876c5..e332de8 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -342,13 +342,13 @@ static int bgmac_dma_rx_skb_for_slot(struct bgmac *bgmac,
 		return -ENOMEM;
 
 	/* Poison - if everything goes fine, hardware will overwrite it */
-	rx = buf;
+	rx = buf + BGMAC_RX_BUF_OFFSET;
 	rx->len = cpu_to_le16(0xdead);
 	rx->flags = cpu_to_le16(0xbeef);
 
 	/* Map skb for the DMA */
-	dma_addr = dma_map_single(dma_dev, buf, BGMAC_RX_BUF_SIZE,
-				  DMA_FROM_DEVICE);
+	dma_addr = dma_map_single(dma_dev, buf + BGMAC_RX_BUF_OFFSET,
+				  BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 	if (dma_mapping_error(dma_dev, dma_addr)) {
 		bgmac_err(bgmac, "DMA mapping error\n");
 		put_page(virt_to_head_page(buf));
@@ -399,7 +399,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 	while (ring->start != ring->end) {
 		struct device *dma_dev = bgmac->core->dma_dev;
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
-		struct bgmac_rx_header *rx = slot->buf;
+		struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
 		struct sk_buff *skb;
 		void *buf = slot->buf;
 		u16 len, flags;
@@ -450,8 +450,10 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 					 BGMAC_RX_BUF_SIZE, DMA_FROM_DEVICE);
 
 			skb = build_skb(buf, BGMAC_RX_ALLOC_SIZE);
-			skb_put(skb, BGMAC_RX_FRAME_OFFSET + len);
-			skb_pull(skb, BGMAC_RX_FRAME_OFFSET);
+			skb_put(skb, BGMAC_RX_FRAME_OFFSET +
+				BGMAC_RX_BUF_OFFSET + len);
+			skb_pull(skb, BGMAC_RX_FRAME_OFFSET +
+				 BGMAC_RX_BUF_OFFSET);
 
 			skb_checksum_none_assert(skb);
 			skb->protocol = eth_type_trans(skb, bgmac->net_dev);
diff --git a/drivers/net/ethernet/broadcom/bgmac.h b/drivers/net/ethernet/broadcom/bgmac.h
index abd50d1..3db0d52 100644
--- a/drivers/net/ethernet/broadcom/bgmac.h
+++ b/drivers/net/ethernet/broadcom/bgmac.h
@@ -360,9 +360,11 @@
 
 #define BGMAC_RX_HEADER_LEN			28		/* Last 24 bytes are unused. Well... */
 #define BGMAC_RX_FRAME_OFFSET			30		/* There are 2 unused bytes between header and real data */
+#define BGMAC_RX_BUF_OFFSET			(NET_SKB_PAD + NET_IP_ALIGN - \
+						 BGMAC_RX_FRAME_OFFSET)
 #define BGMAC_RX_MAX_FRAME_SIZE			1536		/* Copied from b44/tg3 */
 #define BGMAC_RX_BUF_SIZE			(BGMAC_RX_FRAME_OFFSET + BGMAC_RX_MAX_FRAME_SIZE)
-#define BGMAC_RX_ALLOC_SIZE			(SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE) + \
+#define BGMAC_RX_ALLOC_SIZE			(SKB_DATA_ALIGN(BGMAC_RX_BUF_SIZE + BGMAC_RX_BUF_OFFSET) + \
 						 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define BGMAC_BFL_ENETROBO			0x0010		/* has ephy roboswitch spi */
-- 
2.2.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 10:08 [PATCH 1/4] bgmac: simplify tx ring index handling Felix Fietkau
  2015-04-12 10:08 ` [PATCH 2/4] bgmac: leave interrupts disabled as long as there is work to do Felix Fietkau
  2015-04-12 10:08 ` [PATCH 3/4] bgmac: set received skb headroom to NET_SKB_PAD Felix Fietkau
@ 2015-04-12 10:08 ` Felix Fietkau
  2015-04-12 10:31   ` Eric Dumazet
                     ` (2 more replies)
  2 siblings, 3 replies; 10+ messages in thread
From: Felix Fietkau @ 2015-04-12 10:08 UTC (permalink / raw)
  To: netdev; +Cc: zajec5, hauke

The driver needs to inform the hardware about the first invalid (not yet
filled) rx slot, by writing its DMA descriptor pointer offset to the
BGMAC_DMA_RX_INDEX register.

This register was set to a value exceeding the rx ring size, effectively
allowing the hardware constant access to the full ring, regardless of
which slots are initialized.

Fix this by updating the register in bgmac_dma_rx_setup_desc.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
---
 drivers/net/ethernet/broadcom/bgmac.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
index e332de8..856ceee 100644
--- a/drivers/net/ethernet/broadcom/bgmac.c
+++ b/drivers/net/ethernet/broadcom/bgmac.c
@@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
 	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
 	dma_desc->ctl0 = cpu_to_le32(ctl0);
 	dma_desc->ctl1 = cpu_to_le32(ctl1);
+
+	desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
+
+	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
+			ring->index_base +
+			desc_idx * sizeof(struct bgmac_dma_desc));
 }
 
 static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
@@ -394,9 +400,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
 	end_slot &= BGMAC_DMA_RX_STATDPTR;
 	end_slot /= sizeof(struct bgmac_dma_desc);
 
-	ring->end = end_slot;
-
-	while (ring->start != ring->end) {
+	while (ring->start != end_slot) {
 		struct device *dma_dev = bgmac->core->dma_dev;
 		struct bgmac_slot_info *slot = &ring->slots[ring->start];
 		struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
@@ -693,10 +697,6 @@ static void bgmac_dma_init(struct bgmac *bgmac)
 		for (j = 0; j < ring->num_slots; j++)
 			bgmac_dma_rx_setup_desc(bgmac, ring, j);
 
-		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
-			    ring->index_base +
-			    ring->num_slots * sizeof(struct bgmac_dma_desc));
-
 		ring->start = 0;
 		ring->end = 0;
 	}
-- 
2.2.2

^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 10:08 ` [PATCH 4/4] bgmac: fix DMA rx corruption Felix Fietkau
@ 2015-04-12 10:31   ` Eric Dumazet
  2015-04-12 10:43     ` Felix Fietkau
  2015-04-12 10:31   ` Jakub Kiciński
  2015-04-12 11:11   ` Sergei Shtylyov
  2 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2015-04-12 10:31 UTC (permalink / raw)
  To: Felix Fietkau; +Cc: netdev, zajec5, hauke

On Sun, 2015-04-12 at 12:08 +0200, Felix Fietkau wrote:
> The driver needs to inform the hardware about the first invalid (not yet
> filled) rx slot, by writing its DMA descriptor pointer offset to the
> BGMAC_DMA_RX_INDEX register.
> 
> This register was set to a value exceeding the rx ring size, effectively
> allowing the hardware constant access to the full ring, regardless of
> which slots are initialized.
> 
> Fix this by updating the register in bgmac_dma_rx_setup_desc.
> 
> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
> ---
>  drivers/net/ethernet/broadcom/bgmac.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
> index e332de8..856ceee 100644
> --- a/drivers/net/ethernet/broadcom/bgmac.c
> +++ b/drivers/net/ethernet/broadcom/bgmac.c
> @@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>  	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
>  	dma_desc->ctl0 = cpu_to_le32(ctl0);
>  	dma_desc->ctl1 = cpu_to_le32(ctl1);
> +
> +	desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
> +
> +	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
> +			ring->index_base +
> +			desc_idx * sizeof(struct bgmac_dma_desc));
>  }
>  
>  static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
> @@ -394,9 +400,7 @@ static int bgmac_dma_rx_read(struct bgmac *bgmac, struct bgmac_dma_ring *ring,
>  	end_slot &= BGMAC_DMA_RX_STATDPTR;
>  	end_slot /= sizeof(struct bgmac_dma_desc);
>  
> -	ring->end = end_slot;
> -
> -	while (ring->start != ring->end) {
> +	while (ring->start != end_slot) {
>  		struct device *dma_dev = bgmac->core->dma_dev;
>  		struct bgmac_slot_info *slot = &ring->slots[ring->start];
>  		struct bgmac_rx_header *rx = slot->buf + BGMAC_RX_BUF_OFFSET;
> @@ -693,10 +697,6 @@ static void bgmac_dma_init(struct bgmac *bgmac)
>  		for (j = 0; j < ring->num_slots; j++)
>  			bgmac_dma_rx_setup_desc(bgmac, ring, j);
>  

Missing dma_wmb() here  (or legacy wmb() for stable kernels)

> -		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
> -			    ring->index_base +
> -			    ring->num_slots * sizeof(struct bgmac_dma_desc));
> -


This might be better for performance to perform one single bgmac_write()
at the end of bgmac_dma_rx_read(), and leave this one in place as well,
not for performance since this is slow path, but correctness.

Also I am surprised there is no memory barrier to make sure changes to
dma_desc are committed to memory ?

I would use dma_wmb() before bgmac_write(), like the wmb() done in
bgmac_dma_tx_add()

Thanks

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 10:08 ` [PATCH 4/4] bgmac: fix DMA rx corruption Felix Fietkau
  2015-04-12 10:31   ` Eric Dumazet
@ 2015-04-12 10:31   ` Jakub Kiciński
  2015-04-12 11:11   ` Sergei Shtylyov
  2 siblings, 0 replies; 10+ messages in thread
From: Jakub Kiciński @ 2015-04-12 10:31 UTC (permalink / raw)
  To: Felix Fietkau; +Cc: netdev, zajec5, hauke

On Sun, 12 Apr 2015 12:08:12 +0200, Felix Fietkau wrote:
> The driver needs to inform the hardware about the first invalid (not yet
> filled) rx slot, by writing its DMA descriptor pointer offset to the
> BGMAC_DMA_RX_INDEX register.
> 
> This register was set to a value exceeding the rx ring size, effectively
> allowing the hardware constant access to the full ring, regardless of
> which slots are initialized.
> 
> Fix this by updating the register in bgmac_dma_rx_setup_desc.
> 
> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
> ---
>  drivers/net/ethernet/broadcom/bgmac.c | 14 +++++++-------
>  1 file changed, 7 insertions(+), 7 deletions(-)
> 
> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
> index e332de8..856ceee 100644
> --- a/drivers/net/ethernet/broadcom/bgmac.c
> +++ b/drivers/net/ethernet/broadcom/bgmac.c
> @@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>  	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
>  	dma_desc->ctl0 = cpu_to_le32(ctl0);
>  	dma_desc->ctl1 = cpu_to_le32(ctl1);
> +
> +	desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
> +
> +	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
> +			ring->index_base +
> +			desc_idx * sizeof(struct bgmac_dma_desc));
>  }

Newb question if I may: why is no mem barrier necessary in this case?

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 10:31   ` Eric Dumazet
@ 2015-04-12 10:43     ` Felix Fietkau
  2015-04-12 17:24       ` Eric Dumazet
  0 siblings, 1 reply; 10+ messages in thread
From: Felix Fietkau @ 2015-04-12 10:43 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, zajec5, hauke

On 2015-04-12 12:31, Eric Dumazet wrote:
> On Sun, 2015-04-12 at 12:08 +0200, Felix Fietkau wrote:
>> The driver needs to inform the hardware about the first invalid (not yet
>> filled) rx slot, by writing its DMA descriptor pointer offset to the
>> BGMAC_DMA_RX_INDEX register.
>> 
>> This register was set to a value exceeding the rx ring size, effectively
>> allowing the hardware constant access to the full ring, regardless of
>> which slots are initialized.
>> 
>> Fix this by updating the register in bgmac_dma_rx_setup_desc.
>> 
>> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
>> ---
>>  drivers/net/ethernet/broadcom/bgmac.c | 14 +++++++-------
>>  1 file changed, 7 insertions(+), 7 deletions(-)
>> 
>> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
>> index e332de8..856ceee 100644
>> --- a/drivers/net/ethernet/broadcom/bgmac.c
>> +++ b/drivers/net/ethernet/broadcom/bgmac.c
>> @@ -693,10 +697,6 @@ static void bgmac_dma_init(struct bgmac *bgmac)
>>  		for (j = 0; j < ring->num_slots; j++)
>>  			bgmac_dma_rx_setup_desc(bgmac, ring, j);
>>  
> 
> Missing dma_wmb() here  (or legacy wmb() for stable kernels)
Right, thanks.

>> -		bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
>> -			    ring->index_base +
>> -			    ring->num_slots * sizeof(struct bgmac_dma_desc));
>> -
> 
> 
> This might be better for performance to perform one single bgmac_write()
> at the end of bgmac_dma_rx_read(), and leave this one in place as well,
> not for performance since this is slow path, but correctness.
I intentionally made it write this field for every slot update, because
it might potentially allow the hardware to push frames faster when under
pressure. The CPU isn't fast enough to handle gigabit speeds anyway.

And by the way, the value that is written in this removed call is quite
wrong, so I'd rather just remove it instead of leaving a duplicate here.

> Also I am surprised there is no memory barrier to make sure changes to
> dma_desc are committed to memory ?
> 
> I would use dma_wmb() before bgmac_write(), like the wmb() done in
> bgmac_dma_tx_add()
Will add that.

- Felix

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 10:08 ` [PATCH 4/4] bgmac: fix DMA rx corruption Felix Fietkau
  2015-04-12 10:31   ` Eric Dumazet
  2015-04-12 10:31   ` Jakub Kiciński
@ 2015-04-12 11:11   ` Sergei Shtylyov
  2 siblings, 0 replies; 10+ messages in thread
From: Sergei Shtylyov @ 2015-04-12 11:11 UTC (permalink / raw)
  To: Felix Fietkau, netdev; +Cc: zajec5, hauke

Hello.

On 4/12/2015 1:08 PM, Felix Fietkau wrote:

> The driver needs to inform the hardware about the first invalid (not yet
> filled) rx slot, by writing its DMA descriptor pointer offset to the
> BGMAC_DMA_RX_INDEX register.

> This register was set to a value exceeding the rx ring size, effectively
> allowing the hardware constant access to the full ring, regardless of
> which slots are initialized.

> Fix this by updating the register in bgmac_dma_rx_setup_desc.

> Signed-off-by: Felix Fietkau <nbd@openwrt.org>
> ---
>   drivers/net/ethernet/broadcom/bgmac.c | 14 +++++++-------
>   1 file changed, 7 insertions(+), 7 deletions(-)

> diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c
> index e332de8..856ceee 100644
> --- a/drivers/net/ethernet/broadcom/bgmac.c
> +++ b/drivers/net/ethernet/broadcom/bgmac.c
> @@ -380,6 +380,12 @@ static void bgmac_dma_rx_setup_desc(struct bgmac *bgmac,
>   	dma_desc->addr_high = cpu_to_le32(upper_32_bits(ring->slots[desc_idx].dma_addr));
>   	dma_desc->ctl0 = cpu_to_le32(ctl0);
>   	dma_desc->ctl1 = cpu_to_le32(ctl1);
> +
> +	desc_idx = (desc_idx + 1) % BGMAC_RX_RING_SLOTS;
> +
> +	bgmac_write(bgmac, ring->mmio_base + BGMAC_DMA_RX_INDEX,
> +			ring->index_base +
> +			desc_idx * sizeof(struct bgmac_dma_desc));

    The continuation lines should start on the next column after ( on the 
first line.

[...]

WBR, Sergei

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 10:43     ` Felix Fietkau
@ 2015-04-12 17:24       ` Eric Dumazet
  2015-04-12 17:28         ` Felix Fietkau
  0 siblings, 1 reply; 10+ messages in thread
From: Eric Dumazet @ 2015-04-12 17:24 UTC (permalink / raw)
  To: Felix Fietkau; +Cc: netdev, zajec5, hauke

On Sun, 2015-04-12 at 12:43 +0200, Felix Fietkau wrote:
> On 2015-04-12 12:31, Eric Dumazet wrote:

> > This might be better for performance to perform one single bgmac_write()
> > at the end of bgmac_dma_rx_read(), and leave this one in place as well,
> > not for performance since this is slow path, but correctness.
> I intentionally made it write this field for every slot update, because
> it might potentially allow the hardware to push frames faster when under
> pressure. The CPU isn't fast enough to handle gigabit speeds anyway.

If CPU is not fast enough, then it makes sense to optimize this part,
and save cpu cycles for actual processing.

You know, even a fast cpu is not able to keep up at 40Gbits.

You have a clear opportunity to have batching right there, take it.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH 4/4] bgmac: fix DMA rx corruption
  2015-04-12 17:24       ` Eric Dumazet
@ 2015-04-12 17:28         ` Felix Fietkau
  0 siblings, 0 replies; 10+ messages in thread
From: Felix Fietkau @ 2015-04-12 17:28 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev, zajec5, hauke

On 2015-04-12 19:24, Eric Dumazet wrote:
> On Sun, 2015-04-12 at 12:43 +0200, Felix Fietkau wrote:
>> On 2015-04-12 12:31, Eric Dumazet wrote:
> 
>> > This might be better for performance to perform one single bgmac_write()
>> > at the end of bgmac_dma_rx_read(), and leave this one in place as well,
>> > not for performance since this is slow path, but correctness.
>> I intentionally made it write this field for every slot update, because
>> it might potentially allow the hardware to push frames faster when under
>> pressure. The CPU isn't fast enough to handle gigabit speeds anyway.
> 
> If CPU is not fast enough, then it makes sense to optimize this part,
> and save cpu cycles for actual processing.
> 
> You know, even a fast cpu is not able to keep up at 40Gbits.
> 
> You have a clear opportunity to have batching right there, take it.
Already did that in v2. :)

- Felix

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2015-04-12 17:28 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-04-12 10:08 [PATCH 1/4] bgmac: simplify tx ring index handling Felix Fietkau
2015-04-12 10:08 ` [PATCH 2/4] bgmac: leave interrupts disabled as long as there is work to do Felix Fietkau
2015-04-12 10:08 ` [PATCH 3/4] bgmac: set received skb headroom to NET_SKB_PAD Felix Fietkau
2015-04-12 10:08 ` [PATCH 4/4] bgmac: fix DMA rx corruption Felix Fietkau
2015-04-12 10:31   ` Eric Dumazet
2015-04-12 10:43     ` Felix Fietkau
2015-04-12 17:24       ` Eric Dumazet
2015-04-12 17:28         ` Felix Fietkau
2015-04-12 10:31   ` Jakub Kiciński
2015-04-12 11:11   ` Sergei Shtylyov

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.