All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE)
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (4 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE) Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11 15:15     ` Pavel Roskin
  2011-07-12  4:50     ` Felix Fietkau
  2011-07-11  0:52   ` Michał Mirosław
                   ` (11 subsequent siblings)
  17 siblings, 2 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev; +Cc: Christian Lamparter, John W. Linville, linux-wireless

Also constify pointers used in frame parsers to verify assumptions.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/wireless/p54/p54pci.c |    2 --
 drivers/net/wireless/p54/txrx.c   |   22 +++++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
index 1b75317..4491d33 100644
--- a/drivers/net/wireless/p54/p54pci.c
+++ b/drivers/net/wireless/p54/p54pci.c
@@ -229,8 +229,6 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
 			desc->host_addr = cpu_to_le32(0);
 		} else {
 			skb_trim(skb, 0);
-			pci_dma_sync_single_for_device(priv->pdev, dma_addr,
-				priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
 			desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
 		}
 
diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
index 042842e..b7ecd89 100644
--- a/drivers/net/wireless/p54/txrx.c
+++ b/drivers/net/wireless/p54/txrx.c
@@ -325,7 +325,7 @@ static void p54_pspoll_workaround(struct p54_common *priv, struct sk_buff *skb)
 
 static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb)
 {
-	struct p54_rx_data *hdr = (struct p54_rx_data *) skb->data;
+	const struct p54_rx_data *hdr = (void *)skb->data;
 	struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb);
 	u16 freq = le16_to_cpu(hdr->freq);
 	size_t header_len = sizeof(*hdr);
@@ -387,8 +387,8 @@ static int p54_rx_data(struct p54_common *priv, struct sk_buff *skb)
 
 static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb)
 {
-	struct p54_hdr *hdr = (struct p54_hdr *) skb->data;
-	struct p54_frame_sent *payload = (struct p54_frame_sent *) hdr->data;
+	const struct p54_hdr *hdr = (void *)skb->data;
+	const struct p54_frame_sent *payload = (void *)hdr->data;
 	struct ieee80211_tx_info *info;
 	struct p54_hdr *entry_hdr;
 	struct p54_tx_data *entry_data;
@@ -481,8 +481,8 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb)
 static void p54_rx_eeprom_readback(struct p54_common *priv,
 				   struct sk_buff *skb)
 {
-	struct p54_hdr *hdr = (struct p54_hdr *) skb->data;
-	struct p54_eeprom_lm86 *eeprom = (struct p54_eeprom_lm86 *) hdr->data;
+	const struct p54_hdr *hdr = (void *)skb->data;
+	const struct p54_eeprom_lm86 *eeprom = (void *)hdr->data;
 	struct sk_buff *tmp;
 
 	if (!priv->eeprom)
@@ -504,8 +504,8 @@ static void p54_rx_eeprom_readback(struct p54_common *priv,
 
 static void p54_rx_stats(struct p54_common *priv, struct sk_buff *skb)
 {
-	struct p54_hdr *hdr = (struct p54_hdr *) skb->data;
-	struct p54_statistics *stats = (struct p54_statistics *) hdr->data;
+	const struct p54_hdr *hdr = (void *)skb->data;
+	const struct p54_statistics *stats = (void *)hdr->data;
 	struct sk_buff *tmp;
 	u32 tsf32;
 
@@ -529,8 +529,8 @@ static void p54_rx_stats(struct p54_common *priv, struct sk_buff *skb)
 
 static void p54_rx_trap(struct p54_common *priv, struct sk_buff *skb)
 {
-	struct p54_hdr *hdr = (struct p54_hdr *) skb->data;
-	struct p54_trap *trap = (struct p54_trap *) hdr->data;
+	const struct p54_hdr *hdr = (void *)skb->data;
+	const struct p54_trap *trap = (void *)hdr->data;
 	u16 event = le16_to_cpu(trap->event);
 	u16 freq = le16_to_cpu(trap->frequency);
 
@@ -565,7 +565,7 @@ static void p54_rx_trap(struct p54_common *priv, struct sk_buff *skb)
 
 static int p54_rx_control(struct p54_common *priv, struct sk_buff *skb)
 {
-	struct p54_hdr *hdr = (struct p54_hdr *) skb->data;
+	const struct p54_hdr *hdr = (void *)skb->data;
 
 	switch (le16_to_cpu(hdr->type)) {
 	case P54_CONTROL_TYPE_TXDONE:
@@ -595,7 +595,7 @@ static int p54_rx_control(struct p54_common *priv, struct sk_buff *skb)
 int p54_rx(struct ieee80211_hw *dev, struct sk_buff *skb)
 {
 	struct p54_common *priv = dev->priv;
-	u16 type = le16_to_cpu(*((__le16 *)skb->data));
+	u16 type = le16_to_cpu(*(const __le16 *)skb->data);
 
 	if (type & P54_HDR_FLAG_CONTROL)
 		return p54_rx_control(priv, skb);
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 02/46] net: wrap common patterns of rx handler code
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 05/46] net: bnx2x: fix DMA sync direction Michał Mirosław
                   ` (16 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Introduce dev_skb_finish_rx_dma() and dev_skb_finish_rx_dma_refill() ---
two common patterns for rx handling as seen in various network drivers
that implement the rx_copybreak idea (copying smaller packets, passing
larger ones in the original skb).

The common pattern (as implemented in dev_skb_finish_rx_dma()) is:

if (packet len < threshold)
       allocate new, smaller skb
       sync DMA buffer to cpu
       copy packet's data
       give DMA buffer back to device
       pass new skb
       reuse buffer in rx ring
else (or if skb alloc for copy failed)
       unmap DMA buffer
       pass skb
       remove buffer from rx ring
       [refill rx ring later]

This scheme is modified by some drivers to immediately refill the rx slot
before passing the original rx skb up the stack. Those drivers also have a
problem: they drop packets from the head of the queue when that allocation
fails. This forces unnecessary retransmits and can deadlock if the device is
used for swapping over network.  To flag this case,
dev_skb_finish_rx_dma_refill(), which implements it, is marked as deprecated
to encourage driver maintainers to look into the matter.

These functions are called from the rx handler hot path and take many
arguments, so they are inlined. This should allow the compiler to better
optimize them together with the calling code.

v2:
 - remove unnecessary dma_sync_single_for_device()
   [See: DMA-API.txt, Part Id, DMA_FROM_DEVICE description]
 - check dma_mapping_error() in dev_skb_finish_rx_dma_refill()
 - handle RX_OFFSET (padding inserted by hardware before packet's data)

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 include/linux/skbuff.h |  142 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 142 insertions(+), 0 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c873897..bf51006 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -29,6 +29,7 @@
 #include <linux/rcupdate.h>
 #include <linux/dmaengine.h>
 #include <linux/hrtimer.h>
+#include <linux/dma-mapping.h>
 
 /* Don't change this without changing skb_csum_unnecessary! */
 #define CHECKSUM_NONE 0
@@ -2310,5 +2311,146 @@ static inline void skb_checksum_none_assert(struct sk_buff *skb)
 
 bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off);
 
+/**
+ * __dev_skb_finish_rx_dma - finish skb after DMA'd RX
+ * @skb: skb to finish
+ * @len: packet data length
+ * @copybreak: maximum packet size to copy
+ * @rx_offset: count of bytes prepended to packet's data by hardware
+ * @dma_dev: device used for DMA
+ * @dma_buf: DMA mapping address
+ * @dma_len: DMA mapping length
+ *
+ * This function finishes DMA mapping (sync for copied, unmap otherwise) for
+ * a packet and copies it to new skb if its size is below the @copybreak
+ * threshold.
+ *
+ * Returns new skb or NULL if the copy wasn't made.
+ */
+static inline struct sk_buff *__dev_skb_finish_rx_dma(struct sk_buff *skb,
+	unsigned int len, unsigned int copybreak, unsigned int rx_offset,
+	struct device *dma_dev, dma_addr_t dma_buf, size_t dma_len)
+{
+	if (len < copybreak) {
+		struct sk_buff *skb2 = netdev_alloc_skb_ip_align(skb->dev, len);
+		if (likely(skb2)) {
+			dma_sync_single_for_cpu(dma_dev, dma_buf,
+				len + rx_offset, DMA_FROM_DEVICE);
+			skb_copy_to_linear_data(skb2, skb->data, len);
+			return skb2;
+		}
+	}
+
+	/* else or copy failed */
+
+	dma_unmap_single(dma_dev, dma_buf, dma_len, DMA_FROM_DEVICE);
+	return NULL;
+}
+
+/**
+ * dev_skb_finish_rx_dma - finish skb after DMA'd RX
+ * @pskb: pointer to variable holding skb to finish
+ * @len: packet data length
+ * @copybreak: maximum packet size to copy
+ * @rx_offset: count of bytes prepended to packet's data by hardware
+ * @dma_dev: device used for DMA
+ * @dma_buf: DMA mapping address
+ * @dma_len: DMA mapping length
+ *
+ * This function finishes DMA mapping (sync for copied, unmap otherwise) for
+ * a packet and copies it to new skb if its size is below the @copybreak
+ * threshold.  Like __dev_skb_finish_rx_dma().
+ *
+ * Returns the skb - old or copied. *pskb is cleared if the skb wasn't copied.
+ */
+static inline struct sk_buff *dev_skb_finish_rx_dma(struct sk_buff **pskb,
+	unsigned int len, unsigned int copybreak, unsigned int rx_offset,
+	struct device *dma_dev, dma_addr_t dma_buf, size_t dma_len)
+{
+	struct sk_buff *skb2;
+
+	skb2 = __dev_skb_finish_rx_dma(*pskb, len, copybreak, rx_offset,
+		dma_dev, dma_buf, dma_len);
+
+	if (!skb2) {
+		/* not copied */
+		skb2 = *pskb;
+		*pskb = NULL;
+	}
+
+	skb_put(skb2, len);
+	return skb2;
+}
+
+/**
+ * dev_skb_finish_rx_dma_refill - finish skb after DMA'd RX and refill the slot
+ * @pskb: pointer to variable holding skb to finish
+ * @len: packet data length
+ * @copybreak: maximum packet size to copy
+ * @ip_align: new skb's alignment offset
+ * @rx_offset: count of bytes prepended by HW before packet's data
+ * @dma_dev: device used for DMA
+ * @dma_buf: DMA mapping address
+ * @dma_len: DMA mapping length
+ * @dma_align: required DMA buffer alignment for new skbs
+ *
+ * This function finishes DMA mapping (sync for copied, unmap otherwise) for
+ * a packet and copies it to new skb if its size is below the @copybreak
+ * threshold.  Like __dev_skb_finish_rx_dma().
+ *
+ * *pskb is filled with new mapped skb if the skb wasn't copied.
+ * Returns the skb - old or copied, or NULL if refill failed.
+ *
+ * NOTE:
+ * This will effectively drop the packet in case of memory pressure. This
+ * might not be wanted when swapping over network. It's better to throttle
+ * the receiver queue (refill later) as the packet might be needed to
+ * reclaim some memory.
+ */
+static inline __deprecated struct sk_buff *dev_skb_finish_rx_dma_refill(
+	struct sk_buff **pskb, unsigned int len, unsigned int copybreak,
+	unsigned int ip_align, unsigned int rx_offset,
+	struct device *dma_dev, dma_addr_t *dma_buf, size_t dma_len,
+	size_t dma_align)
+{
+	struct sk_buff *skb;
+
+	skb = __dev_skb_finish_rx_dma(*pskb, len, copybreak, rx_offset,
+		dma_dev, *dma_buf, dma_len);
+
+	if (!skb) {
+		/* not copied */
+		skb = *pskb;
+		/* netdev_alloc_skb_ip_align() */
+		*pskb = __netdev_alloc_skb_aligned(skb->dev, dma_len + ip_align,
+			dma_align, GFP_ATOMIC);
+		if (likely(*pskb))
+			skb_reserve(*pskb, ip_align + rx_offset);
+		else {
+			/* no memory - drop packet */
+			*pskb = skb;
+			skb = NULL;
+		}
+
+		*dma_buf = dma_map_single(dma_dev, (*pskb)->data - rx_offset,
+			dma_len, DMA_FROM_DEVICE);
+		if (dma_mapping_error(dma_dev, *dma_buf)) {
+			BUG_ON(!skb);	/* caller can't handle this case */
+			kfree_skb(*pskb);
+			*pskb = skb;
+			skb = NULL;
+			*dma_buf = dma_map_single(dma_dev,
+				(*pskb)->data - rx_offset, dma_len,
+				DMA_FROM_DEVICE);
+			BUG_ON(dma_mapping_error(dma_dev, *dma_buf));
+		}
+	}
+
+	if (likely(skb))
+		skb_put(skb, len);
+
+	return skb;
+}
+
 #endif	/* __KERNEL__ */
 #endif	/* _LINUX_SKBUFF_H */
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE)
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (3 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 01/46] net: introduce __netdev_alloc_skb_aligned() Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  8:30   ` Vlad Zolotarov
  2011-07-11  0:52 ` [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE) Michał Mirosław
                   ` (12 subsequent siblings)
  17 siblings, 1 reply; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev
  Cc: linux-wireless, Eilon Greenstein, Gary Zambrano,
	Stephen Hemminger, Stefano Brivio, e1000-devel, Matt Carlson,
	Jesse Brandeburg, Francois Romieu, Realtek linux nic maintainers,
	John W. Linville, Ron Mercer, Michael Chan, Jitendra Kalsaria,
	Divy Le Ray, Bruce Allan, Hartley Sweeten, John Ronciak,
	Jon Mason, linux-driver, Larry Finger

dma_sync_single_for_device() (and its pci_ wrapper) is not needed for
DMA_FROM_DEVICE mappings when only the device modifies the buffer.
See: DMA-API.txt, Part Id, DMA_FROM_DEVICE description.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/arm/ep93xx_eth.c         |    3 ---
 drivers/net/b44.c                    |    4 ----
 drivers/net/bnx2.c                   |    4 ----
 drivers/net/bnx2x/bnx2x_cmn.h        |    5 -----
 drivers/net/cassini.c                |   12 ------------
 drivers/net/cxgb3/sge.c              |    3 ---
 drivers/net/e100.c                   |    3 ---
 drivers/net/e1000e/netdev.c          |    4 +---
 drivers/net/mlx4/en_rx.c             |    2 --
 drivers/net/qlge/qlge_main.c         |   11 -----------
 drivers/net/r8169.c                  |    1 -
 drivers/net/s2io.c                   |    6 +-----
 drivers/net/skge.c                   |    3 ---
 drivers/net/sky2.c                   |    2 --
 drivers/net/tg3.c                    |    1 -
 drivers/net/tokenring/olympic.c      |    6 ------
 drivers/net/vxge/vxge-main.c         |    3 ---
 drivers/net/wireless/b43legacy/dma.c |   19 -------------------
 18 files changed, 2 insertions(+), 90 deletions(-)

diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index 4317af8..ba3bf43 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -289,9 +289,6 @@ static int ep93xx_rx(struct net_device *dev, int processed, int budget)
 			dma_sync_single_for_cpu(dev->dev.parent, rxd->buf_addr,
 						length, DMA_FROM_DEVICE);
 			skb_copy_to_linear_data(skb, ep->rx_buf[entry], length);
-			dma_sync_single_for_device(dev->dev.parent,
-						   rxd->buf_addr, length,
-						   DMA_FROM_DEVICE);
 			skb_put(skb, length);
 			skb->protocol = eth_type_trans(skb, dev);
 
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index 6c4ef96..033029f 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -739,10 +739,6 @@ static void b44_recycle_rx(struct b44 *bp, int src_idx, u32 dest_idx_unmasked)
 		b44_sync_dma_desc_for_device(bp->sdev, bp->rx_ring_dma,
 					     dest_idx * sizeof(*dest_desc),
 					     DMA_BIDIRECTIONAL);
-
-	dma_sync_single_for_device(bp->sdev->dma_dev, dest_map->mapping,
-				   RX_PKT_BUF_SZ,
-				   DMA_FROM_DEVICE);
 }
 
 static int b44_rx(struct b44 *bp, int budget)
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 7915d14..d627886 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -2927,10 +2927,6 @@ bnx2_reuse_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr,
 	cons_rx_buf = &rxr->rx_buf_ring[cons];
 	prod_rx_buf = &rxr->rx_buf_ring[prod];
 
-	dma_sync_single_for_device(&bp->pdev->dev,
-		dma_unmap_addr(cons_rx_buf, mapping),
-		BNX2_RX_OFFSET + BNX2_RX_COPY_THRESH, PCI_DMA_FROMDEVICE);
-
 	rxr->rx_prod_bseq += bp->rx_buf_use_size;
 
 	prod_rx_buf->skb = skb;
diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
index c016e20..c9e49a0 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/bnx2x/bnx2x_cmn.h
@@ -923,16 +923,11 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x *bp,
 static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
 				      u16 cons, u16 prod)
 {
-	struct bnx2x *bp = fp->bp;
 	struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
 	struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
 	struct eth_rx_bd *cons_bd = &fp->rx_desc_ring[cons];
 	struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
 
-	dma_sync_single_for_device(&bp->pdev->dev,
-				   dma_unmap_addr(cons_rx_buf, mapping),
-				   RX_COPY_THRESH, DMA_FROM_DEVICE);
-
 	dma_unmap_addr_set(prod_rx_buf, mapping,
 			   dma_unmap_addr(cons_rx_buf, mapping));
 	prod_rx_buf->skb = cons_rx_buf->skb;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index b414f5a..788ab13 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -1997,8 +1997,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 				    PCI_DMA_FROMDEVICE);
 		addr = cas_page_map(page->buffer);
 		memcpy(p, addr + off, i);
-		pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i,
-				    PCI_DMA_FROMDEVICE);
 		cas_page_unmap(addr);
 		RX_USED_ADD(page, 0x100);
 		p += hlen;
@@ -2032,8 +2030,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 		if (p == (char *) skb->data) { /* not split */
 			addr = cas_page_map(page->buffer);
 			memcpy(p, addr + off, RX_COPY_MIN);
-			pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i,
-					PCI_DMA_FROMDEVICE);
 			cas_page_unmap(addr);
 			off += RX_COPY_MIN;
 			swivel = RX_COPY_MIN;
@@ -2063,9 +2059,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 			pci_dma_sync_single_for_cpu(cp->pdev, page->dma_addr,
 					    hlen + cp->crc_size,
 					    PCI_DMA_FROMDEVICE);
-			pci_dma_sync_single_for_device(cp->pdev, page->dma_addr,
-					    hlen + cp->crc_size,
-					    PCI_DMA_FROMDEVICE);
 
 			skb_shinfo(skb)->nr_frags++;
 			skb->data_len += hlen;
@@ -2106,8 +2099,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 				    PCI_DMA_FROMDEVICE);
 		addr = cas_page_map(page->buffer);
 		memcpy(p, addr + off, i);
-		pci_dma_sync_single_for_device(cp->pdev, page->dma_addr + off, i,
-				    PCI_DMA_FROMDEVICE);
 		cas_page_unmap(addr);
 		if (p == (char *) skb->data) /* not split */
 			RX_USED_ADD(page, cp->mtu_stride);
@@ -2124,9 +2115,6 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 					    PCI_DMA_FROMDEVICE);
 			addr = cas_page_map(page->buffer);
 			memcpy(p, addr, dlen + cp->crc_size);
-			pci_dma_sync_single_for_device(cp->pdev, page->dma_addr,
-					    dlen + cp->crc_size,
-					    PCI_DMA_FROMDEVICE);
 			cas_page_unmap(addr);
 			RX_USED_ADD(page, dlen + cp->crc_size);
 		}
diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 76bf589..3196fdd 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -517,9 +517,6 @@ nomem:				q->alloc_failed++;
 			dma_unmap_addr_set(sd, dma_addr, mapping);
 
 			add_one_rx_chunk(mapping, d, q->gen);
-			pci_dma_sync_single_for_device(adap->pdev, mapping,
-						q->buf_size - SGE_PG_RSVD,
-						PCI_DMA_FROMDEVICE);
 		} else {
 			void *buf_start;
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index c1352c6..73034af 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1944,9 +1944,6 @@ static int e100_rx_indicate(struct nic *nic, struct rx *rx,
 
 			if (ioread8(&nic->csr->scb.status) & rus_no_res)
 				nic->ru_running = RU_SUSPENDED;
-		pci_dma_sync_single_for_device(nic->pdev, rx->dma_addr,
-					       sizeof(struct rfd),
-					       PCI_DMA_FROMDEVICE);
 		return -ENODATA;
 	}
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index ed7a93d..f9b16cf 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -1172,12 +1172,10 @@ static bool e1000_clean_rx_irq_ps(struct e1000_adapter *adapter,
 			 * very long
 			 */
 			dma_sync_single_for_cpu(&pdev->dev, ps_page->dma,
-						PAGE_SIZE, DMA_FROM_DEVICE);
+						l1, DMA_FROM_DEVICE);
 			vaddr = kmap_atomic(ps_page->page, KM_SKB_DATA_SOFTIRQ);
 			memcpy(skb_tail_pointer(skb), vaddr, l1);
 			kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ);
-			dma_sync_single_for_device(&pdev->dev, ps_page->dma,
-						   PAGE_SIZE, DMA_FROM_DEVICE);
 
 			/* remove the CRC */
 			if (!(adapter->flags2 & FLAG2_CRC_STRIPPING))
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index 5197b50..ee15295 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c
@@ -483,8 +483,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
 		dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
 					DMA_FROM_DEVICE);
 		skb_copy_to_linear_data(skb, va, length);
-		dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
-					   DMA_FROM_DEVICE);
 		skb->tail += length;
 	} else {
 
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index 68fbfac..48dd59b 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -1195,9 +1195,6 @@ static void ql_update_lbq(struct ql_adapter *qdev, struct rx_ring *rx_ring)
 					rx_ring->lbq_buf_size);
 				*lbq_desc->addr = cpu_to_le64(map);
 
-			pci_dma_sync_single_for_device(qdev->pdev, map,
-						rx_ring->lbq_buf_size,
-						PCI_DMA_FROMDEVICE);
 			clean_idx++;
 			if (clean_idx == rx_ring->lbq_len)
 				clean_idx = 0;
@@ -1801,14 +1798,6 @@ static struct sk_buff *ql_build_rx_skb(struct ql_adapter *qdev,
 						    PCI_DMA_FROMDEVICE);
 			memcpy(skb_put(skb, length),
 			       sbq_desc->p.skb->data, length);
-			pci_dma_sync_single_for_device(qdev->pdev,
-						       dma_unmap_addr
-						       (sbq_desc,
-							mapaddr),
-						       dma_unmap_len
-						       (sbq_desc,
-							maplen),
-						       PCI_DMA_FROMDEVICE);
 		} else {
 			netif_printk(qdev, rx_status, KERN_DEBUG, qdev->ndev,
 				     "%d bytes in a single small buffer.\n",
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index ef1a43d..e2c2884 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -5005,7 +5005,6 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
 	skb = netdev_alloc_skb_ip_align(tp->dev, pkt_size);
 	if (skb)
 		memcpy(skb->data, data, pkt_size);
-	dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
 
 	return skb;
 }
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 043850b..acf7105 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -2636,11 +2636,7 @@ static int fill_rx_buffers(struct s2io_nic *nic, struct ring_info *ring,
 				if (pci_dma_mapping_error(nic->pdev,
 							  rxdp3->Buffer0_ptr))
 					goto pci_map_failed;
-			} else
-				pci_dma_sync_single_for_device(ring->pdev,
-							       (dma_addr_t)rxdp3->Buffer0_ptr,
-							       BUF0_LEN,
-							       PCI_DMA_FROMDEVICE);
+			}
 
 			rxdp->Control_2 = SET_BUFFER0_SIZE_3(BUF0_LEN);
 			if (ring->rxd_mode == RXD_MODE_3B) {
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 98ec614..11e5229 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3031,9 +3031,6 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
 					    dma_unmap_addr(e, mapaddr),
 					    len, PCI_DMA_FROMDEVICE);
 		skb_copy_from_linear_data(e->skb, skb->data, len);
-		pci_dma_sync_single_for_device(skge->hw->pdev,
-					       dma_unmap_addr(e, mapaddr),
-					       len, PCI_DMA_FROMDEVICE);
 		skge_rx_reuse(e, skge->rx_buf_size);
 	} else {
 		struct sk_buff *nskb;
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 57339da..5f720b9 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -2422,8 +2422,6 @@ static struct sk_buff *receive_copy(struct sky2_port *sky2,
 		skb_copy_from_linear_data(re->skb, skb->data, length);
 		skb->ip_summed = re->skb->ip_summed;
 		skb->csum = re->skb->csum;
-		pci_dma_sync_single_for_device(sky2->hw->pdev, re->data_addr,
-					       length, PCI_DMA_FROMDEVICE);
 		re->skb->ip_summed = CHECKSUM_NONE;
 		skb_put(skb, length);
 	}
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 8211b9a..b43d473 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -5002,7 +5002,6 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 			skb_copy_from_linear_data(skb, copy_skb->data, len);
-			pci_dma_sync_single_for_device(tp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 
 			/* We'll reuse the original ring buffer. */
 			skb = copy_skb;
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index e3855ae..30fb6e8 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -837,9 +837,6 @@ static void olympic_rx(struct net_device *dev)
 							skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
 								      skb_put(skb,length - 4),
 								      length - 4);
-							pci_dma_sync_single_for_device(olympic_priv->pdev,
-								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
-								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 							skb->protocol = tr_type_trans(skb,dev) ; 
 							netif_rx(skb) ; 
 						} 
@@ -856,9 +853,6 @@ static void olympic_rx(struct net_device *dev)
 							skb_copy_from_linear_data(olympic_priv->rx_ring_skb[rx_ring_last_received],
 								      skb_put(skb, cpy_length),
 								      cpy_length);
-							pci_dma_sync_single_for_device(olympic_priv->pdev,
-								le32_to_cpu(olympic_priv->olympic_rx_ring[rx_ring_last_received].buffer),
-								olympic_priv->pkt_buf_sz,PCI_DMA_FROMDEVICE) ;
 						} while (--i) ; 
 						skb_trim(skb,skb->len-4) ; 
 						skb->protocol = tr_type_trans(skb,dev);
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 15d878b..00d435d 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -323,9 +323,6 @@ vxge_rx_complete(struct vxge_ring *ring, struct sk_buff *skb, u16 vlan,
 static inline void vxge_re_pre_post(void *dtr, struct vxge_ring *ring,
 				    struct vxge_rx_priv *rx_priv)
 {
-	pci_dma_sync_single_for_device(ring->pdev,
-		rx_priv->data_dma, rx_priv->data_size, PCI_DMA_FROMDEVICE);
-
 	vxge_hw_ring_rxd_1b_set(dtr, rx_priv->data_dma, rx_priv->data_size);
 	vxge_hw_ring_rxd_pre_post(ring->handle, dtr);
 }
diff --git a/drivers/net/wireless/b43legacy/dma.c b/drivers/net/wireless/b43legacy/dma.c
index c33934a..11839be 100644
--- a/drivers/net/wireless/b43legacy/dma.c
+++ b/drivers/net/wireless/b43legacy/dma.c
@@ -433,17 +433,6 @@ void sync_descbuffer_for_cpu(struct b43legacy_dmaring *ring,
 }
 
 static inline
-void sync_descbuffer_for_device(struct b43legacy_dmaring *ring,
-				dma_addr_t addr,
-				size_t len)
-{
-	B43legacy_WARN_ON(ring->tx);
-
-	dma_sync_single_for_device(ring->dev->dev->dma_dev,
-				   addr, len, DMA_FROM_DEVICE);
-}
-
-static inline
 void free_descriptor_buffer(struct b43legacy_dmaring *ring,
 			    struct b43legacy_dmadesc_meta *meta,
 			    int irq_context)
@@ -1556,8 +1545,6 @@ static void dma_rx(struct b43legacy_dmaring *ring,
 		}
 		b43legacy_handle_hwtxstatus(ring->dev, hw);
 		/* recycle the descriptor buffer. */
-		sync_descbuffer_for_device(ring, meta->dmaaddr,
-					   ring->rx_buffersize);
 
 		return;
 	}
@@ -1573,8 +1560,6 @@ static void dma_rx(struct b43legacy_dmaring *ring,
 		} while (len == 0 && i++ < 5);
 		if (unlikely(len == 0)) {
 			/* recycle the descriptor buffer. */
-			sync_descbuffer_for_device(ring, meta->dmaaddr,
-						   ring->rx_buffersize);
 			goto drop;
 		}
 	}
@@ -1590,8 +1575,6 @@ static void dma_rx(struct b43legacy_dmaring *ring,
 		while (1) {
 			desc = ops->idx2desc(ring, *slot, &meta);
 			/* recycle the descriptor buffer. */
-			sync_descbuffer_for_device(ring, meta->dmaaddr,
-						   ring->rx_buffersize);
 			*slot = next_slot(ring, *slot);
 			cnt++;
 			tmp -= ring->rx_buffersize;
@@ -1609,8 +1592,6 @@ static void dma_rx(struct b43legacy_dmaring *ring,
 	if (unlikely(err)) {
 		b43legacydbg(ring->dev->wl, "DMA RX: setup_rx_descbuffer()"
 			     " failed\n");
-		sync_descbuffer_for_device(ring, dmaaddr,
-					   ring->rx_buffersize);
 		goto drop;
 	}
 
-- 
1.7.5.4


------------------------------------------------------------------------------
All of the data generated in your IT infrastructure is seriously valuable.
Why? It contains a definitive record of application performance, security 
threats, fraudulent activity, and more. Splunk takes this data and makes 
sense of it. IT sense. And common sense.
http://p.sf.net/sfu/splunk-d2d-c2
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel® Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 06/46] net/tokenring: 3c359: fix DMA API usage
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 02/46] net: wrap common patterns of rx handler code Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 05/46] net: bnx2x: fix DMA sync direction Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 01/46] net: introduce __netdev_alloc_skb_aligned() Michał Mirosław
                   ` (14 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

TX/RX rings should be allocated from DMA coherent memory (driver does
not sync the descriptors in any way).

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/tokenring/3c359.c |   49 +++++++++++++++++++++++++----------------
 drivers/net/tokenring/3c359.h |    4 +-
 2 files changed, 32 insertions(+), 21 deletions(-)

diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index b6162fe..d321640 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -672,21 +672,25 @@ static int xl_open(struct net_device *dev)
 	 * Now to set up the Rx and Tx buffer structures
 	 */
 	/* These MUST be on 8 byte boundaries */
-	xl_priv->xl_tx_ring = kzalloc((sizeof(struct xl_tx_desc) * XL_TX_RING_SIZE) + 7, GFP_DMA | GFP_KERNEL);
+	xl_priv->xl_tx_ring = dma_alloc_coherent(&xl_priv->pdev->dev,
+		sizeof(struct xl_tx_desc) * XL_TX_RING_SIZE,
+		&xl_priv->tx_ring_dma_addr, GFP_KERNEL);
 	if (xl_priv->xl_tx_ring == NULL) {
 		printk(KERN_WARNING "%s: Not enough memory to allocate tx buffers.\n",
 				     dev->name);
-		free_irq(dev->irq,dev);
-		return -ENOMEM;
+		goto err_free_irq;
 	}
-	xl_priv->xl_rx_ring = kzalloc((sizeof(struct xl_rx_desc) * XL_RX_RING_SIZE) +7, GFP_DMA | GFP_KERNEL);
+	xl_priv->xl_rx_ring = dma_alloc_coherent(&xl_priv->pdev->dev,
+		sizeof(struct xl_rx_desc) * XL_RX_RING_SIZE,
+		&xl_priv->rx_ring_dma_addr, GFP_KERNEL);
 	if (xl_priv->xl_rx_ring == NULL) {
 		printk(KERN_WARNING "%s: Not enough memory to allocate rx buffers.\n",
 				     dev->name);
-		free_irq(dev->irq,dev);
-		kfree(xl_priv->xl_tx_ring);
-		return -ENOMEM;
+		goto err_free_tx;
 	}
+	/* dma_alloc_coherent() should provide for the following */
+	BUG_ON(!IS_ALIGNED((unsigned long)xl_priv->xl_tx_ring, 8));
+	BUG_ON(!IS_ALIGNED((unsigned long)xl_priv->xl_rx_ring, 8));
 
 	 /* Setup Rx Ring */
 	 for (i=0 ; i < XL_RX_RING_SIZE ; i++) { 
@@ -704,10 +708,7 @@ static int xl_open(struct net_device *dev)
 
 	if (i==0) { 
 		printk(KERN_WARNING "%s: Not enough memory to allocate rx buffers. Adapter disabled\n",dev->name);
-		free_irq(dev->irq,dev) ; 
-		kfree(xl_priv->xl_tx_ring);
-		kfree(xl_priv->xl_rx_ring);
-		return -EIO ; 
+		goto err_free_rxtx;
 	} 
 
 	xl_priv->rx_ring_no = i ; 
@@ -722,8 +723,6 @@ static int xl_open(struct net_device *dev)
 	
 	/* Setup Tx Ring */
 	
-	xl_priv->tx_ring_dma_addr = pci_map_single(xl_priv->pdev,xl_priv->xl_tx_ring, sizeof(struct xl_tx_desc) * XL_TX_RING_SIZE,PCI_DMA_TODEVICE) ; 
-	
 	xl_priv->tx_ring_head = 1 ; 
 	xl_priv->tx_ring_tail = 255 ; /* Special marker for first packet */
 	xl_priv->free_ring_entries = XL_TX_RING_SIZE ; 
@@ -752,7 +751,18 @@ static int xl_open(struct net_device *dev)
 
 	netif_start_queue(dev) ; 	
 	return 0;
-	
+
+err_free_rxtx:
+	dma_free_coherent(&xl_priv->pdev->dev,
+		sizeof(struct xl_rx_desc) * XL_RX_RING_SIZE,
+		xl_priv->xl_rx_ring, xl_priv->rx_ring_dma_addr);
+err_free_tx:
+	dma_free_coherent(&xl_priv->pdev->dev,
+		sizeof(struct xl_tx_desc) * XL_TX_RING_SIZE,
+		xl_priv->xl_tx_ring, xl_priv->tx_ring_dma_addr);
+err_free_irq:
+	free_irq(dev->irq,dev);
+	return -ENOMEM;
 }	
 
 static int xl_open_hw(struct net_device *dev) 
@@ -1060,12 +1070,13 @@ static void xl_freemem(struct net_device *dev)
 	} 
 
 	/* unmap ring */
-	pci_unmap_single(xl_priv->pdev,xl_priv->rx_ring_dma_addr, sizeof(struct xl_rx_desc) * XL_RX_RING_SIZE, PCI_DMA_FROMDEVICE) ; 
-	
-	pci_unmap_single(xl_priv->pdev,xl_priv->tx_ring_dma_addr, sizeof(struct xl_tx_desc) * XL_TX_RING_SIZE, PCI_DMA_TODEVICE) ; 
+	dma_free_coherent(&xl_priv->pdev->dev,
+		sizeof(struct xl_rx_desc) * XL_RX_RING_SIZE,
+		xl_priv->xl_rx_ring, xl_priv->rx_ring_dma_addr);
 
-	kfree(xl_priv->xl_rx_ring) ; 
-	kfree(xl_priv->xl_tx_ring) ; 
+	dma_free_coherent(&xl_priv->pdev->dev,
+		sizeof(struct xl_tx_desc) * XL_TX_RING_SIZE,
+		xl_priv->xl_tx_ring, xl_priv->tx_ring_dma_addr);
 
 	return  ; 
 }
diff --git a/drivers/net/tokenring/3c359.h b/drivers/net/tokenring/3c359.h
index bcb1a6b..baefdd3 100644
--- a/drivers/net/tokenring/3c359.h
+++ b/drivers/net/tokenring/3c359.h
@@ -282,8 +282,8 @@ struct xl_private {
 	unsigned char xl_functional_addr[4] ; 
 	u16 xl_addr_table_addr, xl_parms_addr ; 
 	u8 xl_laa[6] ; 
-	u32 rx_ring_dma_addr ; 
-	u32 tx_ring_dma_addr ; 
+	dma_addr_t rx_ring_dma_addr;
+	dma_addr_t tx_ring_dma_addr;
 
 	/* firmware section */
 	const struct firmware *fw;
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 05/46] net: bnx2x: fix DMA sync direction
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 02/46] net: wrap common patterns of rx handler code Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 06/46] net/tokenring: 3c359: fix DMA API usage Michał Mirosław
                   ` (15 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev; +Cc: Eilon Greenstein

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/bnx2x/bnx2x_cmn.c     |    2 +-
 drivers/net/bnx2x/bnx2x_ethtool.c |    2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index bb75560..4f9164c 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -658,7 +658,7 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 			/* non TPA */
 			len = le16_to_cpu(cqe_fp->pkt_len);
 			pad = cqe_fp->placement_offset;
-			dma_sync_single_for_device(&bp->pdev->dev,
+			dma_sync_single_for_cpu(&bp->pdev->dev,
 					dma_unmap_addr(rx_buf, mapping),
 						       pad + RX_COPY_THRESH,
 						       DMA_FROM_DEVICE);
diff --git a/drivers/net/bnx2x/bnx2x_ethtool.c b/drivers/net/bnx2x/bnx2x_ethtool.c
index 1a3ed41..9a98b83 100644
--- a/drivers/net/bnx2x/bnx2x_ethtool.c
+++ b/drivers/net/bnx2x/bnx2x_ethtool.c
@@ -1751,7 +1751,7 @@ static int bnx2x_run_loopback(struct bnx2x *bp, int loopback_mode)
 		goto test_loopback_rx_exit;
 
 	rx_buf = &fp_rx->rx_buf_ring[RX_BD(fp_rx->rx_bd_cons)];
-	dma_sync_single_for_device(&bp->pdev->dev,
+	dma_sync_single_for_cpu(&bp->pdev->dev,
 				   dma_unmap_addr(rx_buf, mapping),
 				   fp_rx->rx_buf_size, DMA_FROM_DEVICE);
 	skb = rx_buf->skb;
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 00/46] Clean up RX copybreak and DMA handling
@ 2011-07-11  0:52 Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 02/46] net: wrap common patterns of rx handler code Michał Mirosław
                   ` (17 more replies)
  0 siblings, 18 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Quick rx_copybreak cleaning turned out to raise more dust around...

Most drivers use dma_sync_single_for_device() for buffers that are
written only by device. That's probably because of a bad example in
current Documentation/DMA-API-HOWTO.txt. Why is it bad? It unnecessarily
flushes cachelines and CPU write buffers for memory that is not used
by the hardware. Documentation/DMA-API.txt (part Id) describes exactly
when streaming DMA buffers should be synced and in which direction.
Syncing DMA_FROM_DEVICE mappings for device is not among them.

A lot of drivers drop packets already DMA'd in on memory pressure. This
is suboptimal:
  1. under packet storm and memory pressure NIC keeps generating interrupts
     (if non-NAPI) and indicating new buffers because it always has free
     RX buffers --- this only wastes CPU and bus bandwidth transferring
     data that is going to be immediately discarded;
  2. for users of swap over network (NBD, NFS - assuming those get
     fixed, or maybe FCoE?) this can cause deadlock if the packets
     (acks maybe?) are needed to reclaim memory.

It is unlikely that you'll ever hit the dark scenarios above, but if you do
you'll have a hard time debugging.

Menu:
 1..2:
	wrap code commonly used in drivers into generic functions
 3..4:
	remove dma_sync_to_device(DMA_FROM_DEVICE) from drivers not
	converted to common rx handling functions
 5..10:
	fix various DMA API usage bugs in rx handling code
 11..14:
	clean up rx buffer allocation in sun* drivers
 15:
	b43: use kfree_skb() if the skb is known not to have been used
 16:
	cxgb3: remove code dropping packets on memory pressure in driver
 46:
	mark some drivers that drop packets from rx queue head when under
	memory pressure

Left out for later:
 17..34:
	convert multiple drivers to common rx_copybreak handling
 35..45:
	convert multiple drivers to common rx_copybreak handling; those
	drivers drop rxed packets when under memory pressure

Patches 17..45 are more-or-less templated and in the same spirit as
the earlier version.
[davem: I plan to send them after/if you agree to take patch 2. Or earlier,
if you want them all at once anyway.]

Best Regards,
Michał Mirosław

---

Michał Mirosław (46):
  net: introduce __netdev_alloc_skb_aligned()
  net: wrap common patterns of rx handler code
  net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE)
  net/wireless: p54: remove useless
    dma_sync_single_for_device(DMA_FROM_DEVICE)
  net: bnx2x: fix DMA sync direction
  net/tokenring: 3c359: fix DMA API usage
  net/wireless: ath9k: fix DMA API usage
  net/wireless: b43: fix DMA direction for RX buffers
  net: octeon_mgmt: fix DMA unmap size
  net: jme: convert to generic DMA API
  net: sungem: cleanup RX skb allocation
  net: sunhme: cleanup RX skb allocation
  net: sunbmac: cleanup RX skb allocation
  net: sunbmac: cleanup magic '34'
  net/wireless: b43: use kfree_skb() for untouched skbs
  net: cxgb3: don't drop packets on memory pressure in driver
  net: 3c59x: use common rx_copybreak handling
  net: epic100: use common rx_copybreak handling
  net: fealnx: use common rx_copybreak handling
  net: hamachi: use common rx_copybreak handling
  net: bcm63xx: use common rx_copybreak handling
  net: chelsio: use common rx_copybreak handling
  net: cxgb3: use common rx_copybreak handling
  net: dl2k: use common rx_copybreak handling
  net: natsemi: use common rx_copybreak handling
  net: sis190: use common rx_copybreak handling
  net: starfire: use common rx_copybreak handling
  net: sundance: use common rx_copybreak handling
  net: tulip/interrupt: use common rx_copybreak handling
  net: tulip/winbond-840: use common rx_copybreak handling
  net: typhoon: use common rx_copybreak handling
  net: via-rhine: use common rx_copybreak handling
  net: via-velocity: use common rx_copybreak handling
  net: yellowfin: use common rx_copybreak handling
  net: lib82596: use common rx_copybreak handling [strict refill!]
  net: pcnet32: use common rx_copybreak handling [strict refill!]
  net: sgiseeq: use common rx_copybreak handling [strict refill!]
  net: tulip/de2104x: use common rx_copybreak handling [strict refill!]
  net: sunhme: use common rx_copybreak handling [strict refill!]
  net: sunbmac: use common rx_copybreak handling [strict refill!]
  net: rrunner: use common rx_copybreak handling [strict refill!]
  net: greth: use common rx_copybreak handling [strict refill!]
  net: sunbmac: use common rx_copybreak handling [strict refill!]
  net: tokenring/3c359: use common rx_copybreak handling [strict
    refill!]
  net/wireless: adm8211: use common rx_copybreak handling [strict
    refill!]
  net: mark drivers that drop packets from rx queue head under memory
    pressure

 drivers/net/3c59x.c                         |   23 +---
 drivers/net/arm/ep93xx_eth.c                |    6 +-
 drivers/net/b44.c                           |    4 -
 drivers/net/bcm63xx_enet.c                  |   28 +----
 drivers/net/bnx2.c                          |    7 +-
 drivers/net/bnx2x/bnx2x_cmn.c               |    5 +-
 drivers/net/bnx2x/bnx2x_cmn.h               |    5 -
 drivers/net/bnx2x/bnx2x_ethtool.c           |    2 +-
 drivers/net/cassini.c                       |   15 +--
 drivers/net/chelsio/sge.c                   |   42 +------
 drivers/net/cxgb3/sge.c                     |   57 ++-------
 drivers/net/dl2k.c                          |   28 +----
 drivers/net/e100.c                          |    3 -
 drivers/net/e1000e/netdev.c                 |    4 +-
 drivers/net/epic100.c                       |   32 ++----
 drivers/net/fealnx.c                        |   40 ++-----
 drivers/net/greth.c                         |   67 ++++-------
 drivers/net/hamachi.c                       |   42 +------
 drivers/net/jme.c                           |   40 +++----
 drivers/net/lib82596.c                      |   70 +++--------
 drivers/net/mlx4/en_rx.c                    |    8 +-
 drivers/net/natsemi.c                       |   37 ++-----
 drivers/net/octeon/octeon_mgmt.c            |    9 +-
 drivers/net/pcnet32.c                       |   53 ++-------
 drivers/net/qlge/qlge_main.c                |   11 --
 drivers/net/r8169.c                         |    4 +-
 drivers/net/rrunner.c                       |   66 +++--------
 drivers/net/s2io.c                          |    6 +-
 drivers/net/sgiseeq.c                       |   42 +++-----
 drivers/net/sis190.c                        |   40 +------
 drivers/net/skge.c                          |    6 +-
 drivers/net/sky2.c                          |    4 +-
 drivers/net/starfire.c                      |   28 ++---
 drivers/net/sunbmac.c                       |   78 +++----------
 drivers/net/sunbmac.h                       |   18 +---
 drivers/net/sundance.c                      |   26 +---
 drivers/net/sungem.c                        |   84 +++----------
 drivers/net/sungem.h                        |    4 +-
 drivers/net/sunhme.c                        |   61 ++--------
 drivers/net/sunhme.h                        |   14 +--
 drivers/net/tg3.c                           |    3 +-
 drivers/net/tokenring/3c359.c               |   74 ++++++------
 drivers/net/tokenring/3c359.h               |    4 +-
 drivers/net/tokenring/olympic.c             |    8 +-
 drivers/net/tulip/de2104x.c                 |   39 ++-----
 drivers/net/tulip/interrupt.c               |   77 +++----------
 drivers/net/tulip/winbond-840.c             |   28 +----
 drivers/net/typhoon.c                       |   26 +---
 drivers/net/via-rhine.c                     |   38 +-----
 drivers/net/via-velocity.c                  |   61 ++--------
 drivers/net/vxge/vxge-main.c                |    6 +-
 drivers/net/wireless/adm8211.c              |   41 +------
 drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 +-
 drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
 drivers/net/wireless/ath/ath9k/recv.c       |   10 +-
 drivers/net/wireless/b43/dma.c              |   15 ++-
 drivers/net/wireless/b43legacy/dma.c        |   19 ---
 drivers/net/wireless/p54/p54pci.c           |    2 -
 drivers/net/wireless/p54/txrx.c             |   22 ++--
 drivers/net/yellowfin.c                     |   27 +----
 include/linux/skbuff.h                      |  169 +++++++++++++++++++++++++++
 61 files changed, 586 insertions(+), 1208 deletions(-)

-- 
1.7.5.4


^ permalink raw reply	[flat|nested] 76+ messages in thread

* [PATCH v2 01/46] net: introduce __netdev_alloc_skb_aligned()
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (2 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 06/46] net/tokenring: 3c359: fix DMA API usage Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  5:46   ` [PATCH net-next-2.6] net: introduce build_skb() Eric Dumazet
  2011-07-11  0:52 ` [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE) Michał Mirosław
                   ` (13 subsequent siblings)
  17 siblings, 1 reply; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Introduce __netdev_alloc_skb_aligned() to return skb with skb->data
aligned at specified 2^n multiple.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 include/linux/skbuff.h |   27 +++++++++++++++++++++++++++
 1 files changed, 27 insertions(+), 0 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 32ada53..c873897 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1561,6 +1561,33 @@ extern struct sk_buff *__netdev_alloc_skb(struct net_device *dev,
 		unsigned int length, gfp_t gfp_mask);
 
 /**
+ *	__netdev_alloc_skb_aligned - allocate an skbuff for rx on a specific device
+ *	@dev: network device to receive on
+ *	@length: length to allocate
+ *	@align: required skb->data alignment
+ *	@gfp_mask: get_free_pages mask, passed to alloc_skb
+ *
+ *	Allocate a new &sk_buff and assign it a usage count of one. The
+ *	buffer has unspecified headroom built in. Users should allocate
+ *	the headroom they think they need without accounting for the
+ *	built in space. The built in space is used for optimisations.
+ *
+ *	%NULL is returned if there is no free memory.
+ */
+static inline struct sk_buff *__netdev_alloc_skb_aligned(struct net_device *dev,
+		unsigned int length, unsigned int align, gfp_t gfp_mask)
+{
+	struct sk_buff *skb;
+
+	skb = __alloc_skb(length + NET_SKB_PAD + align, gfp_mask, 0, NUMA_NO_NODE);
+	if (likely(skb)) {
+		skb_reserve(skb, PTR_ALIGN(skb->data + NET_SKB_PAD, align) - skb->data);
+		skb->dev = dev;
+	}
+	return skb;
+}
+
+/**
  *	netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
  *	@length: length to allocate
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 08/46] net/wireless: b43: fix DMA direction for RX buffers
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (6 preceding siblings ...)
  2011-07-11  0:52   ` Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 09/46] net: octeon_mgmt: fix DMA unmap size Michał Mirosław
                   ` (9 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev; +Cc: Stefano Brivio, linux-wireless

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/wireless/b43/dma.c |    9 +++++----
 1 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index 7a09a46..15b11f0 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -336,8 +336,9 @@ static inline
 		dmaaddr = dma_map_single(ring->dev->dev->dma_dev,
 					 buf, len, DMA_TO_DEVICE);
 	} else {
+		/* DMA_BIDIRECTIONAL because of b43_poison_rx_buffer() */
 		dmaaddr = dma_map_single(ring->dev->dev->dma_dev,
-					 buf, len, DMA_FROM_DEVICE);
+					 buf, len, DMA_BIDIRECTIONAL);
 	}
 
 	return dmaaddr;
@@ -352,7 +353,7 @@ static inline
 				 addr, len, DMA_TO_DEVICE);
 	} else {
 		dma_unmap_single(ring->dev->dev->dma_dev,
-				 addr, len, DMA_FROM_DEVICE);
+				 addr, len, DMA_BIDIRECTIONAL);
 	}
 }
 
@@ -362,7 +363,7 @@ static inline
 {
 	B43_WARN_ON(ring->tx);
 	dma_sync_single_for_cpu(ring->dev->dev->dma_dev,
-				    addr, len, DMA_FROM_DEVICE);
+				    addr, len, DMA_BIDIRECTIONAL);
 }
 
 static inline
@@ -371,7 +372,7 @@ static inline
 {
 	B43_WARN_ON(ring->tx);
 	dma_sync_single_for_device(ring->dev->dev->dma_dev,
-				   addr, len, DMA_FROM_DEVICE);
+				   addr, len, DMA_BIDIRECTIONAL);
 }
 
 static inline
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-11  0:52   ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev
  Cc: Luis R. Rodriguez, Jouni Malinen, Vasanthakumar Thiagarajan,
	Senthil Balasubramanian, linux-wireless, ath9k-devel

Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
assumptions --- dma_sync_single_for_device() call can be removed.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
 drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
 drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index 575e185..2d211b6 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -524,9 +524,9 @@ void ath9k_hw_addrxbuf_edma(struct ath_hw *ah, u32 rxdp,
 EXPORT_SYMBOL(ath9k_hw_addrxbuf_edma);
 
 int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs,
-				 void *buf_addr)
+				 const void *buf_addr)
 {
-	struct ar9003_rxs *rxsp = (struct ar9003_rxs *) buf_addr;
+	const struct ar9003_rxs *rxsp = buf_addr;
 	unsigned int phyerr;
 
 	/* TODO: byte swap on big endian for ar9300_10 */
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.h b/drivers/net/wireless/ath/ath9k/ar9003_mac.h
index c504493..c310edc 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.h
@@ -114,7 +114,7 @@ void ath9k_hw_addrxbuf_edma(struct ath_hw *ah, u32 rxdp,
 
 int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah,
 				 struct ath_rx_status *rxs,
-				 void *buf_addr);
+				 const void *buf_addr);
 void ath9k_hw_reset_txstatus_ring(struct ath_hw *ah);
 void ath9k_hw_setup_statusring(struct ath_hw *ah, void *ts_start,
 			       u32 ts_paddr_start,
diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 70dc8ec..c5f46d5 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -156,7 +156,7 @@ static bool ath_rx_edma_buf_link(struct ath_softc *sc,
 	ATH_RXBUF_RESET(bf);
 	memset(skb->data, 0, ah->caps.rx_status_len);
 	dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
-				ah->caps.rx_status_len, DMA_TO_DEVICE);
+				ah->caps.rx_status_len, DMA_BIDIRECTIONAL);
 
 	SKB_CB_ATHBUF(skb) = bf;
 	ath9k_hw_addrxbuf_edma(ah, bf->bf_buf_addr, qtype);
@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
 	BUG_ON(!bf);
 
 	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
-				common->rx_bufsize, DMA_FROM_DEVICE);
+				common->rx_bufsize, DMA_BIDIRECTIONAL);
 
 	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
-	if (ret == -EINPROGRESS) {
-		/*let device gain the buffer again*/
-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
-				common->rx_bufsize, DMA_FROM_DEVICE);
+	if (ret == -EINPROGRESS)
 		return false;
-	}
 
 	__skb_unlink(skb, &rx_edma->rx_fifo);
 	if (ret == -EINVAL) {
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-11  0:52   ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev-u79uwXL29TY76Z2rM5mHXA
  Cc: Luis R. Rodriguez, Jouni Malinen, Vasanthakumar Thiagarajan,
	Senthil Balasubramanian, linux-wireless-u79uwXL29TY76Z2rM5mHXA,
	ath9k-devel-xDcbHBWguxHbcTqmT+pZeQ

Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
assumptions --- dma_sync_single_for_device() call can be removed.

Signed-off-by: Michał Mirosław <mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org>
---
 drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
 drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
 drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index 575e185..2d211b6 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -524,9 +524,9 @@ void ath9k_hw_addrxbuf_edma(struct ath_hw *ah, u32 rxdp,
 EXPORT_SYMBOL(ath9k_hw_addrxbuf_edma);
 
 int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs,
-				 void *buf_addr)
+				 const void *buf_addr)
 {
-	struct ar9003_rxs *rxsp = (struct ar9003_rxs *) buf_addr;
+	const struct ar9003_rxs *rxsp = buf_addr;
 	unsigned int phyerr;
 
 	/* TODO: byte swap on big endian for ar9300_10 */
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.h b/drivers/net/wireless/ath/ath9k/ar9003_mac.h
index c504493..c310edc 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.h
@@ -114,7 +114,7 @@ void ath9k_hw_addrxbuf_edma(struct ath_hw *ah, u32 rxdp,
 
 int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah,
 				 struct ath_rx_status *rxs,
-				 void *buf_addr);
+				 const void *buf_addr);
 void ath9k_hw_reset_txstatus_ring(struct ath_hw *ah);
 void ath9k_hw_setup_statusring(struct ath_hw *ah, void *ts_start,
 			       u32 ts_paddr_start,
diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 70dc8ec..c5f46d5 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -156,7 +156,7 @@ static bool ath_rx_edma_buf_link(struct ath_softc *sc,
 	ATH_RXBUF_RESET(bf);
 	memset(skb->data, 0, ah->caps.rx_status_len);
 	dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
-				ah->caps.rx_status_len, DMA_TO_DEVICE);
+				ah->caps.rx_status_len, DMA_BIDIRECTIONAL);
 
 	SKB_CB_ATHBUF(skb) = bf;
 	ath9k_hw_addrxbuf_edma(ah, bf->bf_buf_addr, qtype);
@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
 	BUG_ON(!bf);
 
 	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
-				common->rx_bufsize, DMA_FROM_DEVICE);
+				common->rx_bufsize, DMA_BIDIRECTIONAL);
 
 	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
-	if (ret == -EINPROGRESS) {
-		/*let device gain the buffer again*/
-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
-				common->rx_bufsize, DMA_FROM_DEVICE);
+	if (ret == -EINPROGRESS)
 		return false;
-	}
 
 	__skb_unlink(skb, &rx_edma->rx_fifo);
 	if (ret == -EINVAL) {
-- 
1.7.5.4

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-11  0:52   ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: ath9k-devel

Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
assumptions --- dma_sync_single_for_device() call can be removed.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
 drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
 drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
 3 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.c b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
index 575e185..2d211b6 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.c
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.c
@@ -524,9 +524,9 @@ void ath9k_hw_addrxbuf_edma(struct ath_hw *ah, u32 rxdp,
 EXPORT_SYMBOL(ath9k_hw_addrxbuf_edma);
 
 int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah, struct ath_rx_status *rxs,
-				 void *buf_addr)
+				 const void *buf_addr)
 {
-	struct ar9003_rxs *rxsp = (struct ar9003_rxs *) buf_addr;
+	const struct ar9003_rxs *rxsp = buf_addr;
 	unsigned int phyerr;
 
 	/* TODO: byte swap on big endian for ar9300_10 */
diff --git a/drivers/net/wireless/ath/ath9k/ar9003_mac.h b/drivers/net/wireless/ath/ath9k/ar9003_mac.h
index c504493..c310edc 100644
--- a/drivers/net/wireless/ath/ath9k/ar9003_mac.h
+++ b/drivers/net/wireless/ath/ath9k/ar9003_mac.h
@@ -114,7 +114,7 @@ void ath9k_hw_addrxbuf_edma(struct ath_hw *ah, u32 rxdp,
 
 int ath9k_hw_process_rxdesc_edma(struct ath_hw *ah,
 				 struct ath_rx_status *rxs,
-				 void *buf_addr);
+				 const void *buf_addr);
 void ath9k_hw_reset_txstatus_ring(struct ath_hw *ah);
 void ath9k_hw_setup_statusring(struct ath_hw *ah, void *ts_start,
 			       u32 ts_paddr_start,
diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 70dc8ec..c5f46d5 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -156,7 +156,7 @@ static bool ath_rx_edma_buf_link(struct ath_softc *sc,
 	ATH_RXBUF_RESET(bf);
 	memset(skb->data, 0, ah->caps.rx_status_len);
 	dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
-				ah->caps.rx_status_len, DMA_TO_DEVICE);
+				ah->caps.rx_status_len, DMA_BIDIRECTIONAL);
 
 	SKB_CB_ATHBUF(skb) = bf;
 	ath9k_hw_addrxbuf_edma(ah, bf->bf_buf_addr, qtype);
@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
 	BUG_ON(!bf);
 
 	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
-				common->rx_bufsize, DMA_FROM_DEVICE);
+				common->rx_bufsize, DMA_BIDIRECTIONAL);
 
 	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
-	if (ret == -EINPROGRESS) {
-		/*let device gain the buffer again*/
-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
-				common->rx_bufsize, DMA_FROM_DEVICE);
+	if (ret == -EINPROGRESS)
 		return false;
-	}
 
 	__skb_unlink(skb, &rx_edma->rx_fifo);
 	if (ret == -EINVAL) {
-- 
1.7.5.4

^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 10/46] net: jme: convert to generic DMA API
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (8 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 09/46] net: octeon_mgmt: fix DMA unmap size Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 12/46] net: sunhme: cleanup RX skb allocation Michał Mirosław
                   ` (7 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev; +Cc: Guo-Fu Tseng

This also fixes bad pci_dma_map_page() usage and missing RX unmaps.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/jme.c |   37 ++++++++++++-------------------------
 1 files changed, 12 insertions(+), 25 deletions(-)

diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index 6b2a5e7..ad69dae 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -759,11 +759,8 @@ jme_make_new_rx_buf(struct jme_adapter *jme, int i)
 
 	rxbi->skb = skb;
 	rxbi->len = skb_tailroom(skb);
-	rxbi->mapping = pci_map_page(jme->pdev,
-					virt_to_page(skb->data),
-					offset_in_page(skb->data),
-					rxbi->len,
-					PCI_DMA_FROMDEVICE);
+	rxbi->mapping = dma_map_single(&jme->pdev->dev, skb->data,
+					rxbi->len, DMA_FROM_DEVICE);
 
 	return 0;
 }
@@ -776,10 +773,10 @@ jme_free_rx_buf(struct jme_adapter *jme, int i)
 	rxbi += i;
 
 	if (rxbi->skb) {
-		pci_unmap_page(jme->pdev,
+		dma_unmap_single(&jme->pdev->dev,
 				 rxbi->mapping,
 				 rxbi->len,
-				 PCI_DMA_FROMDEVICE);
+				 DMA_FROM_DEVICE);
 		dev_kfree_skb(rxbi->skb);
 		rxbi->skb = NULL;
 		rxbi->mapping = 0;
@@ -1022,17 +1019,12 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx)
 	rxbi += idx;
 
 	skb = rxbi->skb;
-	pci_dma_sync_single_for_cpu(jme->pdev,
-					rxbi->mapping,
-					rxbi->len,
-					PCI_DMA_FROMDEVICE);
+	dma_unmap_single(&jme->pdev->dev, rxbi->mapping, rxbi->len,
+			 DMA_FROM_DEVICE);
 
 	if (unlikely(jme_make_new_rx_buf(jme, idx))) {
-		pci_dma_sync_single_for_device(jme->pdev,
-						rxbi->mapping,
-						rxbi->len,
-						PCI_DMA_FROMDEVICE);
-
+		rxbi->mapping = dma_map_single(&jme->pdev->dev, skb->data,
+						rxbi->len, DMA_FROM_DEVICE);
 		++(NET_STAT(jme).rx_dropped);
 	} else {
 		framesize = le16_to_cpu(rxdesc->descwb.framesize)
@@ -1476,10 +1468,10 @@ jme_tx_clean_tasklet(unsigned long arg)
 				ttxbi = txbi + ((i + j) & (mask));
 				txdesc[(i + j) & (mask)].dw[0] = 0;
 
-				pci_unmap_page(jme->pdev,
+				dma_unmap_page(&jme->pdev->dev,
 						 ttxbi->mapping,
 						 ttxbi->len,
-						 PCI_DMA_TODEVICE);
+						 DMA_TO_DEVICE);
 
 				ttxbi->mapping = 0;
 				ttxbi->len = 0;
@@ -1883,16 +1875,11 @@ jme_fill_tx_map(struct pci_dev *pdev,
 {
 	dma_addr_t dmaaddr;
 
-	dmaaddr = pci_map_page(pdev,
+	dmaaddr = dma_map_page(&pdev->dev,
 				page,
 				page_offset,
 				len,
-				PCI_DMA_TODEVICE);
-
-	pci_dma_sync_single_for_device(pdev,
-				       dmaaddr,
-				       len,
-				       PCI_DMA_TODEVICE);
+				DMA_TO_DEVICE);
 
 	txdesc->dw[0] = 0;
 	txdesc->dw[1] = 0;
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 09/46] net: octeon_mgmt: fix DMA unmap size
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (7 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 08/46] net/wireless: b43: fix DMA direction for RX buffers Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 10/46] net: jme: convert to generic DMA API Michał Mirosław
                   ` (8 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Also: use netdev_alloc_skb_ip_align() for readability.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/octeon/octeon_mgmt.c |    9 ++++-----
 1 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index 429e08c..dd4a57a 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -152,16 +152,15 @@ static void octeon_mgmt_rx_fill_ring(struct net_device *netdev)
 		struct sk_buff *skb;
 
 		/* CN56XX pass 1 needs 8 bytes of padding.  */
-		size = netdev->mtu + OCTEON_MGMT_RX_HEADROOM + 8 + NET_IP_ALIGN;
+		size = netdev->mtu + OCTEON_MGMT_RX_HEADROOM + 8;
 
-		skb = netdev_alloc_skb(netdev, size);
+		skb = netdev_alloc_skb_ip_align(netdev, size);
 		if (!skb)
 			break;
-		skb_reserve(skb, NET_IP_ALIGN);
 		__skb_queue_tail(&p->rx_list, skb);
 
 		re.d64 = 0;
-		re.s.len = size;
+		re.s.len = size = skb_tailroom(skb);
 		re.s.addr = dma_map_single(p->dev, skb->data,
 					   size,
 					   DMA_FROM_DEVICE);
@@ -297,7 +296,7 @@ static u64 octeon_mgmt_dequeue_rx_buffer(struct octeon_mgmt *p,
 	*pskb = __skb_dequeue(&p->rx_list);
 
 	dma_unmap_single(p->dev, re.s.addr,
-			 ETH_FRAME_LEN + OCTEON_MGMT_RX_HEADROOM,
+			 skb_tailroom(*pskb),
 			 DMA_FROM_DEVICE);
 
 	return re.d64;
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 13/46] net: sunbmac: cleanup RX skb allocation
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (10 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 12/46] net: sunhme: cleanup RX skb allocation Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 11/46] net: sungem: " Michał Mirosław
                   ` (5 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/sunbmac.c |   22 +++++++++++-----------
 drivers/net/sunbmac.h |   18 +-----------------
 2 files changed, 12 insertions(+), 28 deletions(-)

diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index 297a424..e28d3ea 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -228,15 +228,15 @@ static void bigmac_init_rings(struct bigmac *bp, int from_irq)
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		struct sk_buff *skb;
 
-		skb = big_mac_alloc_skb(RX_BUF_ALLOC_SIZE, gfp_flags);
+		skb = __netdev_alloc_skb_aligned(dev,
+						 RX_BUF_ALLOC_SIZE,
+						 SUNBMAC_RX_ALIGNMENT,
+						 gfp_flags);
 		if (!skb)
 			continue;
 
 		bp->rx_skbs[i] = skb;
-		skb->dev = dev;
 
-		/* Because we reserve afterwards. */
-		skb_put(skb, ETH_FRAME_LEN);
 		skb_reserve(skb, 34);
 
 		bb->be_rxd[i].rx_addr =
@@ -828,7 +828,10 @@ static void bigmac_rx(struct bigmac *bp)
 			struct sk_buff *new_skb;
 
 			/* Now refill the entry, if we can. */
-			new_skb = big_mac_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
+			new_skb = __netdev_alloc_skb_aligned(dev,
+							     RX_BUF_ALLOC_SIZE,
+							     SUNBMAC_RX_ALIGNMENT,
+							     GFP_ATOMIC);
 			if (new_skb == NULL) {
 				drops++;
 				goto drop_it;
@@ -838,8 +841,6 @@ static void bigmac_rx(struct bigmac *bp)
 					 RX_BUF_ALLOC_SIZE - 34,
 					 DMA_FROM_DEVICE);
 			bp->rx_skbs[elem] = new_skb;
-			new_skb->dev = bp->dev;
-			skb_put(new_skb, ETH_FRAME_LEN);
 			skb_reserve(new_skb, 34);
 			this->rx_addr =
 				dma_map_single(&bp->bigmac_op->dev,
@@ -849,16 +850,15 @@ static void bigmac_rx(struct bigmac *bp)
 			this->rx_flags =
 				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - 34) & RXD_LENGTH));
 
-			/* Trim the original skb for the netif. */
-			skb_trim(skb, len);
+			skb_put(skb, len);
 		} else {
-			struct sk_buff *copy_skb = dev_alloc_skb(len + 2);
+			struct sk_buff *copy_skb =
+				netdev_alloc_skb_ip_align(dev, len);
 
 			if (copy_skb == NULL) {
 				drops++;
 				goto drop_it;
 			}
-			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			dma_sync_single_for_cpu(&bp->bigmac_op->dev,
 						this->rx_addr, len,
diff --git a/drivers/net/sunbmac.h b/drivers/net/sunbmac.h
index 4943e97..63dab2f 100644
--- a/drivers/net/sunbmac.h
+++ b/drivers/net/sunbmac.h
@@ -334,22 +334,6 @@ struct bigmac {
 	struct net_device	*dev;
 };
 
-/* We use this to acquire receive skb's that we can DMA directly into. */
-#define ALIGNED_RX_SKB_ADDR(addr) \
-        ((((unsigned long)(addr) + (64 - 1)) & ~(64 - 1)) - (unsigned long)(addr))
-
-static inline struct sk_buff *big_mac_alloc_skb(unsigned int length, gfp_t gfp_flags)
-{
-	struct sk_buff *skb;
-
-	skb = alloc_skb(length + 64, gfp_flags);
-	if(skb) {
-		int offset = ALIGNED_RX_SKB_ADDR(skb->data);
-
-		if(offset)
-			skb_reserve(skb, offset);
-	}
-	return skb;
-}
+#define SUNBMAC_RX_ALIGNMENT 64
 
 #endif /* !(_SUNBMAC_H) */
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 11/46] net: sungem: cleanup RX skb allocation
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (11 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 13/46] net: sunbmac: " Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 16/46] net: cxgb3: don't drop packets on memory pressure in driver Michał Mirosław
                   ` (4 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/sungem.c |   34 +++++++++++-----------------------
 drivers/net/sungem.h |    4 +++-
 2 files changed, 14 insertions(+), 24 deletions(-)

diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index ade35dd..e82617f 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -743,21 +743,6 @@ static __inline__ void gem_post_rxds(struct gem *gp, int limit)
 	}
 }
 
-#define ALIGNED_RX_SKB_ADDR(addr) \
-        ((((unsigned long)(addr) + (64UL - 1UL)) & ~(64UL - 1UL)) - (unsigned long)(addr))
-static __inline__ struct sk_buff *gem_alloc_skb(struct net_device *dev, int size,
-						gfp_t gfp_flags)
-{
-	struct sk_buff *skb = alloc_skb(size + 64, gfp_flags);
-
-	if (likely(skb)) {
-		unsigned long offset = ALIGNED_RX_SKB_ADDR(skb->data);
-		skb_reserve(skb, offset);
-		skb->dev = dev;
-	}
-	return skb;
-}
-
 static int gem_rx(struct gem *gp, int work_to_do)
 {
 	struct net_device *dev = gp->dev;
@@ -821,7 +806,10 @@ static int gem_rx(struct gem *gp, int work_to_do)
 		if (len > RX_COPY_THRESHOLD) {
 			struct sk_buff *new_skb;
 
-			new_skb = gem_alloc_skb(dev, RX_BUF_ALLOC_SIZE(gp), GFP_ATOMIC);
+			new_skb = __netdev_alloc_skb_aligned(dev,
+							     RX_BUF_ALLOC_SIZE(gp),
+							     SUNGEM_RX_ALIGNMENT,
+							     GFP_ATOMIC);
 			if (new_skb == NULL) {
 				drops++;
 				goto drop_it;
@@ -830,7 +818,6 @@ static int gem_rx(struct gem *gp, int work_to_do)
 				       RX_BUF_ALLOC_SIZE(gp),
 				       PCI_DMA_FROMDEVICE);
 			gp->rx_skbs[entry] = new_skb;
-			skb_put(new_skb, (gp->rx_buf_sz + RX_OFFSET));
 			rxd->buffer = cpu_to_le64(pci_map_page(gp->pdev,
 							       virt_to_page(new_skb->data),
 							       offset_in_page(new_skb->data),
@@ -838,17 +825,16 @@ static int gem_rx(struct gem *gp, int work_to_do)
 							       PCI_DMA_FROMDEVICE));
 			skb_reserve(new_skb, RX_OFFSET);
 
-			/* Trim the original skb for the netif. */
-			skb_trim(skb, len);
+			skb_put(skb, len);
 		} else {
-			struct sk_buff *copy_skb = netdev_alloc_skb(dev, len + 2);
+			struct sk_buff *copy_skb =
+				netdev_alloc_skb_ip_align(dev, len);
 
 			if (copy_skb == NULL) {
 				drops++;
 				goto drop_it;
 			}
 
-			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			pci_dma_sync_single_for_cpu(gp->pdev, dma_addr, len, PCI_DMA_FROMDEVICE);
 			skb_copy_from_linear_data(skb, copy_skb->data, len);
@@ -1637,7 +1623,10 @@ static void gem_init_rings(struct gem *gp)
 		struct sk_buff *skb;
 		struct gem_rxd *rxd = &gb->rxd[i];
 
-		skb = gem_alloc_skb(dev, RX_BUF_ALLOC_SIZE(gp), GFP_KERNEL);
+		skb = __netdev_alloc_skb_aligned(dev,
+						 RX_BUF_ALLOC_SIZE(gp),
+						 SUNGEM_RX_ALIGNMENT,
+						 GFP_KERNEL);
 		if (!skb) {
 			rxd->buffer = 0;
 			rxd->status_word = 0;
@@ -1645,7 +1634,6 @@ static void gem_init_rings(struct gem *gp)
 		}
 
 		gp->rx_skbs[i] = skb;
-		skb_put(skb, (gp->rx_buf_sz + RX_OFFSET));
 		dma_addr = pci_map_page(gp->pdev,
 					virt_to_page(skb->data),
 					offset_in_page(skb->data),
diff --git a/drivers/net/sungem.h b/drivers/net/sungem.h
index 835ce1b..0d486ce 100644
--- a/drivers/net/sungem.h
+++ b/drivers/net/sungem.h
@@ -935,7 +935,9 @@ struct gem_rxd {
 	  (GP)->tx_old - (GP)->tx_new - 1)
 
 #define RX_OFFSET          2
-#define RX_BUF_ALLOC_SIZE(gp)	((gp)->rx_buf_sz + 28 + RX_OFFSET + 64)
+#define SUNGEM_RX_ALIGNMENT 64		/* min: cache line size, see comment above */
+#define RX_BUF_ALLOC_SIZE(gp)	\
+	ALIGN((gp)->rx_buf_sz + RX_OFFSET, SUNGEM_RX_ALIGNMENT)
 
 #define RX_COPY_THRESHOLD  256
 
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 12/46] net: sunhme: cleanup RX skb allocation
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (9 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 10/46] net: jme: convert to generic DMA API Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 13/46] net: sunbmac: " Michał Mirosław
                   ` (6 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/sunhme.c |   18 ++++++++----------
 drivers/net/sunhme.h |   14 +-------------
 2 files changed, 9 insertions(+), 23 deletions(-)

diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index 856e05b..c73fdad 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -1265,7 +1265,8 @@ static void happy_meal_init_rings(struct happy_meal *hp)
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		struct sk_buff *skb;
 
-		skb = happy_meal_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
+		skb = __netdev_alloc_skb_aligned(dev, RX_BUF_ALLOC_SIZE,
+			SUNHME_RX_ALIGNMENT, GFP_ATOMIC);
 		if (!skb) {
 			hme_write_rxd(hp, &hb->happy_meal_rxd[i], 0, 0);
 			continue;
@@ -1273,8 +1274,6 @@ static void happy_meal_init_rings(struct happy_meal *hp)
 		hp->rx_skbs[i] = skb;
 		skb->dev = dev;
 
-		/* Because we reserve afterwards. */
-		skb_put(skb, (ETH_FRAME_LEN + RX_OFFSET + 4));
 		hme_write_rxd(hp, &hb->happy_meal_rxd[i],
 			      (RXFLAG_OWN | ((RX_BUF_ALLOC_SIZE - RX_OFFSET) << 16)),
 			      dma_map_single(hp->dma_dev, skb->data, RX_BUF_ALLOC_SIZE,
@@ -2025,32 +2024,31 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev)
 			struct sk_buff *new_skb;
 
 			/* Now refill the entry, if we can. */
-			new_skb = happy_meal_alloc_skb(RX_BUF_ALLOC_SIZE, GFP_ATOMIC);
+			new_skb = __netdev_alloc_skb_aligned(dev,
+							     RX_BUF_ALLOC_SIZE,
+							     SUNHME_RX_ALIGNMENT,
+							     GFP_ATOMIC);
 			if (new_skb == NULL) {
 				drops++;
 				goto drop_it;
 			}
 			dma_unmap_single(hp->dma_dev, dma_addr, RX_BUF_ALLOC_SIZE, DMA_FROM_DEVICE);
 			hp->rx_skbs[elem] = new_skb;
-			new_skb->dev = dev;
-			skb_put(new_skb, (ETH_FRAME_LEN + RX_OFFSET + 4));
 			hme_write_rxd(hp, this,
 				      (RXFLAG_OWN|((RX_BUF_ALLOC_SIZE-RX_OFFSET)<<16)),
 				      dma_map_single(hp->dma_dev, new_skb->data, RX_BUF_ALLOC_SIZE,
 						     DMA_FROM_DEVICE));
 			skb_reserve(new_skb, RX_OFFSET);
 
-			/* Trim the original skb for the netif. */
-			skb_trim(skb, len);
+			skb_put(skb, len);
 		} else {
-			struct sk_buff *copy_skb = dev_alloc_skb(len + 2);
+			struct sk_buff *copy_skb = netdev_alloc_skb_ip_align(dev, len);
 
 			if (copy_skb == NULL) {
 				drops++;
 				goto drop_it;
 			}
 
-			skb_reserve(copy_skb, 2);
 			skb_put(copy_skb, len);
 			dma_sync_single_for_cpu(hp->dma_dev, dma_addr, len, DMA_FROM_DEVICE);
 			skb_copy_from_linear_data(skb, copy_skb->data, len);
diff --git a/drivers/net/sunhme.h b/drivers/net/sunhme.h
index 64f2783..f584eb0 100644
--- a/drivers/net/sunhme.h
+++ b/drivers/net/sunhme.h
@@ -495,18 +495,6 @@ struct quattro {
 	int			  nranges;
 };
 
-/* We use this to acquire receive skb's that we can DMA directly into. */
-#define ALIGNED_RX_SKB_ADDR(addr) \
-        ((((unsigned long)(addr) + (64UL - 1UL)) & ~(64UL - 1UL)) - (unsigned long)(addr))
-#define happy_meal_alloc_skb(__length, __gfp_flags) \
-({	struct sk_buff *__skb; \
-	__skb = alloc_skb((__length) + 64, (__gfp_flags)); \
-	if(__skb) { \
-		int __offset = (int) ALIGNED_RX_SKB_ADDR(__skb->data); \
-		if(__offset) \
-			skb_reserve(__skb, __offset); \
-	} \
-	__skb; \
-})
+#define SUNHME_RX_ALIGNMENT 64
 
 #endif /* !(_SUNHME_H) */
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 15/46] net/wireless: b43: use kfree_skb() for untouched skbs
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (15 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  6:54 ` [PATCH v2 00/46] Clean up RX copybreak and DMA handling David Miller
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev; +Cc: Stefano Brivio, John W. Linville, linux-wireless

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/wireless/b43/dma.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/b43/dma.c b/drivers/net/wireless/b43/dma.c
index 15b11f0..ed78f14 100644
--- a/drivers/net/wireless/b43/dma.c
+++ b/drivers/net/wireless/b43/dma.c
@@ -592,7 +592,7 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
 		/* ugh. try to realloc in zone_dma */
 		gfp_flags |= GFP_DMA;
 
-		dev_kfree_skb_any(skb);
+		kfree_skb(skb);
 
 		skb = __dev_alloc_skb(ring->rx_buffersize, gfp_flags);
 		if (unlikely(!skb))
@@ -602,7 +602,7 @@ static int setup_rx_descbuffer(struct b43_dmaring *ring,
 					 ring->rx_buffersize, 0);
 		if (b43_dma_mapping_error(ring, dmaaddr, ring->rx_buffersize, 0)) {
 			b43err(ring->dev->wl, "RX DMA buffer allocation failed\n");
-			dev_kfree_skb_any(skb);
+			kfree_skb(skb);
 			return -EIO;
 		}
 	}
@@ -645,7 +645,7 @@ static int alloc_initial_descbuffers(struct b43_dmaring *ring)
 		desc = ring->ops->idx2desc(ring, i, &meta);
 
 		unmap_descbuffer(ring, meta->dmaaddr, ring->rx_buffersize, 0);
-		dev_kfree_skb(meta->skb);
+		kfree_skb(meta->skb);
 	}
 	goto out;
 }
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 16/46] net: cxgb3: don't drop packets on memory pressure in driver
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (12 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 11/46] net: sungem: " Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 14/46] net: sunbmac: cleanup magic '34' Michał Mirosław
                   ` (3 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev; +Cc: Divy Le Ray

Dropping received packets should be left to upper layers.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/cxgb3/sge.c |    8 ++------
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/cxgb3/sge.c b/drivers/net/cxgb3/sge.c
index 3196fdd..d322d3e 100644
--- a/drivers/net/cxgb3/sge.c
+++ b/drivers/net/cxgb3/sge.c
@@ -66,7 +66,6 @@
 #define FL1_PG_ORDER (PAGE_SIZE > 8192 ? 0 : 1)
 #define FL1_PG_ALLOC_SIZE (PAGE_SIZE << FL1_PG_ORDER)
 
-#define SGE_RX_DROP_THRES 16
 #define RX_RECLAIM_PERIOD (HZ/4)
 
 /*
@@ -2338,13 +2337,10 @@ no_mem:
 				}
 
 				skb = get_packet_pg(adap, fl, q,
-						    G_RSPD_LEN(len),
-						    eth ?
-						    SGE_RX_DROP_THRES : 0);
+						    G_RSPD_LEN(len), 0);
 				q->pg_skb = skb;
 			} else
-				skb = get_packet(adap, fl, G_RSPD_LEN(len),
-						 eth ? SGE_RX_DROP_THRES : 0);
+				skb = get_packet(adap, fl, G_RSPD_LEN(len), 0);
 			if (unlikely(!skb)) {
 				if (!eth)
 					goto no_mem;
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 14/46] net: sunbmac: cleanup magic '34'
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (13 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 16/46] net: cxgb3: don't drop packets on memory pressure in driver Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  0:52 ` [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure Michał Mirosław
                   ` (2 subsequent siblings)
  17 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev

An offset of 34 bytes (32+2) after aligning skb->data to 64 looks
suspicious. Remove the alignment, and use NET_IP_ALIGN instead of the
magic number.

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/sunbmac.c |   32 ++++++++++++++------------------
 1 files changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index e28d3ea..efc5389 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -228,24 +228,21 @@ static void bigmac_init_rings(struct bigmac *bp, int from_irq)
 	for (i = 0; i < RX_RING_SIZE; i++) {
 		struct sk_buff *skb;
 
-		skb = __netdev_alloc_skb_aligned(dev,
-						 RX_BUF_ALLOC_SIZE,
-						 SUNBMAC_RX_ALIGNMENT,
-						 gfp_flags);
+		skb = __netdev_alloc_skb(dev, RX_BUF_ALLOC_SIZE,
+					 gfp_flags);
 		if (!skb)
 			continue;
 
 		bp->rx_skbs[i] = skb;
-
-		skb_reserve(skb, 34);
+		skb_reserve(skb, NET_IP_ALIGN);
 
 		bb->be_rxd[i].rx_addr =
 			dma_map_single(&bp->bigmac_op->dev,
 				       skb->data,
-				       RX_BUF_ALLOC_SIZE - 34,
+				       RX_BUF_ALLOC_SIZE - NET_IP_ALIGN,
 				       DMA_FROM_DEVICE);
 		bb->be_rxd[i].rx_flags =
-			(RXD_OWN | ((RX_BUF_ALLOC_SIZE - 34) & RXD_LENGTH));
+			(RXD_OWN | ((RX_BUF_ALLOC_SIZE - NET_IP_ALIGN) & RXD_LENGTH));
 	}
 
 	for (i = 0; i < TX_RING_SIZE; i++)
@@ -820,7 +817,7 @@ static void bigmac_rx(struct bigmac *bp)
 			/* Return it to the BigMAC. */
 			bp->enet_stats.rx_dropped++;
 			this->rx_flags =
-				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - 34) & RXD_LENGTH));
+				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - NET_IP_ALIGN) & RXD_LENGTH));
 			goto next;
 		}
 		skb = bp->rx_skbs[elem];
@@ -828,27 +825,26 @@ static void bigmac_rx(struct bigmac *bp)
 			struct sk_buff *new_skb;
 
 			/* Now refill the entry, if we can. */
-			new_skb = __netdev_alloc_skb_aligned(dev,
-							     RX_BUF_ALLOC_SIZE,
-							     SUNBMAC_RX_ALIGNMENT,
-							     GFP_ATOMIC);
+			new_skb = __netdev_alloc_skb(dev,
+						     RX_BUF_ALLOC_SIZE,
+						     GFP_ATOMIC);
 			if (new_skb == NULL) {
 				drops++;
 				goto drop_it;
 			}
 			dma_unmap_single(&bp->bigmac_op->dev,
 					 this->rx_addr,
-					 RX_BUF_ALLOC_SIZE - 34,
+					 RX_BUF_ALLOC_SIZE - NET_IP_ALIGN,
 					 DMA_FROM_DEVICE);
 			bp->rx_skbs[elem] = new_skb;
-			skb_reserve(new_skb, 34);
+			skb_reserve(new_skb, NET_IP_ALIGN);
 			this->rx_addr =
 				dma_map_single(&bp->bigmac_op->dev,
 					       new_skb->data,
-					       RX_BUF_ALLOC_SIZE - 34,
+					       RX_BUF_ALLOC_SIZE - NET_IP_ALIGN,
 					       DMA_FROM_DEVICE);
 			this->rx_flags =
-				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - 34) & RXD_LENGTH));
+				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - NET_IP_ALIGN) & RXD_LENGTH));
 
 			skb_put(skb, len);
 		} else {
@@ -870,7 +866,7 @@ static void bigmac_rx(struct bigmac *bp)
 
 			/* Reuse original ring buffer. */
 			this->rx_flags =
-				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - 34) & RXD_LENGTH));
+				(RXD_OWN | ((RX_BUF_ALLOC_SIZE - NET_IP_ALIGN) & RXD_LENGTH));
 
 			skb = copy_skb;
 		}
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (14 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 14/46] net: sunbmac: cleanup magic '34' Michał Mirosław
@ 2011-07-11  0:52 ` Michał Mirosław
  2011-07-11  5:40   ` Francois Romieu
                     ` (2 more replies)
  2011-07-11  0:52 ` [PATCH v2 15/46] net/wireless: b43: use kfree_skb() for untouched skbs Michał Mirosław
  2011-07-11  6:54 ` [PATCH v2 00/46] Clean up RX copybreak and DMA handling David Miller
  17 siblings, 3 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  0:52 UTC (permalink / raw)
  To: netdev
  Cc: Hartley Sweeten, Michael Chan, Eilon Greenstein, Guo-Fu Tseng,
	Realtek linux nic maintainers, Francois Romieu,
	Stephen Hemminger, Matt Carlson, Jon Mason

Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
---
 drivers/net/arm/ep93xx_eth.c    |    3 +++
 drivers/net/bnx2.c              |    3 +++
 drivers/net/bnx2x/bnx2x_cmn.c   |    3 +++
 drivers/net/cassini.c           |    3 +++
 drivers/net/jme.c               |    3 +++
 drivers/net/mlx4/en_rx.c        |    6 ++++++
 drivers/net/r8169.c             |    3 +++
 drivers/net/skge.c              |    3 +++
 drivers/net/sky2.c              |    2 ++
 drivers/net/tg3.c               |    2 ++
 drivers/net/tokenring/olympic.c |    2 ++
 drivers/net/vxge/vxge-main.c    |    3 +++
 12 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/drivers/net/arm/ep93xx_eth.c b/drivers/net/arm/ep93xx_eth.c
index ba3bf43..55a42c0 100644
--- a/drivers/net/arm/ep93xx_eth.c
+++ b/drivers/net/arm/ep93xx_eth.c
@@ -282,6 +282,9 @@ static int ep93xx_rx(struct net_device *dev, int processed, int budget)
 		if (rstat0 & RSTAT0_CRCI)
 			length -= 4;
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 		skb = dev_alloc_skb(length + 2);
 		if (likely(skb != NULL)) {
 			struct ep93xx_rdesc *rxd = &ep->descs->rdesc[entry];
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index d627886..14f9a5f 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -2952,6 +2952,9 @@ bnx2_rx_skb(struct bnx2 *bp, struct bnx2_rx_ring_info *rxr, struct sk_buff *skb,
 	int err;
 	u16 prod = ring_idx & 0xffff;
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	err = bnx2_alloc_rx_skb(bp, rxr, prod, GFP_ATOMIC);
 	if (unlikely(err)) {
 		bnx2_reuse_rx_skb(bp, rxr, skb, (u16) (ring_idx >> 16), prod);
diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
index 4f9164c..a6da01a 100644
--- a/drivers/net/bnx2x/bnx2x_cmn.c
+++ b/drivers/net/bnx2x/bnx2x_cmn.c
@@ -673,6 +673,9 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
 				goto reuse_rx;
 			}
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 			/* Since we don't have a jumbo ring
 			 * copy small packets if mtu > 1500
 			 */
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 788ab13..a05a490 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -1975,6 +1975,9 @@ static int cas_rx_process_pkt(struct cas *cp, struct cas_rx_comp *rxc,
 	else
 		alloclen = max(hlen, RX_COPY_MIN);
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	skb = dev_alloc_skb(alloclen + swivel + cp->crc_size);
 	if (skb == NULL)
 		return -1;
diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index ad69dae..e9ac9bd 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -1022,6 +1022,9 @@ jme_alloc_and_feed_skb(struct jme_adapter *jme, int idx)
 	dma_unmap_single(&jme->pdev->dev, rxbi->mapping, rxbi->len,
 			 DMA_FROM_DEVICE);
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	if (unlikely(jme_make_new_rx_buf(jme, idx))) {
 		rxbi->mapping = dma_map_single(&jme->pdev->dev, skb->data,
 						rxbi->len, DMA_FROM_DEVICE);
diff --git a/drivers/net/mlx4/en_rx.c b/drivers/net/mlx4/en_rx.c
index ee15295..e2baa3f 100644
--- a/drivers/net/mlx4/en_rx.c
+++ b/drivers/net/mlx4/en_rx.c
@@ -413,6 +413,9 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
 	int nr;
 	dma_addr_t dma;
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	/* Collect used fragments while replacing them in the HW descirptors */
 	for (nr = 0; nr < priv->num_frags; nr++) {
 		frag_info = &priv->frag_info[nr];
@@ -462,6 +465,9 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
 	int used_frags;
 	dma_addr_t dma;
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	skb = dev_alloc_skb(SMALL_PACKET_SIZE + NET_IP_ALIGN);
 	if (!skb) {
 		en_dbg(RX_ERR, priv, "Failed allocating skb\n");
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index e2c2884..ce4bdaf 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4999,6 +4999,9 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
 	struct sk_buff *skb;
 	struct device *d = &tp->pci_dev->dev;
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	data = rtl8169_align(data);
 	dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
 	prefetch(data);
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 11e5229..79bf015 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -3022,6 +3022,9 @@ static struct sk_buff *skge_rx_get(struct net_device *dev,
 	if (phy_length(skge->hw, status) != len)
 		goto error;
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 	if (len < RX_COPY_THRESHOLD) {
 		skb = netdev_alloc_skb_ip_align(dev, len);
 		if (!skb)
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 5f720b9..aaf56e4 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -2540,6 +2540,8 @@ okay:
 		skb = receive_copy(sky2, re, length);
 	else
 		skb = receive_new(sky2, re, length);
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
 
 	dev->stats.rx_dropped += (skb == NULL);
 
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index b43d473..42e0d31 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -4973,6 +4973,8 @@ static int tg3_rx(struct tg3_napi *tnapi, int budget)
 
 			skb_size = tg3_alloc_rx_skb(tp, tpr, opaque_key,
 						    *post_ptr);
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
 			if (skb_size < 0)
 				goto drop_it;
 
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 30fb6e8..e0c3cca 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -797,6 +797,8 @@ static void olympic_rx(struct net_device *dev)
 				}
 
 				if (skb == NULL) {
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
 					printk(KERN_WARNING "%s: Not enough memory to copy packet to upper layers.\n",dev->name) ;
 					dev->stats.rx_dropped++;
 					/* Update counters even though we don't transfer the frame */
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index 00d435d..00a9003 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -414,6 +414,9 @@ vxge_rx_1b_compl(struct __vxge_hw_ring *ringh, void *dtr,
 			}
 		}
 
+#warning drops packets from rx queue head on memory pressure
+#warning (like dev_skb_finish_rx_dma_refill() users)
+
 		if (pkt_length > VXGE_LL_RX_COPY_THRESHOLD) {
 			if (vxge_rx_alloc(dtr, ring, data_size) != NULL) {
 				if (!vxge_rx_map(dtr, ring)) {
-- 
1.7.5.4


^ permalink raw reply related	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure
  2011-07-11  0:52 ` [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure Michał Mirosław
@ 2011-07-11  5:40   ` Francois Romieu
  2011-07-11  6:47   ` Eilon Greenstein
  2011-07-11 15:24   ` Stephen Hemminger
  2 siblings, 0 replies; 76+ messages in thread
From: Francois Romieu @ 2011-07-11  5:40 UTC (permalink / raw)
  To: Michał Mirosław; +Cc: netdev, Realtek linux nic maintainers

Michał Mirosław <mirq-linux@rere.qmqm.pl> :
[...]
> diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
> index e2c2884..ce4bdaf 100644
> --- a/drivers/net/r8169.c
> +++ b/drivers/net/r8169.c
> @@ -4999,6 +4999,9 @@ static struct sk_buff *rtl8169_try_rx_copy(void *data,
>  	struct sk_buff *skb;
>  	struct device *d = &tp->pci_dev->dev;
>  
> +#warning drops packets from rx queue head on memory pressure
> +#warning (like dev_skb_finish_rx_dma_refill() users)
> +
>  	data = rtl8169_align(data);
>  	dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
>  	prefetch(data);

The commit messages explain why the driver works this way. I'd rather avoid the
noise and - especially - the dubious patches it may generate.

-- 
Ueimor

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [PATCH net-next-2.6] net: introduce build_skb()
  2011-07-11  0:52 ` [PATCH v2 01/46] net: introduce __netdev_alloc_skb_aligned() Michał Mirosław
@ 2011-07-11  5:46   ` Eric Dumazet
  2011-07-11 10:53     ` Michał Mirosław
  2011-07-12 15:40     ` Eric Dumazet
  0 siblings, 2 replies; 76+ messages in thread
From: Eric Dumazet @ 2011-07-11  5:46 UTC (permalink / raw)
  To: Michał Mirosław; +Cc: netdev

Le lundi 11 juillet 2011 à 02:52 +0200, Michał Mirosław a écrit :
> Introduce __netdev_alloc_skb_aligned() to return skb with skb->data
> aligned at specified 2^n multiple.
> 
> Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> ---

Hi Michal


Could we synchronize our work to not introduce things that might
disappear shortly ?

Here is the RFC patch about build_skb() :

[PATCH] net: introduce build_skb()

One of the things we discussed during the netdev 2011 conference was the
idea to change network drivers to allocate/populate their skbs at RX
completion time, right before feeding the skb to the network stack.

Right now, we allocate skbs when populating the RX ring, and that's a
waste of CPU cache, since allocating an skb means a full memset() to clear
the skb and its skb_shared_info portion. By the time the NIC fills a frame
in the data buffer and the host can get it, the CPU has probably thrown
away those cache lines, because of huge RX ring sizes.

So the deal would be to allocate only the data buffer for the NIC to
populate its RX ring buffer. And use build_skb() at RX completion to
attach a data buffer (now filled with an ethernet frame) to a new skb,
initialize the skb_shared_info portion, and give the hot skb to network
stack.

build_skb() is the function to allocate an skb, with the caller providing
the data buffer that should be attached to it. Drivers are expected to call
skb_reserve() right after build_skb() to let skb->data point to the
Ethernet frame (usually skipping NET_SKB_PAD and NET_IP_ALIGN).


Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
---
 include/linux/skbuff.h |    1 
 net/core/skbuff.c      |   48 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 32ada53..5e903e7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -507,6 +507,7 @@ static inline struct rtable *skb_rtable(const struct sk_buff *skb)
 extern void kfree_skb(struct sk_buff *skb);
 extern void consume_skb(struct sk_buff *skb);
 extern void	       __kfree_skb(struct sk_buff *skb);
+extern struct sk_buff *build_skb(void *data, unsigned int size);
 extern struct sk_buff *__alloc_skb(unsigned int size,
 				   gfp_t priority, int fclone, int node);
 static inline struct sk_buff *alloc_skb(unsigned int size,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index d220119..9193d7e 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -234,6 +234,54 @@ nodata:
 EXPORT_SYMBOL(__alloc_skb);
 
 /**
+ * build_skb - build a network buffer
+ * @data: data buffer provided by caller
+ * @size: size of data buffer, not including skb_shared_info
+ *
+ * Allocate a new &sk_buff. Caller provides space holding head and
+ * skb_shared_info. Mostly used in driver RX path.
+ * The return is the buffer. On a failure the return is %NULL.
+ * Notes :
+ *  Before IO, driver allocates only data buffer where NIC put incoming frame
+ *  Driver SHOULD add room at head (NET_SKB_PAD) and
+ *  MUST add room at tail (to hold skb_shared_info)
+ *  After IO, driver calls build_skb(), to get a hot skb instead of a cold one
+ *  before giving packet to stack. RX rings only contains data buffers, not
+ *  full skbs.
+ */
+struct sk_buff *build_skb(void *data, unsigned int size)
+{
+	struct skb_shared_info *shinfo;
+	struct sk_buff *skb;
+
+	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+	if (!skb)
+		return NULL;
+
+	size = SKB_DATA_ALIGN(size);
+
+	memset(skb, 0, offsetof(struct sk_buff, tail));
+	skb->truesize = size + sizeof(struct sk_buff);
+	atomic_set(&skb->users, 1);
+	skb->head = data;
+	skb->data = data;
+	skb_reset_tail_pointer(skb);
+	skb->end = skb->tail + size;
+#ifdef NET_SKBUFF_DATA_USES_OFFSET
+	skb->mac_header = ~0U;
+#endif
+
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
+	atomic_set(&shinfo->dataref, 1);
+	kmemcheck_annotate_variable(shinfo->destructor_arg);
+
+	return skb;
+}
+EXPORT_SYMBOL(build_skb);
+
+/**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
  *	@length: length to allocate



^ permalink raw reply related	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure
  2011-07-11  0:52 ` [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure Michał Mirosław
  2011-07-11  5:40   ` Francois Romieu
@ 2011-07-11  6:47   ` Eilon Greenstein
  2011-07-11 10:04     ` Michał Mirosław
  2011-07-11 15:24   ` Stephen Hemminger
  2 siblings, 1 reply; 76+ messages in thread
From: Eilon Greenstein @ 2011-07-11  6:47 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, Hartley Sweeten, Michael Chan, Guo-Fu Tseng,
	Realtek linux nic maintainers, Francois Romieu,
	Stephen Hemminger, Matthew Carlson, Jon Mason

On Sun, 2011-07-10 at 17:52 -0700, Michał Mirosław wrote:
> Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> ---

> diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
> index 4f9164c..a6da01a 100644
> --- a/drivers/net/bnx2x/bnx2x_cmn.c
> +++ b/drivers/net/bnx2x/bnx2x_cmn.c
> @@ -673,6 +673,9 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
>  				goto reuse_rx;
>  			}
>  
> +#warning drops packets from rx queue head on memory pressure
> +#warning (like dev_skb_finish_rx_dma_refill() users)
> +

We have the dropless_fc module parameter that can be configured if the
user prefers pausing on host memory pressure - the problem with that
feature is that it is enough that one of the ring runs out of memory and
the entire port is stopped. When running with 16 rings, this can lead to
serious throughput degradation - this is why it is kept as a user
configurable option.

>  			/* Since we don't have a jumbo ring
>  			 * copy small packets if mtu > 1500
>  			 */




^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
                   ` (16 preceding siblings ...)
  2011-07-11  0:52 ` [PATCH v2 15/46] net/wireless: b43: use kfree_skb() for untouched skbs Michał Mirosław
@ 2011-07-11  6:54 ` David Miller
  2011-07-11  9:16   ` Michał Mirosław
  2011-07-11 12:36   ` Ben Hutchings
  17 siblings, 2 replies; 76+ messages in thread
From: David Miller @ 2011-07-11  6:54 UTC (permalink / raw)
  To: mirq-linux; +Cc: netdev

From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Mon, 11 Jul 2011 02:52:46 +0200 (CEST)

>   1. under packet storm and memory pressure NIC keeps generating interrupts
>      (if non-NAPI) and indicating new buffers because it always has free
>      RX buffers --- this only wastes CPU and bus bandwidth transferring
>      data that is going to be immediately discarded;

Actually, this is exactly how I, and others advise people to implement
drivers.  It is the right thing to do.

The worst thing that can happen is to let the RX ring empty of
buffers.  Some cards hang as a result of this, and also it causes head
of line blocking on multiqueue cards, etc.

So the first thing the driver should do is try to allocate a
replacement buffer.

And if that fails, it should give the RX packet right back to the
card, and not pass it up the stack.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE)
  2011-07-11  0:52 ` [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE) Michał Mirosław
@ 2011-07-11  8:30   ` Vlad Zolotarov
  2011-07-11  9:29     ` Michał Mirosław
  0 siblings, 1 reply; 76+ messages in thread
From: Vlad Zolotarov @ 2011-07-11  8:30 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: linux-wireless, Eilon Greenstein, Gary Zambrano,
	Stephen Hemminger, Stefano Brivio, e1000-devel, Matthew Carlson,
	Jesse Brandeburg, Francois Romieu, Realtek linux nic maintainers,
	John W. Linville, Ron Mercer, Michael Chan, Jitendra Kalsaria,
	Divy Le Ray, netdev, Bruce Allan, Hartley Sweeten, John Ronciak,
	Jon

>         prod_rx_buf->skb = skb;
> diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
> index c016e20..c9e49a0 100644
> --- a/drivers/net/bnx2x/bnx2x_cmn.h
> +++ b/drivers/net/bnx2x/bnx2x_cmn.h
> @@ -923,16 +923,11 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x
> *bp, static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
>                                       u16 cons, u16 prod)
>  {
> -       struct bnx2x *bp = fp->bp;
>         struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
>         struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
>         struct eth_rx_bd *cons_bd = &fp->rx_desc_ring[cons];
>         struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
> 
> -       dma_sync_single_for_device(&bp->pdev->dev,
> -                                  dma_unmap_addr(cons_rx_buf, mapping),
> -                                  RX_COPY_THRESH, DMA_FROM_DEVICE);
> -
>         dma_unmap_addr_set(prod_rx_buf, mapping,
>                            dma_unmap_addr(cons_rx_buf, mapping));
>         prod_rx_buf->skb = cons_rx_buf->skb;

Michal, pls., note that this function is only called for buffers which were 
previously dma_synced towards CPU (your "[PATCH v2 05/46] net: bnx2x: fix DMA 
sync direction" properly fixes the direction of the first call which was 
incorrect). Then, according to the 3d edition of the "Linux device drivers" 
book, chapter 15, "Setting up streaming DMA mappings" article, end of the page 
449, when we call for dma_sync_single_for_cpu() the buffer ownership gets to 
the CPU and CPU may safely access the buffer (in particular, we read it). Then 
the author says: "Before the device accesses the buffer, however, ownership 
should be transferred back to it with: dma_sync_single_for_device()".

The DMA-API.txt document u've referenced doesn't refer the above function, so, 
it's unclear how your fix may be based on it. On the other hand it clearly 
contradicts the "Linux device driver" book.

Pls., comment.

thanks,
vlad


------------------------------------------------------------------------------
All of the data generated in your IT infrastructure is seriously valuable.
Why? It contains a definitive record of application performance, security 
threats, fraudulent activity, and more. Splunk takes this data and makes 
sense of it. IT sense. And common sense.
http://p.sf.net/sfu/splunk-d2d-c2
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11  6:54 ` [PATCH v2 00/46] Clean up RX copybreak and DMA handling David Miller
@ 2011-07-11  9:16   ` Michał Mirosław
  2011-07-11  9:24     ` David Miller
  2011-07-11 12:36   ` Ben Hutchings
  1 sibling, 1 reply; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  9:16 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Sun, Jul 10, 2011 at 11:54:58PM -0700, David Miller wrote:
> From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> Date: Mon, 11 Jul 2011 02:52:46 +0200 (CEST)
> 
> >   1. under packet storm and memory pressure NIC keeps generating interrupts
> >      (if non-NAPI) and indicating new buffers because it always has free
> >      RX buffers --- this only wastes CPU and bus bandwidth transferring
> >      data that is going to be immediately discarded;
> Actually, this is exactly how I, and others advise people to implement
> drivers.  It is the right thing to do.
> 
> The worst thing that can happen is to let the RX ring empty of
> buffers.  Some cards hang as a result of this, and also it causes head
> of line blocking on multiqueue cards, etc.
> 
> So the first thing the driver should do is try to allocate a
> replacement buffer.
> 
> And if that fails, it should give the RX packet right back to the
> card, and not pass it up the stack.

For now, lets ignore those badly broken cards which can't cope with
insufficient receive buffers. (BTW, are there that many of them?
Some examples, please?)

Lets compare the two cases (replacing buffers immediately vs replacing
later) under the hostile conditions. Keep in mind that the strategy
doesn't matter much when the buffers can be allocated right away --- the
discussion is about the corner case when memory runs out.

1. replacing buffers immediately

Packet is indicated in queue N, there's no memory for new skb, so it's
dropped, and the buffer goes back to free list. In parallel, queue M
(!= N) indicates new packet. Still, there's no memory for new skb so
it's also dropped and its buffer is reused. The effect is that all
packets are dropped, whatever queue they appear on.

2. replacing buffers later

Packet is indicated in queue N, it's delivered up the stack. No new buffer
is available, so after a while queue stalls and the packets are dropped
by the card. If the queues share free buffer list, then all get stalled
at the same time, if not they run out independently. Net effect is the
same as above --- all packets are dropped.

The differences are:
 - where the packets are dropped:
   1. in driver core after transfer
   2. in the card
 - where accounting happens:
   1. in driver: rx_dropped
   2. in card: rx discards
 - memory usage:
   1. memory is held in empty rx ring buffers
   2. memory is held in packets waiting to be processed
 - CPU usage:
   1. >0% - queues are cleared repeatedly, card 'thinks' everything is ok
   2. 0% - queues are stalled, no more rx indications
 - hardware throttling (or pause frame generation):
   1. broken --- card always sees full free rx ring, so does not try to
      throttle (unless driver also indicates congestion to the card)
   2. hardware throttling is possible as the card sees only really free
      rx buffers

The HOL blocking does not matter here, because there's only one head ---
the system memory. If I misunderstood this point, please explain it further.

Scheme #1 has the potential use when combined with small emergency buffer
pool if the driver looks for specific packets or indications that come
in the same queue as other packets. These are rare cases, though.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11  9:16   ` Michał Mirosław
@ 2011-07-11  9:24     ` David Miller
  2011-07-11  9:47       ` Michał Mirosław
  0 siblings, 1 reply; 76+ messages in thread
From: David Miller @ 2011-07-11  9:24 UTC (permalink / raw)
  To: mirq-linux; +Cc: netdev

From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Mon, 11 Jul 2011 11:16:49 +0200

> Packet is indicated in queue N, theres no memory for new skb, so its
> dropped, and the buffer goes back to free list. In parallel, queue M
> (!= N) indicates new packet. Still, there's no memory for new skb so
> its also dropped and its buffer is reused. The effect is that all
> packets are dropped, whatever queue they appear on.

Why would queue M (!= N) fail just because N did?  They may be
allocating out of different NUMA nodes, and thus succeed.

> The HOL blocking does not matter here, because there's only one head
> --- the system memory. If I misunderstood this point, please explain
> it further.

Multiqueue drivers are moving towards placing the queues on different
NUMA nodes, and in that scenario one queue might succeed even if the
other fails.

Back to the hardware hanging issue, it's real.  Getting into a
situation where the RX ring lacks any buffers at all is the least
tested path for these chips.

Testing fate is a really bad idea, and this is why I always propose to
keep the hardware with RX buffers to use in all circumstances.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE)
  2011-07-11  8:30   ` Vlad Zolotarov
@ 2011-07-11  9:29     ` Michał Mirosław
  2011-07-11  9:46       ` Vlad Zolotarov
  0 siblings, 1 reply; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  9:29 UTC (permalink / raw)
  To: Vlad Zolotarov
  Cc: linux-wireless, Eilon Greenstein, Gary Zambrano,
	Stephen Hemminger, Stefano Brivio, e1000-devel, Matthew Carlson,
	Jesse Brandeburg, Francois Romieu, Realtek linux nic maintainers,
	John W. Linville, Ron Mercer, Michael Chan, Jitendra Kalsaria,
	Divy Le Ray, netdev, Bruce Allan, Hartley Sweeten, John Ronciak,
	Jon

On Mon, Jul 11, 2011 at 11:30:39AM +0300, Vlad Zolotarov wrote:
> >         prod_rx_buf->skb = skb;
> > diff --git a/drivers/net/bnx2x/bnx2x_cmn.h b/drivers/net/bnx2x/bnx2x_cmn.h
> > index c016e20..c9e49a0 100644
> > --- a/drivers/net/bnx2x/bnx2x_cmn.h
> > +++ b/drivers/net/bnx2x/bnx2x_cmn.h
> > @@ -923,16 +923,11 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x
> > *bp, static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
> >                                       u16 cons, u16 prod)
> >  {
> > -       struct bnx2x *bp = fp->bp;
> >         struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
> >         struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
> >         struct eth_rx_bd *cons_bd = &fp->rx_desc_ring[cons];
> >         struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
> > 
> > -       dma_sync_single_for_device(&bp->pdev->dev,
> > -                                  dma_unmap_addr(cons_rx_buf, mapping),
> > -                                  RX_COPY_THRESH, DMA_FROM_DEVICE);
> > -
> >         dma_unmap_addr_set(prod_rx_buf, mapping,
> >                            dma_unmap_addr(cons_rx_buf, mapping));
> >         prod_rx_buf->skb = cons_rx_buf->skb;
> Michal, pls., note that this function is only called for buffers which were 
> previously dma_synced towards CPU (your "[PATCH v2 05/46] net: bnx2x: fix DMA 
> sync direction" properly fixes the direction of the first call which was 
> incorrect). Then, according to the 3d edition of the "Linux device drivers" 
> book, chapter 15, "Setting up streaming DMA mappings" article, end of the page 
> 449, when we call for dma_syc_single_for_cpu() the buffer ownership gets to 
> the CPU and CPU may safely access the buffer (in particular, we read it). Then 
> the author says: "Before the device accesses the buffer, however, ownership 
> should be transfered back to it with: dma_sync_single_for_device().
> 
> The DMA-API.txt document u've referenced doesn't refer the above function, so, 
> it's unclear how your fix may be based on it. On the other hand it clearly 
> contradicts the "Linux device driver" book.

DMA-API.txt describes what synchronization points are necessary for what DMA
mapping types (direction). dma_sync_single_for_cpu/device() are functions
realising those points. Note that the example in DMA-API-HOWTO.txt is misleading
as it has dma_sync_single_for_device() where it's not required by DMA-API.txt.

In this case, you don't need to sync to device for mappings that haven't
been written to by CPU. CPU caches will be invalidated anyway by next
dma_sync_single_for_cpu() or dma_unmap_single() and the CPU should not
ever write to cachelines that belong to FROM_DEVICE mappings.

The best source is the code. I looked through random implementations of
dma_sync_*_to_*() and in to_device() cases these are CPU write buffer
flushes and bounce buffer copying to the mapping - both actions are useless
(and potentially harmful in the bounce-buffer case) when the mapping hasn't
been written to after sync_to_cpu().

Best Regards,
Michał Mirosław

------------------------------------------------------------------------------
All of the data generated in your IT infrastructure is seriously valuable.
Why? It contains a definitive record of application performance, security 
threats, fraudulent activity, and more. Splunk takes this data and makes 
sense of it. IT sense. And common sense.
http://p.sf.net/sfu/splunk-d2d-c2
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE)
  2011-07-11  9:29     ` Michał Mirosław
@ 2011-07-11  9:46       ` Vlad Zolotarov
  0 siblings, 0 replies; 76+ messages in thread
From: Vlad Zolotarov @ 2011-07-11  9:46 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: linux-wireless, Eilon Greenstein, Gary Zambrano,
	Stephen Hemminger, Stefano Brivio, e1000-devel, Matthew Carlson,
	Jesse Brandeburg, Francois Romieu, Realtek linux nic maintainers,
	John W. Linville, Ron Mercer, Michael Chan, Jitendra Kalsaria,
	Divy Le Ray, netdev, Bruce Allan, Hartley Sweeten, John Ronciak,
	Jon

On Monday 11 July 2011 12:29:09 Michał Mirosław wrote:
> On Mon, Jul 11, 2011 at 11:30:39AM +0300, Vlad Zolotarov wrote:
> > >         prod_rx_buf->skb = skb;
> > > 
> > > diff --git a/drivers/net/bnx2x/bnx2x_cmn.h
> > > b/drivers/net/bnx2x/bnx2x_cmn.h index c016e20..c9e49a0 100644
> > > --- a/drivers/net/bnx2x/bnx2x_cmn.h
> > > +++ b/drivers/net/bnx2x/bnx2x_cmn.h
> > > @@ -923,16 +923,11 @@ static inline int bnx2x_alloc_rx_skb(struct bnx2x
> > > *bp, static inline void bnx2x_reuse_rx_skb(struct bnx2x_fastpath *fp,
> > > 
> > >                                       u16 cons, u16 prod)
> > >  
> > >  {
> > > 
> > > -       struct bnx2x *bp = fp->bp;
> > > 
> > >         struct sw_rx_bd *cons_rx_buf = &fp->rx_buf_ring[cons];
> > >         struct sw_rx_bd *prod_rx_buf = &fp->rx_buf_ring[prod];
> > >         struct eth_rx_bd *cons_bd = &fp->rx_desc_ring[cons];
> > >         struct eth_rx_bd *prod_bd = &fp->rx_desc_ring[prod];
> > > 
> > > -       dma_sync_single_for_device(&bp->pdev->dev,
> > > -                                  dma_unmap_addr(cons_rx_buf,
> > > mapping), -                                  RX_COPY_THRESH,
> > > DMA_FROM_DEVICE); -
> > > 
> > >         dma_unmap_addr_set(prod_rx_buf, mapping,
> > >         
> > >                            dma_unmap_addr(cons_rx_buf, mapping));
> > >         
> > >         prod_rx_buf->skb = cons_rx_buf->skb;
> > 
> > Michal, pls., note that this function is only called for buffers which
> > were previously dma_synced towards CPU (your "[PATCH v2 05/46] net:
> > bnx2x: fix DMA sync direction" properly fixes the direction of the first
> > call which was incorrect). Then, according to the 3d edition of the
> > "Linux device drivers" book, chapter 15, "Setting up streaming DMA
> > mappings" article, end of the page 449, when we call for
> > dma_syc_single_for_cpu() the buffer ownership gets to the CPU and CPU
> > may safely access the buffer (in particular, we read it). Then the
> > author says: "Before the device accesses the buffer, however, ownership
> > should be transfered back to it with: dma_sync_single_for_device().
> > 
> > The DMA-API.txt document u've referenced doesn't refer the above
> > function, so, it's unclear how your fix may be based on it. On the other
> > hand it clearly contradicts the "Linux device driver" book.
> 
> DMA-API.txt describes what synchronization points are necessary for what
> DMA mapping types (direction). dma_sync_single_for_cpu/device() are
> functions realising those points. Note that example DMA-API-HOWTO.txt is
> misleading as it has dma_sync_single_for_device() where its not required
> by DMA-API.txt.
> 
> In this case, you don't need to sync to device for mappings that haven't
> been written to by CPU. CPU caches will be invalidated anyway by next
> dma_sync_single_for_cpu() or dma_unmap_single() and the CPU should not
> ever write to cachelines that belong to FROM_DEVICE mappings.

Okay, I see the section in the doc u r talking about... I agree. We may drop 
these sync_single() in the bnx2x_reuse_rx_skb().

> 
> The best source is the code. 

Hmmm... The code is bug prone, so I'd stick to the Doc...;)

Thanks, Michal.
vlad


------------------------------------------------------------------------------
All of the data generated in your IT infrastructure is seriously valuable.
Why? It contains a definitive record of application performance, security 
threats, fraudulent activity, and more. Splunk takes this data and makes 
sense of it. IT sense. And common sense.
http://p.sf.net/sfu/splunk-d2d-c2
_______________________________________________
E1000-devel mailing list
E1000-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/e1000-devel
To learn more about Intel&#174; Ethernet, visit http://communities.intel.com/community/wired

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11  9:24     ` David Miller
@ 2011-07-11  9:47       ` Michał Mirosław
  2011-07-11 10:11         ` David Miller
  0 siblings, 1 reply; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11  9:47 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Mon, Jul 11, 2011 at 02:24:03AM -0700, David Miller wrote:
> From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> Date: Mon, 11 Jul 2011 11:16:49 +0200
> > Packet is indicated in queue N, theres no memory for new skb, so its
> > dropped, and the buffer goes back to free list. In parallel, queue M
> > (!= N) indicates new packet. Still, there's no memory for new skb so
> > its also dropped and its buffer is reused. The effect is that all
> > packets are dropped, whatever queue they appear on.
> Why would queue M (!= N) fail just because N did?  They may be
> allocating out of different NUMA nodes, and thus succeed.
> 
> > The HOL blocking does not matter here, because there's only one head
> > --- the system memory. If I misunderstood this point, please explain
> > it further.
> Multiqueue drivers are moving towards placing the queues on different
> NUMA nodes, and in that scenerio one queue might succeed even if the
> other fails.

I assumed that all queues get buffers from the same rx free ring. If
queues have their own free list, then we can treat them as separate NICs
for this discussion. Queues on one NUMA node stall, others go on normally.

> Back to the hardware hanging issue, it's real.  Getting into a
> situation where the RX ring lacks any buffers at all is the least
> tested path for these chips.
> 
> Testing fate is a really bad idea, and this is why I always propose to
> keep the hardware with RX buffers to use in all circumstances.

Catch 22: The chips are not tested because they have always free buffers,
they are provided with endless rx buffers because they are not being
tested. I'd rather test them well rather than workaround a phantom issue.
Tripping on empty free rx buffer ring is still possible even with scheme
#1 (eg. with lots of NICs receiving heavy traffic) --- just harder to
recognise and debug (if it breaks at all) when it happens.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure
  2011-07-11  6:47   ` Eilon Greenstein
@ 2011-07-11 10:04     ` Michał Mirosław
  2011-07-11 10:16       ` Eilon Greenstein
  0 siblings, 1 reply; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11 10:04 UTC (permalink / raw)
  To: Eilon Greenstein
  Cc: netdev, Hartley Sweeten, Michael Chan, Guo-Fu Tseng,
	Realtek linux nic maintainers, Francois Romieu,
	Stephen Hemminger, Matthew Carlson, Jon Mason

On Mon, Jul 11, 2011 at 09:47:08AM +0300, Eilon Greenstein wrote:
> On Sun, 2011-07-10 at 17:52 -0700, Michał Mirosław wrote:
> > Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> > ---
> 
> > diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
> > index 4f9164c..a6da01a 100644
> > --- a/drivers/net/bnx2x/bnx2x_cmn.c
> > +++ b/drivers/net/bnx2x/bnx2x_cmn.c
> > @@ -673,6 +673,9 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
> >  				goto reuse_rx;
> >  			}
> >  
> > +#warning drops packets from rx queue head on memory pressure
> > +#warning (like dev_skb_finish_rx_dma_refill() users)
> > +
> 
> We have the dropless_fc module parameter that can be configured if the
> user prefers pausing on host memory pressure - the problem with that
> feature is that it is enough that one of the ring runs out of memory and
> the entire port is stopped. When running with 16 rings, this can lead to
> serious throughput degradation - this is why it is kept as a user
> configurable option.

From the code it looks like dropless_fc just enables sending of pause
frames.  If that's disabled, then what happens when one queue runs out
of free rx buffers?

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11  9:47       ` Michał Mirosław
@ 2011-07-11 10:11         ` David Miller
  2011-07-11 11:17           ` Michał Mirosław
  0 siblings, 1 reply; 76+ messages in thread
From: David Miller @ 2011-07-11 10:11 UTC (permalink / raw)
  To: mirq-linux; +Cc: netdev

From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Mon, 11 Jul 2011 11:47:23 +0200

> Catch 22: The chips are not tested because they have always free buffers,
> they are provided with endless rx buffers because they are not being
> tested. I'd rather test them well rather than workaround a phantom issue.
> Tripping on empty free rx buffer ring is still possible even with scheme
> #1 (eg. with lots of NICs receiving heavy traffic) --- just harder to
> recognise and debug (if it breaks at all) when it happens.

I do not support taking this risk.

Please do not submit patches which move away from the long
standing allocation failure handling scheme.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure
  2011-07-11 10:04     ` Michał Mirosław
@ 2011-07-11 10:16       ` Eilon Greenstein
  0 siblings, 0 replies; 76+ messages in thread
From: Eilon Greenstein @ 2011-07-11 10:16 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, Hartley Sweeten, Michael Chan, Guo-Fu Tseng,
	Realtek linux nic maintainers, Francois Romieu,
	Stephen Hemminger, Matthew Carlson, Jon Mason

On Mon, 2011-07-11 at 03:04 -0700, Michał Mirosław wrote:
> On Mon, Jul 11, 2011 at 09:47:08AM +0300, Eilon Greenstein wrote:
> > On Sun, 2011-07-10 at 17:52 -0700, Michał Mirosław wrote:
> > > Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> > > ---
> > 
> > > diff --git a/drivers/net/bnx2x/bnx2x_cmn.c b/drivers/net/bnx2x/bnx2x_cmn.c
> > > index 4f9164c..a6da01a 100644
> > > --- a/drivers/net/bnx2x/bnx2x_cmn.c
> > > +++ b/drivers/net/bnx2x/bnx2x_cmn.c
> > > @@ -673,6 +673,9 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
> > >  				goto reuse_rx;
> > >  			}
> > >  
> > > +#warning drops packets from rx queue head on memory pressure
> > > +#warning (like dev_skb_finish_rx_dma_refill() users)
> > > +
> > 
> > We have the dropless_fc module parameter that can be configured if the
> > user prefers pausing on host memory pressure - the problem with that
> > feature is that it is enough that one of the ring runs out of memory and
> > the entire port is stopped. When running with 16 rings, this can lead to
> > serious throughput degradation - this is why it is kept as a user
> > configurable option.
> 
> From the code it look like dropless_fc just enables sending of pause
> frames.  If that's disabled, then what happens when one queue runs out
> of free rx buffers?
> 

Actually, I was too fast before and did not read it all through. After I
did, I saw that Dave already replied...

The dropless_fc is not really related to this case. It is about the
driver not keeping up with the FW/HW and not about the driver failing to
allocate a buffer (well, not directly - if that will happen with the
suggested patch we will run out of space on the ring). If the ring is
full, the FW will drop the packet. But if the FW is not fast enough and
the internal chip buffer is getting full - the HW will send pause. So
when pause is enabled, without dropless_fc packets will still be dropped
if the host is too slow but not if the chip is too slow (when exceeding
the chip max PPS with small packets). When dropless_fc is set, packet
will not be dropped but in multi-ring scenario we are likely to be under
utilizing the link in case some (possibly only one) ring on one CPU is
not keeping up while the other rings (on other CPUs) still have room and
possibly idling. 

Regards,
Eilon



^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH net-next-2.6] net: introduce build_skb()
  2011-07-11  5:46   ` [PATCH net-next-2.6] net: introduce build_skb() Eric Dumazet
@ 2011-07-11 10:53     ` Michał Mirosław
  2011-07-12 15:40     ` Eric Dumazet
  1 sibling, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11 10:53 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: netdev

On Mon, Jul 11, 2011 at 07:46:46AM +0200, Eric Dumazet wrote:
> Le lundi 11 juillet 2011 à 02:52 +0200, Michał Mirosław a écrit :
> > Introduce __netdev_alloc_skb_aligned() to return skb with skb->data
> > aligned at specified 2^n multiple.
> > 
> > Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> Hi Michal
> 
> Could we synchronize our work to not introduce things that might
> disappear shortly ?

Sure. Are you saying that you'll convert all drivers to build_skb()? :-)

> Here is the RFC patch about build_skb() :
> 
> [PATCH] net: introduce build_skb()
> 
> One of the things we discussed during netdev 2011 conference was the idea
> to change network drivers to allocate/populate their skb at RX
> completion time, right before feeding the skb to network stack.
> 
> Right now, we allocate skbs when populating the RX ring, and that's a
> waste of CPU cache, since allocating skb means a full memset() to clear
> the skb and its skb_shared_info portion. By the time NIC fills a frame
> in data buffer and host can get it, cpu probably threw away the cache
> lines from its caches, because of huge RX ring sizes.
> 
> So the deal would be to allocate only the data buffer for the NIC to
> populate its RX ring buffer. And use build_skb() at RX completion to
> attach a data buffer (now filled with an ethernet frame) to a new skb,
> initialize the skb_shared_info portion, and give the hot skb to network
> stack.
> 
> build_skb() is the function to allocate an skb, caller providing the
> data buffer that should be attached to it. Drivers are expected to call 
> skb_reserve() right after build_skb() to let skb->data point to the
> Ethernet frame (usually skipping NET_SKB_PAD and NET_IP_ALIGN)
[...]
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -234,6 +234,54 @@ nodata:
[...]
>  /**
> + * build_skb - build a network buffer
> + * @data: data buffer provided by caller
> + * @size: size of data buffer, not including skb_shared_info
> + *
> + * Allocate a new &sk_buff. Caller provides space holding head and
> + * skb_shared_info. Mostly used in driver RX path.
> + * The return is the buffer. On a failure the return is %NULL.
> + * Notes :
> + *  Before IO, driver allocates only data buffer where NIC put incoming frame
> + *  Driver SHOULD add room at head (NET_SKB_PAD) and
> + *  MUST add room at tail (to hold skb_shared_info)
> + *  After IO, driver calls build_skb(), to get a hot skb instead of a cold one
> + *  before giving packet to stack. RX rings only contain data buffers, not
> + *  full skbs.
> + */
> +struct sk_buff *build_skb(void *data, unsigned int size)
> +{
> +	struct skb_shared_info *shinfo;
> +	struct sk_buff *skb;
> +
> +	skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
> +	if (!skb)
> +		return NULL;
> +
> +	size = SKB_DATA_ALIGN(size);
> +
> +	memset(skb, 0, offsetof(struct sk_buff, tail));
> +	skb->truesize = size + sizeof(struct sk_buff);
> +	atomic_set(&skb->users, 1);
> +	skb->head = data;
> +	skb->data = data;
> +	skb_reset_tail_pointer(skb);
> +	skb->end = skb->tail + size;
> +#ifdef NET_SKBUFF_DATA_USES_OFFSET
> +	skb->mac_header = ~0U;
> +#endif
> +
> +	/* make sure we initialize shinfo sequentially */
> +	shinfo = skb_shinfo(skb);
> +	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
> +	atomic_set(&shinfo->dataref, 1);
> +	kmemcheck_annotate_variable(shinfo->destructor_arg);
> +
> +	return skb;
> +}
> +EXPORT_SYMBOL(build_skb);

I like the idea. From driver writer perspective I would like to also
see a function that given max frame size and DMA alignment would allocate
the buffer for me.

In short:

 * rx_refill:

	[ptr, size] = alloc_rx_buffer(size, alignment, offset);
	[dma_addr] = map_buffer(ptr, size);
	append to rx buffer list

 * rx_poll:

	unmap_buffer(dma_addr, size);
	[skb] = build_skb(ptr, size);
	if (!skb)
		reuse_buffer
		return
	skb_reserve(skb, rx_offset);
	skb_put(skb, pkt_len);
	(indicate offloads)
	rx_skb(skb);
	call rx_refill

 * rx_poll with copybreak:

	sync_buffer_to_cpu(dma_addr, data_len);
	[skb, copied] = build_or_copy_skb(ptr, size, hw_rx_offset, pkt_len);
	if (copied || !skb)
		append to rx buffer list
	else
		unmap_buffer(dma_addr, size);
	if (!skb)
		return;
	(indicate offloads)
	rx_skb(skb);
	if (!copied)
		call rx_refill


For even less driver code this could happen:

 * rx_refill(ptr, dma_addr)
	[size/alignment stored in queue or net_device struct]

	if (is_rx_free_list_full)
		return -EBUSY;
	append to rx buffer list [ptr, dma_addr, size]
	return !is_rx_free_list_full;

 * rx_poll with copybreak:
	[copy threshold stored in queue or net_device struct]

	[skb] = finish_rx(ptr, dma_addr, size, hw_rx_offset, pkt_len);
	if (!skb)
		return;
	(fill in offloads: checksum/etc)
	rx_skb(skb);


This could be extended to handle frames spanning multiple buffers.

BTW, napi_get_frags() + napi_gro_frags() use similar idea of allocating
skb late.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11 10:11         ` David Miller
@ 2011-07-11 11:17           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-11 11:17 UTC (permalink / raw)
  To: David Miller; +Cc: netdev

On Mon, Jul 11, 2011 at 03:11:28AM -0700, David Miller wrote:
> From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> Date: Mon, 11 Jul 2011 11:47:23 +0200
> 
> > Catch 22: The chips are not tested because they have always free buffers,
> > they are provided with endless rx buffers because they are not being
> > tested. I'd rather test them well rather than workaround a phantom issue.
> > Tripping on empty free rx buffer ring is still possible even with scheme
> > #1 (eg. with lots of NICs receiving heavy traffic) --- just harder to
> > recognise and debug (if it breaks at all) when it happens.
> I do not support taking this risk.

The problem will be waiting, just pushed into dark corner. ;)
The card+driver need to survive empty rx buffer list anyway.

This issue will come back with people attacking bufferbloat issues
(they will want to reduce queue sizes and so increase the frequency
when free rx buffers run out).

> Please do not submit patches which move away from the long
> standing allocation failure handling scheme.

Sure. In this series it's patch 16 --- because it was trivial to do. Pending
patches only wrap the existing behaviour (where clearly scheme #1 or #2
is used).

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 00/46] Clean up RX copybreak and DMA handling
  2011-07-11  6:54 ` [PATCH v2 00/46] Clean up RX copybreak and DMA handling David Miller
  2011-07-11  9:16   ` Michał Mirosław
@ 2011-07-11 12:36   ` Ben Hutchings
  1 sibling, 0 replies; 76+ messages in thread
From: Ben Hutchings @ 2011-07-11 12:36 UTC (permalink / raw)
  To: David Miller; +Cc: mirq-linux, netdev

On Sun, 2011-07-10 at 23:54 -0700, David Miller wrote:
> From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> Date: Mon, 11 Jul 2011 02:52:46 +0200 (CEST)
> 
> >   1. under packet storm and memory pressure NIC keeps generating interrupts
> >      (if non-NAPI) and indicating new buffers because it always has free
> >      RX buffers --- this only wastes CPU and bus bandwidth transferring
> >      data that is going to be immediately discarded;
> 
> Actually, this is exactly how I, and others advise people to implement
> drivers.  It is the right thing to do.
> 
> The worst thing that can happen is to let the RX ring empty of
> buffers.  Some cards hang as a result of this, and also it causes head
> of line blocking on multiqueue cards, etc.

The controllers you are familiar with might do head-of-line blocking
when a single RX queue is empty.  But any multiqueue controller that is
supposed to support untrusted queues (required for SR-IOV) had better
not.  This is certainly not done on Solarflare controllers (packets for
that queue just get dropped until it's refilled) and I doubt it's done
on many others.

I also think it's quite reasonable for the RX queue to stop interrupting
when the host is already too busy to refill it.  Some drivers might not
recover correctly, but this is not a hardware issue.

> So the first thing the driver should do is try to allocate a
> replacement buffer.
> 
> And if that fails, it should give the RX packet right back to the
> card, and not pass it up the stack.

I agree this is a reasonable and generic way to deal with empty RX
queues.

Ben.

-- 
Ben Hutchings, Senior Software Engineer, Solarflare
Not speaking for my employer; that's the marketing department's job.
They asked us to note that Solarflare product names are trademarked.


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE)
@ 2011-07-11 15:15     ` Pavel Roskin
  0 siblings, 0 replies; 76+ messages in thread
From: Pavel Roskin @ 2011-07-11 15:15 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, Christian Lamparter, John W. Linville, linux-wireless

On 07/10/2011 08:52 PM, Michał Mirosław wrote:
> Also constify pointers used in frame parsers to verify assumptions.

Cleanups are better done separately.

> -	u16 type = le16_to_cpu(*((__le16 *)skb->data));
> +	u16 type = le16_to_cpu(*(const __le16 *)skb->data);

I think it would be more appropriate to use get_unaligned_le16() here. 
No casts should be needed then.

That's not an objection, just a suggestion :)

-- 
Regards,
Pavel Roskin

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE)
@ 2011-07-11 15:15     ` Pavel Roskin
  0 siblings, 0 replies; 76+ messages in thread
From: Pavel Roskin @ 2011-07-11 15:15 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Christian Lamparter,
	John W. Linville, linux-wireless-u79uwXL29TY76Z2rM5mHXA

On 07/10/2011 08:52 PM, Michał Mirosław wrote:
> Also constify pointers used in frame parsers to verify assumptions.

Cleanups are better done separately.

> -	u16 type = le16_to_cpu(*((__le16 *)skb->data));
> +	u16 type = le16_to_cpu(*(const __le16 *)skb->data);

I think it would be more appropriate to use get_unaligned_le16() here. 
No casts should be needed then.

That's not an objection, just a suggestion :)

-- 
Regards,
Pavel Roskin
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure
  2011-07-11  0:52 ` [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure Michał Mirosław
  2011-07-11  5:40   ` Francois Romieu
  2011-07-11  6:47   ` Eilon Greenstein
@ 2011-07-11 15:24   ` Stephen Hemminger
  2 siblings, 0 replies; 76+ messages in thread
From: Stephen Hemminger @ 2011-07-11 15:24 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, Hartley Sweeten, Michael Chan, Eilon Greenstein,
	Guo-Fu Tseng, Realtek linux nic maintainers, Francois Romieu,
	Matt Carlson, Jon Mason

On Mon, 11 Jul 2011 02:52:50 +0200 (CEST)
Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
> ---
>  drivers/net/arm/ep93xx_eth.c    |    3 +++
>  drivers/net/bnx2.c              |    3 +++
>  drivers/net/bnx2x/bnx2x_cmn.c   |    3 +++
>  drivers/net/cassini.c           |    3 +++
>  drivers/net/jme.c               |    3 +++
>  drivers/net/mlx4/en_rx.c        |    6 ++++++
>  drivers/net/r8169.c             |    3 +++
>  drivers/net/skge.c              |    3 +++
>  drivers/net/sky2.c              |    2 ++
>  drivers/net/tg3.c               |    2 ++
>  drivers/net/tokenring/olympic.c |    2 ++
>  drivers/net/vxge/vxge-main.c    |    3 +++
>  12 files changed, 36 insertions(+), 0 deletions(-)
> 

Nak. This is normal behavior and putting in a compile warning is just
useless extra noise.

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
  2011-07-11  0:52   ` Michał Mirosław
  (?)
@ 2011-07-12  4:36     ` Felix Fietkau
  -1 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12  4:36 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, linux-wireless, Jouni Malinen, Senthil Balasubramanian,
	ath9k-devel, Vasanthakumar Thiagarajan

On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> assumptions --- dma_sync_single_for_device() call can be removed.
>
> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> ---
>   drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>   drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>   drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>   3 files changed, 6 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> index 70dc8ec..c5f46d5 100644
> --- a/drivers/net/wireless/ath/ath9k/recv.c
> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>   	BUG_ON(!bf);
>
>   	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> -				common->rx_bufsize, DMA_FROM_DEVICE);
> +				common->rx_bufsize, DMA_BIDIRECTIONAL);
>
>   	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> -	if (ret == -EINPROGRESS) {
> -		/*let device gain the buffer again*/
> -		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> -				common->rx_bufsize, DMA_FROM_DEVICE);
> +	if (ret == -EINPROGRESS)
>   		return false;
> -	}
>
>   	__skb_unlink(skb,&rx_edma->rx_fifo);
>   	if (ret == -EINVAL) {
I have strong doubts about this change. On most MIPS devices, 
dma_sync_single_for_cpu is a no-op, whereas dma_sync_single_for_device 
flushes the cache range. With this change, the CPU could cache the DMA 
status part behind skb->data and that cache entry would not be flushed 
in between calls to this function on the same buffer, likely leading to 
rx stalls.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  4:36     ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12  4:36 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, linux-wireless, Jouni Malinen, Senthil Balasubramanian,
	ath9k-devel, Vasanthakumar Thiagarajan

On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> assumptions --- dma_sync_single_for_device() call can be removed.
>
> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> ---
>   drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>   drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>   drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>   3 files changed, 6 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> index 70dc8ec..c5f46d5 100644
> --- a/drivers/net/wireless/ath/ath9k/recv.c
> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>   	BUG_ON(!bf);
>
>   	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> -				common->rx_bufsize, DMA_FROM_DEVICE);
> +				common->rx_bufsize, DMA_BIDIRECTIONAL);
>
>   	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> -	if (ret == -EINPROGRESS) {
> -		/*let device gain the buffer again*/
> -		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> -				common->rx_bufsize, DMA_FROM_DEVICE);
> +	if (ret == -EINPROGRESS)
>   		return false;
> -	}
>
>   	__skb_unlink(skb,&rx_edma->rx_fifo);
>   	if (ret == -EINVAL) {
I have strong doubts about this change. On most MIPS devices, 
dma_sync_single_for_cpu is a no-op, whereas dma_sync_single_for_device 
flushes the cache range. With this change, the CPU could cache the DMA 
status part behind skb->data and that cache entry would not be flushed 
in between calls to this function on the same buffer, likely leading to 
rx stalls.

- Felix
_______________________________________________
ath9k-devel mailing list
ath9k-devel@lists.ath9k.org
https://lists.ath9k.org/mailman/listinfo/ath9k-devel

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  4:36     ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12  4:36 UTC (permalink / raw)
  To: ath9k-devel

On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> assumptions --- dma_sync_single_for_device() call can be removed.
>
> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> ---
>   drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>   drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>   drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>   3 files changed, 6 insertions(+), 10 deletions(-)
>
> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> index 70dc8ec..c5f46d5 100644
> --- a/drivers/net/wireless/ath/ath9k/recv.c
> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>   	BUG_ON(!bf);
>
>   	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> -				common->rx_bufsize, DMA_FROM_DEVICE);
> +				common->rx_bufsize, DMA_BIDIRECTIONAL);
>
>   	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> -	if (ret == -EINPROGRESS) {
> -		/*let device gain the buffer again*/
> -		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> -				common->rx_bufsize, DMA_FROM_DEVICE);
> +	if (ret == -EINPROGRESS)
>   		return false;
> -	}
>
>   	__skb_unlink(skb,&rx_edma->rx_fifo);
>   	if (ret == -EINVAL) {
I have strong doubts about this change. On most MIPS devices, 
dma_sync_single_for_cpu is a no-op, whereas dma_sync_single_for_device 
flushes the cache range. With this change, the CPU could cache the DMA 
status part behind skb->data and that cache entry would not be flushed 
in between calls to this function on the same buffer, likely leading to 
rx stalls.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE)
@ 2011-07-12  4:50     ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12  4:50 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev, Christian Lamparter, John W. Linville, linux-wireless

On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> Also constify pointers used in frame parsers to verify assumptions.
>
> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> ---
>   drivers/net/wireless/p54/p54pci.c |    2 --
>   drivers/net/wireless/p54/txrx.c   |   22 +++++++++++-----------
>   2 files changed, 11 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
> index 1b75317..4491d33 100644
> --- a/drivers/net/wireless/p54/p54pci.c
> +++ b/drivers/net/wireless/p54/p54pci.c
> @@ -229,8 +229,6 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
>   			desc->host_addr = cpu_to_le32(0);
>   		} else {
>   			skb_trim(skb, 0);
> -			pci_dma_sync_single_for_device(priv->pdev, dma_addr,
> -				priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
>   			desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
>   		}
>
This part does not look correct to me - same issue as your ath9k change, 
which I commented on earlier. I don't think this call to 
dma_sync_single_for_device is useless

- Felix


^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE)
@ 2011-07-12  4:50     ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12  4:50 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA, Christian Lamparter,
	John W. Linville, linux-wireless-u79uwXL29TY76Z2rM5mHXA

On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> Also constify pointers used in frame parsers to verify assumptions.
>
> Signed-off-by: Michał Mirosław<mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org>
> ---
>   drivers/net/wireless/p54/p54pci.c |    2 --
>   drivers/net/wireless/p54/txrx.c   |   22 +++++++++++-----------
>   2 files changed, 11 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/net/wireless/p54/p54pci.c b/drivers/net/wireless/p54/p54pci.c
> index 1b75317..4491d33 100644
> --- a/drivers/net/wireless/p54/p54pci.c
> +++ b/drivers/net/wireless/p54/p54pci.c
> @@ -229,8 +229,6 @@ static void p54p_check_rx_ring(struct ieee80211_hw *dev, u32 *index,
>   			desc->host_addr = cpu_to_le32(0);
>   		} else {
>   			skb_trim(skb, 0);
> -			pci_dma_sync_single_for_device(priv->pdev, dma_addr,
> -				priv->common.rx_mtu + 32, PCI_DMA_FROMDEVICE);
>   			desc->len = cpu_to_le16(priv->common.rx_mtu + 32);
>   		}
>
This part does not look correct to me - same issue as your ath9k change, 
which I commented on earlier. I don't think this call to 
dma_sync_single_for_device is useless

- Felix

--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  5:30       ` Ben Greear
  0 siblings, 0 replies; 76+ messages in thread
From: Ben Greear @ 2011-07-12  5:30 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Michał Mirosław, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan

On 07/11/2011 09:36 PM, Felix Fietkau wrote:
> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>> assumptions --- dma_sync_single_for_device() call can be removed.
>>
>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>> ---
>> drivers/net/wireless/ath/ath9k/ar9003_mac.c | 4 ++--
>> drivers/net/wireless/ath/ath9k/ar9003_mac.h | 2 +-
>> drivers/net/wireless/ath/ath9k/recv.c | 10 +++-------
>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>> index 70dc8ec..c5f46d5 100644
>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>> BUG_ON(!bf);
>>
>> dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>> - common->rx_bufsize, DMA_FROM_DEVICE);
>> + common->rx_bufsize, DMA_BIDIRECTIONAL);
>>
>> ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>> - if (ret == -EINPROGRESS) {
>> - /*let device gain the buffer again*/
>> - dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>> - common->rx_bufsize, DMA_FROM_DEVICE);
>> + if (ret == -EINPROGRESS)
>> return false;
>> - }
>>
>> __skb_unlink(skb,&rx_edma->rx_fifo);
>> if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices, dma_sync_single_for_cpu is a no-op, whereas dma_sync_single_for_device flushes the cache range.
> With this change, the CPU could cache the DMA status part behind skb->data and that cache entry would not be flushed in between calls to this function on the
> same buffer, likely leading to rx stalls.

At the very least, it would need heavy testing.  It took a very long time to get
the ath9k DMA issues (mostly?) resolved...so we shouldn't go mucking in this
code on theory...

Thanks,
Ben

>
> - Felix
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  5:30       ` Ben Greear
  0 siblings, 0 replies; 76+ messages in thread
From: Ben Greear @ 2011-07-12  5:30 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Michał Mirosław, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-juf53994utBLZpfksSYvnA,
	Vasanthakumar Thiagarajan

On 07/11/2011 09:36 PM, Felix Fietkau wrote:
> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>> assumptions --- dma_sync_single_for_device() call can be removed.
>>
>> Signed-off-by: Michał Mirosław<mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org>
>> ---
>> drivers/net/wireless/ath/ath9k/ar9003_mac.c | 4 ++--
>> drivers/net/wireless/ath/ath9k/ar9003_mac.h | 2 +-
>> drivers/net/wireless/ath/ath9k/recv.c | 10 +++-------
>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>> index 70dc8ec..c5f46d5 100644
>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>> BUG_ON(!bf);
>>
>> dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>> - common->rx_bufsize, DMA_FROM_DEVICE);
>> + common->rx_bufsize, DMA_BIDIRECTIONAL);
>>
>> ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>> - if (ret == -EINPROGRESS) {
>> - /*let device gain the buffer again*/
>> - dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>> - common->rx_bufsize, DMA_FROM_DEVICE);
>> + if (ret == -EINPROGRESS)
>> return false;
>> - }
>>
>> __skb_unlink(skb,&rx_edma->rx_fifo);
>> if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices, dma_sync_single_for_cpu is a no-op, whereas dma_sync_single_for_device flushes the cache range.
> With this change, the CPU could cache the DMA status part behind skb->data and that cache entry would not be flushed in between calls to this function on the
> same buffer, likely leading to rx stalls.

At the very least, it would need heavy testing.  It took a very long time to get
the ath9k DMA issues (mostly?) resolved...so we shouldn't go mucking in this
code on theory...

Thanks,
Ben

>
> - Felix
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html


-- 
Ben Greear <greearb-my8/4N5VtI7c+919tysfdA@public.gmane.org>
Candela Technologies Inc  http://www.candelatech.com
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  5:30       ` Ben Greear
  0 siblings, 0 replies; 76+ messages in thread
From: Ben Greear @ 2011-07-12  5:30 UTC (permalink / raw)
  To: ath9k-devel

On 07/11/2011 09:36 PM, Felix Fietkau wrote:
> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>> assumptions --- dma_sync_single_for_device() call can be removed.
>>
>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>> ---
>> drivers/net/wireless/ath/ath9k/ar9003_mac.c | 4 ++--
>> drivers/net/wireless/ath/ath9k/ar9003_mac.h | 2 +-
>> drivers/net/wireless/ath/ath9k/recv.c | 10 +++-------
>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>
>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>> index 70dc8ec..c5f46d5 100644
>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>> BUG_ON(!bf);
>>
>> dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>> - common->rx_bufsize, DMA_FROM_DEVICE);
>> + common->rx_bufsize, DMA_BIDIRECTIONAL);
>>
>> ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>> - if (ret == -EINPROGRESS) {
>> - /*let device gain the buffer again*/
>> - dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>> - common->rx_bufsize, DMA_FROM_DEVICE);
>> + if (ret == -EINPROGRESS)
>> return false;
>> - }
>>
>> __skb_unlink(skb,&rx_edma->rx_fifo);
>> if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices, dma_sync_single_for_cpu is a no-op, whereas dma_sync_single_for_device flushes the cache range.
> With this change, the CPU could cache the DMA status part behind skb->data and that cache entry would not be flushed in between calls to this function on the
> same buffer, likely leading to rx stalls.

At the very least, it would need heavy testing.  It took a very long time to get
the ath9k DMA issues (mostly?) resolved...so we shouldn't go mucking in this
code on theory...

Thanks,
Ben

>
> - Felix
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo at vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html


-- 
Ben Greear <greearb@candelatech.com>
Candela Technologies Inc  http://www.candelatech.com

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  9:55       ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12  9:55 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: netdev, linux-wireless, Jouni Malinen, Senthil Balasubramanian,
	ath9k-devel, Vasanthakumar Thiagarajan, Ralf Baechle, linux-mips

On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >assumptions --- dma_sync_single_for_device() call can be removed.
> >
> >Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >---
> >  drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >  drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >  drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >  3 files changed, 6 insertions(+), 10 deletions(-)
> >
> >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >index 70dc8ec..c5f46d5 100644
> >--- a/drivers/net/wireless/ath/ath9k/recv.c
> >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >  	BUG_ON(!bf);
> >
> >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> >
> >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >-	if (ret == -EINPROGRESS) {
> >-		/*let device gain the buffer again*/
> >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+	if (ret == -EINPROGRESS)
> >  		return false;
> >-	}
> >
> >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> >  	if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices,
> dma_sync_single_for_cpu is a no-op, whereas
> dma_sync_single_for_device flushes the cache range. With this
> change, the CPU could cache the DMA status part behind skb->data and
> that cache entry would not be flushed in between calls to this
> function on the same buffer, likely leading to rx stalls.

You're suggesting a platform implementation bug then. If the platform is not
cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
and unmap cases. Do other devices show such symptoms on MIPS systems?

I'm not familiar with the platform internals, so we should ask MIPS people.

[added Cc: linux-mips]

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  9:55       ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12  9:55 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-xDcbHBWguxHbcTqmT+pZeQ,
	Vasanthakumar Thiagarajan, Ralf Baechle,
	linux-mips-6z/3iImG2C8G8FEW9MqTrA

On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >assumptions --- dma_sync_single_for_device() call can be removed.
> >
> >Signed-off-by: Michał Mirosław<mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org>
> >---
> >  drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >  drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >  drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >  3 files changed, 6 insertions(+), 10 deletions(-)
> >
> >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >index 70dc8ec..c5f46d5 100644
> >--- a/drivers/net/wireless/ath/ath9k/recv.c
> >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >  	BUG_ON(!bf);
> >
> >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> >
> >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >-	if (ret == -EINPROGRESS) {
> >-		/*let device gain the buffer again*/
> >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+	if (ret == -EINPROGRESS)
> >  		return false;
> >-	}
> >
> >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> >  	if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices,
> dma_sync_single_for_cpu is a no-op, whereas
> dma_sync_single_for_device flushes the cache range. With this
> change, the CPU could cache the DMA status part behind skb->data and
> that cache entry would not be flushed in between calls to this
> function on the same buffer, likely leading to rx stalls.

You're suggesting a platform implementation bug then. If the platform is not
cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
and unmap cases. Do other devices show such symptoms on MIPS systems?

I'm not familiar with the platform internals, so we should ask MIPS people.

[added Cc: linux-mips]

Best Regards,
Michał Mirosław
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12  9:55       ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12  9:55 UTC (permalink / raw)
  To: ath9k-devel

On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >assumptions --- dma_sync_single_for_device() call can be removed.
> >
> >Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >---
> >  drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >  drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >  drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >  3 files changed, 6 insertions(+), 10 deletions(-)
> >
> >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >index 70dc8ec..c5f46d5 100644
> >--- a/drivers/net/wireless/ath/ath9k/recv.c
> >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >  	BUG_ON(!bf);
> >
> >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> >
> >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >-	if (ret == -EINPROGRESS) {
> >-		/*let device gain the buffer again*/
> >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+	if (ret == -EINPROGRESS)
> >  		return false;
> >-	}
> >
> >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> >  	if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices,
> dma_sync_single_for_cpu is a no-op, whereas
> dma_sync_single_for_device flushes the cache range. With this
> change, the CPU could cache the DMA status part behind skb->data and
> that cache entry would not be flushed in between calls to this
> function on the same buffer, likely leading to rx stalls.

You're suggesting a platform implementation bug then. If the platform is not
cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
and unmap cases. Do other devices show such symptoms on MIPS systems?

I'm not familiar with the platform internals, so we should ask MIPS people.

[added Cc: linux-mips]

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
  2011-07-12  9:55       ` Michał Mirosław
  (?)
@ 2011-07-12 12:54         ` Felix Fietkau
  -1 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 12:54 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan,
	Ralf Baechle, linux-mips

On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>> 
>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>> ---
>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>> 
>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>> index 70dc8ec..c5f46d5 100644
>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>    BUG_ON(!bf);
>>> 
>>>    dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>> 
>>>    ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>> -    if (ret == -EINPROGRESS) {
>>> -        /*let device gain the buffer again*/
>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>> +    if (ret == -EINPROGRESS)
>>>        return false;
>>> -    }
>>> 
>>>    __skb_unlink(skb,&rx_edma->rx_fifo);
>>>    if (ret == -EINVAL) {
>> I have strong doubts about this change. On most MIPS devices,
>> dma_sync_single_for_cpu is a no-op, whereas
>> dma_sync_single_for_device flushes the cache range. With this
>> change, the CPU could cache the DMA status part behind skb->data and
>> that cache entry would not be flushed in between calls to this
>> function on the same buffer, likely leading to rx stalls.
> 
> You're suggesting a platform implementation bug then. If the platform is not
> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> and unmap cases. Do other devices show such symptoms on MIPS systems?
> 
> I'm not familiar with the platform internals, so we should ask MIPS people.
I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
This is handled properly by the current code without your change.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 12:54         ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 12:54 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: linux-mips, netdev, linux-wireless, Ralf Baechle, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan

On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>> 
>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>> ---
>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>> 
>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>> index 70dc8ec..c5f46d5 100644
>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>    BUG_ON(!bf);
>>> 
>>>    dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>> 
>>>    ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>> -    if (ret == -EINPROGRESS) {
>>> -        /*let device gain the buffer again*/
>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>> +    if (ret == -EINPROGRESS)
>>>        return false;
>>> -    }
>>> 
>>>    __skb_unlink(skb,&rx_edma->rx_fifo);
>>>    if (ret == -EINVAL) {
>> I have strong doubts about this change. On most MIPS devices,
>> dma_sync_single_for_cpu is a no-op, whereas
>> dma_sync_single_for_device flushes the cache range. With this
>> change, the CPU could cache the DMA status part behind skb->data and
>> that cache entry would not be flushed in between calls to this
>> function on the same buffer, likely leading to rx stalls.
> 
> You're suggesting a platform implementation bug then. If the platform is not
> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> and unmap cases. Do other devices show such symptoms on MIPS systems?
> 
> I'm not familiar with the platform internals, so we should ask MIPS people.
I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
This is handled properly by the current code without your change.

- Felix
_______________________________________________
ath9k-devel mailing list
ath9k-devel@lists.ath9k.org
https://lists.ath9k.org/mailman/listinfo/ath9k-devel

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 12:54         ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 12:54 UTC (permalink / raw)
  To: ath9k-devel

On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>> 
>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>> ---
>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>> 
>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>> index 70dc8ec..c5f46d5 100644
>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>    BUG_ON(!bf);
>>> 
>>>    dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>> 
>>>    ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>> -    if (ret == -EINPROGRESS) {
>>> -        /*let device gain the buffer again*/
>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>> +    if (ret == -EINPROGRESS)
>>>        return false;
>>> -    }
>>> 
>>>    __skb_unlink(skb,&rx_edma->rx_fifo);
>>>    if (ret == -EINVAL) {
>> I have strong doubts about this change. On most MIPS devices,
>> dma_sync_single_for_cpu is a no-op, whereas
>> dma_sync_single_for_device flushes the cache range. With this
>> change, the CPU could cache the DMA status part behind skb->data and
>> that cache entry would not be flushed in between calls to this
>> function on the same buffer, likely leading to rx stalls.
> 
> You're suggesting a platform implementation bug then. If the platform is not
> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> and unmap cases. Do other devices show such symptoms on MIPS systems?
> 
> I'm not familiar with the platform internals, so we should ask MIPS people.
I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
This is handled properly by the current code without your change.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 13:03           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 13:03 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan,
	Ralf Baechle, linux-mips

On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> 
> > On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>> 
> >>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >>> ---
> >>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>> 
> >>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>> index 70dc8ec..c5f46d5 100644
> >>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>    BUG_ON(!bf);
> >>> 
> >>>    dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>> 
> >>>    ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>> -    if (ret == -EINPROGRESS) {
> >>> -        /*let device gain the buffer again*/
> >>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>> +    if (ret == -EINPROGRESS)
> >>>        return false;
> >>> -    }
> >>> 
> >>>    __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>    if (ret == -EINVAL) {
> >> I have strong doubts about this change. On most MIPS devices,
> >> dma_sync_single_for_cpu is a no-op, whereas
> >> dma_sync_single_for_device flushes the cache range. With this
> >> change, the CPU could cache the DMA status part behind skb->data and
> >> that cache entry would not be flushed in between calls to this
> >> function on the same buffer, likely leading to rx stalls.
> > 
> > You're suggesting a platform implementation bug then. If the platform is not
> > cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> > and unmap cases. Do other devices show such symptoms on MIPS systems?
> > 
> > I'm not familiar with the platform internals, so we should ask MIPS people.
> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.

What you're saying reminds me of the wording in DMA-API-HOWTO.txt that I find
wrong (or at least misleading) compared to what DMA-API.txt describes.
DMA sync calls do not transfer the ownership of the buffer - they are
cache synchronization points, ownership passing is handled entirely by
the driver.

> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.

Correctness of this access should be provided by sync_to_cpu() call.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 13:03           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 13:03 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-xDcbHBWguxHbcTqmT+pZeQ,
	Vasanthakumar Thiagarajan, Ralf Baechle,
	linux-mips-6z/3iImG2C8G8FEW9MqTrA

On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org> wrote:
> 
> > On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>> 
> >>> Signed-off-by: Michał Mirosław<mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org>
> >>> ---
> >>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>> 
> >>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>> index 70dc8ec..c5f46d5 100644
> >>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>    BUG_ON(!bf);
> >>> 
> >>>    dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>> 
> >>>    ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>> -    if (ret == -EINPROGRESS) {
> >>> -        /*let device gain the buffer again*/
> >>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>> +    if (ret == -EINPROGRESS)
> >>>        return false;
> >>> -    }
> >>> 
> >>>    __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>    if (ret == -EINVAL) {
> >> I have strong doubts about this change. On most MIPS devices,
> >> dma_sync_single_for_cpu is a no-op, whereas
> >> dma_sync_single_for_device flushes the cache range. With this
> >> change, the CPU could cache the DMA status part behind skb->data and
> >> that cache entry would not be flushed in between calls to this
> >> function on the same buffer, likely leading to rx stalls.
> > 
> > You're suggesting a platform implementation bug then. If the platform is not
> > cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> > and unmap cases. Do other devices show such symptoms on MIPS systems?
> > 
> > I'm not familiar with the platform internals, so we should ask MIPS people.
> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.

What you're saying reminds me of the wording in DMA-API-HOWTO.txt that I find
wrong (or at least misleading) compared to what DMA-API.txt describes.
DMA sync calls do not transfer the ownership of the buffer - they are
cache synchronization points, ownership passing is handled entirely by
the driver.

> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.

Correctness of this access should be provided by sync_to_cpu() call.

Best Regards,
Michał Mirosław
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 13:03           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 13:03 UTC (permalink / raw)
  To: ath9k-devel

On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> 
> > On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>> 
> >>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >>> ---
> >>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>> 
> >>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>> index 70dc8ec..c5f46d5 100644
> >>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>    BUG_ON(!bf);
> >>> 
> >>>    dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>> 
> >>>    ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>> -    if (ret == -EINPROGRESS) {
> >>> -        /*let device gain the buffer again*/
> >>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>> +    if (ret == -EINPROGRESS)
> >>>        return false;
> >>> -    }
> >>> 
> >>>    __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>    if (ret == -EINVAL) {
> >> I have strong doubts about this change. On most MIPS devices,
> >> dma_sync_single_for_cpu is a no-op, whereas
> >> dma_sync_single_for_device flushes the cache range. With this
> >> change, the CPU could cache the DMA status part behind skb->data and
> >> that cache entry would not be flushed in between calls to this
> >> function on the same buffer, likely leading to rx stalls.
> > 
> > You're suggesting a platform implementation bug then. If the platform is not
> > cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> > and unmap cases. Do other devices show such symptoms on MIPS systems?
> > 
> > I'm not familiar with the platform internals, so we should ask MIPS people.
> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.

What you're saying reminds me of the wording in DMA-API-HOWTO.txt that I find
wrong (or at least misleading) compared to what DMA-API.txt describes.
DMA sync calls do not transfer the ownership of the buffer - they are
cache synchronization points, ownership passing is handled entirely by
the driver.

> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.

Correctness of this access should be provided by sync_to_cpu() call.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 14:21             ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 14:21 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan,
	Ralf Baechle, linux-mips

On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>> 
>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>>>> 
>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>>>> ---
>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>>>> 
>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>>>> index 70dc8ec..c5f46d5 100644
>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>>>   BUG_ON(!bf);
>>>>> 
>>>>>   dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>>>> 
>>>>>   ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>>>> -    if (ret == -EINPROGRESS) {
>>>>> -        /*let device gain the buffer again*/
>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>> +    if (ret == -EINPROGRESS)
>>>>>       return false;
>>>>> -    }
>>>>> 
>>>>>   __skb_unlink(skb,&rx_edma->rx_fifo);
>>>>>   if (ret == -EINVAL) {
>>>> I have strong doubts about this change. On most MIPS devices,
>>>> dma_sync_single_for_cpu is a no-op, whereas
>>>> dma_sync_single_for_device flushes the cache range. With this
>>>> change, the CPU could cache the DMA status part behind skb->data and
>>>> that cache entry would not be flushed in between calls to this
>>>> function on the same buffer, likely leading to rx stalls.
>>> 
>>> You're suggesting a platform implementation bug then. If the platform is not
>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
>>> 
>>> I'm not familiar with the platform internals, so we should ask MIPS people.
>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> 
> What you're saying reminds me of the wording in DMA-API-HOWTO.txt that I find
> wrong (or at least misleading) compared to what DMA-API.txt describes.
> DMA sync calls do not transfer the ownership of the buffer - they are
> cache synchronization points, ownership passing is handled entirely by
> the driver.
What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.

> 
>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> 
> Correctness of this access should be provided by sync_to_cpu() call.
At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 14:21             ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 14:21 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-xDcbHBWguxHbcTqmT+pZeQ,
	Vasanthakumar Thiagarajan, Ralf Baechle,
	linux-mips-6z/3iImG2C8G8FEW9MqTrA

On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>> 
>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>>>> 
>>>>> Signed-off-by: Michał Mirosław<mirq-linux-CoA6ZxLDdyEEUmgCuDUIdw@public.gmane.org>
>>>>> ---
>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>>>> 
>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>>>> index 70dc8ec..c5f46d5 100644
>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>>>   BUG_ON(!bf);
>>>>> 
>>>>>   dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>>>> 
>>>>>   ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>>>> -    if (ret == -EINPROGRESS) {
>>>>> -        /*let device gain the buffer again*/
>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>> +    if (ret == -EINPROGRESS)
>>>>>       return false;
>>>>> -    }
>>>>> 
>>>>>   __skb_unlink(skb,&rx_edma->rx_fifo);
>>>>>   if (ret == -EINVAL) {
>>>> I have strong doubts about this change. On most MIPS devices,
>>>> dma_sync_single_for_cpu is a no-op, whereas
>>>> dma_sync_single_for_device flushes the cache range. With this
>>>> change, the CPU could cache the DMA status part behind skb->data and
>>>> that cache entry would not be flushed inbetween calls to this
>>>> functions on the same buffer, likely leading to rx stalls.
>>> 
>>> You're suggesting a platform implementation bug then. If the platform is not
>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
>>> 
>>> I'm not familiar with the platform internals, so we should ask MIPS people.
>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> 
> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
> wrong (or at least misleading) compared to what DMA-API.txt describes.
> DMA sync calls do not transfer the ownership of the buffer - they are
> cache synchronization points, ownership passing is handled entirely by
> the driver.
What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.

> 
>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> 
> Correctness of this access should be provided by sync_to_cpu() call.
At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 14:21             ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 14:21 UTC (permalink / raw)
  To: ath9k-devel

On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>> 
>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>>>> 
>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>>>> ---
>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>>>> 
>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>>>> index 70dc8ec..c5f46d5 100644
>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>>>   BUG_ON(!bf);
>>>>> 
>>>>>   dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>>>> 
>>>>>   ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>>>> -    if (ret == -EINPROGRESS) {
>>>>> -        /*let device gain the buffer again*/
>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>> +    if (ret == -EINPROGRESS)
>>>>>       return false;
>>>>> -    }
>>>>> 
>>>>>   __skb_unlink(skb,&rx_edma->rx_fifo);
>>>>>   if (ret == -EINVAL) {
>>>> I have strong doubts about this change. On most MIPS devices,
>>>> dma_sync_single_for_cpu is a no-op, whereas
>>>> dma_sync_single_for_device flushes the cache range. With this
>>>> change, the CPU could cache the DMA status part behind skb->data and
>>>> that cache entry would not be flushed inbetween calls to this
>>>> functions on the same buffer, likely leading to rx stalls.
>>> 
>>> You're suggesting a platform implementation bug then. If the platform is not
>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
>>> 
>>> I'm not familiar with the platform internals, so we should ask MIPS people.
>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> 
> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
> wrong (or at least misleading) compared to what DMA-API.txt describes.
> DMA sync calls do not transfer the ownership of the buffer - they are
> cache synchronization points, ownership passing is handled entirely by
> the driver.
What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.

> 
>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> 
> Correctness of this access should be provided by sync_to_cpu() call.
At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH net-next-2.6] net: introduce build_skb()
  2011-07-11  5:46   ` [PATCH net-next-2.6] net: introduce build_skb() Eric Dumazet
  2011-07-11 10:53     ` Michał Mirosław
@ 2011-07-12 15:40     ` Eric Dumazet
  2011-07-12 15:54       ` Michał Mirosław
  1 sibling, 1 reply; 76+ messages in thread
From: Eric Dumazet @ 2011-07-12 15:40 UTC (permalink / raw)
  To: Michał Mirosław, David Miller; +Cc: netdev

Le lundi 11 juillet 2011 à 07:46 +0200, Eric Dumazet a écrit :

> [PATCH] net: introduce build_skb()
> 
> One of the thing we discussed during netdev 2011 conference was the idea
> to change network drivers to allocate/populate their skb at RX
> completion time, right before feeding the skb to network stack.
> 
> Right now, we allocate skbs when populating the RX ring, and thats a
> waste of CPU cache, since allocating skb means a full memset() to clear
> the skb and its skb_shared_info portion. By the time NIC fills a frame
> in data buffer and host can get it, cpu probably threw away the cache
> lines from its caches, because of huge RX ring sizes.
> 
> So the deal would be to allocate only the data buffer for the NIC to
> populate its RX ring buffer. And use build_skb() at RX completion to
> attach a data buffer (now filled with an ethernet frame) to a new skb,
> initialize the skb_shared_info portion, and give the hot skb to network
> stack.

Update :

First results are impressive : About 15% of throughput increase with igb
driver on my small desktop machine, and I am limited by the wire
speed :)

(AMD Athlon(tm) II X2 B24 Processor, 3GHz, cache size : 1024K)

setup : One dual port Intel card : Ethernet controller: Intel
Corporation 82576 Gigabit Network Connection (rev 01)

eth1 direct attach on eth2, Gigabit speed.
eth2 RX ring set to 4096 slots (default is 256)

CPU0 : pktgen sending on eth1, line rate (1488137pps)
CPU1 : receive eth2 interrupts, packets dropped into raw netfilter table
to bypass upper stacks.

Before patch : 15% packet losses, ksoftirqd/1 using 100% of cpu
After patch : residual losses (less than 0.1 %), ksoftirqd not used, 80%
cpu used 

I'll do more tests with a 10Gb card (ixgbe driver) to not be wire
limited.




^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [PATCH net-next-2.6] net: introduce build_skb()
  2011-07-12 15:40     ` Eric Dumazet
@ 2011-07-12 15:54       ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 15:54 UTC (permalink / raw)
  To: Eric Dumazet; +Cc: David Miller, netdev

On Tue, Jul 12, 2011 at 05:40:16PM +0200, Eric Dumazet wrote:
> Le lundi 11 juillet 2011 à 07:46 +0200, Eric Dumazet a écrit :
> 
> > [PATCH] net: introduce build_skb()
> > 
> > One of the thing we discussed during netdev 2011 conference was the idea
> > to change network drivers to allocate/populate their skb at RX
> > completion time, right before feeding the skb to network stack.
> > 
> > Right now, we allocate skbs when populating the RX ring, and thats a
> > waste of CPU cache, since allocating skb means a full memset() to clear
> > the skb and its skb_shared_info portion. By the time NIC fills a frame
> > in data buffer and host can get it, cpu probably threw away the cache
> > lines from its caches, because of huge RX ring sizes.
> > 
> > So the deal would be to allocate only the data buffer for the NIC to
> > populate its RX ring buffer. And use build_skb() at RX completion to
> > attach a data buffer (now filled with an ethernet frame) to a new skb,
> > initialize the skb_shared_info portion, and give the hot skb to network
> > stack.
> 
> Update :
> 
> First results are impressive : About 15% of throughput increase with igb
> driver on my small desktop machine, and I am limited by the wire
> speed :)
> 
> (AMD Athlon(tm) II X2 B24 Processor, 3GHz, cache size : 1024K)
> 
> setup : One dual port Intel card : Ethernet controller: Intel
> Corporation 82576 Gigabit Network Connection (rev 01)
> 
> eth1 direct attach on eth2, Gigabit speed.
> eth2 RX ring set to 4096 slots (default is 256)
> 
> CPU0 : pktgen sending on eth1, line rate (1488137pps)
> CPU1 : receive eth2 interrupts, packets dropped into raw netfilter table
> to bypass upper stacks.
> 
> Before patch : 15% packet losses, ksoftirqd/1 using 100% of cpu
> After patch : residual losses (less than 0.1 %), ksoftirqd not used, 80%
> cpu used 
> 
> I'll do more tests with a 10Gb card (ixgbe driver) to not be wire
> limited.

I remember observing similar increase after switching from allocating skb
to allocating pages and using napi_get_frags() + napi_gro_frags(). That
was with sl351x driver posted for review some time ago.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
  2011-07-12 14:21             ` Felix Fietkau
@ 2011-07-12 15:58               ` Michał Mirosław
  -1 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 15:58 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan,
	Ralf Baechle, linux-mips

On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> 
> > On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> >> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >> 
> >>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>>>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>>>> 
> >>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >>>>> ---
> >>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>>>> 
> >>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>> index 70dc8ec..c5f46d5 100644
> >>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>>>   BUG_ON(!bf);
> >>>>> 
> >>>>>   dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>>>> 
> >>>>>   ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>>>> -    if (ret == -EINPROGRESS) {
> >>>>> -        /*let device gain the buffer again*/
> >>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>> +    if (ret == -EINPROGRESS)
> >>>>>       return false;
> >>>>> -    }
> >>>>> 
> >>>>>   __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>>>   if (ret == -EINVAL) {
> >>>> I have strong doubts about this change. On most MIPS devices,
> >>>> dma_sync_single_for_cpu is a no-op, whereas
> >>>> dma_sync_single_for_device flushes the cache range. With this
> >>>> change, the CPU could cache the DMA status part behind skb->data and
> >>>> that cache entry would not be flushed inbetween calls to this
> >>>> functions on the same buffer, likely leading to rx stalls.
> >>> You're suggesting a platform implementation bug then. If the platform is not
> >>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> >>> and unmap cases. Do other devices show such symptoms on MIPS systems?
> >>> 
> >>> I'm not familiar with the platform internals, so we should ask MIPS people.
> >> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> > What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
> > wrong (or at least misleading) compared to what DMA-API.txt describes.
> > DMA sync calls do not transfer the ownership of the buffer - they are
> > cache synchronization points, ownership passing is handled entirely by
> > the driver.
> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
> >> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> > Correctness of this access should be provided by sync_to_cpu() call.
> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.

I briefly looked through DMA API implementation in MIPS, and except
for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
(see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
in both points, and exactly like I described before - CPU cachelines
are invalidated for DMA_FROM_DEVICE mappings, written back for
DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
mapping+sync direction).

So doing that sync_to_device you are just invalidating the same cachelines
twice for no gain (or do nothing twice in some cases) - they are not read
by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
in the driver). 

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 15:58               ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 15:58 UTC (permalink / raw)
  To: ath9k-devel

On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> 
> > On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> >> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >> 
> >>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>>>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>>>> 
> >>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >>>>> ---
> >>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>>>> 
> >>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>> index 70dc8ec..c5f46d5 100644
> >>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>>>   BUG_ON(!bf);
> >>>>> 
> >>>>>   dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>>>> 
> >>>>>   ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>>>> -    if (ret == -EINPROGRESS) {
> >>>>> -        /*let device gain the buffer again*/
> >>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>> +    if (ret == -EINPROGRESS)
> >>>>>       return false;
> >>>>> -    }
> >>>>> 
> >>>>>   __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>>>   if (ret == -EINVAL) {
> >>>> I have strong doubts about this change. On most MIPS devices,
> >>>> dma_sync_single_for_cpu is a no-op, whereas
> >>>> dma_sync_single_for_device flushes the cache range. With this
> >>>> change, the CPU could cache the DMA status part behind skb->data and
> >>>> that cache entry would not be flushed inbetween calls to this
> >>>> functions on the same buffer, likely leading to rx stalls.
> >>> You're suggesting a platform implementation bug then. If the platform is not
> >>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> >>> and unmap cases. Do other devices show such symptoms on MIPS systems?
> >>> 
> >>> I'm not familiar with the platform internals, so we should ask MIPS people.
> >> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> > What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
> > wrong (or at least misleading) compared to what DMA-API.txt describes.
> > DMA sync calls do not transfer the ownership of the buffer - they are
> > cache synchronization points, ownership passing is handled entirely by
> > the driver.
> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
> >> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> > Correctness of this access should be provided by sync_to_cpu() call.
> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.

I briefly looked through DMA API implementation in MIPS, and except
for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
(see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
in both points, and exactly like I described before - CPU cachelines
are invalidated for DMA_FROM_DEVICE mappings, written back for
DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
mapping+sync direction).

So doing that sync_to_device you are just invalidating the same cachelines
twice for no gain (or do nothing twice in some cases) - they are not read
by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
in the driver). 

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 16:04                 ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 16:04 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan,
	Ralf Baechle, linux-mips

On 12.07.2011, at 23:58, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
>> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>> 
>>> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
>>>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>>>> 
>>>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>>>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>>>>>> 
>>>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>>>>>> ---
>>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> index 70dc8ec..c5f46d5 100644
>>>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>>>>>  BUG_ON(!bf);
>>>>>>> 
>>>>>>>  dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>>>>>> 
>>>>>>>  ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>>>>>> -    if (ret == -EINPROGRESS) {
>>>>>>> -        /*let device gain the buffer again*/
>>>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>>>> +    if (ret == -EINPROGRESS)
>>>>>>>      return false;
>>>>>>> -    }
>>>>>>> 
>>>>>>>  __skb_unlink(skb,&rx_edma->rx_fifo);
>>>>>>>  if (ret == -EINVAL) {
>>>>>> I have strong doubts about this change. On most MIPS devices,
>>>>>> dma_sync_single_for_cpu is a no-op, whereas
>>>>>> dma_sync_single_for_device flushes the cache range. With this
>>>>>> change, the CPU could cache the DMA status part behind skb->data and
>>>>>> that cache entry would not be flushed inbetween calls to this
>>>>>> functions on the same buffer, likely leading to rx stalls.
>>>>> You're suggesting a platform implementation bug then. If the platform is not
>>>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
>>>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
>>>>> 
>>>>> I'm not familiar with the platform internals, so we should ask MIPS people.
>>>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
>>> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
>>> wrong (or at least misleading) compared to what DMA-API.txt describes.
>>> DMA sync calls do not transfer the ownership of the buffer - they are
>>> cache synchronization points, ownership passing is handled entirely by
>>> the driver.
>> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
>>>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
>>> Correctness of this access should be provided by sync_to_cpu() call.
>> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.
> 
> I briefly looked through DMA API implementation in MIPS, and except
> for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
> (see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
> in both points, and exactly like I described before - CPU cachelines
> are invalidated for DMA_FROM_DEVICE mappings, written back for
> DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
> mapping+sync direction).
> 
> So doing that sync_to_device you are just invalidating the same cachelines
> twice for no gain (or do nothing twice in some cases) - they are not read
> by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
> in the driver). 
I think you're missing something. It works like this: In the AR9380 rx path, the descriptor is part of the skb. The rx tasklet checks for rx frame completion by calling sync_for_cpu and reading the completion flag; if the frame is not yet completed, it flushes the cache for that location again (sync_for_device). If you remove the for_device call, the next call to this function can see stale data, as the for_cpu call can be a no-op.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 16:04                 ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 16:04 UTC (permalink / raw)
  To: Michał Mirosław
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-xDcbHBWguxHbcTqmT+pZeQ,
	Vasanthakumar Thiagarajan, Ralf Baechle,
	linux-mips-6z/3iImG2C8G8FEW9MqTrA

On 12.07.2011, at 23:58, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
>> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>> 
>>> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
>>>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>>>> 
>>>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>>>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>>>>>> 
>>>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>>>>>> ---
>>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> index 70dc8ec..c5f46d5 100644
>>>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>>>>>  BUG_ON(!bf);
>>>>>>> 
>>>>>>>  dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>>>>>> 
>>>>>>>  ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>>>>>> -    if (ret == -EINPROGRESS) {
>>>>>>> -        /*let device gain the buffer again*/
>>>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>>>> +    if (ret == -EINPROGRESS)
>>>>>>>      return false;
>>>>>>> -    }
>>>>>>> 
>>>>>>>  __skb_unlink(skb,&rx_edma->rx_fifo);
>>>>>>>  if (ret == -EINVAL) {
>>>>>> I have strong doubts about this change. On most MIPS devices,
>>>>>> dma_sync_single_for_cpu is a no-op, whereas
>>>>>> dma_sync_single_for_device flushes the cache range. With this
>>>>>> change, the CPU could cache the DMA status part behind skb->data and
>>>>>> that cache entry would not be flushed inbetween calls to this
>>>>>> functions on the same buffer, likely leading to rx stalls.
>>>>> You're suggesting a platform implementation bug then. If the platform is not
>>>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
>>>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
>>>>> 
>>>>> I'm not familiar with the platform internals, so we should ask MIPS people.
>>>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
>>> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
>>> wrong (or at least misleading) compared to what DMA-API.txt describes.
>>> DMA sync calls do not transfer the ownership of the buffer - they are
>>> cache synchronization points, ownership passing is handled entirely by
>>> the driver.
>> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
>>>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
>>> Correctness of this access should be provided by sync_to_cpu() call.
>> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.
> 
> I briefly looked through DMA API implementation in MIPS, and except
> for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
> (see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
> in both points, and exactly like I described before - CPU cachelines
> are invalidated for DMA_FROM_DEVICE mappings, written back for
> DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
> mapping+sync direction).
> 
> So doing that sync_to_device you are just invalidating the same cachelines
> twice for no gain (or do nothing twice in some cases) - they are not read
> by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
> in the driver). 
I think you're missing something. It works like this: In the AR9380 rx path, the descriptor is part of the skb. The rx tasklet checks for rx frame completion by calling the sync for cpu, reading the completion flag and (in case of a not completed frame) flushes the cache for that location again (for device). If you remove the for_device call, the next call to this function can see stale data, as the for_cpu call can be a no-op.

- Felix
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 16:04                 ` Felix Fietkau
  0 siblings, 0 replies; 76+ messages in thread
From: Felix Fietkau @ 2011-07-12 16:04 UTC (permalink / raw)
  To: ath9k-devel

On 12.07.2011, at 23:58, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:

> On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
>> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>> 
>>> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
>>>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
>>>> 
>>>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
>>>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
>>>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
>>>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
>>>>>>> 
>>>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
>>>>>>> ---
>>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
>>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
>>>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
>>>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
>>>>>>> 
>>>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> index 70dc8ec..c5f46d5 100644
>>>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
>>>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
>>>>>>>  BUG_ON(!bf);
>>>>>>> 
>>>>>>>  dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
>>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
>>>>>>> 
>>>>>>>  ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
>>>>>>> -    if (ret == -EINPROGRESS) {
>>>>>>> -        /*let device gain the buffer again*/
>>>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
>>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
>>>>>>> +    if (ret == -EINPROGRESS)
>>>>>>>      return false;
>>>>>>> -    }
>>>>>>> 
>>>>>>>  __skb_unlink(skb,&rx_edma->rx_fifo);
>>>>>>>  if (ret == -EINVAL) {
>>>>>> I have strong doubts about this change. On most MIPS devices,
>>>>>> dma_sync_single_for_cpu is a no-op, whereas
>>>>>> dma_sync_single_for_device flushes the cache range. With this
>>>>>> change, the CPU could cache the DMA status part behind skb->data and
>>>>>> that cache entry would not be flushed inbetween calls to this
>>>>>> functions on the same buffer, likely leading to rx stalls.
>>>>> You're suggesting a platform implementation bug then. If the platform is not
>>>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
>>>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
>>>>> 
>>>>> I'm not familiar with the platform internals, so we should ask MIPS people.
>>>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
>>> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
>>> wrong (or at least misleading) compared to what DMA-API.txt describes.
>>> DMA sync calls do not transfer the ownership of the buffer - they are
>>> cache synchronization points, ownership passing is handled entirely by
>>> the driver.
>> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
>>>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
>>> Correctness of this access should be provided by sync_to_cpu() call.
>> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.
> 
> I briefly looked through DMA API implementation in MIPS, and except
> for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
> (see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
> in both points, and exactly like I described before - CPU cachelines
> are invalidated for DMA_FROM_DEVICE mappings, written back for
> DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
> mapping+sync direction).
> 
> So doing that sync_to_device you are just invalidating the same cachelines
> twice for no gain (or do nothing twice in some cases) - they are not read
> by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
> in the driver). 
I think you're missing something. It works like this: In the AR9380 rx path, the descriptor is part of the skb. The rx tasklet checks for rx frame completion by calling the sync for cpu, reading the completion flag and (in case of a not completed frame) flushes the cache for that location again (for device). If you remove the for_device call, the next call to this function can see stale data, as the for_cpu call can be a no-op.

- Felix

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
  2011-07-12 16:04                 ` Felix Fietkau
@ 2011-07-12 19:13                   ` Michał Mirosław
  -1 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 19:13 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan,
	Ralf Baechle, linux-mips

On Wed, Jul 13, 2011 at 12:04:50AM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 23:58, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> 
> > On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
> >> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >> 
> >>> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> >>>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >>>> 
> >>>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >>>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>>>>>> 
> >>>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >>>>>>> ---
> >>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>>>>>> 
> >>>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>>>> index 70dc8ec..c5f46d5 100644
> >>>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>>>>>  BUG_ON(!bf);
> >>>>>>> 
> >>>>>>>  dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>>>>>> 
> >>>>>>>  ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>>>>>> -    if (ret == -EINPROGRESS) {
> >>>>>>> -        /*let device gain the buffer again*/
> >>>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>>>> +    if (ret == -EINPROGRESS)
> >>>>>>>      return false;
> >>>>>>> -    }
> >>>>>>> 
> >>>>>>>  __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>>>>>  if (ret == -EINVAL) {
> >>>>>> I have strong doubts about this change. On most MIPS devices,
> >>>>>> dma_sync_single_for_cpu is a no-op, whereas
> >>>>>> dma_sync_single_for_device flushes the cache range. With this
> >>>>>> change, the CPU could cache the DMA status part behind skb->data and
> >>>>>> that cache entry would not be flushed inbetween calls to this
> >>>>>> functions on the same buffer, likely leading to rx stalls.
> >>>>> You're suggesting a platform implementation bug then. If the platform is not
> >>>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> >>>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
> >>>>> 
> >>>>> I'm not familiar with the platform internals, so we should ask MIPS people.
> >>>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> >>> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
> >>> wrong (or at least misleading) compared to what DMA-API.txt describes.
> >>> DMA sync calls do not transfer the ownership of the buffer - they are
> >>> cache synchronization points, ownership passing is handled entirely by
> >>> the driver.
> >> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
> >>>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> >>> Correctness of this access should be provided by sync_to_cpu() call.
> >> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.
> > 
> > I briefly looked through DMA API implementation in MIPS, and except
> > for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
> > (see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
> > in both points, and exactly like I described before - CPU cachelines
> > are invalidated for DMA_FROM_DEVICE mappings, written back for
> > DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
> > mapping+sync direction).
> > 
> > So doing that sync_to_device you are just invalidating the same cachelines
> > twice for no gain (or do nothing twice in some cases) - they are not read
> > by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
> > in the driver). 
> I think you're missing something. It works like this: In the AR9380 rx path, the descriptor is part of the skb. The rx tasklet checks for rx frame completion by calling the sync for cpu, reading the completion flag and (in case of a not completed frame) flushes the cache for that location again (for device). If you remove the for_device call, the next call to this function can see stale data, as the for_cpu call can be a no-op.

Is the descriptor modified in any way before being checked again? Looks like
it isn't. That is my assumption - if this doesn't hold, then we're talking
about different things.

When I looked through the DMA API implementation for MIPS, I saw that whenever
sync_to_cpu is a no-op, sync_to_device is also a no-op. So the bug you're
seeing is not related to those calls. It might be that despite no-op sync
primitives, the platform is not cache-coherent --- that is DMA writes by
device do not cause corresponding CPU cache lines to be invalidated.

BTW, cache flush (other name: invalidation) is needed just before reading the
value. Doing it once more earlier does not really matter. Unless you're
modifying some data in the same cache line as mapped buffer --- then this
is a BUG in the driver and should either use DMA_BIDIRECTIONAL if the modified
value is part of the buffer, or move the modified data away.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 19:13                   ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 19:13 UTC (permalink / raw)
  To: ath9k-devel

On Wed, Jul 13, 2011 at 12:04:50AM +0800, Felix Fietkau wrote:
> On 12.07.2011, at 23:58, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> 
> > On Tue, Jul 12, 2011 at 10:21:05PM +0800, Felix Fietkau wrote:
> >> On 12.07.2011, at 21:03, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >> 
> >>> On Tue, Jul 12, 2011 at 08:54:32PM +0800, Felix Fietkau wrote:
> >>>> On 12.07.2011, at 17:55, Michał Mirosław <mirq-linux@rere.qmqm.pl> wrote:
> >>>> 
> >>>>> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> >>>>>> On 2011-07-11 8:52 AM, Michał Mirosław wrote:
> >>>>>>> Also constify buf_addr for ath9k_hw_process_rxdesc_edma() to verify
> >>>>>>> assumptions --- dma_sync_single_for_device() call can be removed.
> >>>>>>> 
> >>>>>>> Signed-off-by: Michał Mirosław<mirq-linux@rere.qmqm.pl>
> >>>>>>> ---
> >>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.c |    4 ++--
> >>>>>>> drivers/net/wireless/ath/ath9k/ar9003_mac.h |    2 +-
> >>>>>>> drivers/net/wireless/ath/ath9k/recv.c       |   10 +++-------
> >>>>>>> 3 files changed, 6 insertions(+), 10 deletions(-)
> >>>>>>> 
> >>>>>>> diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>>>> index 70dc8ec..c5f46d5 100644
> >>>>>>> --- a/drivers/net/wireless/ath/ath9k/recv.c
> >>>>>>> +++ b/drivers/net/wireless/ath/ath9k/recv.c
> >>>>>>> @@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >>>>>>>  BUG_ON(!bf);
> >>>>>>> 
> >>>>>>>  dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>>>> +                common->rx_bufsize, DMA_BIDIRECTIONAL);
> >>>>>>> 
> >>>>>>>  ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >>>>>>> -    if (ret == -EINPROGRESS) {
> >>>>>>> -        /*let device gain the buffer again*/
> >>>>>>> -        dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >>>>>>> -                common->rx_bufsize, DMA_FROM_DEVICE);
> >>>>>>> +    if (ret == -EINPROGRESS)
> >>>>>>>      return false;
> >>>>>>> -    }
> >>>>>>> 
> >>>>>>>  __skb_unlink(skb,&rx_edma->rx_fifo);
> >>>>>>>  if (ret == -EINVAL) {
> >>>>>> I have strong doubts about this change. On most MIPS devices,
> >>>>>> dma_sync_single_for_cpu is a no-op, whereas
> >>>>>> dma_sync_single_for_device flushes the cache range. With this
> >>>>>> change, the CPU could cache the DMA status part behind skb->data and
> >>>>>> that cache entry would not be flushed inbetween calls to this
> >>>>>> functions on the same buffer, likely leading to rx stalls.
> >>>>> You're suggesting a platform implementation bug then. If the platform is not
> >>>>> cache-coherent, it should invalidate relevant CPU cache lines for sync_to_cpu
> >>>>> and unmap cases. Do other devices show such symptoms on MIPS systems?
> >>>>> 
> >>>>> I'm not familiar with the platform internals, so we should ask MIPS people.
> >>>> I only mentioned MIPS to describe the potential side effect of this change. From my current understanding of the DMA API, it would be wrong on other platforms as well. I believe the _for_device function needs to be used to transfer ownership of the buffer back to the device, before calling _for_cpu again later for another read.
> >>> What you're saying reminds the wording in DMA-API-HOWTO.txt that I find
> >>> wrong (or at least misleading) compared to what DMA-API.txt describes.
> >>> DMA sync calls do not transfer the ownership of the buffer - they are
> >>> cache synchronization points, ownership passing is handled entirely by
> >>> the driver.
> >> What I meant was that the DMA sync calls reflect the ownership transfer of the memory regions. In this case ownership is transferred between device and CPU multiple times and the code reflects that.
> >>>> This is definitely required in this case, because when the return code is -EINPROGRESS, the driver waits for the hardware to complete this buffer, and the next call has to fetch the memory again after the device has updated it.
> >>> Correctness of this access should be provided by sync_to_cpu() call.
> >> At least in MIPS I'm sure it isn't. If I remember correctly, it also isn't on ARM, so I'm pretty sure that either your understanding of the API is incorrect, or arch code does not implement it properly. In either case, this change (and probably also the p54 one) should not be merged.
> > 
> > I briefly looked through DMA API implementation in MIPS, and except
> > for R10k and R12k both sync_for_cpu and sync_for_device are no-ops
> > (see: arch/mips/mm/dma-default.c).  For R10k and R12k the syncs are
> > in both points, and exactly like I described before - CPU cachelines
> > are invalidated for DMA_FROM_DEVICE mappings, written back for
> > DMA_TO_DEVICE, both for DMA_BIDIRECTIONAL (including redundant
> > mapping+sync direction).
> > 
> > So doing that sync_to_device you are just invalidating the same cachelines
> > twice for no gain (or do nothing twice in some cases) - they are not read
> > by CPU between sync_to_device -> sync_to_cpu (unless you have other bugs
> > in the driver). 
> I think you're missing something. It works like this: In the AR9380 rx path, the descriptor is part of the skb. The rx tasklet checks for rx frame completion by calling the sync for cpu, reading the completion flag and (in case of a not completed frame) flushes the cache for that location again (for device). If you remove the for_device call, the next call to this function can see stale data, as the for_cpu call can be a no-op.

Is the descriptor modified in any way before being checked again? Looks like
it isn't. That is my assumption - if this doesn't hold, then we're talking
about different things.

When I looked through the DMA API implementation for MIPS, I saw that whenever
sync_to_cpu is a no-op, sync_to_device is also a no-op. So the bug you're
seeing is not related to those calls. It might be that despite no-op sync
primitives, the platform is not cache-coherent --- that is DMA writes by
device do not cause corresponding CPU cache lines to be invalidated.

BTW, cache flush (other name: invalidation) is needed just before reading the
value. Doing it once more earlier does not really matter. Unless you're
modifying some data in the same cache line as mapped buffer --- then this
is a BUG in the driver and should either use DMA_BIDIRECTIONAL if the modified
value is part of the buffer, or move the modified data away.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 19:32       ` Ralf Baechle
  0 siblings, 0 replies; 76+ messages in thread
From: Ralf Baechle @ 2011-07-12 19:32 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Michał Mirosław, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan

On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:

> >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >index 70dc8ec..c5f46d5 100644
> >--- a/drivers/net/wireless/ath/ath9k/recv.c
> >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >  	BUG_ON(!bf);
> >
> >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> >
> >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >-	if (ret == -EINPROGRESS) {
> >-		/*let device gain the buffer again*/
> >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+	if (ret == -EINPROGRESS)
> >  		return false;
> >-	}
> >
> >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> >  	if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices,
> dma_sync_single_for_cpu is a no-op, whereas
> dma_sync_single_for_device flushes the cache range. With this
> change, the CPU could cache the DMA status part behind skb->data and
> that cache entry would not be flushed inbetween calls to this
> functions on the same buffer, likely leading to rx stalls.

The code was already broken before.  By the time dma_sync_single_for_cpu
and ath9k_hw_process_rxdesc_edma are called, the DMA engine may still be
active in the buffer,  yet the driver is looking at it.

dma_sync_single_for_cpu() is part of changing the buffer ownership from
the device to the CPU.  When it is being called, DMA into the buffer should
already have been completed ...  or else the shit may hit the jet engine.

Imagine what would happen on a hypothetic cache architecture which does not
have a dirty bit, that is which would have to write back every cache line -
even clean lines - to memory in order to evict it.  Corruption.

And don't argue with what the actual MIPS implementation of
dma_sync_single_for_{cpu,device} is doing.  It's meant to be treated as a
black box; that abstraction is the whole point of the API.  And it seems the
driver is also being used on other architectures than MIPS …

  Ralf

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 19:32       ` Ralf Baechle
  0 siblings, 0 replies; 76+ messages in thread
From: Ralf Baechle @ 2011-07-12 19:32 UTC (permalink / raw)
  To: Felix Fietkau
  Cc: Michał Mirosław, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-juf53994utBLZpfksSYvnA,
	Vasanthakumar Thiagarajan

On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:

> >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> >index 70dc8ec..c5f46d5 100644
> >--- a/drivers/net/wireless/ath/ath9k/recv.c
> >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> >  	BUG_ON(!bf);
> >
> >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> >
> >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> >-	if (ret == -EINPROGRESS) {
> >-		/*let device gain the buffer again*/
> >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> >-				common->rx_bufsize, DMA_FROM_DEVICE);
> >+	if (ret == -EINPROGRESS)
> >  		return false;
> >-	}
> >
> >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> >  	if (ret == -EINVAL) {
> I have strong doubts about this change. On most MIPS devices,
> dma_sync_single_for_cpu is a no-op, whereas
> dma_sync_single_for_device flushes the cache range. With this
> change, the CPU could cache the DMA status part behind skb->data and
> that cache entry would not be flushed inbetween calls to this
> functions on the same buffer, likely leading to rx stalls.

The code was already broken before.  By the time dma_sync_single_for_cpu
and ath9k_hw_process_rxdesc_edma are called, the DMA engine may still be
active in the buffer,  yet the driver is looking at it.

dma_sync_single_for_cpu() is part of changing the buffer ownership from
the device to the CPU.  When it is being called, DMA into the buffer should
already have been completed ...  or else the shit may hit the jet engine.

Imagine what would happen on a hypothetic cache architecture which does not
have a dirty bit, that is which would have to write back every cache line -
even clean lines - to memory in order to evict it.  Corruption.

And don't argue with what the actual MIPS implementation of
dma_sync_single_for_{cpu,device} is doing.  It's meant to be treated as a
black box; that abstraction is the whole point of the API.  And it seems the
driver is also being used on other architectures than MIPS …

  Ralf
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 20:53         ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 20:53 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan

On Tue, Jul 12, 2011 at 08:32:04PM +0100, Ralf Baechle wrote:
> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> 
> > >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> > >index 70dc8ec..c5f46d5 100644
> > >--- a/drivers/net/wireless/ath/ath9k/recv.c
> > >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> > >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> > >  	BUG_ON(!bf);
> > >
> > >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> > >-				common->rx_bufsize, DMA_FROM_DEVICE);
> > >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> > >
> > >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> > >-	if (ret == -EINPROGRESS) {
> > >-		/*let device gain the buffer again*/
> > >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> > >-				common->rx_bufsize, DMA_FROM_DEVICE);
> > >+	if (ret == -EINPROGRESS)
> > >  		return false;
> > >-	}
> > >
> > >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> > >  	if (ret == -EINVAL) {
> > I have strong doubts about this change. On most MIPS devices,
> > dma_sync_single_for_cpu is a no-op, whereas
> > dma_sync_single_for_device flushes the cache range. With this
> > change, the CPU could cache the DMA status part behind skb->data and
> > that cache entry would not be flushed inbetween calls to this
> > functions on the same buffer, likely leading to rx stalls.
> 
> The code was already broken before.  By the time dma_sync_single_for_cpu
> and ath9k_hw_process_rxdesc_edma are called, the DMA engine may still be
> active in the buffer,  yet the driver is looking at it.
> 
> dma_sync_single_for_cpu() is part of changing the buffer ownership from
> the device to the CPU.  When it is being called, DMA into the buffer should
> already have been completed ...  or else the shit may hit the jet engine.

Let's get rid of the "buffer ownership" misunderstanding from the picture.
Ownership is about who is expected to be writing (or is assured the data
is not being changed under his foot). This has nothing to do with DMA API.

DMA API is there for two purposes: to make part of memory visible to
both CPU and device (map/unmap) and to ensure memory consistency in
presence of caches (sync; implicitly done in map/unmap).

In the case we're analysing, the ownership is on the device's side
until it stops writing the buffer. sync_to_cpu doesn't change that.
It only allows the CPU to see more recent data (in case the CPU cached
something earlier).

> Imagine what would happen on a hypothetic cache architecture which does not
> have a dirty bit, that is which would have to write back every cache line -
> even clean lines - to memory in order to evict it.  Corruption.

dma_map_whatever() would mark the memory uncachable on such an architecture.
Otherwise this would violate assumptions on DMA_FROM_DEVICE mappings (or
"device owned buffers") that the CPU does not write to the mapped memory.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 20:53         ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 20:53 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-juf53994utBLZpfksSYvnA,
	Vasanthakumar Thiagarajan

On Tue, Jul 12, 2011 at 08:32:04PM +0100, Ralf Baechle wrote:
> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> 
> > >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> > >index 70dc8ec..c5f46d5 100644
> > >--- a/drivers/net/wireless/ath/ath9k/recv.c
> > >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> > >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> > >  	BUG_ON(!bf);
> > >
> > >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> > >-				common->rx_bufsize, DMA_FROM_DEVICE);
> > >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> > >
> > >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> > >-	if (ret == -EINPROGRESS) {
> > >-		/*let device gain the buffer again*/
> > >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> > >-				common->rx_bufsize, DMA_FROM_DEVICE);
> > >+	if (ret == -EINPROGRESS)
> > >  		return false;
> > >-	}
> > >
> > >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> > >  	if (ret == -EINVAL) {
> > I have strong doubts about this change. On most MIPS devices,
> > dma_sync_single_for_cpu is a no-op, whereas
> > dma_sync_single_for_device flushes the cache range. With this
> > change, the CPU could cache the DMA status part behind skb->data and
> > that cache entry would not be flushed inbetween calls to this
> > functions on the same buffer, likely leading to rx stalls.
> 
> The code was already broken before.  By the time dma_sync_single_for_cpu
> and ath9k_hw_process_rxdesc_edma are called, the DMA engine may still be
> active in the buffer,  yet the driver is looking at it.
> 
> dma_sync_single_for_cpu() is part of changing the buffer ownership from
> the device to the CPU.  When it is being called, DMA into the buffer should
> already have been completed ...  or else the shit may hit the jet engine.

Let's get rid of the "buffer ownership" misunderstanding from the picture.
Ownership is about who is expected to be writing (or is assured the data
is not being changed under his foot). This has nothing to do with DMA API.

DMA API is there for two purposes: to make part of memory visible to
both CPU and device (map/unmap) and to ensure memory consistency in
presence of caches (sync; implicitly done in map/unmap).

In the case we're analysing, the ownership is on the device's side
until it stops writing the buffer. sync_to_cpu doesn't change that.
It only allows the CPU to see more recent data (in case the CPU cached
something earlier).

> Imagine what would happen on a hypothetic cache architecture which does not
> have a dirty bit, that is which would have to write back every cache line -
> even clean lines - to memory in order to evict it.  Corruption.

dma_map_whatever() would mark the memory uncachable on such an architecture.
Otherwise this would violate assumptions on DMA_FROM_DEVICE mappings (or
"device owned buffers") that the CPU does not write to the mapped memory.

Best Regards,
Michał Mirosław
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 20:53         ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 20:53 UTC (permalink / raw)
  To: ath9k-devel

On Tue, Jul 12, 2011 at 08:32:04PM +0100, Ralf Baechle wrote:
> On Tue, Jul 12, 2011 at 12:36:06PM +0800, Felix Fietkau wrote:
> 
> > >diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
> > >index 70dc8ec..c5f46d5 100644
> > >--- a/drivers/net/wireless/ath/ath9k/recv.c
> > >+++ b/drivers/net/wireless/ath/ath9k/recv.c
> > >@@ -684,15 +684,11 @@ static bool ath_edma_get_buffers(struct ath_softc *sc,
> > >  	BUG_ON(!bf);
> > >
> > >  	dma_sync_single_for_cpu(sc->dev, bf->bf_buf_addr,
> > >-				common->rx_bufsize, DMA_FROM_DEVICE);
> > >+				common->rx_bufsize, DMA_BIDIRECTIONAL);
> > >
> > >  	ret = ath9k_hw_process_rxdesc_edma(ah, NULL, skb->data);
> > >-	if (ret == -EINPROGRESS) {
> > >-		/*let device gain the buffer again*/
> > >-		dma_sync_single_for_device(sc->dev, bf->bf_buf_addr,
> > >-				common->rx_bufsize, DMA_FROM_DEVICE);
> > >+	if (ret == -EINPROGRESS)
> > >  		return false;
> > >-	}
> > >
> > >  	__skb_unlink(skb,&rx_edma->rx_fifo);
> > >  	if (ret == -EINVAL) {
> > I have strong doubts about this change. On most MIPS devices,
> > dma_sync_single_for_cpu is a no-op, whereas
> > dma_sync_single_for_device flushes the cache range. With this
> > change, the CPU could cache the DMA status part behind skb->data and
> > that cache entry would not be flushed inbetween calls to this
> > functions on the same buffer, likely leading to rx stalls.
> 
> The code was already broken before.  By the time dma_sync_single_for_cpu
> and ath9k_hw_process_rxdesc_edma are called, the DMA engine may still be
> active in the buffer,  yet the driver is looking at it.
> 
> dma_sync_single_for_cpu() is part of changing the buffer ownership from
> the device to the CPU.  When it is being called, DMA into the buffer should
> already have been completed ...  or else the shit may hit the jet engine.

Let's get rid of the "buffer ownership" misunderstanding from the picture.
Ownership is about who is expected to be writing (or is assured the data
is not being changed under his foot). This has nothing to do with DMA API.

DMA API is there for two purposes: to make part of memory visible to
both CPU and device (map/unmap) and to ensure memory consistency in
presence of caches (sync; implicitly done in map/unmap).

In the case we're analysing, the ownership is on the device's side
until it stops writing the buffer. sync_to_cpu doesn't change that.
It only allows the CPU to see more recent data (in case the CPU cached
something earlier).

> Imagine what would happen on a hypothetic cache architecture which does not
> have a dirty bit, that is which would have to write back every cache line -
> even clean lines - to memory in order to evict it.  Corruption.

dma_map_whatever() would mark the memory uncachable on such an architecture.
Otherwise this would violate assumptions on DMA_FROM_DEVICE mappings (or
"device owned buffers") that the CPU does not write to the mapped memory.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 20:59           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 20:59 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: Felix Fietkau, netdev, linux-wireless, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel, Vasanthakumar Thiagarajan

On Tue, Jul 12, 2011 at 10:53:16PM +0200, Michał Mirosław wrote:
> In the case we're analysing, the ownership is on the device's side
> until it stops writing the buffer.

Just to be clear, this should say: ... until it marks the buffer as done.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

* Re: [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 20:59           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 20:59 UTC (permalink / raw)
  To: Ralf Baechle
  Cc: Felix Fietkau, netdev-u79uwXL29TY76Z2rM5mHXA,
	linux-wireless-u79uwXL29TY76Z2rM5mHXA, Jouni Malinen,
	Senthil Balasubramanian, ath9k-devel-juf53994utBLZpfksSYvnA,
	Vasanthakumar Thiagarajan

On Tue, Jul 12, 2011 at 10:53:16PM +0200, Michał Mirosław wrote:
> In the case we're analysing, the ownership is on the device's side
> until it stops writing the buffer.

Just to be clear, this should say: ... until it marks the buffer as done.

Best Regards,
Michał Mirosław
--
To unsubscribe from this list: send the line "unsubscribe linux-wireless" in
the body of a message to majordomo-u79uwXL29TY76Z2rM5mHXA@public.gmane.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 76+ messages in thread

* [ath9k-devel] [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage
@ 2011-07-12 20:59           ` Michał Mirosław
  0 siblings, 0 replies; 76+ messages in thread
From: Michał Mirosław @ 2011-07-12 20:59 UTC (permalink / raw)
  To: ath9k-devel

On Tue, Jul 12, 2011 at 10:53:16PM +0200, Michał Mirosław wrote:
> In the case we're analysing, the ownership is on the device's side
> until it stops writing the buffer.

Just to be clear, this should say: ... until it marks the buffer as done.

Best Regards,
Michał Mirosław

^ permalink raw reply	[flat|nested] 76+ messages in thread

end of thread, other threads:[~2011-07-12 20:59 UTC | newest]

Thread overview: 76+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-07-11  0:52 [PATCH v2 00/46] Clean up RX copybreak and DMA handling Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 02/46] net: wrap common patterns of rx handler code Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 05/46] net: bnx2x: fix DMA sync direction Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 06/46] net/tokenring: 3c359: fix DMA API usage Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 01/46] net: introduce __netdev_alloc_skb_aligned() Michał Mirosław
2011-07-11  5:46   ` [PATCH net-next-2.6] net: introduce build_skb() Eric Dumazet
2011-07-11 10:53     ` Michał Mirosław
2011-07-12 15:40     ` Eric Dumazet
2011-07-12 15:54       ` Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 03/46] net drivers: remove unnecessary dma_sync_to_device(DMA_FROM_DEVICE) Michał Mirosław
2011-07-11  8:30   ` Vlad Zolotarov
2011-07-11  9:29     ` Michał Mirosław
2011-07-11  9:46       ` Vlad Zolotarov
2011-07-11  0:52 ` [PATCH v2 04/46] net/wireless: p54: remove useless dma_sync_single_for_device(DMA_FROM_DEVICE) Michał Mirosław
2011-07-11 15:15   ` Pavel Roskin
2011-07-11 15:15     ` Pavel Roskin
2011-07-12  4:50   ` Felix Fietkau
2011-07-12  4:50     ` Felix Fietkau
2011-07-11  0:52 ` [PATCH v2 07/46] net/wireless: ath9k: fix DMA API usage Michał Mirosław
2011-07-11  0:52   ` [ath9k-devel] " Michał Mirosław
2011-07-11  0:52   ` Michał Mirosław
2011-07-12  4:36   ` [ath9k-devel] " Felix Fietkau
2011-07-12  4:36     ` Felix Fietkau
2011-07-12  4:36     ` Felix Fietkau
2011-07-12  5:30     ` [ath9k-devel] " Ben Greear
2011-07-12  5:30       ` Ben Greear
2011-07-12  5:30       ` Ben Greear
2011-07-12  9:55     ` Michał Mirosław
2011-07-12  9:55       ` Michał Mirosław
2011-07-12  9:55       ` Michał Mirosław
2011-07-12 12:54       ` Felix Fietkau
2011-07-12 12:54         ` Felix Fietkau
2011-07-12 12:54         ` Felix Fietkau
2011-07-12 13:03         ` [ath9k-devel] " Michał Mirosław
2011-07-12 13:03           ` Michał Mirosław
2011-07-12 13:03           ` Michał Mirosław
2011-07-12 14:21           ` Felix Fietkau
2011-07-12 14:21             ` Felix Fietkau
2011-07-12 14:21             ` Felix Fietkau
2011-07-12 15:58             ` Michał Mirosław
2011-07-12 15:58               ` Michał Mirosław
2011-07-12 16:04               ` Felix Fietkau
2011-07-12 16:04                 ` Felix Fietkau
2011-07-12 16:04                 ` Felix Fietkau
2011-07-12 19:13                 ` Michał Mirosław
2011-07-12 19:13                   ` Michał Mirosław
2011-07-12 19:32     ` Ralf Baechle
2011-07-12 19:32       ` Ralf Baechle
2011-07-12 20:53       ` Michał Mirosław
2011-07-12 20:53         ` Michał Mirosław
2011-07-12 20:53         ` Michał Mirosław
2011-07-12 20:59         ` Michał Mirosław
2011-07-12 20:59           ` Michał Mirosław
2011-07-12 20:59           ` Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 08/46] net/wireless: b43: fix DMA direction for RX buffers Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 09/46] net: octeon_mgmt: fix DMA unmap size Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 10/46] net: jme: convert to generic DMA API Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 12/46] net: sunhme: cleanup RX skb allocation Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 13/46] net: sunbmac: " Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 11/46] net: sungem: " Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 16/46] net: cxgb3: don't drop packets on memory pressure in driver Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 14/46] net: sunbmac: cleanup magic '34' Michał Mirosław
2011-07-11  0:52 ` [PATCH v2 46/46] net: mark drivers that drop packets from rx queue head under memory pressure Michał Mirosław
2011-07-11  5:40   ` Francois Romieu
2011-07-11  6:47   ` Eilon Greenstein
2011-07-11 10:04     ` Michał Mirosław
2011-07-11 10:16       ` Eilon Greenstein
2011-07-11 15:24   ` Stephen Hemminger
2011-07-11  0:52 ` [PATCH v2 15/46] net/wireless: b43: use kfree_skb() for untouched skbs Michał Mirosław
2011-07-11  6:54 ` [PATCH v2 00/46] Clean up RX copybreak and DMA handling David Miller
2011-07-11  9:16   ` Michał Mirosław
2011-07-11  9:24     ` David Miller
2011-07-11  9:47       ` Michał Mirosław
2011-07-11 10:11         ` David Miller
2011-07-11 11:17           ` Michał Mirosław
2011-07-11 12:36   ` Ben Hutchings

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.