linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v4 01/17] i40e/i40evf: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 02/17] ixgbe: eliminate " Sinan Kaya
                   ` (15 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier
on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 8 ++++----
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index e554aa6cf..9455869 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -185,7 +185,7 @@ static int i40e_program_fdir_filter(struct i40e_fdir_filter *fdir_data,
 	/* Mark the data descriptor to be watched */
 	first->next_to_watch = tx_desc;
 
-	writel(tx_ring->next_to_use, tx_ring->tail);
+	writel_relaxed(tx_ring->next_to_use, tx_ring->tail);
 	return 0;
 
 dma_fail:
@@ -1375,7 +1375,7 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 	 * such as IA-64).
 	 */
 	wmb();
-	writel(val, rx_ring->tail);
+	writel_relaxed(val, rx_ring->tail);
 }
 
 /**
@@ -2258,7 +2258,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		 */
 		wmb();
 
-		writel(xdp_ring->next_to_use, xdp_ring->tail);
+		writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail);
 	}
 
 	rx_ring->skb = skb;
@@ -3286,7 +3286,7 @@ static inline int i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	/* notify HW of packet */
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index 357d605..56eea20 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -667,7 +667,7 @@ static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
 	 * such as IA-64).
 	 */
 	wmb();
-	writel(val, rx_ring->tail);
+	writel_relaxed(val, rx_ring->tail);
 }
 
 /**
@@ -2243,7 +2243,7 @@ static inline void i40evf_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
 
 	/* notify HW of packet */
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 02/17] ixgbe: eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
  2018-03-20  2:42 ` [PATCH v4 01/17] i40e/i40evf: Eliminate duplicate barriers on weakly-ordered archs Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 03/17] igbvf: " Sinan Kaya
                   ` (14 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Code includes wmb() followed by writel() in multiple places. writel()
already has a barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 0da5aa2..58ed70f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -1692,7 +1692,7 @@ void ixgbe_alloc_rx_buffers(struct ixgbe_ring *rx_ring, u16 cleaned_count)
 		 * such as IA-64).
 		 */
 		wmb();
-		writel(i, rx_ring->tail);
+		writel_relaxed(i, rx_ring->tail);
 	}
 }
 
@@ -2453,7 +2453,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 		 * know there are new descriptors to fetch.
 		 */
 		wmb();
-		writel(ring->next_to_use, ring->tail);
+		writel_relaxed(ring->next_to_use, ring->tail);
 
 		xdp_do_flush_map();
 	}
@@ -8078,7 +8078,7 @@ static int ixgbe_tx_map(struct ixgbe_ring *tx_ring,
 	ixgbe_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems
@@ -10014,7 +10014,7 @@ static void ixgbe_xdp_flush(struct net_device *dev)
 	 * are new descriptors to fetch.
 	 */
 	wmb();
-	writel(ring->next_to_use, ring->tail);
+	writel_relaxed(ring->next_to_use, ring->tail);
 
 	return;
 }
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 03/17] igbvf: eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
  2018-03-20  2:42 ` [PATCH v4 01/17] i40e/i40evf: Eliminate duplicate barriers on weakly-ordered archs Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 02/17] ixgbe: eliminate " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 04/17] igb: " Sinan Kaya
                   ` (13 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier
on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/igbvf/netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c
index 4214c15..edb1c34 100644
--- a/drivers/net/ethernet/intel/igbvf/netdev.c
+++ b/drivers/net/ethernet/intel/igbvf/netdev.c
@@ -251,7 +251,7 @@ static void igbvf_alloc_rx_buffers(struct igbvf_ring *rx_ring,
 		 * such as IA-64).
 		*/
 		wmb();
-		writel(i, adapter->hw.hw_addr + rx_ring->tail);
+		writel_relaxed(i, adapter->hw.hw_addr + rx_ring->tail);
 	}
 }
 
@@ -2297,7 +2297,7 @@ static inline void igbvf_tx_queue_adv(struct igbvf_adapter *adapter,
 
 	tx_ring->buffer_info[first].next_to_watch = tx_desc;
 	tx_ring->next_to_use = i;
-	writel(i, adapter->hw.hw_addr + tx_ring->tail);
+	writel_relaxed(i, adapter->hw.hw_addr + tx_ring->tail);
 	/* we need this if more than one processor can write to our tail
 	 * at a time, it synchronizes IO on IA64/Altix systems
 	 */
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 04/17] igb: eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (2 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 03/17] igbvf: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 05/17] ixgbevf: keep writel() closer to wmb() Sinan Kaya
                   ` (12 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier
on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index b88fae7..82aea92 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -5671,7 +5671,7 @@ static int igb_tx_map(struct igb_ring *tx_ring,
 	igb_maybe_stop_tx(tx_ring, DESC_NEEDED);
 
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems
@@ -8072,7 +8072,7 @@ void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count)
 		 * such as IA-64).
 		 */
 		wmb();
-		writel(i, rx_ring->tail);
+		writel_relaxed(i, rx_ring->tail);
 	}
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 05/17] ixgbevf: keep writel() closer to wmb()
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (3 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 04/17] igb: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 06/17] ixgbevf: eliminate duplicate barriers on weakly-ordered archs Sinan Kaya
                   ` (11 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Remove ixgbevf_write_tail() in favor of moving writel() close to
wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h      | 5 -----
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++--
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index f695242..11e893e 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -244,11 +244,6 @@ static inline u16 ixgbevf_desc_unused(struct ixgbevf_ring *ring)
 	return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1;
 }
 
-static inline void ixgbevf_write_tail(struct ixgbevf_ring *ring, u32 value)
-{
-	writel(value, ring->tail);
-}
-
 #define IXGBEVF_RX_DESC(R, i)	\
 	(&(((union ixgbe_adv_rx_desc *)((R)->desc))[i]))
 #define IXGBEVF_TX_DESC(R, i)	\
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 9b3d43d..6bf778a 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -659,7 +659,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 		 * such as IA-64).
 		 */
 		wmb();
-		ixgbevf_write_tail(rx_ring, i);
+		writel(i, rx_ring->tail);
 	}
 }
 
@@ -3644,7 +3644,7 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	tx_ring->next_to_use = i;
 
 	/* notify HW of packet */
-	ixgbevf_write_tail(tx_ring, i);
+	writel(i, tx_ring->tail);
 
 	return;
 dma_error:
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 06/17] ixgbevf: eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (4 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 05/17] ixgbevf: keep writel() closer to wmb() Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 07/17] fm10k: Eliminate " Sinan Kaya
                   ` (10 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Code includes wmb() followed by writel() in multiple places. writel()
already has a barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Reviewed-by: Alexander Duyck <alexander.h.duyck@intel.com>
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 6bf778a..774b2a6 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -659,7 +659,7 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 		 * such as IA-64).
 		 */
 		wmb();
-		writel(i, rx_ring->tail);
+		writel_relaxed(i, rx_ring->tail);
 	}
 }
 
@@ -3644,7 +3644,7 @@ static void ixgbevf_tx_map(struct ixgbevf_ring *tx_ring,
 	tx_ring->next_to_use = i;
 
 	/* notify HW of packet */
-	writel(i, tx_ring->tail);
+	writel_relaxed(i, tx_ring->tail);
 
 	return;
 dma_error:
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 07/17] fm10k: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (5 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 06/17] ixgbevf: eliminate duplicate barriers on weakly-ordered archs Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 08/17] drivers: net: cxgb: " Sinan Kaya
                   ` (9 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Jeff Kirsher,
	intel-wired-lan, linux-kernel

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/intel/fm10k/fm10k_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
index 8e12aae..eebef01 100644
--- a/drivers/net/ethernet/intel/fm10k/fm10k_main.c
+++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c
@@ -179,7 +179,7 @@ void fm10k_alloc_rx_buffers(struct fm10k_ring *rx_ring, u16 cleaned_count)
 		wmb();
 
 		/* notify hardware of new descriptors */
-		writel(i, rx_ring->tail);
+		writel_relaxed(i, rx_ring->tail);
 	}
 }
 
@@ -1054,7 +1054,7 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring,
 
 	/* notify HW of packet */
 	if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
-		writel(i, tx_ring->tail);
+		writel_relaxed(i, tx_ring->tail);
 
 		/* we need this if more than one processor can write to our tail
 		 * at a time, it synchronizes IO on IA64/Altix systems
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 08/17] drivers: net: cxgb: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (6 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 07/17] fm10k: Eliminate " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 09/17] net: qla3xxx: " Sinan Kaya
                   ` (8 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, David S. Miller,
	Johannes Berg, Kees Cook, Allen Pais, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/chelsio/cxgb/sge.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c
index 30de26e..57891bd6 100644
--- a/drivers/net/ethernet/chelsio/cxgb/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb/sge.c
@@ -495,7 +495,7 @@ static struct sk_buff *sched_skb(struct sge *sge, struct sk_buff *skb,
 static inline void doorbell_pio(struct adapter *adapter, u32 val)
 {
 	wmb();
-	writel(val, adapter->regs + A_SG_DOORBELL);
+	writel_relaxed(val, adapter->regs + A_SG_DOORBELL);
 }
 
 /*
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 09/17] net: qla3xxx: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (7 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 08/17] drivers: net: cxgb: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 10/17] qlcnic: " Sinan Kaya
                   ` (7 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Dept-GELinuxNICDev,
	linux-kernel

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/qlogic/qla3xxx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c
index 9e5264d..0e71b74 100644
--- a/drivers/net/ethernet/qlogic/qla3xxx.c
+++ b/drivers/net/ethernet/qlogic/qla3xxx.c
@@ -1858,8 +1858,8 @@ static void ql_update_small_bufq_prod_index(struct ql3_adapter *qdev)
 			qdev->small_buf_release_cnt -= 8;
 		}
 		wmb();
-		writel(qdev->small_buf_q_producer_index,
-			&port_regs->CommonRegs.rxSmallQProducerIndex);
+		writel_relaxed(qdev->small_buf_q_producer_index,
+			       &port_regs->CommonRegs.rxSmallQProducerIndex);
 	}
 }
 
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 10/17] qlcnic: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (8 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 09/17] net: qla3xxx: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 11/17] bnx2x: " Sinan Kaya
                   ` (6 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Harish Patil,
	Manish Chopra, Dept-GELinuxNICDev, linux-kernel

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
Acked-by: Manish Chopra <manish.chopra@cavium.com>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
index 46b0372..97c146e7 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_83xx_hw.c
@@ -478,7 +478,7 @@ irqreturn_t qlcnic_83xx_clear_legacy_intr(struct qlcnic_adapter *adapter)
 	wmb();
 
 	/* clear the interrupt trigger control register */
-	writel(0, adapter->isr_int_vec);
+	writel_relaxed(0, adapter->isr_int_vec);
 	intr_val = readl(adapter->isr_int_vec);
 	do {
 		intr_val = readl(adapter->tgt_status_reg);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 11/17] bnx2x: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (9 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 10/17] qlcnic: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-22 10:10   ` Kalluru, Sudarsana
  2018-03-20  2:42 ` [PATCH v4 12/17] net: cxgb4/cxgb4vf: " Sinan Kaya
                   ` (5 subsequent siblings)
  16 siblings, 1 reply; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Ariel Elior,
	everest-linux-l2, linux-kernel

Code includes wmb() followed by writel(). writel() already has a
barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |  9 ++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 21 +++++++++++----------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |  2 +-
 5 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 352beff..ac38db9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -166,6 +166,12 @@ do {						\
 #define REG_RD8(bp, offset)		readb(REG_ADDR(bp, offset))
 #define REG_RD16(bp, offset)		readw(REG_ADDR(bp, offset))
 
+#define REG_WR_RELAXED(bp, offset, val)	writel_relaxed((u32)val,\
+						       REG_ADDR(bp, offset))
+
+#define REG_WR16_RELAXED(bp, offset, val) \
+	writew_relaxed((u16)val, REG_ADDR(bp, offset))
+
 #define REG_WR(bp, offset, val)		writel((u32)val, REG_ADDR(bp, offset))
 #define REG_WR8(bp, offset, val)	writeb((u8)val, REG_ADDR(bp, offset))
 #define REG_WR16(bp, offset, val)	writew((u16)val, REG_ADDR(bp, offset))
@@ -760,7 +766,8 @@ struct bnx2x_fastpath {
 #endif
 #define DOORBELL(bp, cid, val) \
 	do { \
-		writel((u32)(val), bp->doorbells + (bp->db_size * (cid))); \
+		writel_relaxed((u32)(val),\
+				bp->doorbells + (bp->db_size * (cid))); \
 	} while (0)
 
 /* TX CSUM helpers */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a5265e1..a8ce5c5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -522,8 +522,8 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 	wmb();
 
 	for (i = 0; i < sizeof(rx_prods)/4; i++)
-		REG_WR(bp, fp->ustorm_rx_prods_offset + i*4,
-		       ((u32 *)&rx_prods)[i]);
+		REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4,
+			       ((u32 *)&rx_prods)[i]);
 
 	mmiowb(); /* keep prod updates ordered */
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 74fc9af..2dea1b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1608,8 +1608,8 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp)
 		} else
 			val = 0xffff;
 
-		REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
-		REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
+		REG_WR_RELAXED(bp, HC_REG_TRAILING_EDGE_0 + port * 8, val);
+		REG_WR_RELAXED(bp, HC_REG_LEADING_EDGE_0 + port * 8, val);
 	}
 
 	/* Make sure that interrupts are indeed enabled from here on */
@@ -1672,8 +1672,8 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp)
 	} else
 		val = 0xffff;
 
-	REG_WR(bp, IGU_REG_TRAILING_EDGE_LATCH, val);
-	REG_WR(bp, IGU_REG_LEADING_EDGE_LATCH, val);
+	REG_WR_RELAXED(bp, IGU_REG_TRAILING_EDGE_LATCH, val);
+	REG_WR_RELAXED(bp, IGU_REG_LEADING_EDGE_LATCH, val);
 
 	/* Make sure that interrupts are indeed enabled from here on */
 	mmiowb();
@@ -3817,8 +3817,8 @@ static void bnx2x_sp_prod_update(struct bnx2x *bp)
 	 */
 	mb();
 
-	REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
-		 bp->spq_prod_idx);
+	REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
+			 bp->spq_prod_idx);
 	mmiowb();
 }
 
@@ -7761,7 +7761,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, bool is_pf)
 	barrier();
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 			  ctl, igu_addr_ctl);
-	REG_WR(bp, igu_addr_ctl, ctl);
+	REG_WR_RELAXED(bp, igu_addr_ctl, ctl);
 	mmiowb();
 	barrier();
 
@@ -9720,13 +9720,14 @@ static void bnx2x_process_kill_chip_reset(struct bnx2x *bp, bool global)
 	barrier();
 	mmiowb();
 
-	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
-	       reset_mask2 & (~stay_reset2));
+	REG_WR_RELAXED(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
+		       reset_mask2 & (~stay_reset2));
 
 	barrier();
 	mmiowb();
 
-	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1);
+	REG_WR_RELAXED(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET,
+		       reset_mask1);
 	mmiowb();
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index ffa7959..40e55d8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -105,7 +105,7 @@ static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf,
 
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 	   ctl, igu_addr_ctl);
-	REG_WR(bp, igu_addr_ctl, ctl);
+	REG_WR_RELAXED(bp, igu_addr_ctl, ctl);
 	mmiowb();
 	barrier();
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 76a4668..3b2f1bd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -170,7 +170,7 @@ static int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping)
 	wmb();
 
 	/* Trigger the PF FW */
-	writeb(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+	writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid);
 
 	/* Wait for PF to complete */
 	while ((tout >= 0) && (!*done)) {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 12/17] net: cxgb4/cxgb4vf: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (10 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 11/17] bnx2x: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-21 23:03   ` Casey Leedom
  2018-03-20  2:42 ` [PATCH v4 13/17] net: cxgb3: " Sinan Kaya
                   ` (4 subsequent siblings)
  16 siblings, 1 reply; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Ganesh Goudar,
	Casey Leedom, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This ends up with the CPU observing two barriers back to back before
executing the register write.

Create a new wrapper function that uses the relaxed write operator. Use
the new wrapper when a write follows a wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      |  6 ++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 13 +++++++------
 drivers/net/ethernet/chelsio/cxgb4/sge.c        | 12 ++++++------
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |  2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h  | 14 ++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      | 18 ++++++++++--------
 6 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13..6bde0b9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1202,6 +1202,12 @@ static inline void t4_write_reg(struct adapter *adap, u32 reg_addr, u32 val)
 	writel(val, adap->regs + reg_addr);
 }
 
+static inline void t4_write_reg_relaxed(struct adapter *adap, u32 reg_addr,
+					u32 val)
+{
+	writel_relaxed(val, adap->regs + reg_addr);
+}
+
 #ifndef readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 7b452e8..276472d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1723,8 +1723,8 @@ int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
 		else
 			val = PIDX_T5_V(delta);
 		wmb();
-		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-			     QID_V(qid) | val);
+		t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+				     QID_V(qid) | val);
 	}
 out:
 	return ret;
@@ -1902,8 +1902,9 @@ static void enable_txq_db(struct adapter *adap, struct sge_txq *q)
 		 * are committed before we tell HW about them.
 		 */
 		wmb();
-		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-			     QID_V(q->cntxt_id) | PIDX_V(q->db_pidx_inc));
+		t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+				     QID_V(q->cntxt_id) |
+						PIDX_V(q->db_pidx_inc));
 		q->db_pidx_inc = 0;
 	}
 	q->db_disabled = 0;
@@ -2003,8 +2004,8 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
 		else
 			val = PIDX_T5_V(delta);
 		wmb();
-		t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-			     QID_V(q->cntxt_id) | val);
+		t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+				     QID_V(q->cntxt_id) | val);
 	}
 out:
 	q->db_disabled = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6e310a0..7388aac 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -530,11 +530,11 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 		 * mechanism.
 		 */
 		if (unlikely(q->bar2_addr == NULL)) {
-			t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-				     val | QID_V(q->cntxt_id));
+			t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+					     val | QID_V(q->cntxt_id));
 		} else {
-			writel(val | QID_V(q->bar2_qid),
-			       q->bar2_addr + SGE_UDB_KDOORBELL);
+			writel_relaxed(val | QID_V(q->bar2_qid),
+				       q->bar2_addr + SGE_UDB_KDOORBELL);
 
 			/* This Write memory Barrier will force the write to
 			 * the User Doorbell area to be flushed.
@@ -986,8 +986,8 @@ inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
 				      (q->bar2_addr + SGE_UDB_WCDOORBELL),
 				      wr);
 		} else {
-			writel(val | QID_V(q->bar2_qid),
-			       q->bar2_addr + SGE_UDB_KDOORBELL);
+			writel_relaxed(val | QID_V(q->bar2_qid),
+				       q->bar2_addr + SGE_UDB_KDOORBELL);
 		}
 
 		/* This Write Memory Barrier will force the write to the User
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 920bccd..8b723a0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -139,7 +139,7 @@ void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
 {
 	while (nregs--) {
 		t4_write_reg(adap, addr_reg, start_idx++);
-		t4_write_reg(adap, data_reg, *vals++);
+		t4_write_reg_relaxed(adap, data_reg, *vals++);
 	}
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 5883f09..00247be4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -442,6 +442,20 @@ static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
 	writel(val, adapter->regs + reg_addr);
 }
 
+/**
+ * t4_write_reg_relaxed - write a HW register without ordering guarantees
+ * @adapter: the adapter
+ * @reg_addr: the register address
+ * @val: the value to write
+ *
+ * Write a 32-bit value into the given HW register.
+ */
+static inline void t4_write_reg_relaxed(struct adapter *adapter, u32 reg_addr,
+					u32 val)
+{
+	writel_relaxed(val, adapter->regs + reg_addr);
+}
+
 #ifndef readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index dfce5df..a3a420b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -546,12 +546,13 @@ static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
 		 * mechanism.
 		 */
 		if (unlikely(fl->bar2_addr == NULL)) {
-			t4_write_reg(adapter,
-				     T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-				     QID_V(fl->cntxt_id) | val);
+			t4_write_reg_relaxed(adapter,
+					     T4VF_SGE_BASE_ADDR +
+							SGE_VF_KDOORBELL,
+					     QID_V(fl->cntxt_id) | val);
 		} else {
-			writel(val | QID_V(fl->bar2_qid),
-			       fl->bar2_addr + SGE_UDB_KDOORBELL);
+			writel_relaxed(val | QID_V(fl->bar2_qid),
+				       fl->bar2_addr + SGE_UDB_KDOORBELL);
 
 			/* This Write memory Barrier will force the write to
 			 * the User Doorbell area to be flushed.
@@ -980,8 +981,9 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
 	if (unlikely(tq->bar2_addr == NULL)) {
 		u32 val = PIDX_V(n);
 
-		t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-			     QID_V(tq->cntxt_id) | val);
+		t4_write_reg_relaxed(adapter,
+				     T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
+				     QID_V(tq->cntxt_id) | val);
 	} else {
 		u32 val = PIDX_T5_V(n);
 
@@ -1026,7 +1028,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
 				count--;
 			}
 		} else
-			writel(val | QID_V(tq->bar2_qid),
+			writel_relaxed(val | QID_V(tq->bar2_qid),
 			       tq->bar2_addr + SGE_UDB_KDOORBELL);
 
 		/* This Write Memory Barrier will force the write to the User
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 13/17] net: cxgb3: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (11 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 12/17] net: cxgb4/cxgb4vf: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 14/17] net: qlge: " Sinan Kaya
                   ` (3 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Santosh Raspatur,
	linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This results in the CPU observing two barriers back to back before executing
the register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/chelsio/cxgb3/adapter.h |  7 +++++++
 drivers/net/ethernet/chelsio/cxgb3/sge.c     | 19 ++++++++++---------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb3/adapter.h b/drivers/net/ethernet/chelsio/cxgb3/adapter.h
index 087ff0f..0e21e66 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb3/adapter.h
@@ -281,6 +281,13 @@ static inline void t3_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
 	writel(val, adapter->regs + reg_addr);
 }
 
+static inline void t3_write_reg_relaxed(struct adapter *adapter, u32 reg_addr,
+					u32 val)
+{
+	CH_DBG(adapter, MMIO, "setting register 0x%x to 0x%x\n", reg_addr, val);
+	writel_relaxed(val, adapter->regs + reg_addr);
+}
+
 static inline struct port_info *adap2pinfo(struct adapter *adap, int idx)
 {
 	return netdev_priv(adap->port[idx]);
diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c
index e988caa..0baab06 100644
--- a/drivers/net/ethernet/chelsio/cxgb3/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c
@@ -487,7 +487,8 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
 	if (q->pend_cred >= q->credits / 4) {
 		q->pend_cred = 0;
 		wmb();
-		t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
+		t3_write_reg_relaxed(adap, A_SG_KDOORBELL,
+				     V_EGRCNTX(q->cntxt_id));
 	}
 }
 
@@ -1058,8 +1059,8 @@ static inline void check_ring_tx_db(struct adapter *adap, struct sge_txq *q)
 	}
 #else
 	wmb();			/* write descriptors before telling HW */
-	t3_write_reg(adap, A_SG_KDOORBELL,
-		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+	t3_write_reg_relaxed(adap, A_SG_KDOORBELL,
+			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 #endif
 }
 
@@ -1510,8 +1511,8 @@ static int ctrl_xmit(struct adapter *adap, struct sge_txq *q,
 	}
 	spin_unlock(&q->lock);
 	wmb();
-	t3_write_reg(adap, A_SG_KDOORBELL,
-		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+	t3_write_reg_relaxed(adap, A_SG_KDOORBELL,
+			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 	return NET_XMIT_SUCCESS;
 }
 
@@ -1554,8 +1555,8 @@ static void restart_ctrlq(unsigned long data)
 
 	spin_unlock(&q->lock);
 	wmb();
-	t3_write_reg(qs->adap, A_SG_KDOORBELL,
-		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+	t3_write_reg_relaxed(qs->adap, A_SG_KDOORBELL,
+			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 }
 
 /*
@@ -1793,8 +1794,8 @@ again:	reclaim_completed_tx(adap, q, TX_RECLAIM_CHUNK);
 #endif
 	wmb();
 	if (likely(written))
-		t3_write_reg(adap, A_SG_KDOORBELL,
-			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
+		t3_write_reg_relaxed(adap, A_SG_KDOORBELL,
+				     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
 }
 
 /**
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 14/17] net: qlge: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (12 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 13/17] net: cxgb3: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 15/17] bnxt_en: " Sinan Kaya
                   ` (2 subsequent siblings)
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Harish Patil,
	Manish Chopra, Dept-GELinuxNICDev, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This results in the CPU observing two barriers back to back before executing
the register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/qlogic/qlge/qlge.h      | 18 ++++++++++++++++++
 drivers/net/ethernet/qlogic/qlge/qlge_main.c |  2 +-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 84ac50f..1465986 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -2185,6 +2185,24 @@ static inline void ql_write_db_reg(u32 val, void __iomem *addr)
 }
 
 /*
+ * Doorbell Registers:
+ * Doorbell registers are virtual registers in the PCI memory space.
+ * The space is allocated by the chip during PCI initialization.  The
+ * device driver finds the doorbell address in BAR 3 in PCI config space.
+ * The registers are used to control outbound and inbound queues. For
+ * example, the producer index for an outbound queue.  Each queue uses
+ * 1 4k chunk of memory.  The lower half of the space is for outbound
+ * queues. The upper half is for inbound queues.
+ * Caller has to guarantee ordering.
+ */
+static inline void ql_write_db_reg_relaxed(u32 val, void __iomem *addr)
+{
+	writel_relaxed(val, addr);
+	mmiowb();
+}
+
+
+/*
  * Shadow Registers:
  * Outbound queues have a consumer index that is maintained by the chip.
  * Inbound queues have a producer index that is maintained by the chip.
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
index 50038d9..c222b7c 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c
+++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c
@@ -2700,7 +2700,7 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev)
 		tx_ring->prod_idx = 0;
 	wmb();
 
-	ql_write_db_reg(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
+	ql_write_db_reg_relaxed(tx_ring->prod_idx, tx_ring->prod_idx_db_reg);
 	netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev,
 		     "tx queued, slot %d, len %d\n",
 		     tx_ring->prod_idx, skb->len);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 15/17] bnxt_en: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (13 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 14/17] net: qlge: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 16/17] qed/qede: " Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 17/17] net: ena: " Sinan Kaya
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Michael Chan, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This results in the CPU observing two barriers back to back before executing
the register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |  2 +-
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         | 11 ++++++++++-
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c |  2 +-
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 1500243..befb538 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1922,7 +1922,7 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget)
 		/* Sync BD data before updating doorbell */
 		wmb();
 
-		bnxt_db_write(bp, db, DB_KEY_TX | prod);
+		bnxt_db_write_relaxed(bp, db, DB_KEY_TX | prod);
 	}
 
 	cpr->cp_raw_cons = raw_cons;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 1989c47..4c0d048 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1402,11 +1402,20 @@ static inline u32 bnxt_tx_avail(struct bnxt *bp, struct bnxt_tx_ring_info *txr)
 }
 
 /* For TX and RX ring doorbells */
+static inline void bnxt_db_write_relaxed(struct bnxt *bp, void __iomem *db,
+					 u32 val)
+{
+	writel_relaxed(val, db);
+	if (bp->flags & BNXT_FLAG_DOUBLE_DB)
+		writel_relaxed(val, db);
+}
+
+/* For TX and RX ring doorbells */
 static inline void bnxt_db_write(struct bnxt *bp, void __iomem *db, u32 val)
 {
 	writel(val, db);
 	if (bp->flags & BNXT_FLAG_DOUBLE_DB)
-		writel(val, db);
+		writel_relaxed(val, db);
 }
 
 extern const u16 bnxt_lhint_arr[];
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 1801582..a1b1060 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2403,7 +2403,7 @@ static int bnxt_run_loopback(struct bnxt *bp)
 	/* Sync BD data before updating doorbell */
 	wmb();
 
-	bnxt_db_write(bp, txr->tx_doorbell, DB_KEY_TX | txr->tx_prod);
+	bnxt_db_write_relaxed(bp, txr->tx_doorbell, DB_KEY_TX | txr->tx_prod);
 	rc = bnxt_poll_loopback(bp, pkt_size);
 
 	dma_unmap_single(&bp->pdev->dev, map, pkt_size, PCI_DMA_TODEVICE);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 16/17] qed/qede: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (14 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 15/17] bnxt_en: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-20  2:42 ` [PATCH v4 17/17] net: ena: " Sinan Kaya
  16 siblings, 0 replies; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Ariel Elior,
	everest-linux-l2, Harish Patil, Manish Chopra,
	Dept-GELinuxNICDev, linux-kernel

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This results in the CPU observing two barriers back to back before executing
the register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/qlogic/qed/qed.h           |  5 ++++-
 drivers/net/ethernet/qlogic/qed/qed_hw.c        | 12 ++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_hw.h        | 14 ++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_int.c       |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_l2.c        |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_ll2.c       |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_vf.c        |  7 ++++---
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c |  2 +-
 drivers/net/ethernet/qlogic/qede/qede_fp.c      |  4 ++--
 drivers/net/ethernet/qlogic/qlge/qlge.h         |  1 -
 include/linux/qed/qed_if.h                      | 17 +++++++++++++----
 11 files changed, 53 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 6948855..241077f 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -818,12 +818,15 @@ u16 qed_get_cm_pq_idx_vf(struct qed_hwfn *p_hwfn, u16 vf);
 						(cdev->regview) + \
 							 (offset))
 
+#define REG_WR_RELAXED(cdev, offset, val)		\
+	writel_relaxed((u32)val, REG_ADDR(cdev, offset))
+
 #define REG_RD(cdev, offset)            readl(REG_ADDR(cdev, offset))
 #define REG_WR(cdev, offset, val)       writel((u32)val, REG_ADDR(cdev, offset))
 #define REG_WR16(cdev, offset, val)     writew((u16)val, REG_ADDR(cdev, offset))
 
 #define DOORBELL(cdev, db_addr, val)			 \
-	writel((u32)val, (void __iomem *)((u8 __iomem *)\
+	writel_relaxed((u32)val, (void __iomem *)((u8 __iomem *)\
 					  (cdev->doorbells) + (db_addr)))
 
 /* Prototypes */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index fca2dbd..1d76121 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -222,6 +222,18 @@ struct qed_ptt *qed_get_reserved_ptt(struct qed_hwfn *p_hwfn,
 	return &p_hwfn->p_ptt_pool->ptts[ptt_idx];
 }
 
+void qed_wr_relaxed(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt,
+		    u32 hw_addr, u32 val)
+{
+	u32 bar_addr = qed_set_ptt(p_hwfn, p_ptt, hw_addr);
+
+	REG_WR_RELAXED(p_hwfn, bar_addr, val);
+	DP_VERBOSE(p_hwfn, NETIF_MSG_HW,
+		   "bar_addr 0x%x, hw_addr 0x%x, val 0x%x\n",
+		   bar_addr, hw_addr, val);
+}
+
 void qed_wr(struct qed_hwfn *p_hwfn,
 	    struct qed_ptt *p_ptt,
 	    u32 hw_addr, u32 val)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
index 8db2839..bb4f5ff 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
@@ -152,6 +152,20 @@ struct qed_ptt *qed_get_reserved_ptt(struct qed_hwfn *p_hwfn,
 				     enum reserved_ptts ptt_idx);
 
 /**
+ * @brief qed_wr_relaxed - Write value to BAR using the given ptt
+ *			   No ordering guarantee.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param val
+ * @param hw_addr
+ */
+void qed_wr_relaxed(struct qed_hwfn *p_hwfn,
+		    struct qed_ptt *p_ptt,
+		    u32 hw_addr,
+		    u32 val);
+
+/**
  * @brief qed_wr - Write value to BAR using the given ptt
  *
  * @param p_hwfn
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index d3eabcf..5f09253 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -1747,7 +1747,7 @@ static void qed_int_igu_cleanup_sb(struct qed_hwfn *p_hwfn,
 
 	barrier();
 
-	qed_wr(p_hwfn, p_ptt, IGU_REG_COMMAND_REG_CTRL, cmd_ctrl);
+	qed_wr_relaxed(p_hwfn, p_ptt, IGU_REG_COMMAND_REG_CTRL, cmd_ctrl);
 
 	/* Flush the write to IGU */
 	mmiowb();
diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c
index 893ef08..7f3f923b 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_l2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c
@@ -921,7 +921,7 @@ qed_eth_pf_rx_queue_start(struct qed_hwfn *p_hwfn,
 
 	/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 	__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
-			  (u32 *)(&init_prod_val));
+			  (u32 *)(&init_prod_val), false);
 
 	return qed_eth_rxq_start_ramrod(p_hwfn, p_cid,
 					bd_max_bytes,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index c4f14fd..211f325 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -1759,7 +1759,7 @@ static void qed_ll2_tx_packet_notify(struct qed_hwfn *p_hwfn,
 	/* Make sure the BDs data is updated before ringing the doorbell */
 	wmb();
 
-	DIRECT_REG_WR(p_tx->doorbell_addr, *((u32 *)&db_msg));
+	DIRECT_REG_WR_RELAXED(p_tx->doorbell_addr, *((u32 *)&db_msg));
 
 	DP_VERBOSE(p_hwfn,
 		   (NETIF_MSG_TX_QUEUED | QED_MSG_LL2),
diff --git a/drivers/net/ethernet/qlogic/qed/qed_vf.c b/drivers/net/ethernet/qlogic/qed/qed_vf.c
index 91b5e9f..6fa5ccb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_vf.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_vf.c
@@ -123,7 +123,8 @@ static int qed_send_msg2pf(struct qed_hwfn *p_hwfn, u8 *done, u32 resp_size)
 	 */
 	wmb();
 
-	REG_WR(p_hwfn, (uintptr_t)&zone_data->trigger, *((u32 *)&trigger));
+	REG_WR_RELAXED(p_hwfn, (uintptr_t)&zone_data->trigger,
+		       *((u32 *)&trigger));
 
 	/* When PF would be done with the response, it would write back to the
 	 * `done' address. Poll until then.
@@ -758,7 +759,7 @@ qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 
 		/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
-				  (u32 *)(&init_prod_val));
+				  (u32 *)(&init_prod_val), false);
 	}
 
 	qed_vf_pf_add_qid(p_hwfn, p_cid);
@@ -788,7 +789,7 @@ qed_vf_pf_rxq_start(struct qed_hwfn *p_hwfn,
 
 		/* Init the rcq, rx bd and rx sge (if valid) producers to 0 */
 		__internal_ram_wr(p_hwfn, *pp_prod, sizeof(u32),
-				  (u32 *)&init_prod_val);
+				  (u32 *)&init_prod_val, false);
 	}
 exit:
 	qed_vf_pf_req_end(p_hwfn, rc);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 4ca3847..0d9f63a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -1417,7 +1417,7 @@ static int qede_selftest_transmit_traffic(struct qede_dev *edev,
 	 */
 	wmb();
 	barrier();
-	writel(txq->tx_db.raw, txq->doorbell_addr);
+	writel_relaxed(txq->tx_db.raw, txq->doorbell_addr);
 
 	/* mmiowb is needed to synchronize doorbell writes from more than one
 	 * processor. It guarantees that the write arrives to the device before
diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index dafc079..9dd2124 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -318,7 +318,7 @@ static inline void qede_update_tx_producer(struct qede_tx_queue *txq)
 	 */
 	wmb();
 	barrier();
-	writel(txq->tx_db.raw, txq->doorbell_addr);
+	writel_relaxed(txq->tx_db.raw, txq->doorbell_addr);
 
 	/* mmiowb is needed to synchronize doorbell writes from more than one
 	 * processor. It guarantees that the write arrives to the device before
@@ -581,7 +581,7 @@ void qede_update_rx_prod(struct qede_dev *edev, struct qede_rx_queue *rxq)
 	wmb();
 
 	internal_ram_wr(rxq->hw_rxq_prod_addr, sizeof(rx_prods),
-			(u32 *)&rx_prods);
+			(u32 *)&rx_prods, true);
 
 	/* mmiowb is needed to synchronize doorbell writes from more than one
 	 * processor. It guarantees that the write arrives to the device before
diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h
index 1465986..01dfdb5 100644
--- a/drivers/net/ethernet/qlogic/qlge/qlge.h
+++ b/drivers/net/ethernet/qlogic/qlge/qlge.h
@@ -2201,7 +2201,6 @@ static inline void ql_write_db_reg_relaxed(u32 val, void __iomem *addr)
 	mmiowb();
 }
 
-
 /*
  * Shadow Registers:
  * Outbound queues have a consumer index that is maintained by the chip.
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 15e398c..70f67ad 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -179,6 +179,9 @@ enum qed_led_mode {
 	QED_LED_MODE_RESTORE
 };
 
+#define DIRECT_REG_WR_RELAXED(reg_addr, val) \
+	writel_relaxed((u32)val, (void __iomem *)(reg_addr))
+
 #define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \
 					    (void __iomem *)(reg_addr))
 
@@ -985,20 +988,26 @@ static inline void qed_sb_ack(struct qed_sb_info *sb_info,
 static inline void __internal_ram_wr(void *p_hwfn,
 				     void __iomem *addr,
 				     int size,
-				     u32 *data)
+				     u32 *data,
+				     bool relaxed)
 
 {
 	unsigned int i;
 
 	for (i = 0; i < size / sizeof(*data); i++)
-		DIRECT_REG_WR(&((u32 __iomem *)addr)[i], data[i]);
+		if (relaxed)
+			DIRECT_REG_WR_RELAXED(&((u32 __iomem *)addr)[i],
+					      data[i]);
+		else
+			DIRECT_REG_WR(&((u32 __iomem *)addr)[i], data[i]);
 }
 
 static inline void internal_ram_wr(void __iomem *addr,
 				   int size,
-				   u32 *data)
+				   u32 *data,
+				   bool relaxed)
 {
-	__internal_ram_wr(NULL, addr, size, data);
+	__internal_ram_wr(NULL, addr, size, data, relaxed);
 }
 
 enum qed_rss_caps {
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v4 17/17] net: ena: Eliminate duplicate barriers on weakly-ordered archs
       [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
                   ` (15 preceding siblings ...)
  2018-03-20  2:42 ` [PATCH v4 16/17] qed/qede: " Sinan Kaya
@ 2018-03-20  2:42 ` Sinan Kaya
  2018-03-25 12:06   ` Belgazal, Netanel
  16 siblings, 1 reply; 22+ messages in thread
From: Sinan Kaya @ 2018-03-20  2:42 UTC (permalink / raw)
  To: netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Sinan Kaya, Netanel Belgazal,
	Saeed Bishara, Zorik Machulsky, David S. Miller, Tobias Klauser,
	linux-kernel

Code includes a memory barrier (wmb() or mb()) followed by writel(). writel()
already has a barrier on some architectures like arm64.

This results in the CPU observing two barriers back to back before executing
the register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a barrier().

Since the code already has an explicit barrier call, change writel() to
writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/amazon/ena/ena_com.c     |  6 ++++--
 drivers/net/ethernet/amazon/ena/ena_eth_com.h | 22 ++++++++++++++++++++--
 drivers/net/ethernet/amazon/ena/ena_netdev.c  |  4 ++--
 3 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index bf2de52..b6e628f 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -631,7 +631,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
 	 */
 	wmb();
 
-	writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
+	writel_relaxed(mmio_read_reg,
+		       ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
 
 	for (i = 0; i < timeout; i++) {
 		if (read_resp->req_id == mmio_read->seq_num)
@@ -1826,7 +1827,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 
 	/* write the aenq doorbell after all AENQ descriptors were read */
 	mb();
-	writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+	writel_relaxed((u32)aenq->head,
+		       dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
 }
 
 int ena_com_dev_reset(struct ena_com_dev *ena_dev,
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 2f76572..09ef7cd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
 	return io_sq->q_depth - 1 - cnt;
 }
 
-static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
+static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
+					    bool relaxed)
 {
 	u16 tail;
 
@@ -116,7 +117,24 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
 	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
 		 io_sq->qid, tail);
 
-	writel(tail, io_sq->db_addr);
+	if (relaxed)
+		writel_relaxed(tail, io_sq->db_addr);
+	else
+		writel(tail, io_sq->db_addr);
+
+	return 0;
+}
+
+static inline int ena_com_write_sq_doorbell_rel(struct ena_com_io_sq *io_sq)
+{
+	u16 tail;
+
+	tail = io_sq->tail;
+
+	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
+		 io_sq->qid, tail);
+
+	writel_relaxed(tail, io_sq->db_addr);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6975150..0530201 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -556,7 +556,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
 		 * issue a doorbell
 		 */
 		wmb();
-		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
+		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
 	}
 
 	rx_ring->next_to_use = next_to_use;
@@ -2151,7 +2151,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
 		/* trigger the dma engine */
-		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
+		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
 		u64_stats_update_begin(&tx_ring->syncp);
 		tx_ring->tx_stats.doorbells++;
 		u64_stats_update_end(&tx_ring->syncp);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 12/17] net: cxgb4/cxgb4vf: Eliminate duplicate barriers on weakly-ordered archs
  2018-03-20  2:42 ` [PATCH v4 12/17] net: cxgb4/cxgb4vf: " Sinan Kaya
@ 2018-03-21 23:03   ` Casey Leedom
  2018-03-22  0:00     ` okaya
  0 siblings, 1 reply; 22+ messages in thread
From: Casey Leedom @ 2018-03-21 23:03 UTC (permalink / raw)
  To: Sinan Kaya, netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Ganesh GR, linux-kernel,
	Michael Werner, SWise OGC

[[ Apologies for the DUPLICATE email.  I forgot to tell my Mail Agent to
   use Plain Text. -- Casey ]]

  I feel very uncomfortable with these proposed changes.  Our team is right
in the middle of trying to tease our way through the various platform
implementations of writel(), writel_relaxed(), __raw_writel(), etc. in order
to support x86, PowerPC, ARM, etc. with a single code base.  This is
complicated by the somewhat ... "fuzzily defined" semantics and varying
platform implementations of all of these APIs.  (And note that I'm just
picking writel() as an example.)

  Additionally, many of the changes aren't even in fast paths and are thus
unneeded for performance.

  Please don't make these changes.  We're trying to get this all sussed out.

Casey


  
From: Sinan Kaya <okaya@codeaurora.org>
Sent: Monday, March 19, 2018 7:42:27 PM
To: netdev@vger.kernel.org; timur@codeaurora.org; sulrich@codeaurora.org
Cc: linux-arm-msm@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Sinan Kaya; Ganesh GR; Casey Leedom; linux-kernel@vger.kernel.org
Subject: [PATCH v4 12/17] net: cxgb4/cxgb4vf: Eliminate duplicate barriers on weakly-ordered archs
  

Code includes wmb() followed by writel(). writel() already has a barrier on
some architectures like arm64.

This results in the CPU observing two barriers back to back before executing
the register write.

Create a new wrapper function with relaxed write operator. Use the new
wrapper when a write is following a wmb().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      |  6 ++++++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c | 13 +++++++------
 drivers/net/ethernet/chelsio/cxgb4/sge.c        | 12 ++++++------
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c      |  2 +-
 drivers/net/ethernet/chelsio/cxgb4vf/adapter.h  | 14 ++++++++++++++
 drivers/net/ethernet/chelsio/cxgb4vf/sge.c      | 18 ++++++++++--------
 6 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 9040e13..6bde0b9 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1202,6 +1202,12 @@ static inline void t4_write_reg(struct adapter *adap, u32 reg_addr, u32 val)
         writel(val, adap->regs + reg_addr);
 }
 
+static inline void t4_write_reg_relaxed(struct adapter *adap, u32 reg_addr,
+                                       u32 val)
+{
+       writel_relaxed(val, adap->regs + reg_addr);
+}
+
 #ifndef readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 7b452e8..276472d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1723,8 +1723,8 @@ int cxgb4_sync_txq_pidx(struct net_device *dev, u16 qid, u16 pidx,
                 else
                         val = PIDX_T5_V(delta);
                 wmb();
-               t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-                            QID_V(qid) | val);
+               t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+                                    QID_V(qid) | val);
         }
 out:
         return ret;
@@ -1902,8 +1902,9 @@ static void enable_txq_db(struct adapter *adap, struct sge_txq *q)
                  * are committed before we tell HW about them.
                  */
                 wmb();
-               t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-                            QID_V(q->cntxt_id) | PIDX_V(q->db_pidx_inc));
+               t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+                                    QID_V(q->cntxt_id) |
+                                               PIDX_V(q->db_pidx_inc));
                 q->db_pidx_inc = 0;
         }
         q->db_disabled = 0;
@@ -2003,8 +2004,8 @@ static void sync_txq_pidx(struct adapter *adap, struct sge_txq *q)
                 else
                         val = PIDX_T5_V(delta);
                 wmb();
-               t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-                            QID_V(q->cntxt_id) | val);
+               t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+                                    QID_V(q->cntxt_id) | val);
         }
 out:
         q->db_disabled = 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6e310a0..7388aac 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -530,11 +530,11 @@ static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q)
                  * mechanism.
                  */
                 if (unlikely(q->bar2_addr == NULL)) {
-                       t4_write_reg(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
-                                    val | QID_V(q->cntxt_id));
+                       t4_write_reg_relaxed(adap, MYPF_REG(SGE_PF_KDOORBELL_A),
+                                            val | QID_V(q->cntxt_id));
                 } else {
-                       writel(val | QID_V(q->bar2_qid),
-                              q->bar2_addr + SGE_UDB_KDOORBELL);
+                       writel_relaxed(val | QID_V(q->bar2_qid),
+                                      q->bar2_addr + SGE_UDB_KDOORBELL);
 
                         /* This Write memory Barrier will force the write to
                          * the User Doorbell area to be flushed.
@@ -986,8 +986,8 @@ inline void cxgb4_ring_tx_db(struct adapter *adap, struct sge_txq *q, int n)
                                       (q->bar2_addr + SGE_UDB_WCDOORBELL),
                                       wr);
                 } else {
-                       writel(val | QID_V(q->bar2_qid),
-                              q->bar2_addr + SGE_UDB_KDOORBELL);
+                       writel_relaxed(val | QID_V(q->bar2_qid),
+                                      q->bar2_addr + SGE_UDB_KDOORBELL);
                 }
 
                 /* This Write Memory Barrier will force the write to the User
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 920bccd..8b723a0 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -139,7 +139,7 @@ void t4_write_indirect(struct adapter *adap, unsigned int addr_reg,
 {
         while (nregs--) {
                 t4_write_reg(adap, addr_reg, start_idx++);
-               t4_write_reg(adap, data_reg, *vals++);
+               t4_write_reg_relaxed(adap, data_reg, *vals++);
         }
 }
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
index 5883f09..00247be4 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/adapter.h
@@ -442,6 +442,20 @@ static inline void t4_write_reg(struct adapter *adapter, u32 reg_addr, u32 val)
         writel(val, adapter->regs + reg_addr);
 }
 
+/**
+ * t4_write_reg_relaxed - write a HW register without ordering guarantees
+ * @adapter: the adapter
+ * @reg_addr: the register address
+ * @val: the value to write
+ *
+ * Write a 32-bit value into the given HW register.
+ */
+static inline void t4_write_reg_relaxed(struct adapter *adapter, u32 reg_addr,
+                                       u32 val)
+{
+       writel_relaxed(val, adapter->regs + reg_addr);
+}
+
 #ifndef readq
 static inline u64 readq(const volatile void __iomem *addr)
 {
diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
index dfce5df..a3a420b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c
@@ -546,12 +546,13 @@ static inline void ring_fl_db(struct adapter *adapter, struct sge_fl *fl)
                  * mechanism.
                  */
                 if (unlikely(fl->bar2_addr == NULL)) {
-                       t4_write_reg(adapter,
-                                    T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-                                    QID_V(fl->cntxt_id) | val);
+                       t4_write_reg_relaxed(adapter,
+                                            T4VF_SGE_BASE_ADDR +
+                                                       SGE_VF_KDOORBELL,
+                                            QID_V(fl->cntxt_id) | val);
                 } else {
-                       writel(val | QID_V(fl->bar2_qid),
-                              fl->bar2_addr + SGE_UDB_KDOORBELL);
+                       writel_relaxed(val | QID_V(fl->bar2_qid),
+                                      fl->bar2_addr + SGE_UDB_KDOORBELL);
 
                         /* This Write memory Barrier will force the write to
                          * the User Doorbell area to be flushed.
@@ -980,8 +981,9 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
         if (unlikely(tq->bar2_addr == NULL)) {
                 u32 val = PIDX_V(n);
 
-               t4_write_reg(adapter, T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
-                            QID_V(tq->cntxt_id) | val);
+               t4_write_reg_relaxed(adapter,
+                                    T4VF_SGE_BASE_ADDR + SGE_VF_KDOORBELL,
+                                    QID_V(tq->cntxt_id) | val);
         } else {
                 u32 val = PIDX_T5_V(n);
 
@@ -1026,7 +1028,7 @@ static inline void ring_tx_db(struct adapter *adapter, struct sge_txq *tq,
                                 count--;
                         }
                 } else
-                       writel(val | QID_V(tq->bar2_qid),
+                       writel_relaxed(val | QID_V(tq->bar2_qid),
                                tq->bar2_addr + SGE_UDB_KDOORBELL);
 
                 /* This Write Memory Barrier will force the write to the User
-- 
2.7.4

    

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 12/17] net: cxgb4/cxgb4vf: Eliminate duplicate barriers on weakly-ordered archs
  2018-03-21 23:03   ` Casey Leedom
@ 2018-03-22  0:00     ` okaya
  0 siblings, 0 replies; 22+ messages in thread
From: okaya @ 2018-03-22  0:00 UTC (permalink / raw)
  To: Casey Leedom
  Cc: netdev, timur, sulrich, linux-arm-msm, linux-arm-kernel,
	Ganesh GR, linux-kernel, Michael Werner, SWise OGC

On 2018-03-21 19:03, Casey Leedom wrote:
> [[ Apologies for the DUPLICATE email.  I forgot to tell my Mail Agent 
> to
>    use Plain Text. -- Casey ]]
> 
>   I feel very uncomfortable with these proposed changes.  Our team is 
> right
> in the middle of trying to tease our way through the various platform
> implementations of writel(), writel_relaxed(), __raw_writel(), etc. in 
> order
> to support x86, PowerPC, ARM, etc. with a single code base.  This is
> complicated by the somewhat ... "fuzzily defined" semantics and varying
> platform implementations of all of these APIs.  (And note that I'm just
> picking writel() as an example.)
> 
>   Additionally, many of the changes aren't even in fast paths and are 
> thus
> unneeded for performance.
> 
>   Please don't make these changes.  We're trying to get this all sussed 
> out.
> 

I was also given the feedback to look at performance critical path only. 
I am in the process of revisiting the patches.

If you can point me to the ones that are important, I can try to limit 
the changes to those only.

If your team wants to do it, I can drop this patch as well.

I think the semantics of the write API are clear. What was actually 
implemented is another story.

I can share a few of my findings.

A portable driver needs to do this.

descriptor update in mem
wmb ()
writel_relaxed ()
mmiowb ()

Using __raw_writel() is wrong as it can get reordered.

Using wmb()+writel() is also wrong for performance reasons.

If something is unclear, please ask.

> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* RE: [PATCH v4 11/17] bnx2x: Eliminate duplicate barriers on weakly-ordered archs
  2018-03-20  2:42 ` [PATCH v4 11/17] bnx2x: " Sinan Kaya
@ 2018-03-22 10:10   ` Kalluru, Sudarsana
  0 siblings, 0 replies; 22+ messages in thread
From: Kalluru, Sudarsana @ 2018-03-22 10:10 UTC (permalink / raw)
  To: Sinan Kaya, netdev, timur, sulrich
  Cc: linux-arm-msm, linux-arm-kernel, Elior, Ariel,
	Dept-Eng Everest Linux L2, linux-kernel

-----Original Message-----
From: Sinan Kaya [mailto:okaya@codeaurora.org] 
Sent: 20 March 2018 08:12
To: netdev@vger.kernel.org; timur@codeaurora.org; sulrich@codeaurora.org
Cc: linux-arm-msm@vger.kernel.org; linux-arm-kernel@lists.infradead.org; Sinan Kaya <okaya@codeaurora.org>; Elior, Ariel <Ariel.Elior@cavium.com>; Dept-Eng Everest Linux L2 <Dept-EngEverestLinuxL2@cavium.com>; linux-kernel@vger.kernel.org
Subject: [PATCH v4 11/17] bnx2x: Eliminate duplicate barriers on weakly-ordered archs

Code includes wmb() followed by writel(). writel() already has a barrier on some architectures like arm64.

This ends up with the CPU observing two barriers back to back before executing the register write.

Since code already has an explicit barrier call, changing writel() to writel_relaxed().

Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x.h       |  9 ++++++++-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h   |  4 ++--
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c  | 21 +++++++++++----------
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c |  2 +-
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c  |  2 +-
 5 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
index 352beff..ac38db9 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h
@@ -166,6 +166,12 @@ do {						\
 #define REG_RD8(bp, offset)		readb(REG_ADDR(bp, offset))
 #define REG_RD16(bp, offset)		readw(REG_ADDR(bp, offset))
 
+#define REG_WR_RELAXED(bp, offset, val)	writel_relaxed((u32)val,\
+						       REG_ADDR(bp, offset))
+
+#define REG_WR16_RELAXED(bp, offset, val) \
+	writew_relaxed((u16)val, REG_ADDR(bp, offset))
+
 #define REG_WR(bp, offset, val)		writel((u32)val, REG_ADDR(bp, offset))
 #define REG_WR8(bp, offset, val)	writeb((u8)val, REG_ADDR(bp, offset))
 #define REG_WR16(bp, offset, val)	writew((u16)val, REG_ADDR(bp, offset))
@@ -760,7 +766,8 @@ struct bnx2x_fastpath {
 #endif
 #define DOORBELL(bp, cid, val) \
 	do { \
-		writel((u32)(val), bp->doorbells + (bp->db_size * (cid))); \
+		writel_relaxed((u32)(val),\
+				bp->doorbells + (bp->db_size * (cid))); \
 	} while (0)
 
 /* TX CSUM helpers */
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
index a5265e1..a8ce5c5 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.h
@@ -522,8 +522,8 @@ static inline void bnx2x_update_rx_prod(struct bnx2x *bp,
 	wmb();
 
 	for (i = 0; i < sizeof(rx_prods)/4; i++)
-		REG_WR(bp, fp->ustorm_rx_prods_offset + i*4,
-		       ((u32 *)&rx_prods)[i]);
+		REG_WR_RELAXED(bp, fp->ustorm_rx_prods_offset + i * 4,
+			       ((u32 *)&rx_prods)[i]);
 
 	mmiowb(); /* keep prod updates ordered */
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
index 74fc9af..2dea1b6 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
@@ -1608,8 +1608,8 @@ static void bnx2x_hc_int_enable(struct bnx2x *bp)
 		} else
 			val = 0xffff;
 
-		REG_WR(bp, HC_REG_TRAILING_EDGE_0 + port*8, val);
-		REG_WR(bp, HC_REG_LEADING_EDGE_0 + port*8, val);
+		REG_WR_RELAXED(bp, HC_REG_TRAILING_EDGE_0 + port * 8, val);
+		REG_WR_RELAXED(bp, HC_REG_LEADING_EDGE_0 + port * 8, val);
 	}
 
 	/* Make sure that interrupts are indeed enabled from here on */
@@ -1672,8 +1672,8 @@ static void bnx2x_igu_int_enable(struct bnx2x *bp)
 	} else
 		val = 0xffff;
 
-	REG_WR(bp, IGU_REG_TRAILING_EDGE_LATCH, val);
-	REG_WR(bp, IGU_REG_LEADING_EDGE_LATCH, val);
+	REG_WR_RELAXED(bp, IGU_REG_TRAILING_EDGE_LATCH, val);
+	REG_WR_RELAXED(bp, IGU_REG_LEADING_EDGE_LATCH, val);
 
 	/* Make sure that interrupts are indeed enabled from here on */
 	mmiowb();
@@ -3817,8 +3817,8 @@ static void bnx2x_sp_prod_update(struct bnx2x *bp)
 	 */
 	mb();
 
-	REG_WR16(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
-		 bp->spq_prod_idx);
+	REG_WR16_RELAXED(bp, BAR_XSTRORM_INTMEM + XSTORM_SPQ_PROD_OFFSET(func),
+			 bp->spq_prod_idx);
 	mmiowb();
 }
 
@@ -7761,7 +7761,7 @@ void bnx2x_igu_clear_sb_gen(struct bnx2x *bp, u8 func, u8 idu_sb_id, bool is_pf)
 	barrier();
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 			  ctl, igu_addr_ctl);
-	REG_WR(bp, igu_addr_ctl, ctl);
+	REG_WR_RELAXED(bp, igu_addr_ctl, ctl);
 	mmiowb();
 	barrier();
 
@@ -9720,13 +9720,14 @@ static void bnx2x_process_kill_chip_reset(struct bnx2x *bp, bool global)
 	barrier();
 	mmiowb();
 
-	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
-	       reset_mask2 & (~stay_reset2));
+	REG_WR_RELAXED(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_2_SET,
+		       reset_mask2 & (~stay_reset2));
 
 	barrier();
 	mmiowb();
 
-	REG_WR(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET, reset_mask1);
+	REG_WR_RELAXED(bp, GRCBASE_MISC + MISC_REGISTERS_RESET_REG_1_SET,
+		       reset_mask1);
 	mmiowb();
 }
 
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index ffa7959..40e55d8 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -105,7 +105,7 @@ static void bnx2x_vf_igu_ack_sb(struct bnx2x *bp, struct bnx2x_virtf *vf,
 
 	DP(NETIF_MSG_HW, "write 0x%08x to IGU(via GRC) addr 0x%x\n",
 	   ctl, igu_addr_ctl);
-	REG_WR(bp, igu_addr_ctl, ctl);
+	REG_WR_RELAXED(bp, igu_addr_ctl, ctl);
 	mmiowb();
 	barrier();
 }
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
index 76a4668..3b2f1bd 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_vfpf.c
@@ -170,7 +170,7 @@ static int bnx2x_send_msg2pf(struct bnx2x *bp, u8 *done, dma_addr_t msg_mapping)
 	wmb();
 
 	/* Trigger the PF FW */
-	writeb(1, &zone_data->trigger.vf_pf_channel.addr_valid);
+	writeb_relaxed(1, &zone_data->trigger.vf_pf_channel.addr_valid);
 
 	/* Wait for PF to complete */
 	while ((tout >= 0) && (!*done)) {
--
2.7.4

Acked-by: Sudarsana Kalluru <Sudarsana.Kalluru@cavium.com>

^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 17/17] net: ena: Eliminate duplicate barriers on weakly-ordered archs
  2018-03-20  2:42 ` [PATCH v4 17/17] net: ena: " Sinan Kaya
@ 2018-03-25 12:06   ` Belgazal, Netanel
  2018-03-25 13:33     ` okaya
  0 siblings, 1 reply; 22+ messages in thread
From: Belgazal, Netanel @ 2018-03-25 12:06 UTC (permalink / raw)
  To: Sinan Kaya, netdev, timur, sulrich, Kiyanovski, Arthur
  Cc: linux-arm-msm, linux-arm-kernel, Bshara, Saeed, Machulsky, Zorik,
	David S. Miller, Tobias Klauser, linux-kernel

I think you should either add a parameter to ena_com_write_sq_doorbell() or add ena_com_write_sq_doorbell_rel(), not both.
Right now the patch does both, which leaves ena_com_write_sq_doorbell_rel() as an unused function.

On 3/20/18, 4:43 AM, "Sinan Kaya" <okaya@codeaurora.org> wrote:

    Code includes barrier() followed by writel(). writel() already has a
    barrier on some architectures like arm64.
    
    This ends up with the CPU observing two barriers back to back before
    executing the register write.
    
    Create a new wrapper function with relaxed write operator. Use the new
    wrapper when a write is following a barrier().
    
    Since code already has an explicit barrier call, changing writel() to
    writel_relaxed().
    
    Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
    ---
     drivers/net/ethernet/amazon/ena/ena_com.c     |  6 ++++--
     drivers/net/ethernet/amazon/ena/ena_eth_com.h | 22 ++++++++++++++++++++--
     drivers/net/ethernet/amazon/ena/ena_netdev.c  |  4 ++--
     3 files changed, 26 insertions(+), 6 deletions(-)
    
    diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
    index bf2de52..b6e628f 100644
    --- a/drivers/net/ethernet/amazon/ena/ena_com.c
    +++ b/drivers/net/ethernet/amazon/ena/ena_com.c
    @@ -631,7 +631,8 @@ static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset)
     	 */
     	wmb();
     
    -	writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
    +	writel_relaxed(mmio_read_reg,
    +		       ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
     
     	for (i = 0; i < timeout; i++) {
     		if (read_resp->req_id == mmio_read->seq_num)
    @@ -1826,7 +1827,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
     
     	/* write the aenq doorbell after all AENQ descriptors were read */
     	mb();
    -	writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
    +	writel_relaxed((u32)aenq->head,
    +		       dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
     }
     
     int ena_com_dev_reset(struct ena_com_dev *ena_dev,
    diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
    index 2f76572..09ef7cd 100644
    --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
    +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
    @@ -107,7 +107,8 @@ static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
     	return io_sq->q_depth - 1 - cnt;
     }
     
    -static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
    +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq,
    +					    bool relaxed)
     {
     	u16 tail;
     
    @@ -116,7 +117,24 @@ static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
     	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
     		 io_sq->qid, tail);
     
    -	writel(tail, io_sq->db_addr);
    +	if (relaxed)
    +		writel_relaxed(tail, io_sq->db_addr);
    +	else
    +		writel(tail, io_sq->db_addr);
    +
    +	return 0;
    +}
    +
    +static inline int ena_com_write_sq_doorbell_rel(struct ena_com_io_sq *io_sq)
    +{
    +	u16 tail;
    +
    +	tail = io_sq->tail;
    +
    +	pr_debug("write submission queue doorbell for queue: %d tail: %d\n",
    +		 io_sq->qid, tail);
    +
    +	writel_relaxed(tail, io_sq->db_addr);
     
     	return 0;
     }
    diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
    index 6975150..0530201 100644
    --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
    +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
    @@ -556,7 +556,7 @@ static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num)
     		 * issue a doorbell
     		 */
     		wmb();
    -		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
    +		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
     	}
     
     	rx_ring->next_to_use = next_to_use;
    @@ -2151,7 +2151,7 @@ static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev)
     
     	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
     		/* trigger the dma engine */
    -		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
    +		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
     		u64_stats_update_begin(&tx_ring->syncp);
     		tx_ring->tx_stats.doorbells++;
     		u64_stats_update_end(&tx_ring->syncp);
    -- 
    2.7.4
    
    
    

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 17/17] net: ena: Eliminate duplicate barriers on weakly-ordered archs
  2018-03-25 12:06   ` Belgazal, Netanel
@ 2018-03-25 13:33     ` okaya
  0 siblings, 0 replies; 22+ messages in thread
From: okaya @ 2018-03-25 13:33 UTC (permalink / raw)
  To: Belgazal, Netanel
  Cc: netdev, timur, sulrich, Kiyanovski, Arthur, linux-arm-msm,
	linux-arm-kernel, Bshara, Saeed, Machulsky, Zorik,
	David S. Miller, Tobias Klauser, linux-kernel

On 2018-03-25 08:06, Belgazal, Netanel wrote:
> I think you should either add a parameter to
> ena_com_write_sq_doorbell() or add ena_com_write_sq_doorbell_rel().
> Right now, you have unused function.

That is true. I got rid of ena_com_write_sq_doorbell_rel.

> 
> On 3/20/18, 4:43 AM, "Sinan Kaya" <okaya@codeaurora.org> wrote:
> 
>     Code includes barrier() followed by writel(). writel() already has 
> a
>     barrier
>     on some architectures like arm64.
> 
>     This ends up CPU observing two barriers back to back before 
> executing the
>     register write.
> 
>     Create a new wrapper function with relaxed write operator. Use the 
> new
>     wrapper when a write is following a barrier().
> 
>     Since code already has an explicit barrier call, changing writel() 
> to
>     writel_relaxed().
> 
>     Signed-off-by: Sinan Kaya <okaya@codeaurora.org>
>     ---
>      drivers/net/ethernet/amazon/ena/ena_com.c     |  6 ++++--
>      drivers/net/ethernet/amazon/ena/ena_eth_com.h | 22 
> ++++++++++++++++++++--
>      drivers/net/ethernet/amazon/ena/ena_netdev.c  |  4 ++--
>      3 files changed, 26 insertions(+), 6 deletions(-)
> 
>     diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c
> b/drivers/net/ethernet/amazon/ena/ena_com.c
>     index bf2de52..b6e628f 100644
>     --- a/drivers/net/ethernet/amazon/ena/ena_com.c
>     +++ b/drivers/net/ethernet/amazon/ena/ena_com.c
>     @@ -631,7 +631,8 @@ static u32 ena_com_reg_bar_read32(struct
> ena_com_dev *ena_dev, u16 offset)
>      	 */
>      	wmb();
> 
>     -	writel(mmio_read_reg, ena_dev->reg_bar + 
> ENA_REGS_MMIO_REG_READ_OFF);
>     +	writel_relaxed(mmio_read_reg,
>     +		       ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF);
> 
>      	for (i = 0; i < timeout; i++) {
>      		if (read_resp->req_id == mmio_read->seq_num)
>     @@ -1826,7 +1827,8 @@ void ena_com_aenq_intr_handler(struct
> ena_com_dev *dev, void *data)
> 
>      	/* write the aenq doorbell after all AENQ descriptors were read 
> */
>      	mb();
>     -	writel((u32)aenq->head, dev->reg_bar + 
> ENA_REGS_AENQ_HEAD_DB_OFF);
>     +	writel_relaxed((u32)aenq->head,
>     +		       dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
>      }
> 
>      int ena_com_dev_reset(struct ena_com_dev *ena_dev,
>     diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
> b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
>     index 2f76572..09ef7cd 100644
>     --- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
>     +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
>     @@ -107,7 +107,8 @@ static inline int
> ena_com_sq_empty_space(struct ena_com_io_sq *io_sq)
>      	return io_sq->q_depth - 1 - cnt;
>      }
> 
>     -static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq 
> *io_sq)
>     +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq 
> *io_sq,
>     +					    bool relaxed)
>      {
>      	u16 tail;
> 
>     @@ -116,7 +117,24 @@ static inline int
> ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq)
>      	pr_debug("write submission queue doorbell for queue: %d tail: 
> %d\n",
>      		 io_sq->qid, tail);
> 
>     -	writel(tail, io_sq->db_addr);
>     +	if (relaxed)
>     +		writel_relaxed(tail, io_sq->db_addr);
>     +	else
>     +		writel(tail, io_sq->db_addr);
>     +
>     +	return 0;
>     +}
>     +
>     +static inline int ena_com_write_sq_doorbell_rel(struct
> ena_com_io_sq *io_sq)
>     +{
>     +	u16 tail;
>     +
>     +	tail = io_sq->tail;
>     +
>     +	pr_debug("write submission queue doorbell for queue: %d tail: 
> %d\n",
>     +		 io_sq->qid, tail);
>     +
>     +	writel_relaxed(tail, io_sq->db_addr);
> 
>      	return 0;
>      }
>     diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c
> b/drivers/net/ethernet/amazon/ena/ena_netdev.c
>     index 6975150..0530201 100644
>     --- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
>     +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
>     @@ -556,7 +556,7 @@ static int ena_refill_rx_bufs(struct ena_ring
> *rx_ring, u32 num)
>      		 * issue a doorbell
>      		 */
>      		wmb();
>     -		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq);
>     +		ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq, true);
>      	}
> 
>      	rx_ring->next_to_use = next_to_use;
>     @@ -2151,7 +2151,7 @@ static netdev_tx_t ena_start_xmit(struct
> sk_buff *skb, struct net_device *dev)
> 
>      	if (netif_xmit_stopped(txq) || !skb->xmit_more) {
>      		/* trigger the dma engine */
>     -		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq);
>     +		ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq, false);
>      		u64_stats_update_begin(&tx_ring->syncp);
>      		tx_ring->tx_stats.doorbells++;
>      		u64_stats_update_end(&tx_ring->syncp);
>     --
>     2.7.4

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2018-03-25 13:33 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <1521513753-7325-1-git-send-email-okaya@codeaurora.org>
2018-03-20  2:42 ` [PATCH v4 01/17] i40e/i40evf: Eliminate duplicate barriers on weakly-ordered archs Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 02/17] ixgbe: eliminate " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 03/17] igbvf: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 04/17] igb: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 05/17] ixgbevf: keep writel() closer to wmb() Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 06/17] ixgbevf: eliminate duplicate barriers on weakly-ordered archs Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 07/17] fm10k: Eliminate " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 08/17] drivers: net: cxgb: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 09/17] net: qla3xxx: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 10/17] qlcnic: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 11/17] bnx2x: " Sinan Kaya
2018-03-22 10:10   ` Kalluru, Sudarsana
2018-03-20  2:42 ` [PATCH v4 12/17] net: cxgb4/cxgb4vf: " Sinan Kaya
2018-03-21 23:03   ` Casey Leedom
2018-03-22  0:00     ` okaya
2018-03-20  2:42 ` [PATCH v4 13/17] net: cxgb3: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 14/17] net: qlge: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 15/17] bnxt_en: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 16/17] qed/qede: " Sinan Kaya
2018-03-20  2:42 ` [PATCH v4 17/17] net: ena: " Sinan Kaya
2018-03-25 12:06   ` Belgazal, Netanel
2018-03-25 13:33     ` okaya

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).