All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
@ 2016-11-18 12:52 Satha Rao
  2016-11-18 15:00 ` Bruce Richardson
                   ` (2 more replies)
  0 siblings, 3 replies; 11+ messages in thread
From: Satha Rao @ 2016-11-18 12:52 UTC (permalink / raw)
  To: helin.zhang; +Cc: jingjing.wu, jerin.jacob, jianbo.liu, dev, Satha Rao

i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
                       results. To solve this include rte memory barriers

Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
---
 drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
index 38e7ba5..ffa3160 100644
--- a/drivers/net/i40e/base/i40e_osdep.h
+++ b/drivers/net/i40e/base/i40e_osdep.h
@@ -158,7 +158,13 @@ do {                                                            \
 	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
 static inline uint32_t i40e_read_addr(volatile void *addr)
 {
+#if defined(RTE_ARCH_ARM64)
+	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
+	rte_rmb();
+	return val;
+#else
 	return rte_le_to_cpu_32(I40E_PCI_REG(addr));
+#endif
 }
 #define I40E_PCI_REG_WRITE(reg, value) \
 	do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
@@ -171,8 +177,16 @@ static inline uint32_t i40e_read_addr(volatile void *addr)
 	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value))
 
 #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg)))
+#if defined(RTE_ARCH_ARM64)
+#define wr32(a, reg, value) \
+	do { \
+		I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)); \
+		rte_wmb(); \
+	} while (0)
+#else
 #define wr32(a, reg, value) \
 	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value))
+#endif
 #define flush(a) i40e_read_addr(I40E_PCI_REG_ADDR((a), (I40E_GLGEN_STAT)))
 
 #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-18 12:52 [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX Satha Rao
@ 2016-11-18 15:00 ` Bruce Richardson
  2016-11-20 23:21 ` Ananyev, Konstantin
  2017-02-07 14:33 ` Ferruh Yigit
  2 siblings, 0 replies; 11+ messages in thread
From: Bruce Richardson @ 2016-11-18 15:00 UTC (permalink / raw)
  To: Satha Rao; +Cc: helin.zhang, jingjing.wu, jerin.jacob, jianbo.liu, dev

On Fri, Nov 18, 2016 at 04:52:13AM -0800, Satha Rao wrote:
> i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
>                        results. To solve this include rte memory barriers
> 
> Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> ---
>  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> index 38e7ba5..ffa3160 100644
> --- a/drivers/net/i40e/base/i40e_osdep.h
> +++ b/drivers/net/i40e/base/i40e_osdep.h
> @@ -158,7 +158,13 @@ do {                                                            \
>  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
>  static inline uint32_t i40e_read_addr(volatile void *addr)
>  {
> +#if defined(RTE_ARCH_ARM64)
> +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> +	rte_rmb();
> +	return val;
> +#else
>  	return rte_le_to_cpu_32(I40E_PCI_REG(addr));
> +#endif
>  }
>  #define I40E_PCI_REG_WRITE(reg, value) \
>  	do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
> @@ -171,8 +177,16 @@ static inline uint32_t i40e_read_addr(volatile void *addr)
>  	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value))
>  
>  #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg)))
> +#if defined(RTE_ARCH_ARM64)
> +#define wr32(a, reg, value) \
> +	do { \
> +		I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)); \
> +		rte_wmb(); \
> +	} while (0)
> +#else
>  #define wr32(a, reg, value) \
>  	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value))
> +#endif
>  #define flush(a) i40e_read_addr(I40E_PCI_REG_ADDR((a), (I40E_GLGEN_STAT)))
>  
>  #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
> -- 

Would rte_smp_*mb() functions allow you to get a similar result without
the need for #ifdefs? It should be a full barrier on weakly ordered
platforms which just a compiler barrier on IA.

/Bruce

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-18 12:52 [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX Satha Rao
  2016-11-18 15:00 ` Bruce Richardson
@ 2016-11-20 23:21 ` Ananyev, Konstantin
  2016-11-21 22:16   ` Jerin Jacob
  2017-02-07 14:33 ` Ferruh Yigit
  2 siblings, 1 reply; 11+ messages in thread
From: Ananyev, Konstantin @ 2016-11-20 23:21 UTC (permalink / raw)
  To: Satha Rao, Zhang, Helin; +Cc: Wu, Jingjing, jerin.jacob, jianbo.liu, dev

Hi
> 
> i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
>                        results. To solve this include rte memory barriers
> 
> Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> ---
>  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
> 
> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> index 38e7ba5..ffa3160 100644
> --- a/drivers/net/i40e/base/i40e_osdep.h
> +++ b/drivers/net/i40e/base/i40e_osdep.h
> @@ -158,7 +158,13 @@ do {                                                            \
>  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
>  static inline uint32_t i40e_read_addr(volatile void *addr)
>  {
> +#if defined(RTE_ARCH_ARM64)
> +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> +	rte_rmb();
> +	return val;

If you really need an rmb/wmb with MMIO read/writes on ARM,
I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
BTW, I suppose if you need it for i40e, you would need it for other devices too.
Konstantin

> +#else
>  	return rte_le_to_cpu_32(I40E_PCI_REG(addr));
> +#endif
>  }
>  #define I40E_PCI_REG_WRITE(reg, value) \
>  	do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
> @@ -171,8 +177,16 @@ static inline uint32_t i40e_read_addr(volatile void *addr)
>  	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value))
> 
>  #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg)))
> +#if defined(RTE_ARCH_ARM64)
> +#define wr32(a, reg, value) \
> +	do { \
> +		I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)); \
> +		rte_wmb(); \
> +	} while (0)
> +#else
>  #define wr32(a, reg, value) \
>  	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value))
> +#endif
>  #define flush(a) i40e_read_addr(I40E_PCI_REG_ADDR((a), (I40E_GLGEN_STAT)))
> 
>  #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-20 23:21 ` Ananyev, Konstantin
@ 2016-11-21 22:16   ` Jerin Jacob
  2016-11-22 13:46     ` Bruce Richardson
  0 siblings, 1 reply; 11+ messages in thread
From: Jerin Jacob @ 2016-11-21 22:16 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: Satha Rao, Zhang, Helin, Wu, Jingjing, jianbo.liu, dev

On Sun, Nov 20, 2016 at 11:21:43PM +0000, Ananyev, Konstantin wrote:
> Hi
> > 
> > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
> >                        results. To solve this include rte memory barriers
> > 
> > Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> > ---
> >  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
> >  1 file changed, 14 insertions(+)
> > 
> > diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> > index 38e7ba5..ffa3160 100644
> > --- a/drivers/net/i40e/base/i40e_osdep.h
> > +++ b/drivers/net/i40e/base/i40e_osdep.h
> > @@ -158,7 +158,13 @@ do {                                                            \
> >  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
> >  static inline uint32_t i40e_read_addr(volatile void *addr)
> >  {
> > +#if defined(RTE_ARCH_ARM64)
> > +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > +	rte_rmb();
> > +	return val;
> 
> If you really need an rmb/wmb with MMIO read/writes on ARM,
> I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
> BTW, I suppose if you need it for i40e, you would need it for other devices too.

Yes. ARM would need for all devices(typically, the devices on external PCI bus).
I guess rte_smp_rmb may not be the correct abstraction. So we need more of
rte_rmb() as we need only non smp variant on IO side. I guess then it make sense to
create new abstraction in eal with following variants so that each arch
gets opportunity to make what it makes sense that specific platform

rte_readb_relaxed
rte_readw_relaxed
rte_readl_relaxed
rte_readq_relaxed
rte_writeb_relaxed
rte_writew_relaxed
rte_writel_relaxed
rte_writeq_relaxed
rte_readb
rte_readw
rte_readl
rte_readq
rte_writeb
rte_writew
rte_writel
rte_writeq

Thoughts ?

Jerin

> Konstantin
> 
> > +#else
> >  	return rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > +#endif
> >  }
> >  #define I40E_PCI_REG_WRITE(reg, value) \
> >  	do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
> > @@ -171,8 +177,16 @@ static inline uint32_t i40e_read_addr(volatile void *addr)
> >  	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value))
> > 
> >  #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg)))
> > +#if defined(RTE_ARCH_ARM64)
> > +#define wr32(a, reg, value) \
> > +	do { \
> > +		I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)); \
> > +		rte_wmb(); \
> > +	} while (0)
> > +#else
> >  #define wr32(a, reg, value) \
> >  	I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value))
> > +#endif
> >  #define flush(a) i40e_read_addr(I40E_PCI_REG_ADDR((a), (I40E_GLGEN_STAT)))
> > 
> >  #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
> > --
> > 2.7.4
> 

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-21 22:16   ` Jerin Jacob
@ 2016-11-22 13:46     ` Bruce Richardson
  2016-11-22 18:49       ` Jerin Jacob
  0 siblings, 1 reply; 11+ messages in thread
From: Bruce Richardson @ 2016-11-22 13:46 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: Ananyev, Konstantin, Satha Rao, Zhang, Helin, Wu, Jingjing,
	jianbo.liu, dev

On Tue, Nov 22, 2016 at 03:46:38AM +0530, Jerin Jacob wrote:
> On Sun, Nov 20, 2016 at 11:21:43PM +0000, Ananyev, Konstantin wrote:
> > Hi
> > > 
> > > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
> > >                        results. To solve this include rte memory barriers
> > > 
> > > Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> > > ---
> > >  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
> > >  1 file changed, 14 insertions(+)
> > > 
> > > diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> > > index 38e7ba5..ffa3160 100644
> > > --- a/drivers/net/i40e/base/i40e_osdep.h
> > > +++ b/drivers/net/i40e/base/i40e_osdep.h
> > > @@ -158,7 +158,13 @@ do {                                                            \
> > >  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
> > >  static inline uint32_t i40e_read_addr(volatile void *addr)
> > >  {
> > > +#if defined(RTE_ARCH_ARM64)
> > > +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > > +	rte_rmb();
> > > +	return val;
> > 
> > If you really need an rmb/wmb with MMIO read/writes on ARM,
> > I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
> > BTW, I suppose if you need it for i40e, you would need it for other devices too.
> 
> Yes. ARM would need for all devices(typically, the devices on external PCI bus).
> I guess rte_smp_rmb may not be the correct abstraction. So we need more of
> rte_rmb() as we need only non smp variant on IO side. I guess then it make sense to
> create new abstraction in eal with following variants so that each arch
> gets opportunity to make what it makes sense that specific platform
> 
> rte_readb_relaxed
> rte_readw_relaxed
> rte_readl_relaxed
> rte_readq_relaxed
> rte_writeb_relaxed
> rte_writew_relaxed
> rte_writel_relaxed
> rte_writeq_relaxed
> rte_readb
> rte_readw
> rte_readl
> rte_readq
> rte_writeb
> rte_writew
> rte_writel
> rte_writeq
> 
> Thoughts ?
> 

That seems like a lot of API calls!
Perhaps you can clarify - why would the rte_smp_rmb() not work for you?

/Bruce

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-22 13:46     ` Bruce Richardson
@ 2016-11-22 18:49       ` Jerin Jacob
  2016-11-30 17:52         ` Ananyev, Konstantin
  0 siblings, 1 reply; 11+ messages in thread
From: Jerin Jacob @ 2016-11-22 18:49 UTC (permalink / raw)
  To: Bruce Richardson
  Cc: Ananyev, Konstantin, Satha Rao, Zhang, Helin, Wu, Jingjing,
	jianbo.liu, dev

On Tue, Nov 22, 2016 at 01:46:54PM +0000, Bruce Richardson wrote:
> On Tue, Nov 22, 2016 at 03:46:38AM +0530, Jerin Jacob wrote:
> > On Sun, Nov 20, 2016 at 11:21:43PM +0000, Ananyev, Konstantin wrote:
> > > Hi
> > > > 
> > > > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
> > > >                        results. To solve this include rte memory barriers
> > > > 
> > > > Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> > > > ---
> > > >  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
> > > >  1 file changed, 14 insertions(+)
> > > > 
> > > > diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> > > > index 38e7ba5..ffa3160 100644
> > > > --- a/drivers/net/i40e/base/i40e_osdep.h
> > > > +++ b/drivers/net/i40e/base/i40e_osdep.h
> > > > @@ -158,7 +158,13 @@ do {                                                            \
> > > >  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
> > > >  static inline uint32_t i40e_read_addr(volatile void *addr)
> > > >  {
> > > > +#if defined(RTE_ARCH_ARM64)
> > > > +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > > > +	rte_rmb();
> > > > +	return val;
> > > 
> > > If you really need an rmb/wmb with MMIO read/writes on ARM,
> > > I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
> > > BTW, I suppose if you need it for i40e, you would need it for other devices too.
> > 
> > Yes. ARM would need for all devices(typically, the devices on external PCI bus).
> > I guess rte_smp_rmb may not be the correct abstraction. So we need more of
> > rte_rmb() as we need only non smp variant on IO side. I guess then it make sense to
> > create new abstraction in eal with following variants so that each arch
> > gets opportunity to make what it makes sense that specific platform
> > 
> > rte_readb_relaxed
> > rte_readw_relaxed
> > rte_readl_relaxed
> > rte_readq_relaxed
> > rte_writeb_relaxed
> > rte_writew_relaxed
> > rte_writel_relaxed
> > rte_writeq_relaxed
> > rte_readb
> > rte_readw
> > rte_readl
> > rte_readq
> > rte_writeb
> > rte_writew
> > rte_writel
> > rte_writeq
> > 
> > Thoughts ?
> > 
> 
> That seems like a lot of API calls!
> Perhaps you can clarify - why would the rte_smp_rmb() not work for you?

Currently arm64 mapped DMB as rte_smp_rmb() for smp case.

Ideally for io barrier and non smp case, we need to map it as DSB and it is
bit heavier than DMB

The linux kernel arm64 mappings
http://lxr.free-electrons.com/source/arch/arm64/include/asm/io.h#L142

DMB vs DSB
https://community.arm.com/thread/3833

The relaxed one are without any barriers.(the use case like accessing on
chip peripherals may need only relaxed versions)

Thoughts on new rte EAL abstraction?

> 
> /Bruce

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-22 18:49       ` Jerin Jacob
@ 2016-11-30 17:52         ` Ananyev, Konstantin
  2016-11-30 20:54           ` Jerin Jacob
  0 siblings, 1 reply; 11+ messages in thread
From: Ananyev, Konstantin @ 2016-11-30 17:52 UTC (permalink / raw)
  To: Jerin Jacob, Richardson, Bruce
  Cc: Satha Rao, Zhang, Helin, Wu, Jingjing, jianbo.liu, dev

Hi Jerin,

> 
> On Tue, Nov 22, 2016 at 01:46:54PM +0000, Bruce Richardson wrote:
> > On Tue, Nov 22, 2016 at 03:46:38AM +0530, Jerin Jacob wrote:
> > > On Sun, Nov 20, 2016 at 11:21:43PM +0000, Ananyev, Konstantin wrote:
> > > > Hi
> > > > >
> > > > > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
> > > > >                        results. To solve this include rte memory barriers
> > > > >
> > > > > Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> > > > > ---
> > > > >  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
> > > > >  1 file changed, 14 insertions(+)
> > > > >
> > > > > diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> > > > > index 38e7ba5..ffa3160 100644
> > > > > --- a/drivers/net/i40e/base/i40e_osdep.h
> > > > > +++ b/drivers/net/i40e/base/i40e_osdep.h
> > > > > @@ -158,7 +158,13 @@ do {                                                            \
> > > > >  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
> > > > >  static inline uint32_t i40e_read_addr(volatile void *addr)
> > > > >  {
> > > > > +#if defined(RTE_ARCH_ARM64)
> > > > > +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > > > > +	rte_rmb();
> > > > > +	return val;
> > > >
> > > > If you really need an rmb/wmb with MMIO read/writes on ARM,
> > > > I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
> > > > BTW, I suppose if you need it for i40e, you would need it for other devices too.
> > >
> > > Yes. ARM would need for all devices(typically, the devices on external PCI bus).
> > > I guess rte_smp_rmb may not be the correct abstraction. So we need more of
> > > rte_rmb() as we need only non smp variant on IO side. I guess then it make sense to
> > > create new abstraction in eal with following variants so that each arch
> > > gets opportunity to make what it makes sense that specific platform
> > >
> > > rte_readb_relaxed
> > > rte_readw_relaxed
> > > rte_readl_relaxed
> > > rte_readq_relaxed
> > > rte_writeb_relaxed
> > > rte_writew_relaxed
> > > rte_writel_relaxed
> > > rte_writeq_relaxed
> > > rte_readb
> > > rte_readw
> > > rte_readl
> > > rte_readq
> > > rte_writeb
> > > rte_writew
> > > rte_writel
> > > rte_writeq
> > >
> > > Thoughts ?
> > >
> >
> > That seems like a lot of API calls!
> > Perhaps you can clarify - why would the rte_smp_rmb() not work for you?
> 
> Currently arm64 mapped DMB as rte_smp_rmb() for smp case.
> 
> Ideally for io barrier and non smp case, we need to map it as DSB and it is
> bit heavier than DMB

Ok, so you need some new macro, like rte_io_(r|w)mb or so, that would expand into dmb
for ARM,  correct?

> 
> The linux kernel arm64 mappings
> http://lxr.free-electrons.com/source/arch/arm64/include/asm/io.h#L142
> 
> DMB vs DSB
> https://community.arm.com/thread/3833
> 
> The relaxed one are without any barriers.(the use case like accessing on
> chip peripherals may need only relaxed versions)
> 
> Thoughts on new rte EAL abstraction?

Looks like a lot of macros but if you guys think that would help - NP with that :)
Again, in that case we probably can get rid of driver specific pci reg read/write defines.

Konstantin

> 
> >
> > /Bruce

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-30 17:52         ` Ananyev, Konstantin
@ 2016-11-30 20:54           ` Jerin Jacob
  2016-12-01 11:38             ` Ananyev, Konstantin
  0 siblings, 1 reply; 11+ messages in thread
From: Jerin Jacob @ 2016-11-30 20:54 UTC (permalink / raw)
  To: Ananyev, Konstantin
  Cc: Richardson, Bruce, Satha Rao, Zhang, Helin, Wu, Jingjing,
	jianbo.liu, dev

On Wed, Nov 30, 2016 at 05:52:02PM +0000, Ananyev, Konstantin wrote:
> Hi Jerin,

Hi Konstantin,

> 
> > 
> > On Tue, Nov 22, 2016 at 01:46:54PM +0000, Bruce Richardson wrote:
> > > On Tue, Nov 22, 2016 at 03:46:38AM +0530, Jerin Jacob wrote:
> > > > On Sun, Nov 20, 2016 at 11:21:43PM +0000, Ananyev, Konstantin wrote:
> > > > Hi
> > > > > >
> > > > > > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
> > > > > >                        results. To solve this include rte memory barriers
> > > > > >
> > > > > > Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> > > > > > ---
> > > > > >  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
> > > > > >  1 file changed, 14 insertions(+)
> > > > > >
> > > > > > diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> > > > > > index 38e7ba5..ffa3160 100644
> > > > > > --- a/drivers/net/i40e/base/i40e_osdep.h
> > > > > > +++ b/drivers/net/i40e/base/i40e_osdep.h
> > > > > > @@ -158,7 +158,13 @@ do {                                                            \
> > > > > >  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
> > > > > >  static inline uint32_t i40e_read_addr(volatile void *addr)
> > > > > >  {
> > > > > > +#if defined(RTE_ARCH_ARM64)
> > > > > > +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > > > > > +	rte_rmb();
> > > > > > +	return val;
> > > > >
> > > > > If you really need an rmb/wmb with MMIO read/writes on ARM,
> > > > > I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
> > > > > BTW, I suppose if you need it for i40e, you would need it for other devices too.
> > > >
> > > > Yes. ARM would need for all devices(typically, the devices on external PCI bus).
> > > > I guess rte_smp_rmb may not be the correct abstraction. So we need more of
> > > > rte_rmb() as we need only non smp variant on IO side. I guess then it make sense to
> > > > create new abstraction in eal with following variants so that each arch
> > > > gets opportunity to make what it makes sense that specific platform
> > > >
> > > > rte_readb_relaxed
> > > > rte_readw_relaxed
> > > > rte_readl_relaxed
> > > > rte_readq_relaxed
> > > > rte_writeb_relaxed
> > > > rte_writew_relaxed
> > > > rte_writel_relaxed
> > > > rte_writeq_relaxed
> > > > rte_readb
> > > > rte_readw
> > > > rte_readl
> > > > rte_readq
> > > > rte_writeb
> > > > rte_writew
> > > > rte_writel
> > > > rte_writeq
> > > >
> > > > Thoughts ?
> > > >
> > >
> > > That seems like a lot of API calls!
> > > Perhaps you can clarify - why would the rte_smp_rmb() not work for you?
> > 
> > Currently arm64 mapped DMB as rte_smp_rmb() for smp case.
> > 
> > Ideally for io barrier and non smp case, we need to map it as DSB and it is
> > bit heavier than DMB
> 
> Ok, so you need some new macro, like rte_io_(r|w)mb or so, that would expand into dmb
> for ARM,  correct?

The io barrier expands to dsb.
http://lxr.free-electrons.com/source/arch/arm64/include/asm/io.h#L110

> 
> > 
> > The linux kernel arm64 mappings
> > http://lxr.free-electrons.com/source/arch/arm64/include/asm/io.h#L142
> > 
> > DMB vs DSB
> > https://community.arm.com/thread/3833
> > 
> > The relaxed one are without any barriers.(the use case like accessing on
> > chip peripherals may need only relaxed versions)
> > 
> > Thoughts on new rte EAL abstraction?
> 
> Looks like a lot of macros but if you guys think that would help - NP with that :)

I don't have strong opinion here. If there is concern on a lot of macros
then, I can introduce only "rte_io_(r|w)mb" instead of read[b|w|l|q]/write[b|w|l|q]/relaxed.
let me know?

> Again, in that case we probably can get rid of driver specific pci reg read/write defines.
Yes. But, That's going to have a lot of change :-(

If there is no objection then I will introduce
"read[b|w|l|q]/write[b|w|l|q]/relaxed" and then change all external pcie drivers
with new macros.

> 
> Konstantin
> 
> > 
> > >
> > > /Bruce

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-30 20:54           ` Jerin Jacob
@ 2016-12-01 11:38             ` Ananyev, Konstantin
  0 siblings, 0 replies; 11+ messages in thread
From: Ananyev, Konstantin @ 2016-12-01 11:38 UTC (permalink / raw)
  To: Jerin Jacob
  Cc: Richardson, Bruce, Satha Rao, Zhang, Helin, Wu, Jingjing,
	jianbo.liu, dev


Hi Jerin,
 
> > > > > > >
> > > > > > > i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
> > > > > > >                        results. To solve this include rte memory barriers
> > > > > > >
> > > > > > > Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> > > > > > > ---
> > > > > > >  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
> > > > > > >  1 file changed, 14 insertions(+)
> > > > > > >
> > > > > > > diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> > > > > > > index 38e7ba5..ffa3160 100644
> > > > > > > --- a/drivers/net/i40e/base/i40e_osdep.h
> > > > > > > +++ b/drivers/net/i40e/base/i40e_osdep.h
> > > > > > > @@ -158,7 +158,13 @@ do {                                                            \
> > > > > > >  	((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
> > > > > > >  static inline uint32_t i40e_read_addr(volatile void *addr)
> > > > > > >  {
> > > > > > > +#if defined(RTE_ARCH_ARM64)
> > > > > > > +	uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> > > > > > > +	rte_rmb();
> > > > > > > +	return val;
> > > > > >
> > > > > > If you really need an rmb/wmb with MMIO read/writes on ARM,
> > > > > > I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
> > > > > > BTW, I suppose if you need it for i40e, you would need it for other devices too.
> > > > >
> > > > > Yes. ARM would need for all devices(typically, the devices on external PCI bus).
> > > > > I guess rte_smp_rmb may not be the correct abstraction. So we need more of
> > > > > rte_rmb() as we need only non smp variant on IO side. I guess then it make sense to
> > > > > create new abstraction in eal with following variants so that each arch
> > > > > gets opportunity to make what it makes sense that specific platform
> > > > >
> > > > > rte_readb_relaxed
> > > > > rte_readw_relaxed
> > > > > rte_readl_relaxed
> > > > > rte_readq_relaxed
> > > > > rte_writeb_relaxed
> > > > > rte_writew_relaxed
> > > > > rte_writel_relaxed
> > > > > rte_writeq_relaxed
> > > > > rte_readb
> > > > > rte_readw
> > > > > rte_readl
> > > > > rte_readq
> > > > > rte_writeb
> > > > > rte_writew
> > > > > rte_writel
> > > > > rte_writeq
> > > > >
> > > > > Thoughts ?
> > > > >
> > > >
> > > > That seems like a lot of API calls!
> > > > Perhaps you can clarify - why would the rte_smp_rmb() not work for you?
> > >
> > > Currently arm64 mapped DMB as rte_smp_rmb() for smp case.
> > >
> > > Ideally for io barrier and non smp case, we need to map it as DSB and it is
> > > bit heavier than DMB
> >
> > Ok, so you need some new macro, like rte_io_(r|w)mb or so, that would expand into dmb
> > for ARM,  correct?
> 
> The io barrier expands to dsb.
> http://lxr.free-electrons.com/source/arch/arm64/include/asm/io.h#L110

Sorry, yes I meant  DSB here.

> 
> >
> > >
> > > The linux kernel arm64 mappings
> > > http://lxr.free-electrons.com/source/arch/arm64/include/asm/io.h#L142
> > >
> > > DMB vs DSB
> > > https://community.arm.com/thread/3833
> > >
> > > The relaxed one are without any barriers.(the use case like accessing on
> > > chip peripherals may need only relaxed versions)
> > >
> > > Thoughts on new rte EAL abstraction?
> >
> > Looks like a lot of macros but if you guys think that would help - NP with that :)
> 
> I don't have strong opinion here. If there is concern on a lot of macros
> then, I can introduce only "rte_io_(r|w)mb" instead of read[b|w|l|q]/write[b|w|l|q]/relaxed.
> let me know?

I think we can have both.
The question is in the amount of work need to be done.

> 
> > Again, in that case we probably can get rid of driver specific pci reg read/write defines.
> Yes. But, That's going to have a lot of change :-(

Yes I agree, the changes would be quite significant.

> 
> If there is no objection then I will introduce
> "read[b|w|l|q]/write[b|w|l|q]/relaxed" and then change all external pcie drivers
> with new macros.

That seems like a good idea to me.
Though as you said that seems quite a significant change.
Probably make sense to do it in 2 stages (just a suggestion): 
First introduce rte_io_(r|w)mb and fix with it existing issues in the particular drivers.
Second replace existing PMD specific xxx_read/write_addr() with your new generic 

Konstantin

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
  2016-11-18 12:52 [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX Satha Rao
  2016-11-18 15:00 ` Bruce Richardson
  2016-11-20 23:21 ` Ananyev, Konstantin
@ 2017-02-07 14:33 ` Ferruh Yigit
  2 siblings, 0 replies; 11+ messages in thread
From: Ferruh Yigit @ 2017-02-07 14:33 UTC (permalink / raw)
  To: Satha Rao, helin.zhang; +Cc: jingjing.wu, jerin.jacob, jianbo.liu, dev

On 11/18/2016 12:52 PM, Satha Rao wrote:
> i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
>                        results. To solve this include rte memory barriers
> 
> Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>

I remove this patch from patchwork.
"I/O device memory read/write API" patch should fix the issue, if not
please shout.

Thanks,
ferruh

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX
       [not found] <mailman.4.1479684107.2722.dev@dpdk.org>
@ 2016-11-30  1:23 ` Asok Tiyyagura
  0 siblings, 0 replies; 11+ messages in thread
From: Asok Tiyyagura @ 2016-11-30  1:23 UTC (permalink / raw)
  To: konstantin.ananyev
  Cc: skoteshwar, helin.zhang, jingjing.wu, jerin.jacob, jianbo.liu, dev

I can confirm we ran into a similar issue while using ixgbe.


-Asok


Hi
>
> i40e_asq_send_command: rd32 & wr32 under ThunderX gives unpredictable
>                        results. To solve this include rte memory barriers
>
> Signed-off-by: Satha Rao <skoteshwar@caviumnetworks.com>
> ---
>  drivers/net/i40e/base/i40e_osdep.h | 14 ++++++++++++++
>  1 file changed, 14 insertions(+)
>
> diff --git a/drivers/net/i40e/base/i40e_osdep.h b/drivers/net/i40e/base/i40e_osdep.h
> index 38e7ba5..ffa3160 100644
> --- a/drivers/net/i40e/base/i40e_osdep.h
> +++ b/drivers/net/i40e/base/i40e_osdep.h
> @@ -158,7 +158,13 @@ do {                                                            \
>        ((volatile uint32_t *)((char *)(a)->hw_addr + (reg)))
>  static inline uint32_t i40e_read_addr(volatile void *addr)
>  {
> +#if defined(RTE_ARCH_ARM64)
> +     uint32_t val = rte_le_to_cpu_32(I40E_PCI_REG(addr));
> +     rte_rmb();
> +     return val;

If you really need an rmb/wmb with MMIO read/writes on ARM,
I think you can avoid #ifdefs here and use rte_smp_rmb/rte_smp_wmb.
BTW, I suppose if you need it for i40e, you would need it for other devices too.
Konstantin

> +#else
>        return rte_le_to_cpu_32(I40E_PCI_REG(addr));
> +#endif
>  }
>  #define I40E_PCI_REG_WRITE(reg, value) \
>        do { I40E_PCI_REG((reg)) = rte_cpu_to_le_32(value); } while (0)
> @@ -171,8 +177,16 @@ static inline uint32_t i40e_read_addr(volatile void *addr)
>        I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((hw), (reg)), (value))
>
>  #define rd32(a, reg) i40e_read_addr(I40E_PCI_REG_ADDR((a), (reg)))
> +#if defined(RTE_ARCH_ARM64)
> +#define wr32(a, reg, value) \
> +     do { \
> +             I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value)); \
> +             rte_wmb(); \
> +     } while (0)
> +#else
>  #define wr32(a, reg, value) \
>        I40E_PCI_REG_WRITE(I40E_PCI_REG_ADDR((a), (reg)), (value))
> +#endif
>  #define flush(a) i40e_read_addr(I40E_PCI_REG_ADDR((a), (I40E_GLGEN_STAT)))
>
>  #define ARRAY_SIZE(arr) (sizeof(arr)/sizeof(arr[0]))
> --
> 2.7.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2017-02-07 14:33 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-11-18 12:52 [PATCH v2] i40e: Fix eth_i40e_dev_init sequence on ThunderX Satha Rao
2016-11-18 15:00 ` Bruce Richardson
2016-11-20 23:21 ` Ananyev, Konstantin
2016-11-21 22:16   ` Jerin Jacob
2016-11-22 13:46     ` Bruce Richardson
2016-11-22 18:49       ` Jerin Jacob
2016-11-30 17:52         ` Ananyev, Konstantin
2016-11-30 20:54           ` Jerin Jacob
2016-12-01 11:38             ` Ananyev, Konstantin
2017-02-07 14:33 ` Ferruh Yigit
     [not found] <mailman.4.1479684107.2722.dev@dpdk.org>
2016-11-30  1:23 ` Asok Tiyyagura

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.