All of lore.kernel.org
 help / color / mirror / Atom feed
* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-01  5:49 ` Michael Ellerman
  0 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2019-05-01  5:49 UTC (permalink / raw)
  To: vakul.garg
  Cc: linux-crypto, aymen.sghaier, davem, herbert, horia.geanta, linuxppc-dev

Vakul Garg wrote:
> In function caam_jr_dequeue(), a full memory barrier is used before
> writing response job ring's register to signal removal of the completed
> job. Therefore for writing the register, we do not need another write
> memory barrier. Hence it is removed by replacing the call to wr_reg32()
> with a newly defined function wr_reg32_relaxed().
> 
> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
> ---
>  drivers/crypto/caam/jr.c   | 2 +-
>  drivers/crypto/caam/regs.h | 8 ++++++++
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
> index 4e9b3fca5627..2ce6d7d2ad72 100644
> --- a/drivers/crypto/caam/jr.c
> +++ b/drivers/crypto/caam/jr.c
> @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg)
>  		mb();
>  
>  		/* set done */
> -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
> +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
>  
>  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>  					   (JOBR_DEPTH - 1);
> diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
> index 3cd0822ea819..9e912c722e33 100644
> --- a/drivers/crypto/caam/regs.h
> +++ b/drivers/crypto/caam/regs.h
> @@ -96,6 +96,14 @@ cpu_to_caam(16)
>  cpu_to_caam(32)
>  cpu_to_caam(64)
>  
> +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data)
> +{
> +	if (caam_little_end)
> +		writel_relaxed(data, reg);
> +	else
> +		writel_relaxed(cpu_to_be32(data), reg);
> +}
> +
>  static inline void wr_reg32(void __iomem *reg, u32 data)
>  {
>  	if (caam_little_end)

This crashes on my p5020ds. Did you test on powerpc?

# first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue

Log:

  ------------[ cut here ]------------
  kernel BUG at drivers/crypto/caam/jr.c:191!
  Oops: Exception in kernel mode, sig: 5 [#1]
  BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
  Modules linked in:
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2 #31
  NIP:  c00000000079d704 LR: c00000000079d498 CTR: c000000000086914
  REGS: c0000000fffc7970 TRAP: 0700   Not tainted  (5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2)
  MSR:  0000000080029000 <CE,EE,ME>  CR: 28008484  XER: 00000000
  IRQMASK: 0
  GPR00: c00000000079d6b0 c0000000fffc7c00 c000000000fbc800 0000000000000001
  GPR04: 000000007e080080 000000000000ffc0 0000000000000001 00000000000067d7
  GPR08: 00000000880401a9 0000000000000000 0000000000000001 00000000fa83b2da
  GPR12: 0000000028008224 c00000003ffff800 c000000000fc20b0 0000000000000100
  GPR16: 8920f09520bea117 c000000000def480 0000000000000000 0000000000000001
  GPR20: c000000000fc3940 c0000000f3537e18 0000000000000001 c000000001026cc5
  GPR24: 0000000000000001 c0000000f3328000 0000000000000001 c0000000f3451010
  GPR28: 0000000000000000 0000000000000001 0000000000000000 0000000000000000
  NIP [c00000000079d704] .caam_jr_dequeue+0x2f0/0x410
  LR [c00000000079d498] .caam_jr_dequeue+0x84/0x410
  Call Trace:
  [c0000000fffc7c00] [c00000000079d6b0] .caam_jr_dequeue+0x29c/0x410 (unreliable)
  [c0000000fffc7cd0] [c00000000004fef0] .tasklet_action_common.isra.3+0xac/0x180
  [c0000000fffc7d80] [c000000000a2f99c] .__do_softirq+0x174/0x3f8
  [c0000000fffc7e90] [c00000000004fb94] .irq_exit+0xc4/0xdc
  [c0000000fffc7f00] [c000000000007348] .__do_irq+0x8c/0x1b0
  [c0000000fffc7f90] [c0000000000150c4] .call_do_irq+0x14/0x24
  [c0000000f3137930] [c0000000000074e4] .do_IRQ+0x78/0xd4
  [c0000000f31379c0] [c000000000019998] exc_0x500_common+0xfc/0x100
  --- interrupt: 501 at .book3e_idle+0x24/0x5c
      LR = .book3e_idle+0x24/0x5c
  [c0000000f3137cc0] [c00000000000a6a4] .arch_cpu_idle+0x34/0xa0 (unreliable)
  [c0000000f3137d30] [c000000000a2f2e8] .default_idle_call+0x5c/0x70
  [c0000000f3137da0] [c000000000084210] .do_idle+0x1b0/0x1f4
  [c0000000f3137e40] [c000000000084434] .cpu_startup_entry+0x28/0x30
  [c0000000f3137eb0] [c000000000021538] .start_secondary+0x59c/0x5b0
  [c0000000f3137f90] [c00000000000045c] start_secondary_prolog+0x10/0x14
  Instruction dump:
  7d284a14 e9290018 2fa90000 40de001c 3bbd0001 57bd05fe 7d3db050 712901ff
  7fbd07b4 40e2ffcc 93b500dc 4bffff94 <0fe00000> 78890022 79270020 41d600ec
  ---[ end trace 7bedbdf37a95ab35 ]---

That's hitting:

		/* we should never fail to find a matching descriptor */
		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);

cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-01  5:49 ` Michael Ellerman
  0 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2019-05-01  5:49 UTC (permalink / raw)
  To: vakul.garg
  Cc: aymen.sghaier, herbert, horia.geanta, linux-crypto, linuxppc-dev, davem

Vakul Garg wrote:
> In function caam_jr_dequeue(), a full memory barrier is used before
> writing response job ring's register to signal removal of the completed
> job. Therefore for writing the register, we do not need another write
> memory barrier. Hence it is removed by replacing the call to wr_reg32()
> with a newly defined function wr_reg32_relaxed().
> 
> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
> ---
>  drivers/crypto/caam/jr.c   | 2 +-
>  drivers/crypto/caam/regs.h | 8 ++++++++
>  2 files changed, 9 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
> index 4e9b3fca5627..2ce6d7d2ad72 100644
> --- a/drivers/crypto/caam/jr.c
> +++ b/drivers/crypto/caam/jr.c
> @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg)
>  		mb();
>  
>  		/* set done */
> -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
> +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
>  
>  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>  					   (JOBR_DEPTH - 1);
> diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
> index 3cd0822ea819..9e912c722e33 100644
> --- a/drivers/crypto/caam/regs.h
> +++ b/drivers/crypto/caam/regs.h
> @@ -96,6 +96,14 @@ cpu_to_caam(16)
>  cpu_to_caam(32)
>  cpu_to_caam(64)
>  
> +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data)
> +{
> +	if (caam_little_end)
> +		writel_relaxed(data, reg);
> +	else
> +		writel_relaxed(cpu_to_be32(data), reg);
> +}
> +
>  static inline void wr_reg32(void __iomem *reg, u32 data)
>  {
>  	if (caam_little_end)

This crashes on my p5020ds. Did you test on powerpc?

# first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue

Log:

  ------------[ cut here ]------------
  kernel BUG at drivers/crypto/caam/jr.c:191!
  Oops: Exception in kernel mode, sig: 5 [#1]
  BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
  Modules linked in:
  CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2 #31
  NIP:  c00000000079d704 LR: c00000000079d498 CTR: c000000000086914
  REGS: c0000000fffc7970 TRAP: 0700   Not tainted  (5.1.0-rc1-gcc-8.2.0-00060-gbbfcac5ff5f2)
  MSR:  0000000080029000 <CE,EE,ME>  CR: 28008484  XER: 00000000
  IRQMASK: 0
  GPR00: c00000000079d6b0 c0000000fffc7c00 c000000000fbc800 0000000000000001
  GPR04: 000000007e080080 000000000000ffc0 0000000000000001 00000000000067d7
  GPR08: 00000000880401a9 0000000000000000 0000000000000001 00000000fa83b2da
  GPR12: 0000000028008224 c00000003ffff800 c000000000fc20b0 0000000000000100
  GPR16: 8920f09520bea117 c000000000def480 0000000000000000 0000000000000001
  GPR20: c000000000fc3940 c0000000f3537e18 0000000000000001 c000000001026cc5
  GPR24: 0000000000000001 c0000000f3328000 0000000000000001 c0000000f3451010
  GPR28: 0000000000000000 0000000000000001 0000000000000000 0000000000000000
  NIP [c00000000079d704] .caam_jr_dequeue+0x2f0/0x410
  LR [c00000000079d498] .caam_jr_dequeue+0x84/0x410
  Call Trace:
  [c0000000fffc7c00] [c00000000079d6b0] .caam_jr_dequeue+0x29c/0x410 (unreliable)
  [c0000000fffc7cd0] [c00000000004fef0] .tasklet_action_common.isra.3+0xac/0x180
  [c0000000fffc7d80] [c000000000a2f99c] .__do_softirq+0x174/0x3f8
  [c0000000fffc7e90] [c00000000004fb94] .irq_exit+0xc4/0xdc
  [c0000000fffc7f00] [c000000000007348] .__do_irq+0x8c/0x1b0
  [c0000000fffc7f90] [c0000000000150c4] .call_do_irq+0x14/0x24
  [c0000000f3137930] [c0000000000074e4] .do_IRQ+0x78/0xd4
  [c0000000f31379c0] [c000000000019998] exc_0x500_common+0xfc/0x100
  --- interrupt: 501 at .book3e_idle+0x24/0x5c
      LR = .book3e_idle+0x24/0x5c
  [c0000000f3137cc0] [c00000000000a6a4] .arch_cpu_idle+0x34/0xa0 (unreliable)
  [c0000000f3137d30] [c000000000a2f2e8] .default_idle_call+0x5c/0x70
  [c0000000f3137da0] [c000000000084210] .do_idle+0x1b0/0x1f4
  [c0000000f3137e40] [c000000000084434] .cpu_startup_entry+0x28/0x30
  [c0000000f3137eb0] [c000000000021538] .start_secondary+0x59c/0x5b0
  [c0000000f3137f90] [c00000000000045c] start_secondary_prolog+0x10/0x14
  Instruction dump:
  7d284a14 e9290018 2fa90000 40de001c 3bbd0001 57bd05fe 7d3db050 712901ff
  7fbd07b4 40e2ffcc 93b500dc 4bffff94 <0fe00000> 78890022 79270020 41d600ec
  ---[ end trace 7bedbdf37a95ab35 ]---

That's hitting:

		/* we should never fail to find a matching descriptor */
		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);

cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
  2019-05-01  5:49 ` Michael Ellerman
@ 2019-05-01 14:27   ` Vakul Garg
  -1 siblings, 0 replies; 14+ messages in thread
From: Vakul Garg @ 2019-05-01 14:27 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: linux-crypto, Aymen Sghaier, davem, herbert, Horia Geanta, linuxppc-dev



> -----Original Message-----
> From: Michael Ellerman <mpe@ellerman.id.au>
> Sent: Wednesday, May 1, 2019 11:20 AM
> To: Vakul Garg <vakul.garg@nxp.com>
> Cc: linux-crypto@vger.kernel.org; Aymen Sghaier
> <aymen.sghaier@nxp.com>; davem@davemloft.net;
> herbert@gondor.apana.org.au; Horia Geanta <horia.geanta@nxp.com>;
> linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during
> job ring dequeue
> 
> Vakul Garg wrote:
> > In function caam_jr_dequeue(), a full memory barrier is used before
> > writing response job ring's register to signal removal of the
> > completed job. Therefore for writing the register, we do not need
> > another write memory barrier. Hence it is removed by replacing the
> > call to wr_reg32() with a newly defined function wr_reg32_relaxed().
> >
> > Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
> > ---
> >  drivers/crypto/caam/jr.c   | 2 +-
> >  drivers/crypto/caam/regs.h | 8 ++++++++
> >  2 files changed, 9 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index
> > 4e9b3fca5627..2ce6d7d2ad72 100644
> > --- a/drivers/crypto/caam/jr.c
> > +++ b/drivers/crypto/caam/jr.c
> > @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long
> devarg)
> >  		mb();
> >
> >  		/* set done */
> > -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
> > +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
> >
> >  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
> >  					   (JOBR_DEPTH - 1);
> > diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
> > index 3cd0822ea819..9e912c722e33 100644
> > --- a/drivers/crypto/caam/regs.h
> > +++ b/drivers/crypto/caam/regs.h
> > @@ -96,6 +96,14 @@ cpu_to_caam(16)
> >  cpu_to_caam(32)
> >  cpu_to_caam(64)
> >
> > +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data) {
> > +	if (caam_little_end)
> > +		writel_relaxed(data, reg);
> > +	else
> > +		writel_relaxed(cpu_to_be32(data), reg); }
> > +
> >  static inline void wr_reg32(void __iomem *reg, u32 data)  {
> >  	if (caam_little_end)
> 
> This crashes on my p5020ds. Did you test on powerpc?
> 
I did not test on powerpc.

> # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto:
> caam/jr - Remove extra memory barrier during job ring dequeue
> 
> Log:
> 
>   ------------[ cut here ]------------
>   kernel BUG at drivers/crypto/caam/jr.c:191!
>   Oops: Exception in kernel mode, sig: 5 [#1]
>   BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
>   Modules linked in:
>   CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-
> gbbfcac5ff5f2 #31
>   NIP:  c00000000079d704 LR: c00000000079d498 CTR: c000000000086914
>   REGS: c0000000fffc7970 TRAP: 0700   Not tainted  (5.1.0-rc1-gcc-8.2.0-
> 00060-gbbfcac5ff5f2)
>   MSR:  0000000080029000 <CE,EE,ME>  CR: 28008484  XER: 00000000
>   IRQMASK: 0
>   GPR00: c00000000079d6b0 c0000000fffc7c00 c000000000fbc800
> 0000000000000001
>   GPR04: 000000007e080080 000000000000ffc0 0000000000000001
> 00000000000067d7
>   GPR08: 00000000880401a9 0000000000000000 0000000000000001
> 00000000fa83b2da
>   GPR12: 0000000028008224 c00000003ffff800 c000000000fc20b0
> 0000000000000100
>   GPR16: 8920f09520bea117 c000000000def480 0000000000000000
> 0000000000000001
>   GPR20: c000000000fc3940 c0000000f3537e18 0000000000000001
> c000000001026cc5
>   GPR24: 0000000000000001 c0000000f3328000 0000000000000001
> c0000000f3451010
>   GPR28: 0000000000000000 0000000000000001 0000000000000000
> 0000000000000000
>   NIP [c00000000079d704] .caam_jr_dequeue+0x2f0/0x410
>   LR [c00000000079d498] .caam_jr_dequeue+0x84/0x410
>   Call Trace:
>   [c0000000fffc7c00] [c00000000079d6b0] .caam_jr_dequeue+0x29c/0x410
> (unreliable)
>   [c0000000fffc7cd0] [c00000000004fef0]
> .tasklet_action_common.isra.3+0xac/0x180
>   [c0000000fffc7d80] [c000000000a2f99c] .__do_softirq+0x174/0x3f8
>   [c0000000fffc7e90] [c00000000004fb94] .irq_exit+0xc4/0xdc
>   [c0000000fffc7f00] [c000000000007348] .__do_irq+0x8c/0x1b0
>   [c0000000fffc7f90] [c0000000000150c4] .call_do_irq+0x14/0x24
>   [c0000000f3137930] [c0000000000074e4] .do_IRQ+0x78/0xd4
>   [c0000000f31379c0] [c000000000019998]
> exc_0x500_common+0xfc/0x100
>   --- interrupt: 501 at .book3e_idle+0x24/0x5c
>       LR = .book3e_idle+0x24/0x5c
>   [c0000000f3137cc0] [c00000000000a6a4] .arch_cpu_idle+0x34/0xa0
> (unreliable)
>   [c0000000f3137d30] [c000000000a2f2e8] .default_idle_call+0x5c/0x70
>   [c0000000f3137da0] [c000000000084210] .do_idle+0x1b0/0x1f4
>   [c0000000f3137e40] [c000000000084434] .cpu_startup_entry+0x28/0x30
>   [c0000000f3137eb0] [c000000000021538] .start_secondary+0x59c/0x5b0
>   [c0000000f3137f90] [c00000000000045c]
> start_secondary_prolog+0x10/0x14
>   Instruction dump:
>   7d284a14 e9290018 2fa90000 40de001c 3bbd0001 57bd05fe 7d3db050
> 712901ff
>   7fbd07b4 40e2ffcc 93b500dc 4bffff94 <0fe00000> 78890022 79270020
> 41d600ec
>   ---[ end trace 7bedbdf37a95ab35 ]---
> 
> That's hitting:
> 
> 		/* we should never fail to find a matching descriptor */
> 		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);
> 

Is it hitting under high traffic to caam?
How to reproduce it?
 

> cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-01 14:27   ` Vakul Garg
  0 siblings, 0 replies; 14+ messages in thread
From: Vakul Garg @ 2019-05-01 14:27 UTC (permalink / raw)
  To: Michael Ellerman
  Cc: Aymen Sghaier, herbert, Horia Geanta, linux-crypto, linuxppc-dev, davem



> -----Original Message-----
> From: Michael Ellerman <mpe@ellerman.id.au>
> Sent: Wednesday, May 1, 2019 11:20 AM
> To: Vakul Garg <vakul.garg@nxp.com>
> Cc: linux-crypto@vger.kernel.org; Aymen Sghaier
> <aymen.sghaier@nxp.com>; davem@davemloft.net;
> herbert@gondor.apana.org.au; Horia Geanta <horia.geanta@nxp.com>;
> linuxppc-dev@lists.ozlabs.org
> Subject: Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during
> job ring dequeue
> 
> Vakul Garg wrote:
> > In function caam_jr_dequeue(), a full memory barrier is used before
> > writing response job ring's register to signal removal of the
> > completed job. Therefore for writing the register, we do not need
> > another write memory barrier. Hence it is removed by replacing the
> > call to wr_reg32() with a newly defined function wr_reg32_relaxed().
> >
> > Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
> > ---
> >  drivers/crypto/caam/jr.c   | 2 +-
> >  drivers/crypto/caam/regs.h | 8 ++++++++
> >  2 files changed, 9 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index
> > 4e9b3fca5627..2ce6d7d2ad72 100644
> > --- a/drivers/crypto/caam/jr.c
> > +++ b/drivers/crypto/caam/jr.c
> > @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long
> devarg)
> >  		mb();
> >
> >  		/* set done */
> > -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
> > +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
> >
> >  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
> >  					   (JOBR_DEPTH - 1);
> > diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
> > index 3cd0822ea819..9e912c722e33 100644
> > --- a/drivers/crypto/caam/regs.h
> > +++ b/drivers/crypto/caam/regs.h
> > @@ -96,6 +96,14 @@ cpu_to_caam(16)
> >  cpu_to_caam(32)
> >  cpu_to_caam(64)
> >
> > +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data) {
> > +	if (caam_little_end)
> > +		writel_relaxed(data, reg);
> > +	else
> > +		writel_relaxed(cpu_to_be32(data), reg); }
> > +
> >  static inline void wr_reg32(void __iomem *reg, u32 data)  {
> >  	if (caam_little_end)
> 
> This crashes on my p5020ds. Did you test on powerpc?
> 
I did not test on powerpc.

> # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto:
> caam/jr - Remove extra memory barrier during job ring dequeue
> 
> Log:
> 
>   ------------[ cut here ]------------
>   kernel BUG at drivers/crypto/caam/jr.c:191!
>   Oops: Exception in kernel mode, sig: 5 [#1]
>   BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
>   Modules linked in:
>   CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-
> gbbfcac5ff5f2 #31
>   NIP:  c00000000079d704 LR: c00000000079d498 CTR: c000000000086914
>   REGS: c0000000fffc7970 TRAP: 0700   Not tainted  (5.1.0-rc1-gcc-8.2.0-
> 00060-gbbfcac5ff5f2)
>   MSR:  0000000080029000 <CE,EE,ME>  CR: 28008484  XER: 00000000
>   IRQMASK: 0
>   GPR00: c00000000079d6b0 c0000000fffc7c00 c000000000fbc800
> 0000000000000001
>   GPR04: 000000007e080080 000000000000ffc0 0000000000000001
> 00000000000067d7
>   GPR08: 00000000880401a9 0000000000000000 0000000000000001
> 00000000fa83b2da
>   GPR12: 0000000028008224 c00000003ffff800 c000000000fc20b0
> 0000000000000100
>   GPR16: 8920f09520bea117 c000000000def480 0000000000000000
> 0000000000000001
>   GPR20: c000000000fc3940 c0000000f3537e18 0000000000000001
> c000000001026cc5
>   GPR24: 0000000000000001 c0000000f3328000 0000000000000001
> c0000000f3451010
>   GPR28: 0000000000000000 0000000000000001 0000000000000000
> 0000000000000000
>   NIP [c00000000079d704] .caam_jr_dequeue+0x2f0/0x410
>   LR [c00000000079d498] .caam_jr_dequeue+0x84/0x410
>   Call Trace:
>   [c0000000fffc7c00] [c00000000079d6b0] .caam_jr_dequeue+0x29c/0x410
> (unreliable)
>   [c0000000fffc7cd0] [c00000000004fef0]
> .tasklet_action_common.isra.3+0xac/0x180
>   [c0000000fffc7d80] [c000000000a2f99c] .__do_softirq+0x174/0x3f8
>   [c0000000fffc7e90] [c00000000004fb94] .irq_exit+0xc4/0xdc
>   [c0000000fffc7f00] [c000000000007348] .__do_irq+0x8c/0x1b0
>   [c0000000fffc7f90] [c0000000000150c4] .call_do_irq+0x14/0x24
>   [c0000000f3137930] [c0000000000074e4] .do_IRQ+0x78/0xd4
>   [c0000000f31379c0] [c000000000019998]
> exc_0x500_common+0xfc/0x100
>   --- interrupt: 501 at .book3e_idle+0x24/0x5c
>       LR = .book3e_idle+0x24/0x5c
>   [c0000000f3137cc0] [c00000000000a6a4] .arch_cpu_idle+0x34/0xa0
> (unreliable)
>   [c0000000f3137d30] [c000000000a2f2e8] .default_idle_call+0x5c/0x70
>   [c0000000f3137da0] [c000000000084210] .do_idle+0x1b0/0x1f4
>   [c0000000f3137e40] [c000000000084434] .cpu_startup_entry+0x28/0x30
>   [c0000000f3137eb0] [c000000000021538] .start_secondary+0x59c/0x5b0
>   [c0000000f3137f90] [c00000000000045c]
> start_secondary_prolog+0x10/0x14
>   Instruction dump:
>   7d284a14 e9290018 2fa90000 40de001c 3bbd0001 57bd05fe 7d3db050
> 712901ff
>   7fbd07b4 40e2ffcc 93b500dc 4bffff94 <0fe00000> 78890022 79270020
> 41d600ec
>   ---[ end trace 7bedbdf37a95ab35 ]---
> 
> That's hitting:
> 
> 		/* we should never fail to find a matching descriptor */
> 		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);
> 

Is it hitting under high traffic to caam?
How to reproduce it?
 

> cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
  2019-05-01 14:27   ` Vakul Garg
@ 2019-05-02  5:00     ` Michael Ellerman
  -1 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2019-05-02  5:00 UTC (permalink / raw)
  To: Vakul Garg
  Cc: linux-crypto, Aymen Sghaier, davem, herbert, Horia Geanta, linuxppc-dev

Vakul Garg <vakul.garg@nxp.com> writes:
>> -----Original Message-----
>> From: Michael Ellerman <mpe@ellerman.id.au>
>> Sent: Wednesday, May 1, 2019 11:20 AM
>> To: Vakul Garg <vakul.garg@nxp.com>
>> Cc: linux-crypto@vger.kernel.org; Aymen Sghaier
>> <aymen.sghaier@nxp.com>; davem@davemloft.net;
>> herbert@gondor.apana.org.au; Horia Geanta <horia.geanta@nxp.com>;
>> linuxppc-dev@lists.ozlabs.org
>> Subject: Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during
>> job ring dequeue
>> 
>> Vakul Garg wrote:
>> > In function caam_jr_dequeue(), a full memory barrier is used before
>> > writing response job ring's register to signal removal of the
>> > completed job. Therefore for writing the register, we do not need
>> > another write memory barrier. Hence it is removed by replacing the
>> > call to wr_reg32() with a newly defined function wr_reg32_relaxed().
>> >
>> > Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
>> > ---
>> >  drivers/crypto/caam/jr.c   | 2 +-
>> >  drivers/crypto/caam/regs.h | 8 ++++++++
>> >  2 files changed, 9 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index
>> > 4e9b3fca5627..2ce6d7d2ad72 100644
>> > --- a/drivers/crypto/caam/jr.c
>> > +++ b/drivers/crypto/caam/jr.c
>> > @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long
>> devarg)
>> >  		mb();
>> >
>> >  		/* set done */
>> > -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
>> > +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
>> >
>> >  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>> >  					   (JOBR_DEPTH - 1);
>> > diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
>> > index 3cd0822ea819..9e912c722e33 100644
>> > --- a/drivers/crypto/caam/regs.h
>> > +++ b/drivers/crypto/caam/regs.h
>> > @@ -96,6 +96,14 @@ cpu_to_caam(16)
>> >  cpu_to_caam(32)
>> >  cpu_to_caam(64)
>> >
>> > +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data) {
>> > +	if (caam_little_end)
>> > +		writel_relaxed(data, reg);
>> > +	else
>> > +		writel_relaxed(cpu_to_be32(data), reg); }
>> > +
>> >  static inline void wr_reg32(void __iomem *reg, u32 data)  {
>> >  	if (caam_little_end)
>> 
>> This crashes on my p5020ds. Did you test on powerpc?
>> 
> I did not test on powerpc.

OK, so I might be the first person who has :)

>> # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto:
>> caam/jr - Remove extra memory barrier during job ring dequeue
>> 
>> Log:
>> 
>>   ------------[ cut here ]------------
>>   kernel BUG at drivers/crypto/caam/jr.c:191!
>>   Oops: Exception in kernel mode, sig: 5 [#1]
>>   BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
>>   Modules linked in:
>>   CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-
>> gbbfcac5ff5f2 #31
>>   NIP:  c00000000079d704 LR: c00000000079d498 CTR: c000000000086914
>>   REGS: c0000000fffc7970 TRAP: 0700   Not tainted  (5.1.0-rc1-gcc-8.2.0-
>> 00060-gbbfcac5ff5f2)
>>   MSR:  0000000080029000 <CE,EE,ME>  CR: 28008484  XER: 00000000
>>   IRQMASK: 0
>>   GPR00: c00000000079d6b0 c0000000fffc7c00 c000000000fbc800
>> 0000000000000001
>>   GPR04: 000000007e080080 000000000000ffc0 0000000000000001
>> 00000000000067d7
>>   GPR08: 00000000880401a9 0000000000000000 0000000000000001
>> 00000000fa83b2da
>>   GPR12: 0000000028008224 c00000003ffff800 c000000000fc20b0
>> 0000000000000100
>>   GPR16: 8920f09520bea117 c000000000def480 0000000000000000
>> 0000000000000001
>>   GPR20: c000000000fc3940 c0000000f3537e18 0000000000000001
>> c000000001026cc5
>>   GPR24: 0000000000000001 c0000000f3328000 0000000000000001
>> c0000000f3451010
>>   GPR28: 0000000000000000 0000000000000001 0000000000000000
>> 0000000000000000
>>   NIP [c00000000079d704] .caam_jr_dequeue+0x2f0/0x410
>>   LR [c00000000079d498] .caam_jr_dequeue+0x84/0x410
>>   Call Trace:
>>   [c0000000fffc7c00] [c00000000079d6b0] .caam_jr_dequeue+0x29c/0x410
>> (unreliable)
>>   [c0000000fffc7cd0] [c00000000004fef0]
>> .tasklet_action_common.isra.3+0xac/0x180
>>   [c0000000fffc7d80] [c000000000a2f99c] .__do_softirq+0x174/0x3f8
>>   [c0000000fffc7e90] [c00000000004fb94] .irq_exit+0xc4/0xdc
>>   [c0000000fffc7f00] [c000000000007348] .__do_irq+0x8c/0x1b0
>>   [c0000000fffc7f90] [c0000000000150c4] .call_do_irq+0x14/0x24
>>   [c0000000f3137930] [c0000000000074e4] .do_IRQ+0x78/0xd4
>>   [c0000000f31379c0] [c000000000019998]
>> exc_0x500_common+0xfc/0x100
>>   --- interrupt: 501 at .book3e_idle+0x24/0x5c
>>       LR = .book3e_idle+0x24/0x5c
>>   [c0000000f3137cc0] [c00000000000a6a4] .arch_cpu_idle+0x34/0xa0
>> (unreliable)
>>   [c0000000f3137d30] [c000000000a2f2e8] .default_idle_call+0x5c/0x70
>>   [c0000000f3137da0] [c000000000084210] .do_idle+0x1b0/0x1f4
>>   [c0000000f3137e40] [c000000000084434] .cpu_startup_entry+0x28/0x30
>>   [c0000000f3137eb0] [c000000000021538] .start_secondary+0x59c/0x5b0
>>   [c0000000f3137f90] [c00000000000045c]
>> start_secondary_prolog+0x10/0x14
>>   Instruction dump:
>>   7d284a14 e9290018 2fa90000 40de001c 3bbd0001 57bd05fe 7d3db050
>> 712901ff
>>   7fbd07b4 40e2ffcc 93b500dc 4bffff94 <0fe00000> 78890022 79270020
>> 41d600ec
>>   ---[ end trace 7bedbdf37a95ab35 ]---
>> 
>> That's hitting:
>> 
>> 		/* we should never fail to find a matching descriptor */
>> 		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);
>> 
>
> Is it hitting under high traffic to caam?
> How to reproduce it?

No that's just booting the system. I don't even have the crypto
selftests enabled.

cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* RE: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-02  5:00     ` Michael Ellerman
  0 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2019-05-02  5:00 UTC (permalink / raw)
  To: Vakul Garg
  Cc: Aymen Sghaier, herbert, Horia Geanta, linux-crypto, linuxppc-dev, davem

Vakul Garg <vakul.garg@nxp.com> writes:
>> -----Original Message-----
>> From: Michael Ellerman <mpe@ellerman.id.au>
>> Sent: Wednesday, May 1, 2019 11:20 AM
>> To: Vakul Garg <vakul.garg@nxp.com>
>> Cc: linux-crypto@vger.kernel.org; Aymen Sghaier
>> <aymen.sghaier@nxp.com>; davem@davemloft.net;
>> herbert@gondor.apana.org.au; Horia Geanta <horia.geanta@nxp.com>;
>> linuxppc-dev@lists.ozlabs.org
>> Subject: Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during
>> job ring dequeue
>> 
>> Vakul Garg wrote:
>> > In function caam_jr_dequeue(), a full memory barrier is used before
>> > writing response job ring's register to signal removal of the
>> > completed job. Therefore for writing the register, we do not need
>> > another write memory barrier. Hence it is removed by replacing the
>> > call to wr_reg32() with a newly defined function wr_reg32_relaxed().
>> >
>> > Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
>> > ---
>> >  drivers/crypto/caam/jr.c   | 2 +-
>> >  drivers/crypto/caam/regs.h | 8 ++++++++
>> >  2 files changed, 9 insertions(+), 1 deletion(-)
>> >
>> > diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index
>> > 4e9b3fca5627..2ce6d7d2ad72 100644
>> > --- a/drivers/crypto/caam/jr.c
>> > +++ b/drivers/crypto/caam/jr.c
>> > @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long
>> devarg)
>> >  		mb();
>> >
>> >  		/* set done */
>> > -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
>> > +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
>> >
>> >  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>> >  					   (JOBR_DEPTH - 1);
>> > diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
>> > index 3cd0822ea819..9e912c722e33 100644
>> > --- a/drivers/crypto/caam/regs.h
>> > +++ b/drivers/crypto/caam/regs.h
>> > @@ -96,6 +96,14 @@ cpu_to_caam(16)
>> >  cpu_to_caam(32)
>> >  cpu_to_caam(64)
>> >
>> > +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data) {
>> > +	if (caam_little_end)
>> > +		writel_relaxed(data, reg);
>> > +	else
>> > +		writel_relaxed(cpu_to_be32(data), reg); }
>> > +
>> >  static inline void wr_reg32(void __iomem *reg, u32 data)  {
>> >  	if (caam_little_end)
>> 
>> This crashes on my p5020ds. Did you test on powerpc?
>> 
> I did not test on powerpc.

OK, so I might be the first person who has :)

>> # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto:
>> caam/jr - Remove extra memory barrier during job ring dequeue
>> 
>> Log:
>> 
>>   ------------[ cut here ]------------
>>   kernel BUG at drivers/crypto/caam/jr.c:191!
>>   Oops: Exception in kernel mode, sig: 5 [#1]
>>   BE PAGE_SIZE=4K SMP NR_CPUS=24 CoreNet Generic
>>   Modules linked in:
>>   CPU: 1 PID: 0 Comm: swapper/1 Not tainted 5.1.0-rc1-gcc-8.2.0-00060-
>> gbbfcac5ff5f2 #31
>>   NIP:  c00000000079d704 LR: c00000000079d498 CTR: c000000000086914
>>   REGS: c0000000fffc7970 TRAP: 0700   Not tainted  (5.1.0-rc1-gcc-8.2.0-
>> 00060-gbbfcac5ff5f2)
>>   MSR:  0000000080029000 <CE,EE,ME>  CR: 28008484  XER: 00000000
>>   IRQMASK: 0
>>   GPR00: c00000000079d6b0 c0000000fffc7c00 c000000000fbc800
>> 0000000000000001
>>   GPR04: 000000007e080080 000000000000ffc0 0000000000000001
>> 00000000000067d7
>>   GPR08: 00000000880401a9 0000000000000000 0000000000000001
>> 00000000fa83b2da
>>   GPR12: 0000000028008224 c00000003ffff800 c000000000fc20b0
>> 0000000000000100
>>   GPR16: 8920f09520bea117 c000000000def480 0000000000000000
>> 0000000000000001
>>   GPR20: c000000000fc3940 c0000000f3537e18 0000000000000001
>> c000000001026cc5
>>   GPR24: 0000000000000001 c0000000f3328000 0000000000000001
>> c0000000f3451010
>>   GPR28: 0000000000000000 0000000000000001 0000000000000000
>> 0000000000000000
>>   NIP [c00000000079d704] .caam_jr_dequeue+0x2f0/0x410
>>   LR [c00000000079d498] .caam_jr_dequeue+0x84/0x410
>>   Call Trace:
>>   [c0000000fffc7c00] [c00000000079d6b0] .caam_jr_dequeue+0x29c/0x410
>> (unreliable)
>>   [c0000000fffc7cd0] [c00000000004fef0]
>> .tasklet_action_common.isra.3+0xac/0x180
>>   [c0000000fffc7d80] [c000000000a2f99c] .__do_softirq+0x174/0x3f8
>>   [c0000000fffc7e90] [c00000000004fb94] .irq_exit+0xc4/0xdc
>>   [c0000000fffc7f00] [c000000000007348] .__do_irq+0x8c/0x1b0
>>   [c0000000fffc7f90] [c0000000000150c4] .call_do_irq+0x14/0x24
>>   [c0000000f3137930] [c0000000000074e4] .do_IRQ+0x78/0xd4
>>   [c0000000f31379c0] [c000000000019998]
>> exc_0x500_common+0xfc/0x100
>>   --- interrupt: 501 at .book3e_idle+0x24/0x5c
>>       LR = .book3e_idle+0x24/0x5c
>>   [c0000000f3137cc0] [c00000000000a6a4] .arch_cpu_idle+0x34/0xa0
>> (unreliable)
>>   [c0000000f3137d30] [c000000000a2f2e8] .default_idle_call+0x5c/0x70
>>   [c0000000f3137da0] [c000000000084210] .do_idle+0x1b0/0x1f4
>>   [c0000000f3137e40] [c000000000084434] .cpu_startup_entry+0x28/0x30
>>   [c0000000f3137eb0] [c000000000021538] .start_secondary+0x59c/0x5b0
>>   [c0000000f3137f90] [c00000000000045c]
>> start_secondary_prolog+0x10/0x14
>>   Instruction dump:
>>   7d284a14 e9290018 2fa90000 40de001c 3bbd0001 57bd05fe 7d3db050
>> 712901ff
>>   7fbd07b4 40e2ffcc 93b500dc 4bffff94 <0fe00000> 78890022 79270020
>> 41d600ec
>>   ---[ end trace 7bedbdf37a95ab35 ]---
>> 
>> That's hitting:
>> 
>> 		/* we should never fail to find a matching descriptor */
>> 		BUG_ON(CIRC_CNT(head, tail + i, JOBR_DEPTH) <= 0);
>> 
>
> Is it hitting under high traffic to caam?
> How to reproduce it?

No that's just booting the system. I don't even have the crypto
selftests enabled.

cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
  2019-05-01  5:49 ` Michael Ellerman
@ 2019-05-02 11:08   ` Horia Geanta
  -1 siblings, 0 replies; 14+ messages in thread
From: Horia Geanta @ 2019-05-02 11:08 UTC (permalink / raw)
  To: Michael Ellerman, Vakul Garg
  Cc: linux-crypto, Aymen Sghaier, davem, herbert, linuxppc-dev

On 5/1/2019 8:49 AM, Michael Ellerman wrote:
> Vakul Garg wrote:
>> In function caam_jr_dequeue(), a full memory barrier is used before
>> writing response job ring's register to signal removal of the completed
>> job. Therefore for writing the register, we do not need another write
>> memory barrier. Hence it is removed by replacing the call to wr_reg32()
>> with a newly defined function wr_reg32_relaxed().
>>
>> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
>> ---
>>  drivers/crypto/caam/jr.c   | 2 +-
>>  drivers/crypto/caam/regs.h | 8 ++++++++
>>  2 files changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
>> index 4e9b3fca5627..2ce6d7d2ad72 100644
>> --- a/drivers/crypto/caam/jr.c
>> +++ b/drivers/crypto/caam/jr.c
>> @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg)
>>  		mb();
>>  
>>  		/* set done */
>> -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
>> +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
>>  
>>  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>>  					   (JOBR_DEPTH - 1);
>> diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
>> index 3cd0822ea819..9e912c722e33 100644
>> --- a/drivers/crypto/caam/regs.h
>> +++ b/drivers/crypto/caam/regs.h
>> @@ -96,6 +96,14 @@ cpu_to_caam(16)
>>  cpu_to_caam(32)
>>  cpu_to_caam(64)
>>  
>> +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data)
>> +{
>> +	if (caam_little_end)
>> +		writel_relaxed(data, reg);
>> +	else
>> +		writel_relaxed(cpu_to_be32(data), reg);
>> +}
When both core (PPC) and crypto engine (caam) are big endian, data ends up being
swapped - which is incorrect:
writel_relaxed -> writel -> __do_writel -> out_le32 -> swap
cpu_to_be32(data) -> data

>> +
>>  static inline void wr_reg32(void __iomem *reg, u32 data)
>>  {
>>  	if (caam_little_end)
> 
> This crashes on my p5020ds. Did you test on powerpc?
> 
> # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue

Thanks for the report Michael.

Any hint what would be the proper approach here - to have relaxed I/O accessors
that would work both for ARM and PPC, and avoid ifdeffery etc.?

For non-relaxed version, we used iowriteXX and iowriteXXbe - which work fine on
ARM and PPC, covering all the endianness combinations (core + crypto engine):

static inline void wr_reg32(void __iomem *reg, u32 data)
{
        if (caam_little_end)
                iowrite32(data, reg);
        else
                iowrite32be(data, reg);
}

Thanks,
Horia

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-02 11:08   ` Horia Geanta
  0 siblings, 0 replies; 14+ messages in thread
From: Horia Geanta @ 2019-05-02 11:08 UTC (permalink / raw)
  To: Michael Ellerman, Vakul Garg
  Cc: herbert, Aymen Sghaier, linuxppc-dev, linux-crypto, davem

On 5/1/2019 8:49 AM, Michael Ellerman wrote:
> Vakul Garg wrote:
>> In function caam_jr_dequeue(), a full memory barrier is used before
>> writing response job ring's register to signal removal of the completed
>> job. Therefore for writing the register, we do not need another write
>> memory barrier. Hence it is removed by replacing the call to wr_reg32()
>> with a newly defined function wr_reg32_relaxed().
>>
>> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
>> ---
>>  drivers/crypto/caam/jr.c   | 2 +-
>>  drivers/crypto/caam/regs.h | 8 ++++++++
>>  2 files changed, 9 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
>> index 4e9b3fca5627..2ce6d7d2ad72 100644
>> --- a/drivers/crypto/caam/jr.c
>> +++ b/drivers/crypto/caam/jr.c
>> @@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg)
>>  		mb();
>>  
>>  		/* set done */
>> -		wr_reg32(&jrp->rregs->outring_rmvd, 1);
>> +		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
>>  
>>  		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
>>  					   (JOBR_DEPTH - 1);
>> diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
>> index 3cd0822ea819..9e912c722e33 100644
>> --- a/drivers/crypto/caam/regs.h
>> +++ b/drivers/crypto/caam/regs.h
>> @@ -96,6 +96,14 @@ cpu_to_caam(16)
>>  cpu_to_caam(32)
>>  cpu_to_caam(64)
>>  
>> +static inline void wr_reg32_relaxed(void __iomem *reg, u32 data)
>> +{
>> +	if (caam_little_end)
>> +		writel_relaxed(data, reg);
>> +	else
>> +		writel_relaxed(cpu_to_be32(data), reg);
>> +}
When both core (PPC) and crypto engine (caam) are big endian, data ends up being
swapped - which is incorrect:
writel_relaxed -> writel -> __do_writel -> out_le32 -> swap
cpu_to_be32(data) -> data

>> +
>>  static inline void wr_reg32(void __iomem *reg, u32 data)
>>  {
>>  	if (caam_little_end)
> 
> This crashes on my p5020ds. Did you test on powerpc?
> 
> # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue

Thanks for the report Michael.

Any hint what would be the proper approach here - to have relaxed I/O accessors
that would work both for ARM and PPC, and avoid ifdeffery etc.?

For non-relaxed version, we used iowriteXX and iowriteXXbe - which work fine on
ARM and PPC, covering all the endianness combinations (core + crypto engine):

static inline void wr_reg32(void __iomem *reg, u32 data)
{
        if (caam_little_end)
                iowrite32(data, reg);
        else
                iowrite32be(data, reg);
}

Thanks,
Horia

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
  2019-05-02 11:08   ` Horia Geanta
@ 2019-05-09  5:23     ` Herbert Xu
  -1 siblings, 0 replies; 14+ messages in thread
From: Herbert Xu @ 2019-05-09  5:23 UTC (permalink / raw)
  To: Horia Geanta
  Cc: Michael Ellerman, Vakul Garg, linux-crypto, Aymen Sghaier, davem,
	linuxppc-dev

On Thu, May 02, 2019 at 11:08:55AM +0000, Horia Geanta wrote:
>
> >> +
> >>  static inline void wr_reg32(void __iomem *reg, u32 data)
> >>  {
> >>  	if (caam_little_end)
> > 
> > This crashes on my p5020ds. Did you test on powerpc?
> > 
> > # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
> 
> Thanks for the report Michael.
> 
> Any hint what would be the proper approach here - to have relaxed I/O accessors
> that would work both for ARM and PPC, and avoid ifdeffery etc.?

Since no fix has been found I'm reverting the commit in question.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-09  5:23     ` Herbert Xu
  0 siblings, 0 replies; 14+ messages in thread
From: Herbert Xu @ 2019-05-09  5:23 UTC (permalink / raw)
  To: Horia Geanta; +Cc: Aymen Sghaier, linux-crypto, Vakul Garg, linuxppc-dev, davem

On Thu, May 02, 2019 at 11:08:55AM +0000, Horia Geanta wrote:
>
> >> +
> >>  static inline void wr_reg32(void __iomem *reg, u32 data)
> >>  {
> >>  	if (caam_little_end)
> > 
> > This crashes on my p5020ds. Did you test on powerpc?
> > 
> > # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
> 
> Thanks for the report Michael.
> 
> Any hint what would be the proper approach here - to have relaxed I/O accessors
> that would work both for ARM and PPC, and avoid ifdeffery etc.?

Since no fix has been found I'm reverting the commit in question.

Thanks,
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
  2019-05-09  5:23     ` Herbert Xu
@ 2019-05-09 14:48       ` Michael Ellerman
  -1 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2019-05-09 14:48 UTC (permalink / raw)
  To: Herbert Xu, Horia Geanta
  Cc: Vakul Garg, linux-crypto, Aymen Sghaier, davem, linuxppc-dev

Herbert Xu <herbert@gondor.apana.org.au> writes:
> On Thu, May 02, 2019 at 11:08:55AM +0000, Horia Geanta wrote:
>> >> +
>> >>  static inline void wr_reg32(void __iomem *reg, u32 data)
>> >>  {
>> >>  	if (caam_little_end)
>> > 
>> > This crashes on my p5020ds. Did you test on powerpc?
>> > 
>> > # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
>> 
>> Thanks for the report Michael.
>> 
>> Any hint what would be the proper approach here - to have relaxed I/O accessors
>> that would work both for ARM and PPC, and avoid ifdeffery etc.?
>
> Since no fix has been found I'm reverting the commit in question.

Thanks.

Sorry I haven't had time to look into it with everything else going on
during the merge window.

cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-05-09 14:48       ` Michael Ellerman
  0 siblings, 0 replies; 14+ messages in thread
From: Michael Ellerman @ 2019-05-09 14:48 UTC (permalink / raw)
  To: Herbert Xu, Horia Geanta
  Cc: Vakul Garg, Aymen Sghaier, linuxppc-dev, linux-crypto, davem

Herbert Xu <herbert@gondor.apana.org.au> writes:
> On Thu, May 02, 2019 at 11:08:55AM +0000, Horia Geanta wrote:
>> >> +
>> >>  static inline void wr_reg32(void __iomem *reg, u32 data)
>> >>  {
>> >>  	if (caam_little_end)
>> > 
>> > This crashes on my p5020ds. Did you test on powerpc?
>> > 
>> > # first bad commit: [bbfcac5ff5f26aafa51935a62eb86b6eacfe8a49] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
>> 
>> Thanks for the report Michael.
>> 
>> Any hint what would be the proper approach here - to have relaxed I/O accessors
>> that would work both for ARM and PPC, and avoid ifdeffery etc.?
>
> Since no fix has been found I'm reverting the commit in question.

Thanks.

Sorry I haven't had time to look into it with everything else going on
during the merge window.

cheers

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
  2019-04-09  6:38 Vakul Garg
@ 2019-04-18 14:23 ` Herbert Xu
  0 siblings, 0 replies; 14+ messages in thread
From: Herbert Xu @ 2019-04-18 14:23 UTC (permalink / raw)
  To: Vakul Garg; +Cc: linux-crypto, Horia Geanta, Aymen Sghaier, davem

On Tue, Apr 09, 2019 at 06:38:08AM +0000, Vakul Garg wrote:
> In function caam_jr_dequeue(), a full memory barrier is used before
> writing response job ring's register to signal removal of the completed
> job. Therefore for writing the register, we do not need another write
> memory barrier. Hence it is removed by replacing the call to wr_reg32()
> with a newly defined function wr_reg32_relaxed().
> 
> Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
> ---
>  drivers/crypto/caam/jr.c   | 2 +-
>  drivers/crypto/caam/regs.h | 8 ++++++++
>  2 files changed, 9 insertions(+), 1 deletion(-)

Patch applied.  Thanks.
-- 
Email: Herbert Xu <herbert@gondor.apana.org.au>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue
@ 2019-04-09  6:38 Vakul Garg
  2019-04-18 14:23 ` Herbert Xu
  0 siblings, 1 reply; 14+ messages in thread
From: Vakul Garg @ 2019-04-09  6:38 UTC (permalink / raw)
  To: linux-crypto; +Cc: herbert, Horia Geanta, Aymen Sghaier, davem, Vakul Garg

In function caam_jr_dequeue(), a full memory barrier is used before
writing response job ring's register to signal removal of the completed
job. Therefore for writing the register, we do not need another write
memory barrier. Hence it is removed by replacing the call to wr_reg32()
with a newly defined function wr_reg32_relaxed().

Signed-off-by: Vakul Garg <vakul.garg@nxp.com>
---
 drivers/crypto/caam/jr.c   | 2 +-
 drivers/crypto/caam/regs.h | 8 ++++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index 4e9b3fca5627..2ce6d7d2ad72 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -266,7 +266,7 @@ static void caam_jr_dequeue(unsigned long devarg)
 		mb();
 
 		/* set done */
-		wr_reg32(&jrp->rregs->outring_rmvd, 1);
+		wr_reg32_relaxed(&jrp->rregs->outring_rmvd, 1);
 
 		jrp->out_ring_read_index = (jrp->out_ring_read_index + 1) &
 					   (JOBR_DEPTH - 1);
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 3cd0822ea819..9e912c722e33 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -96,6 +96,14 @@ cpu_to_caam(16)
 cpu_to_caam(32)
 cpu_to_caam(64)
 
+static inline void wr_reg32_relaxed(void __iomem *reg, u32 data)
+{
+	if (caam_little_end)
+		writel_relaxed(data, reg);
+	else
+		writel_relaxed(cpu_to_be32(data), reg);
+}
+
 static inline void wr_reg32(void __iomem *reg, u32 data)
 {
 	if (caam_little_end)
-- 
2.13.6


^ permalink raw reply related	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2019-05-09 14:51 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-05-01  5:49 [PATCH] crypto: caam/jr - Remove extra memory barrier during job ring dequeue Michael Ellerman
2019-05-01  5:49 ` Michael Ellerman
2019-05-01 14:27 ` Vakul Garg
2019-05-01 14:27   ` Vakul Garg
2019-05-02  5:00   ` Michael Ellerman
2019-05-02  5:00     ` Michael Ellerman
2019-05-02 11:08 ` Horia Geanta
2019-05-02 11:08   ` Horia Geanta
2019-05-09  5:23   ` Herbert Xu
2019-05-09  5:23     ` Herbert Xu
2019-05-09 14:48     ` Michael Ellerman
2019-05-09 14:48       ` Michael Ellerman
  -- strict thread matches above, loose matches on Subject: below --
2019-04-09  6:38 Vakul Garg
2019-04-18 14:23 ` Herbert Xu

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.