alsa-devel.alsa-project.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
@ 2020-04-27  7:36 frederic.recoules
  2020-04-27 13:47 ` Takashi Iwai
  0 siblings, 1 reply; 10+ messages in thread
From: frederic.recoules @ 2020-04-27  7:36 UTC (permalink / raw)
  To: alsa-devel; +Cc: Frédéric Recoules

From: Frédéric Recoules <frederic.recoules@orange.fr>

Main changes are:
  - move 'size' and 'old_ebx' to the output list since they are clobbered
  - add the "memory" keyword since input pointers are dereferenced
  - add mmx registers in the clobber list and add an initialization for mm1
  - add ebx in clobbers via a set of macro when GCC is newer than 5.0
    (it will work for other compilers or non-PIC mode too)

Minor changes are:
  - keep consistent the token numbering in the template
  - remove the manual save/restore ebx when it is in the clobber list
  - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates
  - allows 'size' to be given by register (e.g. ebp)
  - add "cc" keyword since the eflag register is clobbered

Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr>
---
 src/pcm/pcm_dmix_i386.h | 168 ++++++++++++++++++++++------------------
 1 file changed, 93 insertions(+), 75 deletions(-)

diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
index 2778cb1d..af2f4630 100644
--- a/src/pcm/pcm_dmix_i386.h
+++ b/src/pcm/pcm_dmix_i386.h
@@ -26,6 +26,13 @@
  *
  */
 
+#define COMMA ,
+#if __GNUC__ < 5 && defined(__PIC__)
+#  define GCC_PIC_SWITCH(before,after) before
+#else
+#  define GCC_PIC_SWITCH(before,after) after
+#endif
+
 /*
  *  for plain i386
  */
@@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 2f\n"
 		"\tjmp 7f\n"
@@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size,
 		 */
 		"\t.p2align 4,,15\n"
 		"1:"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 
 		/*
 		 *   sample = *src;
@@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size,
 		"\tjnz 4b\n"
 		"\tdecl %0\n"
 		"\tjnz 1b\n"
-		
-		"7:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"7:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
 
@@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
-		 *  initialization, load ESI, EDI, EBX registers
+		 *  initialization, load ESI, EDI, EBX registers, clear MM1
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tpxor %%mm1, %%mm1\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 2f\n"
 		"\tjmp 5f\n"
 
 		"\t.p2align 4,,15\n"
 		"1:"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 
 		"2:"
 		/*
@@ -230,13 +241,14 @@ static void MIX_AREAS_16_MMX(unsigned int size,
 		"\tjnz 1b\n"
 		"\temms\n"
                 "5:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
-
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "mm0", "mm1", "memory", "cc"
 	);
 }
 
@@ -261,13 +273,14 @@ static void MIX_AREAS_32(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 1f\n"
 		"\tjmp 6f\n"
@@ -337,19 +350,20 @@ static void MIX_AREAS_32(unsigned int size,
 		 */
 		"\tdecl %0\n"
 		"\tjz 6f\n"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 		"\tjmp 1b\n"
-		
-		"6:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"6:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
 
@@ -374,13 +388,14 @@ static void MIX_AREAS_24(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 1f\n"
 		"\tjmp 6f\n"
@@ -443,19 +458,20 @@ static void MIX_AREAS_24(unsigned int size,
 		 */
 		"\tdecl %0\n"
 		"\tjz 6f\n"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 		"\tjmp 1b\n"
-		
-		"6:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"6:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
 
@@ -480,13 +496,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjz 6f\n"
 
@@ -541,19 +558,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
 		/*
 		 * while (size-- > 0)
 		 */
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 		"\tdecl %0\n"
 		"\tjnz 1b\n"
-		
-		"6:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"6:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-04-27  7:36 [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces frederic.recoules
@ 2020-04-27 13:47 ` Takashi Iwai
       [not found]   ` <1871992915.6898305.1587998132827.JavaMail.zimbra@univ-grenoble-alpes.fr>
  0 siblings, 1 reply; 10+ messages in thread
From: Takashi Iwai @ 2020-04-27 13:47 UTC (permalink / raw)
  To: frederic.recoules; +Cc: alsa-devel, Frédéric Recoules

On Mon, 27 Apr 2020 09:36:04 +0200,
frederic.recoules@univ-grenoble-alpes.fr wrote:
> 
> From: Frédéric Recoules <frederic.recoules@orange.fr>
> 
> Main changes are:
>   - move 'size' and 'old_ebx' to the output list since they are clobbered
>   - add the "memory" keyword since input pointers are dereferenced
>   - add mmx registers in the clobber list and add an initialization for mm1
>   - add ebx in clobbers via a set of macro when GCC is newer than 5.0
>     (it will work for other compilers or non-PIC mode too)
> 
> Minor changes are:
>   - keep consistent the token numbering in the template
>   - remove the manual save/restore ebx when it is in the clobber list
>   - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates
>   - allows 'size' to be given by register (e.g. ebp)
>   - add "cc" keyword since the eflag register is clobbered
> 
> Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr>

When I apply this and build for i386 with gcc9, I got the following
error:
 pcm_dmix_i386.h: In function 'mix_areas_16_mmx':
 pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm'
   180 |  __asm__ __volatile__ (
       |  ^~~~~~~
 pcm_dmix_i386.h:180:2: error: unknown register name 'mm0' in 'asm'
 In file included from pcm_dmix_i386.c:31,
                  from pcm_dmix.c:144:
 pcm_dmix_i386.h: In function 'remix_areas_16_mmx':
 pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm'
   180 |  __asm__ __volatile__ (
       |  ^~~~~~~
....

Could you check those errors?


thanks,

Takashi

> ---
>  src/pcm/pcm_dmix_i386.h | 168 ++++++++++++++++++++++------------------
>  1 file changed, 93 insertions(+), 75 deletions(-)
> 
> diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
> index 2778cb1d..af2f4630 100644
> --- a/src/pcm/pcm_dmix_i386.h
> +++ b/src/pcm/pcm_dmix_i386.h
> @@ -26,6 +26,13 @@
>   *
>   */
>  
> +#define COMMA ,
> +#if __GNUC__ < 5 && defined(__PIC__)
> +#  define GCC_PIC_SWITCH(before,after) before
> +#else
> +#  define GCC_PIC_SWITCH(before,after) after
> +#endif
> +
>  /*
>   *  for plain i386
>   */
> @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 2f\n"
>  		"\tjmp 7f\n"
> @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size,
>  		 */
>  		"\t.p2align 4,,15\n"
>  		"1:"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  
>  		/*
>  		 *   sample = *src;
> @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size,
>  		"\tjnz 4b\n"
>  		"\tdecl %0\n"
>  		"\tjnz 1b\n"
> -		
> -		"7:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"7:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
>  
> @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
> -		 *  initialization, load ESI, EDI, EBX registers
> +		 *  initialization, load ESI, EDI, EBX registers, clear MM1
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tpxor %%mm1, %%mm1\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 2f\n"
>  		"\tjmp 5f\n"
>  
>  		"\t.p2align 4,,15\n"
>  		"1:"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  
>  		"2:"
>  		/*
> @@ -230,13 +241,14 @@ static void MIX_AREAS_16_MMX(unsigned int size,
>  		"\tjnz 1b\n"
>  		"\temms\n"
>                  "5:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
> -
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "mm0", "mm1", "memory", "cc"
>  	);
>  }
>  
> @@ -261,13 +273,14 @@ static void MIX_AREAS_32(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 1f\n"
>  		"\tjmp 6f\n"
> @@ -337,19 +350,20 @@ static void MIX_AREAS_32(unsigned int size,
>  		 */
>  		"\tdecl %0\n"
>  		"\tjz 6f\n"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  		"\tjmp 1b\n"
> -		
> -		"6:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"6:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
>  
> @@ -374,13 +388,14 @@ static void MIX_AREAS_24(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 1f\n"
>  		"\tjmp 6f\n"
> @@ -443,19 +458,20 @@ static void MIX_AREAS_24(unsigned int size,
>  		 */
>  		"\tdecl %0\n"
>  		"\tjz 6f\n"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  		"\tjmp 1b\n"
> -		
> -		"6:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"6:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
>  
> @@ -480,13 +496,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjz 6f\n"
>  
> @@ -541,19 +558,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
>  		/*
>  		 * while (size-- > 0)
>  		 */
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  		"\tdecl %0\n"
>  		"\tjnz 1b\n"
> -		
> -		"6:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"6:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
       [not found]   ` <1871992915.6898305.1587998132827.JavaMail.zimbra@univ-grenoble-alpes.fr>
@ 2020-04-27 15:12     ` Takashi Iwai
  2020-04-27 16:00       ` FRÉDÉRIC RECOULES
  0 siblings, 1 reply; 10+ messages in thread
From: Takashi Iwai @ 2020-04-27 15:12 UTC (permalink / raw)
  To: FRÉDÉRIC RECOULES; +Cc: alsa-devel

Please don't drop Cc to ML.

And about the comments:

On Mon, 27 Apr 2020 16:35:32 +0200,
FRÉDÉRIC RECOULES wrote:
> 
> I wrongly assumed that the option -mmmx was passed when compiling (re)
> mix_areas_16_mmx.
> I know how to fix it (inspired of ffmpeg):
>  - the configure test if mmx is supported by the compiler option and set a
> macro HAVE_MMX accordingly (maybe something already exist?).

Actually the mmx support isn't about whether the compiler supports it
or not.  With inline asm, the MMX-enabled code is always included in
alsa-lib as well as the code without MMX, then which one to be
executed is dynamically switched at runtime by probing the CPU
capability.


thanks,

Takashi

>  - declare a macro MMX_CLOBBERS(list) that will output the list when HAVE_MMX
> is true and nothing otherwise.
> I will resubmit a patch soon.
> 
> Regards,
> Frédéric Recoules
> 
> ------------------------------------------------------------------------------
> De: "tiwai" <tiwai@suse.de>
> À: "frederic recoules" <frederic.recoules@univ-grenoble-alpes.fr>
> Cc: "alsa-devel" <alsa-devel@alsa-project.org>, "frederic recoules"
> <frederic.recoules@orange.fr>
> Envoyé: Lundi 27 Avril 2020 15:47:02
> Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk
> interfaces
> 
> On Mon, 27 Apr 2020 09:36:04 +0200,
> frederic.recoules@univ-grenoble-alpes.fr wrote:
> >
> > From: Frédéric Recoules <frederic.recoules@orange.fr>
> >
> > Main changes are:
> >   - move 'size' and 'old_ebx' to the output list since they are clobbered
> >   - add the "memory" keyword since input pointers are dereferenced
> >   - add mmx registers in the clobber list and add an initialization for mm1
> >   - add ebx in clobbers via a set of macro when GCC is newer than 5.0
> >     (it will work for other compilers or non-PIC mode too)
> >
> > Minor changes are:
> >   - keep consistent the token numbering in the template
> >   - remove the manual save/restore ebx when it is in the clobber list
> >   - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates
> >   - allows 'size' to be given by register (e.g. ebp)
> >   - add "cc" keyword since the eflag register is clobbered
> >
> > Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr>
> 
> When I apply this and build for i386 with gcc9, I got the following
> error:
>  pcm_dmix_i386.h: In function 'mix_areas_16_mmx':
>  pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm'
>    180 |  __asm__ __volatile__ (
>        |  ^~~~~~~
>  pcm_dmix_i386.h:180:2: error: unknown register name 'mm0' in 'asm'
>  In file included from pcm_dmix_i386.c:31,
>                   from pcm_dmix.c:144:
>  pcm_dmix_i386.h: In function 'remix_areas_16_mmx':
>  pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm'
>    180 |  __asm__ __volatile__ (
>        |  ^~~~~~~
> ....
> 
> Could you check those errors?
> 
> thanks,
> 
> Takashi
> 
> > ---
> >  src/pcm/pcm_dmix_i386.h | 168 ++++++++++++++++++++++------------------
> >  1 file changed, 93 insertions(+), 75 deletions(-)
> >
> > diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
> > index 2778cb1d..af2f4630 100644
> > --- a/src/pcm/pcm_dmix_i386.h
> > +++ b/src/pcm/pcm_dmix_i386.h
> > @@ -26,6 +26,13 @@
> >   *
> >   */
> >  
> > +#define COMMA ,
> > +#if __GNUC__ < 5 && defined(__PIC__)
> > +#  define GCC_PIC_SWITCH(before,after) before
> > +#else
> > +#  define GCC_PIC_SWITCH(before,after) after
> > +#endif
> > +
> >  /*
> >   *  for plain i386
> >   */
> > @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size,
> >          __asm__ __volatile__ (
> >                  "\n"
> >  
> > -                "\tmovl %%ebx, %7\n"        /* ebx is GOT pointer (-fPIC) *
> /
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
> >                  /*
> >                   *  initialization, load ESI, EDI, EBX registers
> >                   */
> > -                "\tmovl %1, %%edi\n"
> > -                "\tmovl %2, %%esi\n"
> > -                "\tmovl %3, %%ebx\n"
> > +                "\tmovl %2, %%edi\n"
> > +                "\tmovl %3, %%esi\n"
> > +                "\tmovl %4, %%ebx\n"
> >                  "\tcmpl $0, %0\n"
> >                  "\tjnz 2f\n"
> >                  "\tjmp 7f\n"
> > @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size,
> >                   */
> >                  "\t.p2align 4,,15\n"
> >                  "1:"
> > -                "\tadd %4, %%edi\n"
> > -                "\tadd %5, %%esi\n"
> > -                "\tadd %6, %%ebx\n"
> > +                "\tadd %5, %%edi\n"
> > +                "\tadd %6, %%esi\n"
> > +                "\tadd %7, %%ebx\n"
> >  
> >                  /*
> >                   *   sample = *src;
> > @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size,
> >                  "\tjnz 4b\n"
> >                  "\tdecl %0\n"
> >                  "\tjnz 1b\n"
> > -                
> > -                "7:"
> > -                "\tmovl %7, %%ebx\n"        /* ebx is GOT pointer (-fPIC) *
> /
> >  
> > -                : /* no output regs */
> > -                : "m" (size), "m" (dst), "m" (src),
> > -                  "m" (sum), "m" (dst_step), "m" (src_step),
> > -                  "m" (sum_step), "m" (old_ebx)
> > -                : "esi", "edi", "edx", "ecx", "eax"
> > +                "7:"
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> > +
> > +                : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> > +                : "m" (dst), "m" (src), "m" (sum),
> > +                  "im" (dst_step), "im" (src_step), "im" (sum_step)
> > +                : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA)
> "eax",
> > +                  "memory", "cc"
> >          );
> >  }
> >  
> > @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size,
> >          __asm__ __volatile__ (
> >                  "\n"
> >  
> > -                "\tmovl %%ebx, %7\n"        /* ebx is GOT pointer (-fPIC) *
> /
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
> >                  /*
> > -                 *  initialization, load ESI, EDI, EBX registers
> > +                 *  initialization, load ESI, EDI, EBX registers, clear MM1
> >                   */
> > -                "\tmovl %1, %%edi\n"
> > -                "\tmovl %2, %%esi\n"
> > -                "\tmovl %3, %%ebx\n"
> > +                "\tpxor %%mm1, %%mm1\n"
> > +                "\tmovl %2, %%edi\n"
> > +                "\tmovl %3, %%esi\n"
> > +                "\tmovl %4, %%ebx\n"
> >                  "\tcmpl $0, %0\n"
> >                  "\tjnz 2f\n"
> >                  "\tjmp 5f\n"
> >  
> >                  "\t.p2align 4,,15\n"
> >                  "1:"
> > -                "\tadd %4, %%edi\n"
> > -                "\tadd %5, %%esi\n"
> > -                "\tadd %6, %%ebx\n"
> > +                "\tadd %5, %%edi\n"
> > +                "\tadd %6, %%esi\n"
> > +                "\tadd %7, %%ebx\n"
> >  
> >                  "2:"
> >                  /*
> > @@ -230,13 +241,14 @@ static void MIX_AREAS_16_MMX(unsigned int size,
> >                  "\tjnz 1b\n"
> >                  "\temms\n"
> >                  "5:"
> > -                "\tmovl %7, %%ebx\n"        /* ebx is GOT pointer (-fPIC) *
> /
> > -
> > -                : /* no output regs */
> > -                : "m" (size), "m" (dst), "m" (src),
> > -                  "m" (sum), "m" (dst_step), "m" (src_step),
> > -                  "m" (sum_step), "m" (old_ebx)
> > -                : "esi", "edi", "edx", "ecx", "eax"
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> > +
> > +                : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> > +                : "m" (dst), "m" (src), "m" (sum),
> > +                  "im" (dst_step), "im" (src_step), "im" (sum_step)
> > +                : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA)
> "eax",
> > +                  "mm0", "mm1", "memory", "cc"
> >          );
> >  }
> >  
> > @@ -261,13 +273,14 @@ static void MIX_AREAS_32(unsigned int size,
> >          __asm__ __volatile__ (
> >                  "\n"
> >  
> > -                "\tmovl %%ebx, %7\n"        /* ebx is GOT pointer (-fPIC) *
> /
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
> >                  /*
> >                   *  initialization, load ESI, EDI, EBX registers
> >                   */
> > -                "\tmovl %1, %%edi\n"
> > -                "\tmovl %2, %%esi\n"
> > -                "\tmovl %3, %%ebx\n"
> > +                "\tmovl %2, %%edi\n"
> > +                "\tmovl %3, %%esi\n"
> > +                "\tmovl %4, %%ebx\n"
> >                  "\tcmpl $0, %0\n"
> >                  "\tjnz 1f\n"
> >                  "\tjmp 6f\n"
> > @@ -337,19 +350,20 @@ static void MIX_AREAS_32(unsigned int size,
> >                   */
> >                  "\tdecl %0\n"
> >                  "\tjz 6f\n"
> > -                "\tadd %4, %%edi\n"
> > -                "\tadd %5, %%esi\n"
> > -                "\tadd %6, %%ebx\n"
> > +                "\tadd %5, %%edi\n"
> > +                "\tadd %6, %%esi\n"
> > +                "\tadd %7, %%ebx\n"
> >                  "\tjmp 1b\n"
> > -                
> > -                "6:"
> > -                "\tmovl %7, %%ebx\n"        /* ebx is GOT pointer (-fPIC) *
> /
> >  
> > -                : /* no output regs */
> > -                : "m" (size), "m" (dst), "m" (src),
> > -                  "m" (sum), "m" (dst_step), "m" (src_step),
> > -                  "m" (sum_step), "m" (old_ebx)
> > -                : "esi", "edi", "edx", "ecx", "eax"
> > +                "6:"
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> > +
> > +                : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> > +                : "m" (dst), "m" (src), "m" (sum),
> > +                  "im" (dst_step), "im" (src_step), "im" (sum_step)
> > +                : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA)
> "eax",
> > +                  "memory", "cc"
> >          );
> >  }
> >  
> > @@ -374,13 +388,14 @@ static void MIX_AREAS_24(unsigned int size,
> >          __asm__ __volatile__ (
> >                  "\n"
> >  
> > -                "\tmovl %%ebx, %7\n"        /* ebx is GOT pointer (-fPIC) *
> /
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
> >                  /*
> >                   *  initialization, load ESI, EDI, EBX registers
> >                   */
> > -                "\tmovl %1, %%edi\n"
> > -                "\tmovl %2, %%esi\n"
> > -                "\tmovl %3, %%ebx\n"
> > +                "\tmovl %2, %%edi\n"
> > +                "\tmovl %3, %%esi\n"
> > +                "\tmovl %4, %%ebx\n"
> >                  "\tcmpl $0, %0\n"
> >                  "\tjnz 1f\n"
> >                  "\tjmp 6f\n"
> > @@ -443,19 +458,20 @@ static void MIX_AREAS_24(unsigned int size,
> >                   */
> >                  "\tdecl %0\n"
> >                  "\tjz 6f\n"
> > -                "\tadd %4, %%edi\n"
> > -                "\tadd %5, %%esi\n"
> > -                "\tadd %6, %%ebx\n"
> > +                "\tadd %5, %%edi\n"
> > +                "\tadd %6, %%esi\n"
> > +                "\tadd %7, %%ebx\n"
> >                  "\tjmp 1b\n"
> > -                
> > -                "6:"
> > -                "\tmovl %7, %%ebx\n"        /* ebx is GOT pointer (-fPIC) *
> /
> >  
> > -                : /* no output regs */
> > -                : "m" (size), "m" (dst), "m" (src),
> > -                  "m" (sum), "m" (dst_step), "m" (src_step),
> > -                  "m" (sum_step), "m" (old_ebx)
> > -                : "esi", "edi", "edx", "ecx", "eax"
> > +                "6:"
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> > +
> > +                : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> > +                : "m" (dst), "m" (src), "m" (sum),
> > +                  "im" (dst_step), "im" (src_step), "im" (sum_step)
> > +                : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA)
> "eax",
> > +                  "memory", "cc"
> >          );
> >  }
> >  
> > @@ -480,13 +496,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
> >          __asm__ __volatile__ (
> >                  "\n"
> >  
> > -                "\tmovl %%ebx, %7\n"        /* ebx is GOT pointer (-fPIC) *
> /
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
> >                  /*
> >                   *  initialization, load ESI, EDI, EBX registers
> >                   */
> > -                "\tmovl %1, %%edi\n"
> > -                "\tmovl %2, %%esi\n"
> > -                "\tmovl %3, %%ebx\n"
> > +                "\tmovl %2, %%edi\n"
> > +                "\tmovl %3, %%esi\n"
> > +                "\tmovl %4, %%ebx\n"
> >                  "\tcmpl $0, %0\n"
> >                  "\tjz 6f\n"
> >  
> > @@ -541,19 +558,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
> >                  /*
> >                   * while (size-- > 0)
> >                   */
> > -                "\tadd %4, %%edi\n"
> > -                "\tadd %5, %%esi\n"
> > -                "\tadd %6, %%ebx\n"
> > +                "\tadd %5, %%edi\n"
> > +                "\tadd %6, %%esi\n"
> > +                "\tadd %7, %%ebx\n"
> >                  "\tdecl %0\n"
> >                  "\tjnz 1b\n"
> > -                
> > -                "6:"
> > -                "\tmovl %7, %%ebx\n"        /* ebx is GOT pointer (-fPIC) *
> /
> >  
> > -                : /* no output regs */
> > -                : "m" (size), "m" (dst), "m" (src),
> > -                  "m" (sum), "m" (dst_step), "m" (src_step),
> > -                  "m" (sum_step), "m" (old_ebx)
> > -                : "esi", "edi", "edx", "ecx", "eax"
> > +                "6:"
> > +                /* ebx is GOT pointer (-fPIC) */
> > +                GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> > +
> > +                : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> > +                : "m" (dst), "m" (src), "m" (sum),
> > +                  "im" (dst_step), "im" (src_step), "im" (sum_step)
> > +                : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA)
> "eax",
> > +                  "memory", "cc"
> >          );
> >  }
> > --
> > 2.17.1
> >
> 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-04-27 15:12     ` Takashi Iwai
@ 2020-04-27 16:00       ` FRÉDÉRIC RECOULES
  0 siblings, 0 replies; 10+ messages in thread
From: FRÉDÉRIC RECOULES @ 2020-04-27 16:00 UTC (permalink / raw)
  To: tiwai; +Cc: alsa-devel

> Actually the mmx support isn't about whether the compiler supports it 
> or not. With inline asm, the MMX-enabled code is always included in 
> alsa-lib as well as the code without MMX, then which one to be 
> executed is dynamically switched at runtime by probing the CPU 
> capability. 

Yes and the problem is that if MMX technology is enabled, the 
compiler could technically choose to store some data in these registers; 
data that will be overwritten by the chunk. 
The role of the clobber list is to inform the compiler that these registers 
are used and modified. 

If the MMX technology is not enabled, the compiler will not use them 
anyway, so there is no need to put them in clobbers. 
However (this is the beauty of x86...), they will clobber x87 floating 
point registers so, I would say: 
if HAVE_MMX -> "mm0" , "mm1" 
else -> "st" , "st(1)" , "st(2)" , "st(3)" , "st(4)" , "st(5)" , "st(6)" , "st(7)"

Regards,
Frédéric Recoules

----- Mail original -----
De: "tiwai" <tiwai@suse.de>
À: "FRÉDÉRIC RECOULES" <frederic.recoules@univ-grenoble-alpes.fr>
Cc: "alsa-devel" <alsa-devel@alsa-project.org>
Envoyé: Lundi 27 Avril 2020 17:12:28
Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces

Please don't drop Cc to ML. 

And about the comments: 

On Mon, 27 Apr 2020 16:35:32 +0200, 
FRÉDÉRIC RECOULES wrote: 
> 
> I wrongly assumed that the option -mmmx was passed when compiling (re) 
> mix_areas_16_mmx. 
> I know how to fix it (inspired of ffmpeg): 
> - the configure test if mmx is supported by the compiler option and set a 
> macro HAVE_MMX accordingly (maybe something already exist?). 

Actually the mmx support isn't about whether the compiler supports it 
or not. With inline asm, the MMX-enabled code is always included in 
alsa-lib as well as the code without MMX, then which one to be 
executed is dynamically switched at runtime by probing the CPU 
capability. 


thanks, 

Takashi 

> - declare a macro MMX_CLOBBERS(list) that will output the list when HAVE_MMX 
> is true and nothing otherwise. 
> I will resubmit a patch soon. 
> 
> Regards, 
> Frédéric Recoules 
> 
> ------------------------------------------------------------------------------ 
> De: "tiwai" <tiwai@suse.de> 
> À: "frederic recoules" <frederic.recoules@univ-grenoble-alpes.fr> 
> Cc: "alsa-devel" <alsa-devel@alsa-project.org>, "frederic recoules" 
> <frederic.recoules@orange.fr> 
> Envoyé: Lundi 27 Avril 2020 15:47:02 
> Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk 
> interfaces 
> 
> On Mon, 27 Apr 2020 09:36:04 +0200, 
> frederic.recoules@univ-grenoble-alpes.fr wrote: 
> > 
> > From: Frédéric Recoules <frederic.recoules@orange.fr> 
> > 
> > Main changes are: 
> > - move 'size' and 'old_ebx' to the output list since they are clobbered 
> > - add the "memory" keyword since input pointers are dereferenced 
> > - add mmx registers in the clobber list and add an initialization for mm1 
> > - add ebx in clobbers via a set of macro when GCC is newer than 5.0 
> > (it will work for other compilers or non-PIC mode too) 
> > 
> > Minor changes are: 
> > - keep consistent the token numbering in the template 
> > - remove the manual save/restore ebx when it is in the clobber list 
> > - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates 
> > - allows 'size' to be given by register (e.g. ebp) 
> > - add "cc" keyword since the eflag register is clobbered 
> > 
> > Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr> 
> 
> When I apply this and build for i386 with gcc9, I got the following 
> error: 
> pcm_dmix_i386.h: In function 'mix_areas_16_mmx': 
> pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm' 
> 180 | __asm__ __volatile__ ( 
> | ^~~~~~~ 
> pcm_dmix_i386.h:180:2: error: unknown register name 'mm0' in 'asm' 
> In file included from pcm_dmix_i386.c:31, 
> from pcm_dmix.c:144: 
> pcm_dmix_i386.h: In function 'remix_areas_16_mmx': 
> pcm_dmix_i386.h:180:2: error: unknown register name 'mm1' in 'asm' 
> 180 | __asm__ __volatile__ ( 
> | ^~~~~~~ 
> .... 
> 
> Could you check those errors? 
> 
> thanks, 
> 
> Takashi 
> 
> > --- 
> > src/pcm/pcm_dmix_i386.h | 168 ++++++++++++++++++++++------------------ 
> > 1 file changed, 93 insertions(+), 75 deletions(-) 
> > 
> > diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h 
> > index 2778cb1d..af2f4630 100644 
> > --- a/src/pcm/pcm_dmix_i386.h 
> > +++ b/src/pcm/pcm_dmix_i386.h 
> > @@ -26,6 +26,13 @@ 
> > * 
> > */ 
> > 
> > +#define COMMA , 
> > +#if __GNUC__ < 5 && defined(__PIC__) 
> > +# define GCC_PIC_SWITCH(before,after) before 
> > +#else 
> > +# define GCC_PIC_SWITCH(before,after) after 
> > +#endif 
> > + 
> > /* 
> > * for plain i386 
> > */ 
> > @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size, 
> > __asm__ __volatile__ ( 
> > "\n" 
> > 
> > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> > /* 
> > * initialization, load ESI, EDI, EBX registers 
> > */ 
> > - "\tmovl %1, %%edi\n" 
> > - "\tmovl %2, %%esi\n" 
> > - "\tmovl %3, %%ebx\n" 
> > + "\tmovl %2, %%edi\n" 
> > + "\tmovl %3, %%esi\n" 
> > + "\tmovl %4, %%ebx\n" 
> > "\tcmpl $0, %0\n" 
> > "\tjnz 2f\n" 
> > "\tjmp 7f\n" 
> > @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size, 
> > */ 
> > "\t.p2align 4,,15\n" 
> > "1:" 
> > - "\tadd %4, %%edi\n" 
> > - "\tadd %5, %%esi\n" 
> > - "\tadd %6, %%ebx\n" 
> > + "\tadd %5, %%edi\n" 
> > + "\tadd %6, %%esi\n" 
> > + "\tadd %7, %%ebx\n" 
> > 
> > /* 
> > * sample = *src; 
> > @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size, 
> > "\tjnz 4b\n" 
> > "\tdecl %0\n" 
> > "\tjnz 1b\n" 
> > - 
> > - "7:" 
> > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > 
> > - : /* no output regs */ 
> > - : "m" (size), "m" (dst), "m" (src), 
> > - "m" (sum), "m" (dst_step), "m" (src_step), 
> > - "m" (sum_step), "m" (old_ebx) 
> > - : "esi", "edi", "edx", "ecx", "eax" 
> > + "7:" 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> > + 
> > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> > + : "m" (dst), "m" (src), "m" (sum), 
> > + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) 
> "eax", 
> > + "memory", "cc" 
> > ); 
> > } 
> > 
> > @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size, 
> > __asm__ __volatile__ ( 
> > "\n" 
> > 
> > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> > /* 
> > - * initialization, load ESI, EDI, EBX registers 
> > + * initialization, load ESI, EDI, EBX registers, clear MM1 
> > */ 
> > - "\tmovl %1, %%edi\n" 
> > - "\tmovl %2, %%esi\n" 
> > - "\tmovl %3, %%ebx\n" 
> > + "\tpxor %%mm1, %%mm1\n" 
> > + "\tmovl %2, %%edi\n" 
> > + "\tmovl %3, %%esi\n" 
> > + "\tmovl %4, %%ebx\n" 
> > "\tcmpl $0, %0\n" 
> > "\tjnz 2f\n" 
> > "\tjmp 5f\n" 
> > 
> > "\t.p2align 4,,15\n" 
> > "1:" 
> > - "\tadd %4, %%edi\n" 
> > - "\tadd %5, %%esi\n" 
> > - "\tadd %6, %%ebx\n" 
> > + "\tadd %5, %%edi\n" 
> > + "\tadd %6, %%esi\n" 
> > + "\tadd %7, %%ebx\n" 
> > 
> > "2:" 
> > /* 
> > @@ -230,13 +241,14 @@ static void MIX_AREAS_16_MMX(unsigned int size, 
> > "\tjnz 1b\n" 
> > "\temms\n" 
> > "5:" 
> > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > - 
> > - : /* no output regs */ 
> > - : "m" (size), "m" (dst), "m" (src), 
> > - "m" (sum), "m" (dst_step), "m" (src_step), 
> > - "m" (sum_step), "m" (old_ebx) 
> > - : "esi", "edi", "edx", "ecx", "eax" 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> > + 
> > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> > + : "m" (dst), "m" (src), "m" (sum), 
> > + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) 
> "eax", 
> > + "mm0", "mm1", "memory", "cc" 
> > ); 
> > } 
> > 
> > @@ -261,13 +273,14 @@ static void MIX_AREAS_32(unsigned int size, 
> > __asm__ __volatile__ ( 
> > "\n" 
> > 
> > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> > /* 
> > * initialization, load ESI, EDI, EBX registers 
> > */ 
> > - "\tmovl %1, %%edi\n" 
> > - "\tmovl %2, %%esi\n" 
> > - "\tmovl %3, %%ebx\n" 
> > + "\tmovl %2, %%edi\n" 
> > + "\tmovl %3, %%esi\n" 
> > + "\tmovl %4, %%ebx\n" 
> > "\tcmpl $0, %0\n" 
> > "\tjnz 1f\n" 
> > "\tjmp 6f\n" 
> > @@ -337,19 +350,20 @@ static void MIX_AREAS_32(unsigned int size, 
> > */ 
> > "\tdecl %0\n" 
> > "\tjz 6f\n" 
> > - "\tadd %4, %%edi\n" 
> > - "\tadd %5, %%esi\n" 
> > - "\tadd %6, %%ebx\n" 
> > + "\tadd %5, %%edi\n" 
> > + "\tadd %6, %%esi\n" 
> > + "\tadd %7, %%ebx\n" 
> > "\tjmp 1b\n" 
> > - 
> > - "6:" 
> > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > 
> > - : /* no output regs */ 
> > - : "m" (size), "m" (dst), "m" (src), 
> > - "m" (sum), "m" (dst_step), "m" (src_step), 
> > - "m" (sum_step), "m" (old_ebx) 
> > - : "esi", "edi", "edx", "ecx", "eax" 
> > + "6:" 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> > + 
> > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> > + : "m" (dst), "m" (src), "m" (sum), 
> > + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) 
> "eax", 
> > + "memory", "cc" 
> > ); 
> > } 
> > 
> > @@ -374,13 +388,14 @@ static void MIX_AREAS_24(unsigned int size, 
> > __asm__ __volatile__ ( 
> > "\n" 
> > 
> > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> > /* 
> > * initialization, load ESI, EDI, EBX registers 
> > */ 
> > - "\tmovl %1, %%edi\n" 
> > - "\tmovl %2, %%esi\n" 
> > - "\tmovl %3, %%ebx\n" 
> > + "\tmovl %2, %%edi\n" 
> > + "\tmovl %3, %%esi\n" 
> > + "\tmovl %4, %%ebx\n" 
> > "\tcmpl $0, %0\n" 
> > "\tjnz 1f\n" 
> > "\tjmp 6f\n" 
> > @@ -443,19 +458,20 @@ static void MIX_AREAS_24(unsigned int size, 
> > */ 
> > "\tdecl %0\n" 
> > "\tjz 6f\n" 
> > - "\tadd %4, %%edi\n" 
> > - "\tadd %5, %%esi\n" 
> > - "\tadd %6, %%ebx\n" 
> > + "\tadd %5, %%edi\n" 
> > + "\tadd %6, %%esi\n" 
> > + "\tadd %7, %%ebx\n" 
> > "\tjmp 1b\n" 
> > - 
> > - "6:" 
> > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > 
> > - : /* no output regs */ 
> > - : "m" (size), "m" (dst), "m" (src), 
> > - "m" (sum), "m" (dst_step), "m" (src_step), 
> > - "m" (sum_step), "m" (old_ebx) 
> > - : "esi", "edi", "edx", "ecx", "eax" 
> > + "6:" 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> > + 
> > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> > + : "m" (dst), "m" (src), "m" (sum), 
> > + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) 
> "eax", 
> > + "memory", "cc" 
> > ); 
> > } 
> > 
> > @@ -480,13 +496,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size, 
> > __asm__ __volatile__ ( 
> > "\n" 
> > 
> > - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> > /* 
> > * initialization, load ESI, EDI, EBX registers 
> > */ 
> > - "\tmovl %1, %%edi\n" 
> > - "\tmovl %2, %%esi\n" 
> > - "\tmovl %3, %%ebx\n" 
> > + "\tmovl %2, %%edi\n" 
> > + "\tmovl %3, %%esi\n" 
> > + "\tmovl %4, %%ebx\n" 
> > "\tcmpl $0, %0\n" 
> > "\tjz 6f\n" 
> > 
> > @@ -541,19 +558,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size, 
> > /* 
> > * while (size-- > 0) 
> > */ 
> > - "\tadd %4, %%edi\n" 
> > - "\tadd %5, %%esi\n" 
> > - "\tadd %6, %%ebx\n" 
> > + "\tadd %5, %%edi\n" 
> > + "\tadd %6, %%esi\n" 
> > + "\tadd %7, %%ebx\n" 
> > "\tdecl %0\n" 
> > "\tjnz 1b\n" 
> > - 
> > - "6:" 
> > - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) * 
> / 
> > 
> > - : /* no output regs */ 
> > - : "m" (size), "m" (dst), "m" (src), 
> > - "m" (sum), "m" (dst_step), "m" (src_step), 
> > - "m" (sum_step), "m" (old_ebx) 
> > - : "esi", "edi", "edx", "ecx", "eax" 
> > + "6:" 
> > + /* ebx is GOT pointer (-fPIC) */ 
> > + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> > + 
> > + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> > + : "m" (dst), "m" (src), "m" (sum), 
> > + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> > + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) 
> "eax", 
> > + "memory", "cc" 
> > ); 
> > } 
> > -- 
> > 2.17.1 
> > 
> 
> 


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-05-05 13:52       ` Takashi Iwai
@ 2020-05-06 18:07         ` FRÉDÉRIC RECOULES
  0 siblings, 0 replies; 10+ messages in thread
From: FRÉDÉRIC RECOULES @ 2020-05-06 18:07 UTC (permalink / raw)
  To: tiwai; +Cc: alsa-devel, frederic recoules

Hi Takashi, 

Thanks for your comments. 
I addressed the style issues and I resubmitted the patches (V3). 

Note about the MMX patch: 
The mm1 register is read by the chunk, but its value is not used 
(the instruction packssdw put the value of mm1 in the high 32 bits of mm0 
but then, only the low 32 bits are used. My tool made an over-approximation 
but it is now fixed). 

By the way, the first 3 patches produce the same binary output as master. 
However, I looked in the 'test' folder, but I do not know how to run a test for 
the pcm. For now, I have no time to investigate how alsa should be run till the 
next week. 

> But there were some issues with the patch format. IIRC, the 
> patch 2 couldn't be applied to the latest git tree cleanly (some space 
> etter issues?), so I had to manually modify it. 

Sorry for that, it looks like my text editor remove space at the end of the line 
automatically. It should not be the case with the new patches. 

Regards, 
Frédéric 


De: "tiwai" <tiwai@suse.de> 
À: "FRÉDÉRIC RECOULES" <frederic.recoules@univ-grenoble-alpes.fr> 
Cc: "alsa-devel" <alsa-devel@alsa-project.org>, "frederic recoules" <frederic.recoules@orange.fr> 
Envoyé: Mardi 5 Mai 2020 15:52:19 
Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces 

On Mon, 04 May 2020 21:25:16 +0200, 
FRÉDÉRIC RECOULES wrote: 
> 
> Hi Takashi, 
> 
> I would like an update on the review process ([PATCH */6 V2] [pcm_dmix 
> assembly]) 
> 
> As a reminder, I split the changes in 6 distinct patches. 
> The 3 first patches produce exactly the same binary output, so they do not 
> need testing. 
> The 4th one has just some minor change due to the fact that I added an 
> instruction -- I am highly confident it breaks nothing. 

The compile tests passed with a few different compiler set, so that's 
good. But there were some issues with the patch format. IIRC, the 
patch 2 couldn't be applied to the latest git tree cleanly (some space 
letter issues?), so I had to manually modify it. 

Also, some style issues: 

- Please avoid a prefix like "[configure]" in the subject. 
The prefix with "[PATCH xxx]" is good, and this should remain, but 
the next prefix should be "configure:" instead. Otherwise the 
prefix with the brackets are pruned at applying a patch via git-am. 

- Please give more texts about why the change is done. 
In all your patches, there are no explanations why you change it. 
It's often more important than describing what you're changing. 
For example, the patch 2 "change the token by symbolic names". Why 
is this needed to be symbolic names? Write some more information in 
each patch description. 

- We usually use #ifdef without space between "#" and "ifdef". 
Let's keep that style consistently. 

> If you need I test the 2 last ones (that reduce the size of the produced 
> binary), could you point me out what test I should run? 

We need at least some build tests with different compiler versions and 
check whether dmix actually works (not necessarily on all of them but 
some of those compiled results). 

> Meanwhile, my deadline comes and I would really appreciate to see the patches 
> applied by wednesday night. 

If you can work on the above and resubmit v3 patchset, I'll happily 
apply them. 


Thanks! 

Takashi 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-05-04 19:25     ` FRÉDÉRIC RECOULES
@ 2020-05-05 13:52       ` Takashi Iwai
  2020-05-06 18:07         ` FRÉDÉRIC RECOULES
  0 siblings, 1 reply; 10+ messages in thread
From: Takashi Iwai @ 2020-05-05 13:52 UTC (permalink / raw)
  To: FRÉDÉRIC RECOULES; +Cc: alsa-devel, frederic recoules

On Mon, 04 May 2020 21:25:16 +0200,
FRÉDÉRIC RECOULES wrote:
> 
> Hi Takashi,
> 
> I would like an update on the review process ([PATCH */6 V2] [pcm_dmix
> assembly])
> 
> As a reminder, I split the changes in 6 distinct patches.
> The 3 first patches produce exactly the same binary output, so they do not
> need testing.
> The 4th one has just some minor change due to the fact that I added an
> instruction -- I am highly confident it breaks nothing.

The compile tests passed with a few different compiler set, so that's
good.  But there were some issues with the patch format.  IIRC, the
patch 2 couldn't be applied to the latest git tree cleanly (some space
letter issues?), so I had to manually modify it.

Also, some style issues:

- Please avoid a prefix like "[configure]" in the subject.
  The prefix with "[PATCH xxx]" is good, and this should remain, but
  the next prefix should be "configure:" instead.  Otherwise the
  prefix with the brackets are pruned at applying a patch via git-am.

- Please give more texts about why the change is done.
  In all your patches, there are no explanations why you change it.
  It's often more important than describing what you're changing.
  For example, the patch 2 "change the token by symbolic names".  Why
  is this needed to be symbolic names?  Write some more information in
  each patch description.

- We usually use #ifdef without space between "#" and "ifdef".
  Let's keep that style consistently.

> If you need I test the 2 last ones (that reduce the size of the produced
> binary), could you point me out what test I should run?

We need at least some build tests with different compiler versions and
check whether dmix actually works (not necessarily on all of them but
some of those compiled results).

> Meanwhile, my deadline comes and I would really appreciate to see the patches
> applied by wednesday night.

If you can work on the above and resubmit v3 patchset, I'll happily
apply them.


Thanks!

Takashi

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-04-30  9:41   ` FRÉDÉRIC RECOULES
@ 2020-05-04 19:25     ` FRÉDÉRIC RECOULES
  2020-05-05 13:52       ` Takashi Iwai
  0 siblings, 1 reply; 10+ messages in thread
From: FRÉDÉRIC RECOULES @ 2020-05-04 19:25 UTC (permalink / raw)
  To: tiwai; +Cc: alsa-devel, frederic recoules

Hi Takashi, 

I would like an update on the review process ([PATCH */6 V2] [pcm_dmix assembly]) 

As a reminder, I split the changes in 6 distinct patches. 
The 3 first patches produce exactly the same binary output, so they do not need testing. 
The 4th one has just some minor change due to the fact that I added an instruction -- I am highly confident it breaks nothing. 

If you need I test the 2 last ones (that reduce the size of the produced binary), could you point me out what test I should run? 

Meanwhile, my deadline comes and I would really appreciate to see the patches applied by wednesday night. 

Best regards, 
Frédéric 



De: "FRÉDÉRIC RECOULES" <frederic.recoules@univ-grenoble-alpes.fr> 
À: "tiwai" <tiwai@suse.de> 
Cc: "alsa-devel" <alsa-devel@alsa-project.org>, "frederic recoules" <frederic.recoules@orange.fr> 
Envoyé: Jeudi 30 Avril 2020 11:41:56 
Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces 

Hi Takashi, 

I resubmitted the patch (V2) with some modifications: 
- I split the changes so you will not have to apply all of them; 
- the 4 first patches are the ones for safety, and barely produce the same binary output some I am highly confident in the fact they do not need testing; 
- the 2 last patches are small changes that can help the compiler producing a better/smaller code. They have not been tested yet. Have you any ready to go benchmark to test with? 
- the patches work both on 32- and 64bit version. 

To compare the binary output, I used objdump and diff on libpcm.a. I compared the master with each patch with both 32 and 64 versions. 

Hope it will help. 
Regards, 
Frédéric 


De: "tiwai" <tiwai@suse.de> 
À: "frederic recoules" <frederic.recoules@univ-grenoble-alpes.fr> 
Cc: "alsa-devel" <alsa-devel@alsa-project.org>, "frederic recoules" <frederic.recoules@orange.fr> 
Envoyé: Mercredi 29 Avril 2020 10:19:11 
Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces 

On Mon, 27 Apr 2020 18:57:07 +0200, 
frederic.recoules@univ-grenoble-alpes.fr wrote: 
> 
> From: Frédéric Recoules <frederic.recoules@orange.fr> 
> 
> Enabler change: 
> - set HAVE_MMX in configure if the compiler is aware of MMX technology 
> 
> Main changes are: 
> - move 'size' and 'old_ebx' to the output list since they are clobbered 
> - add the "memory" keyword since input pointers are dereferenced 
> - add mmx registers in the clobber list and add an initialization for mm1 
> - add ebx in clobbers via a set of macro when GCC is newer than 5.0 
> (it will work for other compilers or non-PIC mode too) 
> 
> Minor changes are: 
> - keep consistent the token numbering in the template 
> - remove the manual save/restore ebx when it is in the clobber list 
> - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates 
> - allows 'size' to be given by register (e.g. ebp) 
> - add "cc" keyword since the eflag register is clobbered 
> 
> Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr> 

Thanks. Now I confirmed that the build test passed. 

But before merging the patch: have you actually tested the code on 
your machine and confirmed that it keeps working? If you've tested, 
it'd be helpful to mention about the test coverage, it's a good 
measure for judgment. Since it handles different compile flavors 
(with and without MMX support), I'd like to know whether it's actually 
tested. 

BTW, when you resubmit a patch with the correction, please put "v2" or 
whatever the revision in the subject to tell from the earlier 
patches. Also, it'd be appreciated to list up summaries of changes 
between v1 and v2 in the patch description or after the separator line 
("---") if you don't need to include that in the git commit log. 


thanks, 

Takashi 

> --- 
> configure.ac | 7 ++ 
> src/pcm/pcm_dmix_i386.h | 174 +++++++++++++++++++++++----------------- 
> 2 files changed, 106 insertions(+), 75 deletions(-) 
> 
> diff --git a/configure.ac b/configure.ac 
> index 4b5ab662..1838e50b 100644 
> --- a/configure.ac 
> +++ b/configure.ac 
> @@ -516,6 +516,13 @@ if test -z "$gcc_have_atomics"; then 
> fi 
> AC_MSG_RESULT($gcc_have_atomics) 
> 
> +dnl check mmx register for pcm_dmix_i386 
> + 
> +AC_TRY_LINK([], 
> + [__asm__ volatile ("" : : : "mm0");], 
> + [AC_DEFINE([HAVE_MMX], "1", [MMX technology is enabled])], 
> + []) 
> + 
> PCM_PLUGIN_LIST="copy linear route mulaw alaw adpcm rate plug multi shm file null empty share meter hooks lfloat ladspa dmix dshare dsnoop asym iec958 softvol extplug ioplug mmap_emul" 
> 
> build_pcm_plugin="no" 
> diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h 
> index 2778cb1d..aa1717f6 100644 
> --- a/src/pcm/pcm_dmix_i386.h 
> +++ b/src/pcm/pcm_dmix_i386.h 
> @@ -26,6 +26,13 @@ 
> * 
> */ 
> 
> +#define COMMA , 
> +#if __GNUC__ < 5 && defined(__PIC__) 
> +# define GCC_PIC_SWITCH(before,after) before 
> +#else 
> +# define GCC_PIC_SWITCH(before,after) after 
> +#endif 
> + 
> /* 
> * for plain i386 
> */ 
> @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 2f\n" 
> "\tjmp 7f\n" 
> @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size, 
> */ 
> "\t.p2align 4,,15\n" 
> "1:" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> 
> /* 
> * sample = *src; 
> @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size, 
> "\tjnz 4b\n" 
> "\tdecl %0\n" 
> "\tjnz 1b\n" 
> - 
> - "7:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "7:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> 
> @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> - * initialization, load ESI, EDI, EBX registers 
> + * initialization, load ESI, EDI, EBX registers, clear MM1 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tpxor %%mm1, %%mm1\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 2f\n" 
> "\tjmp 5f\n" 
> 
> "\t.p2align 4,,15\n" 
> "1:" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> 
> "2:" 
> /* 
> @@ -230,13 +241,20 @@ static void MIX_AREAS_16_MMX(unsigned int size, 
> "\tjnz 1b\n" 
> "\temms\n" 
> "5:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> - 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc", 
> +# ifdef HAVE_MMX 
> + "mm0", "mm1" 
> +# else 
> + "st", "st(1)", "st(2)", "st(3)", 
> + "st(4)", "st(5)", "st(6)", "st(7)" 
> +# endif 
> ); 
> } 
> 
> @@ -261,13 +279,14 @@ static void MIX_AREAS_32(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 1f\n" 
> "\tjmp 6f\n" 
> @@ -337,19 +356,20 @@ static void MIX_AREAS_32(unsigned int size, 
> */ 
> "\tdecl %0\n" 
> "\tjz 6f\n" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> "\tjmp 1b\n" 
> - 
> - "6:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "6:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> 
> @@ -374,13 +394,14 @@ static void MIX_AREAS_24(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 1f\n" 
> "\tjmp 6f\n" 
> @@ -443,19 +464,20 @@ static void MIX_AREAS_24(unsigned int size, 
> */ 
> "\tdecl %0\n" 
> "\tjz 6f\n" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> "\tjmp 1b\n" 
> - 
> - "6:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "6:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> 
> @@ -480,13 +502,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjz 6f\n" 
> 
> @@ -541,19 +564,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size, 
> /* 
> * while (size-- > 0) 
> */ 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> "\tdecl %0\n" 
> "\tjnz 1b\n" 
> - 
> - "6:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "6:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> -- 
> 2.17.1 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-04-29  8:19 ` Takashi Iwai
@ 2020-04-30  9:41   ` FRÉDÉRIC RECOULES
  2020-05-04 19:25     ` FRÉDÉRIC RECOULES
  0 siblings, 1 reply; 10+ messages in thread
From: FRÉDÉRIC RECOULES @ 2020-04-30  9:41 UTC (permalink / raw)
  To: tiwai; +Cc: alsa-devel, frederic recoules

Hi Takashi, 

I resubmitted the patch (V2) with some modifications: 
- I split the changes so you will not have to apply all of them; 
- the 4 first patches are the ones for safety, and barely produce the same binary output some I am highly confident in the fact they do not need testing; 
- the 2 last patches are small changes that can help the compiler producing a better/smaller code. They have not been tested yet. Have you any ready to go benchmark to test with? 
- the patches work both on 32- and 64bit version. 

To compare the binary output, I used objdump and diff on libpcm.a. I compared the master with each patch with both 32 and 64 versions. 

Hope it will help. 
Regards, 
Frédéric 


De: "tiwai" <tiwai@suse.de> 
À: "frederic recoules" <frederic.recoules@univ-grenoble-alpes.fr> 
Cc: "alsa-devel" <alsa-devel@alsa-project.org>, "frederic recoules" <frederic.recoules@orange.fr> 
Envoyé: Mercredi 29 Avril 2020 10:19:11 
Objet: Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces 

On Mon, 27 Apr 2020 18:57:07 +0200, 
frederic.recoules@univ-grenoble-alpes.fr wrote: 
> 
> From: Frédéric Recoules <frederic.recoules@orange.fr> 
> 
> Enabler change: 
> - set HAVE_MMX in configure if the compiler is aware of MMX technology 
> 
> Main changes are: 
> - move 'size' and 'old_ebx' to the output list since they are clobbered 
> - add the "memory" keyword since input pointers are dereferenced 
> - add mmx registers in the clobber list and add an initialization for mm1 
> - add ebx in clobbers via a set of macro when GCC is newer than 5.0 
> (it will work for other compilers or non-PIC mode too) 
> 
> Minor changes are: 
> - keep consistent the token numbering in the template 
> - remove the manual save/restore ebx when it is in the clobber list 
> - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates 
> - allows 'size' to be given by register (e.g. ebp) 
> - add "cc" keyword since the eflag register is clobbered 
> 
> Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr> 

Thanks. Now I confirmed that the build test passed. 

But before merging the patch: have you actually tested the code on 
your machine and confirmed that it keeps working? If you've tested, 
it'd be helpful to mention about the test coverage, it's a good 
measure for judgment. Since it handles different compile flavors 
(with and without MMX support), I'd like to know whether it's actually 
tested. 

BTW, when you resubmit a patch with the correction, please put "v2" or 
whatever the revision in the subject to tell from the earlier 
patches. Also, it'd be appreciated to list up summaries of changes 
between v1 and v2 in the patch description or after the separator line 
("---") if you don't need to include that in the git commit log. 


thanks, 

Takashi 

> --- 
> configure.ac | 7 ++ 
> src/pcm/pcm_dmix_i386.h | 174 +++++++++++++++++++++++----------------- 
> 2 files changed, 106 insertions(+), 75 deletions(-) 
> 
> diff --git a/configure.ac b/configure.ac 
> index 4b5ab662..1838e50b 100644 
> --- a/configure.ac 
> +++ b/configure.ac 
> @@ -516,6 +516,13 @@ if test -z "$gcc_have_atomics"; then 
> fi 
> AC_MSG_RESULT($gcc_have_atomics) 
> 
> +dnl check mmx register for pcm_dmix_i386 
> + 
> +AC_TRY_LINK([], 
> + [__asm__ volatile ("" : : : "mm0");], 
> + [AC_DEFINE([HAVE_MMX], "1", [MMX technology is enabled])], 
> + []) 
> + 
> PCM_PLUGIN_LIST="copy linear route mulaw alaw adpcm rate plug multi shm file null empty share meter hooks lfloat ladspa dmix dshare dsnoop asym iec958 softvol extplug ioplug mmap_emul" 
> 
> build_pcm_plugin="no" 
> diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h 
> index 2778cb1d..aa1717f6 100644 
> --- a/src/pcm/pcm_dmix_i386.h 
> +++ b/src/pcm/pcm_dmix_i386.h 
> @@ -26,6 +26,13 @@ 
> * 
> */ 
> 
> +#define COMMA , 
> +#if __GNUC__ < 5 && defined(__PIC__) 
> +# define GCC_PIC_SWITCH(before,after) before 
> +#else 
> +# define GCC_PIC_SWITCH(before,after) after 
> +#endif 
> + 
> /* 
> * for plain i386 
> */ 
> @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 2f\n" 
> "\tjmp 7f\n" 
> @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size, 
> */ 
> "\t.p2align 4,,15\n" 
> "1:" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> 
> /* 
> * sample = *src; 
> @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size, 
> "\tjnz 4b\n" 
> "\tdecl %0\n" 
> "\tjnz 1b\n" 
> - 
> - "7:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "7:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> 
> @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> - * initialization, load ESI, EDI, EBX registers 
> + * initialization, load ESI, EDI, EBX registers, clear MM1 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tpxor %%mm1, %%mm1\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 2f\n" 
> "\tjmp 5f\n" 
> 
> "\t.p2align 4,,15\n" 
> "1:" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> 
> "2:" 
> /* 
> @@ -230,13 +241,20 @@ static void MIX_AREAS_16_MMX(unsigned int size, 
> "\tjnz 1b\n" 
> "\temms\n" 
> "5:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> - 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc", 
> +# ifdef HAVE_MMX 
> + "mm0", "mm1" 
> +# else 
> + "st", "st(1)", "st(2)", "st(3)", 
> + "st(4)", "st(5)", "st(6)", "st(7)" 
> +# endif 
> ); 
> } 
> 
> @@ -261,13 +279,14 @@ static void MIX_AREAS_32(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 1f\n" 
> "\tjmp 6f\n" 
> @@ -337,19 +356,20 @@ static void MIX_AREAS_32(unsigned int size, 
> */ 
> "\tdecl %0\n" 
> "\tjz 6f\n" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> "\tjmp 1b\n" 
> - 
> - "6:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "6:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> 
> @@ -374,13 +394,14 @@ static void MIX_AREAS_24(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjnz 1f\n" 
> "\tjmp 6f\n" 
> @@ -443,19 +464,20 @@ static void MIX_AREAS_24(unsigned int size, 
> */ 
> "\tdecl %0\n" 
> "\tjz 6f\n" 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> "\tjmp 1b\n" 
> - 
> - "6:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "6:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> 
> @@ -480,13 +502,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size, 
> __asm__ __volatile__ ( 
> "\n" 
> 
> - "\tmovl %%ebx, %7\n" /* ebx is GOT pointer (-fPIC) */ 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",) 
> /* 
> * initialization, load ESI, EDI, EBX registers 
> */ 
> - "\tmovl %1, %%edi\n" 
> - "\tmovl %2, %%esi\n" 
> - "\tmovl %3, %%ebx\n" 
> + "\tmovl %2, %%edi\n" 
> + "\tmovl %3, %%esi\n" 
> + "\tmovl %4, %%ebx\n" 
> "\tcmpl $0, %0\n" 
> "\tjz 6f\n" 
> 
> @@ -541,19 +564,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size, 
> /* 
> * while (size-- > 0) 
> */ 
> - "\tadd %4, %%edi\n" 
> - "\tadd %5, %%esi\n" 
> - "\tadd %6, %%ebx\n" 
> + "\tadd %5, %%edi\n" 
> + "\tadd %6, %%esi\n" 
> + "\tadd %7, %%ebx\n" 
> "\tdecl %0\n" 
> "\tjnz 1b\n" 
> - 
> - "6:" 
> - "\tmovl %7, %%ebx\n" /* ebx is GOT pointer (-fPIC) */ 
> 
> - : /* no output regs */ 
> - : "m" (size), "m" (dst), "m" (src), 
> - "m" (sum), "m" (dst_step), "m" (src_step), 
> - "m" (sum_step), "m" (old_ebx) 
> - : "esi", "edi", "edx", "ecx", "eax" 
> + "6:" 
> + /* ebx is GOT pointer (-fPIC) */ 
> + GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",) 
> + 
> + : "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx) 
> + : "m" (dst), "m" (src), "m" (sum), 
> + "im" (dst_step), "im" (src_step), "im" (sum_step) 
> + : "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax", 
> + "memory", "cc" 
> ); 
> } 
> -- 
> 2.17.1 
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
  2020-04-27 16:57 frederic.recoules
@ 2020-04-29  8:19 ` Takashi Iwai
  2020-04-30  9:41   ` FRÉDÉRIC RECOULES
  0 siblings, 1 reply; 10+ messages in thread
From: Takashi Iwai @ 2020-04-29  8:19 UTC (permalink / raw)
  To: frederic.recoules; +Cc: alsa-devel, Frédéric Recoules

On Mon, 27 Apr 2020 18:57:07 +0200,
frederic.recoules@univ-grenoble-alpes.fr wrote:
> 
> From: Frédéric Recoules <frederic.recoules@orange.fr>
> 
> Enabler change:
>   - set HAVE_MMX in configure if the compiler is aware of MMX technology
> 
> Main changes are:
>   - move 'size' and 'old_ebx' to the output list since they are clobbered
>   - add the "memory" keyword since input pointers are dereferenced
>   - add mmx registers in the clobber list and add an initialization for mm1
>   - add ebx in clobbers via a set of macro when GCC is newer than 5.0
>     (it will work for other compilers or non-PIC mode too)
> 
> Minor changes are:
>   - keep consistent the token numbering in the template
>   - remove the manual save/restore ebx when it is in the clobber list
>   - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates
>   - allows 'size' to be given by register (e.g. ebp)
>   - add "cc" keyword since the eflag register is clobbered
> 
> Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr>

Thanks.  Now I confirmed that the build test passed.

But before merging the patch: have you actually tested the code on
your machine and confirmed that it keeps working?  If you've tested,
it'd be helpful to mention about the test coverage, it's a good
measure for judgment.  Since it handles different compile flavors
(with and without MMX support), I'd like to know whether it's actually
tested.

BTW, when you resubmit a patch with the correction, please put "v2" or
whatever the revision in the subject to tell from the earlier
patches.  Also, it'd be appreciated to list up summaries of changes
between v1 and v2 in the patch description or after the separator line
("---") if you don't need to include that in the git commit log.


thanks,

Takashi

> ---
>  configure.ac            |   7 ++
>  src/pcm/pcm_dmix_i386.h | 174 +++++++++++++++++++++++-----------------
>  2 files changed, 106 insertions(+), 75 deletions(-)
> 
> diff --git a/configure.ac b/configure.ac
> index 4b5ab662..1838e50b 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -516,6 +516,13 @@ if test -z "$gcc_have_atomics"; then
>  fi
>  AC_MSG_RESULT($gcc_have_atomics)
>  
> +dnl check mmx register for pcm_dmix_i386
> +
> +AC_TRY_LINK([],
> +    [__asm__ volatile ("" : : : "mm0");],
> +    [AC_DEFINE([HAVE_MMX], "1", [MMX technology is enabled])],
> +    [])
> +
>  PCM_PLUGIN_LIST="copy linear route mulaw alaw adpcm rate plug multi shm file null empty share meter hooks lfloat ladspa dmix dshare dsnoop asym iec958 softvol extplug ioplug mmap_emul"
>  
>  build_pcm_plugin="no"
> diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
> index 2778cb1d..aa1717f6 100644
> --- a/src/pcm/pcm_dmix_i386.h
> +++ b/src/pcm/pcm_dmix_i386.h
> @@ -26,6 +26,13 @@
>   *
>   */
>  
> +#define COMMA ,
> +#if __GNUC__ < 5 && defined(__PIC__)
> +#  define GCC_PIC_SWITCH(before,after) before
> +#else
> +#  define GCC_PIC_SWITCH(before,after) after
> +#endif
> +
>  /*
>   *  for plain i386
>   */
> @@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 2f\n"
>  		"\tjmp 7f\n"
> @@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size,
>  		 */
>  		"\t.p2align 4,,15\n"
>  		"1:"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  
>  		/*
>  		 *   sample = *src;
> @@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size,
>  		"\tjnz 4b\n"
>  		"\tdecl %0\n"
>  		"\tjnz 1b\n"
> -		
> -		"7:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"7:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
>  
> @@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
> -		 *  initialization, load ESI, EDI, EBX registers
> +		 *  initialization, load ESI, EDI, EBX registers, clear MM1
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tpxor %%mm1, %%mm1\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 2f\n"
>  		"\tjmp 5f\n"
>  
>  		"\t.p2align 4,,15\n"
>  		"1:"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  
>  		"2:"
>  		/*
> @@ -230,13 +241,20 @@ static void MIX_AREAS_16_MMX(unsigned int size,
>  		"\tjnz 1b\n"
>  		"\temms\n"
>                  "5:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
> -
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc",
> +#               ifdef HAVE_MMX
> +		  "mm0", "mm1"
> +#               else
> +		  "st", "st(1)", "st(2)", "st(3)",
> +		  "st(4)", "st(5)", "st(6)", "st(7)"
> +#               endif
>  	);
>  }
>  
> @@ -261,13 +279,14 @@ static void MIX_AREAS_32(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 1f\n"
>  		"\tjmp 6f\n"
> @@ -337,19 +356,20 @@ static void MIX_AREAS_32(unsigned int size,
>  		 */
>  		"\tdecl %0\n"
>  		"\tjz 6f\n"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  		"\tjmp 1b\n"
> -		
> -		"6:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"6:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
>  
> @@ -374,13 +394,14 @@ static void MIX_AREAS_24(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjnz 1f\n"
>  		"\tjmp 6f\n"
> @@ -443,19 +464,20 @@ static void MIX_AREAS_24(unsigned int size,
>  		 */
>  		"\tdecl %0\n"
>  		"\tjz 6f\n"
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  		"\tjmp 1b\n"
> -		
> -		"6:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"6:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
>  
> @@ -480,13 +502,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
>  	__asm__ __volatile__ (
>  		"\n"
>  
> -		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
>  		/*
>  		 *  initialization, load ESI, EDI, EBX registers
>  		 */
> -		"\tmovl %1, %%edi\n"
> -		"\tmovl %2, %%esi\n"
> -		"\tmovl %3, %%ebx\n"
> +		"\tmovl %2, %%edi\n"
> +		"\tmovl %3, %%esi\n"
> +		"\tmovl %4, %%ebx\n"
>  		"\tcmpl $0, %0\n"
>  		"\tjz 6f\n"
>  
> @@ -541,19 +564,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
>  		/*
>  		 * while (size-- > 0)
>  		 */
> -		"\tadd %4, %%edi\n"
> -		"\tadd %5, %%esi\n"
> -		"\tadd %6, %%ebx\n"
> +		"\tadd %5, %%edi\n"
> +		"\tadd %6, %%esi\n"
> +		"\tadd %7, %%ebx\n"
>  		"\tdecl %0\n"
>  		"\tjnz 1b\n"
> -		
> -		"6:"
> -		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
>  
> -		: /* no output regs */
> -		: "m" (size), "m" (dst), "m" (src),
> -		  "m" (sum), "m" (dst_step), "m" (src_step),
> -		  "m" (sum_step), "m" (old_ebx)
> -		: "esi", "edi", "edx", "ecx", "eax"
> +		"6:"
> +		/* ebx is GOT pointer (-fPIC) */
> +		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
> +
> +		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
> +		: "m" (dst), "m" (src), "m" (sum),
> +		  "im" (dst_step), "im" (src_step), "im" (sum_step)
> +		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
> +		  "memory", "cc"
>  	);
>  }
> -- 
> 2.17.1
> 

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces
@ 2020-04-27 16:57 frederic.recoules
  2020-04-29  8:19 ` Takashi Iwai
  0 siblings, 1 reply; 10+ messages in thread
From: frederic.recoules @ 2020-04-27 16:57 UTC (permalink / raw)
  To: alsa-devel; +Cc: Frédéric Recoules

From: Frédéric Recoules <frederic.recoules@orange.fr>

Enabler change:
  - set HAVE_MMX in configure if the compiler is aware of MMX technology

Main changes are:
  - move 'size' and 'old_ebx' to the output list since they are clobbered
  - add the "memory" keyword since input pointers are dereferenced
  - add mmx registers in the clobber list and add an initialization for mm1
  - add ebx in clobbers via a set of macro when GCC is newer than 5.0
    (it will work for other compilers or non-PIC mode too)

Minor changes are:
  - keep consistent the token numbering in the template
  - remove the manual save/restore ebx when it is in the clobber list
  - allows 'dst_step', 'src_step' and 'sum_step' to be given by immediates
  - allows 'size' to be given by register (e.g. ebp)
  - add "cc" keyword since the eflag register is clobbered

Signed-off-by: Frédéric Recoules <frederic.recoules@orange.fr>
---
 configure.ac            |   7 ++
 src/pcm/pcm_dmix_i386.h | 174 +++++++++++++++++++++++-----------------
 2 files changed, 106 insertions(+), 75 deletions(-)

diff --git a/configure.ac b/configure.ac
index 4b5ab662..1838e50b 100644
--- a/configure.ac
+++ b/configure.ac
@@ -516,6 +516,13 @@ if test -z "$gcc_have_atomics"; then
 fi
 AC_MSG_RESULT($gcc_have_atomics)
 
+dnl check mmx register for pcm_dmix_i386
+
+AC_TRY_LINK([],
+    [__asm__ volatile ("" : : : "mm0");],
+    [AC_DEFINE([HAVE_MMX], "1", [MMX technology is enabled])],
+    [])
+
 PCM_PLUGIN_LIST="copy linear route mulaw alaw adpcm rate plug multi shm file null empty share meter hooks lfloat ladspa dmix dshare dsnoop asym iec958 softvol extplug ioplug mmap_emul"
 
 build_pcm_plugin="no"
diff --git a/src/pcm/pcm_dmix_i386.h b/src/pcm/pcm_dmix_i386.h
index 2778cb1d..aa1717f6 100644
--- a/src/pcm/pcm_dmix_i386.h
+++ b/src/pcm/pcm_dmix_i386.h
@@ -26,6 +26,13 @@
  *
  */
 
+#define COMMA ,
+#if __GNUC__ < 5 && defined(__PIC__)
+#  define GCC_PIC_SWITCH(before,after) before
+#else
+#  define GCC_PIC_SWITCH(before,after) after
+#endif
+
 /*
  *  for plain i386
  */
@@ -47,13 +54,14 @@ static void MIX_AREAS_16(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 2f\n"
 		"\tjmp 7f\n"
@@ -64,9 +72,9 @@ static void MIX_AREAS_16(unsigned int size,
 		 */
 		"\t.p2align 4,,15\n"
 		"1:"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 
 		/*
 		 *   sample = *src;
@@ -138,15 +146,16 @@ static void MIX_AREAS_16(unsigned int size,
 		"\tjnz 4b\n"
 		"\tdecl %0\n"
 		"\tjnz 1b\n"
-		
-		"7:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"7:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
 
@@ -171,22 +180,24 @@ static void MIX_AREAS_16_MMX(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
-		 *  initialization, load ESI, EDI, EBX registers
+		 *  initialization, load ESI, EDI, EBX registers, clear MM1
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tpxor %%mm1, %%mm1\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 2f\n"
 		"\tjmp 5f\n"
 
 		"\t.p2align 4,,15\n"
 		"1:"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 
 		"2:"
 		/*
@@ -230,13 +241,20 @@ static void MIX_AREAS_16_MMX(unsigned int size,
 		"\tjnz 1b\n"
 		"\temms\n"
                 "5:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
-
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc",
+#               ifdef HAVE_MMX
+		  "mm0", "mm1"
+#               else
+		  "st", "st(1)", "st(2)", "st(3)",
+		  "st(4)", "st(5)", "st(6)", "st(7)"
+#               endif
 	);
 }
 
@@ -261,13 +279,14 @@ static void MIX_AREAS_32(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 1f\n"
 		"\tjmp 6f\n"
@@ -337,19 +356,20 @@ static void MIX_AREAS_32(unsigned int size,
 		 */
 		"\tdecl %0\n"
 		"\tjz 6f\n"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 		"\tjmp 1b\n"
-		
-		"6:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"6:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
 
@@ -374,13 +394,14 @@ static void MIX_AREAS_24(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjnz 1f\n"
 		"\tjmp 6f\n"
@@ -443,19 +464,20 @@ static void MIX_AREAS_24(unsigned int size,
 		 */
 		"\tdecl %0\n"
 		"\tjz 6f\n"
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 		"\tjmp 1b\n"
-		
-		"6:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"6:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
 
@@ -480,13 +502,14 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
 	__asm__ __volatile__ (
 		"\n"
 
-		"\tmovl %%ebx, %7\n"	/* ebx is GOT pointer (-fPIC) */
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %%ebx, %1\n",)
 		/*
 		 *  initialization, load ESI, EDI, EBX registers
 		 */
-		"\tmovl %1, %%edi\n"
-		"\tmovl %2, %%esi\n"
-		"\tmovl %3, %%ebx\n"
+		"\tmovl %2, %%edi\n"
+		"\tmovl %3, %%esi\n"
+		"\tmovl %4, %%ebx\n"
 		"\tcmpl $0, %0\n"
 		"\tjz 6f\n"
 
@@ -541,19 +564,20 @@ static void MIX_AREAS_24_CMOV(unsigned int size,
 		/*
 		 * while (size-- > 0)
 		 */
-		"\tadd %4, %%edi\n"
-		"\tadd %5, %%esi\n"
-		"\tadd %6, %%ebx\n"
+		"\tadd %5, %%edi\n"
+		"\tadd %6, %%esi\n"
+		"\tadd %7, %%ebx\n"
 		"\tdecl %0\n"
 		"\tjnz 1b\n"
-		
-		"6:"
-		"\tmovl %7, %%ebx\n"	/* ebx is GOT pointer (-fPIC) */
 
-		: /* no output regs */
-		: "m" (size), "m" (dst), "m" (src),
-		  "m" (sum), "m" (dst_step), "m" (src_step),
-		  "m" (sum_step), "m" (old_ebx)
-		: "esi", "edi", "edx", "ecx", "eax"
+		"6:"
+		/* ebx is GOT pointer (-fPIC) */
+		GCC_PIC_SWITCH("\tmovl %1, %%ebx\n",)
+
+		: "+&rm" (size), GCC_PIC_SWITCH("=m","=X") (old_ebx)
+		: "m" (dst), "m" (src), "m" (sum),
+		  "im" (dst_step), "im" (src_step), "im" (sum_step)
+		: "esi", "edi", "edx", "ecx", GCC_PIC_SWITCH(,"ebx"COMMA) "eax",
+		  "memory", "cc"
 	);
 }
-- 
2.17.1


^ permalink raw reply related	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-05-06 18:08 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-27  7:36 [PATCH] [inline assembly] fix pcm_dmix_i386.h assembly chunk interfaces frederic.recoules
2020-04-27 13:47 ` Takashi Iwai
     [not found]   ` <1871992915.6898305.1587998132827.JavaMail.zimbra@univ-grenoble-alpes.fr>
2020-04-27 15:12     ` Takashi Iwai
2020-04-27 16:00       ` FRÉDÉRIC RECOULES
2020-04-27 16:57 frederic.recoules
2020-04-29  8:19 ` Takashi Iwai
2020-04-30  9:41   ` FRÉDÉRIC RECOULES
2020-05-04 19:25     ` FRÉDÉRIC RECOULES
2020-05-05 13:52       ` Takashi Iwai
2020-05-06 18:07         ` FRÉDÉRIC RECOULES

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).