[PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi

kvm.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

* [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi
@ 2021-05-12 10:54 Paolo Bonzini
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines Paolo Bonzini
                   ` (2 more replies)
  0 siblings, 3 replies; 8+ messages in thread
From: Paolo Bonzini @ 2021-05-12 10:54 UTC (permalink / raw)
  To: kvm; +Cc: Alexandru Elisei

As reported by Alexandru, ARM follows a different convention than
x86 so it needs __aeabi_ldivmod and __aeabi_uldivmod.  Because
it does not use __divdi3 and __moddi3, it also needs __divmoddi4
to build the eabi function upon.

Paolo

v1->v2: fix __divmoddi4, make sure -DTEST covers it

Paolo Bonzini (2):
  libcflat: clean up and complete long division routines
  arm: add eabi version of 64-bit division functions

 arm/Makefile.arm  |  1 +
 lib/arm/ldivmod.S | 32 ++++++++++++++++++++++++++++++++
 lib/ldiv32.c      | 40 ++++++++++++++++++++++++----------------
 3 files changed, 57 insertions(+), 16 deletions(-)
 create mode 100644 lib/arm/ldivmod.S

-- 
2.31.1


^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines
  2021-05-12 10:54 [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Paolo Bonzini
@ 2021-05-12 10:54 ` Paolo Bonzini
  2021-05-12 13:44   ` Alexandru Elisei
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions Paolo Bonzini
  2021-05-12 14:04 ` [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Alexandru Elisei
  2 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2021-05-12 10:54 UTC (permalink / raw)
  To: kvm; +Cc: Alexandru Elisei

Avoid possible uninitialized variables on machines where
division by zero does not trap.  Add __divmoddi4, and
use it in __moddi3 and __divdi3.

Reported-by: Alexandru Elisei <alexandru.elisei@arm.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 lib/ldiv32.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)

diff --git a/lib/ldiv32.c b/lib/ldiv32.c
index 96f4b35..897a4b9 100644
--- a/lib/ldiv32.c
+++ b/lib/ldiv32.c
@@ -1,6 +1,7 @@
 #include <stdint.h>
 
 extern uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *p_rem);
+extern int64_t __divmoddi4(int64_t num, int64_t den, int64_t *p_rem);
 extern int64_t __moddi3(int64_t num, int64_t den);
 extern int64_t __divdi3(int64_t num, int64_t den);
 extern uint64_t __udivdi3(uint64_t num, uint64_t den);
@@ -11,8 +12,11 @@ uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *p_rem)
 	uint64_t quot = 0;
 
 	/* Trigger a division by zero at run time (trick taken from iPXE).  */
-	if (den == 0)
+	if (den == 0) {
+		if (p_rem)
+			*p_rem = 0;
 		return 1/((unsigned)den);
+	}
 
 	if (num >= den) {
 		/* Align den to num to avoid wasting time on leftmost zero bits.  */
@@ -35,31 +39,35 @@ uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *p_rem)
 	return quot;
 }
 
-int64_t __moddi3(int64_t num, int64_t den)
+int64_t __divmoddi4(int64_t num, int64_t den, int64_t *p_rem)
 {
-	uint64_t mask = num < 0 ? -1 : 0;
+	int32_t nmask = num < 0 ? -1 : 0;
+	int32_t qmask = (num ^ den) < 0 ? -1 : 0;
+	uint64_t quot;
 
 	/* Compute absolute values and do an unsigned division.  */
-	num = (num + mask) ^ mask;
+	num = (num + nmask) ^ nmask;
 	if (den < 0)
 		den = -den;
 
-	/* Copy sign of num into result.  */
-	return (__umoddi3(num, den) + mask) ^ mask;
+	/* Copy sign of num^den into quotient, sign of num into remainder.  */
+	quot = (__udivmoddi4(num, den, (uint64_t *)p_rem) + qmask) ^ qmask;
+	if (p_rem)
+		*p_rem = (*p_rem + nmask) ^ nmask;
+	return quot;
 }
 
-int64_t __divdi3(int64_t num, int64_t den)
+int64_t __moddi3(int64_t num, int64_t den)
 {
-	uint64_t mask = (num ^ den) < 0 ? -1 : 0;
-
-	/* Compute absolute values and do an unsigned division.  */
-	if (num < 0)
-		num = -num;
-	if (den < 0)
-		den = -den;
+	int64_t rem;
+	__divmoddi4(num, den, &rem);
+	return rem;
+}
 
-	/* Copy sign of num^den into result.  */
-	return (__udivdi3(num, den) + mask) ^ mask;
+int64_t __divdi3(int64_t num, int64_t den)
+{
+	int64_t rem;
+	return __divmoddi4(num, den, &rem);
 }
 
 uint64_t __udivdi3(uint64_t num, uint64_t den)
-- 
2.31.1



^ permalink raw reply related	[flat|nested] 8+ messages in thread

* [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions
  2021-05-12 10:54 [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Paolo Bonzini
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines Paolo Bonzini
@ 2021-05-12 10:54 ` Paolo Bonzini
  2021-05-12 13:44   ` Alexandru Elisei
  2021-05-12 14:04 ` [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Alexandru Elisei
  2 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2021-05-12 10:54 UTC (permalink / raw)
  To: kvm

eabi prescribes different entry points for 64-bit division on
32-bit platforms.  Implement a wrapper for the GCC-style __divmoddi4
and __udivmoddi4 functions.

Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arm/Makefile.arm  |  1 +
 lib/arm/ldivmod.S | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)
 create mode 100644 lib/arm/ldivmod.S

diff --git a/arm/Makefile.arm b/arm/Makefile.arm
index 687a8ed..3a4cc6b 100644
--- a/arm/Makefile.arm
+++ b/arm/Makefile.arm
@@ -24,6 +24,7 @@ cflatobjs += lib/arm/spinlock.o
 cflatobjs += lib/arm/processor.o
 cflatobjs += lib/arm/stack.o
 cflatobjs += lib/ldiv32.o
+cflatobjs += lib/arm/ldivmod.o
 
 # arm specific tests
 tests =
diff --git a/lib/arm/ldivmod.S b/lib/arm/ldivmod.S
new file mode 100644
index 0000000..de11ac9
--- /dev/null
+++ b/lib/arm/ldivmod.S
@@ -0,0 +1,32 @@
+// EABI ldivmod and uldivmod implementation based on libcompiler-rt
+//
+// This file is dual licensed under the MIT and the University of Illinois Open
+// Source Licenses.
+
+	.syntax unified
+	.align 2
+	.globl __aeabi_uldivmod
+	.type __aeabi_uldivmod, %function
+__aeabi_uldivmod:
+	push	{r11, lr}
+	sub	sp, sp, #16
+	add	r12, sp, #8
+	str	r12, [sp]                // third argument to __udivmoddi4
+	bl	__udivmoddi4
+	ldr	r2, [sp, #8]             // remainder returned in r2-r3
+	ldr	r3, [sp, #12]
+	add	sp, sp, #16
+	pop	{r11, pc}
+
+	.globl __aeabi_ldivmod
+	.type __aeabi_ldivmod, %function
+__aeabi_ldivmod:
+	push	{r11, lr}
+	sub	sp, sp, #16
+	add	r12, sp, #8
+	str	r12, [sp]                // third argument to __divmoddi4
+	bl	__divmoddi4
+	ldr	r2, [sp, #8]             // remainder returned in r2-r3
+	ldr	r3, [sp, #12]
+	add	sp, sp, #16
+	pop	{r11, pc}
-- 
2.31.1


^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions Paolo Bonzini
@ 2021-05-12 13:44   ` Alexandru Elisei
  2021-05-12 13:51     ` Paolo Bonzini
  0 siblings, 1 reply; 8+ messages in thread
From: Alexandru Elisei @ 2021-05-12 13:44 UTC (permalink / raw)
  To: Paolo Bonzini, kvm

Hi Paolo,

On 5/12/21 11:54 AM, Paolo Bonzini wrote:
> eabi prescribes different entry points for 64-bit division on
> 32-bit platforms.  Implement a wrapper for the GCC-style __divmoddi4
> and __udivmoddi4 functions.
>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  arm/Makefile.arm  |  1 +
>  lib/arm/ldivmod.S | 32 ++++++++++++++++++++++++++++++++
>  2 files changed, 33 insertions(+)
>  create mode 100644 lib/arm/ldivmod.S
>
> diff --git a/arm/Makefile.arm b/arm/Makefile.arm
> index 687a8ed..3a4cc6b 100644
> --- a/arm/Makefile.arm
> +++ b/arm/Makefile.arm
> @@ -24,6 +24,7 @@ cflatobjs += lib/arm/spinlock.o
>  cflatobjs += lib/arm/processor.o
>  cflatobjs += lib/arm/stack.o
>  cflatobjs += lib/ldiv32.o
> +cflatobjs += lib/arm/ldivmod.o
>  
>  # arm specific tests
>  tests =
> diff --git a/lib/arm/ldivmod.S b/lib/arm/ldivmod.S
> new file mode 100644
> index 0000000..de11ac9
> --- /dev/null
> +++ b/lib/arm/ldivmod.S
> @@ -0,0 +1,32 @@
> +// EABI ldivmod and uldivmod implementation based on libcompiler-rt
> +//
> +// This file is dual licensed under the MIT and the University of Illinois Open
> +// Source Licenses.

At first I was confused about the prototype for these functions, but I suppose
they are the functions defined by [1] and [2], and they take two int64
arguments, the numerator and the denominator.

[1]
https://android.googlesource.com/toolchain/compiler-rt/+/refs/heads/master/lib/builtins/arm/aeabi_uldivmod.S
[2]
https://android.googlesource.com/toolchain/compiler-rt/+/refs/heads/master/lib/builtins/arm/aeabi_ldivmod.S
> +
> +	.syntax unified
> +	.align 2
> +	.globl __aeabi_uldivmod
> +	.type __aeabi_uldivmod, %function
> +__aeabi_uldivmod:
> +	push	{r11, lr}
> +	sub	sp, sp, #16
> +	add	r12, sp, #8
> +	str	r12, [sp]                // third argument to __udivmoddi4

The way we call __udivmoddi4 looks correct to me. We make room on the stack to
store the remainder, and place that address at the top of the stack so it can be
used by the function as the third argument.

> +	bl	__udivmoddi4
> +	ldr	r2, [sp, #8]             // remainder returned in r2-r3
> +	ldr	r3, [sp, #12]
> +	add	sp, sp, #16
> +	pop	{r11, pc}

I'm not sure what is going on here. Is the function returning two 64-bit values as
a 128-bit vector? Or is the function being called from assembly and this is a
convention between it and the caller? I did a grep in the compiler-rt repo for
__aeabi_uldivmod and couldn't find any uses.

Other than my confusion about the return value, both functions match the
compiler-rt definitions.

Thanks,

Alex

> +
> +	.globl __aeabi_ldivmod
> +	.type __aeabi_ldivmod, %function
> +__aeabi_ldivmod:
> +	push	{r11, lr}
> +	sub	sp, sp, #16
> +	add	r12, sp, #8
> +	str	r12, [sp]                // third argument to __divmoddi4
> +	bl	__divmoddi4
> +	ldr	r2, [sp, #8]             // remainder returned in r2-r3
> +	ldr	r3, [sp, #12]
> +	add	sp, sp, #16
> +	pop	{r11, pc}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines Paolo Bonzini
@ 2021-05-12 13:44   ` Alexandru Elisei
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandru Elisei @ 2021-05-12 13:44 UTC (permalink / raw)
  To: Paolo Bonzini, kvm

Hi Paolo,

On 5/12/21 11:54 AM, Paolo Bonzini wrote:
> Avoid possible uninitialized variables on machines where
> division by zero does not trap.  Add __divmoddi4, and
> use it in __moddi3 and __divdi3.

Looks good now, I like the change to __moddi3 and __divdi3 as that means that the
tests will cover __divmoddi4, and the functions are now similar to their unsigned
counterparts:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex

>
> Reported-by: Alexandru Elisei <alexandru.elisei@arm.com>
> Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
> ---
>  lib/ldiv32.c | 40 ++++++++++++++++++++++++----------------
>  1 file changed, 24 insertions(+), 16 deletions(-)
>
> diff --git a/lib/ldiv32.c b/lib/ldiv32.c
> index 96f4b35..897a4b9 100644
> --- a/lib/ldiv32.c
> +++ b/lib/ldiv32.c
> @@ -1,6 +1,7 @@
>  #include <stdint.h>
>  
>  extern uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *p_rem);
> +extern int64_t __divmoddi4(int64_t num, int64_t den, int64_t *p_rem);
>  extern int64_t __moddi3(int64_t num, int64_t den);
>  extern int64_t __divdi3(int64_t num, int64_t den);
>  extern uint64_t __udivdi3(uint64_t num, uint64_t den);
> @@ -11,8 +12,11 @@ uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *p_rem)
>  	uint64_t quot = 0;
>  
>  	/* Trigger a division by zero at run time (trick taken from iPXE).  */
> -	if (den == 0)
> +	if (den == 0) {
> +		if (p_rem)
> +			*p_rem = 0;
>  		return 1/((unsigned)den);
> +	}
>  
>  	if (num >= den) {
>  		/* Align den to num to avoid wasting time on leftmost zero bits.  */
> @@ -35,31 +39,35 @@ uint64_t __udivmoddi4(uint64_t num, uint64_t den, uint64_t *p_rem)
>  	return quot;
>  }
>  
> -int64_t __moddi3(int64_t num, int64_t den)
> +int64_t __divmoddi4(int64_t num, int64_t den, int64_t *p_rem)
>  {
> -	uint64_t mask = num < 0 ? -1 : 0;
> +	int32_t nmask = num < 0 ? -1 : 0;
> +	int32_t qmask = (num ^ den) < 0 ? -1 : 0;
> +	uint64_t quot;
>  
>  	/* Compute absolute values and do an unsigned division.  */
> -	num = (num + mask) ^ mask;
> +	num = (num + nmask) ^ nmask;
>  	if (den < 0)
>  		den = -den;
>  
> -	/* Copy sign of num into result.  */
> -	return (__umoddi3(num, den) + mask) ^ mask;
> +	/* Copy sign of num^den into quotient, sign of num into remainder.  */
> +	quot = (__udivmoddi4(num, den, (uint64_t *)p_rem) + qmask) ^ qmask;
> +	if (p_rem)
> +		*p_rem = (*p_rem + nmask) ^ nmask;
> +	return quot;
>  }
>  
> -int64_t __divdi3(int64_t num, int64_t den)
> +int64_t __moddi3(int64_t num, int64_t den)
>  {
> -	uint64_t mask = (num ^ den) < 0 ? -1 : 0;
> -
> -	/* Compute absolute values and do an unsigned division.  */
> -	if (num < 0)
> -		num = -num;
> -	if (den < 0)
> -		den = -den;
> +	int64_t rem;
> +	__divmoddi4(num, den, &rem);
> +	return rem;
> +}
>  
> -	/* Copy sign of num^den into result.  */
> -	return (__udivdi3(num, den) + mask) ^ mask;
> +int64_t __divdi3(int64_t num, int64_t den)
> +{
> +	int64_t rem;
> +	return __divmoddi4(num, den, &rem);
>  }
>  
>  uint64_t __udivdi3(uint64_t num, uint64_t den)

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions
  2021-05-12 13:44   ` Alexandru Elisei
@ 2021-05-12 13:51     ` Paolo Bonzini
  2021-05-12 14:04       ` Alexandru Elisei
  0 siblings, 1 reply; 8+ messages in thread
From: Paolo Bonzini @ 2021-05-12 13:51 UTC (permalink / raw)
  To: Alexandru Elisei, kvm

On 12/05/21 15:44, Alexandru Elisei wrote:
>> +	bl	__udivmoddi4
>> +	ldr	r2, [sp, #8]             // remainder returned in r2-r3
>> +	ldr	r3, [sp, #12]
>> +	add	sp, sp, #16
>> +	pop	{r11, pc}
> 
> I'm not sure what is going on here. Is the function returning two 64-bit values as
> a 128-bit vector? Or is the function being called from assembly and this is a
> convention between it and the caller?

It's an eABI convention that spans the runtime and the compiler.

https://developer.arm.com/documentation/ihi0043/e/?lang=en#standardized-compiler-helper-functions 
says that a "pair of (unsigned) long longs is returned in {{r0,
r1}, {r2, r3}}, the quotient in {r0, r1}, and the remainder in {r2, r3}."

Paolo


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions
  2021-05-12 13:51     ` Paolo Bonzini
@ 2021-05-12 14:04       ` Alexandru Elisei
  0 siblings, 0 replies; 8+ messages in thread
From: Alexandru Elisei @ 2021-05-12 14:04 UTC (permalink / raw)
  To: Paolo Bonzini, kvm

Hi Paolo,

On 5/12/21 2:51 PM, Paolo Bonzini wrote:
> On 12/05/21 15:44, Alexandru Elisei wrote:
>>> +    bl    __udivmoddi4
>>> +    ldr    r2, [sp, #8]             // remainder returned in r2-r3
>>> +    ldr    r3, [sp, #12]
>>> +    add    sp, sp, #16
>>> +    pop    {r11, pc}
>>
>> I'm not sure what is going on here. Is the function returning two 64-bit values as
>> a 128-bit vector? Or is the function being called from assembly and this is a
>> convention between it and the caller?
>
> It's an eABI convention that spans the runtime and the compiler.
>
> https://developer.arm.com/documentation/ihi0043/e/?lang=en#standardized-compiler-helper-functions
> says that a "pair of (unsigned) long longs is returned in {{r0, r1}, {r2,
> r3}}, the quotient in {r0, r1}, and the remainder in {r2, r3}."

Thanks for the link, the functions are indeed returning the quotient in {r0, r1}
and remainder in {r2, r3} according to the convention:

Reviewed-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi
  2021-05-12 10:54 [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Paolo Bonzini
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines Paolo Bonzini
  2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions Paolo Bonzini
@ 2021-05-12 14:04 ` Alexandru Elisei
  2 siblings, 0 replies; 8+ messages in thread
From: Alexandru Elisei @ 2021-05-12 14:04 UTC (permalink / raw)
  To: Paolo Bonzini, kvm

Hello,

On 5/12/21 11:54 AM, Paolo Bonzini wrote:

> As reported by Alexandru, ARM follows a different convention than
> x86 so it needs __aeabi_ldivmod and __aeabi_uldivmod.  Because
> it does not use __divdi3 and __moddi3, it also needs __divmoddi4
> to build the eabi function upon.
>
> Paolo
>
> v1->v2: fix __divmoddi4, make sure -DTEST covers it
>
> Paolo Bonzini (2):
>   libcflat: clean up and complete long division routines
>   arm: add eabi version of 64-bit division functions
>
>  arm/Makefile.arm  |  1 +
>  lib/arm/ldivmod.S | 32 ++++++++++++++++++++++++++++++++
>  lib/ldiv32.c      | 40 ++++++++++++++++++++++++----------------
>  3 files changed, 57 insertions(+), 16 deletions(-)
>  create mode 100644 lib/arm/ldivmod.S
>
I ran the arm (compiled with arm-none-eabi-gcc and arm-linux-gnu-gcc) and arm64
tests under QEMU TCG, and everything worked as expected.

I ran the arm (compiled with both toolchains) and arm64 tests under qemu and
kvmtool on a Rockpro64 dev board, again I didn't encounter any issues. So for the
entire series:

Tested-by: Alexandru Elisei <alexandru.elisei@arm.com>

Thanks,

Alex


^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-05-12 14:03 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-12 10:54 [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Paolo Bonzini
2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 1/2] libcflat: clean up and complete long division routines Paolo Bonzini
2021-05-12 13:44   ` Alexandru Elisei
2021-05-12 10:54 ` [PATCH v2 kvm-unit-tests 2/2] arm: add eabi version of 64-bit division functions Paolo Bonzini
2021-05-12 13:44   ` Alexandru Elisei
2021-05-12 13:51     ` Paolo Bonzini
2021-05-12 14:04       ` Alexandru Elisei
2021-05-12 14:04 ` [PATCH v2 kvm-unit-tests 0/2] fix long division routines for ARM eabi Alexandru Elisei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).