* [Qemu-devel] [PATCH v3 1/3] tcg/aarch64: Introduce and use long branch to register
2017-06-30 0:40 [Qemu-devel] [PATCH v3 0/3] Relax code buffer size limitation on aarch64 hosts Pranith Kumar
@ 2017-06-30 0:40 ` Pranith Kumar
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
2 siblings, 0 replies; 8+ messages in thread
From: Pranith Kumar @ 2017-06-30 0:40 UTC (permalink / raw)
To: alex.bennee; +Cc: qemu-devel, rth
For exit_tb, we can use a branch-to-register instruction when the target
offset is outside the +/-128MB range of a direct branch.
CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
tcg/aarch64/tcg-target.inc.c | 15 +++++++++++++--
1 file changed, 13 insertions(+), 2 deletions(-)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 1fa3bccc89..8fce11ace7 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -819,6 +819,17 @@ static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
tcg_out_insn(s, 3206, B, offset);
}
+static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
+{
+ ptrdiff_t offset = target - s->code_ptr;
+ if (offset == sextract64(offset, 0, 26)) {
+ tcg_out_insn(s, 3206, BL, offset);
+ } else {
+ tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP, (intptr_t)target);
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
+ }
+}
+
static inline void tcg_out_goto_noaddr(TCGContext *s)
{
/* We pay attention here to not modify the branch target by reading from
@@ -1364,10 +1375,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_exit_tb:
/* Reuse the zeroing that exists for goto_ptr. */
if (a0 == 0) {
- tcg_out_goto(s, s->code_gen_epilogue);
+ tcg_out_goto_long(s, s->code_gen_epilogue);
} else {
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_X0, a0);
- tcg_out_goto(s, tb_ret_addr);
+ tcg_out_goto_long(s, tb_ret_addr);
}
break;
--
2.13.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH v3 2/3] tcg/aarch64: Use ADRP+ADD to compute target address
2017-06-30 0:40 [Qemu-devel] [PATCH v3 0/3] Relax code buffer size limitation on aarch64 hosts Pranith Kumar
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 1/3] tcg/aarch64: Introduce and use long branch to register Pranith Kumar
@ 2017-06-30 0:40 ` Pranith Kumar
2017-06-30 4:47 ` Richard Henderson
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
2 siblings, 1 reply; 8+ messages in thread
From: Pranith Kumar @ 2017-06-30 0:40 UTC (permalink / raw)
To: alex.bennee; +Cc: qemu-devel, rth
We use ADRP+ADD to compute the target address for goto_tb. This patch
introduces the NOP instruction, which is used to align the above
instruction pair to 8 bytes so that a single atomic write can patch
the destination offsets.
CC: Richard Henderson <rth@twiddle.net>
CC: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
accel/tcg/translate-all.c | 2 +-
tcg/aarch64/tcg-target.inc.c | 34 +++++++++++++++++++++++++++++-----
2 files changed, 30 insertions(+), 6 deletions(-)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index f6ad46b613..65a92dbf67 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -522,7 +522,7 @@ static inline PageDesc *page_find(tb_page_addr_t index)
#elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
#elif defined(__aarch64__)
-# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024)
+# define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__s390x__)
/* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index 8fce11ace7..f059d9d781 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -372,6 +372,7 @@ typedef enum {
I3510_EON = 0x4a200000,
I3510_ANDS = 0x6a000000,
+ NOP = 0xd503201f,
/* System instructions. */
DMB_ISH = 0xd50338bf,
DMB_LD = 0x00000100,
@@ -866,10 +867,26 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
- tcg_insn_unit *target = (tcg_insn_unit *)addr;
+ tcg_insn_unit i1, i2;
+ uint64_t pair;
- reloc_pc26_atomic(code_ptr, target);
- flush_icache_range(jmp_addr, jmp_addr + 4);
+ ptrdiff_t offset = addr - jmp_addr;
+
+ if (offset == sextract64(offset, 0, 26)) {
+ i1 = NOP;
+ i2 = I3206_B | ((offset >> 2) & 0x3ffffff);
+ } else {
+ offset = (addr >> 12) - (jmp_addr >> 12);
+
+ /* patch ADRP */
+ i2 = deposit32(*code_ptr++, 29, 2, offset & 0x3);
+ i2 = deposit32(i2, 5, 19, offset >> 2);
+ /* patch ADDI */
+ i1 = deposit32(*code_ptr, 10, 12, addr & 0xfff);
+ }
+ pair = (uint64_t)i1 << 32 | i2;
+ atomic_set((uint64_t *)jmp_addr, pair);
+ flush_icache_range(jmp_addr, jmp_addr + 8);
}
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
@@ -1388,10 +1405,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
#endif
/* consistency for USE_DIRECT_JUMP */
tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
+ /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+ write can be used to patch the target address. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
+ }
s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
/* actual branch destination will be patched by
- aarch64_tb_set_jmp_target later, beware retranslation. */
- tcg_out_goto_noaddr(s);
+ aarch64_tb_set_jmp_target later. */
+ tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
+ tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
--
2.13.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v3 2/3] tcg/aarch64: Use ADRP+ADD to compute target address
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
@ 2017-06-30 4:47 ` Richard Henderson
2017-06-30 13:52 ` Pranith Kumar
0 siblings, 1 reply; 8+ messages in thread
From: Richard Henderson @ 2017-06-30 4:47 UTC (permalink / raw)
To: Pranith Kumar, alex.bennee; +Cc: qemu-devel
On 06/29/2017 05:40 PM, Pranith Kumar wrote:
> void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
> {
> tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
> - tcg_insn_unit *target = (tcg_insn_unit *)addr;
> + tcg_insn_unit i1, i2;
> + uint64_t pair;
>
> + ptrdiff_t offset = addr - jmp_addr;
> +
> + if (offset == sextract64(offset, 0, 26)) {
> + i1 = NOP;
> + i2 = I3206_B | ((offset >> 2) & 0x3ffffff);
Branch first, since that's the offset you calculated.
Also, the nop need not be executed.
> + } else {
> + offset = (addr >> 12) - (jmp_addr >> 12);
> +
> + /* patch ADRP */
> + i2 = deposit32(*code_ptr++, 29, 2, offset & 0x3);
> + i2 = deposit32(i2, 5, 19, offset >> 2);
> + /* patch ADDI */
> + i1 = deposit32(*code_ptr, 10, 12, addr & 0xfff);
You can't just patch these insns, because they aren't necessarily ADRP+ADD.
Indeed, they will very likely be B and NOP. The first address we patch in is
tb_jmp_reset_offset, which is the following opcode, which is definitely in
range of the branch above.
r~
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v3 2/3] tcg/aarch64: Use ADRP+ADD to compute target address
2017-06-30 4:47 ` Richard Henderson
@ 2017-06-30 13:52 ` Pranith Kumar
0 siblings, 0 replies; 8+ messages in thread
From: Pranith Kumar @ 2017-06-30 13:52 UTC (permalink / raw)
To: Richard Henderson; +Cc: Alex Bennée, qemu-devel
On Fri, Jun 30, 2017 at 12:47 AM, Richard Henderson <rth@twiddle.net> wrote:
> On 06/29/2017 05:40 PM, Pranith Kumar wrote:
>>
>> void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
>> {
>> tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
>> - tcg_insn_unit *target = (tcg_insn_unit *)addr;
>> + tcg_insn_unit i1, i2;
>> + uint64_t pair;
>> + ptrdiff_t offset = addr - jmp_addr;
>> +
>> + if (offset == sextract64(offset, 0, 26)) {
>> + i1 = NOP;
>> + i2 = I3206_B | ((offset >> 2) & 0x3ffffff);
>
>
> Branch first, since that's the offset you calculated.
> Also, the nop need not be executed.
This is exactly how I form the instruction pair below (B+NOP, not
NOP+B). But I get your point. It is confusing to use i1 for the second
instruction. I'll change it.
>
>> + } else {
>> + offset = (addr >> 12) - (jmp_addr >> 12);
>> +
>> + /* patch ADRP */
>> + i2 = deposit32(*code_ptr++, 29, 2, offset & 0x3);
>> + i2 = deposit32(i2, 5, 19, offset >> 2);
>> + /* patch ADDI */
>> + i1 = deposit32(*code_ptr, 10, 12, addr & 0xfff);
>
>
> You can't just patch these insns, because they aren't necessarily ADRP+ADD.
> Indeed, they will very likely be B and NOP. The first address we patch in
> is tb_jmp_reset_offset, which is the following opcode, which is definitely
> in range of the branch above.
Whoops, I totally missed that we patch these out the first time around. I
will explicitly generate the ADRP+ADD pair from here.
Thanks,
--
Pranith
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH v3 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal)
2017-06-30 0:40 [Qemu-devel] [PATCH v3 0/3] Relax code buffer size limitation on aarch64 hosts Pranith Kumar
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 1/3] tcg/aarch64: Introduce and use long branch to register Pranith Kumar
2017-06-30 0:40 ` [Qemu-devel] [PATCH v3 2/3] tcg/aarch64: Use ADRP+ADD to compute target address Pranith Kumar
@ 2017-06-30 0:40 ` Pranith Kumar
2 siblings, 0 replies; 8+ messages in thread
From: Pranith Kumar @ 2017-06-30 0:40 UTC (permalink / raw)
To: alex.bennee; +Cc: qemu-devel, rth
This patch enables the indirect jump path using an LDR (literal)
instruction. It will be interesting to test and see which of the two
paths performs better.
CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
tcg/aarch64/tcg-target.inc.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index f059d9d781..b0801d0259 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -269,6 +269,8 @@ typedef enum {
I3207_BLR = 0xd63f0000,
I3207_RET = 0xd65f0000,
+ /* Load literal for loading the address at pc-relative offset */
+ I3305_LDR = 0x58000000,
/* Load/store register. Described here as 3.3.12, but the helper
that emits them can transform to 3.3.10 or 3.3.13. */
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
@@ -389,6 +391,11 @@ static inline uint32_t tcg_in32(TCGContext *s)
#define tcg_out_insn(S, FMT, OP, ...) \
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
+static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
+{
+ tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
+}
+
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
TCGReg rt, int imm19)
{
@@ -864,6 +871,8 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
}
}
+#ifdef USE_DIRECT_JUMP
+
void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
@@ -889,6 +898,8 @@ void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
flush_icache_range(jmp_addr, jmp_addr + 8);
}
+#endif
+
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
if (!l->has_value) {
@@ -1400,21 +1411,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_tb:
-#ifndef USE_DIRECT_JUMP
-#error "USE_DIRECT_JUMP required for aarch64"
-#endif
- /* consistency for USE_DIRECT_JUMP */
- tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
- /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
- write can be used to patch the target address. */
- if ((uintptr_t)s->code_ptr & 7) {
- tcg_out32(s, NOP);
+ if (s->tb_jmp_insn_offset != NULL) {
+ /* USE_DIRECT_JUMP */
+ /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+ write can be used to patch the target address. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
+ }
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ /* actual branch destination will be patched by
+ aarch64_tb_set_jmp_target later. */
+ tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
+ } else {
+ /* !USE_DIRECT_JUMP */
+ tcg_debug_assert(s->tb_jmp_target_addr != NULL);
+ intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
+ tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
}
- s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
- /* actual branch destination will be patched by
- aarch64_tb_set_jmp_target later. */
- tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
- tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
--
2.13.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* [Qemu-devel] [PATCH v3 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal)
2017-06-29 7:52 [Qemu-devel] [PATCH v2 0/3] Relax code buffer size limitation on aarch64 hosts Pranith Kumar
@ 2017-06-29 7:52 ` Pranith Kumar
2017-06-29 16:41 ` Richard Henderson
0 siblings, 1 reply; 8+ messages in thread
From: Pranith Kumar @ 2017-06-29 7:52 UTC (permalink / raw)
To: alex.bennee; +Cc: qemu-devel, rth
This patch enables the indirect jump path using an LDR (literal)
instruction. It will be interesting to test and see which of the two
paths performs better.
CC: Richard Henderson <rth@twiddle.net>
CC: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
---
tcg/aarch64/tcg-target.inc.c | 42 ++++++++++++++++++++++++++++--------------
1 file changed, 28 insertions(+), 14 deletions(-)
diff --git a/tcg/aarch64/tcg-target.inc.c b/tcg/aarch64/tcg-target.inc.c
index b7670ecc90..5381c31b45 100644
--- a/tcg/aarch64/tcg-target.inc.c
+++ b/tcg/aarch64/tcg-target.inc.c
@@ -269,6 +269,8 @@ typedef enum {
I3207_BLR = 0xd63f0000,
I3207_RET = 0xd65f0000,
+ /* Load literal for loading the address at pc-relative offset */
+ I3305_LDR = 0x58000000,
/* Load/store register. Described here as 3.3.12, but the helper
that emits them can transform to 3.3.10 or 3.3.13. */
I3312_STRB = 0x38000000 | LDST_ST << 22 | MO_8 << 30,
@@ -389,6 +391,11 @@ static inline uint32_t tcg_in32(TCGContext *s)
#define tcg_out_insn(S, FMT, OP, ...) \
glue(tcg_out_insn_,FMT)(S, glue(glue(glue(I,FMT),_),OP), ## __VA_ARGS__)
+static void tcg_out_insn_3305(TCGContext *s, AArch64Insn insn, int imm19, TCGReg rt)
+{
+ tcg_out32(s, insn | (imm19 & 0x7ffff) << 5 | rt);
+}
+
static void tcg_out_insn_3201(TCGContext *s, AArch64Insn insn, TCGType ext,
TCGReg rt, int imm19)
{
@@ -864,6 +871,8 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
}
}
+#ifdef USE_DIRECT_JUMP
+
void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{
tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr;
@@ -881,6 +890,8 @@ void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
flush_icache_range(jmp_addr, jmp_addr + 8);
}
+#endif
+
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
{
if (!l->has_value) {
@@ -1392,21 +1403,24 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_goto_tb:
-#ifndef USE_DIRECT_JUMP
-#error "USE_DIRECT_JUMP required for aarch64"
-#endif
- /* consistency for USE_DIRECT_JUMP */
- tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
- /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
- write can be used to patch the target address. */
- if ((uintptr_t)s->code_ptr & 7) {
- tcg_out32(s, NOP);
+ if (s->tb_jmp_insn_offset != NULL) {
+ /* USE_DIRECT_JUMP */
+ /* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
+ write can be used to patch the target address. */
+ if ((uintptr_t)s->code_ptr & 7) {
+ tcg_out32(s, NOP);
+ }
+ s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
+ /* actual branch destination will be patched by
+ aarch64_tb_set_jmp_target later, beware of retranslation */
+ tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
+ tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
+ } else {
+ /* !USE_DIRECT_JUMP */
+ tcg_debug_assert(s->tb_jmp_target_addr != NULL);
+ intptr_t offset = tcg_pcrel_diff(s, (s->tb_jmp_target_addr + a0)) >> 2;
+ tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
}
- s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
- /* actual branch destination will be patched by
- aarch64_tb_set_jmp_target later, beware of retranslation */
- tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
- tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
tcg_out_callr(s, TCG_REG_TMP);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break;
--
2.13.0
^ permalink raw reply related [flat|nested] 8+ messages in thread
* Re: [Qemu-devel] [PATCH v3 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal)
2017-06-29 7:52 ` [Qemu-devel] [PATCH v3 3/3] tcg/aarch64: Enable indirect jump path using LDR (literal) Pranith Kumar
@ 2017-06-29 16:41 ` Richard Henderson
0 siblings, 0 replies; 8+ messages in thread
From: Richard Henderson @ 2017-06-29 16:41 UTC (permalink / raw)
To: Pranith Kumar, alex.bennee; +Cc: qemu-devel
On 06/29/2017 12:52 AM, Pranith Kumar wrote:
> This patch enables the indirect jump path using an LDR (literal)
> instruction. It will be interesting to test and see which performs
> better among the two paths.
>
> CC: Richard Henderson<rth@twiddle.net>
> CC: Alex Bennée<alex.bennee@linaro.org>
> Signed-off-by: Pranith Kumar<bobby.prani@gmail.com>
> ---
> tcg/aarch64/tcg-target.inc.c | 42 ++++++++++++++++++++++++++++--------------
> 1 file changed, 28 insertions(+), 14 deletions(-)
Reviewed-by: Richard Henderson <rth@twiddle.net>
r~
^ permalink raw reply [flat|nested] 8+ messages in thread