From: Richard Henderson <richard.henderson@linaro.org>
To: qemu-devel@nongnu.org
Cc: Alistair.Francis@wdc.com
Subject: [Qemu-devel] [PATCH for-4.0 v2 17/37] tcg/arm: Reduce the number of temps for tcg_out_tlb_read
Date: Fri, 23 Nov 2018 15:45:38 +0100 [thread overview]
Message-ID: <20181123144558.5048-18-richard.henderson@linaro.org> (raw)
In-Reply-To: <20181123144558.5048-1-richard.henderson@linaro.org>
When moving the qemu_ld/st thunk out of line, we no longer have LR for
use as a temporary. In the worst case we must make do with 3 temps,
when dealing with a 64-bit guest address. This in turn implies that we
cannot use LDRD anymore, as there are not enough temps.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target.inc.c | 97 ++++++++++++++++++++++------------------
1 file changed, 53 insertions(+), 44 deletions(-)
diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 4339c472e8..2deeb1f5d1 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1251,13 +1251,12 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
/*
- *Load and compare a TLB entry, leaving the flags set. Returns the register
- * containing the addend of the tlb entry. Clobbers t0, t1, t2, t3.
- * T0 and T1 must be consecutive for LDRD.
+ * Load and compare a TLB entry, leaving the flags set. Returns the register
+ * containing the addend of the tlb entry. Clobbers t0, t1, t2.
*/
static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
TCGMemOp opc, int mem_index, bool is_load,
- TCGReg t0, TCGReg t1, TCGReg t2, TCGReg t3)
+ TCGReg t0, TCGReg t1, TCGReg t2)
{
TCGReg base = TCG_AREG0;
int cmp_off =
@@ -1265,49 +1264,64 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
: offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
- int mask_off;
unsigned s_bits = opc & MO_SIZE;
unsigned a_bits = get_alignment_bits(opc);
/* V7 generates the following:
* ubfx t0, addrlo, #TARGET_PAGE_BITS, #CPU_TLB_BITS
* add t2, env, #high
- * add t2, t2, r0, lsl #CPU_TLB_ENTRY_BITS
- * ldr t0, [t2, #cmp] (and t1 w/ldrd)
+ * add t2, t2, t0, lsl #CPU_TLB_ENTRY_BITS
+ * ldr t0, [t2, #cmp]
* ldr t2, [t2, #add]
- * movw t3, #page_align_mask
- * bic t3, addrlo, t3
- * cmp t0, t3
+ * movw t1, #page_align_mask
+ * bic t1, addrlo, t1
+ * cmp t0, t1
+ *
+ * ubfx t0, addrlo, #TPB, #CTB -- 64-bit address
+ * add t2, env, #high
+ * add t2, t2, t0, lsl #CTEB
+ * ldr t0, [t2, #cmplo]
+ * movw t1, #page_align_mask
+ * bic t1, addrlo, t1
+ * cmp t0, t1
+ * ldr t0, [t2, #cmphi]
+ * ldr t2, [t2, #add]
+ * cmpeq t0, addrhi
*
* Otherwise we generate:
* shr t3, addrlo, #TARGET_PAGE_BITS
* add t2, env, #high
* and t0, t3, #(CPU_TLB_SIZE - 1)
* add t2, t2, t0, lsl #CPU_TLB_ENTRY_BITS
- * ldr t0, [t2, #cmp] (and t1 w/ldrd)
+ * ldr t0, [t2, #cmp]
* ldr t2, [t2, #add]
* tst addrlo, #s_mask
* cmpeq t0, t3, lsl #TARGET_PAGE_BITS
+ *
+ * shr t1, addrlo, #TPB -- 64-bit address
+ * add t2, env, #high
+ * and t0, t1, #CTS-1
+ * add t2, t2, t0, lsl #CTEB
+ * ldr t0, [t2, #cmplo]
+ * tst addrlo, #s_mask
+ * cmpeq t0, t1, lsl #TPB
+ * ldr t0, [t2, #cmphi]
+ * ldr t2, [t2, #add]
+ * cmpeq t0, addrhi
*/
if (use_armv7_instructions) {
tcg_out_extract(s, COND_AL, t0, addrlo,
TARGET_PAGE_BITS, CPU_TLB_BITS);
} else {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, t3,
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, t1,
0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
}
/* Add portions of the offset until the memory access is in range.
- * If we plan on using ldrd, reduce to an 8-bit offset; otherwise
- * we can use a 12-bit offset.
+ * We are not using ldrd, so we can use a 12-bit offset.
*/
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- mask_off = 0xff;
- } else {
- mask_off = 0xfff;
- }
- while (cmp_off > mask_off) {
- int shift = ctz32(cmp_off & ~mask_off) & ~1;
+ while (cmp_off > 0xfff) {
+ int shift = ctz32(cmp_off & ~0xfff) & ~1;
int rot = ((32 - shift) << 7) & 0xf00;
int addend = cmp_off & (0xff << shift);
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t2, base,
@@ -1318,25 +1332,13 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
}
if (!use_armv7_instructions) {
- tcg_out_dat_imm(s, COND_AL, ARITH_AND, t0, t3, CPU_TLB_SIZE - 1);
+ tcg_out_dat_imm(s, COND_AL, ARITH_AND, t0, t1, CPU_TLB_SIZE - 1);
}
tcg_out_dat_reg(s, COND_AL, ARITH_ADD, t2, base, t0,
SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
- /* Load the tlb comparator. Use ldrd if needed and available,
- but due to how the pointer needs setting up, ldm isn't useful.
- Base arm5 doesn't have ldrd, but armv5te does. */
- if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_8(s, COND_AL, t0, t2, cmp_off);
- } else {
- tcg_out_ld32_12(s, COND_AL, t0, t2, cmp_off);
- if (TARGET_LONG_BITS == 64) {
- tcg_out_ld32_12(s, COND_AL, t1, t2, cmp_off + 4);
- }
- }
-
- /* Load the tlb addend. */
- tcg_out_ld32_12(s, COND_AL, t2, t2, add_off);
+ /* Load the tlb comparator (low part). */
+ tcg_out_ld32_12(s, COND_AL, t0, t2, cmp_off);
/* Check alignment. We don't support inline unaligned acceses,
but we can easily support overalignment checks. */
@@ -1349,24 +1351,31 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
int rot = encode_imm(mask);
if (rot >= 0) {
- tcg_out_dat_imm(s, COND_AL, ARITH_BIC, t3, addrlo,
+ tcg_out_dat_imm(s, COND_AL, ARITH_BIC, t1, addrlo,
rotl(mask, rot) | (rot << 7));
} else {
- tcg_out_movi32(s, COND_AL, t3, mask);
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, t3, addrlo, t3, 0);
+ tcg_out_movi32(s, COND_AL, t1, mask);
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, t1, addrlo, t1, 0);
}
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, t0, t3, 0);
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, t0, t1, 0);
} else {
if (a_bits) {
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo,
(1 << a_bits) - 1);
}
tcg_out_dat_reg(s, (a_bits ? COND_EQ : COND_AL), ARITH_CMP,
- 0, t0, t3, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+ 0, t0, t1, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
}
+ /* Load the tlb comparator (high part). */
if (TARGET_LONG_BITS == 64) {
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, t1, addrhi, 0);
+ tcg_out_ld32_12(s, COND_AL, t0, t2, cmp_off + 4);
+ }
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, t2, t2, add_off);
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, t0, addrhi, 0);
}
return t2;
@@ -1636,7 +1645,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
#ifdef CONFIG_SOFTMMU
mem_index = get_mmuidx(oi);
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 1,
- TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R14);
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_TMP);
/* This a conditional BL only to load a pointer within this opcode into LR
for the slow path. We will not be using the value for a tail call. */
@@ -1768,7 +1777,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
#ifdef CONFIG_SOFTMMU
mem_index = get_mmuidx(oi);
addend = tcg_out_tlb_read(s, addrlo, addrhi, opc, mem_index, 0,
- TCG_REG_R0, TCG_REG_R1, TCG_REG_R2, TCG_REG_R14);
+ TCG_REG_R0, TCG_REG_R1, TCG_REG_TMP);
tcg_out_qemu_st_index(s, COND_EQ, opc, datalo, datahi, addrlo, addend);
--
2.17.2
next prev parent reply other threads:[~2018-11-23 14:46 UTC|newest]
Thread overview: 55+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-11-23 14:45 [Qemu-devel] [PATCH for-4.0 v2 00/37] tcg: Assorted cleanups Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 01/37] tcg/i386: Always use %ebp for TCG_AREG0 Richard Henderson
2018-11-29 12:52 ` Alex Bennée
2018-11-29 14:55 ` Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 02/37] tcg/i386: Move TCG_REG_CALL_STACK from define to enum Richard Henderson
2018-11-29 12:52 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 03/37] tcg: Return success from patch_reloc Richard Henderson
2018-11-29 14:47 ` Alex Bennée
2018-11-29 17:35 ` Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 04/37] tcg: Add TCG_TARGET_NEED_LDST_OOL_LABELS Richard Henderson
2018-11-26 0:31 ` Emilio G. Cota
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 05/37] tcg/i386: Add constraints for r8 and r9 Richard Henderson
2018-11-29 15:00 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 06/37] tcg/i386: Return a base register from tcg_out_tlb_load Richard Henderson
2018-11-29 16:34 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 07/37] tcg/i386: Change TCG_REG_L[01] to not overlap function arguments Richard Henderson
2018-11-29 17:13 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 08/37] tcg/i386: Force qemu_ld/st arguments into fixed registers Richard Henderson
2018-11-30 16:16 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 09/37] tcg/i386: Use TCG_TARGET_NEED_LDST_OOL_LABELS Richard Henderson
2018-11-30 17:22 ` Alex Bennée
2018-11-30 17:37 ` Richard Henderson
2018-11-30 17:52 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 10/37] tcg/aarch64: Add constraints for x0, x1, x2 Richard Henderson
2018-11-30 17:25 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 11/37] tcg/aarch64: Parameterize the temps for tcg_out_tlb_read Richard Henderson
2018-11-30 17:50 ` Alex Bennée
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 12/37] tcg/aarch64: Parameterize the temp for tcg_out_goto_long Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 13/37] tcg/aarch64: Use B not BL " Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 14/37] tcg/aarch64: Use TCG_TARGET_NEED_LDST_OOL_LABELS Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 15/37] tcg/arm: Parameterize the temps for tcg_out_tlb_read Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 16/37] tcg/arm: Add constraints for R0-R5 Richard Henderson
2018-11-23 14:45 ` Richard Henderson [this message]
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 18/37] tcg/arm: Force qemu_ld/st arguments into fixed registers Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 19/37] tcg/arm: Use TCG_TARGET_NEED_LDST_OOL_LABELS Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 20/37] tcg/ppc: Parameterize the temps for tcg_out_tlb_read Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 21/37] tcg/ppc: Split out tcg_out_call_int Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 22/37] tcg/ppc: Add constraints for R7-R8 Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 23/37] tcg/ppc: Change TCG_TARGET_CALL_ALIGN_ARGS to bool Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 24/37] tcg/ppc: Force qemu_ld/st arguments into fixed registers Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 25/37] tcg/ppc: Use TCG_TARGET_NEED_LDST_OOL_LABELS Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 26/37] tcg: Clean up generic bswap32 Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 27/37] tcg: Clean up generic bswap64 Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 28/37] tcg/optimize: Optimize bswap Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 29/37] tcg: Add TCG_TARGET_HAS_MEMORY_BSWAP Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 30/37] tcg/i386: Adjust TCG_TARGET_HAS_MEMORY_BSWAP Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 31/37] tcg/aarch64: Set TCG_TARGET_HAS_MEMORY_BSWAP to false Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 32/37] tcg/arm: Set TCG_TARGET_HAS_MEMORY_BSWAP to false for user-only Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 33/37] tcg/i386: Propagate is64 to tcg_out_qemu_ld_direct Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 34/37] tcg/i386: Restrict user-only qemu_st_i32 values to q-regs Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 35/37] tcg/i386: Add setup_guest_base_seg for FreeBSD Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 36/37] tcg/i386: Require segment syscalls to succeed Richard Henderson
2018-11-23 14:45 ` [Qemu-devel] [PATCH for-4.0 v2 37/37] tcg/i386: Remove L constraint Richard Henderson
2018-11-23 21:04 ` [Qemu-devel] [PATCH for-4.0 v2 00/37] tcg: Assorted cleanups no-reply
2018-11-26 0:30 ` Emilio G. Cota
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20181123144558.5048-18-richard.henderson@linaro.org \
--to=richard.henderson@linaro.org \
--cc=Alistair.Francis@wdc.com \
--cc=qemu-devel@nongnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.