* [PATCH v3 1/4] target/s390x: Improve cc computation for ADD LOGICAL
2020-12-14 22:13 [PATCH v3 0/4] target/s390x: Improve carry computation Richard Henderson
@ 2020-12-14 22:13 ` Richard Henderson
2020-12-14 22:13 ` [PATCH v3 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY Richard Henderson
` (4 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-12-14 22:13 UTC (permalink / raw)
To: qemu-devel; +Cc: david
The resulting cc is only dependent on the result and the
carry-out. So save those things rather than the inputs.
Carry-out for 64-bit inputs is had via tcg_gen_add2_i64 directly
into cc_src. Carry-out for 32-bit inputs is had via extraction
from a normal 64-bit add (with zero-extended inputs).
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 4 +-
target/s390x/cc_helper.c | 25 ++++-----
target/s390x/helper.c | 3 +-
target/s390x/translate.c | 103 ++++++++++++++++++++++++-------------
target/s390x/insn-data.def | 36 ++++++-------
5 files changed, 97 insertions(+), 74 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 64602660ae..55c5442102 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -160,6 +160,8 @@ enum cc_op {
CC_OP_STATIC, /* CC value is env->cc_op */
CC_OP_NZ, /* env->cc_dst != 0 */
+ CC_OP_ADDU, /* dst != 0, src = carry out (0,1) */
+
CC_OP_LTGT_32, /* signed less/greater than (32bit) */
CC_OP_LTGT_64, /* signed less/greater than (64bit) */
CC_OP_LTUGTU_32, /* unsigned less/greater than (32bit) */
@@ -168,7 +170,6 @@ enum cc_op {
CC_OP_LTGT0_64, /* signed less/greater than 0 (64bit) */
CC_OP_ADD_64, /* overflow on add (64bit) */
- CC_OP_ADDU_64, /* overflow on unsigned add (64bit) */
CC_OP_ADDC_64, /* overflow on unsigned add-carry (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
@@ -178,7 +179,6 @@ enum cc_op {
CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
CC_OP_ADD_32, /* overflow on add (32bit) */
- CC_OP_ADDU_32, /* overflow on unsigned add (32bit) */
CC_OP_ADDC_32, /* overflow on unsigned add-carry (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index 5432aeeed4..59da4d1cc2 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -123,6 +123,12 @@ static uint32_t cc_calc_nz(uint64_t dst)
return !!dst;
}
+static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result)
+{
+ g_assert(carry_out <= 1);
+ return (result != 0) + 2 * carry_out;
+}
+
static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
{
if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
@@ -138,11 +144,6 @@ static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_addu_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- return (ar != 0) + 2 * (ar < a1);
-}
-
static uint32_t cc_calc_addc_64(uint64_t a1, uint64_t a2, uint64_t ar)
{
/* Recover a2 + carry_in. */
@@ -239,11 +240,6 @@ static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_addu_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- return (ar != 0) + 2 * (ar < a1);
-}
-
static uint32_t cc_calc_addc_32(uint32_t a1, uint32_t a2, uint32_t ar)
{
/* Recover a2 + carry_in. */
@@ -483,12 +479,12 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_NZ:
r = cc_calc_nz(dst);
break;
+ case CC_OP_ADDU:
+ r = cc_calc_addu(src, dst);
+ break;
case CC_OP_ADD_64:
r = cc_calc_add_64(src, dst, vr);
break;
- case CC_OP_ADDU_64:
- r = cc_calc_addu_64(src, dst, vr);
- break;
case CC_OP_ADDC_64:
r = cc_calc_addc_64(src, dst, vr);
break;
@@ -517,9 +513,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADD_32:
r = cc_calc_add_32(src, dst, vr);
break;
- case CC_OP_ADDU_32:
- r = cc_calc_addu_32(src, dst, vr);
- break;
case CC_OP_ADDC_32:
r = cc_calc_addc_32(src, dst, vr);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index b877690845..db87a62a57 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -395,6 +395,7 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_DYNAMIC] = "CC_OP_DYNAMIC",
[CC_OP_STATIC] = "CC_OP_STATIC",
[CC_OP_NZ] = "CC_OP_NZ",
+ [CC_OP_ADDU] = "CC_OP_ADDU",
[CC_OP_LTGT_32] = "CC_OP_LTGT_32",
[CC_OP_LTGT_64] = "CC_OP_LTGT_64",
[CC_OP_LTUGTU_32] = "CC_OP_LTUGTU_32",
@@ -402,7 +403,6 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_32] = "CC_OP_LTGT0_32",
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
- [CC_OP_ADDU_64] = "CC_OP_ADDU_64",
[CC_OP_ADDC_64] = "CC_OP_ADDC_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
[CC_OP_SUBU_64] = "CC_OP_SUBU_64",
@@ -410,7 +410,6 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
- [CC_OP_ADDU_32] = "CC_OP_ADDU_32",
[CC_OP_ADDC_32] = "CC_OP_ADDC_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
[CC_OP_SUBU_32] = "CC_OP_SUBU_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index be32938f6d..b473233edf 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -600,13 +600,11 @@ static void gen_op_calc_cc(DisasContext *s)
dummy = tcg_const_i64(0);
/* FALLTHRU */
case CC_OP_ADD_64:
- case CC_OP_ADDU_64:
case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDU_32:
case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
@@ -650,6 +648,7 @@ static void gen_op_calc_cc(DisasContext *s)
/* 1 argument */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, dummy, cc_dst, dummy);
break;
+ case CC_OP_ADDU:
case CC_OP_ICM:
case CC_OP_LTGT_32:
case CC_OP_LTGT_64:
@@ -666,13 +665,11 @@ static void gen_op_calc_cc(DisasContext *s)
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
break;
case CC_OP_ADD_64:
- case CC_OP_ADDU_64:
case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDU_32:
case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
@@ -849,20 +846,19 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
account_inline_branch(s, old_cc_op);
break;
- case CC_OP_ADDU_32:
- case CC_OP_ADDU_64:
+ case CC_OP_ADDU:
switch (mask) {
- case 8 | 2: /* vr == 0 */
+ case 8 | 2: /* result == 0 */
cond = TCG_COND_EQ;
break;
- case 4 | 1: /* vr != 0 */
+ case 4 | 1: /* result != 0 */
cond = TCG_COND_NE;
break;
- case 8 | 4: /* no carry -> vr >= src */
- cond = TCG_COND_GEU;
+ case 8 | 4: /* no carry */
+ cond = TCG_COND_EQ;
break;
- case 2 | 1: /* carry -> vr < src */
- cond = TCG_COND_LTU;
+ case 2 | 1: /* carry */
+ cond = TCG_COND_NE;
break;
default:
goto do_dynamic;
@@ -950,26 +946,21 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
tcg_gen_and_i64(c->u.s64.a, cc_src, cc_dst);
break;
- case CC_OP_ADDU_32:
- c->is_64 = false;
- c->u.s32.a = tcg_temp_new_i32();
- c->u.s32.b = tcg_temp_new_i32();
- tcg_gen_extrl_i64_i32(c->u.s32.a, cc_vr);
- if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
- tcg_gen_movi_i32(c->u.s32.b, 0);
- } else {
- tcg_gen_extrl_i64_i32(c->u.s32.b, cc_src);
- }
- break;
-
- case CC_OP_ADDU_64:
- c->u.s64.a = cc_vr;
+ case CC_OP_ADDU:
+ c->is_64 = true;
+ c->u.s64.b = tcg_const_i64(0);
c->g1 = true;
- if (cond == TCG_COND_EQ || cond == TCG_COND_NE) {
- c->u.s64.b = tcg_const_i64(0);
- } else {
- c->u.s64.b = cc_src;
- c->g2 = true;
+ switch (mask) {
+ case 8 | 2:
+ case 4 | 1: /* result */
+ c->u.s64.a = cc_dst;
+ break;
+ case 8 | 4:
+ case 2 | 1: /* carry */
+ c->u.s64.a = cc_src;
+ break;
+ default:
+ g_assert_not_reached();
}
break;
@@ -1445,6 +1436,13 @@ static DisasJumpType op_add(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
+static DisasJumpType op_addu64(DisasContext *s, DisasOps *o)
+{
+ tcg_gen_movi_i64(cc_src, 0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
+ return DISAS_NEXT;
+}
+
static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
{
DisasCompare cmp;
@@ -1474,9 +1472,10 @@ static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
static DisasJumpType op_asi(DisasContext *s, DisasOps *o)
{
- o->in1 = tcg_temp_new_i64();
+ bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45);
- if (!s390_has_feat(S390_FEAT_STFLE_45)) {
+ o->in1 = tcg_temp_new_i64();
+ if (non_atomic) {
tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data);
} else {
/* Perform the atomic addition in memory. */
@@ -1487,7 +1486,30 @@ static DisasJumpType op_asi(DisasContext *s, DisasOps *o)
/* Recompute also for atomic case: needed for setting CC. */
tcg_gen_add_i64(o->out, o->in1, o->in2);
- if (!s390_has_feat(S390_FEAT_STFLE_45)) {
+ if (non_atomic) {
+ tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data);
+ }
+ return DISAS_NEXT;
+}
+
+static DisasJumpType op_asiu64(DisasContext *s, DisasOps *o)
+{
+ bool non_atomic = !s390_has_feat(S390_FEAT_STFLE_45);
+
+ o->in1 = tcg_temp_new_i64();
+ if (non_atomic) {
+ tcg_gen_qemu_ld_tl(o->in1, o->addr1, get_mem_index(s), s->insn->data);
+ } else {
+ /* Perform the atomic addition in memory. */
+ tcg_gen_atomic_fetch_add_i64(o->in1, o->addr1, o->in2, get_mem_index(s),
+ s->insn->data);
+ }
+
+ /* Recompute also for atomic case: needed for setting CC. */
+ tcg_gen_movi_i64(cc_src, 0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
+
+ if (non_atomic) {
tcg_gen_qemu_st_tl(o->out, o->addr1, get_mem_index(s), s->insn->data);
}
return DISAS_NEXT;
@@ -5185,12 +5207,14 @@ static void cout_adds64(DisasContext *s, DisasOps *o)
static void cout_addu32(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_ADDU_32, o->in1, o->in2, o->out);
+ tcg_gen_shri_i64(cc_src, o->out, 32);
+ tcg_gen_ext32u_i64(cc_dst, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, cc_dst);
}
static void cout_addu64(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_ADDU_64, o->in1, o->in2, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, o->out);
}
static void cout_addc32(DisasContext *s, DisasOps *o)
@@ -5637,6 +5661,13 @@ static void in1_r2_sr32(DisasContext *s, DisasOps *o)
}
#define SPEC_in1_r2_sr32 0
+static void in1_r2_32u(DisasContext *s, DisasOps *o)
+{
+ o->in1 = tcg_temp_new_i64();
+ tcg_gen_ext32u_i64(o->in1, regs[get_field(s, r2)]);
+}
+#define SPEC_in1_r2_32u 0
+
static void in1_r3(DisasContext *s, DisasOps *o)
{
o->in1 = load_reg(get_field(s, r3));
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index b95bc98d35..5461e6aa3b 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -58,29 +58,29 @@
C(0xa70b, AGHI, RI_a, Z, r1, i2, r1, 0, add, adds64)
/* ADD LOGICAL */
- C(0x1e00, ALR, RR_a, Z, r1, r2, new, r1_32, add, addu32)
- C(0xb9fa, ALRK, RRF_a, DO, r2, r3, new, r1_32, add, addu32)
- C(0x5e00, AL, RX_a, Z, r1, m2_32u, new, r1_32, add, addu32)
- C(0xe35e, ALY, RXY_a, LD, r1, m2_32u, new, r1_32, add, addu32)
- C(0xb90a, ALGR, RRE, Z, r1, r2, r1, 0, add, addu64)
- C(0xb91a, ALGFR, RRE, Z, r1, r2_32u, r1, 0, add, addu64)
- C(0xb9ea, ALGRK, RRF_a, DO, r2, r3, r1, 0, add, addu64)
- C(0xe30a, ALG, RXY_a, Z, r1, m2_64, r1, 0, add, addu64)
- C(0xe31a, ALGF, RXY_a, Z, r1, m2_32u, r1, 0, add, addu64)
+ C(0x1e00, ALR, RR_a, Z, r1_32u, r2_32u, new, r1_32, add, addu32)
+ C(0xb9fa, ALRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, add, addu32)
+ C(0x5e00, AL, RX_a, Z, r1_32u, m2_32u, new, r1_32, add, addu32)
+ C(0xe35e, ALY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, add, addu32)
+ C(0xb90a, ALGR, RRE, Z, r1, r2, r1, 0, addu64, addu64)
+ C(0xb91a, ALGFR, RRE, Z, r1, r2_32u, r1, 0, addu64, addu64)
+ C(0xb9ea, ALGRK, RRF_a, DO, r2, r3, r1, 0, addu64, addu64)
+ C(0xe30a, ALG, RXY_a, Z, r1, m2_64, r1, 0, addu64, addu64)
+ C(0xe31a, ALGF, RXY_a, Z, r1, m2_32u, r1, 0, addu64, addu64)
/* ADD LOGICAL HIGH */
C(0xb9ca, ALHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, add, addu32)
- C(0xb9da, ALHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, add, addu32)
+ C(0xb9da, ALHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, add, addu32)
/* ADD LOGICAL IMMEDIATE */
- C(0xc20b, ALFI, RIL_a, EI, r1, i2_32u, new, r1_32, add, addu32)
- C(0xc20a, ALGFI, RIL_a, EI, r1, i2_32u, r1, 0, add, addu64)
+ C(0xc20b, ALFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, add, addu32)
+ C(0xc20a, ALGFI, RIL_a, EI, r1, i2_32u, r1, 0, addu64, addu64)
/* ADD LOGICAL WITH SIGNED IMMEDIATE */
- D(0xeb6e, ALSI, SIY, GIE, la1, i2, new, 0, asi, addu32, MO_TEUL)
- C(0xecda, ALHSIK, RIE_d, DO, r3, i2, new, r1_32, add, addu32)
- D(0xeb7e, ALGSI, SIY, GIE, la1, i2, new, 0, asi, addu64, MO_TEQ)
- C(0xecdb, ALGHSIK, RIE_d, DO, r3, i2, r1, 0, add, addu64)
+ D(0xeb6e, ALSI, SIY, GIE, la1, i2_32u, new, 0, asi, addu32, MO_TEUL)
+ C(0xecda, ALHSIK, RIE_d, DO, r3_32u, i2_32u, new, r1_32, add, addu32)
+ C(0xeb7e, ALGSI, SIY, GIE, la1, i2, r1, 0, asiu64, addu64)
+ C(0xecdb, ALGHSIK, RIE_d, DO, r3, i2, r1, 0, addu64, addu64)
/* ADD LOGICAL WITH SIGNED IMMEDIATE HIGH */
- C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2, new, r1_32h, add, addu32)
- C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2, new, r1_32h, add, 0)
+ C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, addu32)
+ C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, 0)
/* ADD LOGICAL WITH CARRY */
C(0xb998, ALCR, RRE, Z, r1, r2, new, r1_32, addc, addc32)
C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc, addc64)
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY
2020-12-14 22:13 [PATCH v3 0/4] target/s390x: Improve carry computation Richard Henderson
2020-12-14 22:13 ` [PATCH v3 1/4] target/s390x: Improve cc computation for ADD LOGICAL Richard Henderson
@ 2020-12-14 22:13 ` Richard Henderson
2020-12-14 22:13 ` [PATCH v3 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL Richard Henderson
` (3 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-12-14 22:13 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Now that ADD LOGICAL outputs carry, we can use that as input directly.
It also means we can re-use CC_OP_ADDU and produce an output carry
directly from ADD LOGICAL WITH CARRY.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 2 --
target/s390x/cc_helper.c | 26 ---------------
target/s390x/helper.c | 2 --
target/s390x/translate.c | 67 ++++++++++++++++++--------------------
target/s390x/insn-data.def | 8 ++---
5 files changed, 36 insertions(+), 69 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 55c5442102..f5f3ae063e 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -170,7 +170,6 @@ enum cc_op {
CC_OP_LTGT0_64, /* signed less/greater than 0 (64bit) */
CC_OP_ADD_64, /* overflow on add (64bit) */
- CC_OP_ADDC_64, /* overflow on unsigned add-carry (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
@@ -179,7 +178,6 @@ enum cc_op {
CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
CC_OP_ADD_32, /* overflow on add (32bit) */
- CC_OP_ADDC_32, /* overflow on unsigned add-carry (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index 59da4d1cc2..cd2c5c4b39 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -144,16 +144,6 @@ static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_addc_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- /* Recover a2 + carry_in. */
- uint64_t a2c = ar - a1;
- /* Check for a2+carry_in overflow, then a1+a2c overflow. */
- int carry_out = (a2c < a2) || (ar < a1);
-
- return (ar != 0) + 2 * carry_out;
-}
-
static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
{
if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) {
@@ -240,16 +230,6 @@ static uint32_t cc_calc_add_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_addc_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- /* Recover a2 + carry_in. */
- uint32_t a2c = ar - a1;
- /* Check for a2+carry_in overflow, then a1+a2c overflow. */
- int carry_out = (a2c < a2) || (ar < a1);
-
- return (ar != 0) + 2 * carry_out;
-}
-
static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
{
if ((a1 > 0 && a2 < 0 && ar < 0) || (a1 < 0 && a2 > 0 && ar > 0)) {
@@ -485,9 +465,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADD_64:
r = cc_calc_add_64(src, dst, vr);
break;
- case CC_OP_ADDC_64:
- r = cc_calc_addc_64(src, dst, vr);
- break;
case CC_OP_SUB_64:
r = cc_calc_sub_64(src, dst, vr);
break;
@@ -513,9 +490,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADD_32:
r = cc_calc_add_32(src, dst, vr);
break;
- case CC_OP_ADDC_32:
- r = cc_calc_addc_32(src, dst, vr);
- break;
case CC_OP_SUB_32:
r = cc_calc_sub_32(src, dst, vr);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index db87a62a57..4f4561bc64 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -403,14 +403,12 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_32] = "CC_OP_LTGT0_32",
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
- [CC_OP_ADDC_64] = "CC_OP_ADDC_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
[CC_OP_SUBU_64] = "CC_OP_SUBU_64",
[CC_OP_SUBB_64] = "CC_OP_SUBB_64",
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
- [CC_OP_ADDC_32] = "CC_OP_ADDC_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
[CC_OP_SUBU_32] = "CC_OP_SUBU_32",
[CC_OP_SUBB_32] = "CC_OP_SUBB_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index b473233edf..d1d97e4696 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -600,12 +600,10 @@ static void gen_op_calc_cc(DisasContext *s)
dummy = tcg_const_i64(0);
/* FALLTHRU */
case CC_OP_ADD_64:
- case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
@@ -665,12 +663,10 @@ static void gen_op_calc_cc(DisasContext *s)
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, dummy);
break;
case CC_OP_ADD_64:
- case CC_OP_ADDC_64:
case CC_OP_SUB_64:
case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
- case CC_OP_ADDC_32:
case CC_OP_SUB_32:
case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
@@ -1443,30 +1439,41 @@ static DisasJumpType op_addu64(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static DisasJumpType op_addc(DisasContext *s, DisasOps *o)
+/* Compute carry into cc_src. */
+static void compute_carry(DisasContext *s)
{
- DisasCompare cmp;
- TCGv_i64 carry;
-
- tcg_gen_add_i64(o->out, o->in1, o->in2);
-
- /* The carry flag is the msb of CC, therefore the branch mask that would
- create that comparison is 3. Feeding the generated comparison to
- setcond produces the carry flag that we desire. */
- disas_jcc(s, &cmp, 3);
- carry = tcg_temp_new_i64();
- if (cmp.is_64) {
- tcg_gen_setcond_i64(cmp.cond, carry, cmp.u.s64.a, cmp.u.s64.b);
- } else {
- TCGv_i32 t = tcg_temp_new_i32();
- tcg_gen_setcond_i32(cmp.cond, t, cmp.u.s32.a, cmp.u.s32.b);
- tcg_gen_extu_i32_i64(carry, t);
- tcg_temp_free_i32(t);
+ switch (s->cc_op) {
+ case CC_OP_ADDU:
+ /* The carry value is already in cc_src (1,0). */
+ break;
+ default:
+ gen_op_calc_cc(s);
+ /* fall through */
+ case CC_OP_STATIC:
+ /* The carry flag is the msb of CC; compute into cc_src. */
+ tcg_gen_extu_i32_i64(cc_src, cc_op);
+ tcg_gen_shri_i64(cc_src, cc_src, 1);
+ break;
}
- free_compare(&cmp);
+}
+
+static DisasJumpType op_addc32(DisasContext *s, DisasOps *o)
+{
+ compute_carry(s);
+ tcg_gen_add_i64(o->out, o->in1, o->in2);
+ tcg_gen_add_i64(o->out, o->out, cc_src);
+ return DISAS_NEXT;
+}
+
+static DisasJumpType op_addc64(DisasContext *s, DisasOps *o)
+{
+ compute_carry(s);
+
+ TCGv_i64 zero = tcg_const_i64(0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, zero);
+ tcg_gen_add2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero);
+ tcg_temp_free_i64(zero);
- tcg_gen_add_i64(o->out, o->out, carry);
- tcg_temp_free_i64(carry);
return DISAS_NEXT;
}
@@ -5217,16 +5224,6 @@ static void cout_addu64(DisasContext *s, DisasOps *o)
gen_op_update2_cc_i64(s, CC_OP_ADDU, cc_src, o->out);
}
-static void cout_addc32(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_ADDC_32, o->in1, o->in2, o->out);
-}
-
-static void cout_addc64(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_ADDC_64, o->in1, o->in2, o->out);
-}
-
static void cout_cmps32(DisasContext *s, DisasOps *o)
{
gen_op_update2_cc_i64(s, CC_OP_LTGT_32, o->in1, o->in2);
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 5461e6aa3b..e380723dcd 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -82,10 +82,10 @@
C(0xcc0a, ALSIH, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, addu32)
C(0xcc0b, ALSIHN, RIL_a, HW, r1_sr32, i2_32u, new, r1_32h, add, 0)
/* ADD LOGICAL WITH CARRY */
- C(0xb998, ALCR, RRE, Z, r1, r2, new, r1_32, addc, addc32)
- C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc, addc64)
- C(0xe398, ALC, RXY_a, Z, r1, m2_32u, new, r1_32, addc, addc32)
- C(0xe388, ALCG, RXY_a, Z, r1, m2_64, r1, 0, addc, addc64)
+ C(0xb998, ALCR, RRE, Z, r1_32u, r2_32u, new, r1_32, addc32, addu32)
+ C(0xb988, ALCGR, RRE, Z, r1, r2, r1, 0, addc64, addu64)
+ C(0xe398, ALC, RXY_a, Z, r1_32u, m2_32u, new, r1_32, addc32, addu32)
+ C(0xe388, ALCG, RXY_a, Z, r1, m2_64, r1, 0, addc64, addu64)
/* AND */
C(0x1400, NR, RR_a, Z, r1, r2, new, r1_32, and, nz32)
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL
2020-12-14 22:13 [PATCH v3 0/4] target/s390x: Improve carry computation Richard Henderson
2020-12-14 22:13 ` [PATCH v3 1/4] target/s390x: Improve cc computation for ADD LOGICAL Richard Henderson
2020-12-14 22:13 ` [PATCH v3 2/4] target/s390x: Improve ADD LOGICAL WITH CARRY Richard Henderson
@ 2020-12-14 22:13 ` Richard Henderson
2020-12-14 22:13 ` [PATCH v3 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW Richard Henderson
` (2 subsequent siblings)
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-12-14 22:13 UTC (permalink / raw)
To: qemu-devel; +Cc: david
The resulting cc is only dependent on the result and the carry-out.
Carry-out and borrow-out are inverses, so are trivially converted.
With tcg ops, it is easier to compute borrow-out than carry-out, so
save result and borrow-out rather than the inputs.
Borrow-out for 64-bit inputs is had via tcg_gen_sub2_i64 directly
into cc_src. Borrow-out for 32-bit inputs is had via extraction
from a normal 64-bit sub (with zero-extended inputs).
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 3 +--
target/s390x/cc_helper.c | 40 ++++++---------------------
target/s390x/helper.c | 3 +--
target/s390x/translate.c | 55 +++++++++++++++-----------------------
target/s390x/insn-data.def | 24 ++++++++---------
5 files changed, 43 insertions(+), 82 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index f5f3ae063e..4077047494 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -161,6 +161,7 @@ enum cc_op {
CC_OP_NZ, /* env->cc_dst != 0 */
CC_OP_ADDU, /* dst != 0, src = carry out (0,1) */
+ CC_OP_SUBU, /* dst != 0, src = borrow out (0,-1) */
CC_OP_LTGT_32, /* signed less/greater than (32bit) */
CC_OP_LTGT_64, /* signed less/greater than (64bit) */
@@ -171,7 +172,6 @@ enum cc_op {
CC_OP_ADD_64, /* overflow on add (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
- CC_OP_SUBU_64, /* overflow on unsigned subtraction (64bit) */
CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
CC_OP_ABS_64, /* sign eval on abs (64bit) */
CC_OP_NABS_64, /* sign eval on nabs (64bit) */
@@ -179,7 +179,6 @@ enum cc_op {
CC_OP_ADD_32, /* overflow on add (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
- CC_OP_SUBU_32, /* overflow on unsigned subtraction (32bit) */
CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
CC_OP_ABS_32, /* sign eval on abs (64bit) */
CC_OP_NABS_32, /* sign eval on nabs (64bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index cd2c5c4b39..c7728d1225 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -129,6 +129,11 @@ static uint32_t cc_calc_addu(uint64_t carry_out, uint64_t result)
return (result != 0) + 2 * carry_out;
}
+static uint32_t cc_calc_subu(uint64_t borrow_out, uint64_t result)
+{
+ return cc_calc_addu(borrow_out + 1, result);
+}
+
static uint32_t cc_calc_add_64(int64_t a1, int64_t a2, int64_t ar)
{
if ((a1 > 0 && a2 > 0 && ar < 0) || (a1 < 0 && a2 < 0 && ar > 0)) {
@@ -159,19 +164,6 @@ static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_subu_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- if (ar == 0) {
- return 2;
- } else {
- if (a2 > a1) {
- return 1;
- } else {
- return 3;
- }
- }
-}
-
static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
{
int borrow_out;
@@ -245,19 +237,6 @@ static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_subu_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- if (ar == 0) {
- return 2;
- } else {
- if (a2 > a1) {
- return 1;
- } else {
- return 3;
- }
- }
-}
-
static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar)
{
int borrow_out;
@@ -462,15 +441,15 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_ADDU:
r = cc_calc_addu(src, dst);
break;
+ case CC_OP_SUBU:
+ r = cc_calc_subu(src, dst);
+ break;
case CC_OP_ADD_64:
r = cc_calc_add_64(src, dst, vr);
break;
case CC_OP_SUB_64:
r = cc_calc_sub_64(src, dst, vr);
break;
- case CC_OP_SUBU_64:
- r = cc_calc_subu_64(src, dst, vr);
- break;
case CC_OP_SUBB_64:
r = cc_calc_subb_64(src, dst, vr);
break;
@@ -493,9 +472,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_SUB_32:
r = cc_calc_sub_32(src, dst, vr);
break;
- case CC_OP_SUBU_32:
- r = cc_calc_subu_32(src, dst, vr);
- break;
case CC_OP_SUBB_32:
r = cc_calc_subb_32(src, dst, vr);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index 4f4561bc64..fa3aa500e5 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -396,6 +396,7 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_STATIC] = "CC_OP_STATIC",
[CC_OP_NZ] = "CC_OP_NZ",
[CC_OP_ADDU] = "CC_OP_ADDU",
+ [CC_OP_SUBU] = "CC_OP_SUBU",
[CC_OP_LTGT_32] = "CC_OP_LTGT_32",
[CC_OP_LTGT_64] = "CC_OP_LTGT_64",
[CC_OP_LTUGTU_32] = "CC_OP_LTUGTU_32",
@@ -404,13 +405,11 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
- [CC_OP_SUBU_64] = "CC_OP_SUBU_64",
[CC_OP_SUBB_64] = "CC_OP_SUBB_64",
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
- [CC_OP_SUBU_32] = "CC_OP_SUBU_32",
[CC_OP_SUBB_32] = "CC_OP_SUBB_32",
[CC_OP_ABS_32] = "CC_OP_ABS_32",
[CC_OP_NABS_32] = "CC_OP_NABS_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index d1d97e4696..40add1df1f 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -601,11 +601,9 @@ static void gen_op_calc_cc(DisasContext *s)
/* FALLTHRU */
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
local_cc_op = tcg_const_i32(s->cc_op);
break;
@@ -656,6 +654,7 @@ static void gen_op_calc_cc(DisasContext *s)
case CC_OP_TM_64:
case CC_OP_SLA_32:
case CC_OP_SLA_64:
+ case CC_OP_SUBU:
case CC_OP_NZ_F128:
case CC_OP_VC:
case CC_OP_MULS_64:
@@ -664,11 +663,9 @@ static void gen_op_calc_cc(DisasContext *s)
break;
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBU_64:
case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBU_32:
case CC_OP_SUBB_32:
/* 3 arguments */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr);
@@ -843,6 +840,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_ADDU:
+ case CC_OP_SUBU:
switch (mask) {
case 8 | 2: /* result == 0 */
cond = TCG_COND_EQ;
@@ -850,33 +848,11 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
case 4 | 1: /* result != 0 */
cond = TCG_COND_NE;
break;
- case 8 | 4: /* no carry */
- cond = TCG_COND_EQ;
+ case 8 | 4: /* !carry (borrow) */
+ cond = old_cc_op == CC_OP_ADDU ? TCG_COND_EQ : TCG_COND_NE;
break;
- case 2 | 1: /* carry */
- cond = TCG_COND_NE;
- break;
- default:
- goto do_dynamic;
- }
- account_inline_branch(s, old_cc_op);
- break;
-
- case CC_OP_SUBU_32:
- case CC_OP_SUBU_64:
- /* Note that CC=0 is impossible; treat it as dont-care. */
- switch (mask & 7) {
- case 2: /* zero -> op1 == op2 */
- cond = TCG_COND_EQ;
- break;
- case 4 | 1: /* !zero -> op1 != op2 */
- cond = TCG_COND_NE;
- break;
- case 4: /* borrow (!carry) -> op1 < op2 */
- cond = TCG_COND_LTU;
- break;
- case 2 | 1: /* !borrow (carry) -> op1 >= op2 */
- cond = TCG_COND_GEU;
+ case 2 | 1: /* carry (!borrow) */
+ cond = old_cc_op == CC_OP_ADDU ? TCG_COND_NE : TCG_COND_EQ;
break;
default:
goto do_dynamic;
@@ -911,7 +887,6 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_LTGT_32:
case CC_OP_LTUGTU_32:
- case CC_OP_SUBU_32:
c->is_64 = false;
c->u.s32.a = tcg_temp_new_i32();
tcg_gen_extrl_i64_i32(c->u.s32.a, cc_src);
@@ -928,7 +903,6 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_LTGT_64:
case CC_OP_LTUGTU_64:
- case CC_OP_SUBU_64:
c->u.s64.a = cc_src;
c->u.s64.b = cc_dst;
c->g1 = c->g2 = true;
@@ -943,6 +917,7 @@ static void disas_jcc(DisasContext *s, DisasCompare *c, uint32_t mask)
break;
case CC_OP_ADDU:
+ case CC_OP_SUBU:
c->is_64 = true;
c->u.s64.b = tcg_const_i64(0);
c->g1 = true;
@@ -1446,6 +1421,9 @@ static void compute_carry(DisasContext *s)
case CC_OP_ADDU:
/* The carry value is already in cc_src (1,0). */
break;
+ case CC_OP_SUBU:
+ tcg_gen_addi_i64(cc_src, cc_src, 1);
+ break;
default:
gen_op_calc_cc(s);
/* fall through */
@@ -4761,6 +4739,13 @@ static DisasJumpType op_sub(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
+static DisasJumpType op_subu64(DisasContext *s, DisasOps *o)
+{
+ tcg_gen_movi_i64(cc_src, 0);
+ tcg_gen_sub2_i64(o->out, cc_src, o->in1, cc_src, o->in2, cc_src);
+ return DISAS_NEXT;
+}
+
static DisasJumpType op_subb(DisasContext *s, DisasOps *o)
{
DisasCompare cmp;
@@ -5312,12 +5297,14 @@ static void cout_subs64(DisasContext *s, DisasOps *o)
static void cout_subu32(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_SUBU_32, o->in1, o->in2, o->out);
+ tcg_gen_sari_i64(cc_src, o->out, 32);
+ tcg_gen_ext32u_i64(cc_dst, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, cc_dst);
}
static void cout_subu64(DisasContext *s, DisasOps *o)
{
- gen_op_update3_cc_i64(s, CC_OP_SUBU_64, o->in1, o->in2, o->out);
+ gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out);
}
static void cout_subb32(DisasContext *s, DisasOps *o)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index e380723dcd..7ff3e7e517 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -900,21 +900,21 @@
C(0xb9c9, SHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subs32)
C(0xb9d9, SHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subs32)
/* SUBTRACT LOGICAL */
- C(0x1f00, SLR, RR_a, Z, r1, r2, new, r1_32, sub, subu32)
- C(0xb9fb, SLRK, RRF_a, DO, r2, r3, new, r1_32, sub, subu32)
- C(0x5f00, SL, RX_a, Z, r1, m2_32u, new, r1_32, sub, subu32)
- C(0xe35f, SLY, RXY_a, LD, r1, m2_32u, new, r1_32, sub, subu32)
- C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, sub, subu64)
- C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, sub, subu64)
- C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, sub, subu64)
- C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, sub, subu64)
- C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, sub, subu64)
+ C(0x1f00, SLR, RR_a, Z, r1_32u, r2_32u, new, r1_32, sub, subu32)
+ C(0xb9fb, SLRK, RRF_a, DO, r2_32u, r3_32u, new, r1_32, sub, subu32)
+ C(0x5f00, SL, RX_a, Z, r1_32u, m2_32u, new, r1_32, sub, subu32)
+ C(0xe35f, SLY, RXY_a, LD, r1_32u, m2_32u, new, r1_32, sub, subu32)
+ C(0xb90b, SLGR, RRE, Z, r1, r2, r1, 0, subu64, subu64)
+ C(0xb91b, SLGFR, RRE, Z, r1, r2_32u, r1, 0, subu64, subu64)
+ C(0xb9eb, SLGRK, RRF_a, DO, r2, r3, r1, 0, subu64, subu64)
+ C(0xe30b, SLG, RXY_a, Z, r1, m2_64, r1, 0, subu64, subu64)
+ C(0xe31b, SLGF, RXY_a, Z, r1, m2_32u, r1, 0, subu64, subu64)
/* SUBTRACT LOCICAL HIGH */
C(0xb9cb, SLHHHR, RRF_a, HW, r2_sr32, r3_sr32, new, r1_32h, sub, subu32)
- C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3, new, r1_32h, sub, subu32)
+ C(0xb9db, SLHHLR, RRF_a, HW, r2_sr32, r3_32u, new, r1_32h, sub, subu32)
/* SUBTRACT LOGICAL IMMEDIATE */
- C(0xc205, SLFI, RIL_a, EI, r1, i2_32u, new, r1_32, sub, subu32)
- C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, sub, subu64)
+ C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
+ C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
/* SUBTRACT LOGICAL WITH BORROW */
C(0xb999, SLBR, RRE, Z, r1, r2, new, r1_32, subb, subb32)
C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb, subb64)
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH v3 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW
2020-12-14 22:13 [PATCH v3 0/4] target/s390x: Improve carry computation Richard Henderson
` (2 preceding siblings ...)
2020-12-14 22:13 ` [PATCH v3 3/4] target/s390x: Improve cc computation for SUBTRACT LOGICAL Richard Henderson
@ 2020-12-14 22:13 ` Richard Henderson
2020-12-16 14:44 ` [PATCH v3 0/4] target/s390x: Improve carry computation David Hildenbrand
2020-12-16 17:09 ` Cornelia Huck
5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-12-14 22:13 UTC (permalink / raw)
To: qemu-devel; +Cc: david
Now that SUB LOGICAL outputs borrow, we can use that as input directly.
It also means we can re-use CC_OP_SUBU and produce an output borrow
directly from SUB LOGICAL WITH BORROW.
Reviewed-by: David Hildenbrand <david@redhat.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/s390x/internal.h | 2 -
target/s390x/cc_helper.c | 32 ----------------
target/s390x/helper.c | 2 -
target/s390x/translate.c | 76 +++++++++++++++++++++-----------------
target/s390x/insn-data.def | 8 ++--
5 files changed, 46 insertions(+), 74 deletions(-)
diff --git a/target/s390x/internal.h b/target/s390x/internal.h
index 4077047494..11515bb617 100644
--- a/target/s390x/internal.h
+++ b/target/s390x/internal.h
@@ -172,14 +172,12 @@ enum cc_op {
CC_OP_ADD_64, /* overflow on add (64bit) */
CC_OP_SUB_64, /* overflow on subtraction (64bit) */
- CC_OP_SUBB_64, /* overflow on unsigned sub-borrow (64bit) */
CC_OP_ABS_64, /* sign eval on abs (64bit) */
CC_OP_NABS_64, /* sign eval on nabs (64bit) */
CC_OP_MULS_64, /* overflow on signed multiply (64bit) */
CC_OP_ADD_32, /* overflow on add (32bit) */
CC_OP_SUB_32, /* overflow on subtraction (32bit) */
- CC_OP_SUBB_32, /* overflow on unsigned sub-borrow (32bit) */
CC_OP_ABS_32, /* sign eval on abs (64bit) */
CC_OP_NABS_32, /* sign eval on nabs (64bit) */
CC_OP_MULS_32, /* overflow on signed multiply (32bit) */
diff --git a/target/s390x/cc_helper.c b/target/s390x/cc_helper.c
index c7728d1225..e7039d0d18 100644
--- a/target/s390x/cc_helper.c
+++ b/target/s390x/cc_helper.c
@@ -164,19 +164,6 @@ static uint32_t cc_calc_sub_64(int64_t a1, int64_t a2, int64_t ar)
}
}
-static uint32_t cc_calc_subb_64(uint64_t a1, uint64_t a2, uint64_t ar)
-{
- int borrow_out;
-
- if (ar != a1 - a2) { /* difference means borrow-in */
- borrow_out = (a2 >= a1);
- } else {
- borrow_out = (a2 > a1);
- }
-
- return (ar != 0) + 2 * !borrow_out;
-}
-
static uint32_t cc_calc_abs_64(int64_t dst)
{
if ((uint64_t)dst == 0x8000000000000000ULL) {
@@ -237,19 +224,6 @@ static uint32_t cc_calc_sub_32(int32_t a1, int32_t a2, int32_t ar)
}
}
-static uint32_t cc_calc_subb_32(uint32_t a1, uint32_t a2, uint32_t ar)
-{
- int borrow_out;
-
- if (ar != a1 - a2) { /* difference means borrow-in */
- borrow_out = (a2 >= a1);
- } else {
- borrow_out = (a2 > a1);
- }
-
- return (ar != 0) + 2 * !borrow_out;
-}
-
static uint32_t cc_calc_abs_32(int32_t dst)
{
if ((uint32_t)dst == 0x80000000UL) {
@@ -450,9 +424,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_SUB_64:
r = cc_calc_sub_64(src, dst, vr);
break;
- case CC_OP_SUBB_64:
- r = cc_calc_subb_64(src, dst, vr);
- break;
case CC_OP_ABS_64:
r = cc_calc_abs_64(dst);
break;
@@ -472,9 +443,6 @@ static uint32_t do_calc_cc(CPUS390XState *env, uint32_t cc_op,
case CC_OP_SUB_32:
r = cc_calc_sub_32(src, dst, vr);
break;
- case CC_OP_SUBB_32:
- r = cc_calc_subb_32(src, dst, vr);
- break;
case CC_OP_ABS_32:
r = cc_calc_abs_32(dst);
break;
diff --git a/target/s390x/helper.c b/target/s390x/helper.c
index fa3aa500e5..7678994feb 100644
--- a/target/s390x/helper.c
+++ b/target/s390x/helper.c
@@ -405,12 +405,10 @@ const char *cc_name(enum cc_op cc_op)
[CC_OP_LTGT0_64] = "CC_OP_LTGT0_64",
[CC_OP_ADD_64] = "CC_OP_ADD_64",
[CC_OP_SUB_64] = "CC_OP_SUB_64",
- [CC_OP_SUBB_64] = "CC_OP_SUBB_64",
[CC_OP_ABS_64] = "CC_OP_ABS_64",
[CC_OP_NABS_64] = "CC_OP_NABS_64",
[CC_OP_ADD_32] = "CC_OP_ADD_32",
[CC_OP_SUB_32] = "CC_OP_SUB_32",
- [CC_OP_SUBB_32] = "CC_OP_SUBB_32",
[CC_OP_ABS_32] = "CC_OP_ABS_32",
[CC_OP_NABS_32] = "CC_OP_NABS_32",
[CC_OP_COMP_32] = "CC_OP_COMP_32",
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 40add1df1f..3d5c0d6106 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -601,10 +601,8 @@ static void gen_op_calc_cc(DisasContext *s)
/* FALLTHRU */
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBB_32:
local_cc_op = tcg_const_i32(s->cc_op);
break;
case CC_OP_CONST0:
@@ -663,10 +661,8 @@ static void gen_op_calc_cc(DisasContext *s)
break;
case CC_OP_ADD_64:
case CC_OP_SUB_64:
- case CC_OP_SUBB_64:
case CC_OP_ADD_32:
case CC_OP_SUB_32:
- case CC_OP_SUBB_32:
/* 3 arguments */
gen_helper_calc_cc(cc_op, cpu_env, local_cc_op, cc_src, cc_dst, cc_vr);
break;
@@ -4746,29 +4742,51 @@ static DisasJumpType op_subu64(DisasContext *s, DisasOps *o)
return DISAS_NEXT;
}
-static DisasJumpType op_subb(DisasContext *s, DisasOps *o)
+/* Compute borrow (0, -1) into cc_src. */
+static void compute_borrow(DisasContext *s)
{
- DisasCompare cmp;
- TCGv_i64 borrow;
-
- tcg_gen_sub_i64(o->out, o->in1, o->in2);
-
- /* The !borrow flag is the msb of CC. Since we want the inverse of
- that, we ask for a comparison of CC=0 | CC=1 -> mask of 8 | 4. */
- disas_jcc(s, &cmp, 8 | 4);
- borrow = tcg_temp_new_i64();
- if (cmp.is_64) {
- tcg_gen_setcond_i64(cmp.cond, borrow, cmp.u.s64.a, cmp.u.s64.b);
- } else {
- TCGv_i32 t = tcg_temp_new_i32();
- tcg_gen_setcond_i32(cmp.cond, t, cmp.u.s32.a, cmp.u.s32.b);
- tcg_gen_extu_i32_i64(borrow, t);
- tcg_temp_free_i32(t);
+ switch (s->cc_op) {
+ case CC_OP_SUBU:
+ /* The borrow value is already in cc_src (0,-1). */
+ break;
+ default:
+ gen_op_calc_cc(s);
+ /* fall through */
+ case CC_OP_STATIC:
+ /* The carry flag is the msb of CC; compute into cc_src. */
+ tcg_gen_extu_i32_i64(cc_src, cc_op);
+ tcg_gen_shri_i64(cc_src, cc_src, 1);
+ /* fall through */
+ case CC_OP_ADDU:
+ /* Convert carry (1,0) to borrow (0,-1). */
+ tcg_gen_subi_i64(cc_src, cc_src, 1);
+ break;
}
- free_compare(&cmp);
+}
+
+static DisasJumpType op_subb32(DisasContext *s, DisasOps *o)
+{
+ compute_borrow(s);
+
+ /* Borrow is {0, -1}, so add to subtract. */
+ tcg_gen_add_i64(o->out, o->in1, cc_src);
+ tcg_gen_sub_i64(o->out, o->out, o->in2);
+ return DISAS_NEXT;
+}
+
+static DisasJumpType op_subb64(DisasContext *s, DisasOps *o)
+{
+ compute_borrow(s);
+
+ /*
+ * Borrow is {0, -1}, so add to subtract; replicate the
+ * borrow input to produce 128-bit -1 for the addition.
+ */
+ TCGv_i64 zero = tcg_const_i64(0);
+ tcg_gen_add2_i64(o->out, cc_src, o->in1, zero, cc_src, cc_src);
+ tcg_gen_sub2_i64(o->out, cc_src, o->out, cc_src, o->in2, zero);
+ tcg_temp_free_i64(zero);
- tcg_gen_sub_i64(o->out, o->out, borrow);
- tcg_temp_free_i64(borrow);
return DISAS_NEXT;
}
@@ -5307,16 +5325,6 @@ static void cout_subu64(DisasContext *s, DisasOps *o)
gen_op_update2_cc_i64(s, CC_OP_SUBU, cc_src, o->out);
}
-static void cout_subb32(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_SUBB_32, o->in1, o->in2, o->out);
-}
-
-static void cout_subb64(DisasContext *s, DisasOps *o)
-{
- gen_op_update3_cc_i64(s, CC_OP_SUBB_64, o->in1, o->in2, o->out);
-}
-
static void cout_tm32(DisasContext *s, DisasOps *o)
{
gen_op_update2_cc_i64(s, CC_OP_TM_32, o->in1, o->in2);
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 7ff3e7e517..26badb663a 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -916,10 +916,10 @@
C(0xc205, SLFI, RIL_a, EI, r1_32u, i2_32u, new, r1_32, sub, subu32)
C(0xc204, SLGFI, RIL_a, EI, r1, i2_32u, r1, 0, subu64, subu64)
/* SUBTRACT LOGICAL WITH BORROW */
- C(0xb999, SLBR, RRE, Z, r1, r2, new, r1_32, subb, subb32)
- C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb, subb64)
- C(0xe399, SLB, RXY_a, Z, r1, m2_32u, new, r1_32, subb, subb32)
- C(0xe389, SLBG, RXY_a, Z, r1, m2_64, r1, 0, subb, subb64)
+ C(0xb999, SLBR, RRE, Z, r1_32u, r2_32u, new, r1_32, subb32, subu32)
+ C(0xb989, SLBGR, RRE, Z, r1, r2, r1, 0, subb64, subu64)
+ C(0xe399, SLB, RXY_a, Z, r1_32u, m2_32u, new, r1_32, subb32, subu32)
+ C(0xe389, SLBG, RXY_a, Z, r1, m2_64, r1, 0, subb64, subu64)
/* SUPERVISOR CALL */
C(0x0a00, SVC, I, Z, 0, 0, 0, 0, svc, 0)
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH v3 0/4] target/s390x: Improve carry computation
2020-12-14 22:13 [PATCH v3 0/4] target/s390x: Improve carry computation Richard Henderson
` (3 preceding siblings ...)
2020-12-14 22:13 ` [PATCH v3 4/4] target/s390x: Improve SUB LOGICAL WITH BORROW Richard Henderson
@ 2020-12-16 14:44 ` David Hildenbrand
2020-12-16 17:09 ` Cornelia Huck
5 siblings, 0 replies; 7+ messages in thread
From: David Hildenbrand @ 2020-12-16 14:44 UTC (permalink / raw)
To: Richard Henderson, qemu-devel; +Cc: Thomas Huth, Cornelia Huck
On 14.12.20 23:13, Richard Henderson wrote:
> While testing the float128_muladd changes for s390x host,
> emulating under x86_64 of course, I noticed that the code
> we generate for strings of ALCGR and SLBGR is pretty awful.
>
> I realized that we were missing a trick: the output cc is
> based only on the output (result and carry) and so we don't
> need to save the inputs. And once we do that, we can use
> the output carry as a direct input to the next insn.
>
LGTM, @Conny, can you pick these?
--
Thanks,
David / dhildenb
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH v3 0/4] target/s390x: Improve carry computation
2020-12-14 22:13 [PATCH v3 0/4] target/s390x: Improve carry computation Richard Henderson
` (4 preceding siblings ...)
2020-12-16 14:44 ` [PATCH v3 0/4] target/s390x: Improve carry computation David Hildenbrand
@ 2020-12-16 17:09 ` Cornelia Huck
5 siblings, 0 replies; 7+ messages in thread
From: Cornelia Huck @ 2020-12-16 17:09 UTC (permalink / raw)
To: Richard Henderson; +Cc: qemu-devel, david
On Mon, 14 Dec 2020 16:13:52 -0600
Richard Henderson <richard.henderson@linaro.org> wrote:
> While testing the float128_muladd changes for s390x host,
> emulating under x86_64 of course, I noticed that the code
> we generate for strings of ALCGR and SLBGR is pretty awful.
>
> I realized that we were missing a trick: the output cc is
> based only on the output (result and carry) and so we don't
> need to save the inputs. And once we do that, we can use
> the output carry as a direct input to the next insn.
>
> Changes for v3:
> * Rebased.
> Changes for v2:
> * Add a few more comments, and enhance the patch descriptions.
>
>
> r~
>
> Richard Henderson (4):
> target/s390x: Improve cc computation for ADD LOGICAL
> target/s390x: Improve ADD LOGICAL WITH CARRY
> target/s390x: Improve cc computation for SUBTRACT LOGICAL
> target/s390x: Improve SUB LOGICAL WITH BORROW
>
> target/s390x/internal.h | 11 +-
> target/s390x/cc_helper.c | 123 +++-------------
> target/s390x/helper.c | 10 +-
> target/s390x/translate.c | 289 ++++++++++++++++++++-----------------
> target/s390x/insn-data.def | 76 +++++-----
> 5 files changed, 216 insertions(+), 293 deletions(-)
>
Thanks, applied.
^ permalink raw reply [flat|nested] 7+ messages in thread