* [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes
2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
@ 2024-02-01 10:33 ` Taylor Simpson
2024-03-29 2:05 ` Brian Cain
2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
2 siblings, 1 reply; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
philmd, ale, anjo, ltaylorsimpson
We divide gen_analyze_funcs.py into 3 phases:
  1. Declare the operands
  2. Analyze the register reads
  3. Analyze the register writes
We also create special versions of ctx_log_*_read for .new operands.
These versions assert that the operand is written before the read.
This is a precursor to improving the analysis for short-circuiting
the packet semantics in a subsequent commit
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 26 +++++++++++-
target/hexagon/README | 9 +++--
target/hexagon/gen_analyze_funcs.py | 34 ++++++++++------
target/hexagon/hex_common.py | 63 +++++++++++++++--------------
4 files changed, 83 insertions(+), 49 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4dd59c6726..f06d71fc53 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -75,6 +75,8 @@ typedef struct DisasContext {
TCGv dczero_addr;
} DisasContext;
+bool is_gather_store_insn(DisasContext *ctx);
+
static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
{
if (!test_bit(pnum, ctx->pregs_written)) {
@@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
set_bit(pnum, ctx->pregs_read);
}
+static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
+{
+ g_assert(test_bit(pnum, ctx->pregs_written));
+ set_bit(pnum, ctx->pregs_read);
+}
+
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
bool is_predicated)
{
@@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->regs_read);
}
+static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(test_bit(rnum, ctx->regs_written));
+ set_bit(rnum, ctx->regs_read);
+}
+
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_reg_read(ctx, rnum);
@@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
set_bit(rnum, ctx->vregs_read);
}
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+{
+ g_assert(is_gather_store_insn(ctx) ||
+ test_bit(rnum, ctx->vregs_updated) ||
+ test_bit(rnum, ctx->vregs_select) ||
+ test_bit(rnum, ctx->vregs_updated_tmp));
+ set_bit(rnum, ctx->vregs_read);
+}
+
static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
{
ctx_log_vreg_read(ctx, rnum ^ 0);
@@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
extern TCGv hex_vstore_size[VSTORES_MAX];
extern TCGv hex_vstore_pending[VSTORES_MAX];
-bool is_gather_store_insn(DisasContext *ctx);
void process_store(DisasContext *ctx, int slot_num);
FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
diff --git a/target/hexagon/README b/target/hexagon/README
index 746ebec378..c1d8c8d0ab 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -183,10 +183,11 @@ when the override is present.
}
We also generate an analyze_<tag> function for each instruction. Currently,
-these functions record the writes to registers by calling ctx_log_*. During
-gen_start_packet, we invoke the analyze_<tag> function for each instruction in
-the packet, and we mark the implicit writes. After the analysis is performed,
-we initialize the result register for each of the predicated assignments.
+these functions record the reads and writes to registers by calling ctx_log_*.
+During gen_start_packet, we invoke the analyze_<tag> function for each instruction in
+the packet, and we mark the implicit writes. The analysis determines if the packet
+semantics can be short-circuited. If not, we initialize the result register for each
+of the predicated assignments.
In addition to instruction semantics, we use a generator to create the decode
tree. This generation is a four step process.
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index a9af666cef..890e6a3a95 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -44,15 +44,25 @@ def gen_analyze_func(f, tag, regs, imms):
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
- i = 0
- ## Analyze all the registers
- for regtype, regid in regs:
- reg = hex_common.get_register(tag, regtype, regid)
+ ## Declare all the registers
+ for regno, register in enumerate(regs):
+ reg_type, reg_id = register
+ reg = hex_common.get_register(tag, reg_type, reg_id)
+ reg.decl_reg_num(f, regno)
+
+ ## Analyze the register reads
+ for regno, register in enumerate(regs):
+ reg_type, reg_id = register
+ reg = hex_common.get_register(tag, reg_type, reg_id)
+ if reg.is_read():
+ reg.analyze_read(f, regno)
+
+ ## Analyze the register writes
+ for regno, register in enumerate(regs):
+ reg_type, reg_id = register
+ reg = hex_common.get_register(tag, reg_type, reg_id)
if reg.is_written():
- reg.analyze_write(f, tag, i)
- else:
- reg.analyze_read(f, i)
- i += 1
+ reg.analyze_write(f, tag, regno)
has_generated_helper = not hex_common.skip_qemu_helper(
tag
@@ -89,13 +99,13 @@ def main():
tagimms = hex_common.get_tagimms()
with open(sys.argv[-1], "w") as f:
- f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
- f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+ f.write("#ifndef HEXAGON_ANALYZE_FUNCS_C_INC\n")
+ f.write("#define HEXAGON_ANALYZE_FUNCS_C_INC\n\n")
for tag in hex_common.tags:
gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
- f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n")
+ f.write("#endif /* HEXAGON_ANALYZE_FUNCS_C_INC */\n")
if __name__ == "__main__":
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 195620c7ec..33801e4bd7 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
##
-## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
##
## This program is free software; you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
@@ -425,7 +425,6 @@ def log_write(self, f, tag):
gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -438,7 +437,6 @@ def decl_tcg(self, f, tag, regno):
TCGv {self.reg_tcg()} = hex_gpr[{self.reg_num}];
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read(ctx, {self.reg_num});
"""))
@@ -449,9 +447,8 @@ def decl_tcg(self, f, tag, regno):
TCGv {self.reg_tcg()} = get_result_gpr(ctx, insn->regno[{regno}]);
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_reg_read(ctx, {self.reg_num});
+ ctx_log_reg_read_new(ctx, {self.reg_num});
"""))
class GprReadWrite(Register, Single, ReadWrite):
@@ -471,8 +468,11 @@ def log_write(self, f, tag):
f.write(code_fmt(f"""\
gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_reg_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -493,7 +493,6 @@ def log_write(self, f, tag):
gen_write_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -511,7 +510,6 @@ def decl_tcg(self, f, tag, regno):
gen_read_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read(ctx, {self.reg_num});
"""))
@@ -532,7 +530,6 @@ def idef_arg(self, declared):
declared.append(self.reg_tcg())
declared.append("CS")
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read(ctx, {self.reg_num});
"""))
@@ -548,7 +545,6 @@ def log_write(self, f, tag):
gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_pred_write(ctx, {self.reg_num});
"""))
@@ -560,7 +556,6 @@ def decl_tcg(self, f, tag, regno):
TCGv {self.reg_tcg()} = hex_pred[{self.reg_num}];
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_pred_read(ctx, {self.reg_num});
"""))
@@ -571,9 +566,8 @@ def decl_tcg(self, f, tag, regno):
TCGv {self.reg_tcg()} = get_result_pred(ctx, insn->regno[{regno}]);
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_pred_read(ctx, {self.reg_num});
+ ctx_log_pred_read_new(ctx, {self.reg_num});
"""))
class PredReadWrite(Register, Single, ReadWrite):
@@ -587,8 +581,11 @@ def log_write(self, f, tag):
f.write(code_fmt(f"""\
gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_pred_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_pred_write(ctx, {self.reg_num});
"""))
@@ -605,7 +602,6 @@ def log_write(self, f, tag):
gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -621,7 +617,6 @@ def decl_tcg(self, f, tag, regno):
hex_gpr[{self.reg_num} + 1]);
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read_pair(ctx, {self.reg_num});
"""))
@@ -640,8 +635,11 @@ def log_write(self, f, tag):
f.write(code_fmt(f"""\
gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_reg_read_pair(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -663,7 +661,6 @@ def log_write(self, f, tag):
gen_write_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -681,7 +678,6 @@ def decl_tcg(self, f, tag, regno):
gen_read_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_reg_read_pair(ctx, {self.reg_num});
"""))
@@ -705,7 +701,6 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
@@ -728,7 +723,6 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_vreg_read(ctx, {self.reg_num});
"""))
@@ -746,9 +740,8 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
- ctx_log_vreg_read(ctx, {self.reg_num});
+ ctx_log_vreg_read_new(ctx, {self.reg_num});
"""))
class VRegReadWrite(Register, Hvx, ReadWrite):
@@ -772,8 +765,11 @@ def helper_hvx_desc(self, f):
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_vreg_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
@@ -803,8 +799,11 @@ def helper_hvx_desc(self, f):
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_vreg_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
@@ -830,7 +829,6 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
@@ -860,7 +858,6 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_vreg_read_pair(ctx, {self.reg_num});
"""))
@@ -892,8 +889,11 @@ def helper_hvx_desc(self, f):
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_vreg_read_pair(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
@@ -919,7 +919,6 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
"""))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_qreg_write(ctx, {self.reg_num});
"""))
@@ -941,7 +940,6 @@ def helper_hvx_desc(self, f):
/* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
"""))
def analyze_read(self, f, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_qreg_read(ctx, {self.reg_num});
"""))
@@ -967,8 +965,11 @@ def helper_hvx_desc(self, f):
f.write(code_fmt(f"""\
/* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
"""))
+ def analyze_read(self, f, regno):
+ f.write(code_fmt(f"""\
+ ctx_log_qreg_read(ctx, {self.reg_num});
+ """))
def analyze_write(self, f, tag, regno):
- self.decl_reg_num(f, regno)
f.write(code_fmt(f"""\
ctx_log_qreg_write(ctx, {self.reg_num});
"""))
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* RE: [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes
2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
@ 2024-03-29 2:05 ` Brian Cain
0 siblings, 0 replies; 7+ messages in thread
From: Brian Cain @ 2024-03-29 2:05 UTC (permalink / raw)
To: Taylor Simpson, qemu-devel
Cc: Matheus Bernardino (QUIC), Sid Manning, Marco Liebel (QUIC),
richard.henderson, philmd, ale, anjo
> -----Original Message-----
> From: Taylor Simpson <ltaylorsimpson@gmail.com>
> Sent: Thursday, February 1, 2024 4:34 AM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; Sid Manning <sidneym@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>;
> richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; anjo@rev.ng;
> ltaylorsimpson@gmail.com
> Subject: [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before
> writes
>
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
>
> We divide gen_analyze_funcs.py into 3 phases
> Declare the operands
> Analyze the register reads
> Analyze the register writes
>
> We also create special versions of ctx_log_*_read for new operands
> Check that the operand is written before the read
>
> This is a precursor to improving the analysis for short-circuiting
> the packet semantics in a subsequent commit
>
> Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
> ---
> target/hexagon/translate.h | 26 +++++++++++-
> target/hexagon/README | 9 +++--
> target/hexagon/gen_analyze_funcs.py | 34 ++++++++++------
> target/hexagon/hex_common.py | 63 +++++++++++++++--------------
> 4 files changed, 83 insertions(+), 49 deletions(-)
>
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index 4dd59c6726..f06d71fc53 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -1,5 +1,5 @@
> /*
> - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -75,6 +75,8 @@ typedef struct DisasContext {
> TCGv dczero_addr;
> } DisasContext;
>
> +bool is_gather_store_insn(DisasContext *ctx);
> +
> static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
> {
> if (!test_bit(pnum, ctx->pregs_written)) {
> @@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx,
> int pnum)
> set_bit(pnum, ctx->pregs_read);
> }
>
> +static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
> +{
> + g_assert(test_bit(pnum, ctx->pregs_written));
> + set_bit(pnum, ctx->pregs_read);
> +}
> +
> static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
> bool is_predicated)
> {
> @@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext
> *ctx, int rnum)
> set_bit(rnum, ctx->regs_read);
> }
>
> +static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
> +{
> + g_assert(test_bit(rnum, ctx->regs_written));
> + set_bit(rnum, ctx->regs_read);
> +}
> +
> static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
> {
> ctx_log_reg_read(ctx, rnum);
> @@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext
> *ctx, int rnum)
> set_bit(rnum, ctx->vregs_read);
> }
>
> +static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
> +{
> + g_assert(is_gather_store_insn(ctx) ||
> + test_bit(rnum, ctx->vregs_updated) ||
> + test_bit(rnum, ctx->vregs_select) ||
> + test_bit(rnum, ctx->vregs_updated_tmp));
> + set_bit(rnum, ctx->vregs_read);
> +}
> +
> static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
> {
> ctx_log_vreg_read(ctx, rnum ^ 0);
> @@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
> extern TCGv hex_vstore_size[VSTORES_MAX];
> extern TCGv hex_vstore_pending[VSTORES_MAX];
>
> -bool is_gather_store_insn(DisasContext *ctx);
> void process_store(DisasContext *ctx, int slot_num);
>
> FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX, 0, 2)
> diff --git a/target/hexagon/README b/target/hexagon/README
> index 746ebec378..c1d8c8d0ab 100644
> --- a/target/hexagon/README
> +++ b/target/hexagon/README
> @@ -183,10 +183,11 @@ when the override is present.
> }
>
> We also generate an analyze_<tag> function for each instruction. Currently,
> -these functions record the writes to registers by calling ctx_log_*. During
> -gen_start_packet, we invoke the analyze_<tag> function for each instruction
> in
> -the packet, and we mark the implicit writes. After the analysis is performed,
> -we initialize the result register for each of the predicated assignments.
> +these functions record the reads and writes to registers by calling ctx_log_*.
> +During gen_start_packet, we invoke the analyze_<tag> function for each
> instruction in
> +the packet, and we mark the implicit writes. The analysis determines if the
> packet
> +semantics can be short-circuited. If not, we initialize the result register for
> each
> +of the predicated assignments.
>
> In addition to instruction semantics, we use a generator to create the decode
> tree. This generation is a four step process.
> diff --git a/target/hexagon/gen_analyze_funcs.py
> b/target/hexagon/gen_analyze_funcs.py
> index a9af666cef..890e6a3a95 100755
> --- a/target/hexagon/gen_analyze_funcs.py
> +++ b/target/hexagon/gen_analyze_funcs.py
> @@ -1,7 +1,7 @@
> #!/usr/bin/env python3
>
> ##
> -## Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> +## Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> ##
> ## This program is free software; you can redistribute it and/or modify
> ## it under the terms of the GNU General Public License as published by
> @@ -44,15 +44,25 @@ def gen_analyze_func(f, tag, regs, imms):
>
> f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
>
> - i = 0
> - ## Analyze all the registers
> - for regtype, regid in regs:
> - reg = hex_common.get_register(tag, regtype, regid)
> + ## Declare all the registers
> + for regno, register in enumerate(regs):
> + reg_type, reg_id = register
> + reg = hex_common.get_register(tag, reg_type, reg_id)
> + reg.decl_reg_num(f, regno)
> +
> + ## Analyze the register reads
> + for regno, register in enumerate(regs):
> + reg_type, reg_id = register
> + reg = hex_common.get_register(tag, reg_type, reg_id)
> + if reg.is_read():
> + reg.analyze_read(f, regno)
> +
> + ## Analyze the register writes
> + for regno, register in enumerate(regs):
> + reg_type, reg_id = register
> + reg = hex_common.get_register(tag, reg_type, reg_id)
> if reg.is_written():
> - reg.analyze_write(f, tag, i)
> - else:
> - reg.analyze_read(f, i)
> - i += 1
> + reg.analyze_write(f, tag, regno)
>
> has_generated_helper = not hex_common.skip_qemu_helper(
> tag
> @@ -89,13 +99,13 @@ def main():
> tagimms = hex_common.get_tagimms()
>
> with open(sys.argv[-1], "w") as f:
> - f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
> - f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
> + f.write("#ifndef HEXAGON_ANALYZE_FUNCS_C_INC\n")
> + f.write("#define HEXAGON_ANALYZE_FUNCS_C_INC\n\n")
>
> for tag in hex_common.tags:
> gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
>
> - f.write("#endif /* HEXAGON_TCG_FUNCS_H */\n")
> + f.write("#endif /* HEXAGON_ANALYZE_FUNCS_C_INC */\n")
>
>
> if __name__ == "__main__":
> diff --git a/target/hexagon/hex_common.py
> b/target/hexagon/hex_common.py
> index 195620c7ec..33801e4bd7 100755
> --- a/target/hexagon/hex_common.py
> +++ b/target/hexagon/hex_common.py
> @@ -1,7 +1,7 @@
> #!/usr/bin/env python3
>
> ##
> -## Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> +## Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> ##
> ## This program is free software; you can redistribute it and/or modify
> ## it under the terms of the GNU General Public License as published by
> @@ -425,7 +425,6 @@ def log_write(self, f, tag):
> gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
> @@ -438,7 +437,6 @@ def decl_tcg(self, f, tag, regno):
> TCGv {self.reg_tcg()} = hex_gpr[{self.reg_num}];
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_reg_read(ctx, {self.reg_num});
> """))
> @@ -449,9 +447,8 @@ def decl_tcg(self, f, tag, regno):
> TCGv {self.reg_tcg()} = get_result_gpr(ctx, insn->regno[{regno}]);
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> - ctx_log_reg_read(ctx, {self.reg_num});
> + ctx_log_reg_read_new(ctx, {self.reg_num});
> """))
>
> class GprReadWrite(Register, Single, ReadWrite):
> @@ -471,8 +468,11 @@ def log_write(self, f, tag):
> f.write(code_fmt(f"""\
> gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_reg_read(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
> @@ -493,7 +493,6 @@ def log_write(self, f, tag):
> gen_write_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
> @@ -511,7 +510,6 @@ def decl_tcg(self, f, tag, regno):
> gen_read_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_reg_read(ctx, {self.reg_num});
> """))
> @@ -532,7 +530,6 @@ def idef_arg(self, declared):
> declared.append(self.reg_tcg())
> declared.append("CS")
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_reg_read(ctx, {self.reg_num});
> """))
> @@ -548,7 +545,6 @@ def log_write(self, f, tag):
> gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_pred_write(ctx, {self.reg_num});
> """))
> @@ -560,7 +556,6 @@ def decl_tcg(self, f, tag, regno):
> TCGv {self.reg_tcg()} = hex_pred[{self.reg_num}];
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_pred_read(ctx, {self.reg_num});
> """))
> @@ -571,9 +566,8 @@ def decl_tcg(self, f, tag, regno):
> TCGv {self.reg_tcg()} = get_result_pred(ctx, insn->regno[{regno}]);
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> - ctx_log_pred_read(ctx, {self.reg_num});
> + ctx_log_pred_read_new(ctx, {self.reg_num});
> """))
>
> class PredReadWrite(Register, Single, ReadWrite):
> @@ -587,8 +581,11 @@ def log_write(self, f, tag):
> f.write(code_fmt(f"""\
> gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_pred_read(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_pred_write(ctx, {self.reg_num});
> """))
> @@ -605,7 +602,6 @@ def log_write(self, f, tag):
> gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
> @@ -621,7 +617,6 @@ def decl_tcg(self, f, tag, regno):
> hex_gpr[{self.reg_num} + 1]);
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_reg_read_pair(ctx, {self.reg_num});
> """))
> @@ -640,8 +635,11 @@ def log_write(self, f, tag):
> f.write(code_fmt(f"""\
> gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_reg_read_pair(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
> @@ -663,7 +661,6 @@ def log_write(self, f, tag):
> gen_write_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
> @@ -681,7 +678,6 @@ def decl_tcg(self, f, tag, regno):
> gen_read_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_reg_read_pair(ctx, {self.reg_num});
> """))
> @@ -705,7 +701,6 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> @@ -728,7 +723,6 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_vreg_read(ctx, {self.reg_num});
> """))
> @@ -746,9 +740,8 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read(ctx, {self.reg_num});
> + ctx_log_vreg_read_new(ctx, {self.reg_num});
> """))
>
> class VRegReadWrite(Register, Hvx, ReadWrite):
> @@ -772,8 +765,11 @@ def helper_hvx_desc(self, f):
> f.write(code_fmt(f"""\
> /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_vreg_read(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> @@ -803,8 +799,11 @@ def helper_hvx_desc(self, f):
> f.write(code_fmt(f"""\
> /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_vreg_read(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> @@ -830,7 +829,6 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> @@ -860,7 +858,6 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_vreg_read_pair(ctx, {self.reg_num});
> """))
> @@ -892,8 +889,11 @@ def helper_hvx_desc(self, f):
> f.write(code_fmt(f"""\
> /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_vreg_read_pair(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> @@ -919,7 +919,6 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
> """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_qreg_write(ctx, {self.reg_num});
> """))
> @@ -941,7 +940,6 @@ def helper_hvx_desc(self, f):
> /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
> """))
> def analyze_read(self, f, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_qreg_read(ctx, {self.reg_num});
> """))
> @@ -967,8 +965,11 @@ def helper_hvx_desc(self, f):
> f.write(code_fmt(f"""\
> /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
> """))
> + def analyze_read(self, f, regno):
> + f.write(code_fmt(f"""\
> + ctx_log_qreg_read(ctx, {self.reg_num});
> + """))
> def analyze_write(self, f, tag, regno):
> - self.decl_reg_num(f, regno)
> f.write(code_fmt(f"""\
> ctx_log_qreg_write(ctx, {self.reg_num});
> """))
> --
> 2.34.1
Reviewed-by: Brian Cain <bcain@quicinc.com>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
@ 2024-02-01 10:33 ` Taylor Simpson
2024-03-29 2:02 ` Brian Cain
2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
2 siblings, 1 reply; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
philmd, ale, anjo, ltaylorsimpson
Look for read-after-write instead of overlap of reads and writes
Here is an example with overlap but no read-after-write:
0x000200fc: 0x38103876 { R0 = add(R0,R1); R6 = add(R6,R7) }
BEFORE:
---- 00000000000200fc
mov_i32 loc2,$0x0
mov_i32 loc2,r0
add_i32 loc3,loc2,r1
mov_i32 loc2,loc3
mov_i32 loc4,$0x0
mov_i32 loc4,r6
add_i32 loc5,loc4,r7
mov_i32 loc4,loc5
mov_i32 r0,loc2
mov_i32 r6,loc4
AFTER:
---- 00000000000200fc
add_i32 loc2,r0,r1
mov_i32 r0,loc2
add_i32 loc3,r6,r7
mov_i32 r6,loc3
We can also short-circuit packets with .new values by reading from the
real destination instead of the temporary.
0x00020100: 0x78005ff3 { R19 = #0xff
0x00020104: 0x2002e204 if (cmp.eq(N19.new,R2)) jump:t PC+8 }
BEFORE:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 loc8,$0x0
mov_i32 loc8,$0xff
setcond_i32 loc10,loc8,r2,eq
mov_i32 loc6,loc10
mov_i32 r19,loc8
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc6,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7fbb54000040
set_label $L1
goto_tb $0x1
exit_tb $0x7fbb54000041
set_label $L0
exit_tb $0x7fbb54000043
AFTER:
---- 0000000000020100
mov_i32 pc,$0x20108
mov_i32 r19,$0xff
setcond_i32 loc7,r19,r2,eq
mov_i32 loc4,loc7
add_i32 pkt_cnt,pkt_cnt,$0x2
add_i32 insn_cnt,insn_cnt,$0x4
brcond_i32 loc4,$0x0,eq,$L1
goto_tb $0x0
mov_i32 pc,$0x20108
exit_tb $0x7f9764000040
set_label $L1
goto_tb $0x1
exit_tb $0x7f9764000041
set_label $L0
exit_tb $0x7f9764000043
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 13 +++++++------
target/hexagon/translate.c | 21 ++++-----------------
2 files changed, 11 insertions(+), 23 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index f06d71fc53..d5e7f49ad8 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -38,12 +38,10 @@ typedef struct DisasContext {
int reg_log[REG_WRITES_MAX];
int reg_log_idx;
DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
- DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
int preg_log[PRED_WRITES_MAX];
int preg_log_idx;
DECLARE_BITMAP(pregs_written, NUM_PREGS);
- DECLARE_BITMAP(pregs_read, NUM_PREGS);
uint8_t store_width[STORES_MAX];
bool s1_store_processed;
int future_vregs_idx;
@@ -68,6 +66,7 @@ typedef struct DisasContext {
bool is_tight_loop;
bool short_circuit;
bool has_hvx_helper;
+ bool read_after_write;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
{
- set_bit(pnum, ctx->pregs_read);
+ if (test_bit(pnum, ctx->pregs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
{
g_assert(test_bit(pnum, ctx->pregs_written));
- set_bit(pnum, ctx->pregs_read);
}
static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
@@ -125,13 +125,14 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
{
- set_bit(rnum, ctx->regs_read);
+ if (test_bit(rnum, ctx->regs_written)) {
+ ctx->read_after_write = true;
+ }
}
static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
{
g_assert(test_bit(rnum, ctx->regs_written));
- set_bit(rnum, ctx->regs_read);
}
static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 95579ae243..751ca71790 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -1,5 +1,5 @@
/*
- * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
}
}
- /* Check for overlap between register reads and writes */
- for (int i = 0; i < ctx->reg_log_idx; i++) {
- int rnum = ctx->reg_log[i];
- if (test_bit(rnum, ctx->regs_read)) {
- return true;
- }
- }
-
- /* Check for overlap between predicate reads and writes */
- for (int i = 0; i < ctx->preg_log_idx; i++) {
- int pnum = ctx->preg_log[i];
- if (test_bit(pnum, ctx->pregs_read)) {
- return true;
- }
+ if (ctx->read_after_write) {
+ return true;
}
/* Check for overlap between HVX reads and writes */
@@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
ctx->has_hvx_helper = false;
+ ctx->read_after_write = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
ctx->next_PC = next_PC;
ctx->reg_log_idx = 0;
bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
- bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
ctx->preg_log_idx = 0;
bitmap_zero(ctx->pregs_written, NUM_PREGS);
- bitmap_zero(ctx->pregs_read, NUM_PREGS);
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* RE: [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
@ 2024-03-29 2:02 ` Brian Cain
0 siblings, 0 replies; 7+ messages in thread
From: Brian Cain @ 2024-03-29 2:02 UTC (permalink / raw)
To: Taylor Simpson, qemu-devel
Cc: Matheus Bernardino (QUIC), Sid Manning, Marco Liebel (QUIC),
richard.henderson, philmd, ale, anjo
> -----Original Message-----
> From: Taylor Simpson <ltaylorsimpson@gmail.com>
> Sent: Thursday, February 1, 2024 4:34 AM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; Sid Manning <sidneym@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>;
> richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; anjo@rev.ng;
> ltaylorsimpson@gmail.com
> Subject: [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit
> packets (scalar core)
>
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
>
> Look for read-after-write instead of overlap of reads and writes
>
> Here is an example with overlap but no read-after-write:
> 0x000200fc: 0x38103876 { R0 = add(R0,R1); R6 = add(R6,R7) }
>
> BEFORE:
> ---- 00000000000200fc
> mov_i32 loc2,$0x0
> mov_i32 loc2,r0
> add_i32 loc3,loc2,r1
> mov_i32 loc2,loc3
> mov_i32 loc4,$0x0
> mov_i32 loc4,r6
> add_i32 loc5,loc4,r7
> mov_i32 loc4,loc5
> mov_i32 r0,loc2
> mov_i32 r6,loc4
>
> AFTER:
> ---- 00000000000200fc
> add_i32 loc2,r0,r1
> mov_i32 r0,loc2
> add_i32 loc3,r6,r7
> mov_i32 r6,loc3
>
> We can also short-circuit packets with .new values by reading from the
> real destination instead of the temporary.
> 0x00020100: 0x78005ff3 { R19 = #0xff
> 0x00020104: 0x2002e204 if (cmp.eq(N19.new,R2)) jump:t PC+8 }
>
> BEFORE:
> ---- 0000000000020100
> mov_i32 pc,$0x20108
> mov_i32 loc8,$0x0
> mov_i32 loc8,$0xff
> setcond_i32 loc10,loc8,r2,eq
> mov_i32 loc6,loc10
> mov_i32 r19,loc8
> add_i32 pkt_cnt,pkt_cnt,$0x2
> add_i32 insn_cnt,insn_cnt,$0x4
> brcond_i32 loc6,$0x0,eq,$L1
> goto_tb $0x0
> mov_i32 pc,$0x20108
> exit_tb $0x7fbb54000040
> set_label $L1
> goto_tb $0x1
> exit_tb $0x7fbb54000041
> set_label $L0
> exit_tb $0x7fbb54000043
>
> AFTER:
> ---- 0000000000020100
> mov_i32 pc,$0x20108
> mov_i32 r19,$0xff
> setcond_i32 loc7,r19,r2,eq
> mov_i32 loc4,loc7
> add_i32 pkt_cnt,pkt_cnt,$0x2
> add_i32 insn_cnt,insn_cnt,$0x4
> brcond_i32 loc4,$0x0,eq,$L1
> goto_tb $0x0
> mov_i32 pc,$0x20108
> exit_tb $0x7f9764000040
> set_label $L1
> goto_tb $0x1
> exit_tb $0x7f9764000041
> set_label $L0
> exit_tb $0x7f9764000043
>
> Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
> ---
> target/hexagon/translate.h | 13 +++++++------
> target/hexagon/translate.c | 21 ++++-----------------
> 2 files changed, 11 insertions(+), 23 deletions(-)
>
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index f06d71fc53..d5e7f49ad8 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -38,12 +38,10 @@ typedef struct DisasContext {
> int reg_log[REG_WRITES_MAX];
> int reg_log_idx;
> DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
> - DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
> DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
> int preg_log[PRED_WRITES_MAX];
> int preg_log_idx;
> DECLARE_BITMAP(pregs_written, NUM_PREGS);
> - DECLARE_BITMAP(pregs_read, NUM_PREGS);
> uint8_t store_width[STORES_MAX];
> bool s1_store_processed;
> int future_vregs_idx;
> @@ -68,6 +66,7 @@ typedef struct DisasContext {
> bool is_tight_loop;
> bool short_circuit;
> bool has_hvx_helper;
> + bool read_after_write;
> TCGv new_value[TOTAL_PER_THREAD_REGS];
> TCGv new_pred_value[NUM_PREGS];
> TCGv pred_written;
> @@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext
> *ctx, int pnum)
>
> static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
> {
> - set_bit(pnum, ctx->pregs_read);
> + if (test_bit(pnum, ctx->pregs_written)) {
> + ctx->read_after_write = true;
> + }
> }
>
> static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
> {
> g_assert(test_bit(pnum, ctx->pregs_written));
> - set_bit(pnum, ctx->pregs_read);
> }
>
> static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
> @@ -125,13 +125,14 @@ static inline void
> ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
>
> static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
> {
> - set_bit(rnum, ctx->regs_read);
> + if (test_bit(rnum, ctx->regs_written)) {
> + ctx->read_after_write = true;
> + }
> }
>
> static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
> {
> g_assert(test_bit(rnum, ctx->regs_written));
> - set_bit(rnum, ctx->regs_read);
> }
>
> static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
> diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
> index 95579ae243..751ca71790 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -1,5 +1,5 @@
> /*
> - * Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> + * Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> *
> * This program is free software; you can redistribute it and/or modify
> * it under the terms of the GNU General Public License as published by
> @@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
> }
> }
>
> - /* Check for overlap between register reads and writes */
> - for (int i = 0; i < ctx->reg_log_idx; i++) {
> - int rnum = ctx->reg_log[i];
> - if (test_bit(rnum, ctx->regs_read)) {
> - return true;
> - }
> - }
> -
> - /* Check for overlap between predicate reads and writes */
> - for (int i = 0; i < ctx->preg_log_idx; i++) {
> - int pnum = ctx->preg_log[i];
> - if (test_bit(pnum, ctx->pregs_read)) {
> - return true;
> - }
> + if (ctx->read_after_write) {
> + return true;
> }
>
> /* Check for overlap between HVX reads and writes */
> @@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
> {
> Packet *pkt = ctx->pkt;
> ctx->has_hvx_helper = false;
> + ctx->read_after_write = false;
> for (int i = 0; i < pkt->num_insns; i++) {
> Insn *insn = &pkt->insn[i];
> ctx->insn = insn;
> @@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
> ctx->next_PC = next_PC;
> ctx->reg_log_idx = 0;
> bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
> - bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
> bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
> ctx->preg_log_idx = 0;
> bitmap_zero(ctx->pregs_written, NUM_PREGS);
> - bitmap_zero(ctx->pregs_read, NUM_PREGS);
> ctx->future_vregs_idx = 0;
> ctx->tmp_vregs_idx = 0;
> ctx->vreg_log_idx = 0;
> --
> 2.34.1
Reviewed-by: Brian Cain <bcain@quicinc.com>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
@ 2024-02-01 10:33 ` Taylor Simpson
2024-03-29 2:02 ` Brian Cain
2 siblings, 1 reply; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
To: qemu-devel
Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
philmd, ale, anjo, ltaylorsimpson
Look for read-after-write instead of overlap of reads and writes
HVX instructions with helpers have pass-by-reference semantics, so
we check for overlaps of reads and writes within the same instruction.
Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
target/hexagon/translate.h | 88 +++++++++++++++++++++++------
target/hexagon/translate.c | 58 ++-----------------
target/hexagon/gen_analyze_funcs.py | 19 ++++---
target/hexagon/hex_common.py | 45 ++++++++++-----
4 files changed, 115 insertions(+), 95 deletions(-)
diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index d5e7f49ad8..00cc2bcd63 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -50,23 +50,27 @@ typedef struct DisasContext {
int tmp_vregs_num[VECTOR_TEMPS_MAX];
int vreg_log[NUM_VREGS];
int vreg_log_idx;
+ DECLARE_BITMAP(vregs_written, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
DECLARE_BITMAP(vregs_updated, NUM_VREGS);
DECLARE_BITMAP(vregs_select, NUM_VREGS);
DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
- DECLARE_BITMAP(vregs_read, NUM_VREGS);
+ DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
int qreg_log[NUM_QREGS];
int qreg_log_idx;
- DECLARE_BITMAP(qregs_read, NUM_QREGS);
+ DECLARE_BITMAP(qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
+ DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
bool pre_commit;
bool need_commit;
TCGCond branch_cond;
target_ulong branch_dest;
bool is_tight_loop;
bool short_circuit;
- bool has_hvx_helper;
bool read_after_write;
+ bool has_hvx_overlap;
TCGv new_value[TOTAL_PER_THREAD_REGS];
TCGv new_pred_value[NUM_PREGS];
TCGv pred_written;
@@ -146,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
int num, bool alloc_ok);
+static inline void ctx_start_hvx_insn(DisasContext *ctx)
+{
+ bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
+ bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
+ bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
+ bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
+}
+
static inline void ctx_log_vreg_write(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_written);
+ if (test_bit(rnum, ctx->insn_vregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->vregs_written);
if (type != EXT_TMP) {
if (!test_bit(rnum, ctx->vregs_updated)) {
ctx->vreg_log[ctx->vreg_log_idx] = rnum;
@@ -175,42 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
int rnum, VRegWriteType type,
- bool is_predicated)
+ bool is_predicated, bool has_helper)
{
- ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
- ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
+ ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
+ ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
}
-static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(rnum, ctx->vregs_written)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
+ bool has_helper)
{
g_assert(is_gather_store_insn(ctx) ||
test_bit(rnum, ctx->vregs_updated) ||
test_bit(rnum, ctx->vregs_select) ||
test_bit(rnum, ctx->vregs_updated_tmp));
- set_bit(rnum, ctx->vregs_read);
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_vregs_read);
+ if (test_bit(rnum, ctx->insn_vregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (is_gather_store_insn(ctx)) {
+ ctx->read_after_write = true;
+ }
}
-static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
+ bool has_helper)
{
- ctx_log_vreg_read(ctx, rnum ^ 0);
- ctx_log_vreg_read(ctx, rnum ^ 1);
+ ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
+ ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
}
static inline void ctx_log_qreg_write(DisasContext *ctx,
- int rnum)
+ int rnum, bool has_helper)
{
+ if (has_helper) {
+ set_bit(rnum, ctx->insn_qregs_written);
+ if (test_bit(rnum, ctx->insn_qregs_read)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ set_bit(rnum, ctx->qregs_written);
ctx->qreg_log[ctx->qreg_log_idx] = rnum;
ctx->qreg_log_idx++;
}
-static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
+static inline void ctx_log_qreg_read(DisasContext *ctx,
+ int qnum, bool has_helper)
{
- set_bit(qnum, ctx->qregs_read);
+ if (has_helper) {
+ set_bit(qnum, ctx->insn_qregs_read);
+ if (test_bit(qnum, ctx->insn_qregs_written)) {
+ ctx->has_hvx_overlap = true;
+ }
+ }
+ if (test_bit(qnum, ctx->qregs_written)) {
+ ctx->read_after_write = true;
+ }
}
extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 751ca71790..ed4b4acd1d 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -378,60 +378,10 @@ static bool need_commit(DisasContext *ctx)
return true;
}
- if (pkt->num_insns == 1) {
- if (pkt->pkt_has_hvx) {
- /*
- * The HVX instructions with generated helpers use
- * pass-by-reference, so they need the read/write overlap
- * check below.
- * The HVX instructions with overrides are OK.
- */
- if (!ctx->has_hvx_helper) {
- return false;
- }
- } else {
- return false;
- }
- }
-
- if (ctx->read_after_write) {
+ if (ctx->read_after_write || ctx->has_hvx_overlap) {
return true;
}
- /* Check for overlap between HVX reads and writes */
- for (int i = 0; i < ctx->vreg_log_idx; i++) {
- int vnum = ctx->vreg_log[i];
- if (test_bit(vnum, ctx->vregs_read)) {
- return true;
- }
- }
- if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
- }
- }
- if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
- int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
- while (i < NUM_VREGS) {
- if (test_bit(i, ctx->vregs_read)) {
- return true;
- }
- i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
- }
- }
-
- /* Check for overlap between HVX predicate reads and writes */
- for (int i = 0; i < ctx->qreg_log_idx; i++) {
- int qnum = ctx->qreg_log[i];
- if (test_bit(qnum, ctx->qregs_read)) {
- return true;
- }
- }
-
return false;
}
@@ -453,8 +403,8 @@ static void mark_implicit_pred_reads(DisasContext *ctx)
static void analyze_packet(DisasContext *ctx)
{
Packet *pkt = ctx->pkt;
- ctx->has_hvx_helper = false;
ctx->read_after_write = false;
+ ctx->has_hvx_overlap = false;
for (int i = 0; i < pkt->num_insns; i++) {
Insn *insn = &pkt->insn[i];
ctx->insn = insn;
@@ -485,13 +435,13 @@ static void gen_start_packet(DisasContext *ctx)
ctx->future_vregs_idx = 0;
ctx->tmp_vregs_idx = 0;
ctx->vreg_log_idx = 0;
+ bitmap_zero(ctx->vregs_written, NUM_VREGS);
bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
bitmap_zero(ctx->vregs_updated, NUM_VREGS);
bitmap_zero(ctx->vregs_select, NUM_VREGS);
bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
- bitmap_zero(ctx->vregs_read, NUM_VREGS);
- bitmap_zero(ctx->qregs_read, NUM_QREGS);
+ bitmap_zero(ctx->qregs_written, NUM_QREGS);
ctx->qreg_log_idx = 0;
for (i = 0; i < STORES_MAX; i++) {
ctx->store_width[i] = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 890e6a3a95..81e1d9cfa3 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -43,6 +43,16 @@ def gen_analyze_func(f, tag, regs, imms):
f.write("{\n")
f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+ if (hex_common.is_hvx_insn(tag)):
+ if hex_common.has_hvx_helper(tag):
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
+ )
+ f.write(" ctx_start_hvx_insn(ctx);\n")
+ else:
+ f.write(
+ " const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
+ )
## Declare all the registers
for regno, register in enumerate(regs):
@@ -64,15 +74,6 @@ def gen_analyze_func(f, tag, regs, imms):
if reg.is_written():
reg.analyze_write(f, tag, regno)
- has_generated_helper = not hex_common.skip_qemu_helper(
- tag
- ) and not hex_common.is_idef_parser_enabled(tag)
-
- ## Mark HVX instructions with generated helpers
- if (has_generated_helper and
- "A_CVI" in hex_common.attribdict[tag]):
- f.write(" ctx->has_hvx_helper = true;\n")
-
f.write("}\n\n")
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 33801e4bd7..9e7f613e3c 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -241,6 +241,16 @@ def is_idef_parser_enabled(tag):
return tag in idef_parser_enabled
+def is_hvx_insn(tag):
+ return "A_CVI" in attribdict[tag]
+
+
+def has_hvx_helper(tag):
+ return (is_hvx_insn(tag) and
+ not skip_qemu_helper(tag) and
+ not is_idef_parser_enabled(tag))
+
+
def imm_name(immlett):
return f"{immlett}iV"
@@ -704,7 +714,8 @@ def analyze_write(self, f, tag, regno):
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegSource(Register, Hvx, OldSource):
@@ -724,7 +735,7 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_vreg_read(ctx, {self.reg_num});
+ ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class VRegNewSource(Register, Hvx, NewSource):
@@ -741,7 +752,7 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_vreg_read_new(ctx, {self.reg_num});
+ ctx_log_vreg_read_new(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class VRegReadWrite(Register, Hvx, ReadWrite):
@@ -767,13 +778,14 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_vreg_read(ctx, {self.reg_num});
+ ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
def analyze_write(self, f, tag, regno):
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegTmp(Register, Hvx, ReadWrite):
@@ -801,13 +813,14 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_vreg_read(ctx, {self.reg_num});
+ ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
def analyze_write(self, f, tag, regno):
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegPairDest(Register, Hvx, Dest):
@@ -832,7 +845,8 @@ def analyze_write(self, f, tag, regno):
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class VRegPairSource(Register, Hvx, OldSource):
@@ -859,7 +873,7 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_vreg_read_pair(ctx, {self.reg_num});
+ ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class VRegPairReadWrite(Register, Hvx, ReadWrite):
@@ -891,13 +905,14 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_vreg_read_pair(ctx, {self.reg_num});
+ ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
def analyze_write(self, f, tag, regno):
newv = hvx_newv(tag)
predicated = "true" if is_predicated(tag) else "false"
f.write(code_fmt(f"""\
- ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
+ ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
+ insn_has_hvx_helper);
"""))
class QRegDest(Register, Hvx, Dest):
@@ -920,7 +935,7 @@ def helper_hvx_desc(self, f):
"""))
def analyze_write(self, f, tag, regno):
f.write(code_fmt(f"""\
- ctx_log_qreg_write(ctx, {self.reg_num});
+ ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class QRegSource(Register, Hvx, OldSource):
@@ -941,7 +956,7 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_qreg_read(ctx, {self.reg_num});
+ ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
class QRegReadWrite(Register, Hvx, ReadWrite):
@@ -967,11 +982,11 @@ def helper_hvx_desc(self, f):
"""))
def analyze_read(self, f, regno):
f.write(code_fmt(f"""\
- ctx_log_qreg_read(ctx, {self.reg_num});
+ ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
def analyze_write(self, f, tag, regno):
f.write(code_fmt(f"""\
- ctx_log_qreg_write(ctx, {self.reg_num});
+ ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
"""))
def init_registers():
--
2.34.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* RE: [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
@ 2024-03-29 2:02 ` Brian Cain
0 siblings, 0 replies; 7+ messages in thread
From: Brian Cain @ 2024-03-29 2:02 UTC (permalink / raw)
To: Taylor Simpson, qemu-devel
Cc: Matheus Bernardino (QUIC), Sid Manning, Marco Liebel (QUIC),
richard.henderson, philmd, ale, anjo
> -----Original Message-----
> From: Taylor Simpson <ltaylorsimpson@gmail.com>
> Sent: Thursday, February 1, 2024 4:34 AM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; Sid Manning <sidneym@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>;
> richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; anjo@rev.ng;
> ltaylorsimpson@gmail.com
> Subject: [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit
> packets (HVX)
>
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
>
> Look for read-after-write instead of overlap of reads and writes
>
> HVX instructions with helpers have pass-by-reference semantics, so
> we check for overlaps of reads and writes within the same instruction.
>
> Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
> ---
> target/hexagon/translate.h | 88 +++++++++++++++++++++++------
> target/hexagon/translate.c | 58 ++-----------------
> target/hexagon/gen_analyze_funcs.py | 19 ++++---
> target/hexagon/hex_common.py | 45 ++++++++++-----
> 4 files changed, 115 insertions(+), 95 deletions(-)
>
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index d5e7f49ad8..00cc2bcd63 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -50,23 +50,27 @@ typedef struct DisasContext {
> int tmp_vregs_num[VECTOR_TEMPS_MAX];
> int vreg_log[NUM_VREGS];
> int vreg_log_idx;
> + DECLARE_BITMAP(vregs_written, NUM_VREGS);
> + DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
> DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
> DECLARE_BITMAP(vregs_updated, NUM_VREGS);
> DECLARE_BITMAP(vregs_select, NUM_VREGS);
> DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
> DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
> - DECLARE_BITMAP(vregs_read, NUM_VREGS);
> + DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
> int qreg_log[NUM_QREGS];
> int qreg_log_idx;
> - DECLARE_BITMAP(qregs_read, NUM_QREGS);
> + DECLARE_BITMAP(qregs_written, NUM_QREGS);
> + DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
> + DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
> bool pre_commit;
> bool need_commit;
> TCGCond branch_cond;
> target_ulong branch_dest;
> bool is_tight_loop;
> bool short_circuit;
> - bool has_hvx_helper;
> bool read_after_write;
> + bool has_hvx_overlap;
> TCGv new_value[TOTAL_PER_THREAD_REGS];
> TCGv new_pred_value[NUM_PREGS];
> TCGv pred_written;
> @@ -146,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
> intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
> int num, bool alloc_ok);
>
> +static inline void ctx_start_hvx_insn(DisasContext *ctx)
> +{
> + bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
> + bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
> + bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
> + bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
> +}
> +
> static inline void ctx_log_vreg_write(DisasContext *ctx,
> int rnum, VRegWriteType type,
> - bool is_predicated)
> + bool is_predicated, bool has_helper)
> {
> + if (has_helper) {
> + set_bit(rnum, ctx->insn_vregs_written);
> + if (test_bit(rnum, ctx->insn_vregs_read)) {
> + ctx->has_hvx_overlap = true;
> + }
> + }
> + set_bit(rnum, ctx->vregs_written);
> if (type != EXT_TMP) {
> if (!test_bit(rnum, ctx->vregs_updated)) {
> ctx->vreg_log[ctx->vreg_log_idx] = rnum;
> @@ -175,42 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
>
> static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
> int rnum, VRegWriteType type,
> - bool is_predicated)
> + bool is_predicated, bool has_helper)
> {
> - ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
> - ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
> + ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
> + ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
> }
>
> -static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
> +static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
> + bool has_helper)
> {
> - set_bit(rnum, ctx->vregs_read);
> + if (has_helper) {
> + set_bit(rnum, ctx->insn_vregs_read);
> + if (test_bit(rnum, ctx->insn_vregs_written)) {
> + ctx->has_hvx_overlap = true;
> + }
> + }
> + if (test_bit(rnum, ctx->vregs_written)) {
> + ctx->read_after_write = true;
> + }
> }
>
> -static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
> +static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
> + bool has_helper)
> {
> g_assert(is_gather_store_insn(ctx) ||
> test_bit(rnum, ctx->vregs_updated) ||
> test_bit(rnum, ctx->vregs_select) ||
> test_bit(rnum, ctx->vregs_updated_tmp));
> - set_bit(rnum, ctx->vregs_read);
> + if (has_helper) {
> + set_bit(rnum, ctx->insn_vregs_read);
> + if (test_bit(rnum, ctx->insn_vregs_written)) {
> + ctx->has_hvx_overlap = true;
> + }
> + }
> + if (is_gather_store_insn(ctx)) {
> + ctx->read_after_write = true;
> + }
> }
>
> -static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
> +static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
> + bool has_helper)
> {
> - ctx_log_vreg_read(ctx, rnum ^ 0);
> - ctx_log_vreg_read(ctx, rnum ^ 1);
> + ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
> + ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
> }
>
> static inline void ctx_log_qreg_write(DisasContext *ctx,
> - int rnum)
> + int rnum, bool has_helper)
> {
> + if (has_helper) {
> + set_bit(rnum, ctx->insn_qregs_written);
> + if (test_bit(rnum, ctx->insn_qregs_read)) {
> + ctx->has_hvx_overlap = true;
> + }
> + }
> + set_bit(rnum, ctx->qregs_written);
> ctx->qreg_log[ctx->qreg_log_idx] = rnum;
> ctx->qreg_log_idx++;
> }
>
> -static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
> +static inline void ctx_log_qreg_read(DisasContext *ctx,
> + int qnum, bool has_helper)
> {
> - set_bit(qnum, ctx->qregs_read);
> + if (has_helper) {
> + set_bit(qnum, ctx->insn_qregs_read);
> + if (test_bit(qnum, ctx->insn_qregs_written)) {
> + ctx->has_hvx_overlap = true;
> + }
> + }
> + if (test_bit(qnum, ctx->qregs_written)) {
> + ctx->read_after_write = true;
> + }
> }
>
> extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
> diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
> index 751ca71790..ed4b4acd1d 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -378,60 +378,10 @@ static bool need_commit(DisasContext *ctx)
> return true;
> }
>
> - if (pkt->num_insns == 1) {
> - if (pkt->pkt_has_hvx) {
> - /*
> - * The HVX instructions with generated helpers use
> - * pass-by-reference, so they need the read/write overlap
> - * check below.
> - * The HVX instructions with overrides are OK.
> - */
> - if (!ctx->has_hvx_helper) {
> - return false;
> - }
> - } else {
> - return false;
> - }
> - }
> -
> - if (ctx->read_after_write) {
> + if (ctx->read_after_write || ctx->has_hvx_overlap) {
> return true;
> }
>
> - /* Check for overlap between HVX reads and writes */
> - for (int i = 0; i < ctx->vreg_log_idx; i++) {
> - int vnum = ctx->vreg_log[i];
> - if (test_bit(vnum, ctx->vregs_read)) {
> - return true;
> - }
> - }
> - if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
> - int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
> - while (i < NUM_VREGS) {
> - if (test_bit(i, ctx->vregs_read)) {
> - return true;
> - }
> - i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
> - }
> - }
> - if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
> - int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
> - while (i < NUM_VREGS) {
> - if (test_bit(i, ctx->vregs_read)) {
> - return true;
> - }
> - i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
> - }
> - }
> -
> - /* Check for overlap between HVX predicate reads and writes */
> - for (int i = 0; i < ctx->qreg_log_idx; i++) {
> - int qnum = ctx->qreg_log[i];
> - if (test_bit(qnum, ctx->qregs_read)) {
> - return true;
> - }
> - }
> -
> return false;
> }
>
> @@ -453,8 +403,8 @@ static void mark_implicit_pred_reads(DisasContext *ctx)
> static void analyze_packet(DisasContext *ctx)
> {
> Packet *pkt = ctx->pkt;
> - ctx->has_hvx_helper = false;
> ctx->read_after_write = false;
> + ctx->has_hvx_overlap = false;
> for (int i = 0; i < pkt->num_insns; i++) {
> Insn *insn = &pkt->insn[i];
> ctx->insn = insn;
> @@ -485,13 +435,13 @@ static void gen_start_packet(DisasContext *ctx)
> ctx->future_vregs_idx = 0;
> ctx->tmp_vregs_idx = 0;
> ctx->vreg_log_idx = 0;
> + bitmap_zero(ctx->vregs_written, NUM_VREGS);
> bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
> bitmap_zero(ctx->vregs_updated, NUM_VREGS);
> bitmap_zero(ctx->vregs_select, NUM_VREGS);
> bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
> bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
> - bitmap_zero(ctx->vregs_read, NUM_VREGS);
> - bitmap_zero(ctx->qregs_read, NUM_QREGS);
> + bitmap_zero(ctx->qregs_written, NUM_QREGS);
> ctx->qreg_log_idx = 0;
> for (i = 0; i < STORES_MAX; i++) {
> ctx->store_width[i] = 0;
> diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
> index 890e6a3a95..81e1d9cfa3 100755
> --- a/target/hexagon/gen_analyze_funcs.py
> +++ b/target/hexagon/gen_analyze_funcs.py
> @@ -43,6 +43,16 @@ def gen_analyze_func(f, tag, regs, imms):
> f.write("{\n")
>
> f.write(" Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
> + if (hex_common.is_hvx_insn(tag)):
> + if hex_common.has_hvx_helper(tag):
> + f.write(
> + " const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
> + )
> + f.write(" ctx_start_hvx_insn(ctx);\n")
> + else:
> + f.write(
> + " const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
> + )
>
> ## Declare all the registers
> for regno, register in enumerate(regs):
> @@ -64,15 +74,6 @@ def gen_analyze_func(f, tag, regs, imms):
> if reg.is_written():
> reg.analyze_write(f, tag, regno)
>
> - has_generated_helper = not hex_common.skip_qemu_helper(
> - tag
> - ) and not hex_common.is_idef_parser_enabled(tag)
> -
> - ## Mark HVX instructions with generated helpers
> - if (has_generated_helper and
> - "A_CVI" in hex_common.attribdict[tag]):
> - f.write(" ctx->has_hvx_helper = true;\n")
> -
> f.write("}\n\n")
>
>
> diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
> index 33801e4bd7..9e7f613e3c 100755
> --- a/target/hexagon/hex_common.py
> +++ b/target/hexagon/hex_common.py
> @@ -241,6 +241,16 @@ def is_idef_parser_enabled(tag):
> return tag in idef_parser_enabled
>
>
> +def is_hvx_insn(tag):
> + return "A_CVI" in attribdict[tag]
> +
> +
> +def has_hvx_helper(tag):
> + return (is_hvx_insn(tag) and
> + not skip_qemu_helper(tag) and
> + not is_idef_parser_enabled(tag))
> +
> +
> def imm_name(immlett):
> return f"{immlett}iV"
>
> @@ -704,7 +714,8 @@ def analyze_write(self, f, tag, regno):
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> - ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
> + ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
> + insn_has_hvx_helper);
> """))
>
> class VRegSource(Register, Hvx, OldSource):
> @@ -724,7 +735,7 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read(ctx, {self.reg_num});
> + ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
>
> class VRegNewSource(Register, Hvx, NewSource):
> @@ -741,7 +752,7 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read_new(ctx, {self.reg_num});
> + ctx_log_vreg_read_new(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
>
> class VRegReadWrite(Register, Hvx, ReadWrite):
> @@ -767,13 +778,14 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read(ctx, {self.reg_num});
> + ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
> def analyze_write(self, f, tag, regno):
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> - ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
> + ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
> + insn_has_hvx_helper);
> """))
>
> class VRegTmp(Register, Hvx, ReadWrite):
> @@ -801,13 +813,14 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read(ctx, {self.reg_num});
> + ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
> def analyze_write(self, f, tag, regno):
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> - ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
> + ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
> + insn_has_hvx_helper);
> """))
>
> class VRegPairDest(Register, Hvx, Dest):
> @@ -832,7 +845,8 @@ def analyze_write(self, f, tag, regno):
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> - ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
> + ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
> + insn_has_hvx_helper);
> """))
>
> class VRegPairSource(Register, Hvx, OldSource):
> @@ -859,7 +873,7 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read_pair(ctx, {self.reg_num});
> + ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
>
> class VRegPairReadWrite(Register, Hvx, ReadWrite):
> @@ -891,13 +905,14 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_vreg_read_pair(ctx, {self.reg_num});
> + ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
> def analyze_write(self, f, tag, regno):
> newv = hvx_newv(tag)
> predicated = "true" if is_predicated(tag) else "false"
> f.write(code_fmt(f"""\
> - ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
> + ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
> + insn_has_hvx_helper);
> """))
>
> class QRegDest(Register, Hvx, Dest):
> @@ -920,7 +935,7 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_write(self, f, tag, regno):
> f.write(code_fmt(f"""\
> - ctx_log_qreg_write(ctx, {self.reg_num});
> + ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
>
> class QRegSource(Register, Hvx, OldSource):
> @@ -941,7 +956,7 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_qreg_read(ctx, {self.reg_num});
> + ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
>
> class QRegReadWrite(Register, Hvx, ReadWrite):
> @@ -967,11 +982,11 @@ def helper_hvx_desc(self, f):
> """))
> def analyze_read(self, f, regno):
> f.write(code_fmt(f"""\
> - ctx_log_qreg_read(ctx, {self.reg_num});
> + ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
> def analyze_write(self, f, tag, regno):
> f.write(code_fmt(f"""\
> - ctx_log_qreg_write(ctx, {self.reg_num});
> + ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
> """))
>
> def init_registers():
> --
> 2.34.1
Reviewed-by: Brian Cain <bcain@quicinc.com>
^ permalink raw reply [flat|nested] 7+ messages in thread