All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets
@ 2024-02-01 10:33 Taylor Simpson
  2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
	philmd, ale, anjo, ltaylorsimpson

This patch series improves the set of packets that can short-circuit
the commit packet logic and write the results directly during the
execution of each instruction in the packet.

The key observation is that checking for overlap between register reads
and writes is different from read-after-write.  For example, this packet
    { R0 = add(R0,R1); R6 = add(R6,R7) }
has an overlap between the reads and writes without doing a read after a
write.  Therefore, it is safe to write directly into the destination
registers during instruction execution.

Another example is a .new register read.  These can read from either the
destination register or a temporary location.

HVX instructions with generated helpers require special handling.
The semantics of the helpers are pass-by-reference, so we still need the
overlap check for these.

***** Changes in v2 *****
Conform to object oriented generators patch
https://lists.gnu.org/archive/html/qemu-devel/2023-12/msg01211.html


Taylor Simpson (3):
  Hexagon (target/hexagon) Analyze reads before writes
  Hexagon (target/hexagon) Enable more short-circuit packets (scalar
    core)
  Hexagon (target/hexagon) Enable more short-circuit packets (HVX)

 target/hexagon/translate.h          | 119 +++++++++++++++++++++++-----
 target/hexagon/translate.c          |  77 ++----------------
 target/hexagon/README               |   9 ++-
 target/hexagon/gen_analyze_funcs.py |  55 ++++++++-----
 target/hexagon/hex_common.py        |  98 +++++++++++++----------
 5 files changed, 200 insertions(+), 158 deletions(-)

-- 
2.34.1



^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes
  2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
@ 2024-02-01 10:33 ` Taylor Simpson
  2024-03-29  2:05   ` Brian Cain
  2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
  2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
  2 siblings, 1 reply; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
	philmd, ale, anjo, ltaylorsimpson

We divide gen_analyze_funcs.py into 3 phases
    Declare the operands
    Analyze the register reads
    Analyze the register writes

We also create special versions of ctx_log_*_read for new operands
    Check that the operand is written before the read

This is a precursor to improving the analysis for short-circuiting
the packet semantics in a subsequent commit

Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
 target/hexagon/translate.h          | 26 +++++++++++-
 target/hexagon/README               |  9 +++--
 target/hexagon/gen_analyze_funcs.py | 34 ++++++++++------
 target/hexagon/hex_common.py        | 63 +++++++++++++++--------------
 4 files changed, 83 insertions(+), 49 deletions(-)

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index 4dd59c6726..f06d71fc53 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -75,6 +75,8 @@ typedef struct DisasContext {
     TCGv dczero_addr;
 } DisasContext;
 
+bool is_gather_store_insn(DisasContext *ctx);
+
 static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
 {
     if (!test_bit(pnum, ctx->pregs_written)) {
@@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
     set_bit(pnum, ctx->pregs_read);
 }
 
+static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
+{
+    g_assert(test_bit(pnum, ctx->pregs_written));
+    set_bit(pnum, ctx->pregs_read);
+}
+
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
                                      bool is_predicated)
 {
@@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
     set_bit(rnum, ctx->regs_read);
 }
 
+static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
+{
+    g_assert(test_bit(rnum, ctx->regs_written));
+    set_bit(rnum, ctx->regs_read);
+}
+
 static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
 {
     ctx_log_reg_read(ctx, rnum);
@@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
     set_bit(rnum, ctx->vregs_read);
 }
 
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+{
+    g_assert(is_gather_store_insn(ctx) ||
+             test_bit(rnum, ctx->vregs_updated) ||
+             test_bit(rnum, ctx->vregs_select) ||
+             test_bit(rnum, ctx->vregs_updated_tmp));
+    set_bit(rnum, ctx->vregs_read);
+}
+
 static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
 {
     ctx_log_vreg_read(ctx, rnum ^ 0);
@@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
 extern TCGv hex_vstore_size[VSTORES_MAX];
 extern TCGv hex_vstore_pending[VSTORES_MAX];
 
-bool is_gather_store_insn(DisasContext *ctx);
 void process_store(DisasContext *ctx, int slot_num);
 
 FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX,       0, 2)
diff --git a/target/hexagon/README b/target/hexagon/README
index 746ebec378..c1d8c8d0ab 100644
--- a/target/hexagon/README
+++ b/target/hexagon/README
@@ -183,10 +183,11 @@ when the override is present.
     }
 
 We also generate an analyze_<tag> function for each instruction.  Currently,
-these functions record the writes to registers by calling ctx_log_*.  During
-gen_start_packet, we invoke the analyze_<tag> function for each instruction in
-the packet, and we mark the implicit writes.  After the analysis is performed,
-we initialize the result register for each of the predicated assignments.
+these functions record the reads and writes to registers by calling ctx_log_*.
+During gen_start_packet, we invoke the analyze_<tag> function for each instruction in
+the packet, and we mark the implicit writes.  The analysis determines if the packet
+semantics can be short-circuited.  If not, we initialize the result register for each
+of the predicated assignments.
 
 In addition to instruction semantics, we use a generator to create the decode
 tree.  This generation is a four step process.
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index a9af666cef..890e6a3a95 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -44,15 +44,25 @@ def gen_analyze_func(f, tag, regs, imms):
 
     f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
 
-    i = 0
-    ## Analyze all the registers
-    for regtype, regid in regs:
-        reg = hex_common.get_register(tag, regtype, regid)
+    ## Declare all the registers
+    for regno, register in enumerate(regs):
+        reg_type, reg_id = register
+        reg = hex_common.get_register(tag, reg_type, reg_id)
+        reg.decl_reg_num(f, regno)
+
+    ## Analyze the register reads
+    for regno, register in enumerate(regs):
+        reg_type, reg_id = register
+        reg = hex_common.get_register(tag, reg_type, reg_id)
+        if reg.is_read():
+            reg.analyze_read(f, regno)
+
+    ## Analyze the register writes
+    for regno, register in enumerate(regs):
+        reg_type, reg_id = register
+        reg = hex_common.get_register(tag, reg_type, reg_id)
         if reg.is_written():
-            reg.analyze_write(f, tag, i)
-        else:
-            reg.analyze_read(f, i)
-        i += 1
+            reg.analyze_write(f, tag, regno)
 
     has_generated_helper = not hex_common.skip_qemu_helper(
         tag
@@ -89,13 +99,13 @@ def main():
     tagimms = hex_common.get_tagimms()
 
     with open(sys.argv[-1], "w") as f:
-        f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
-        f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
+        f.write("#ifndef HEXAGON_ANALYZE_FUNCS_C_INC\n")
+        f.write("#define HEXAGON_ANALYZE_FUNCS_C_INC\n\n")
 
         for tag in hex_common.tags:
             gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
 
-        f.write("#endif    /* HEXAGON_TCG_FUNCS_H */\n")
+        f.write("#endif    /* HEXAGON_ANALYZE_FUNCS_C_INC */\n")
 
 
 if __name__ == "__main__":
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 195620c7ec..33801e4bd7 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 
 ##
-##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+##  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
 ##
 ##  This program is free software; you can redistribute it and/or modify
 ##  it under the terms of the GNU General Public License as published by
@@ -425,7 +425,6 @@ def log_write(self, f, tag):
             gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
             ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -438,7 +437,6 @@ def decl_tcg(self, f, tag, regno):
             TCGv {self.reg_tcg()} = hex_gpr[{self.reg_num}];
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_reg_read(ctx, {self.reg_num});
         """))
@@ -449,9 +447,8 @@ def decl_tcg(self, f, tag, regno):
             TCGv {self.reg_tcg()} = get_result_gpr(ctx, insn->regno[{regno}]);
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
-            ctx_log_reg_read(ctx, {self.reg_num});
+            ctx_log_reg_read_new(ctx, {self.reg_num});
         """))
 
 class GprReadWrite(Register, Single, ReadWrite):
@@ -471,8 +468,11 @@ def log_write(self, f, tag):
         f.write(code_fmt(f"""\
             gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_reg_read(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
             ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -493,7 +493,6 @@ def log_write(self, f, tag):
             gen_write_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
             ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
@@ -511,7 +510,6 @@ def decl_tcg(self, f, tag, regno):
             gen_read_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_reg_read(ctx, {self.reg_num});
         """))
@@ -532,7 +530,6 @@ def idef_arg(self, declared):
         declared.append(self.reg_tcg())
         declared.append("CS")
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_reg_read(ctx, {self.reg_num});
         """))
@@ -548,7 +545,6 @@ def log_write(self, f, tag):
             gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_pred_write(ctx, {self.reg_num});
         """))
@@ -560,7 +556,6 @@ def decl_tcg(self, f, tag, regno):
             TCGv {self.reg_tcg()} = hex_pred[{self.reg_num}];
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_pred_read(ctx, {self.reg_num});
         """))
@@ -571,9 +566,8 @@ def decl_tcg(self, f, tag, regno):
             TCGv {self.reg_tcg()} = get_result_pred(ctx, insn->regno[{regno}]);
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
-            ctx_log_pred_read(ctx, {self.reg_num});
+            ctx_log_pred_read_new(ctx, {self.reg_num});
         """))
 
 class PredReadWrite(Register, Single, ReadWrite):
@@ -587,8 +581,11 @@ def log_write(self, f, tag):
         f.write(code_fmt(f"""\
             gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_pred_read(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_pred_write(ctx, {self.reg_num});
         """))
@@ -605,7 +602,6 @@ def log_write(self, f, tag):
             gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
             ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -621,7 +617,6 @@ def decl_tcg(self, f, tag, regno):
                                     hex_gpr[{self.reg_num} + 1]);
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_reg_read_pair(ctx, {self.reg_num});
         """))
@@ -640,8 +635,11 @@ def log_write(self, f, tag):
         f.write(code_fmt(f"""\
             gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_reg_read_pair(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
             ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -663,7 +661,6 @@ def log_write(self, f, tag):
             gen_write_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
             ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
@@ -681,7 +678,6 @@ def decl_tcg(self, f, tag, regno):
             gen_read_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_reg_read_pair(ctx, {self.reg_num});
         """))
@@ -705,7 +701,6 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
@@ -728,7 +723,6 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_vreg_read(ctx, {self.reg_num});
         """))
@@ -746,9 +740,8 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read(ctx, {self.reg_num});
+            ctx_log_vreg_read_new(ctx, {self.reg_num});
         """))
 
 class VRegReadWrite(Register, Hvx, ReadWrite):
@@ -772,8 +765,11 @@ def helper_hvx_desc(self, f):
         f.write(code_fmt(f"""\
             /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_vreg_read(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
@@ -803,8 +799,11 @@ def helper_hvx_desc(self, f):
         f.write(code_fmt(f"""\
             /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_vreg_read(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
@@ -830,7 +829,6 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
@@ -860,7 +858,6 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_vreg_read_pair(ctx, {self.reg_num});
         """))
@@ -892,8 +889,11 @@ def helper_hvx_desc(self, f):
         f.write(code_fmt(f"""\
             /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_vreg_read_pair(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
@@ -919,7 +919,6 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
         """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_qreg_write(ctx, {self.reg_num});
         """))
@@ -941,7 +940,6 @@ def helper_hvx_desc(self, f):
             /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
         """))
     def analyze_read(self, f, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_qreg_read(ctx, {self.reg_num});
         """))
@@ -967,8 +965,11 @@ def helper_hvx_desc(self, f):
         f.write(code_fmt(f"""\
             /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
         """))
+    def analyze_read(self, f, regno):
+        f.write(code_fmt(f"""\
+            ctx_log_qreg_read(ctx, {self.reg_num});
+        """))
     def analyze_write(self, f, tag, regno):
-        self.decl_reg_num(f, regno)
         f.write(code_fmt(f"""\
             ctx_log_qreg_write(ctx, {self.reg_num});
         """))
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
  2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
  2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
@ 2024-02-01 10:33 ` Taylor Simpson
  2024-03-29  2:02   ` Brian Cain
  2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
  2 siblings, 1 reply; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
	philmd, ale, anjo, ltaylorsimpson

Look for read-after-write instead of overlap of reads and writes

Here is an example with overalp but no read-after-write:
0x000200fc:  0x38103876	{	R0 = add(R0,R1); R6 = add(R6,R7) }

BEFORE:
 ---- 00000000000200fc
 mov_i32 loc2,$0x0
 mov_i32 loc2,r0
 add_i32 loc3,loc2,r1
 mov_i32 loc2,loc3
 mov_i32 loc4,$0x0
 mov_i32 loc4,r6
 add_i32 loc5,loc4,r7
 mov_i32 loc4,loc5
 mov_i32 r0,loc2
 mov_i32 r6,loc4

AFTER:
 ---- 00000000000200fc
 add_i32 loc2,r0,r1
 mov_i32 r0,loc2
 add_i32 loc3,r6,r7
 mov_i32 r6,loc3

We can also short-circuit packets with .new values by reading from the
real destination instead of the temporary.
0x00020100:  0x78005ff3	{	R19 = #0xff
0x00020104:  0x2002e204		if (cmp.eq(N19.new,R2)) jump:t PC+8 }

BEFORE:
 ---- 0000000000020100
 mov_i32 pc,$0x20108
 mov_i32 loc8,$0x0
 mov_i32 loc8,$0xff
 setcond_i32 loc10,loc8,r2,eq
 mov_i32 loc6,loc10
 mov_i32 r19,loc8
 add_i32 pkt_cnt,pkt_cnt,$0x2
 add_i32 insn_cnt,insn_cnt,$0x4
 brcond_i32 loc6,$0x0,eq,$L1
 goto_tb $0x0
 mov_i32 pc,$0x20108
 exit_tb $0x7fbb54000040
 set_label $L1
 goto_tb $0x1
 exit_tb $0x7fbb54000041
 set_label $L0
 exit_tb $0x7fbb54000043

AFTER:
 ---- 0000000000020100
 mov_i32 pc,$0x20108
 mov_i32 r19,$0xff
 setcond_i32 loc7,r19,r2,eq
 mov_i32 loc4,loc7
 add_i32 pkt_cnt,pkt_cnt,$0x2
 add_i32 insn_cnt,insn_cnt,$0x4
 brcond_i32 loc4,$0x0,eq,$L1
 goto_tb $0x0
 mov_i32 pc,$0x20108
 exit_tb $0x7f9764000040
 set_label $L1
 goto_tb $0x1
 exit_tb $0x7f9764000041
 set_label $L0
 exit_tb $0x7f9764000043

Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
 target/hexagon/translate.h | 13 +++++++------
 target/hexagon/translate.c | 21 ++++-----------------
 2 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index f06d71fc53..d5e7f49ad8 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -38,12 +38,10 @@ typedef struct DisasContext {
     int reg_log[REG_WRITES_MAX];
     int reg_log_idx;
     DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
-    DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
     DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
     int preg_log[PRED_WRITES_MAX];
     int preg_log_idx;
     DECLARE_BITMAP(pregs_written, NUM_PREGS);
-    DECLARE_BITMAP(pregs_read, NUM_PREGS);
     uint8_t store_width[STORES_MAX];
     bool s1_store_processed;
     int future_vregs_idx;
@@ -68,6 +66,7 @@ typedef struct DisasContext {
     bool is_tight_loop;
     bool short_circuit;
     bool has_hvx_helper;
+    bool read_after_write;
     TCGv new_value[TOTAL_PER_THREAD_REGS];
     TCGv new_pred_value[NUM_PREGS];
     TCGv pred_written;
@@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
 
 static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
 {
-    set_bit(pnum, ctx->pregs_read);
+    if (test_bit(pnum, ctx->pregs_written)) {
+        ctx->read_after_write = true;
+    }
 }
 
 static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
 {
     g_assert(test_bit(pnum, ctx->pregs_written));
-    set_bit(pnum, ctx->pregs_read);
 }
 
 static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
@@ -125,13 +125,14 @@ static inline void ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
 
 static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
 {
-    set_bit(rnum, ctx->regs_read);
+    if (test_bit(rnum, ctx->regs_written)) {
+        ctx->read_after_write = true;
+    }
 }
 
 static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
 {
     g_assert(test_bit(rnum, ctx->regs_written));
-    set_bit(rnum, ctx->regs_read);
 }
 
 static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 95579ae243..751ca71790 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights Reserved.
  *
  *  This program is free software; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
         }
     }
 
-    /* Check for overlap between register reads and writes */
-    for (int i = 0; i < ctx->reg_log_idx; i++) {
-        int rnum = ctx->reg_log[i];
-        if (test_bit(rnum, ctx->regs_read)) {
-            return true;
-        }
-    }
-
-    /* Check for overlap between predicate reads and writes */
-    for (int i = 0; i < ctx->preg_log_idx; i++) {
-        int pnum = ctx->preg_log[i];
-        if (test_bit(pnum, ctx->pregs_read)) {
-            return true;
-        }
+    if (ctx->read_after_write) {
+        return true;
     }
 
     /* Check for overlap between HVX reads and writes */
@@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
     ctx->has_hvx_helper = false;
+    ctx->read_after_write = false;
     for (int i = 0; i < pkt->num_insns; i++) {
         Insn *insn = &pkt->insn[i];
         ctx->insn = insn;
@@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
     ctx->next_PC = next_PC;
     ctx->reg_log_idx = 0;
     bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
-    bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
     bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
     ctx->preg_log_idx = 0;
     bitmap_zero(ctx->pregs_written, NUM_PREGS);
-    bitmap_zero(ctx->pregs_read, NUM_PREGS);
     ctx->future_vregs_idx = 0;
     ctx->tmp_vregs_idx = 0;
     ctx->vreg_log_idx = 0;
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
  2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
  2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
  2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
@ 2024-02-01 10:33 ` Taylor Simpson
  2024-03-29  2:02   ` Brian Cain
  2 siblings, 1 reply; 7+ messages in thread
From: Taylor Simpson @ 2024-02-01 10:33 UTC (permalink / raw)
  To: qemu-devel
  Cc: bcain, quic_mathbern, sidneym, quic_mliebel, richard.henderson,
	philmd, ale, anjo, ltaylorsimpson

Look for read-after-write instead of overlap of reads and writes

HVX instructions with helpers have pass-by-reference semantics, so
we check for overlaps of reads and writes within the same instruction.

Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
---
 target/hexagon/translate.h          | 88 +++++++++++++++++++++++------
 target/hexagon/translate.c          | 58 ++-----------------
 target/hexagon/gen_analyze_funcs.py | 19 ++++---
 target/hexagon/hex_common.py        | 45 ++++++++++-----
 4 files changed, 115 insertions(+), 95 deletions(-)

diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
index d5e7f49ad8..00cc2bcd63 100644
--- a/target/hexagon/translate.h
+++ b/target/hexagon/translate.h
@@ -50,23 +50,27 @@ typedef struct DisasContext {
     int tmp_vregs_num[VECTOR_TEMPS_MAX];
     int vreg_log[NUM_VREGS];
     int vreg_log_idx;
+    DECLARE_BITMAP(vregs_written, NUM_VREGS);
+    DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
     DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
     DECLARE_BITMAP(vregs_updated, NUM_VREGS);
     DECLARE_BITMAP(vregs_select, NUM_VREGS);
     DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
     DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
-    DECLARE_BITMAP(vregs_read, NUM_VREGS);
+    DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
     int qreg_log[NUM_QREGS];
     int qreg_log_idx;
-    DECLARE_BITMAP(qregs_read, NUM_QREGS);
+    DECLARE_BITMAP(qregs_written, NUM_QREGS);
+    DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
+    DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
     bool pre_commit;
     bool need_commit;
     TCGCond branch_cond;
     target_ulong branch_dest;
     bool is_tight_loop;
     bool short_circuit;
-    bool has_hvx_helper;
     bool read_after_write;
+    bool has_hvx_overlap;
     TCGv new_value[TOTAL_PER_THREAD_REGS];
     TCGv new_pred_value[NUM_PREGS];
     TCGv pred_written;
@@ -146,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int regnum,
 intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
                           int num, bool alloc_ok);
 
+static inline void ctx_start_hvx_insn(DisasContext *ctx)
+{
+    bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
+    bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
+    bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
+    bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
+}
+
 static inline void ctx_log_vreg_write(DisasContext *ctx,
                                       int rnum, VRegWriteType type,
-                                      bool is_predicated)
+                                      bool is_predicated, bool has_helper)
 {
+    if (has_helper) {
+        set_bit(rnum, ctx->insn_vregs_written);
+        if (test_bit(rnum, ctx->insn_vregs_read)) {
+            ctx->has_hvx_overlap = true;
+        }
+    }
+    set_bit(rnum, ctx->vregs_written);
     if (type != EXT_TMP) {
         if (!test_bit(rnum, ctx->vregs_updated)) {
             ctx->vreg_log[ctx->vreg_log_idx] = rnum;
@@ -175,42 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext *ctx,
 
 static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
                                            int rnum, VRegWriteType type,
-                                           bool is_predicated)
+                                           bool is_predicated, bool has_helper)
 {
-    ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
-    ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
+    ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
+    ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
 }
 
-static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
+                                     bool has_helper)
 {
-    set_bit(rnum, ctx->vregs_read);
+    if (has_helper) {
+        set_bit(rnum, ctx->insn_vregs_read);
+        if (test_bit(rnum, ctx->insn_vregs_written)) {
+            ctx->has_hvx_overlap = true;
+        }
+    }
+    if (test_bit(rnum, ctx->vregs_written)) {
+        ctx->read_after_write = true;
+    }
 }
 
-static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
+                                         bool has_helper)
 {
     g_assert(is_gather_store_insn(ctx) ||
              test_bit(rnum, ctx->vregs_updated) ||
              test_bit(rnum, ctx->vregs_select) ||
              test_bit(rnum, ctx->vregs_updated_tmp));
-    set_bit(rnum, ctx->vregs_read);
+    if (has_helper) {
+        set_bit(rnum, ctx->insn_vregs_read);
+        if (test_bit(rnum, ctx->insn_vregs_written)) {
+            ctx->has_hvx_overlap = true;
+        }
+    }
+    if (is_gather_store_insn(ctx)) {
+        ctx->read_after_write = true;
+    }
 }
 
-static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
+static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
+                                          bool has_helper)
 {
-    ctx_log_vreg_read(ctx, rnum ^ 0);
-    ctx_log_vreg_read(ctx, rnum ^ 1);
+    ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
+    ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
 }
 
 static inline void ctx_log_qreg_write(DisasContext *ctx,
-                                      int rnum)
+                                      int rnum, bool has_helper)
 {
+    if (has_helper) {
+        set_bit(rnum, ctx->insn_qregs_written);
+        if (test_bit(rnum, ctx->insn_qregs_read)) {
+            ctx->has_hvx_overlap = true;
+        }
+    }
+    set_bit(rnum, ctx->qregs_written);
     ctx->qreg_log[ctx->qreg_log_idx] = rnum;
     ctx->qreg_log_idx++;
 }
 
-static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
+static inline void ctx_log_qreg_read(DisasContext *ctx,
+                                     int qnum, bool has_helper)
 {
-    set_bit(qnum, ctx->qregs_read);
+    if (has_helper) {
+        set_bit(qnum, ctx->insn_qregs_read);
+        if (test_bit(qnum, ctx->insn_qregs_written)) {
+            ctx->has_hvx_overlap = true;
+        }
+    }
+    if (test_bit(qnum, ctx->qregs_written)) {
+        ctx->read_after_write = true;
+    }
 }
 
 extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
index 751ca71790..ed4b4acd1d 100644
--- a/target/hexagon/translate.c
+++ b/target/hexagon/translate.c
@@ -378,60 +378,10 @@ static bool need_commit(DisasContext *ctx)
         return true;
     }
 
-    if (pkt->num_insns == 1) {
-        if (pkt->pkt_has_hvx) {
-            /*
-             * The HVX instructions with generated helpers use
-             * pass-by-reference, so they need the read/write overlap
-             * check below.
-             * The HVX instructions with overrides are OK.
-             */
-            if (!ctx->has_hvx_helper) {
-                return false;
-            }
-        } else {
-            return false;
-        }
-    }
-
-    if (ctx->read_after_write) {
+    if (ctx->read_after_write || ctx->has_hvx_overlap) {
         return true;
     }
 
-    /* Check for overlap between HVX reads and writes */
-    for (int i = 0; i < ctx->vreg_log_idx; i++) {
-        int vnum = ctx->vreg_log[i];
-        if (test_bit(vnum, ctx->vregs_read)) {
-            return true;
-        }
-    }
-    if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
-        int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
-        while (i < NUM_VREGS) {
-            if (test_bit(i, ctx->vregs_read)) {
-                return true;
-            }
-            i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
-        }
-    }
-    if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
-        int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
-        while (i < NUM_VREGS) {
-            if (test_bit(i, ctx->vregs_read)) {
-                return true;
-            }
-            i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
-        }
-    }
-
-    /* Check for overlap between HVX predicate reads and writes */
-    for (int i = 0; i < ctx->qreg_log_idx; i++) {
-        int qnum = ctx->qreg_log[i];
-        if (test_bit(qnum, ctx->qregs_read)) {
-            return true;
-        }
-    }
-
     return false;
 }
 
@@ -453,8 +403,8 @@ static void mark_implicit_pred_reads(DisasContext *ctx)
 static void analyze_packet(DisasContext *ctx)
 {
     Packet *pkt = ctx->pkt;
-    ctx->has_hvx_helper = false;
     ctx->read_after_write = false;
+    ctx->has_hvx_overlap = false;
     for (int i = 0; i < pkt->num_insns; i++) {
         Insn *insn = &pkt->insn[i];
         ctx->insn = insn;
@@ -485,13 +435,13 @@ static void gen_start_packet(DisasContext *ctx)
     ctx->future_vregs_idx = 0;
     ctx->tmp_vregs_idx = 0;
     ctx->vreg_log_idx = 0;
+    bitmap_zero(ctx->vregs_written, NUM_VREGS);
     bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
     bitmap_zero(ctx->vregs_updated, NUM_VREGS);
     bitmap_zero(ctx->vregs_select, NUM_VREGS);
     bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
     bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
-    bitmap_zero(ctx->vregs_read, NUM_VREGS);
-    bitmap_zero(ctx->qregs_read, NUM_QREGS);
+    bitmap_zero(ctx->qregs_written, NUM_QREGS);
     ctx->qreg_log_idx = 0;
     for (i = 0; i < STORES_MAX; i++) {
         ctx->store_width[i] = 0;
diff --git a/target/hexagon/gen_analyze_funcs.py b/target/hexagon/gen_analyze_funcs.py
index 890e6a3a95..81e1d9cfa3 100755
--- a/target/hexagon/gen_analyze_funcs.py
+++ b/target/hexagon/gen_analyze_funcs.py
@@ -43,6 +43,16 @@ def gen_analyze_func(f, tag, regs, imms):
     f.write("{\n")
 
     f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
+    if (hex_common.is_hvx_insn(tag)):
+        if hex_common.has_hvx_helper(tag):
+            f.write(
+                "    const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
+            )
+            f.write("    ctx_start_hvx_insn(ctx);\n")
+        else:
+            f.write(
+                "    const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
+            )
 
     ## Declare all the registers
     for regno, register in enumerate(regs):
@@ -64,15 +74,6 @@ def gen_analyze_func(f, tag, regs, imms):
         if reg.is_written():
             reg.analyze_write(f, tag, regno)
 
-    has_generated_helper = not hex_common.skip_qemu_helper(
-        tag
-    ) and not hex_common.is_idef_parser_enabled(tag)
-
-    ## Mark HVX instructions with generated helpers
-    if (has_generated_helper and
-        "A_CVI" in hex_common.attribdict[tag]):
-        f.write("    ctx->has_hvx_helper = true;\n")
-
     f.write("}\n\n")
 
 
diff --git a/target/hexagon/hex_common.py b/target/hexagon/hex_common.py
index 33801e4bd7..9e7f613e3c 100755
--- a/target/hexagon/hex_common.py
+++ b/target/hexagon/hex_common.py
@@ -241,6 +241,16 @@ def is_idef_parser_enabled(tag):
     return tag in idef_parser_enabled
 
 
+def is_hvx_insn(tag):
+    return "A_CVI" in attribdict[tag]
+
+
+def has_hvx_helper(tag):
+    return (is_hvx_insn(tag) and
+            not skip_qemu_helper(tag) and
+            not is_idef_parser_enabled(tag))
+
+
 def imm_name(immlett):
     return f"{immlett}iV"
 
@@ -704,7 +714,8 @@ def analyze_write(self, f, tag, regno):
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
-            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+                               insn_has_hvx_helper);
         """))
 
 class VRegSource(Register, Hvx, OldSource):
@@ -724,7 +735,7 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read(ctx, {self.reg_num});
+            ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
 class VRegNewSource(Register, Hvx, NewSource):
@@ -741,7 +752,7 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read_new(ctx, {self.reg_num});
+            ctx_log_vreg_read_new(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
 class VRegReadWrite(Register, Hvx, ReadWrite):
@@ -767,13 +778,14 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read(ctx, {self.reg_num});
+            ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
     def analyze_write(self, f, tag, regno):
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
-            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+                               insn_has_hvx_helper);
         """))
 
 class VRegTmp(Register, Hvx, ReadWrite):
@@ -801,13 +813,14 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read(ctx, {self.reg_num});
+            ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
     def analyze_write(self, f, tag, regno):
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
-            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
+            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
+                               insn_has_hvx_helper);
         """))
 
 class VRegPairDest(Register, Hvx, Dest):
@@ -832,7 +845,8 @@ def analyze_write(self, f, tag, regno):
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
-            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
+            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
+                                    insn_has_hvx_helper);
         """))
 
 class VRegPairSource(Register, Hvx, OldSource):
@@ -859,7 +873,7 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read_pair(ctx, {self.reg_num});
+            ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
 class VRegPairReadWrite(Register, Hvx, ReadWrite):
@@ -891,13 +905,14 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_vreg_read_pair(ctx, {self.reg_num});
+            ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
     def analyze_write(self, f, tag, regno):
         newv = hvx_newv(tag)
         predicated = "true" if is_predicated(tag) else "false"
         f.write(code_fmt(f"""\
-            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
+            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
+                                    insn_has_hvx_helper);
         """))
 
 class QRegDest(Register, Hvx, Dest):
@@ -920,7 +935,7 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_write(self, f, tag, regno):
         f.write(code_fmt(f"""\
-            ctx_log_qreg_write(ctx, {self.reg_num});
+            ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
 class QRegSource(Register, Hvx, OldSource):
@@ -941,7 +956,7 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_qreg_read(ctx, {self.reg_num});
+            ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
 class QRegReadWrite(Register, Hvx, ReadWrite):
@@ -967,11 +982,11 @@ def helper_hvx_desc(self, f):
         """))
     def analyze_read(self, f, regno):
         f.write(code_fmt(f"""\
-            ctx_log_qreg_read(ctx, {self.reg_num});
+            ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
     def analyze_write(self, f, tag, regno):
         f.write(code_fmt(f"""\
-            ctx_log_qreg_write(ctx, {self.reg_num});
+            ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
         """))
 
 def init_registers():
-- 
2.34.1



^ permalink raw reply related	[flat|nested] 7+ messages in thread

* RE: [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX)
  2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
@ 2024-03-29  2:02   ` Brian Cain
  0 siblings, 0 replies; 7+ messages in thread
From: Brian Cain @ 2024-03-29  2:02 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel
  Cc: Matheus Bernardino (QUIC), Sid Manning, Marco Liebel (QUIC),
	richard.henderson, philmd, ale, anjo



> -----Original Message-----
> From: Taylor Simpson <ltaylorsimpson@gmail.com>
> Sent: Thursday, February 1, 2024 4:34 AM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; Sid Manning <sidneym@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>;
> richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; anjo@rev.ng;
> ltaylorsimpson@gmail.com
> Subject: [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit
> packets (HVX)
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> Look for read-after-write instead of overlap of reads and writes
> 
> HVX instructions with helpers have pass-by-reference semantics, so
> we check for overlaps of reads and writes within the same instruction.
> 
> Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
> ---
>  target/hexagon/translate.h          | 88 +++++++++++++++++++++++------
>  target/hexagon/translate.c          | 58 ++-----------------
>  target/hexagon/gen_analyze_funcs.py | 19 ++++---
>  target/hexagon/hex_common.py        | 45 ++++++++++-----
>  4 files changed, 115 insertions(+), 95 deletions(-)
> 
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index d5e7f49ad8..00cc2bcd63 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -50,23 +50,27 @@ typedef struct DisasContext {
>      int tmp_vregs_num[VECTOR_TEMPS_MAX];
>      int vreg_log[NUM_VREGS];
>      int vreg_log_idx;
> +    DECLARE_BITMAP(vregs_written, NUM_VREGS);
> +    DECLARE_BITMAP(insn_vregs_written, NUM_VREGS);
>      DECLARE_BITMAP(vregs_updated_tmp, NUM_VREGS);
>      DECLARE_BITMAP(vregs_updated, NUM_VREGS);
>      DECLARE_BITMAP(vregs_select, NUM_VREGS);
>      DECLARE_BITMAP(predicated_future_vregs, NUM_VREGS);
>      DECLARE_BITMAP(predicated_tmp_vregs, NUM_VREGS);
> -    DECLARE_BITMAP(vregs_read, NUM_VREGS);
> +    DECLARE_BITMAP(insn_vregs_read, NUM_VREGS);
>      int qreg_log[NUM_QREGS];
>      int qreg_log_idx;
> -    DECLARE_BITMAP(qregs_read, NUM_QREGS);
> +    DECLARE_BITMAP(qregs_written, NUM_QREGS);
> +    DECLARE_BITMAP(insn_qregs_written, NUM_QREGS);
> +    DECLARE_BITMAP(insn_qregs_read, NUM_QREGS);
>      bool pre_commit;
>      bool need_commit;
>      TCGCond branch_cond;
>      target_ulong branch_dest;
>      bool is_tight_loop;
>      bool short_circuit;
> -    bool has_hvx_helper;
>      bool read_after_write;
> +    bool has_hvx_overlap;
>      TCGv new_value[TOTAL_PER_THREAD_REGS];
>      TCGv new_pred_value[NUM_PREGS];
>      TCGv pred_written;
> @@ -146,10 +150,25 @@ intptr_t ctx_future_vreg_off(DisasContext *ctx, int
> regnum,
>  intptr_t ctx_tmp_vreg_off(DisasContext *ctx, int regnum,
>                            int num, bool alloc_ok);
> 
> +static inline void ctx_start_hvx_insn(DisasContext *ctx)
> +{
> +    bitmap_zero(ctx->insn_vregs_written, NUM_VREGS);
> +    bitmap_zero(ctx->insn_vregs_read, NUM_VREGS);
> +    bitmap_zero(ctx->insn_qregs_written, NUM_QREGS);
> +    bitmap_zero(ctx->insn_qregs_read, NUM_QREGS);
> +}
> +
>  static inline void ctx_log_vreg_write(DisasContext *ctx,
>                                        int rnum, VRegWriteType type,
> -                                      bool is_predicated)
> +                                      bool is_predicated, bool has_helper)
>  {
> +    if (has_helper) {
> +        set_bit(rnum, ctx->insn_vregs_written);
> +        if (test_bit(rnum, ctx->insn_vregs_read)) {
> +            ctx->has_hvx_overlap = true;
> +        }
> +    }
> +    set_bit(rnum, ctx->vregs_written);
>      if (type != EXT_TMP) {
>          if (!test_bit(rnum, ctx->vregs_updated)) {
>              ctx->vreg_log[ctx->vreg_log_idx] = rnum;
> @@ -175,42 +194,77 @@ static inline void ctx_log_vreg_write(DisasContext
> *ctx,
> 
>  static inline void ctx_log_vreg_write_pair(DisasContext *ctx,
>                                             int rnum, VRegWriteType type,
> -                                           bool is_predicated)
> +                                           bool is_predicated, bool has_helper)
>  {
> -    ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated);
> -    ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated);
> +    ctx_log_vreg_write(ctx, rnum ^ 0, type, is_predicated, has_helper);
> +    ctx_log_vreg_write(ctx, rnum ^ 1, type, is_predicated, has_helper);
>  }
> 
> -static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum)
> +static inline void ctx_log_vreg_read(DisasContext *ctx, int rnum,
> +                                     bool has_helper)
>  {
> -    set_bit(rnum, ctx->vregs_read);
> +    if (has_helper) {
> +        set_bit(rnum, ctx->insn_vregs_read);
> +        if (test_bit(rnum, ctx->insn_vregs_written)) {
> +            ctx->has_hvx_overlap = true;
> +        }
> +    }
> +    if (test_bit(rnum, ctx->vregs_written)) {
> +        ctx->read_after_write = true;
> +    }
>  }
> 
> -static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
> +static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum,
> +                                         bool has_helper)
>  {
>      g_assert(is_gather_store_insn(ctx) ||
>               test_bit(rnum, ctx->vregs_updated) ||
>               test_bit(rnum, ctx->vregs_select) ||
>               test_bit(rnum, ctx->vregs_updated_tmp));
> -    set_bit(rnum, ctx->vregs_read);
> +    if (has_helper) {
> +        set_bit(rnum, ctx->insn_vregs_read);
> +        if (test_bit(rnum, ctx->insn_vregs_written)) {
> +            ctx->has_hvx_overlap = true;
> +        }
> +    }
> +    if (is_gather_store_insn(ctx)) {
> +        ctx->read_after_write = true;
> +    }
>  }
> 
> -static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
> +static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum,
> +                                          bool has_helper)
>  {
> -    ctx_log_vreg_read(ctx, rnum ^ 0);
> -    ctx_log_vreg_read(ctx, rnum ^ 1);
> +    ctx_log_vreg_read(ctx, rnum ^ 0, has_helper);
> +    ctx_log_vreg_read(ctx, rnum ^ 1, has_helper);
>  }
> 
>  static inline void ctx_log_qreg_write(DisasContext *ctx,
> -                                      int rnum)
> +                                      int rnum, bool has_helper)
>  {
> +    if (has_helper) {
> +        set_bit(rnum, ctx->insn_qregs_written);
> +        if (test_bit(rnum, ctx->insn_qregs_read)) {
> +            ctx->has_hvx_overlap = true;
> +        }
> +    }
> +    set_bit(rnum, ctx->qregs_written);
>      ctx->qreg_log[ctx->qreg_log_idx] = rnum;
>      ctx->qreg_log_idx++;
>  }
> 
> -static inline void ctx_log_qreg_read(DisasContext *ctx, int qnum)
> +static inline void ctx_log_qreg_read(DisasContext *ctx,
> +                                     int qnum, bool has_helper)
>  {
> -    set_bit(qnum, ctx->qregs_read);
> +    if (has_helper) {
> +        set_bit(qnum, ctx->insn_qregs_read);
> +        if (test_bit(qnum, ctx->insn_qregs_written)) {
> +            ctx->has_hvx_overlap = true;
> +        }
> +    }
> +    if (test_bit(qnum, ctx->qregs_written)) {
> +        ctx->read_after_write = true;
> +    }
>  }
> 
>  extern TCGv hex_gpr[TOTAL_PER_THREAD_REGS];
> diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
> index 751ca71790..ed4b4acd1d 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -378,60 +378,10 @@ static bool need_commit(DisasContext *ctx)
>          return true;
>      }
> 
> -    if (pkt->num_insns == 1) {
> -        if (pkt->pkt_has_hvx) {
> -            /*
> -             * The HVX instructions with generated helpers use
> -             * pass-by-reference, so they need the read/write overlap
> -             * check below.
> -             * The HVX instructions with overrides are OK.
> -             */
> -            if (!ctx->has_hvx_helper) {
> -                return false;
> -            }
> -        } else {
> -            return false;
> -        }
> -    }
> -
> -    if (ctx->read_after_write) {
> +    if (ctx->read_after_write || ctx->has_hvx_overlap) {
>          return true;
>      }
> 
> -    /* Check for overlap between HVX reads and writes */
> -    for (int i = 0; i < ctx->vreg_log_idx; i++) {
> -        int vnum = ctx->vreg_log[i];
> -        if (test_bit(vnum, ctx->vregs_read)) {
> -            return true;
> -        }
> -    }
> -    if (!bitmap_empty(ctx->vregs_updated_tmp, NUM_VREGS)) {
> -        int i = find_first_bit(ctx->vregs_updated_tmp, NUM_VREGS);
> -        while (i < NUM_VREGS) {
> -            if (test_bit(i, ctx->vregs_read)) {
> -                return true;
> -            }
> -            i = find_next_bit(ctx->vregs_updated_tmp, NUM_VREGS, i + 1);
> -        }
> -    }
> -    if (!bitmap_empty(ctx->vregs_select, NUM_VREGS)) {
> -        int i = find_first_bit(ctx->vregs_select, NUM_VREGS);
> -        while (i < NUM_VREGS) {
> -            if (test_bit(i, ctx->vregs_read)) {
> -                return true;
> -            }
> -            i = find_next_bit(ctx->vregs_select, NUM_VREGS, i + 1);
> -        }
> -    }
> -
> -    /* Check for overlap between HVX predicate reads and writes */
> -    for (int i = 0; i < ctx->qreg_log_idx; i++) {
> -        int qnum = ctx->qreg_log[i];
> -        if (test_bit(qnum, ctx->qregs_read)) {
> -            return true;
> -        }
> -    }
> -
>      return false;
>  }
> 
> @@ -453,8 +403,8 @@ static void mark_implicit_pred_reads(DisasContext
> *ctx)
>  static void analyze_packet(DisasContext *ctx)
>  {
>      Packet *pkt = ctx->pkt;
> -    ctx->has_hvx_helper = false;
>      ctx->read_after_write = false;
> +    ctx->has_hvx_overlap = false;
>      for (int i = 0; i < pkt->num_insns; i++) {
>          Insn *insn = &pkt->insn[i];
>          ctx->insn = insn;
> @@ -485,13 +435,13 @@ static void gen_start_packet(DisasContext *ctx)
>      ctx->future_vregs_idx = 0;
>      ctx->tmp_vregs_idx = 0;
>      ctx->vreg_log_idx = 0;
> +    bitmap_zero(ctx->vregs_written, NUM_VREGS);
>      bitmap_zero(ctx->vregs_updated_tmp, NUM_VREGS);
>      bitmap_zero(ctx->vregs_updated, NUM_VREGS);
>      bitmap_zero(ctx->vregs_select, NUM_VREGS);
>      bitmap_zero(ctx->predicated_future_vregs, NUM_VREGS);
>      bitmap_zero(ctx->predicated_tmp_vregs, NUM_VREGS);
> -    bitmap_zero(ctx->vregs_read, NUM_VREGS);
> -    bitmap_zero(ctx->qregs_read, NUM_QREGS);
> +    bitmap_zero(ctx->qregs_written, NUM_QREGS);
>      ctx->qreg_log_idx = 0;
>      for (i = 0; i < STORES_MAX; i++) {
>          ctx->store_width[i] = 0;
> diff --git a/target/hexagon/gen_analyze_funcs.py
> b/target/hexagon/gen_analyze_funcs.py
> index 890e6a3a95..81e1d9cfa3 100755
> --- a/target/hexagon/gen_analyze_funcs.py
> +++ b/target/hexagon/gen_analyze_funcs.py
> @@ -43,6 +43,16 @@ def gen_analyze_func(f, tag, regs, imms):
>      f.write("{\n")
> 
>      f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
> +    if (hex_common.is_hvx_insn(tag)):
> +        if hex_common.has_hvx_helper(tag):
> +            f.write(
> +                "    const bool G_GNUC_UNUSED insn_has_hvx_helper = true;\n"
> +            )
> +            f.write("    ctx_start_hvx_insn(ctx);\n")
> +        else:
> +            f.write(
> +                "    const bool G_GNUC_UNUSED insn_has_hvx_helper = false;\n"
> +            )
> 
>      ## Declare all the registers
>      for regno, register in enumerate(regs):
> @@ -64,15 +74,6 @@ def gen_analyze_func(f, tag, regs, imms):
>          if reg.is_written():
>              reg.analyze_write(f, tag, regno)
> 
> -    has_generated_helper = not hex_common.skip_qemu_helper(
> -        tag
> -    ) and not hex_common.is_idef_parser_enabled(tag)
> -
> -    ## Mark HVX instructions with generated helpers
> -    if (has_generated_helper and
> -        "A_CVI" in hex_common.attribdict[tag]):
> -        f.write("    ctx->has_hvx_helper = true;\n")
> -
>      f.write("}\n\n")
> 
> 
> diff --git a/target/hexagon/hex_common.py
> b/target/hexagon/hex_common.py
> index 33801e4bd7..9e7f613e3c 100755
> --- a/target/hexagon/hex_common.py
> +++ b/target/hexagon/hex_common.py
> @@ -241,6 +241,16 @@ def is_idef_parser_enabled(tag):
>      return tag in idef_parser_enabled
> 
> 
> +def is_hvx_insn(tag):
> +    return "A_CVI" in attribdict[tag]
> +
> +
> +def has_hvx_helper(tag):
> +    return (is_hvx_insn(tag) and
> +            not skip_qemu_helper(tag) and
> +            not is_idef_parser_enabled(tag))
> +
> +
>  def imm_name(immlett):
>      return f"{immlett}iV"
> 
> @@ -704,7 +714,8 @@ def analyze_write(self, f, tag, regno):
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
> +            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
> +                               insn_has_hvx_helper);
>          """))
> 
>  class VRegSource(Register, Hvx, OldSource):
> @@ -724,7 +735,7 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read(ctx, {self.reg_num});
> +            ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
> 
>  class VRegNewSource(Register, Hvx, NewSource):
> @@ -741,7 +752,7 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read_new(ctx, {self.reg_num});
> +            ctx_log_vreg_read_new(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
> 
>  class VRegReadWrite(Register, Hvx, ReadWrite):
> @@ -767,13 +778,14 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read(ctx, {self.reg_num});
> +            ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
>      def analyze_write(self, f, tag, regno):
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
> +            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
> +                               insn_has_hvx_helper);
>          """))
> 
>  class VRegTmp(Register, Hvx, ReadWrite):
> @@ -801,13 +813,14 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read(ctx, {self.reg_num});
> +            ctx_log_vreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
>      def analyze_write(self, f, tag, regno):
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated});
> +            ctx_log_vreg_write(ctx, {self.reg_num}, {newv}, {predicated},
> +                               insn_has_hvx_helper);
>          """))
> 
>  class VRegPairDest(Register, Hvx, Dest):
> @@ -832,7 +845,8 @@ def analyze_write(self, f, tag, regno):
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
> +            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
> +                                    insn_has_hvx_helper);
>          """))
> 
>  class VRegPairSource(Register, Hvx, OldSource):
> @@ -859,7 +873,7 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read_pair(ctx, {self.reg_num});
> +            ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
> 
>  class VRegPairReadWrite(Register, Hvx, ReadWrite):
> @@ -891,13 +905,14 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read_pair(ctx, {self.reg_num});
> +            ctx_log_vreg_read_pair(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
>      def analyze_write(self, f, tag, regno):
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated});
> +            ctx_log_vreg_write_pair(ctx, {self.reg_num}, {newv}, {predicated},
> +                                    insn_has_hvx_helper);
>          """))
> 
>  class QRegDest(Register, Hvx, Dest):
> @@ -920,7 +935,7 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_write(self, f, tag, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_qreg_write(ctx, {self.reg_num});
> +            ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
> 
>  class QRegSource(Register, Hvx, OldSource):
> @@ -941,7 +956,7 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_qreg_read(ctx, {self.reg_num});
> +            ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
> 
>  class QRegReadWrite(Register, Hvx, ReadWrite):
> @@ -967,11 +982,11 @@ def helper_hvx_desc(self, f):
>          """))
>      def analyze_read(self, f, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_qreg_read(ctx, {self.reg_num});
> +            ctx_log_qreg_read(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
>      def analyze_write(self, f, tag, regno):
>          f.write(code_fmt(f"""\
> -            ctx_log_qreg_write(ctx, {self.reg_num});
> +            ctx_log_qreg_write(ctx, {self.reg_num}, insn_has_hvx_helper);
>          """))
> 
>  def init_registers():
> --
> 2.34.1

Reviewed-by: Brian Cain <bcain@quicinc.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core)
  2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
@ 2024-03-29  2:02   ` Brian Cain
  0 siblings, 0 replies; 7+ messages in thread
From: Brian Cain @ 2024-03-29  2:02 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel
  Cc: Matheus Bernardino (QUIC), Sid Manning, Marco Liebel (QUIC),
	richard.henderson, philmd, ale, anjo



> -----Original Message-----
> From: Taylor Simpson <ltaylorsimpson@gmail.com>
> Sent: Thursday, February 1, 2024 4:34 AM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; Sid Manning <sidneym@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>;
> richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; anjo@rev.ng;
> ltaylorsimpson@gmail.com
> Subject: [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit
> packets (scalar core)
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> Look for read-after-write instead of overlap of reads and writes
> 
> Here is an example with overalp but no read-after-write:
> 0x000200fc:  0x38103876 {       R0 = add(R0,R1); R6 = add(R6,R7) }
> 
> BEFORE:
>  ---- 00000000000200fc
>  mov_i32 loc2,$0x0
>  mov_i32 loc2,r0
>  add_i32 loc3,loc2,r1
>  mov_i32 loc2,loc3
>  mov_i32 loc4,$0x0
>  mov_i32 loc4,r6
>  add_i32 loc5,loc4,r7
>  mov_i32 loc4,loc5
>  mov_i32 r0,loc2
>  mov_i32 r6,loc4
> 
> AFTER:
>  ---- 00000000000200fc
>  add_i32 loc2,r0,r1
>  mov_i32 r0,loc2
>  add_i32 loc3,r6,r7
>  mov_i32 r6,loc3
> 
> We can also short-circuit packets with .new values by reading from the
> real destination instead of the temporary.
> 0x00020100:  0x78005ff3 {       R19 = #0xff
> 0x00020104:  0x2002e204         if (cmp.eq(N19.new,R2)) jump:t PC+8 }
> 
> BEFORE:
>  ---- 0000000000020100
>  mov_i32 pc,$0x20108
>  mov_i32 loc8,$0x0
>  mov_i32 loc8,$0xff
>  setcond_i32 loc10,loc8,r2,eq
>  mov_i32 loc6,loc10
>  mov_i32 r19,loc8
>  add_i32 pkt_cnt,pkt_cnt,$0x2
>  add_i32 insn_cnt,insn_cnt,$0x4
>  brcond_i32 loc6,$0x0,eq,$L1
>  goto_tb $0x0
>  mov_i32 pc,$0x20108
>  exit_tb $0x7fbb54000040
>  set_label $L1
>  goto_tb $0x1
>  exit_tb $0x7fbb54000041
>  set_label $L0
>  exit_tb $0x7fbb54000043
> 
> AFTER:
>  ---- 0000000000020100
>  mov_i32 pc,$0x20108
>  mov_i32 r19,$0xff
>  setcond_i32 loc7,r19,r2,eq
>  mov_i32 loc4,loc7
>  add_i32 pkt_cnt,pkt_cnt,$0x2
>  add_i32 insn_cnt,insn_cnt,$0x4
>  brcond_i32 loc4,$0x0,eq,$L1
>  goto_tb $0x0
>  mov_i32 pc,$0x20108
>  exit_tb $0x7f9764000040
>  set_label $L1
>  goto_tb $0x1
>  exit_tb $0x7f9764000041
>  set_label $L0
>  exit_tb $0x7f9764000043
> 
> Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
> ---
>  target/hexagon/translate.h | 13 +++++++------
>  target/hexagon/translate.c | 21 ++++-----------------
>  2 files changed, 11 insertions(+), 23 deletions(-)
> 
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index f06d71fc53..d5e7f49ad8 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -38,12 +38,10 @@ typedef struct DisasContext {
>      int reg_log[REG_WRITES_MAX];
>      int reg_log_idx;
>      DECLARE_BITMAP(regs_written, TOTAL_PER_THREAD_REGS);
> -    DECLARE_BITMAP(regs_read, TOTAL_PER_THREAD_REGS);
>      DECLARE_BITMAP(predicated_regs, TOTAL_PER_THREAD_REGS);
>      int preg_log[PRED_WRITES_MAX];
>      int preg_log_idx;
>      DECLARE_BITMAP(pregs_written, NUM_PREGS);
> -    DECLARE_BITMAP(pregs_read, NUM_PREGS);
>      uint8_t store_width[STORES_MAX];
>      bool s1_store_processed;
>      int future_vregs_idx;
> @@ -68,6 +66,7 @@ typedef struct DisasContext {
>      bool is_tight_loop;
>      bool short_circuit;
>      bool has_hvx_helper;
> +    bool read_after_write;
>      TCGv new_value[TOTAL_PER_THREAD_REGS];
>      TCGv new_pred_value[NUM_PREGS];
>      TCGv pred_written;
> @@ -88,13 +87,14 @@ static inline void ctx_log_pred_write(DisasContext
> *ctx, int pnum)
> 
>  static inline void ctx_log_pred_read(DisasContext *ctx, int pnum)
>  {
> -    set_bit(pnum, ctx->pregs_read);
> +    if (test_bit(pnum, ctx->pregs_written)) {
> +        ctx->read_after_write = true;
> +    }
>  }
> 
>  static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
>  {
>      g_assert(test_bit(pnum, ctx->pregs_written));
> -    set_bit(pnum, ctx->pregs_read);
>  }
> 
>  static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
> @@ -125,13 +125,14 @@ static inline void
> ctx_log_reg_write_pair(DisasContext *ctx, int rnum,
> 
>  static inline void ctx_log_reg_read(DisasContext *ctx, int rnum)
>  {
> -    set_bit(rnum, ctx->regs_read);
> +    if (test_bit(rnum, ctx->regs_written)) {
> +        ctx->read_after_write = true;
> +    }
>  }
> 
>  static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
>  {
>      g_assert(test_bit(rnum, ctx->regs_written));
> -    set_bit(rnum, ctx->regs_read);
>  }
> 
>  static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
> diff --git a/target/hexagon/translate.c b/target/hexagon/translate.c
> index 95579ae243..751ca71790 100644
> --- a/target/hexagon/translate.c
> +++ b/target/hexagon/translate.c
> @@ -1,5 +1,5 @@
>  /*
> - *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> + *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
>   *
>   *  This program is free software; you can redistribute it and/or modify
>   *  it under the terms of the GNU General Public License as published by
> @@ -394,20 +394,8 @@ static bool need_commit(DisasContext *ctx)
>          }
>      }
> 
> -    /* Check for overlap between register reads and writes */
> -    for (int i = 0; i < ctx->reg_log_idx; i++) {
> -        int rnum = ctx->reg_log[i];
> -        if (test_bit(rnum, ctx->regs_read)) {
> -            return true;
> -        }
> -    }
> -
> -    /* Check for overlap between predicate reads and writes */
> -    for (int i = 0; i < ctx->preg_log_idx; i++) {
> -        int pnum = ctx->preg_log[i];
> -        if (test_bit(pnum, ctx->pregs_read)) {
> -            return true;
> -        }
> +    if (ctx->read_after_write) {
> +        return true;
>      }
> 
>      /* Check for overlap between HVX reads and writes */
> @@ -466,6 +454,7 @@ static void analyze_packet(DisasContext *ctx)
>  {
>      Packet *pkt = ctx->pkt;
>      ctx->has_hvx_helper = false;
> +    ctx->read_after_write = false;
>      for (int i = 0; i < pkt->num_insns; i++) {
>          Insn *insn = &pkt->insn[i];
>          ctx->insn = insn;
> @@ -490,11 +479,9 @@ static void gen_start_packet(DisasContext *ctx)
>      ctx->next_PC = next_PC;
>      ctx->reg_log_idx = 0;
>      bitmap_zero(ctx->regs_written, TOTAL_PER_THREAD_REGS);
> -    bitmap_zero(ctx->regs_read, TOTAL_PER_THREAD_REGS);
>      bitmap_zero(ctx->predicated_regs, TOTAL_PER_THREAD_REGS);
>      ctx->preg_log_idx = 0;
>      bitmap_zero(ctx->pregs_written, NUM_PREGS);
> -    bitmap_zero(ctx->pregs_read, NUM_PREGS);
>      ctx->future_vregs_idx = 0;
>      ctx->tmp_vregs_idx = 0;
>      ctx->vreg_log_idx = 0;
> --
> 2.34.1

Reviewed-by: Brian Cain <bcain@quicinc.com>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes
  2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
@ 2024-03-29  2:05   ` Brian Cain
  0 siblings, 0 replies; 7+ messages in thread
From: Brian Cain @ 2024-03-29  2:05 UTC (permalink / raw)
  To: Taylor Simpson, qemu-devel
  Cc: Matheus Bernardino (QUIC), Sid Manning, Marco Liebel (QUIC),
	richard.henderson, philmd, ale, anjo



> -----Original Message-----
> From: Taylor Simpson <ltaylorsimpson@gmail.com>
> Sent: Thursday, February 1, 2024 4:34 AM
> To: qemu-devel@nongnu.org
> Cc: Brian Cain <bcain@quicinc.com>; Matheus Bernardino (QUIC)
> <quic_mathbern@quicinc.com>; Sid Manning <sidneym@quicinc.com>;
> Marco Liebel (QUIC) <quic_mliebel@quicinc.com>;
> richard.henderson@linaro.org; philmd@linaro.org; ale@rev.ng; anjo@rev.ng;
> ltaylorsimpson@gmail.com
> Subject: [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before
> writes
> 
> WARNING: This email originated from outside of Qualcomm. Please be wary
> of any links or attachments, and do not enable macros.
> 
> We divide gen_analyze_funcs.py into 3 phases
>     Declare the operands
>     Analyze the register reads
>     Analyze the register writes
> 
> We also create special versions of ctx_log_*_read for new operands
>     Check that the operand is written before the read
> 
> This is a precursor to improving the analysis for short-circuiting
> the packet semantics in a subsequent commit
> 
> Signed-off-by: Taylor Simpson <ltaylorsimpson@gmail.com>
> ---
>  target/hexagon/translate.h          | 26 +++++++++++-
>  target/hexagon/README               |  9 +++--
>  target/hexagon/gen_analyze_funcs.py | 34 ++++++++++------
>  target/hexagon/hex_common.py        | 63 +++++++++++++++--------------
>  4 files changed, 83 insertions(+), 49 deletions(-)
> 
> diff --git a/target/hexagon/translate.h b/target/hexagon/translate.h
> index 4dd59c6726..f06d71fc53 100644
> --- a/target/hexagon/translate.h
> +++ b/target/hexagon/translate.h
> @@ -1,5 +1,5 @@
>  /*
> - *  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> + *  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
>   *
>   *  This program is free software; you can redistribute it and/or modify
>   *  it under the terms of the GNU General Public License as published by
> @@ -75,6 +75,8 @@ typedef struct DisasContext {
>      TCGv dczero_addr;
>  } DisasContext;
> 
> +bool is_gather_store_insn(DisasContext *ctx);
> +
>  static inline void ctx_log_pred_write(DisasContext *ctx, int pnum)
>  {
>      if (!test_bit(pnum, ctx->pregs_written)) {
> @@ -89,6 +91,12 @@ static inline void ctx_log_pred_read(DisasContext *ctx,
> int pnum)
>      set_bit(pnum, ctx->pregs_read);
>  }
> 
> +static inline void ctx_log_pred_read_new(DisasContext *ctx, int pnum)
> +{
> +    g_assert(test_bit(pnum, ctx->pregs_written));
> +    set_bit(pnum, ctx->pregs_read);
> +}
> +
>  static inline void ctx_log_reg_write(DisasContext *ctx, int rnum,
>                                       bool is_predicated)
>  {
> @@ -120,6 +128,12 @@ static inline void ctx_log_reg_read(DisasContext
> *ctx, int rnum)
>      set_bit(rnum, ctx->regs_read);
>  }
> 
> +static inline void ctx_log_reg_read_new(DisasContext *ctx, int rnum)
> +{
> +    g_assert(test_bit(rnum, ctx->regs_written));
> +    set_bit(rnum, ctx->regs_read);
> +}
> +
>  static inline void ctx_log_reg_read_pair(DisasContext *ctx, int rnum)
>  {
>      ctx_log_reg_read(ctx, rnum);
> @@ -171,6 +185,15 @@ static inline void ctx_log_vreg_read(DisasContext
> *ctx, int rnum)
>      set_bit(rnum, ctx->vregs_read);
>  }
> 
> +static inline void ctx_log_vreg_read_new(DisasContext *ctx, int rnum)
> +{
> +    g_assert(is_gather_store_insn(ctx) ||
> +             test_bit(rnum, ctx->vregs_updated) ||
> +             test_bit(rnum, ctx->vregs_select) ||
> +             test_bit(rnum, ctx->vregs_updated_tmp));
> +    set_bit(rnum, ctx->vregs_read);
> +}
> +
>  static inline void ctx_log_vreg_read_pair(DisasContext *ctx, int rnum)
>  {
>      ctx_log_vreg_read(ctx, rnum ^ 0);
> @@ -205,7 +228,6 @@ extern TCGv hex_vstore_addr[VSTORES_MAX];
>  extern TCGv hex_vstore_size[VSTORES_MAX];
>  extern TCGv hex_vstore_pending[VSTORES_MAX];
> 
> -bool is_gather_store_insn(DisasContext *ctx);
>  void process_store(DisasContext *ctx, int slot_num);
> 
>  FIELD(PROBE_PKT_SCALAR_STORE_S0, MMU_IDX,       0, 2)
> diff --git a/target/hexagon/README b/target/hexagon/README
> index 746ebec378..c1d8c8d0ab 100644
> --- a/target/hexagon/README
> +++ b/target/hexagon/README
> @@ -183,10 +183,11 @@ when the override is present.
>      }
> 
>  We also generate an analyze_<tag> function for each instruction.  Currently,
> -these functions record the writes to registers by calling ctx_log_*.  During
> -gen_start_packet, we invoke the analyze_<tag> function for each instruction
> in
> -the packet, and we mark the implicit writes.  After the analysis is performed,
> -we initialize the result register for each of the predicated assignments.
> +these functions record the reads and writes to registers by calling ctx_log_*.
> +During gen_start_packet, we invoke the analyze_<tag> function for each
> instruction in
> +the packet, and we mark the implicit writes.  The analysis determines if the
> packet
> +semantics can be short-circuited.  If not, we initialize the result register for
> each
> +of the predicated assignments.
> 
>  In addition to instruction semantics, we use a generator to create the decode
>  tree.  This generation is a four step process.
> diff --git a/target/hexagon/gen_analyze_funcs.py
> b/target/hexagon/gen_analyze_funcs.py
> index a9af666cef..890e6a3a95 100755
> --- a/target/hexagon/gen_analyze_funcs.py
> +++ b/target/hexagon/gen_analyze_funcs.py
> @@ -1,7 +1,7 @@
>  #!/usr/bin/env python3
> 
>  ##
> -##  Copyright(c) 2022-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> +##  Copyright(c) 2022-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
>  ##
>  ##  This program is free software; you can redistribute it and/or modify
>  ##  it under the terms of the GNU General Public License as published by
> @@ -44,15 +44,25 @@ def gen_analyze_func(f, tag, regs, imms):
> 
>      f.write("    Insn *insn G_GNUC_UNUSED = ctx->insn;\n")
> 
> -    i = 0
> -    ## Analyze all the registers
> -    for regtype, regid in regs:
> -        reg = hex_common.get_register(tag, regtype, regid)
> +    ## Declare all the registers
> +    for regno, register in enumerate(regs):
> +        reg_type, reg_id = register
> +        reg = hex_common.get_register(tag, reg_type, reg_id)
> +        reg.decl_reg_num(f, regno)
> +
> +    ## Analyze the register reads
> +    for regno, register in enumerate(regs):
> +        reg_type, reg_id = register
> +        reg = hex_common.get_register(tag, reg_type, reg_id)
> +        if reg.is_read():
> +            reg.analyze_read(f, regno)
> +
> +    ## Analyze the register writes
> +    for regno, register in enumerate(regs):
> +        reg_type, reg_id = register
> +        reg = hex_common.get_register(tag, reg_type, reg_id)
>          if reg.is_written():
> -            reg.analyze_write(f, tag, i)
> -        else:
> -            reg.analyze_read(f, i)
> -        i += 1
> +            reg.analyze_write(f, tag, regno)
> 
>      has_generated_helper = not hex_common.skip_qemu_helper(
>          tag
> @@ -89,13 +99,13 @@ def main():
>      tagimms = hex_common.get_tagimms()
> 
>      with open(sys.argv[-1], "w") as f:
> -        f.write("#ifndef HEXAGON_TCG_FUNCS_H\n")
> -        f.write("#define HEXAGON_TCG_FUNCS_H\n\n")
> +        f.write("#ifndef HEXAGON_ANALYZE_FUNCS_C_INC\n")
> +        f.write("#define HEXAGON_ANALYZE_FUNCS_C_INC\n\n")
> 
>          for tag in hex_common.tags:
>              gen_analyze_func(f, tag, tagregs[tag], tagimms[tag])
> 
> -        f.write("#endif    /* HEXAGON_TCG_FUNCS_H */\n")
> +        f.write("#endif    /* HEXAGON_ANALYZE_FUNCS_C_INC */\n")
> 
> 
>  if __name__ == "__main__":
> diff --git a/target/hexagon/hex_common.py
> b/target/hexagon/hex_common.py
> index 195620c7ec..33801e4bd7 100755
> --- a/target/hexagon/hex_common.py
> +++ b/target/hexagon/hex_common.py
> @@ -1,7 +1,7 @@
>  #!/usr/bin/env python3
> 
>  ##
> -##  Copyright(c) 2019-2023 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
> +##  Copyright(c) 2019-2024 Qualcomm Innovation Center, Inc. All Rights
> Reserved.
>  ##
>  ##  This program is free software; you can redistribute it and/or modify
>  ##  it under the terms of the GNU General Public License as published by
> @@ -425,7 +425,6 @@ def log_write(self, f, tag):
>              gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
>              ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
> @@ -438,7 +437,6 @@ def decl_tcg(self, f, tag, regno):
>              TCGv {self.reg_tcg()} = hex_gpr[{self.reg_num}];
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_reg_read(ctx, {self.reg_num});
>          """))
> @@ -449,9 +447,8 @@ def decl_tcg(self, f, tag, regno):
>              TCGv {self.reg_tcg()} = get_result_gpr(ctx, insn->regno[{regno}]);
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
> -            ctx_log_reg_read(ctx, {self.reg_num});
> +            ctx_log_reg_read_new(ctx, {self.reg_num});
>          """))
> 
>  class GprReadWrite(Register, Single, ReadWrite):
> @@ -471,8 +468,11 @@ def log_write(self, f, tag):
>          f.write(code_fmt(f"""\
>              gen_log_reg_write(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_reg_read(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
>              ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
> @@ -493,7 +493,6 @@ def log_write(self, f, tag):
>              gen_write_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
>              ctx_log_reg_write(ctx, {self.reg_num}, {predicated});
> @@ -511,7 +510,6 @@ def decl_tcg(self, f, tag, regno):
>              gen_read_ctrl_reg(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_reg_read(ctx, {self.reg_num});
>          """))
> @@ -532,7 +530,6 @@ def idef_arg(self, declared):
>          declared.append(self.reg_tcg())
>          declared.append("CS")
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_reg_read(ctx, {self.reg_num});
>          """))
> @@ -548,7 +545,6 @@ def log_write(self, f, tag):
>              gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_pred_write(ctx, {self.reg_num});
>          """))
> @@ -560,7 +556,6 @@ def decl_tcg(self, f, tag, regno):
>              TCGv {self.reg_tcg()} = hex_pred[{self.reg_num}];
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_pred_read(ctx, {self.reg_num});
>          """))
> @@ -571,9 +566,8 @@ def decl_tcg(self, f, tag, regno):
>              TCGv {self.reg_tcg()} = get_result_pred(ctx, insn->regno[{regno}]);
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
> -            ctx_log_pred_read(ctx, {self.reg_num});
> +            ctx_log_pred_read_new(ctx, {self.reg_num});
>          """))
> 
>  class PredReadWrite(Register, Single, ReadWrite):
> @@ -587,8 +581,11 @@ def log_write(self, f, tag):
>          f.write(code_fmt(f"""\
>              gen_log_pred_write(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_pred_read(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_pred_write(ctx, {self.reg_num});
>          """))
> @@ -605,7 +602,6 @@ def log_write(self, f, tag):
>              gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
>              ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
> @@ -621,7 +617,6 @@ def decl_tcg(self, f, tag, regno):
>                                      hex_gpr[{self.reg_num} + 1]);
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_reg_read_pair(ctx, {self.reg_num});
>          """))
> @@ -640,8 +635,11 @@ def log_write(self, f, tag):
>          f.write(code_fmt(f"""\
>              gen_log_reg_write_pair(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_reg_read_pair(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
>              ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
> @@ -663,7 +661,6 @@ def log_write(self, f, tag):
>              gen_write_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
>              ctx_log_reg_write_pair(ctx, {self.reg_num}, {predicated});
> @@ -681,7 +678,6 @@ def decl_tcg(self, f, tag, regno):
>              gen_read_ctrl_reg_pair(ctx, {self.reg_num}, {self.reg_tcg()});
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_reg_read_pair(ctx, {self.reg_num});
>          """))
> @@ -705,7 +701,6 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> @@ -728,7 +723,6 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_vreg_read(ctx, {self.reg_num});
>          """))
> @@ -746,9 +740,8 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
> -            ctx_log_vreg_read(ctx, {self.reg_num});
> +            ctx_log_vreg_read_new(ctx, {self.reg_num});
>          """))
> 
>  class VRegReadWrite(Register, Hvx, ReadWrite):
> @@ -772,8 +765,11 @@ def helper_hvx_desc(self, f):
>          f.write(code_fmt(f"""\
>              /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_vreg_read(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> @@ -803,8 +799,11 @@ def helper_hvx_desc(self, f):
>          f.write(code_fmt(f"""\
>              /* {self.reg_tcg()} is *(MMVector *)({self.helper_arg_name()}) */
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_vreg_read(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> @@ -830,7 +829,6 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> @@ -860,7 +858,6 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_vreg_read_pair(ctx, {self.reg_num});
>          """))
> @@ -892,8 +889,11 @@ def helper_hvx_desc(self, f):
>          f.write(code_fmt(f"""\
>              /* {self.reg_tcg()} is *(MMVectorPair *)({self.helper_arg_name()}) */
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_vreg_read_pair(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          newv = hvx_newv(tag)
>          predicated = "true" if is_predicated(tag) else "false"
>          f.write(code_fmt(f"""\
> @@ -919,7 +919,6 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
>          """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_qreg_write(ctx, {self.reg_num});
>          """))
> @@ -941,7 +940,6 @@ def helper_hvx_desc(self, f):
>              /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
>          """))
>      def analyze_read(self, f, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_qreg_read(ctx, {self.reg_num});
>          """))
> @@ -967,8 +965,11 @@ def helper_hvx_desc(self, f):
>          f.write(code_fmt(f"""\
>              /* {self.reg_tcg()} is *(MMQReg *)({self.helper_arg_name()}) */
>          """))
> +    def analyze_read(self, f, regno):
> +        f.write(code_fmt(f"""\
> +            ctx_log_qreg_read(ctx, {self.reg_num});
> +        """))
>      def analyze_write(self, f, tag, regno):
> -        self.decl_reg_num(f, regno)
>          f.write(code_fmt(f"""\
>              ctx_log_qreg_write(ctx, {self.reg_num});
>          """))
> --
> 2.34.1

Reviewed-by: Brian Cain <bcain@quicinc.com>


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2024-03-29  2:06 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-01 10:33 [PATCH v2 0/3] Hexagon (target/hexagon) Enable more short-circuit packets Taylor Simpson
2024-02-01 10:33 ` [PATCH v2 1/3] Hexagon (target/hexagon) Analyze reads before writes Taylor Simpson
2024-03-29  2:05   ` Brian Cain
2024-02-01 10:33 ` [PATCH v2 2/3] Hexagon (target/hexagon) Enable more short-circuit packets (scalar core) Taylor Simpson
2024-03-29  2:02   ` Brian Cain
2024-02-01 10:33 ` [PATCH v2 3/3] Hexagon (target/hexagon) Enable more short-circuit packets (HVX) Taylor Simpson
2024-03-29  2:02   ` Brian Cain

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.