qemu-devel.nongnu.org archive mirror
 help / color / mirror / Atom feed
From: Taylor Simpson <tsimpson@quicinc.com>
To: qemu-devel@nongnu.org
Cc: ale@rev.ng, philmd@redhat.com, tsimpson@quicinc.com,
	richard.henderson@linaro.org, bcain@quicinc.com
Subject: [PATCH v2 19/21] Hexagon (target/hexagon) load and unpack bytes instructions
Date: Wed, 31 Mar 2021 22:53:31 -0500	[thread overview]
Message-ID: <1617249213-22667-20-git-send-email-tsimpson@quicinc.com> (raw)
In-Reply-To: <1617249213-22667-1-git-send-email-tsimpson@quicinc.com>

The following instructions are added
    L2_loadbzw2_io          Rd32 = memubh(Rs32+#s11:1)
    L2_loadbzw4_io          Rdd32 = memubh(Rs32+#s11:1)
    L2_loadbsw2_io          Rd32 = membh(Rs32+#s11:1)
    L2_loadbsw4_io          Rdd32 = membh(Rs32+#s11:1)

    L4_loadbzw2_ur          Rd32 = memubh(Rt32<<#u2+#U6)
    L4_loadbzw4_ur          Rdd32 = memubh(Rt32<<#u2+#U6)
    L4_loadbsw2_ur          Rd32 = membh(Rt32<<#u2+#U6)
    L4_loadbsw4_ur          Rdd32 = membh(Rt32<<#u2+#U6)

    L4_loadbzw2_ap          Rd32 = memubh(Re32=#U6)
    L4_loadbzw4_ap          Rdd32 = memubh(Re32=#U6)
    L4_loadbsw2_ap          Rd32 = membh(Re32=#U6)
    L4_loadbsw4_ap          Rdd32 = membh(Re32=#U6)

    L2_loadbzw2_pr          Rd32 = memubh(Rx32++Mu2)
    L2_loadbzw4_pr          Rdd32 = memubh(Rx32++Mu2)
    L2_loadbsw2_pr          Rd32 = membh(Rx32++Mu2)
    L2_loadbsw4_pr          Rdd32 = membh(Rx32++Mu2)

    L2_loadbzw2_pbr         Rd32 = memubh(Rx32++Mu2:brev)
    L2_loadbzw4_pbr         Rdd32 = memubh(Rx32++Mu2:brev)
    L2_loadbsw2_pbr         Rd32 = membh(Rx32++Mu2:brev)
    L2_loadbsw4_pbr         Rdd32 = membh(Rx32++Mu2:brev)

    L2_loadbzw2_pi          Rd32 = memubh(Rx32++#s4:1)
    L2_loadbzw4_pi          Rdd32 = memubh(Rx32++#s4:1)
    L2_loadbsw2_pi          Rd32 = membh(Rx32++#s4:1)
    L2_loadbsw4_pi          Rdd32 = membh(Rx32++#s4:1)

    L2_loadbzw2_pci         Rd32 = memubh(Rx32++#s4:1:circ(Mu2))
    L2_loadbzw4_pci         Rdd32 = memubh(Rx32++#s4:1:circ(Mu2))
    L2_loadbsw2_pci         Rd32 = membh(Rx32++#s4:1:circ(Mu2))
    L2_loadbsw4_pci         Rdd32 = membh(Rx32++#s4:1:circ(Mu2))

    L2_loadbzw2_pcr         Rd32 = memubh(Rx32++I:circ(Mu2))
    L2_loadbzw4_pcr         Rdd32 = memubh(Rx32++I:circ(Mu2))
    L2_loadbsw2_pcr         Rd32 = membh(Rx32++I:circ(Mu2))
    L2_loadbsw4_pcr         Rdd32 = membh(Rx32++I:circ(Mu2))

Test cases in tests/tcg/hexagon/load_unpack.c

Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
---
 target/hexagon/gen_tcg.h              | 108 ++++++++
 target/hexagon/genptr.c               |  13 +
 target/hexagon/imported/encode_pp.def |   6 +
 target/hexagon/imported/ldst.idef     |  43 +++
 target/hexagon/macros.h               |  16 ++
 tests/tcg/hexagon/Makefile.target     |   1 +
 tests/tcg/hexagon/load_unpack.c       | 474 ++++++++++++++++++++++++++++++++++
 7 files changed, 661 insertions(+)
 create mode 100644 tests/tcg/hexagon/load_unpack.c

diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h
index 143f656..570389f 100644
--- a/target/hexagon/gen_tcg.h
+++ b/target/hexagon/gen_tcg.h
@@ -153,6 +153,114 @@
 #define fGEN_TCG_L2_loadrd_pi(SHORTCODE)       SHORTCODE
 
 /*
+ * These instructions load 2 bytes and places them in
+ * two halves of the destination register.
+ * The GET_EA macro determines the addressing mode.
+ * The SIGN argument determines whether to zero-extend or
+ * sign-extend.
+ */
+#define fGEN_TCG_loadbXw2(GET_EA, SIGN) \
+    do { \
+        TCGv tmp = tcg_temp_new(); \
+        TCGv byte = tcg_temp_new(); \
+        GET_EA; \
+        fLOAD(1, 2, u, EA, tmp); \
+        tcg_gen_movi_tl(RdV, 0); \
+        for (int i = 0; i < 2; i++) { \
+            gen_set_half(i, RdV, gen_get_byte(byte, i, tmp, (SIGN))); \
+        } \
+        tcg_temp_free(tmp); \
+        tcg_temp_free(byte); \
+    } while (0)
+
+#define fGEN_TCG_L2_loadbzw2_io(SHORTCODE) \
+    fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), false)
+#define fGEN_TCG_L4_loadbzw2_ur(SHORTCODE) \
+    fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), false)
+#define fGEN_TCG_L2_loadbsw2_io(SHORTCODE) \
+    fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), true)
+#define fGEN_TCG_L4_loadbsw2_ur(SHORTCODE) \
+    fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), true)
+#define fGEN_TCG_L4_loadbzw2_ap(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_ap, false)
+#define fGEN_TCG_L2_loadbzw2_pr(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pr, false)
+#define fGEN_TCG_L2_loadbzw2_pbr(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pbr, false)
+#define fGEN_TCG_L2_loadbzw2_pi(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pi, false)
+#define fGEN_TCG_L4_loadbsw2_ap(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_ap, true)
+#define fGEN_TCG_L2_loadbsw2_pr(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pr, true)
+#define fGEN_TCG_L2_loadbsw2_pbr(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pbr, true)
+#define fGEN_TCG_L2_loadbsw2_pi(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pi, true)
+#define fGEN_TCG_L2_loadbzw2_pci(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pci, false)
+#define fGEN_TCG_L2_loadbsw2_pci(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pci, true)
+#define fGEN_TCG_L2_loadbzw2_pcr(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pcr(1), false)
+#define fGEN_TCG_L2_loadbsw2_pcr(SHORTCODE) \
+    fGEN_TCG_loadbXw2(GET_EA_pcr(1), true)
+
+/*
+ * These instructions load 4 bytes and places them in
+ * four halves of the destination register pair.
+ * The GET_EA macro determines the addressing mode.
+ * The SIGN argument determines whether to zero-extend or
+ * sign-extend.
+ */
+#define fGEN_TCG_loadbXw4(GET_EA, SIGN) \
+    do { \
+        TCGv tmp = tcg_temp_new(); \
+        TCGv byte = tcg_temp_new(); \
+        GET_EA; \
+        fLOAD(1, 4, u, EA, tmp);  \
+        tcg_gen_movi_i64(RddV, 0); \
+        for (int i = 0; i < 4; i++) { \
+            gen_set_half_i64(i, RddV, gen_get_byte(byte, i, tmp, (SIGN)));  \
+        }  \
+        tcg_temp_free(tmp); \
+        tcg_temp_free(byte); \
+    } while (0)
+
+#define fGEN_TCG_L2_loadbzw4_io(SHORTCODE) \
+    fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), false)
+#define fGEN_TCG_L4_loadbzw4_ur(SHORTCODE) \
+    fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), false)
+#define fGEN_TCG_L2_loadbsw4_io(SHORTCODE) \
+    fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), true)
+#define fGEN_TCG_L4_loadbsw4_ur(SHORTCODE) \
+    fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), true)
+#define fGEN_TCG_L2_loadbzw4_pci(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pci, false)
+#define fGEN_TCG_L2_loadbsw4_pci(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pci, true)
+#define fGEN_TCG_L2_loadbzw4_pcr(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pcr(2), false)
+#define fGEN_TCG_L2_loadbsw4_pcr(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pcr(2), true)
+#define fGEN_TCG_L4_loadbzw4_ap(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_ap, false)
+#define fGEN_TCG_L2_loadbzw4_pr(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pr, false)
+#define fGEN_TCG_L2_loadbzw4_pbr(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pbr, false)
+#define fGEN_TCG_L2_loadbzw4_pi(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pi, false)
+#define fGEN_TCG_L4_loadbsw4_ap(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_ap, true)
+#define fGEN_TCG_L2_loadbsw4_pr(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pr, true)
+#define fGEN_TCG_L2_loadbsw4_pbr(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pbr, true)
+#define fGEN_TCG_L2_loadbsw4_pi(SHORTCODE) \
+    fGEN_TCG_loadbXw4(GET_EA_pi, true)
+
+/*
  * Predicated loads
  * Here is a primer to understand the tag names
  *
diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c
index 0d83fe3..34c9d96 100644
--- a/target/hexagon/genptr.c
+++ b/target/hexagon/genptr.c
@@ -306,6 +306,19 @@ static inline TCGv gen_get_half(TCGv result, int N, TCGv src, bool sign)
     return result;
 }
 
+static inline void gen_set_half(int N, TCGv result, TCGv src)
+{
+    tcg_gen_deposit_tl(result, result, src, N * 16, 16);
+}
+
+static inline void gen_set_half_i64(int N, TCGv_i64 result, TCGv src)
+{
+    TCGv_i64 src64 = tcg_temp_new_i64();
+    tcg_gen_extu_i32_i64(src64, src);
+    tcg_gen_deposit_i64(result, result, src64, N * 16, 16);
+    tcg_temp_free_i64(src64);
+}
+
 static inline void gen_set_byte(int N, TCGv result, TCGv src)
 {
     tcg_gen_deposit_tl(result, result, src, N * 8, 8);
diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def
index 4464926..e3582eb 100644
--- a/target/hexagon/imported/encode_pp.def
+++ b/target/hexagon/imported/encode_pp.def
@@ -342,6 +342,12 @@ DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC"  iiiii  PP111tti  1--ddd
 
 
 /*               0 000  misc: dealloc,loadw_locked,dcfetch      */
+STD_LD_ENC(bzw4,"0 101")
+STD_LD_ENC(bzw2,"0 011")
+
+STD_LD_ENC(bsw4,"0 111")
+STD_LD_ENC(bsw2,"0 001")
+
 STD_LD_ENC(rb,  "1 000")
 STD_LD_ENC(rub, "1 001")
 STD_LD_ENC(rh,  "1 010")
diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef
index fe7e018..95c0470 100644
--- a/target/hexagon/imported/ldst.idef
+++ b/target/hexagon/imported/ldst.idef
@@ -38,6 +38,49 @@ STD_LD_AMODES(loadrh, "Rd32=memh", "Load signed Half integer",ATTRIBS(A_LOAD),"1
 STD_LD_AMODES(loadri, "Rd32=memw", "Load Word",ATTRIBS(A_LOAD),"2",fLOAD(1,4,u,EA,RdV),2)
 STD_LD_AMODES(loadrd, "Rdd32=memd","Load Double integer",ATTRIBS(A_LOAD),"3",fLOAD(1,8,u,EA,RddV),3)
 
+/* These instructions do a load an unpack */
+STD_LD_AMODES(loadbzw2, "Rd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)",
+ATTRIBS(A_LOAD),"1",
+{fHIDE(size2u_t tmpV; int i;)
+ fLOAD(1,2,u,EA,tmpV);
+ for (i=0;i<2;i++) {
+  fSETHALF(i,RdV,fGETUBYTE(i,tmpV));
+ }
+},1)
+
+STD_LD_AMODES(loadbzw4, "Rdd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)",
+ATTRIBS(A_LOAD),"2",
+{fHIDE(size4u_t tmpV; int i;)
+ fLOAD(1,4,u,EA,tmpV);
+ for (i=0;i<4;i++) {
+  fSETHALF(i,RddV,fGETUBYTE(i,tmpV));
+ }
+},2)
+
+
+
+/* These instructions do a load an unpack */
+STD_LD_AMODES(loadbsw2, "Rd32=membh", "Load Bytes and Vector Sign-Extend (unpack)",
+ATTRIBS(A_LOAD),"1",
+{fHIDE(size2u_t tmpV; int i;)
+ fLOAD(1,2,u,EA,tmpV);
+ for (i=0;i<2;i++) {
+  fSETHALF(i,RdV,fGETBYTE(i,tmpV));
+ }
+},1)
+
+STD_LD_AMODES(loadbsw4, "Rdd32=membh", "Load Bytes and Vector Sign-Extend (unpack)",
+ATTRIBS(A_LOAD),"2",
+{fHIDE(size4u_t tmpV; int i;)
+ fLOAD(1,4,u,EA,tmpV);
+ for (i=0;i<4;i++) {
+  fSETHALF(i,RddV,fGETBYTE(i,tmpV));
+ }
+},2)
+
+
+
+
 /* The set of addressing modes standard to all Store instructions */
 #define STD_ST_AMODES(TAG,DEST,OPER,DESCR,ATTRIB,SHFT,SEMANTICS,SCALE)\
 Q6INSN(S2_##TAG##_io,  OPER"(Rs32+#s11:"SHFT")="DEST,     ATTRIB,DESCR,{fIMMEXT(siV); fEA_RI(RsV,siV); SEMANTICS; })\
diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h
index 4d26abe..a71da8c 100644
--- a/target/hexagon/macros.h
+++ b/target/hexagon/macros.h
@@ -465,6 +465,21 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
 #define fCAST8S_16S(A) (int128_exts64(A))
 #define fCAST16S_8S(A) (int128_getlo(A))
 
+#ifdef QEMU_GENERATE
+#define fEA_RI(REG, IMM) tcg_gen_addi_tl(EA, REG, IMM)
+#define fEA_RRs(REG, REG2, SCALE) \
+    do { \
+        TCGv tmp = tcg_temp_new(); \
+        tcg_gen_shli_tl(tmp, REG2, SCALE); \
+        tcg_gen_add_tl(EA, REG, tmp); \
+        tcg_temp_free(tmp); \
+    } while (0)
+#define fEA_IRs(IMM, REG, SCALE) \
+    do { \
+        tcg_gen_shli_tl(EA, REG, SCALE); \
+        tcg_gen_addi_tl(EA, EA, IMM); \
+    } while (0)
+#else
 #define fEA_RI(REG, IMM) \
     do { \
         EA = REG + IMM; \
@@ -477,6 +492,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift)
     do { \
         EA = IMM + (REG << SCALE); \
     } while (0)
+#endif
 
 #ifdef QEMU_GENERATE
 #define fEA_IMM(IMM) tcg_gen_movi_tl(EA, IMM)
diff --git a/tests/tcg/hexagon/Makefile.target b/tests/tcg/hexagon/Makefile.target
index 98cd4ab..5411eee 100644
--- a/tests/tcg/hexagon/Makefile.target
+++ b/tests/tcg/hexagon/Makefile.target
@@ -43,6 +43,7 @@ HEX_TESTS += multi_result
 HEX_TESTS += mem_noshuf
 HEX_TESTS += circ
 HEX_TESTS += brev
+HEX_TESTS += load_unpack
 HEX_TESTS += atomics
 HEX_TESTS += fpstuff
 
diff --git a/tests/tcg/hexagon/load_unpack.c b/tests/tcg/hexagon/load_unpack.c
new file mode 100644
index 0000000..3575a37
--- /dev/null
+++ b/tests/tcg/hexagon/load_unpack.c
@@ -0,0 +1,474 @@
+/*
+ *  Copyright(c) 2019-2021 Qualcomm Innovation Center, Inc. All Rights Reserved.
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Test load unpack instructions
+ *
+ * Example
+ *     r0 = memubh(r1+#0)
+ * loads a half word from memory and zero-extends the 2 bytes to form a word
+ *
+ * For each addressing mode, there are 4 tests
+ *     bzw2          unsigned     2 elements
+ *     bsw2          signed       2 elements
+ *     bzw4          unsigned     4 elements
+ *     bsw4          signed       4 elements
+ * There are 8 addressing modes, for a total of 32 instructions to test
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+int err;
+
+char buf[16] __attribute__((aligned(1 << 16)));
+
+void init_buf(void)
+{
+    int i;
+    for (i = 0; i < 16; i++) {
+        int sign = i % 2 == 0 ? 0x80 : 0;
+        buf[i] = sign | (i + 1);
+    }
+}
+
+void __check(int line, long long result, long long expect)
+{
+    if (result != expect) {
+        printf("ERROR at line %d: 0x%08llx != 0x%08llx\n",
+               line, result, expect);
+        err++;
+    }
+}
+
+#define check(RES, EXP) __check(__LINE__, RES, EXP)
+
+void __checkp(int line, void *p, void *expect)
+{
+    if (p != expect) {
+        printf("ERROR at line %d: 0x%p != 0x%p\n", line, p, expect);
+        err++;
+    }
+}
+
+#define checkp(RES, EXP) __checkp(__LINE__, RES, EXP)
+
+/*
+ ****************************************************************************
+ * _io addressing mode (addr + offset)
+ */
+#define BxW_LOAD_io(SZ, RES, ADDR, OFF) \
+    __asm__( \
+        "%0 = mem" #SZ "(%1+#" #OFF ")\n\t" \
+        : "=r"(RES) \
+        : "r"(ADDR))
+#define BxW_LOAD_io_Z(RES, ADDR, OFF) \
+    BxW_LOAD_io(ubh, RES, ADDR, OFF)
+#define BxW_LOAD_io_S(RES, ADDR, OFF) \
+    BxW_LOAD_io(bh, RES, ADDR, OFF)
+
+#define TEST_io(NAME, TYPE, SIGN, SIZE, EXT, EXP1, EXP2, EXP3, EXP4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    init_buf(); \
+    BxW_LOAD_io_##SIGN(result, buf, 0 * (SIZE)); \
+    check(result, (EXP1) | (EXT)); \
+    BxW_LOAD_io_##SIGN(result, buf, 1 * (SIZE)); \
+    check(result, (EXP2) | (EXT)); \
+    BxW_LOAD_io_##SIGN(result, buf, 2 * (SIZE)); \
+    check(result, (EXP3) | (EXT)); \
+    BxW_LOAD_io_##SIGN(result, buf, 3 * (SIZE)); \
+    check(result, (EXP4) | (EXT)); \
+}
+
+
+TEST_io(loadbzw2_io, int, Z, 2, 0x00000000,
+        0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_io(loadbsw2_io, int, S, 2, 0x0000ff00,
+        0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_io(loadbzw4_io, long long, Z,  4, 0x0000000000000000LL,
+        0x0004008300020081LL, 0x0008008700060085LL,
+        0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_io(loadbsw4_io, long long, S,  4, 0x0000ff000000ff00LL,
+        0x0004008300020081LL, 0x0008008700060085LL,
+        0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _ur addressing mode (index << offset + base)
+ */
+#define BxW_LOAD_ur(SZ, RES, SHIFT, IDX) \
+    __asm__( \
+        "%0 = mem" #SZ "(%1<<#" #SHIFT " + ##buf)\n\t" \
+        : "=r"(RES) \
+        : "r"(IDX))
+#define BxW_LOAD_ur_Z(RES, SHIFT, IDX) \
+    BxW_LOAD_ur(ubh, RES, SHIFT, IDX)
+#define BxW_LOAD_ur_S(RES, SHIFT, IDX) \
+    BxW_LOAD_ur(bh, RES, SHIFT, IDX)
+
+#define TEST_ur(NAME, TYPE, SIGN, SHIFT, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    init_buf(); \
+    BxW_LOAD_ur_##SIGN(result, (SHIFT), 0); \
+    check(result, (RES1) | (EXT)); \
+    BxW_LOAD_ur_##SIGN(result, (SHIFT), 1); \
+    check(result, (RES2) | (EXT)); \
+    BxW_LOAD_ur_##SIGN(result, (SHIFT), 2); \
+    check(result, (RES3) | (EXT)); \
+    BxW_LOAD_ur_##SIGN(result, (SHIFT), 3); \
+    check(result, (RES4) | (EXT)); \
+} \
+
+TEST_ur(loadbzw2_ur, int, Z, 1, 0x00000000,
+        0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ur(loadbsw2_ur, int, S, 1, 0x0000ff00,
+        0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ur(loadbzw4_ur, long long, Z, 2, 0x0000000000000000LL,
+        0x0004008300020081LL, 0x0008008700060085LL,
+        0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_ur(loadbsw4_ur, long long, S, 2, 0x0000ff000000ff00LL,
+        0x0004008300020081LL, 0x0008008700060085LL,
+        0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _ap addressing mode (addr = base)
+ */
+#define BxW_LOAD_ap(SZ, RES, PTR, ADDR) \
+    __asm__( \
+        "%0 = mem" #SZ "(%1 = ##" #ADDR ")\n\t" \
+        : "=r"(RES), "=r"(PTR))
+#define BxW_LOAD_ap_Z(RES, PTR, ADDR) \
+    BxW_LOAD_ap(ubh, RES, PTR, ADDR)
+#define BxW_LOAD_ap_S(RES, PTR, ADDR) \
+    BxW_LOAD_ap(bh, RES, PTR, ADDR)
+
+#define TEST_ap(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    void *ptr; \
+    init_buf(); \
+    BxW_LOAD_ap_##SIGN(result, ptr, (buf + 0 * (SIZE))); \
+    check(result, (RES1) | (EXT)); \
+    checkp(ptr, &buf[0 * (SIZE)]); \
+    BxW_LOAD_ap_##SIGN(result, ptr, (buf + 1 * (SIZE))); \
+    check(result, (RES2) | (EXT)); \
+    checkp(ptr, &buf[1 * (SIZE)]); \
+    BxW_LOAD_ap_##SIGN(result, ptr, (buf + 2 * (SIZE))); \
+    check(result, (RES3) | (EXT)); \
+    checkp(ptr, &buf[2 * (SIZE)]); \
+    BxW_LOAD_ap_##SIGN(result, ptr, (buf + 3 * (SIZE))); \
+    check(result, (RES4) | (EXT)); \
+    checkp(ptr, &buf[3 * (SIZE)]); \
+}
+
+TEST_ap(loadbzw2_ap, int, Z, 2, 0x00000000,
+        0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ap(loadbsw2_ap, int, S, 2, 0x0000ff00,
+        0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_ap(loadbzw4_ap, long long, Z, 4, 0x0000000000000000LL,
+        0x0004008300020081LL, 0x0008008700060085LL,
+        0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_ap(loadbsw4_ap, long long, S, 4, 0x0000ff000000ff00LL,
+        0x0004008300020081LL, 0x0008008700060085LL,
+        0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _rp addressing mode (addr ++ modifer-reg)
+ */
+#define BxW_LOAD_pr(SZ, RES, PTR, INC) \
+    __asm__( \
+        "m0 = %2\n\t" \
+        "%0 = mem" #SZ "(%1++m0)\n\t" \
+        : "=r"(RES), "+r"(PTR) \
+        : "r"(INC) \
+        : "m0")
+#define BxW_LOAD_pr_Z(RES, PTR, INC) \
+    BxW_LOAD_pr(ubh, RES, PTR, INC)
+#define BxW_LOAD_pr_S(RES, PTR, INC) \
+    BxW_LOAD_pr(bh, RES, PTR, INC)
+
+#define TEST_pr(NAME, TYPE, SIGN, SIZE, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    void *ptr = buf; \
+    init_buf(); \
+    BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+    check(result, (RES1) | (EXT)); \
+    checkp(ptr, &buf[1 * (SIZE)]); \
+    BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+    check(result, (RES2) | (EXT)); \
+    checkp(ptr, &buf[2 * (SIZE)]); \
+    BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+    check(result, (RES3) | (EXT)); \
+    checkp(ptr, &buf[3 * (SIZE)]); \
+    BxW_LOAD_pr_##SIGN(result, ptr, (SIZE)); \
+    check(result, (RES4) | (EXT)); \
+    checkp(ptr, &buf[4 * (SIZE)]); \
+}
+
+TEST_pr(loadbzw2_pr, int, Z, 2, 0x00000000,
+    0x00020081, 0x0040083, 0x00060085, 0x00080087)
+TEST_pr(loadbsw2_pr, int, S, 2, 0x0000ff00,
+    0x00020081, 0x0040083, 0x00060085, 0x00080087)
+TEST_pr(loadbzw4_pr, long long, Z, 4, 0x0000000000000000LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_pr(loadbsw4_pr, long long, S, 4, 0x0000ff000000ff00LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _pbr addressing mode (addr ++ modifer-reg:brev)
+ */
+#define BxW_LOAD_pbr(SZ, RES, PTR) \
+    __asm__( \
+        "r4 = #(1 << (16 - 3))\n\t" \
+        "m0 = r4\n\t" \
+        "%0 = mem" #SZ "(%1++m0:brev)\n\t" \
+        : "=r"(RES), "+r"(PTR) \
+        : \
+        : "r4", "m0")
+#define BxW_LOAD_pbr_Z(RES, PTR) \
+    BxW_LOAD_pbr(ubh, RES, PTR)
+#define BxW_LOAD_pbr_S(RES, PTR) \
+    BxW_LOAD_pbr(bh, RES, PTR)
+
+#define TEST_pbr(NAME, TYPE, SIGN, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    void *ptr = buf; \
+    init_buf(); \
+    BxW_LOAD_pbr_##SIGN(result, ptr); \
+    check(result, (RES1) | (EXT)); \
+    BxW_LOAD_pbr_##SIGN(result, ptr); \
+    check(result, (RES2) | (EXT)); \
+    BxW_LOAD_pbr_##SIGN(result, ptr); \
+    check(result, (RES3) | (EXT)); \
+    BxW_LOAD_pbr_##SIGN(result, ptr); \
+    check(result, (RES4) | (EXT)); \
+}
+
+TEST_pbr(loadbzw2_pbr, int, Z, 0x00000000,
+    0x00020081, 0x00060085, 0x00040083, 0x00080087)
+TEST_pbr(loadbsw2_pbr, int, S, 0x0000ff00,
+    0x00020081, 0x00060085, 0x00040083, 0x00080087)
+TEST_pbr(loadbzw4_pbr, long long, Z, 0x0000000000000000LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x0006008500040083LL, 0x000a008900080087LL)
+TEST_pbr(loadbsw4_pbr, long long, S, 0x0000ff000000ff00LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x0006008500040083LL, 0x000a008900080087LL)
+
+/*
+ ****************************************************************************
+ * _pi addressing mode (addr ++ inc)
+ */
+#define BxW_LOAD_pi(SZ, RES, PTR, INC) \
+    __asm__( \
+        "%0 = mem" #SZ "(%1++#" #INC ")\n\t" \
+        : "=r"(RES), "+r"(PTR))
+#define BxW_LOAD_pi_Z(RES, PTR, INC) \
+    BxW_LOAD_pi(ubh, RES, PTR, INC)
+#define BxW_LOAD_pi_S(RES, PTR, INC) \
+    BxW_LOAD_pi(bh, RES, PTR, INC)
+
+#define TEST_pi(NAME, TYPE, SIGN, INC, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    void *ptr = buf; \
+    init_buf(); \
+    BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+    check(result, (RES1) | (EXT)); \
+    checkp(ptr, &buf[1 * (INC)]); \
+    BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+    check(result, (RES2) | (EXT)); \
+    checkp(ptr, &buf[2 * (INC)]); \
+    BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+    check(result, (RES3) | (EXT)); \
+    checkp(ptr, &buf[3 * (INC)]); \
+    BxW_LOAD_pi_##SIGN(result, ptr, (INC)); \
+    check(result, (RES4) | (EXT)); \
+    checkp(ptr, &buf[4 * (INC)]); \
+}
+
+TEST_pi(loadbzw2_pi, int, Z, 2, 0x00000000,
+    0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_pi(loadbsw2_pi, int, S, 2, 0x0000ff00,
+    0x00020081, 0x00040083, 0x00060085, 0x00080087)
+TEST_pi(loadbzw4_pi, long long, Z, 4, 0x0000000000000000LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x000c008b000a0089LL, 0x0010008f000e008dLL)
+TEST_pi(loadbsw4_pi, long long, S, 4, 0x0000ff000000ff00LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x000c008b000a0089LL, 0x0010008f000e008dLL)
+
+/*
+ ****************************************************************************
+ * _pci addressing mode (addr ++ inc:circ)
+ */
+#define BxW_LOAD_pci(SZ, RES, PTR, START, LEN, INC) \
+    __asm__( \
+        "r4 = %3\n\t" \
+        "m0 = r4\n\t" \
+        "cs0 = %2\n\t" \
+        "%0 = mem" #SZ "(%1++#" #INC ":circ(m0))\n\t" \
+        : "=r"(RES), "+r"(PTR) \
+        : "r"(START), "r"(LEN) \
+        : "r4", "m0", "cs0")
+#define BxW_LOAD_pci_Z(RES, PTR, START, LEN, INC) \
+    BxW_LOAD_pci(ubh, RES, PTR, START, LEN, INC)
+#define BxW_LOAD_pci_S(RES, PTR, START, LEN, INC) \
+    BxW_LOAD_pci(bh, RES, PTR, START, LEN, INC)
+
+#define TEST_pci(NAME, TYPE, SIGN, LEN, INC, EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    void *ptr = buf; \
+    init_buf(); \
+    BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES1) | (EXT)); \
+    checkp(ptr, &buf[(1 * (INC)) % (LEN)]); \
+    BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES2) | (EXT)); \
+    checkp(ptr, &buf[(2 * (INC)) % (LEN)]); \
+    BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES3) | (EXT)); \
+    checkp(ptr, &buf[(3 * (INC)) % (LEN)]); \
+    BxW_LOAD_pci_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES4) | (EXT)); \
+    checkp(ptr, &buf[(4 * (INC)) % (LEN)]); \
+}
+
+TEST_pci(loadbzw2_pci, int, Z, 6, 2, 0x00000000,
+    0x00020081, 0x00040083, 0x00060085, 0x00020081)
+TEST_pci(loadbsw2_pci, int, S, 6, 2, 0x0000ff00,
+    0x00020081, 0x00040083, 0x00060085, 0x00020081)
+TEST_pci(loadbzw4_pci, long long, Z, 8, 4, 0x0000000000000000LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x0004008300020081LL, 0x0008008700060085LL)
+TEST_pci(loadbsw4_pci, long long, S, 8, 4, 0x0000ff000000ff00LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x0004008300020081LL, 0x0008008700060085LL)
+
+/*
+ ****************************************************************************
+ * _pcr addressing mode (addr ++ I:circ(modifier-reg))
+ */
+#define BxW_LOAD_pcr(SZ, RES, PTR, START, LEN, INC) \
+    __asm__( \
+        "r4 = %2\n\t" \
+        "m1 = r4\n\t" \
+        "cs1 = %3\n\t" \
+        "%0 = mem" #SZ "(%1++I:circ(m1))\n\t" \
+        : "=r"(RES), "+r"(PTR) \
+        : "r"((((INC) & 0x7f) << 17) | ((LEN) & 0x1ffff)), \
+          "r"(START) \
+        : "r4", "m1", "cs1")
+#define BxW_LOAD_pcr_Z(RES, PTR, START, LEN, INC) \
+    BxW_LOAD_pcr(ubh, RES, PTR, START, LEN, INC)
+#define BxW_LOAD_pcr_S(RES, PTR, START, LEN, INC) \
+    BxW_LOAD_pcr(bh, RES, PTR, START, LEN, INC)
+
+#define TEST_pcr(NAME, TYPE, SIGN, SIZE, LEN, INC, \
+                 EXT, RES1, RES2, RES3, RES4) \
+void test_##NAME(void) \
+{ \
+    TYPE result; \
+    void *ptr = buf; \
+    init_buf(); \
+    BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES1) | (EXT)); \
+    checkp(ptr, &buf[(1 * (INC) * (SIZE)) % (LEN)]); \
+    BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES2) | (EXT)); \
+    checkp(ptr, &buf[(2 * (INC) * (SIZE)) % (LEN)]); \
+    BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES3) | (EXT)); \
+    checkp(ptr, &buf[(3 * (INC) * (SIZE)) % (LEN)]); \
+    BxW_LOAD_pcr_##SIGN(result, ptr, buf, (LEN), (INC)); \
+    check(result, (RES4) | (EXT)); \
+    checkp(ptr, &buf[(4 * (INC) * (SIZE)) % (LEN)]); \
+}
+
+TEST_pcr(loadbzw2_pcr, int, Z, 2, 8, 2, 0x00000000,
+    0x00020081, 0x00060085, 0x00020081, 0x00060085)
+TEST_pcr(loadbsw2_pcr, int, S, 2, 8, 2, 0x0000ff00,
+    0x00020081, 0x00060085, 0x00020081, 0x00060085)
+TEST_pcr(loadbzw4_pcr, long long, Z, 4, 8, 1, 0x0000000000000000LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x0004008300020081LL, 0x0008008700060085LL)
+TEST_pcr(loadbsw4_pcr, long long, S, 4, 8, 1, 0x0000ff000000ff00LL,
+    0x0004008300020081LL, 0x0008008700060085LL,
+    0x0004008300020081LL, 0x0008008700060085LL)
+
+int main()
+{
+    test_loadbzw2_io();
+    test_loadbsw2_io();
+    test_loadbzw4_io();
+    test_loadbsw4_io();
+
+    test_loadbzw2_ur();
+    test_loadbsw2_ur();
+    test_loadbzw4_ur();
+    test_loadbsw4_ur();
+
+    test_loadbzw2_ap();
+    test_loadbsw2_ap();
+    test_loadbzw4_ap();
+    test_loadbsw4_ap();
+
+    test_loadbzw2_pr();
+    test_loadbsw2_pr();
+    test_loadbzw4_pr();
+    test_loadbsw4_pr();
+
+    test_loadbzw2_pbr();
+    test_loadbsw2_pbr();
+    test_loadbzw4_pbr();
+    test_loadbsw4_pbr();
+
+    test_loadbzw2_pi();
+    test_loadbsw2_pi();
+    test_loadbzw4_pi();
+    test_loadbsw4_pi();
+
+    test_loadbzw2_pci();
+    test_loadbsw2_pci();
+    test_loadbzw4_pci();
+    test_loadbsw4_pci();
+
+    test_loadbzw2_pcr();
+    test_loadbsw2_pcr();
+    test_loadbzw4_pcr();
+    test_loadbsw4_pcr();
+
+    puts(err ? "FAIL" : "PASS");
+    return err ? 1 : 0;
+}
-- 
2.7.4



  parent reply	other threads:[~2021-04-01  4:00 UTC|newest]

Thread overview: 49+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-04-01  3:53 [PATCH v2 00/21] Hexagon (target/hexagon) update Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 01/21] Hexagon (target/hexagon) TCG generation cleanup Taylor Simpson
2021-04-02 17:25   ` Richard Henderson
2021-04-02 17:46   ` Richard Henderson
2021-04-02 19:42     ` Taylor Simpson
2021-04-02 20:00       ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 02/21] Hexagon (target/hexagon) remove unnecessary inline directives Taylor Simpson
2021-04-02 17:26   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 03/21] Hexagon (target/hexagon) use env_archcpu and env_cpu Taylor Simpson
2021-04-02 17:27   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 04/21] Hexagon (target/hexagon) properly generate TB end for DISAS_NORETURN Taylor Simpson
2021-04-02 17:34   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 05/21] Hexagon (target/hexagon) decide if pred has been written at TCG gen time Taylor Simpson
2021-04-02 17:44   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 06/21] Hexagon (target/hexagon) change variables from int to bool when appropriate Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 07/21] Hexagon (target/hexagon) remove unused carry_from_add64 function Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 08/21] Hexagon (target/hexagon) change type of softfloat_roundingmodes Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 09/21] Hexagon (target/hexagon) use softfloat default NaN and tininess Taylor Simpson
2021-04-02 17:48   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 10/21] Hexagon (target/hexagon) replace float32_mul_pow2 with float32_scalbn Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 11/21] Hexagon (target/hexagon) use softfloat for float-to-int conversions Taylor Simpson
2021-04-06 20:09   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 12/21] Hexagon (target/hexagon) add F2_sfrecipa instruction Taylor Simpson
2021-04-06 20:30   ` Richard Henderson
2021-04-06 20:46   ` Richard Henderson
2021-04-06 21:55     ` Taylor Simpson
2021-04-06 22:13       ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 13/21] Hexagon (target/hexagon) add F2_sfinvsqrta Taylor Simpson
2021-04-06 20:47   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 14/21] Hexagon (target/hexagon) add A5_ACS (vacsh) Taylor Simpson
2021-04-06 20:51   ` Richard Henderson
2021-04-06 21:31     ` Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 15/21] Hexagon (target/hexagon) add A6_vminub_RdP Taylor Simpson
2021-04-06 20:57   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 16/21] Hexagon (target/hexagon) add A4_addp_c/A4_subp_c Taylor Simpson
2021-04-06 21:11   ` Richard Henderson
2021-04-06 21:58     ` Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 17/21] Hexagon (target/hexagon) circular addressing Taylor Simpson
2021-04-06 22:11   ` Richard Henderson
2021-04-07  3:11     ` Taylor Simpson
2021-04-07 16:27       ` Taylor Simpson
2021-04-01  3:53 ` [PATCH v2 18/21] Hexagon (target/hexagon) bit reverse (brev) addressing Taylor Simpson
2021-04-06 22:35   ` Richard Henderson
2021-04-01  3:53 ` Taylor Simpson [this message]
2021-04-06 22:46   ` [PATCH v2 19/21] Hexagon (target/hexagon) load and unpack bytes instructions Richard Henderson
2021-04-01  3:53 ` [PATCH v2 20/21] Hexagon (target/hexagon) load into shifted register instructions Taylor Simpson
2021-04-06 22:50   ` Richard Henderson
2021-04-01  3:53 ` [PATCH v2 21/21] Hexagon (target/hexagon) CABAC decode bin Taylor Simpson
2021-04-06 22:54   ` Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1617249213-22667-20-git-send-email-tsimpson@quicinc.com \
    --to=tsimpson@quicinc.com \
    --cc=ale@rev.ng \
    --cc=bcain@quicinc.com \
    --cc=philmd@redhat.com \
    --cc=qemu-devel@nongnu.org \
    --cc=richard.henderson@linaro.org \
    --subject='Re: [PATCH v2 19/21] Hexagon (target/hexagon) load and unpack bytes instructions' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).