All of lore.kernel.org
 help / color / mirror / Atom feed
* [Qemu-devel] [PATCH 0/3] TCG: first attempt to reduce host registers usage
@ 2011-05-17 16:28 Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 1/3] tcg: replace op_dead_iargs by op_dead_args Aurelien Jarno
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Aurelien Jarno @ 2011-05-17 16:28 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

This patch series is a first attempt after the TCG_AREG0 discussion to
show it's possible to easily (at least without making huge changes to
the TCG core code) spare some registers and avoid a few spills on low
register count TCG targets like i386.

On i386 hosts some TB are reduced by up to 10%, though the global TB
reduction is around 0.5%. The gain is speed is only measurable in user
mode with very specific benchmarks: sha1sum of 4GB files is 1.9% +/-0.2%
faster. On x86-64 host, some TB are one or two bytes longer, some of
them one or two bytes smaller, mainly due to different register usage,
which adds or removes some REX prefixes, but overall the size is
unchanged.

I have a few more ideas to further reduce register spill, I'll try to
implement them in the next days/weeks.


Aurelien Jarno (3):
  tcg: replace op_dead_iargs by op_dead_args
  tcg: mark dead output argument in op_dead_args
  tcg: don't keep dead outputs in registers

 tcg/tcg.c |  105 +++++++++++++++++++++++++++++++++++--------------------------
 tcg/tcg.h |    4 +-
 2 files changed, 62 insertions(+), 47 deletions(-)

-- 
1.7.2.3

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 1/3] tcg: replace op_dead_iargs by op_dead_args
  2011-05-17 16:28 [Qemu-devel] [PATCH 0/3] TCG: first attempt to reduce host registers usage Aurelien Jarno
@ 2011-05-17 16:28 ` Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 2/3] tcg: mark dead output argument in op_dead_args Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 3/3] tcg: don't keep dead outputs in registers Aurelien Jarno
  2 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2011-05-17 16:28 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

Allow all args to be dead by replacing the input specific op_dead_iargs
variable by op_dead_args. Note this is a purely mechanical change.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/tcg.c |   71 +++++++++++++++++++++++++++++++------------------------------
 tcg/tcg.h |    4 +-
 2 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 8748c05..9a48cb9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1193,7 +1193,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps)
     }
 }
 
-/* Liveness analysis : update the opc_dead_iargs array to tell if a
+/* Liveness analysis : update the opc_dead_args array to tell if a
    given input arguments is dead. Instructions updating dead
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
@@ -1203,13 +1203,13 @@ static void tcg_liveness_analysis(TCGContext *s)
     TCGArg *args;
     const TCGOpDef *def;
     uint8_t *dead_temps;
-    unsigned int dead_iargs;
+    unsigned int dead_args;
     
     gen_opc_ptr++; /* skip end */
 
     nb_ops = gen_opc_ptr - gen_opc_buf;
 
-    s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
+    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
     
     dead_temps = tcg_malloc(s->nb_temps);
     memset(dead_temps, 1, s->nb_temps);
@@ -1256,17 +1256,17 @@ static void tcg_liveness_analysis(TCGContext *s)
                     }
 
                     /* input args are live */
-                    dead_iargs = 0;
-                    for(i = 0; i < nb_iargs; i++) {
-                        arg = args[i + nb_oargs];
+                    dead_args = 0;
+                    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+                        arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
                             if (dead_temps[arg]) {
-                                dead_iargs |= (1 << i);
+                                dead_args |= (1 << i);
                             }
                             dead_temps[arg] = 0;
                         }
                     }
-                    s->op_dead_iargs[op_index] = dead_iargs;
+                    s->op_dead_args[op_index] = dead_args;
                 }
                 args--;
             }
@@ -1327,15 +1327,15 @@ static void tcg_liveness_analysis(TCGContext *s)
                 }
 
                 /* input args are live */
-                dead_iargs = 0;
-                for(i = 0; i < nb_iargs; i++) {
-                    arg = args[i + nb_oargs];
+                dead_args = 0;
+                for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+                    arg = args[i];
                     if (dead_temps[arg]) {
-                        dead_iargs |= (1 << i);
+                        dead_args |= (1 << i);
                     }
                     dead_temps[arg] = 0;
                 }
-                s->op_dead_iargs[op_index] = dead_iargs;
+                s->op_dead_args[op_index] = dead_args;
             }
             break;
         }
@@ -1352,8 +1352,8 @@ static void tcg_liveness_analysis(TCGContext *s)
     int nb_ops;
     nb_ops = gen_opc_ptr - gen_opc_buf;
 
-    s->op_dead_iargs = tcg_malloc(nb_ops * sizeof(uint16_t));
-    memset(s->op_dead_iargs, 0, nb_ops * sizeof(uint16_t));
+    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
+    memset(s->op_dead_args, 0, nb_ops * sizeof(uint16_t));
 }
 #endif
 
@@ -1557,7 +1557,7 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
     save_globals(s, allocated_regs);
 }
 
-#define IS_DEAD_IARG(n) ((dead_iargs >> (n)) & 1)
+#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
 
 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
 {
@@ -1582,7 +1582,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args)
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
                               const TCGArg *args,
-                              unsigned int dead_iargs)
+                              unsigned int dead_args)
 {
     TCGTemp *ts, *ots;
     int reg;
@@ -1592,9 +1592,9 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
     ts = &s->temps[args[1]];
     arg_ct = &def->args_ct[0];
 
-    /* XXX: always mark arg dead if IS_DEAD_IARG(0) */
+    /* XXX: always mark arg dead if IS_DEAD_ARG(1) */
     if (ts->val_type == TEMP_VAL_REG) {
-        if (IS_DEAD_IARG(0) && !ts->fixed_reg && !ots->fixed_reg) {
+        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
             /* the mov can be suppressed */
             if (ots->val_type == TEMP_VAL_REG)
                 s->reg_to_temp[ots->reg] = -1;
@@ -1642,7 +1642,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
 static void tcg_reg_alloc_op(TCGContext *s, 
                              const TCGOpDef *def, TCGOpcode opc,
                              const TCGArg *args,
-                             unsigned int dead_iargs)
+                             unsigned int dead_args)
 {
     TCGRegSet allocated_regs;
     int i, k, nb_iargs, nb_oargs, reg;
@@ -1701,8 +1701,9 @@ static void tcg_reg_alloc_op(TCGContext *s,
                 /* if the input is aliased to an output and if it is
                    not dead after the instruction, we must allocate
                    a new register and move it */
-                if (!IS_DEAD_IARG(i - nb_oargs)) 
+                if (!IS_DEAD_ARG(i)) {
                     goto allocate_in_reg;
+                }
             }
         }
         reg = ts->reg;
@@ -1725,9 +1726,9 @@ static void tcg_reg_alloc_op(TCGContext *s,
         tcg_reg_alloc_bb_end(s, allocated_regs);
     } else {
         /* mark dead temporaries and free the associated registers */
-        for(i = 0; i < nb_iargs; i++) {
-            arg = args[nb_oargs + i];
-            if (IS_DEAD_IARG(i)) {
+        for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
+            arg = args[i];
+            if (IS_DEAD_ARG(i)) {
                 ts = &s->temps[arg];
                 if (!ts->fixed_reg) {
                     if (ts->val_type == TEMP_VAL_REG)
@@ -1808,7 +1809,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
 
 static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
                               TCGOpcode opc, const TCGArg *args,
-                              unsigned int dead_iargs)
+                              unsigned int dead_args)
 {
     int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
     TCGArg arg, func_arg;
@@ -1930,9 +1931,9 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
         
     
     /* mark dead temporaries and free the associated registers */
-    for(i = 0; i < nb_iargs; i++) {
-        arg = args[nb_oargs + i];
-        if (IS_DEAD_IARG(i)) {
+    for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
+        arg = args[i];
+        if (IS_DEAD_ARG(i)) {
             ts = &s->temps[arg];
             if (!ts->fixed_reg) {
                 if (ts->val_type == TEMP_VAL_REG)
@@ -2007,7 +2008,7 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
     TCGOpcode opc;
     int op_index;
     const TCGOpDef *def;
-    unsigned int dead_iargs;
+    unsigned int dead_args;
     const TCGArg *args;
 
 #ifdef DEBUG_DISAS
@@ -2058,8 +2059,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
 #if TCG_TARGET_REG_BITS == 64
         case INDEX_op_mov_i64:
 #endif
-            dead_iargs = s->op_dead_iargs[op_index];
-            tcg_reg_alloc_mov(s, def, args, dead_iargs);
+            dead_args = s->op_dead_args[op_index];
+            tcg_reg_alloc_mov(s, def, args, dead_args);
             break;
         case INDEX_op_movi_i32:
 #if TCG_TARGET_REG_BITS == 64
@@ -2095,8 +2096,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
             tcg_out_label(s, args[0], (long)s->code_ptr);
             break;
         case INDEX_op_call:
-            dead_iargs = s->op_dead_iargs[op_index];
-            args += tcg_reg_alloc_call(s, def, opc, args, dead_iargs);
+            dead_args = s->op_dead_args[op_index];
+            args += tcg_reg_alloc_call(s, def, opc, args, dead_args);
             goto next;
         case INDEX_op_end:
             goto the_end;
@@ -2104,8 +2105,8 @@ static inline int tcg_gen_code_common(TCGContext *s, uint8_t *gen_code_buf,
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            dead_iargs = s->op_dead_iargs[op_index];
-            tcg_reg_alloc_op(s, def, opc, args, dead_iargs);
+            dead_args = s->op_dead_args[op_index];
+            tcg_reg_alloc_op(s, def, opc, args, dead_args);
             break;
         }
         args += def->nb_args;
diff --git a/tcg/tcg.h b/tcg/tcg.h
index cecef63..2b985ac 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -286,8 +286,8 @@ struct TCGContext {
     uint16_t *tb_jmp_offset; /* != NULL if USE_DIRECT_JUMP */
 
     /* liveness analysis */
-    uint16_t *op_dead_iargs; /* for each operation, each bit tells if the
-                                corresponding input argument is dead */
+    uint16_t *op_dead_args; /* for each operation, each bit tells if the
+                               corresponding argument is dead */
     
     /* tells in which temporary a given register is. It does not take
        into account fixed registers */
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 2/3] tcg: mark dead output argument in op_dead_args
  2011-05-17 16:28 [Qemu-devel] [PATCH 0/3] TCG: first attempt to reduce host registers usage Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 1/3] tcg: replace op_dead_iargs by op_dead_args Aurelien Jarno
@ 2011-05-17 16:28 ` Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 3/3] tcg: don't keep dead outputs in registers Aurelien Jarno
  2 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2011-05-17 16:28 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

If an op is not removed and has dead output arguments, mark it
in op_dead_args similarly to what is done for input arguments.

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/tcg.c |   10 ++++++++--
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 9a48cb9..82d3e1d 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1245,8 +1245,12 @@ static void tcg_liveness_analysis(TCGContext *s)
                 do_not_remove_call:
 
                     /* output args are dead */
+                    dead_args = 0;
                     for(i = 0; i < nb_oargs; i++) {
                         arg = args[i];
+                        if (dead_temps[arg]) {
+                            dead_args |= (1 << i);
+                        }
                         dead_temps[arg] = 1;
                     }
                     
@@ -1256,7 +1260,6 @@ static void tcg_liveness_analysis(TCGContext *s)
                     }
 
                     /* input args are live */
-                    dead_args = 0;
                     for(i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                         arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
@@ -1313,8 +1316,12 @@ static void tcg_liveness_analysis(TCGContext *s)
             do_not_remove:
 
                 /* output args are dead */
+                dead_args = 0;
                 for(i = 0; i < nb_oargs; i++) {
                     arg = args[i];
+                    if (dead_temps[arg]) {
+                        dead_args |= (1 << i);
+                    }
                     dead_temps[arg] = 1;
                 }
 
@@ -1327,7 +1334,6 @@ static void tcg_liveness_analysis(TCGContext *s)
                 }
 
                 /* input args are live */
-                dead_args = 0;
                 for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [Qemu-devel] [PATCH 3/3] tcg: don't keep dead outputs in registers
  2011-05-17 16:28 [Qemu-devel] [PATCH 0/3] TCG: first attempt to reduce host registers usage Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 1/3] tcg: replace op_dead_iargs by op_dead_args Aurelien Jarno
  2011-05-17 16:28 ` [Qemu-devel] [PATCH 2/3] tcg: mark dead output argument in op_dead_args Aurelien Jarno
@ 2011-05-17 16:28 ` Aurelien Jarno
  2 siblings, 0 replies; 4+ messages in thread
From: Aurelien Jarno @ 2011-05-17 16:28 UTC (permalink / raw)
  To: qemu-devel; +Cc: Aurelien Jarno

If an op with dead outputs is not removed, because it has side effects
or has multiple output arguments and only one dead, mark the TCG 
registers as dead instead of keeping them allocated. This avoid a few 
register spills on TCG targets with low register count, especially with
div2 and mul2 ops, or when a qemu_ld* result is not used (prefetch 
emulation for example).

Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
---
 tcg/tcg.c |   28 ++++++++++++++++++----------
 1 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 82d3e1d..fad92f9 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1782,12 +1782,16 @@ static void tcg_reg_alloc_op(TCGContext *s,
             if (!ts->fixed_reg) {
                 if (ts->val_type == TEMP_VAL_REG)
                     s->reg_to_temp[ts->reg] = -1;
-                ts->val_type = TEMP_VAL_REG;
-                ts->reg = reg;
-                /* temp value is modified, so the value kept in memory is
-                   potentially not the same */
-                ts->mem_coherent = 0; 
-                s->reg_to_temp[reg] = arg;
+                if (IS_DEAD_ARG(i)) {
+                    ts->val_type = TEMP_VAL_DEAD;
+                } else {
+                    ts->val_type = TEMP_VAL_REG;
+                    ts->reg = reg;
+                    /* temp value is modified, so the value kept in memory is
+                       potentially not the same */
+                    ts->mem_coherent = 0;
+                    s->reg_to_temp[reg] = arg;
+               }
             }
         oarg_end:
             new_args[i] = reg;
@@ -1981,10 +1985,14 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
         } else {
             if (ts->val_type == TEMP_VAL_REG)
                 s->reg_to_temp[ts->reg] = -1;
-            ts->val_type = TEMP_VAL_REG;
-            ts->reg = reg;
-            ts->mem_coherent = 0; 
-            s->reg_to_temp[reg] = arg;
+            if (IS_DEAD_ARG(i)) {
+                ts->val_type = TEMP_VAL_DEAD;
+            } else {
+                ts->val_type = TEMP_VAL_REG;
+                ts->reg = reg;
+                ts->mem_coherent = 0;
+                s->reg_to_temp[reg] = arg;
+            }
         }
     }
     
-- 
1.7.2.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2011-05-17 16:28 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-17 16:28 [Qemu-devel] [PATCH 0/3] TCG: first attempt to reduce host registers usage Aurelien Jarno
2011-05-17 16:28 ` [Qemu-devel] [PATCH 1/3] tcg: replace op_dead_iargs by op_dead_args Aurelien Jarno
2011-05-17 16:28 ` [Qemu-devel] [PATCH 2/3] tcg: mark dead output argument in op_dead_args Aurelien Jarno
2011-05-17 16:28 ` [Qemu-devel] [PATCH 3/3] tcg: don't keep dead outputs in registers Aurelien Jarno

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.