render / rpms / qemu

Forked from rpms/qemu 11 months ago
Clone

Blame 0070-tcg-optimize-rework-copy-progagation.patch

5544c1
From bf408071104de13f79a0c3c8cac892f440462e7c Mon Sep 17 00:00:00 2001
5544c1
From: Aurelien Jarno <aurelien@aurel32.net>
5544c1
Date: Tue, 11 Sep 2012 12:31:21 +0200
5544c1
Subject: [PATCH] tcg/optimize: rework copy propagation
5544c1
5544c1
The copy propagation pass tries to keep track of what is a copy of what
5544c1
and what has a copy of what, and in addition it keeps a circular list of
5544c1
all the copies. Unfortunately this doesn't fully work: a mov from
5544c1
a temp which has a state "COPY" changed it into a state "HAS_COPY".
5544c1
Later when this temp is used again, it is considered as not having a
5544c1
copy and thus no propagation is done.
5544c1
5544c1
This patch fixes that by removing the hierarchy between copies, and thus
5544c1
only keeping a "COPY" state both meaning "is a copy" and "has a copy".
5544c1
The decision of which copy to use is deferred to the actual temp
5544c1
replacement. At this stage there is no single best choice to make, but only
5544c1
better choices than others. To make the best choice, the operations
5544c1
would have to be parsed in reverse to know if a temp is going to be
5544c1
used later or not. That is what is done by the liveness analysis. At this
5544c1
stage it is known that globals will be always live, that local temps
5544c1
will be dead at the end of the translation block, and that the temps
5544c1
will be dead at the end of the basic block. This means that this stage
5544c1
should try to replace temps by local temps or globals and local temps
5544c1
by globals.
5544c1
5544c1
Reviewed-by: Richard Henderson <rth@twiddle.net>
5544c1
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
5544c1
Signed-off-by: Michael Roth <mdroth@linux.vnet.ibm.com>
5544c1
---
5544c1
 tcg/optimize.c | 167 +++++++++++++++++++++++++++++++--------------------------
5544c1
 1 file changed, 92 insertions(+), 75 deletions(-)
5544c1
5544c1
diff --git a/tcg/optimize.c b/tcg/optimize.c
5544c1
index da8dffe..1904b39 100644
5544c1
--- a/tcg/optimize.c
5544c1
+++ b/tcg/optimize.c
5544c1
@@ -39,7 +39,6 @@ typedef enum {
5544c1
     TCG_TEMP_UNDEF = 0,
5544c1
     TCG_TEMP_CONST,
5544c1
     TCG_TEMP_COPY,
5544c1
-    TCG_TEMP_HAS_COPY
5544c1
 } tcg_temp_state;
5544c1
 
5544c1
 struct tcg_temp_info {
5544c1
@@ -51,39 +50,19 @@ struct tcg_temp_info {
5544c1
 
5544c1
 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
5544c1
 
5544c1
-/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP was a representative of some
5544c1
-   class of equivalent temp's, a new representative should be chosen in this
5544c1
-   class. */
5544c1
-static void reset_temp(TCGArg temp, int nb_temps, int nb_globals)
5544c1
+/* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
5544c1
+   the copy flag from the left temp.  */
5544c1
+static void reset_temp(TCGArg temp)
5544c1
 {
5544c1
-    int i;
5544c1
-    TCGArg new_base = (TCGArg)-1;
5544c1
-    if (temps[temp].state == TCG_TEMP_HAS_COPY) {
5544c1
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
5544c1
-            if (i >= nb_globals) {
5544c1
-                temps[i].state = TCG_TEMP_HAS_COPY;
5544c1
-                new_base = i;
5544c1
-                break;
5544c1
-            }
5544c1
-        }
5544c1
-        for (i = temps[temp].next_copy; i != temp; i = temps[i].next_copy) {
5544c1
-            if (new_base == (TCGArg)-1) {
5544c1
-                temps[i].state = TCG_TEMP_UNDEF;
5544c1
-            } else {
5544c1
-                temps[i].val = new_base;
5544c1
-            }
5544c1
+    if (temps[temp].state == TCG_TEMP_COPY) {
5544c1
+        if (temps[temp].prev_copy == temps[temp].next_copy) {
5544c1
+            temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
5544c1
+        } else {
5544c1
+            temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
5544c1
+            temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
5544c1
         }
5544c1
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
5544c1
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
5544c1
-    } else if (temps[temp].state == TCG_TEMP_COPY) {
5544c1
-        temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
5544c1
-        temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
5544c1
-        new_base = temps[temp].val;
5544c1
     }
5544c1
     temps[temp].state = TCG_TEMP_UNDEF;
5544c1
-    if (new_base != (TCGArg)-1 && temps[new_base].next_copy == new_base) {
5544c1
-        temps[new_base].state = TCG_TEMP_UNDEF;
5544c1
-    }
5544c1
 }
5544c1
 
5544c1
 static int op_bits(TCGOpcode op)
5544c1
@@ -106,34 +85,83 @@ static TCGOpcode op_to_movi(TCGOpcode op)
5544c1
     }
5544c1
 }
5544c1
 
5544c1
+static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
5544c1
+{
5544c1
+    TCGArg i;
5544c1
+
5544c1
+    /* If this is already a global, we can't do better. */
5544c1
+    if (temp < s->nb_globals) {
5544c1
+        return temp;
5544c1
+    }
5544c1
+
5544c1
+    /* Search for a global first. */
5544c1
+    for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
5544c1
+        if (i < s->nb_globals) {
5544c1
+            return i;
5544c1
+        }
5544c1
+    }
5544c1
+
5544c1
+    /* If it is a temp, search for a temp local. */
5544c1
+    if (!s->temps[temp].temp_local) {
5544c1
+        for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
5544c1
+            if (s->temps[i].temp_local) {
5544c1
+                return i;
5544c1
+            }
5544c1
+        }
5544c1
+    }
5544c1
+
5544c1
+    /* Failure to find a better representation, return the same temp. */
5544c1
+    return temp;
5544c1
+}
5544c1
+
5544c1
+static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
5544c1
+{
5544c1
+    TCGArg i;
5544c1
+
5544c1
+    if (arg1 == arg2) {
5544c1
+        return true;
5544c1
+    }
5544c1
+
5544c1
+    if (temps[arg1].state != TCG_TEMP_COPY
5544c1
+        || temps[arg2].state != TCG_TEMP_COPY) {
5544c1
+        return false;
5544c1
+    }
5544c1
+
5544c1
+    for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
5544c1
+        if (i == arg2) {
5544c1
+            return true;
5544c1
+        }
5544c1
+    }
5544c1
+
5544c1
+    return false;
5544c1
+}
5544c1
+
5544c1
 static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args,
5544c1
                             TCGArg dst, TCGArg src)
5544c1
 {
5544c1
-        reset_temp(dst, s->nb_temps, s->nb_globals);
5544c1
-        assert(temps[src].state != TCG_TEMP_COPY);
5544c1
-        /* Only consider temps with the same type (width) as copies. */
5544c1
-        if (src >= s->nb_globals && s->temps[dst].type == s->temps[src].type) {
5544c1
-            assert(temps[src].state != TCG_TEMP_CONST);
5544c1
-            if (temps[src].state != TCG_TEMP_HAS_COPY) {
5544c1
-                temps[src].state = TCG_TEMP_HAS_COPY;
5544c1
+        reset_temp(dst);
5544c1
+        assert(temps[src].state != TCG_TEMP_CONST);
5544c1
+
5544c1
+        if (s->temps[src].type == s->temps[dst].type) {
5544c1
+            if (temps[src].state != TCG_TEMP_COPY) {
5544c1
+                temps[src].state = TCG_TEMP_COPY;
5544c1
                 temps[src].next_copy = src;
5544c1
                 temps[src].prev_copy = src;
5544c1
             }
5544c1
             temps[dst].state = TCG_TEMP_COPY;
5544c1
-            temps[dst].val = src;
5544c1
             temps[dst].next_copy = temps[src].next_copy;
5544c1
             temps[dst].prev_copy = src;
5544c1
             temps[temps[dst].next_copy].prev_copy = dst;
5544c1
             temps[src].next_copy = dst;
5544c1
         }
5544c1
+
5544c1
         gen_args[0] = dst;
5544c1
         gen_args[1] = src;
5544c1
 }
5544c1
 
5544c1
-static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val,
5544c1
-                             int nb_temps, int nb_globals)
5544c1
+static void tcg_opt_gen_movi(TCGArg *gen_args, TCGArg dst, TCGArg val)
5544c1
 {
5544c1
-        reset_temp(dst, nb_temps, nb_globals);
5544c1
+        reset_temp(dst);
5544c1
         temps[dst].state = TCG_TEMP_CONST;
5544c1
         temps[dst].val = val;
5544c1
         gen_args[0] = dst;
5544c1
@@ -324,7 +352,6 @@ static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
5544c1
     tcg_abort();
5544c1
 }
5544c1
 
5544c1
-
5544c1
 /* Propagate constants and copies, fold constant expressions. */
5544c1
 static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
                                     TCGArg *args, TCGOpDef *tcg_op_defs)
5544c1
@@ -338,10 +365,8 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
 
5544c1
     /* Array VALS has an element for each temp.
5544c1
        If this temp holds a constant then its value is kept in VALS' element.
5544c1
-       If this temp is a copy of other ones then this equivalence class'
5544c1
-       representative is kept in VALS' element.
5544c1
-       If this temp is neither copy nor constant then corresponding VALS'
5544c1
-       element is unused. */
5544c1
+       If this temp is a copy of other ones then the other copies are
5544c1
+       available through the doubly linked circular list. */
5544c1
 
5544c1
     nb_temps = s->nb_temps;
5544c1
     nb_globals = s->nb_globals;
5544c1
@@ -357,7 +382,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             assert(op != INDEX_op_call);
5544c1
             for (i = def->nb_oargs; i < def->nb_oargs + def->nb_iargs; i++) {
5544c1
                 if (temps[args[i]].state == TCG_TEMP_COPY) {
5544c1
-                    args[i] = temps[args[i]].val;
5544c1
+                    args[i] = find_better_copy(s, args[i]);
5544c1
                 }
5544c1
             }
5544c1
         }
5544c1
@@ -429,7 +454,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             if (temps[args[1]].state == TCG_TEMP_CONST
5544c1
                 && temps[args[1]].val == 0) {
5544c1
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
5544c1
+                tcg_opt_gen_movi(gen_args, args[0], 0);
5544c1
                 args += 3;
5544c1
                 gen_args += 2;
5544c1
                 continue;
5544c1
@@ -456,9 +481,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             }
5544c1
             if (temps[args[2]].state == TCG_TEMP_CONST
5544c1
                 && temps[args[2]].val == 0) {
5544c1
-                if ((temps[args[0]].state == TCG_TEMP_COPY
5544c1
-                    && temps[args[0]].val == args[1])
5544c1
-                    || args[0] == args[1]) {
5544c1
+                if (temps_are_copies(args[0], args[1])) {
5544c1
                     gen_opc_buf[op_index] = INDEX_op_nop;
5544c1
                 } else {
5544c1
                     gen_opc_buf[op_index] = op_to_mov(op);
5544c1
@@ -480,7 +503,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             if ((temps[args[2]].state == TCG_TEMP_CONST
5544c1
                 && temps[args[2]].val == 0)) {
5544c1
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1
-                tcg_opt_gen_movi(gen_args, args[0], 0, nb_temps, nb_globals);
5544c1
+                tcg_opt_gen_movi(gen_args, args[0], 0);
5544c1
                 args += 3;
5544c1
                 gen_args += 2;
5544c1
                 continue;
5544c1
@@ -495,7 +518,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
         CASE_OP_32_64(or):
5544c1
         CASE_OP_32_64(and):
5544c1
             if (args[1] == args[2]) {
5544c1
-                if (args[1] == args[0]) {
5544c1
+                if (temps_are_copies(args[0], args[1])) {
5544c1
                     gen_opc_buf[op_index] = INDEX_op_nop;
5544c1
                 } else {
5544c1
                     gen_opc_buf[op_index] = op_to_mov(op);
5544c1
@@ -515,9 +538,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
            allocator where needed and possible.  Also detect copies. */
5544c1
         switch (op) {
5544c1
         CASE_OP_32_64(mov):
5544c1
-            if ((temps[args[1]].state == TCG_TEMP_COPY
5544c1
-                && temps[args[1]].val == args[0])
5544c1
-                || args[0] == args[1]) {
5544c1
+            if (temps_are_copies(args[0], args[1])) {
5544c1
                 args += 2;
5544c1
                 gen_opc_buf[op_index] = INDEX_op_nop;
5544c1
                 break;
5544c1
@@ -535,7 +556,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             args[1] = temps[args[1]].val;
5544c1
             /* fallthrough */
5544c1
         CASE_OP_32_64(movi):
5544c1
-            tcg_opt_gen_movi(gen_args, args[0], args[1], nb_temps, nb_globals);
5544c1
+            tcg_opt_gen_movi(gen_args, args[0], args[1]);
5544c1
             gen_args += 2;
5544c1
             args += 2;
5544c1
             break;
5544c1
@@ -550,9 +571,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             if (temps[args[1]].state == TCG_TEMP_CONST) {
5544c1
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1
                 tmp = do_constant_folding(op, temps[args[1]].val, 0);
5544c1
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
5544c1
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
5544c1
             } else {
5544c1
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1
+                reset_temp(args[0]);
5544c1
                 gen_args[0] = args[0];
5544c1
                 gen_args[1] = args[1];
5544c1
             }
5544c1
@@ -580,10 +601,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1
                 tmp = do_constant_folding(op, temps[args[1]].val,
5544c1
                                           temps[args[2]].val);
5544c1
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
5544c1
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
5544c1
                 gen_args += 2;
5544c1
             } else {
5544c1
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1
+                reset_temp(args[0]);
5544c1
                 gen_args[0] = args[0];
5544c1
                 gen_args[1] = args[1];
5544c1
                 gen_args[2] = args[2];
5544c1
@@ -597,10 +618,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
                 gen_opc_buf[op_index] = op_to_movi(op);
5544c1
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
5544c1
                                                temps[args[2]].val, args[3]);
5544c1
-                tcg_opt_gen_movi(gen_args, args[0], tmp, nb_temps, nb_globals);
5544c1
+                tcg_opt_gen_movi(gen_args, args[0], tmp);
5544c1
                 gen_args += 2;
5544c1
             } else {
5544c1
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1
+                reset_temp(args[0]);
5544c1
                 gen_args[0] = args[0];
5544c1
                 gen_args[1] = args[1];
5544c1
                 gen_args[2] = args[2];
5544c1
@@ -623,7 +644,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
                 }
5544c1
             } else {
5544c1
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
5544c1
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1
+                reset_temp(args[0]);
5544c1
                 gen_args[0] = args[0];
5544c1
                 gen_args[1] = args[1];
5544c1
                 gen_args[2] = args[2];
5544c1
@@ -637,23 +658,19 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
                 && temps[args[2]].state == TCG_TEMP_CONST) {
5544c1
                 tmp = do_constant_folding_cond(op, temps[args[1]].val,
5544c1
                                                temps[args[2]].val, args[5]);
5544c1
-                if (args[0] == args[4-tmp]
5544c1
-                    || (temps[args[4-tmp]].state == TCG_TEMP_COPY
5544c1
-                        && temps[args[4-tmp]].val == args[0])) {
5544c1
+                if (temps_are_copies(args[0], args[4-tmp])) {
5544c1
                     gen_opc_buf[op_index] = INDEX_op_nop;
5544c1
                 } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
5544c1
                     gen_opc_buf[op_index] = op_to_movi(op);
5544c1
-                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val,
5544c1
-                                     nb_temps, nb_globals);
5544c1
+                    tcg_opt_gen_movi(gen_args, args[0], temps[args[4-tmp]].val);
5544c1
                     gen_args += 2;
5544c1
                 } else {
5544c1
                     gen_opc_buf[op_index] = op_to_mov(op);
5544c1
-                    tcg_opt_gen_mov(gen_args, args[0], args[4-tmp],
5544c1
-                                    nb_temps, nb_globals);
5544c1
+                    tcg_opt_gen_mov(s, gen_args, args[0], args[4-tmp]);
5544c1
                     gen_args += 2;
5544c1
                 }
5544c1
             } else {
5544c1
-                reset_temp(args[0], nb_temps, nb_globals);
5544c1
+                reset_temp(args[0]);
5544c1
                 gen_args[0] = args[0];
5544c1
                 gen_args[1] = args[1];
5544c1
                 gen_args[2] = args[2];
5544c1
@@ -668,11 +685,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
             nb_call_args = (args[0] >> 16) + (args[0] & 0xffff);
5544c1
             if (!(args[nb_call_args + 1] & (TCG_CALL_CONST | TCG_CALL_PURE))) {
5544c1
                 for (i = 0; i < nb_globals; i++) {
5544c1
-                    reset_temp(i, nb_temps, nb_globals);
5544c1
+                    reset_temp(i);
5544c1
                 }
5544c1
             }
5544c1
             for (i = 0; i < (args[0] >> 16); i++) {
5544c1
-                reset_temp(args[i + 1], nb_temps, nb_globals);
5544c1
+                reset_temp(args[i + 1]);
5544c1
             }
5544c1
             i = nb_call_args + 3;
5544c1
             while (i) {
5544c1
@@ -691,7 +708,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
5544c1
                 memset(temps, 0, nb_temps * sizeof(struct tcg_temp_info));
5544c1
             } else {
5544c1
                 for (i = 0; i < def->nb_oargs; i++) {
5544c1
-                    reset_temp(args[i], nb_temps, nb_globals);
5544c1
+                    reset_temp(args[i]);
5544c1
                 }
5544c1
             }
5544c1
             for (i = 0; i < def->nb_args; i++) {
5544c1
-- 
5544c1
1.7.12.1
5544c1