Blame SOURCES/valgrind-3.14.0-ppc-frontend-new-IROps.patch

560544
commit 81d9832226d6e3d1ee78ee3133189d7b520e7eea
560544
Author: Julian Seward <jseward@acm.org>
560544
Date:   Tue Nov 20 11:36:53 2018 +0100
560544
560544
    ppc front end: use new IROps added in 42719898.
560544
    
560544
    This pertains to bug 386945.
560544
    
560544
    VEX/priv/guest_ppc_toIR.c:
560544
    
560544
    gen_POPCOUNT: use Iop_PopCount{32,64} where possible.
560544
    
560544
    gen_vpopcntd_mode32: use Iop_PopCount32.
560544
    
560544
    for cntlz{w,d}, use Iop_ClzNat{32,64}; in dis_modulo_int, use Iop_CtzNat{32,64}.
560544
    
560544
    gen_byterev32: use Iop_Reverse8sIn32_x1 instead of lengthy sequence.
560544
    
560544
    verbose_Clz32: remove (was unused anyway).
560544
560544
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
560544
index cb1cae1..8977d4f 100644
560544
--- a/VEX/priv/guest_ppc_toIR.c
560544
+++ b/VEX/priv/guest_ppc_toIR.c
560544
@@ -1595,7 +1595,8 @@ typedef enum {
560544
 /* Generate an IR sequence to do a popcount operation on the supplied
560544
    IRTemp, and return a new IRTemp holding the result.  'ty' may be
560544
    Ity_I32 or Ity_I64 only. */
560544
-static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_type )
560544
+static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src,
560544
+                             _popcount_data_type data_type )
560544
 {
560544
   /* Do count across 2^data_type bits,
560544
      byte:        data_type = 3
560544
@@ -1611,6 +1612,22 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
560544
 
560544
    vassert(ty == Ity_I64 || ty == Ity_I32);
560544
 
560544
+   // Use a single IROp in cases where we can.
560544
+
560544
+   if (ty == Ity_I64 && data_type == DWORD) {
560544
+      IRTemp res = newTemp(Ity_I64);
560544
+      assign(res, unop(Iop_PopCount64, mkexpr(src)));
560544
+      return res;
560544
+   }
560544
+
560544
+   if (ty == Ity_I32 && data_type == WORD) {
560544
+      IRTemp res = newTemp(Ity_I32);
560544
+      assign(res, unop(Iop_PopCount32, mkexpr(src)));
560544
+      return res;
560544
+   }
560544
+
560544
+   // For the rest, we have to do it the slow way.
560544
+
560544
    if (ty == Ity_I32) {
560544
 
560544
       for (idx = 0; idx < WORD; idx++) {
560544
@@ -1638,7 +1655,7 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
560544
       return nyu;
560544
    }
560544
 
560544
-// else, ty == Ity_I64
560544
+   // else, ty == Ity_I64
560544
    vassert(mode64);
560544
 
560544
    for (i = 0; i < DWORD; i++) {
560544
@@ -1670,52 +1687,15 @@ static IRTemp gen_POPCOUNT ( IRType ty, IRTemp src, _popcount_data_type data_typ
560544
  */
560544
 static IRTemp gen_vpopcntd_mode32 ( IRTemp src1, IRTemp src2 )
560544
 {
560544
-   Int i, shift[6];
560544
-   IRTemp mask[6];
560544
-   IRTemp old = IRTemp_INVALID;
560544
-   IRTemp nyu1 = IRTemp_INVALID;
560544
-   IRTemp nyu2 = IRTemp_INVALID;
560544
    IRTemp retval = newTemp(Ity_I64);
560544
 
560544
    vassert(!mode64);
560544
 
560544
-   for (i = 0; i < WORD; i++) {
560544
-      mask[i]  = newTemp(Ity_I32);
560544
-      shift[i] = 1 << i;
560544
-   }
560544
-   assign(mask[0], mkU32(0x55555555));
560544
-   assign(mask[1], mkU32(0x33333333));
560544
-   assign(mask[2], mkU32(0x0F0F0F0F));
560544
-   assign(mask[3], mkU32(0x00FF00FF));
560544
-   assign(mask[4], mkU32(0x0000FFFF));
560544
-   old = src1;
560544
-   for (i = 0; i < WORD; i++) {
560544
-      nyu1 = newTemp(Ity_I32);
560544
-      assign(nyu1,
560544
-             binop(Iop_Add32,
560544
-                   binop(Iop_And32,
560544
-                         mkexpr(old),
560544
-                         mkexpr(mask[i])),
560544
-                   binop(Iop_And32,
560544
-                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
560544
-                         mkexpr(mask[i]))));
560544
-      old = nyu1;
560544
-   }
560544
-
560544
-   old = src2;
560544
-   for (i = 0; i < WORD; i++) {
560544
-      nyu2 = newTemp(Ity_I32);
560544
-      assign(nyu2,
560544
-             binop(Iop_Add32,
560544
-                   binop(Iop_And32,
560544
-                         mkexpr(old),
560544
-                         mkexpr(mask[i])),
560544
-                   binop(Iop_And32,
560544
-                         binop(Iop_Shr32, mkexpr(old), mkU8(shift[i])),
560544
-                         mkexpr(mask[i]))));
560544
-      old = nyu2;
560544
-   }
560544
-   assign(retval, unop(Iop_32Uto64, binop(Iop_Add32, mkexpr(nyu1), mkexpr(nyu2))));
560544
+   assign(retval,
560544
+          unop(Iop_32Uto64,
560544
+               binop(Iop_Add32,
560544
+                     unop(Iop_PopCount32, mkexpr(src1)),
560544
+                     unop(Iop_PopCount32, mkexpr(src2)))));
560544
    return retval;
560544
 }
560544
 
560544
@@ -5715,7 +5695,7 @@ static Bool dis_modulo_int ( UInt theInstr )
560544
                 rA_address, rS_address);
560544
 
560544
             assign( rS, getIReg( rS_address ) );
560544
-            assign( result, unop( Iop_Ctz32,
560544
+            assign( result, unop( Iop_CtzNat32,
560544
                                   unop( Iop_64to32, mkexpr( rS ) ) ) );
560544
             assign( rA, binop( Iop_32HLto64, mkU32( 0 ), mkexpr( result ) ) );
560544
 
560544
@@ -5746,7 +5726,7 @@ static Bool dis_modulo_int ( UInt theInstr )
560544
                 rA_address, rS_address);
560544
 
560544
             assign( rS, getIReg( rS_address ) );
560544
-            assign( rA, unop( Iop_Ctz64, mkexpr( rS ) ) );
560544
+            assign( rA, unop( Iop_CtzNat64, mkexpr( rS ) ) );
560544
 
560544
             if ( flag_rC == 1 )
560544
                set_CR0( mkexpr( rA ) );
560544
@@ -6307,7 +6287,6 @@ static Bool dis_int_logic ( UInt theInstr )
560544
    IRTemp rS     = newTemp(ty);
560544
    IRTemp rA     = newTemp(ty);
560544
    IRTemp rB     = newTemp(ty);
560544
-   IRExpr* irx;
560544
    Bool do_rc    = False;
560544
 
560544
    assign( rS, getIReg(rS_addr) );
560544
@@ -6404,26 +6383,16 @@ static Bool dis_int_logic ( UInt theInstr )
560544
          break;
560544
          
560544
       case 0x01A: { // cntlzw (Count Leading Zeros Word, PPC32 p371)
560544
-         IRExpr* lo32;
560544
          if (rB_addr!=0) {
560544
             vex_printf("dis_int_logic(ppc)(cntlzw,rB_addr)\n");
560544
             return False;
560544
          }
560544
-         DIP("cntlzw%s r%u,r%u\n",
560544
-             flag_rC ? ".":"", rA_addr, rS_addr);
560544
+         DIP("cntlzw%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
560544
          
560544
          // mode64: count in low word only
560544
-         lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
560544
-         
560544
-         // Iop_Clz32 undefined for arg==0, so deal with that case:
560544
-         irx =  binop(Iop_CmpNE32, lo32, mkU32(0));
560544
-         assign(rA, mkWidenFrom32(ty,
560544
-                         IRExpr_ITE( irx,
560544
-                                     unop(Iop_Clz32, lo32),
560544
-                                     mkU32(32)),
560544
-                         False));
560544
-
560544
-         // TODO: alternatively: assign(rA, verbose_Clz32(rS));
560544
+         IRExpr* lo32 = mode64 ? unop(Iop_64to32, mkexpr(rS)) : mkexpr(rS);
560544
+         IRExpr* res32 = unop(Iop_ClzNat32, lo32);
560544
+         assign(rA, mode64 ? unop(Iop_32Uto64, res32) : res32);
560544
          break;
560544
       }
560544
          
560544
@@ -6521,14 +6490,8 @@ static Bool dis_int_logic ( UInt theInstr )
560544
             vex_printf("dis_int_logic(ppc)(cntlzd,rB_addr)\n");
560544
             return False;
560544
          }
560544
-         DIP("cntlzd%s r%u,r%u\n",
560544
-             flag_rC ? ".":"", rA_addr, rS_addr);
560544
-         // Iop_Clz64 undefined for arg==0, so deal with that case:
560544
-         irx =  binop(Iop_CmpNE64, mkexpr(rS), mkU64(0));
560544
-         assign(rA, IRExpr_ITE( irx,
560544
-                                unop(Iop_Clz64, mkexpr(rS)),
560544
-                                mkU64(64) ));
560544
-         // TODO: alternatively: assign(rA, verbose_Clz64(rS));
560544
+         DIP("cntlzd%s r%u,r%u\n", flag_rC ? ".":"", rA_addr, rS_addr);
560544
+         assign(rA, unop(Iop_ClzNat64, mkexpr(rS)));
560544
          break;
560544
 
560544
       case 0x1FC: // cmpb (Power6: compare bytes)
560544
@@ -6574,8 +6537,9 @@ static Bool dis_int_logic ( UInt theInstr )
560544
          putFReg( rS_addr, mkexpr(frA));
560544
          return True;
560544
       }
560544
-      case 0x1FA: // popcntd (population count doubleword
560544
+      case 0x1FA: // popcntd (population count doubleword)
560544
       {
560544
+          vassert(mode64);
560544
     	  DIP("popcntd r%u,r%u\n", rA_addr, rS_addr);
560544
     	  IRTemp result = gen_POPCOUNT(ty, rS, DWORD);
560544
     	  putIReg( rA_addr, mkexpr(result) );
560544
@@ -9154,18 +9118,7 @@ static Bool dis_int_shift ( UInt theInstr )
560544
 static IRExpr* /* :: Ity_I32 */ gen_byterev32 ( IRTemp t )
560544
 {
560544
    vassert(typeOfIRTemp(irsb->tyenv, t) == Ity_I32);
560544
-   return
560544
-      binop(Iop_Or32,
560544
-         binop(Iop_Shl32, mkexpr(t), mkU8(24)),
560544
-      binop(Iop_Or32,
560544
-         binop(Iop_And32, binop(Iop_Shl32, mkexpr(t), mkU8(8)), 
560544
-                          mkU32(0x00FF0000)),
560544
-      binop(Iop_Or32,
560544
-         binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(8)),
560544
-                          mkU32(0x0000FF00)),
560544
-         binop(Iop_And32, binop(Iop_Shr32, mkexpr(t), mkU8(24)),
560544
-                          mkU32(0x000000FF) )
560544
-      )));
560544
+   return unop(Iop_Reverse8sIn32_x1, mkexpr(t));
560544
 }
560544
 
560544
 /* Generates code to swap the byte order in the lower half of an Ity_I32,
560544
@@ -9225,6 +9178,10 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
560544
 
560544
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
560544
       {
560544
+         // JRS FIXME:
560544
+         // * is the host_endness conditional below actually necessary?
560544
+         // * can we just do a 64-bit load followed by Iop_Reverse8sIn64_x1?
560544
+         //   That would be a lot more efficient.
560544
          IRExpr * nextAddr;
560544
          IRTemp w3 = newTemp( Ity_I32 );
560544
          IRTemp w4 = newTemp( Ity_I32 );
560544
@@ -17056,8 +17013,8 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
560544
       case 0x7C3:    // vpopcntd
560544
       {
560544
          if (mode64) {
560544
-            /* Break vector into 64-bit double words and do the population count
560544
-             * on each double word.
560544
+            /* Break vector into 64-bit double words and do the population
560544
+               count on each double word.
560544
              */
560544
             IRType ty = Ity_I64;
560544
             IRTemp bits0_63   = newTemp(Ity_I64);
560544
@@ -17077,15 +17034,16 @@ dis_av_count_bitTranspose ( UInt theInstr, UInt opc2 )
560544
                                       mkexpr( cnt_bits0_63 ) ) );
560544
          } else {
560544
             /* Break vector into 32-bit words and do the population count
560544
-             * on each doubleword.
560544
+               on each 32-bit word.
560544
              */
560544
             IRTemp bits0_31, bits32_63, bits64_95, bits96_127;
560544
             bits0_31 = bits32_63 = bits64_95 = bits96_127 = IRTemp_INVALID;
560544
-            IRTemp cnt_bits0_63   = newTemp(Ity_I64);
560544
+            IRTemp cnt_bits0_63    = newTemp(Ity_I64);
560544
             IRTemp cnt_bits64_127  = newTemp(Ity_I64);
560544
 
560544
             DIP("vpopcntd v%d,v%d\n", vRT_addr, vRB_addr);
560544
-            breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95, &bits32_63, &bits0_31 );
560544
+            breakV128to4x32(mkexpr( vB), &bits96_127, &bits64_95,
560544
+                                         &bits32_63, &bits0_31 );
560544
 
560544
             cnt_bits0_63   = gen_vpopcntd_mode32(bits0_31, bits32_63);
560544
             cnt_bits64_127 = gen_vpopcntd_mode32(bits64_95, bits96_127);
560544
@@ -29103,10 +29061,12 @@ DisResult disInstr_PPC_WRK (
560544
 
560544
       /* Miscellaneous ISA 2.06 instructions */
560544
       case 0x1FA: // popcntd
560544
+         if (!mode64) goto decode_failure;
560544
+         /* else fallthru */
560544
       case 0x17A: // popcntw
560544
       case 0x7A:  // popcntb
560544
-	  if (dis_int_logic( theInstr )) goto decode_success;
560544
-    	  goto decode_failure;
560544
+         if (dis_int_logic( theInstr )) goto decode_success;
560544
+         goto decode_failure;
560544
 
560544
       case 0x0FC: // bpermd
560544
          if (!mode64) goto decode_failure;
560544
@@ -29669,94 +29629,6 @@ DisResult disInstr_PPC ( IRSB*        irsb_IN,
560544
    return dres;
560544
 }
560544
 
560544
-
560544
-/*------------------------------------------------------------*/
560544
-/*--- Unused stuff                                         ---*/
560544
-/*------------------------------------------------------------*/
560544
-
560544
-///* A potentially more memcheck-friendly implementation of Clz32, with
560544
-//   the boundary case Clz32(0) = 32, which is what ppc requires. */
560544
-//
560544
-//static IRExpr* /* :: Ity_I32 */ verbose_Clz32 ( IRTemp arg )
560544
-//{
560544
-//   /* Welcome ... to SSA R Us. */
560544
-//   IRTemp n1  = newTemp(Ity_I32);
560544
-//   IRTemp n2  = newTemp(Ity_I32);
560544
-//   IRTemp n3  = newTemp(Ity_I32);
560544
-//   IRTemp n4  = newTemp(Ity_I32);
560544
-//   IRTemp n5  = newTemp(Ity_I32);
560544
-//   IRTemp n6  = newTemp(Ity_I32);
560544
-//   IRTemp n7  = newTemp(Ity_I32);
560544
-//   IRTemp n8  = newTemp(Ity_I32);
560544
-//   IRTemp n9  = newTemp(Ity_I32);
560544
-//   IRTemp n10 = newTemp(Ity_I32);
560544
-//   IRTemp n11 = newTemp(Ity_I32);
560544
-//   IRTemp n12 = newTemp(Ity_I32);
560544
-//
560544
-//   /* First, propagate the most significant 1-bit into all lower
560544
-//      positions in the word. */
560544
-//   /* unsigned int clz ( unsigned int n )
560544
-//      {
560544
-//         n |= (n >> 1);
560544
-//         n |= (n >> 2);
560544
-//         n |= (n >> 4);
560544
-//         n |= (n >> 8);
560544
-//         n |= (n >> 16);
560544
-//         return bitcount(~n);
560544
-//      }
560544
-//   */
560544
-//   assign(n1, mkexpr(arg));
560544
-//   assign(n2, binop(Iop_Or32, mkexpr(n1), binop(Iop_Shr32, mkexpr(n1), mkU8(1))));
560544
-//   assign(n3, binop(Iop_Or32, mkexpr(n2), binop(Iop_Shr32, mkexpr(n2), mkU8(2))));
560544
-//   assign(n4, binop(Iop_Or32, mkexpr(n3), binop(Iop_Shr32, mkexpr(n3), mkU8(4))));
560544
-//   assign(n5, binop(Iop_Or32, mkexpr(n4), binop(Iop_Shr32, mkexpr(n4), mkU8(8))));
560544
-//   assign(n6, binop(Iop_Or32, mkexpr(n5), binop(Iop_Shr32, mkexpr(n5), mkU8(16))));
560544
-//   /* This gives a word of the form 0---01---1.  Now invert it, giving
560544
-//      a word of the form 1---10---0, then do a population-count idiom
560544
-//      (to count the 1s, which is the number of leading zeroes, or 32
560544
-//      if the original word was 0. */
560544
-//   assign(n7, unop(Iop_Not32, mkexpr(n6)));
560544
-//
560544
-//   /* unsigned int bitcount ( unsigned int n )
560544
-//      {
560544
-//         n = n - ((n >> 1) & 0x55555555);
560544
-//         n = (n & 0x33333333) + ((n >> 2) & 0x33333333);
560544
-//         n = (n + (n >> 4)) & 0x0F0F0F0F;
560544
-//         n = n + (n >> 8);
560544
-//         n = (n + (n >> 16)) & 0x3F;
560544
-//         return n;
560544
-//      }
560544
-//   */
560544
-//   assign(n8, 
560544
-//          binop(Iop_Sub32, 
560544
-//                mkexpr(n7),  
560544
-//                binop(Iop_And32, 
560544
-//                      binop(Iop_Shr32, mkexpr(n7), mkU8(1)),
560544
-//                      mkU32(0x55555555))));
560544
-//   assign(n9,
560544
-//          binop(Iop_Add32,
560544
-//                binop(Iop_And32, mkexpr(n8), mkU32(0x33333333)),
560544
-//                binop(Iop_And32,
560544
-//                      binop(Iop_Shr32, mkexpr(n8), mkU8(2)),
560544
-//                      mkU32(0x33333333))));
560544
-//   assign(n10,
560544
-//          binop(Iop_And32,
560544
-//                binop(Iop_Add32, 
560544
-//                      mkexpr(n9), 
560544
-//                      binop(Iop_Shr32, mkexpr(n9), mkU8(4))),
560544
-//                mkU32(0x0F0F0F0F)));
560544
-//   assign(n11,
560544
-//          binop(Iop_Add32,
560544
-//                mkexpr(n10),
560544
-//                binop(Iop_Shr32, mkexpr(n10), mkU8(8))));
560544
-//   assign(n12,
560544
-//          binop(Iop_Add32,
560544
-//                mkexpr(n11),
560544
-//                binop(Iop_Shr32, mkexpr(n11), mkU8(16))));
560544
-//   return
560544
-//      binop(Iop_And32, mkexpr(n12), mkU32(0x3F));
560544
-//}
560544
-
560544
 /*--------------------------------------------------------------------*/
560544
 /*--- end                                         guest_ppc_toIR.c ---*/
560544
 /*--------------------------------------------------------------------*/