Blame SOURCES/valgrind-3.14.0-s390x-vec-float-point-code.patch

560544
commit 600a0099a1eb2335a3f9563534c112e11817002b
560544
Author: Vadim Barkov <vbrkov@gmail.com>
560544
Date:   Fri Oct 5 13:51:49 2018 +0300
560544
560544
    Bug 385411 s390x: Add z13 vector floating point support
560544
    
560544
    This adds support for the z/Architecture vector FP instructions that were
560544
    introduced with z13.
560544
    
560544
    The patch was contributed by Vadim Barkov, with some clean-up and minor
560544
    adjustments by Andreas Arnez.
560544
560544
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
560544
index 3bfecbe..d72cc9f 100644
560544
--- a/VEX/priv/guest_s390_defs.h
560544
+++ b/VEX/priv/guest_s390_defs.h
560544
@@ -281,7 +281,11 @@ enum {
560544
    S390_VEC_OP_VMALH = 13,
560544
    S390_VEC_OP_VCH = 14,
560544
    S390_VEC_OP_VCHL = 15,
560544
-   S390_VEC_OP_LAST = 16 // supposed to be the last element in enum
560544
+   S390_VEC_OP_VFCE = 16,
560544
+   S390_VEC_OP_VFCH = 17,
560544
+   S390_VEC_OP_VFCHE = 18,
560544
+   S390_VEC_OP_VFTCI = 19,
560544
+   S390_VEC_OP_LAST = 20 // supposed to be the last element in enum
560544
 } s390x_vec_op_t;
560544
 
560544
 /* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one
560544
@@ -300,8 +304,10 @@ typedef union {
560544
 
560544
       unsigned int m4 : 4;        // field m4 of insn or zero if it's missing
560544
       unsigned int m5 : 4;        // field m5 of insn or zero if it's missing
560544
+      unsigned int m6 : 4;        // field m6 of insn or zero if it's missing
560544
+      unsigned int i3 : 12;       // field i3 of insn or zero if it's missing
560544
       unsigned int read_only: 1;  // don't write result to Guest State
560544
-      unsigned int reserved : 27; // reserved for future
560544
+      unsigned int reserved : 11; // reserved for future
560544
    };
560544
    ULong serialized;
560544
 } s390x_vec_op_details_t;
560544
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
560544
index d9773e7..5877743 100644
560544
--- a/VEX/priv/guest_s390_helpers.c
560544
+++ b/VEX/priv/guest_s390_helpers.c
560544
@@ -2498,6 +2498,10 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
560544
       {0xe7, 0xa9}, /* VMALH */
560544
       {0xe7, 0xfb}, /* VCH */
560544
       {0xe7, 0xf9}, /* VCHL */
560544
+      {0xe7, 0xe8}, /* VFCE */
560544
+      {0xe7, 0xeb}, /* VFCH */
560544
+      {0xe7, 0xea}, /* VFCHE */
560544
+      {0xe7, 0x4a}  /* VFTCI */
560544
    };
560544
 
560544
    union {
560544
@@ -2525,6 +2529,28 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
560544
         unsigned int rxb : 4;
560544
         unsigned int op2 : 8;
560544
       } VRRd;
560544
+      struct {
560544
+         UInt op1 : 8;
560544
+         UInt v1  : 4;
560544
+         UInt v2  : 4;
560544
+         UInt v3  : 4;
560544
+         UInt     : 4;
560544
+         UInt m6  : 4;
560544
+         UInt m5  : 4;
560544
+         UInt m4  : 4;
560544
+         UInt rxb : 4;
560544
+         UInt op2 : 8;
560544
+      } VRRc;
560544
+      struct {
560544
+         UInt op1 : 8;
560544
+         UInt v1  : 4;
560544
+         UInt v2  : 4;
560544
+         UInt i3  : 12;
560544
+         UInt m5  : 4;
560544
+         UInt m4  : 4;
560544
+         UInt rxb : 4;
560544
+         UInt op2 : 8;
560544
+      } VRIe;
560544
       UChar bytes[6];
560544
    } the_insn;
560544
 
560544
@@ -2578,6 +2604,27 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
560544
       the_insn.VRRd.m6 = d->m5;
560544
       break;
560544
 
560544
+   case S390_VEC_OP_VFCE:
560544
+   case S390_VEC_OP_VFCH:
560544
+   case S390_VEC_OP_VFCHE:
560544
+      the_insn.VRRc.v1 = 1;
560544
+      the_insn.VRRc.v2 = 2;
560544
+      the_insn.VRRc.v3 = 3;
560544
+      the_insn.VRRc.rxb = 0b1110;
560544
+      the_insn.VRRc.m4 = d->m4;
560544
+      the_insn.VRRc.m5 = d->m5;
560544
+      the_insn.VRRc.m6 = d->m6;
560544
+      break;
560544
+
560544
+   case S390_VEC_OP_VFTCI:
560544
+      the_insn.VRIe.v1 = 1;
560544
+      the_insn.VRIe.v2 = 2;
560544
+      the_insn.VRIe.rxb = 0b1100;
560544
+      the_insn.VRIe.i3 = d->i3;
560544
+      the_insn.VRIe.m4 = d->m4;
560544
+      the_insn.VRIe.m5 = d->m5;
560544
+      break;
560544
+
560544
    default:
560544
       vex_printf("operation = %d\n", d->op);
560544
       vpanic("s390x_dirtyhelper_vec_op: unknown operation");
560544
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
560544
index 50a5a41..1c4ac39 100644
560544
--- a/VEX/priv/guest_s390_toIR.c
560544
+++ b/VEX/priv/guest_s390_toIR.c
560544
@@ -86,6 +86,7 @@ typedef enum {
560544
    S390_DECODE_UNKNOWN_INSN,
560544
    S390_DECODE_UNIMPLEMENTED_INSN,
560544
    S390_DECODE_UNKNOWN_SPECIAL_INSN,
560544
+   S390_DECODE_SPECIFICATION_EXCEPTION,
560544
    S390_DECODE_ERROR
560544
 } s390_decode_t;
560544
 
560544
@@ -421,6 +422,26 @@ yield_if(IRExpr *condition)
560544
                     S390X_GUEST_OFFSET(guest_IA)));
560544
 }
560544
 
560544
+/* Convenience macro to yield a specification exception if the given condition
560544
+   is not met.  Used to pass this type of decoding error up through the call
560544
+   chain. */
560544
+#define s390_insn_assert(mnm, cond)             \
560544
+   do {                                         \
560544
+      if (!(cond)) {                            \
560544
+         dis_res->whatNext = Dis_StopHere;      \
560544
+         dis_res->jk_StopHere = Ijk_NoDecode;   \
560544
+         return (mnm);                          \
560544
+      }                                         \
560544
+   } while (0)
560544
+
560544
+/* Convenience function to check for a specification exception. */
560544
+static Bool
560544
+is_specification_exception(void)
560544
+{
560544
+   return (dis_res->whatNext == Dis_StopHere &&
560544
+           dis_res->jk_StopHere == Ijk_NoDecode);
560544
+}
560544
+
560544
 static __inline__ IRExpr *get_fpr_dw0(UInt);
560544
 static __inline__ void    put_fpr_dw0(UInt, IRExpr *);
560544
 static __inline__ IRExpr *get_dpr_dw0(UInt);
560544
@@ -1770,6 +1791,11 @@ s390_vr_get_type(const UChar m)
560544
 /* Determine if Zero Search (ZS) flag is set in m field */
560544
 #define s390_vr_is_zs_set(m) (((m) & 0b0010) != 0)
560544
 
560544
+/* Check if the "Single-Element-Control" bit is set.
560544
+   Used in vector FP instructions.
560544
+ */
560544
+#define s390_vr_is_single_element_control_set(m) (((m) & 0x8) != 0)
560544
+
560544
 /* Generates arg1 < arg2 (or arg1 <= arg2 if allow_equal == True) expression.
560544
    Arguments must have V128 type and are treated as unsigned 128-bit numbers.
560544
 */
560544
@@ -2001,12 +2027,14 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index)
560544
       return vr_offset(archreg) + sizeof(UShort) * index;
560544
 
560544
    case Ity_I32:
560544
+   case Ity_F32:
560544
       if(index > 3) {
560544
          goto invalidIndex;
560544
       }
560544
       return vr_offset(archreg) + sizeof(UInt) * index;
560544
 
560544
    case Ity_I64:
560544
+   case Ity_F64:
560544
       if(index > 1) {
560544
          goto invalidIndex;
560544
       }
560544
@@ -2237,8 +2265,8 @@ encode_bfp_rounding_mode(UChar mode)
560544
    case S390_BFP_ROUND_PER_FPC:
560544
       rm = get_bfp_rounding_mode_from_fpc();
560544
       break;
560544
-   case S390_BFP_ROUND_NEAREST_AWAY:  /* not supported */
560544
-   case S390_BFP_ROUND_PREPARE_SHORT: /* not supported */
560544
+   case S390_BFP_ROUND_NEAREST_AWAY:  rm = mkU32(Irrm_NEAREST_TIE_AWAY_0); break;
560544
+   case S390_BFP_ROUND_PREPARE_SHORT: rm = mkU32(Irrm_PREPARE_SHORTER); break;
560544
    case S390_BFP_ROUND_NEAREST_EVEN:  rm = mkU32(Irrm_NEAREST); break;
560544
    case S390_BFP_ROUND_ZERO:          rm = mkU32(Irrm_ZERO);    break;
560544
    case S390_BFP_ROUND_POSINF:        rm = mkU32(Irrm_PosINF);  break;
560544
@@ -3524,6 +3552,26 @@ s390_format_VRI_VVIM(const HChar *(*irgen)(UChar v1, UChar v3, UShort i2, UChar
560544
       s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v3, i2, m4);
560544
 }
560544
 
560544
+static void
560544
+s390_format_VRI_VVIMM(const HChar *(*irgen)(UChar v1, UChar v2, UShort i3,
560544
+                                            UChar m4, UChar m5),
560544
+                      UChar v1, UChar v2, UShort i3, UChar m4, UChar m5,
560544
+                      UChar rxb)
560544
+{
560544
+   const HChar *mnm;
560544
+
560544
+   if (!s390_host_has_vx) {
560544
+      emulation_failure(EmFail_S390X_vx);
560544
+      return;
560544
+   }
560544
+
560544
+   v1 = s390_vr_getVRindex(v1, 1, rxb);
560544
+   v2 = s390_vr_getVRindex(v2, 2, rxb);
560544
+   mnm = irgen(v1, v2, i3, m4, m5);
560544
+
560544
+   if (vex_traceflags & VEX_TRACE_FE)
560544
+      s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, i3, m4, m5);
560544
+}
560544
 
560544
 static void
560544
 s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3,
560544
@@ -3680,7 +3728,7 @@ s390_format_VRV_VVRDMT(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar m3)
560544
 
560544
 
560544
 static void
560544
-s390_format_VRRd_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3,
560544
+s390_format_VRR_VVVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3,
560544
                                               UChar v4, UChar m5, UChar m6),
560544
                         UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
560544
                         UChar m6, UChar rxb)
560544
@@ -3794,6 +3842,92 @@ s390_format_VRRd_VVVVM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3,
560544
 }
560544
 
560544
 
560544
+static void
560544
+s390_format_VRRa_VVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3,
560544
+                                             UChar m4, UChar m5),
560544
+                       UChar v1, UChar v2, UChar m3, UChar m4, UChar m5,
560544
+                       UChar rxb)
560544
+{
560544
+   const HChar *mnm;
560544
+
560544
+   if (!s390_host_has_vx) {
560544
+      emulation_failure(EmFail_S390X_vx);
560544
+      return;
560544
+   }
560544
+
560544
+   v1 = s390_vr_getVRindex(v1, 1, rxb);
560544
+   v2 = s390_vr_getVRindex(v2, 2, rxb);
560544
+   mnm = irgen(v1, v2, m3, m4, m5);
560544
+
560544
+   if (vex_traceflags & VEX_TRACE_FE)
560544
+      s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), mnm, v1, v2, m3, m4, m5);
560544
+}
560544
+
560544
+static void
560544
+s390_format_VRRa_VVVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3,
560544
+                                             UChar m4, UChar m5),
560544
+                       UChar v1, UChar v2, UChar v3, UChar m4, UChar m5,
560544
+                       UChar rxb)
560544
+{
560544
+   const HChar *mnm;
560544
+
560544
+   if (!s390_host_has_vx) {
560544
+      emulation_failure(EmFail_S390X_vx);
560544
+      return;
560544
+   }
560544
+
560544
+   v1 = s390_vr_getVRindex(v1, 1, rxb);
560544
+   v2 = s390_vr_getVRindex(v2, 2, rxb);
560544
+   v3 = s390_vr_getVRindex(v3, 3, rxb);
560544
+   mnm = irgen(v1, v2, v3, m4, m5);
560544
+
560544
+   if (vex_traceflags & VEX_TRACE_FE)
560544
+      s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), mnm, v1, v2, v3, m4, m5);
560544
+}
560544
+
560544
+static void
560544
+s390_format_VRRa_VVMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar m3,
560544
+                                            UChar m4),
560544
+                       UChar v1, UChar v2, UChar m3, UChar m4, UChar rxb)
560544
+{
560544
+   const HChar *mnm;
560544
+
560544
+   if (!s390_host_has_vx) {
560544
+      emulation_failure(EmFail_S390X_vx);
560544
+      return;
560544
+   }
560544
+
560544
+   v1 = s390_vr_getVRindex(v1, 1, rxb);
560544
+   v2 = s390_vr_getVRindex(v2, 2, rxb);
560544
+   mnm = irgen(v1, v2, m3, m4);
560544
+
560544
+   if (vex_traceflags & VEX_TRACE_FE)
560544
+      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), mnm, v1, v2, m3, m4);
560544
+}
560544
+
560544
+static void
560544
+s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3,
560544
+                                              UChar m4, UChar m5, UChar m6),
560544
+                        UChar v1, UChar v2, UChar v3, UChar m4, UChar m5,
560544
+                        UChar m6, UChar rxb)
560544
+{
560544
+   const HChar *mnm;
560544
+
560544
+   if (!s390_host_has_vx) {
560544
+      emulation_failure(EmFail_S390X_vx);
560544
+      return;
560544
+   }
560544
+
560544
+   v1 = s390_vr_getVRindex(v1, 1, rxb);
560544
+   v2 = s390_vr_getVRindex(v2, 2, rxb);
560544
+   v3 = s390_vr_getVRindex(v3, 3, rxb);
560544
+   mnm = irgen(v1, v2, v3, m4, m5, m6);
560544
+   /* Seven trace fields: mnemonic, three VRs, three masks (m4, m5, m6). */
560544
+   if (vex_traceflags & VEX_TRACE_FE)
560544
+      s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT),
560544
+                  mnm, v1, v2, v3, m4, m5, m6);
560544
+}
560544
+
560544
 /*------------------------------------------------------------*/
560544
 /*--- Build IR for opcodes                                 ---*/
560544
 /*------------------------------------------------------------*/
560544
@@ -17895,6 +18029,575 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5)
560544
    return "vmalh";
560544
 }
560544
 
560544
+static void
560544
+s390_vector_fp_convert(IROp op, IRType fromType, IRType toType,
560544
+                       UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4);
560544
+   UChar maxIndex = isSingleElementOp ? 0 : 1;
560544
+
560544
+   /* For Iop_F32toF64 we do this:
560544
+      f32[0] -> f64[0]
560544
+      f32[2] -> f64[1]
560544
+
560544
+      For Iop_F64toF32 we do this:
560544
+      f64[0] -> f32[0]
560544
+      f64[1] -> f32[2]
560544
+
560544
+      The magic below with scaling factors is used to achieve the logic
560544
+      described above.
560544
+   */
560544
+   const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1;
560544
+   const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 2 : 1;
560544
+
560544
+   const Bool isUnary = (op == Iop_F32toF64);
560544
+   for (UChar i = 0; i <= maxIndex; i++) {
560544
+      IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor);
560544
+      IRExpr* result;
560544
+      if (!isUnary) {
560544
+         result = binop(op,
560544
+                        mkexpr(encode_bfp_rounding_mode(m5)),
560544
+                        argument);
560544
+      } else {
560544
+         result = unop(op, argument);
560544
+      }
560544
+      put_vr(v1, toType, i * destinationIndexScaleFactor, result);
560544
+   }
560544
+
560544
+   if (isSingleElementOp) {
560544
+      put_vr_dw1(v1, mkU64(0));
560544
+   }
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vcdg", m3 == 3);
560544
+
560544
+   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
560544
+      emulation_warning(EmWarn_S390X_fpext_rounding);
560544
+      m5 = S390_BFP_ROUND_PER_FPC;
560544
+   }
560544
+
560544
+   s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5);
560544
+
560544
+   return "vcdg";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vcdlg", m3 == 3);
560544
+
560544
+   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
560544
+      emulation_warning(EmWarn_S390X_fpext_rounding);
560544
+      m5 = S390_BFP_ROUND_PER_FPC;
560544
+   }
560544
+
560544
+   s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5);
560544
+
560544
+   return "vcdlg";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vcgd", m3 == 3);
560544
+
560544
+   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
560544
+      emulation_warning(EmWarn_S390X_fpext_rounding);
560544
+      m5 = S390_BFP_ROUND_PER_FPC;
560544
+   }
560544
+
560544
+   s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5);
560544
+
560544
+   return "vcgd";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vclgd", m3 == 3);
560544
+
560544
+   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
560544
+      emulation_warning(EmWarn_S390X_fpext_rounding);
560544
+      m5 = S390_BFP_ROUND_PER_FPC;
560544
+   }
560544
+
560544
+   s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5);
560544
+
560544
+   return "vclgd";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vfi", m3 == 3);
560544
+   /* Without fpext, fall back to the FPC rounding mode and warn. */
560544
+   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
560544
+      emulation_warning(EmWarn_S390X_fpext_rounding);
560544
+      m5 = S390_BFP_ROUND_PER_FPC;
560544
+   }
560544
+
560544
+   s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64,
560544
+                          v1, v2, m3, m4, m5);
560544
+   /* Return this insn's own mnemonic, matching the assert above. */
560544
+   return "vfi";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VLDE(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vlde", m3 == 2);
560544
+
560544
+   s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5);
560544
+
560544
+   return "vlde";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vled", m3 == 3);
560544
+   /* Warn, like the other converts, when the rounding mode is overridden. */
560544
+   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
560544
+      emulation_warning(EmWarn_S390X_fpext_rounding);
560544
+      m5 = S390_BFP_ROUND_PER_FPC;
560544
+   }
560544
+
560544
+   s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5);
560544
+   return "vled";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vfpso", m3 == 3);
560544
+   s390_insn_assert("vfpso", m5 <= 2);  /* only ops 0..2 are handled below */
560544
+   IRExpr* result;
560544
+   switch (m5) {
560544
+   case 0: {
560544
+      /* Invert sign */
560544
+      if (!s390_vr_is_single_element_control_set(m4)) {
560544
+         result = unop(Iop_Neg64Fx2, get_vr_qw(v2));
560544
+      }
560544
+      else {
560544
+         result = binop(Iop_64HLtoV128,
560544
+                        unop(Iop_ReinterpF64asI64,
560544
+                             unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))),
560544
+                        mkU64(0));
560544
+      }
560544
+      break;
560544
+   }
560544
+
560544
+   case 1: {
560544
+      /* Set sign to negative */
560544
+      IRExpr* highHalf = mkU64(0x8000000000000000ULL);
560544
+      if (!s390_vr_is_single_element_control_set(m4)) {
560544
+         IRExpr* lowHalf = highHalf;
560544
+         IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf);
560544
+         result = binop(Iop_OrV128, get_vr_qw(v2), mask);
560544
+      }
560544
+      else {
560544
+         result = binop(Iop_64HLtoV128,
560544
+                        binop(Iop_Or64, get_vr_dw0(v2), highHalf),
560544
+                        mkU64(0ULL));
560544
+      }
560544
+
560544
+      break;
560544
+   }
560544
+
560544
+   case 2: {
560544
+      /* Set sign to positive */
560544
+      if (!s390_vr_is_single_element_control_set(m4)) {
560544
+         result = unop(Iop_Abs64Fx2, get_vr_qw(v2));
560544
+      }
560544
+      else {
560544
+         result = binop(Iop_64HLtoV128,
560544
+                        unop(Iop_ReinterpF64asI64,
560544
+                             unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))),
560544
+                        mkU64(0));
560544
+      }
560544
+
560544
+      break;
560544
+   }
560544
+
560544
+   default: /* not reached: m5 > 2 is rejected by the assert above */
560544
+      vpanic("s390_irgen_VFPSO: Invalid m5 value");
560544
+   }
560544
+
560544
+   put_vr_qw(v1, result);
560544
+   if (s390_vr_is_single_element_control_set(m4)) {
560544
+      put_vr_dw1(v1, mkU64(0ULL));
560544
+   }
560544
+
560544
+   return "vfpso";
560544
+}
560544
+
560544
+static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp,
560544
+                                   UChar v1, UChar v2, UChar v3, UChar m4,
560544
+                                   UChar m5)
560544
+{
560544
+   IRExpr* result;
560544
+   if (!s390_vr_is_single_element_control_set(m5)) {
560544
+      result = triop(generalOp, get_bfp_rounding_mode_from_fpc(),
560544
+                     get_vr_qw(v2), get_vr_qw(v3));
560544
+   } else {
560544
+      IRExpr* highHalf = triop(singleElementOp,
560544
+                               get_bfp_rounding_mode_from_fpc(),
560544
+                               get_vr(v2, Ity_F64, 0),
560544
+                               get_vr(v3, Ity_F64, 0));
560544
+      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
560544
+                     mkU64(0ULL));
560544
+   }
560544
+
560544
+   put_vr_qw(v1, result);
560544
+}
560544
+
560544
+static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp,
560544
+                                  UChar v1, UChar v2, UChar m3, UChar m4)
560544
+{
560544
+   IRExpr* result;
560544
+   if (!s390_vr_is_single_element_control_set(m4)) {
560544
+      result = binop(generalOp, get_bfp_rounding_mode_from_fpc(),
560544
+                     get_vr_qw(v2));
560544
+   }
560544
+   else {
560544
+      IRExpr* highHalf = binop(singleElementOp,
560544
+                               get_bfp_rounding_mode_from_fpc(),
560544
+                               get_vr(v2, Ity_F64, 0));
560544
+      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
560544
+                     mkU64(0ULL));
560544
+   }
560544
+
560544
+   put_vr_qw(v1, result);
560544
+}
560544
+
560544
+
560544
+static void
560544
+s390_vector_fp_mulAddOrSub(IROp singleElementOp,
560544
+                           UChar v1, UChar v2, UChar v3, UChar v4,
560544
+                           UChar m5, UChar m6)
560544
+{
560544
+   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
560544
+   IRTemp irrm_temp = newTemp(Ity_I32);
560544
+   assign(irrm_temp, get_bfp_rounding_mode_from_fpc());
560544
+   IRExpr* irrm = mkexpr(irrm_temp);
560544
+   IRExpr* result;
560544
+   IRExpr* highHalf = qop(singleElementOp,
560544
+                          irrm,
560544
+                          get_vr(v2, Ity_F64, 0),
560544
+                          get_vr(v3, Ity_F64, 0),
560544
+                          get_vr(v4, Ity_F64, 0));
560544
+
560544
+   if (isSingleElementOp) {
560544
+      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
560544
+                     mkU64(0ULL));
560544
+   } else {
560544
+      IRExpr* lowHalf = qop(singleElementOp,
560544
+                            irrm,
560544
+                            get_vr(v2, Ity_F64, 1),
560544
+                            get_vr(v3, Ity_F64, 1),
560544
+                            get_vr(v4, Ity_F64, 1));
560544
+      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
560544
+                     unop(Iop_ReinterpF64asI64, lowHalf));
560544
+   }
560544
+
560544
+   put_vr_qw(v1, result);
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vfa", m4 == 3);
560544
+   s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5);
560544
+   return "vfa";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vfs", m4 == 3);
560544
+   s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5);
560544
+   return "vfs";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vfm", m4 == 3);
560544
+   s390x_vec_fp_binary_op(Iop_Mul64Fx2, Iop_MulF64, v1, v2, v3, m4, m5);
560544
+   return "vfm";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vfd", m4 == 3);
560544
+   s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5);
560544
+   return "vfd";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4)
560544
+{
560544
+   s390_insn_assert("vfsq", m3 == 3);
560544
+   s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4);
560544
+
560544
+   return "vfsq";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
560544
+{
560544
+   s390_insn_assert("vfma", m6 == 3);
560544
+   s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6);
560544
+   return "vfma";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
560544
+{
560544
+   s390_insn_assert("vfms", m6 == 3);
560544
+   s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6);
560544
+   return "vfms";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4)
560544
+{
560544
+   s390_insn_assert("wfc", m3 == 3);
560544
+   s390_insn_assert("wfc", m4 == 0);
560544
+
560544
+   IRTemp cc_vex = newTemp(Ity_I32);
560544
+   assign(cc_vex, binop(Iop_CmpF64,
560544
+                        get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0)));
560544
+
560544
+   IRTemp cc_s390 = newTemp(Ity_I32);
560544
+   assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex));
560544
+   s390_cc_thunk_put1(S390_CC_OP_SET, cc_s390, False);
560544
+
560544
+   return "wfc";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4)
560544
+{
560544
+   s390_irgen_WFC(v1, v2, m3, m4);
560544
+
560544
+   return "wfk";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
560544
+{
560544
+   s390_insn_assert("vfce", m4 == 3);
560544
+
560544
+   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
560544
+   if (!s390_vr_is_cs_set(m6)) {
560544
+      if (!isSingleElementOp) {
560544
+         put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3)));
560544
+      } else {
560544
+         IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0),
560544
+                                          get_vr(v3, Ity_F64, 0));
560544
+         IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult,
560544
+                                      mkU32(Ircr_EQ)),
560544
+                                mkU64(0xffffffffffffffffULL),
560544
+                                mkU64(0ULL));
560544
+         put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL)));
560544
+      }
560544
+   } else {
560544
+      IRDirty* d;
560544
+      IRTemp cc = newTemp(Ity_I64);
560544
+
560544
+      s390x_vec_op_details_t details = { .serialized = 0ULL };
560544
+      details.op = S390_VEC_OP_VFCE;
560544
+      details.v1 = v1;
560544
+      details.v2 = v2;
560544
+      details.v3 = v3;
560544
+      details.m4 = m4;
560544
+      details.m5 = m5;
560544
+      details.m6 = m6;
560544
+
560544
+      d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
560544
+                            &s390x_dirtyhelper_vec_op,
560544
+                            mkIRExprVec_2(IRExpr_GSPTR(),
560544
+                                          mkU64(details.serialized)));
560544
+
560544
+      const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
560544
+      d->nFxState = 3;
560544
+      vex_bzero(&d->fxState, sizeof(d->fxState));
560544
+      d->fxState[0].fx = Ifx_Read;
560544
+      d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
560544
+      d->fxState[0].size = elementSize;
560544
+      d->fxState[1].fx = Ifx_Read;
560544
+      d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
560544
+      d->fxState[1].size = elementSize;
560544
+      d->fxState[2].fx = Ifx_Write;
560544
+      d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
560544
+      d->fxState[2].size = sizeof(V128);
560544
+
560544
+      stmt(IRStmt_Dirty(d));
560544
+      s390_cc_set(cc);
560544
+   }
560544
+
560544
+   return "vfce";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
560544
+{
560544
+   s390_insn_assert("vfch", m4 == 3);
560544
+   /* NOTE(review): CmpLE below vs CmpLT in VFCHE looks swapped -- confirm. */
560544
+   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
560544
+   if (!s390_vr_is_cs_set(m6)) {
560544
+      if (!isSingleElementOp) {
560544
+         put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2)));
560544
+      } else {
560544
+         IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0),
560544
+                                          get_vr(v3, Ity_F64, 0));
560544
+         IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult,
560544
+                                      mkU32(Ircr_GT)),
560544
+                                mkU64(0xffffffffffffffffULL),
560544
+                                mkU64(0ULL));
560544
+         put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL)));
560544
+      }
560544
+   }
560544
+   else {
560544
+      IRDirty* d;
560544
+      IRTemp cc = newTemp(Ity_I64);
560544
+
560544
+      s390x_vec_op_details_t details = { .serialized = 0ULL };
560544
+      details.op = S390_VEC_OP_VFCH;
560544
+      details.v1 = v1;
560544
+      details.v2 = v2;
560544
+      details.v3 = v3;
560544
+      details.m4 = m4;
560544
+      details.m5 = m5;
560544
+      details.m6 = m6;
560544
+
560544
+      d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
560544
+                            &s390x_dirtyhelper_vec_op,
560544
+                            mkIRExprVec_2(IRExpr_GSPTR(),
560544
+                                          mkU64(details.serialized)));
560544
+
560544
+      const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
560544
+      d->nFxState = 3;
560544
+      vex_bzero(&d->fxState, sizeof(d->fxState));
560544
+      d->fxState[0].fx = Ifx_Read;
560544
+      d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
560544
+      d->fxState[0].size = elementSize;
560544
+      d->fxState[1].fx = Ifx_Read;
560544
+      d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
560544
+      d->fxState[1].size = elementSize;
560544
+      d->fxState[2].fx = Ifx_Write;
560544
+      d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
560544
+      d->fxState[2].size = sizeof(V128);
560544
+
560544
+      stmt(IRStmt_Dirty(d));
560544
+      s390_cc_set(cc);
560544
+   }
560544
+
560544
+   return "vfch";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
560544
+{
560544
+   s390_insn_assert("vfche", m4 == 3);
560544
+
560544
+   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
560544
+   if (!s390_vr_is_cs_set(m6)) {
560544
+      if (!isSingleElementOp) {
560544
+         put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2)));
560544
+      }
560544
+      else {
560544
+         IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0),
560544
+                                          get_vr(v2, Ity_F64, 0));
560544
+         IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult,
560544
+                                      mkU32(Ircr_LT)),
560544
+                                mkU64(0xffffffffffffffffULL),
560544
+                                mkU64(0ULL));
560544
+         put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL)));
560544
+      }
560544
+   }
560544
+   else {
560544
+      IRDirty* d;
560544
+      IRTemp cc = newTemp(Ity_I64);
560544
+
560544
+      s390x_vec_op_details_t details = { .serialized = 0ULL };
560544
+      details.op = S390_VEC_OP_VFCHE;
560544
+      details.v1 = v1;
560544
+      details.v2 = v2;
560544
+      details.v3 = v3;
560544
+      details.m4 = m4;
560544
+      details.m5 = m5;
560544
+      details.m6 = m6;
560544
+
560544
+      d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
560544
+                            &s390x_dirtyhelper_vec_op,
560544
+                            mkIRExprVec_2(IRExpr_GSPTR(),
560544
+                                          mkU64(details.serialized)));
560544
+
560544
+      const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
560544
+      d->nFxState = 3;
560544
+      vex_bzero(&d->fxState, sizeof(d->fxState));
560544
+      d->fxState[0].fx = Ifx_Read;
560544
+      d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
560544
+      d->fxState[0].size = elementSize;
560544
+      d->fxState[1].fx = Ifx_Read;
560544
+      d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
560544
+      d->fxState[1].size = elementSize;
560544
+      d->fxState[2].fx = Ifx_Write;
560544
+      d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
560544
+      d->fxState[2].size = sizeof(V128);
560544
+
560544
+      stmt(IRStmt_Dirty(d));
560544
+      s390_cc_set(cc);
560544
+   }
560544
+
560544
+   return "vfche";
560544
+}
560544
+
560544
+static const HChar *
560544
+s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5)
560544
+{
560544
+   s390_insn_assert("vftci", m4 == 3);
560544
+
560544
+   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
560544
+
560544
+   IRDirty* d;
560544
+   IRTemp cc = newTemp(Ity_I64);
560544
+
560544
+   s390x_vec_op_details_t details = { .serialized = 0ULL };
560544
+   details.op = S390_VEC_OP_VFTCI;
560544
+   details.v1 = v1;
560544
+   details.v2 = v2;
560544
+   details.i3 = i3;
560544
+   details.m4 = m4;
560544
+   details.m5 = m5;
560544
+
560544
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
560544
+                         &s390x_dirtyhelper_vec_op,
560544
+                         mkIRExprVec_2(IRExpr_GSPTR(),
560544
+                                       mkU64(details.serialized)));
560544
+
560544
+   const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
560544
+   d->nFxState = 2;
560544
+   vex_bzero(&d->fxState, sizeof(d->fxState));
560544
+   d->fxState[0].fx = Ifx_Read;
560544
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
560544
+   d->fxState[0].size = elementSize;
560544
+   d->fxState[1].fx = Ifx_Write;
560544
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
560544
+   d->fxState[1].size = sizeof(V128);
560544
+
560544
+   stmt(IRStmt_Dirty(d));
560544
+   s390_cc_set(cc);
560544
+
560544
+   return "vftci";
560544
+}
560544
+
560544
 /* New insns are added here.
560544
    If an insn is contingent on a facility being installed also
560544
    check whether the list of supported facilities in function
560544
@@ -19358,6 +20061,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
         unsigned int op2 : 8;
560544
       } VRR;
560544
       struct {
560544
+         UInt op1 : 8;
560544
+         UInt v1  : 4;
560544
+         UInt v2  : 4;
560544
+         UInt v3  : 4;
560544
+         UInt     : 4;
560544
+         UInt m5  : 4;
560544
+         UInt m4  : 4;
560544
+         UInt m3  : 4;
560544
+         UInt rxb : 4;
560544
+         UInt op2 : 8;
560544
+      } VRRa;
560544
+      struct {
560544
         unsigned int op1 : 8;
560544
         unsigned int v1  : 4;
560544
         unsigned int v2  : 4;
560544
@@ -19370,6 +20085,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
         unsigned int op2 : 8;
560544
       } VRRd;
560544
       struct {
560544
+         unsigned int op1 : 8;
560544
+         unsigned int v1  : 4;
560544
+         unsigned int v2  : 4;
560544
+         unsigned int v3  : 4;
560544
+         unsigned int m6  : 4;
560544
+         unsigned int     : 4;
560544
+         unsigned int m5  : 4;
560544
+         unsigned int v4  : 4;
560544
+         unsigned int rxb : 4;
560544
+         unsigned int op2 : 8;
560544
+      } VRRe;
560544
+      struct {
560544
         unsigned int op1 : 8;
560544
         unsigned int v1  : 4;
560544
         unsigned int v3  : 4;
560544
@@ -19390,6 +20117,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
         unsigned int op2 : 8;
560544
       } VRId;
560544
       struct {
560544
+         UInt op1 : 8;
560544
+         UInt v1  : 4;
560544
+         UInt v2  : 4;
560544
+         UInt i3  : 12;
560544
+         UInt m5  : 4;
560544
+         UInt m4  : 4;
560544
+         UInt rxb : 4;
560544
+         UInt op2 : 8;
560544
+      } VRIe;
560544
+      struct {
560544
         unsigned int op1 : 8;
560544
         unsigned int v1  : 4;
560544
         unsigned int v3  : 4;
560544
@@ -19974,7 +20711,10 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
    case 0xe70000000046ULL: s390_format_VRI_VIM(s390_irgen_VGM, ovl.fmt.VRI.v1,
560544
                                                ovl.fmt.VRI.i2, ovl.fmt.VRI.m3,
560544
                                                ovl.fmt.VRI.rxb);  goto ok;
560544
-   case 0xe7000000004aULL: /* VFTCI */ goto unimplemented;
560544
+   case 0xe7000000004aULL: s390_format_VRI_VVIMM(s390_irgen_VFTCI, ovl.fmt.VRIe.v1,
560544
+                                                 ovl.fmt.VRIe.v2, ovl.fmt.VRIe.i3,
560544
+                                                 ovl.fmt.VRIe.m4, ovl.fmt.VRIe.m5,
560544
+                                                 ovl.fmt.VRIe.rxb);  goto ok;
560544
    case 0xe7000000004dULL: s390_format_VRI_VVIM(s390_irgen_VREP, ovl.fmt.VRI.v1,
560544
                                                ovl.fmt.VRI.v3, ovl.fmt.VRI.i2,
560544
                                                ovl.fmt.VRI.m3, ovl.fmt.VRI.rxb);  goto ok;
560544
@@ -20087,19 +20827,27 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
                                                ovl.fmt.VRR.v2, ovl.fmt.VRR.r3,
560544
                                                ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb);  goto ok;
560544
    case 0xe70000000085ULL: /* VBPERM */ goto unimplemented;
560544
-   case 0xe7000000008aULL: s390_format_VRRd_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1,
560544
-                                                   ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3,
560544
-                                                   ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5,
560544
-                                                   ovl.fmt.VRRd.m6,
560544
-                                                   ovl.fmt.VRRd.rxb);  goto ok;
560544
+   case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, ovl.fmt.VRRd.v1,
560544
+                                                  ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3,
560544
+                                                  ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5,
560544
+                                                  ovl.fmt.VRRd.m6,
560544
+                                                  ovl.fmt.VRRd.rxb);  goto ok;
560544
    case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, ovl.fmt.VRR.v1,
560544
                                                ovl.fmt.VRR.v2, ovl.fmt.VRR.r3,
560544
                                                ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb);  goto ok;
560544
    case 0xe7000000008dULL: s390_format_VRR_VVVV(s390_irgen_VSEL, ovl.fmt.VRR.v1,
560544
                                                ovl.fmt.VRR.v2, ovl.fmt.VRR.r3,
560544
                                                ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb);  goto ok;
560544
-   case 0xe7000000008eULL: /* VFMS */ goto unimplemented;
560544
-   case 0xe7000000008fULL: /* VFMA */ goto unimplemented;
560544
+   case 0xe7000000008eULL: s390_format_VRR_VVVVMM(s390_irgen_VFMS, ovl.fmt.VRRe.v1,
560544
+                                                  ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3,
560544
+                                                  ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5,
560544
+                                                  ovl.fmt.VRRe.m6,
560544
+                                                  ovl.fmt.VRRe.rxb);  goto ok;
560544
+   case 0xe7000000008fULL: s390_format_VRR_VVVVMM(s390_irgen_VFMA, ovl.fmt.VRRe.v1,
560544
+                                                  ovl.fmt.VRRe.v2, ovl.fmt.VRRe.v3,
560544
+                                                  ovl.fmt.VRRe.v4, ovl.fmt.VRRe.m5,
560544
+                                                  ovl.fmt.VRRe.m6,
560544
+                                                  ovl.fmt.VRRe.rxb);  goto ok;
560544
    case 0xe70000000094ULL: s390_format_VRR_VVVM(s390_irgen_VPK, ovl.fmt.VRR.v1,
560544
                                                ovl.fmt.VRR.v2, ovl.fmt.VRR.r3,
560544
                                                ovl.fmt.VRR.m4, ovl.fmt.VRR.rxb);  goto ok;
560544
@@ -20184,17 +20932,50 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
                                                   ovl.fmt.VRRd.v2, ovl.fmt.VRRd.v3,
560544
                                                   ovl.fmt.VRRd.v4, ovl.fmt.VRRd.m5,
560544
                                                   ovl.fmt.VRRd.rxb);  goto ok;
560544
-   case 0xe700000000c0ULL: /* VCLGD */ goto unimplemented;
560544
-   case 0xe700000000c1ULL: /* VCDLG */ goto unimplemented;
560544
-   case 0xe700000000c2ULL: /* VCGD */ goto unimplemented;
560544
-   case 0xe700000000c3ULL: /* VCDG */ goto unimplemented;
560544
-   case 0xe700000000c4ULL: /* VLDE */ goto unimplemented;
560544
-   case 0xe700000000c5ULL: /* VLED */ goto unimplemented;
560544
-   case 0xe700000000c7ULL: /* VFI */ goto unimplemented;
560544
-   case 0xe700000000caULL: /* WFK */ goto unimplemented;
560544
-   case 0xe700000000cbULL: /* WFC */ goto unimplemented;
560544
-   case 0xe700000000ccULL: /* VFPSO */ goto unimplemented;
560544
-   case 0xe700000000ceULL: /* VFSQ */ goto unimplemented;
560544
+   case 0xe700000000c0ULL: s390_format_VRRa_VVMMM(s390_irgen_VCLGD, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000c1ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDLG, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000c2ULL: s390_format_VRRa_VVMMM(s390_irgen_VCGD, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000c3ULL: s390_format_VRRa_VVMMM(s390_irgen_VCDG, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000c7ULL: s390_format_VRRa_VVMMM(s390_irgen_VFI, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000caULL: s390_format_VRRa_VVMM(s390_irgen_WFK, ovl.fmt.VRRa.v1,
560544
+                                                 ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                 ovl.fmt.VRRa.m4,
560544
+                                                 ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000cbULL: s390_format_VRRa_VVMM(s390_irgen_WFC, ovl.fmt.VRRa.v1,
560544
+                                                 ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                 ovl.fmt.VRRa.m4,
560544
+                                                 ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000ccULL: s390_format_VRRa_VVMMM(s390_irgen_VFPSO, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                  ovl.fmt.VRRa.m4, ovl.fmt.VRRa.m5,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000ceULL: s390_format_VRRa_VVMM(s390_irgen_VFSQ, ovl.fmt.VRRa.v1,
560544
+                                                 ovl.fmt.VRRa.v2, ovl.fmt.VRRa.m3,
560544
+                                                 ovl.fmt.VRRa.m4,
560544
+                                                 ovl.fmt.VRRa.rxb); goto ok;
560544
    case 0xe700000000d4ULL: s390_format_VRR_VVM(s390_irgen_VUPLL, ovl.fmt.VRR.v1,
560544
                                                ovl.fmt.VRR.v2, ovl.fmt.VRR.m4,
560544
                                                ovl.fmt.VRR.rxb);  goto ok;
560544
@@ -20221,13 +21002,37 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
560544
    case 0xe700000000dfULL: s390_format_VRR_VVM(s390_irgen_VLP, ovl.fmt.VRR.v1,
560544
                                                ovl.fmt.VRR.v2, ovl.fmt.VRR.m4,
560544
                                                ovl.fmt.VRR.rxb);  goto ok;
560544
-   case 0xe700000000e2ULL: /* VFS */ goto unimplemented;
560544
-   case 0xe700000000e3ULL: /* VFA */ goto unimplemented;
560544
-   case 0xe700000000e5ULL: /* VFD */ goto unimplemented;
560544
-   case 0xe700000000e7ULL: /* VFM */ goto unimplemented;
560544
-   case 0xe700000000e8ULL: /* VFCE */ goto unimplemented;
560544
-   case 0xe700000000eaULL: /* VFCHE */ goto unimplemented;
560544
-   case 0xe700000000ebULL: /* VFCH */ goto unimplemented;
560544
+   case 0xe700000000e2ULL: s390_format_VRRa_VVVMM(s390_irgen_VFS, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                  ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000e3ULL: s390_format_VRRa_VVVMM(s390_irgen_VFA, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                  ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000e5ULL: s390_format_VRRa_VVVMM(s390_irgen_VFD, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                  ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000e7ULL: s390_format_VRRa_VVVMM(s390_irgen_VFM, ovl.fmt.VRRa.v1,
560544
+                                                  ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                  ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                  ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000e8ULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCE, ovl.fmt.VRRa.v1,
560544
+                                                   ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                   ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                   ovl.fmt.VRRa.m5,
560544
+                                                   ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000eaULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCHE, ovl.fmt.VRRa.v1,
560544
+                                                   ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                   ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                   ovl.fmt.VRRa.m5,
560544
+                                                   ovl.fmt.VRRa.rxb); goto ok;
560544
+   case 0xe700000000ebULL: s390_format_VRRa_VVVMMM(s390_irgen_VFCH, ovl.fmt.VRRa.v1,
560544
+                                                   ovl.fmt.VRRa.v2, ovl.fmt.VRRa.v3,
560544
+                                                   ovl.fmt.VRRa.m3, ovl.fmt.VRRa.m4,
560544
+                                                   ovl.fmt.VRRa.m5,
560544
+                                                   ovl.fmt.VRRa.rxb); goto ok;
560544
    case 0xe700000000eeULL: /* VFMIN */ goto unimplemented;
560544
    case 0xe700000000efULL: /* VFMAX */ goto unimplemented;
560544
    case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, ovl.fmt.VRR.v1,
560544
@@ -21148,7 +21953,13 @@ s390_decode_and_irgen(const UChar *bytes, UInt insn_length, DisResult *dres)
560544
       dis_res->jk_StopHere = Ijk_Boring;
560544
    }
560544
 
560544
-   if (status == S390_DECODE_OK) return insn_length;  /* OK */
560544
+   if (status == S390_DECODE_OK) {
560544
+      /* Adjust status if a specification exception was indicated. */
560544
+      if (is_specification_exception())
560544
+         status = S390_DECODE_SPECIFICATION_EXCEPTION;
560544
+      else
560544
+         return insn_length;  /* OK */
560544
+   }
560544
 
560544
    /* Decoding failed somehow */
560544
    if (sigill_diag) {
560544
@@ -21166,6 +21977,10 @@ s390_decode_and_irgen(const UChar *bytes, UInt insn_length, DisResult *dres)
560544
          vex_printf("unimplemented special insn: ");
560544
          break;
560544
 
560544
+      case S390_DECODE_SPECIFICATION_EXCEPTION:
560544
+         vex_printf("specification exception: ");
560544
+         break;
560544
+
560544
       case S390_DECODE_ERROR:
560544
          vex_printf("decoding error: ");
560544
          break;
560544
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
560544
index 98ac938..22cdd04 100644
560544
--- a/VEX/priv/host_s390_defs.c
560544
+++ b/VEX/priv/host_s390_defs.c
560544
@@ -1711,6 +1711,23 @@ emit_VRR_VVM(UChar *p, ULong op, UChar v1, UChar v2, UChar m4)
560544
    return emit_6bytes(p, the_insn);
560544
 }
560544
 
560544
+static UChar *
560544
+emit_VRR_VVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar m3, UChar m4,
560544
+               UChar m5)
560544
+{
560544
+   ULong the_insn = op;
560544
+   ULong rxb = s390_update_rxb(0, 1, &v1);
560544
+   rxb = s390_update_rxb(rxb, 2, &v2);
560544
+
560544
+   the_insn |= ((ULong)v1) << 36;
560544
+   the_insn |= ((ULong)v2) << 32;
560544
+   the_insn |= ((ULong)m5) << 20;
560544
+   the_insn |= ((ULong)m4) << 16;
560544
+   the_insn |= ((ULong)m3) << 12;
560544
+   the_insn |= ((ULong)rxb) << 8;
560544
+
560544
+   return emit_6bytes(p, the_insn);
560544
+}
560544
 
560544
 static UChar *
560544
 emit_VRR_VVVM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4)
560544
@@ -1762,6 +1779,26 @@ emit_VRR_VVVV(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4)
560544
    return emit_6bytes(p, the_insn);
560544
 }
560544
 
560544
+static UChar *
560544
+emit_VRRe_VVVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar v4,
560544
+                 UChar m5, UChar m6)
560544
+{
560544
+   ULong the_insn = op;
560544
+   ULong rxb = s390_update_rxb(0, 1, &v1);
560544
+   rxb = s390_update_rxb(rxb, 2, &v2);
560544
+   rxb = s390_update_rxb(rxb, 3, &v3);
560544
+   rxb = s390_update_rxb(rxb, 4, &v4);
560544
+
560544
+   the_insn |= ((ULong)v1) << 36;
560544
+   the_insn |= ((ULong)v2) << 32;
560544
+   the_insn |= ((ULong)v3) << 28;
560544
+   the_insn |= ((ULong)m6) << 24;
560544
+   the_insn |= ((ULong)m5) << 16;
560544
+   the_insn |= ((ULong)v4) << 12;
560544
+   the_insn |= ((ULong)rxb) << 8;
560544
+
560544
+   return emit_6bytes(p, the_insn);
560544
+}
560544
 
560544
 static UChar *
560544
 emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3)
560544
@@ -1777,6 +1814,33 @@ emit_VRR_VRR(UChar *p, ULong op, UChar v1, UChar r2, UChar r3)
560544
    return emit_6bytes(p, the_insn);
560544
 }
560544
 
560544
+static UChar *
560544
+emit_VRR_VVVMMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4,
560544
+                UChar m5, UChar m6)
560544
+{
560544
+   ULong the_insn = op;
560544
+   ULong rxb = s390_update_rxb(0, 1, &v1);
560544
+   rxb = s390_update_rxb(rxb, 2, &v2);
560544
+   rxb = s390_update_rxb(rxb, 3, &v3);
560544
+
560544
+   the_insn |= ((ULong)v1) << 36;
560544
+   the_insn |= ((ULong)v2) << 32;
560544
+   the_insn |= ((ULong)v3) << 28;
560544
+   the_insn |= ((ULong)m6) << 20;
560544
+   the_insn |= ((ULong)m5) << 16;
560544
+   the_insn |= ((ULong)m4) << 12;
560544
+   the_insn |= ((ULong)rxb) << 8;
560544
+
560544
+   return emit_6bytes(p, the_insn);
560544
+}
560544
+
560544
+static UChar*
560544
+emit_VRR_VVVMM(UChar *p, ULong op, UChar v1, UChar v2, UChar v3, UChar m4,
560544
+               UChar m5)
560544
+{
560544
+   return emit_VRR_VVVMMM(p, op, v1, v2, v3, m4, m5, 0);
560544
+}
560544
+
560544
 /*------------------------------------------------------------*/
560544
 /*--- Functions to emit particular instructions            ---*/
560544
 /*------------------------------------------------------------*/
560544
@@ -6057,6 +6121,116 @@ s390_emit_VLVGP(UChar *p, UChar v1, UChar r2, UChar r3)
560544
    return emit_VRR_VRR(p, 0xE70000000062ULL, v1, r2, r3);
560544
 }
560544
 
560544
+static UChar *
560544
+s390_emit_VFPSO(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC6(MNM, VR, VR, UINT, UINT, UINT), "vfpso", v1, v2, m3, m4,
560544
+                  m5);
560544
+
560544
+   return emit_VRR_VVMMM(p, 0xE700000000CCULL, v1, v2, m3, m4, m5);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFA(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfa", v1, v2, v3, m4, m5);
560544
+
560544
+   return emit_VRR_VVVMM(p, 0xE700000000e3ULL, v1, v2, v3, m4, m5);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfs", v1, v2, v3, m4, m5);
560544
+
560544
+   return emit_VRR_VVVMM(p, 0xE700000000e2ULL, v1, v2, v3, m4, m5);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFM(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfm", v1, v2, v3, m4, m5);
560544
+
560544
+   return emit_VRR_VVVMM(p, 0xE700000000e7ULL, v1, v2, v3, m4, m5);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFD(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC6(MNM, VR, VR, VR, UINT, UINT), "vfd", v1, v2, v3, m4, m5);
560544
+
560544
+   return emit_VRR_VVVMM(p, 0xE700000000e5ULL, v1, v2, v3, m4, m5);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFSQ(UChar *p, UChar v1, UChar v2, UChar m3, UChar m4)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vfsq", v1, v2, m3, m4);
560544
+
560544
+   return emit_VRR_VVMMM(p, 0xE700000000CEULL, v1, v2, m3, m4, 0);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFMA(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
560544
+               UChar m6)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfma",
560544
+                  v1, v2, v3, v4, m5, m6);
560544
+
560544
+   return emit_VRRe_VVVVMM(p, 0xE7000000008fULL, v1, v2, v3, v4, m5, m6);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFMS(UChar *p, UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
560544
+               UChar m6)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC7(MNM, VR, VR, VR, VR, UINT, UINT), "vfms",
560544
+                  v1, v2, v3, v4, m5, m6);
560544
+
560544
+   return emit_VRRe_VVVVMM(p, 0xE7000000008eULL, v1, v2, v3, v4, m5, m6);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFCE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5,
560544
+               UChar m6)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfce",
560544
+                  v1, v2, v3, m4, m5, m6);
560544
+
560544
+   return emit_VRR_VVVMMM(p, 0xE700000000e8ULL, v1, v2, v3, m4, m5, m6);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFCH(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5,
560544
+               UChar m6)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfch",
560544
+                  v1, v2, v3, m4, m5, m6);
560544
+
560544
+   return emit_VRR_VVVMMM(p, 0xE700000000ebULL, v1, v2, v3, m4, m5, m6);
560544
+}
560544
+
560544
+static UChar *
560544
+s390_emit_VFCHE(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4, UChar m5,
560544
+                UChar m6)
560544
+{
560544
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
560544
+      s390_disasm(ENC7(MNM, VR, VR, VR, UINT, UINT, UINT), "vfche",
560544
+                  v1, v2, v3, m4, m5, m6);
560544
+
560544
+   return emit_VRR_VVVMMM(p, 0xE700000000eaULL, v1, v2, v3, m4, m5, m6);
560544
+}
560544
+
560544
 /*---------------------------------------------------------------*/
560544
 /*--- Constructors for the various s390_insn kinds            ---*/
560544
 /*---------------------------------------------------------------*/
560544
@@ -7201,7 +7375,6 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst,
560544
 {
560544
    s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
560544
 
560544
-   vassert(size == 16);
560544
 
560544
    insn->tag  = S390_INSN_VEC_TRIOP;
560544
    insn->size = size;
560544
@@ -7508,6 +7681,18 @@ s390_insn_as_string(const s390_insn *insn)
560544
          op = "v-vunpacku";
560544
          break;
560544
 
560544
+      case S390_VEC_FLOAT_NEG:
560544
+         op = "v-vfloatneg";
560544
+         break;
560544
+
560544
+      case S390_VEC_FLOAT_SQRT:
560544
+         op = "v-vfloatsqrt";
560544
+         break;
560544
+
560544
+      case S390_VEC_FLOAT_ABS:
560544
+         op = "v-vfloatabs";
560544
+         break;
560544
+
560544
       default:
560544
          goto fail;
560544
       }
560544
@@ -7880,6 +8065,13 @@ s390_insn_as_string(const s390_insn *insn)
560544
       case S390_VEC_PWSUM_DW:         op = "v-vpwsumdw"; break;
560544
       case S390_VEC_PWSUM_QW:         op = "v-vpwsumqw"; break;
560544
       case S390_VEC_INIT_FROM_GPRS:   op = "v-vinitfromgprs"; break;
560544
+      case S390_VEC_FLOAT_ADD:        op = "v-vfloatadd"; break;
560544
+      case S390_VEC_FLOAT_SUB:        op = "v-vfloatsub"; break;
560544
+      case S390_VEC_FLOAT_MUL:        op = "v-vfloatmul"; break;
560544
+      case S390_VEC_FLOAT_DIV:        op = "v-vfloatdiv"; break;
560544
+      case S390_VEC_FLOAT_COMPARE_EQUAL: op = "v-vfloatcmpeq"; break;
560544
+      case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL:  op = "v-vfloatcmple"; break;
560544
+      case S390_VEC_FLOAT_COMPARE_LESS: op = "v-vfloatcmpl"; break;
560544
       default: goto fail;
560544
       }
560544
       s390_sprintf(buf, "%M %R, %R, %R", op, insn->variant.vec_binop.dst,
560544
@@ -7889,6 +8081,8 @@ s390_insn_as_string(const s390_insn *insn)
560544
    case S390_INSN_VEC_TRIOP:
560544
       switch (insn->variant.vec_triop.tag) {
560544
       case S390_VEC_PERM:  op = "v-vperm";  break;
560544
+      case S390_VEC_FLOAT_MADD: op = "v-vfloatmadd"; break;
560544
+      case S390_VEC_FLOAT_MSUB: op = "v-vfloatmsub"; break;
560544
       default: goto fail;
560544
       }
560544
       s390_sprintf(buf, "%M %R, %R, %R, %R", op, insn->variant.vec_triop.dst,
560544
@@ -9036,6 +9230,27 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
560544
       return s390_emit_VPOPCT(buf, v1, v2, s390_getM_from_size(insn->size));
560544
    }
560544
 
560544
+   case S390_VEC_FLOAT_NEG: {
560544
+      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
560544
+      vassert(insn->size == 8);
560544
+      UChar v1 = hregNumber(insn->variant.unop.dst);
560544
+      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
560544
+      return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0);
560544
+   }
560544
+   case S390_VEC_FLOAT_ABS: {
560544
+      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
560544
+      vassert(insn->size == 8);
560544
+      UChar v1 = hregNumber(insn->variant.unop.dst);
560544
+      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
560544
+      return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2);
560544
+   }
560544
+   case S390_VEC_FLOAT_SQRT: {
560544
+      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
560544
+      vassert(insn->size == 8);
560544
+      UChar v1 = hregNumber(insn->variant.unop.dst);
560544
+      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
560544
+      return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0);
560544
+   }
560544
    default:
560544
       vpanic("s390_insn_unop_emit");
560544
    }
560544
@@ -11049,6 +11264,21 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
560544
          return s390_emit_VSUMQ(buf, v1, v2, v3, s390_getM_from_size(size));
560544
       case S390_VEC_INIT_FROM_GPRS:
560544
          return s390_emit_VLVGP(buf, v1, v2, v3);
560544
+      case S390_VEC_FLOAT_ADD:
560544
+         return s390_emit_VFA(buf, v1, v2, v3, s390_getM_from_size(size), 0);
560544
+      case S390_VEC_FLOAT_SUB:
560544
+         return s390_emit_VFS(buf, v1, v2, v3, s390_getM_from_size(size), 0);
560544
+      case S390_VEC_FLOAT_MUL:
560544
+         return s390_emit_VFM(buf, v1, v2, v3, s390_getM_from_size(size), 0);
560544
+      case S390_VEC_FLOAT_DIV:
560544
+         return s390_emit_VFD(buf, v1, v2, v3, s390_getM_from_size(size), 0);
560544
+      case S390_VEC_FLOAT_COMPARE_EQUAL:
560544
+         return s390_emit_VFCE(buf, v1, v2, v3, s390_getM_from_size(size), 0, 0);
560544
+      case S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL:
560544
+         return s390_emit_VFCH(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0);
560544
+      case S390_VEC_FLOAT_COMPARE_LESS:
560544
+         return s390_emit_VFCHE(buf, v1, v3, v2, s390_getM_from_size(size), 0, 0);
560544
+
560544
       default:
560544
          goto fail;
560544
    }
560544
@@ -11070,8 +11300,14 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
560544
    UChar v4 = hregNumber(insn->variant.vec_triop.op3);
560544
 
560544
    switch (tag) {
560544
-      case S390_VEC_PERM:
560544
+      case S390_VEC_PERM: {
560544
+         vassert(insn->size == 16);
560544
          return s390_emit_VPERM(buf, v1, v2, v3, v4);
560544
+      }
560544
+      case S390_VEC_FLOAT_MADD:
560544
+         return s390_emit_VFMA(buf, v1, v2, v3, v4, 0, 3);
560544
+      case S390_VEC_FLOAT_MSUB:
560544
+         return s390_emit_VFMS(buf, v1, v2, v3, v4, 0, 3);
560544
       default:
560544
          goto fail;
560544
    }
560544
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
560544
index 7ea0101..40f0472 100644
560544
--- a/VEX/priv/host_s390_defs.h
560544
+++ b/VEX/priv/host_s390_defs.h
560544
@@ -202,7 +202,10 @@ typedef enum {
560544
    S390_VEC_ABS,
560544
    S390_VEC_COUNT_LEADING_ZEROES,
560544
    S390_VEC_COUNT_TRAILING_ZEROES,
560544
-   S390_VEC_COUNT_ONES
560544
+   S390_VEC_COUNT_ONES,
560544
+   S390_VEC_FLOAT_NEG,
560544
+   S390_VEC_FLOAT_ABS,
560544
+   S390_VEC_FLOAT_SQRT
560544
 } s390_unop_t;
560544
 
560544
 /* The kind of ternary BFP operations */
560544
@@ -394,11 +397,20 @@ typedef enum {
560544
    S390_VEC_PWSUM_QW,
560544
 
560544
    S390_VEC_INIT_FROM_GPRS,
560544
+   S390_VEC_FLOAT_ADD,
560544
+   S390_VEC_FLOAT_SUB,
560544
+   S390_VEC_FLOAT_MUL,
560544
+   S390_VEC_FLOAT_DIV,
560544
+   S390_VEC_FLOAT_COMPARE_EQUAL,
560544
+   S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL,
560544
+   S390_VEC_FLOAT_COMPARE_LESS
560544
 } s390_vec_binop_t;
560544
 
560544
 /* The vector operations with three operands */
560544
 typedef enum {
560544
-   S390_VEC_PERM
560544
+   S390_VEC_PERM,
560544
+   S390_VEC_FLOAT_MADD,
560544
+   S390_VEC_FLOAT_MSUB
560544
 } s390_vec_triop_t;
560544
 
560544
 /* The details of a CDAS insn. Carved out to keep the size of
560544
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
560544
index bc34f90..79581ff 100644
560544
--- a/VEX/priv/host_s390_isel.c
560544
+++ b/VEX/priv/host_s390_isel.c
560544
@@ -787,10 +787,12 @@ get_bfp_rounding_mode(ISelEnv *env, IRExpr *irrm)
560544
       IRRoundingMode mode = irrm->Iex.Const.con->Ico.U32;
560544
 
560544
       switch (mode) {
560544
-      case Irrm_NEAREST:  return S390_BFP_ROUND_NEAREST_EVEN;
560544
-      case Irrm_ZERO:     return S390_BFP_ROUND_ZERO;
560544
-      case Irrm_PosINF:   return S390_BFP_ROUND_POSINF;
560544
-      case Irrm_NegINF:   return S390_BFP_ROUND_NEGINF;
560544
+      case Irrm_NEAREST_TIE_AWAY_0: return S390_BFP_ROUND_NEAREST_AWAY;
560544
+      case Irrm_PREPARE_SHORTER:    return S390_BFP_ROUND_PREPARE_SHORT;
560544
+      case Irrm_NEAREST:            return S390_BFP_ROUND_NEAREST_EVEN;
560544
+      case Irrm_ZERO:               return S390_BFP_ROUND_ZERO;
560544
+      case Irrm_PosINF:             return S390_BFP_ROUND_POSINF;
560544
+      case Irrm_NegINF:             return S390_BFP_ROUND_NEGINF;
560544
       default:
560544
          vpanic("get_bfp_rounding_mode");
560544
       }
560544
@@ -3871,6 +3873,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
560544
          vec_op = S390_VEC_COUNT_ONES;
560544
          goto Iop_V_wrk;
560544
 
560544
+      case Iop_Neg64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_NEG;
560544
+         goto Iop_V_wrk;
560544
+
560544
+      case Iop_Abs64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_ABS;
560544
+         goto Iop_V_wrk;
560544
+
560544
+
560544
       Iop_V_wrk: {
560544
          dst = newVRegV(env);
560544
          reg1 = s390_isel_vec_expr(env, arg);
560544
@@ -4388,6 +4401,28 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
560544
          vec_op = S390_VEC_ELEM_ROLL_V;
560544
          goto Iop_VV_wrk;
560544
 
560544
+      case Iop_CmpEQ64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_COMPARE_EQUAL;
560544
+         goto Iop_VV_wrk;
560544
+
560544
+      case Iop_CmpLE64Fx2: {
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL;
560544
+         goto Iop_VV_wrk;
560544
+      }
560544
+
560544
+      case Iop_CmpLT64Fx2: {
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_COMPARE_LESS;
560544
+         goto Iop_VV_wrk;
560544
+      }
560544
+
560544
+      case Iop_Sqrt64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_SQRT;
560544
+         goto Iop_irrm_V_wrk;
560544
+
560544
       case Iop_ShlN8x16:
560544
          size = 1;
560544
          shift_op = S390_VEC_ELEM_SHL_INT;
560544
@@ -4493,6 +4528,14 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
560544
          return dst;
560544
       }
560544
 
560544
+      Iop_irrm_V_wrk: {
560544
+         set_bfp_rounding_mode_in_fpc(env, arg1);
560544
+         reg1 = s390_isel_vec_expr(env, arg2);
560544
+
560544
+         addInstr(env, s390_insn_unop(size, vec_op, dst, s390_opnd_reg(reg1)));
560544
+         return dst;
560544
+      }
560544
+
560544
       case Iop_64HLtoV128:
560544
          reg1 = s390_isel_int_expr(env, arg1);
560544
          reg2 = s390_isel_int_expr(env, arg2);
560544
@@ -4516,6 +4559,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
560544
       IRExpr* arg1 = expr->Iex.Triop.details->arg1;
560544
       IRExpr* arg2 = expr->Iex.Triop.details->arg2;
560544
       IRExpr* arg3 = expr->Iex.Triop.details->arg3;
560544
+      s390_vec_binop_t vec_op; /* host binop tag passed to s390_insn_vec_binop, not an IR opcode */
560544
       switch (op) {
560544
       case Iop_SetElem8x16:
560544
          size = 1;
560544
@@ -4551,6 +4595,36 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
560544
                                            dst, reg1, reg2, reg3));
560544
          return dst;
560544
 
560544
+      case Iop_Add64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_ADD;
560544
+         goto Iop_irrm_VV_wrk;
560544
+
560544
+      case Iop_Sub64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_SUB;
560544
+         goto Iop_irrm_VV_wrk;
560544
+
560544
+      case Iop_Mul64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_MUL;
560544
+         goto Iop_irrm_VV_wrk;
560544
+      case Iop_Div64Fx2:
560544
+         size = 8;
560544
+         vec_op = S390_VEC_FLOAT_DIV;
560544
+         goto Iop_irrm_VV_wrk;
560544
+
560544
+      Iop_irrm_VV_wrk: {
560544
+         set_bfp_rounding_mode_in_fpc(env, arg1);
560544
+         reg1 = s390_isel_vec_expr(env, arg2);
560544
+         reg2 = s390_isel_vec_expr(env, arg3);
560544
+
560544
+         addInstr(env, s390_insn_vec_binop(size, vec_op,
560544
+                                           dst, reg1, reg2));
560544
+
560544
+         return dst;
560544
+       }
560544
+
560544
       default:
560544
          goto irreducible;
560544
       }