Tree - rpms/gcc-toolset-11-valgrind

rpms / gcc-toolset-11-valgrind

Blame SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch

Blob History Raw

		39285c	`commit 3cc0232c46a5905b4a6c2fbd302b58bf5f90b3d5`
		39285c	`Author: Carl Love <cel@us.ibm.com>`
		39285c	`Date: Mon Jan 11 16:00:57 2021 -0600`
		39285c
		39285c	`PPC64: ISA 3.1 VSX PCV Generate Operations`
		39285c
		39285c	`xxgenpcvbm VSX Vector Generate PCV from Byte Mask`
		39285c	`xxgenpcvdm VSX Vector Generate PCV from Doubleword Mask`
		39285c	`xxgenpcvhm VSX Vector Generate PCV from Halfword Mask`
		39285c	`xxgenpcvwm VSX Vector Generate PCV from Word Mask`
		39285c
		39285c	`diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h`
		39285c	`index deda4dfce..54ce923a9 100644`
		39285c	`--- a/VEX/priv/guest_ppc_defs.h`
		39285c	`+++ b/VEX/priv/guest_ppc_defs.h`
		39285c	`@@ -169,6 +169,23 @@ void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc,`
		39285c	`void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc,`
		39285c	`UInt reg, UInt *result);`
		39285c
		39285c	`+extern void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi,`
		39285c	`+ ULong src_lo,`
		39285c	`+ UInt rtn_val, UInt IMM );`
		39285c	`+extern void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi,`
		39285c	`+ ULong src_lo,`
		39285c	`+ UInt rtn_val, UInt IMM );`
		39285c	`+extern void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi,`
		39285c	`+ ULong src_lo,`
		39285c	`+ UInt rtn_val, UInt IMM );`
		39285c	`+extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi,`
		39285c	`+ ULong src_lo,`
		39285c	`+ UInt rtn_val, UInt IMM );`
		39285c	`+`
		39285c	`/* 8-bit XO value from instruction description */`
		39285c	`#define XVI4GER8 0b00100011`
		39285c	`#define XVI4GER8PP 0b00100010`
		39285c	`diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c`
		39285c	`index c24191ef3..75497abb9 100644`
		39285c	`--- a/VEX/priv/guest_ppc_helpers.c`
		39285c	`+++ b/VEX/priv/guest_ppc_helpers.c`
		39285c	`@@ -701,6 +701,738 @@ ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC,`
		39285c	`#undef MAX_IMM_BITS`
		39285c	`}`
		39285c
		39285c	`+/*--------------------------------------------------*/`
		39285c	`+/*---- VSX Vector Generate PCV from Mask helpers ---*/`
		39285c	`+/*--------------------------------------------------*/`
		39285c	`+static void write_VSX_entry (VexGuestPPC64State* gst, UInt reg_offset,`
		39285c	`+ ULong *vsx_entry)`
		39285c	`+{`
		39285c	`+ U128* pU128_dst;`
		39285c	`+ pU128_dst = (U128*) (((UChar*) gst) + reg_offset);`
		39285c	`+`
		39285c	`+ /* The U128 type is defined as an array of unsigned integers. */`
		39285c	`+ /* Writing in LE order */`
		39285c	`+ (*pU128_dst)[0] = (UInt)(vsx_entry[1] & 0xFFFFFFFF);`
		39285c	`+ (*pU128_dst)[1] = (UInt)(vsx_entry[1] >> 32);`
		39285c	`+ (*pU128_dst)[2] = (UInt)(vsx_entry[0] & 0xFFFFFFFF);`
		39285c	`+ (*pU128_dst)[3] = (UInt)(vsx_entry[0] >> 32);`
		39285c	`+ return;`
		39285c	`+}`
		39285c	`+`
		39285c	`+/* CALLED FROM GENERATED CODE */`
		39285c	`+void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi, ULong src_lo,`
		39285c	`+ UInt reg_offset, UInt imm ) {`
		39285c	`+ /* The function computes the 128-bit result then writes it directly`
		39285c	`+ into the guest state VSX register. */`
		39285c	`+`
		39285c	`+ UInt i, shift_by, sel_shift_by, half_sel;`
		39285c	`+ ULong index, src, result[2];`
		39285c	`+ ULong j;`
		39285c	`+`
		39285c	`+ result[0] = 0;`
		39285c	`+ result[1] = 0;`
		39285c	`+ j = 0;`
		39285c	`+`
		39285c	`+ /* The algorithm in the ISA is written with IBM numbering zero on left and`
		39285c	`+ N-1 on right. The loop index is converted to "i" to match the algorithm`
		39285c	`+ for clarity of matching the C code to the algorithm in the ISA. */`
		39285c	`+`
		39285c	`+ if (imm == 0b00) { // big endian expansion`
		39285c	`+ for( index = 0; index < 16; index++) {`
		39285c	`+ i = 15 - index;`
		39285c	`+`
		39285c	`+ shift_by = i*8;`
		39285c	`+`
		39285c	`+ if ( i >= 8) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 7;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ result[half_sel] \|= j << shift_by;`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (index + (unsigned long long)0x10) << shift_by;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+`
		39285c	`+ } else if (imm == 0b01) { // big endian compression`
		39285c	`+ /* If IMM=0b00001, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement a`
		39285c	`+ compression of the sparse byte elements in a source vector specified`
		39285c	`+ by the byte-element mask in VSR[VRB+32] into the leftmost byte`
		39285c	`+ elements of a result vector.`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 16; index++) {`
		39285c	`+ i = 15 - index;`
		39285c	`+ shift_by = i*8;`
		39285c	`+`
		39285c	`+ if ( i >= 8) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 7;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j >= 8)`
		39285c	`+ result[1] \|= (index) << (15 - j)*8;`
		39285c	`+ else`
		39285c	`+ result[0] \|= (index) << (7 - j)*8;`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+ /* The algorithm says set to undefined, leave as 0`
		39285c	`+ for( index = 3 - j; index < 4; index++) {`
		39285c	`+ result \|= (0 << (index*8));`
		39285c	`+ }`
		39285c	`+ */`
		39285c	`+`
		39285c	`+ } else if (imm == 0b10) { //little-endian expansion`
		39285c	`+ /* If IMM=0b00010, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement an`
		39285c	`+ expansion of the rightmost byte elements of a source vector into the`
		39285c	`+ byte elements of a result vector specified by the byte-element mask`
		39285c	`+ in VSR[VRB+32]. */`
		39285c	`+ for( index = 0; index < 16; index++) {`
		39285c	`+ i = index;`
		39285c	`+`
		39285c	`+ shift_by = i*8;`
		39285c	`+`
		39285c	`+ if ( i >= 8) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 7;`
		39285c	`+`
		39285c	`+ /* mod shift amount by 8 since src is either the upper or lower`
		39285c	`+ 64-bits. */`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ result[half_sel] \|= j << shift_by;`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (index + (unsigned long long)0x10) << shift_by;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b11) { //little-endian compression`
		39285c	`+ /* If IMM=0b00011, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement a`
		39285c	`+ compression of the sparse byte elements in a source vector specified`
		39285c	`+ by the byte-element mask in VSR[VRB+32] into the rightmost byte`
		39285c	`+ elements of a result vector. */`
		39285c	`+`
		39285c	`+ for( index = 0; index < 16; index++) {`
		39285c	`+ i = index;`
		39285c	`+`
		39285c	`+ shift_by = i*8;`
		39285c	`+`
		39285c	`+ if ( i >= 8) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 7;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j >= 8)`
		39285c	`+ result[0] \|= (index) << (j-8)*8;`
		39285c	`+ else`
		39285c	`+ result[1] \|= (index) << j*8;`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ /* The algorithm says set to undefined, leave as 0`
		39285c	`+ for( index = 3 - j; index < 4; index++) {`
		39285c	`+ result \|= (0 << (index*8));`
		39285c	`+ }`
		39285c	`+ */`
		39285c	`+`
		39285c	`+ } else {`
		39285c	`+ vex_printf("ERROR, vector_gen_pvc_byte_mask_dirty_helper, imm value %u not supported.\n",`
		39285c	`+ imm);`
		39285c	`+ vassert(0);`
		39285c	`+ }`
		39285c	`+ write_VSX_entry( gst, reg_offset, result);`
		39285c	`+}`
		39285c	`+`
		39285c	`+/* CALLED FROM GENERATED CODE */`
		39285c	`+void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi, ULong src_lo,`
		39285c	`+ UInt reg_offset,`
		39285c	`+ UInt imm ) {`
		39285c	`+ /* The function computes the 128-bit result then writes it directly`
		39285c	`+ into the guest state VSX register. */`
		39285c	`+ UInt i, shift_by, sel_shift_by, half_sel;`
		39285c	`+ ULong index, src, result[2];`
		39285c	`+ ULong j;`
		39285c	`+`
		39285c	`+ result[0] = 0;`
		39285c	`+ result[1] = 0;`
		39285c	`+ j = 0;`
		39285c	`+`
		39285c	`+ /* The algorithm in the ISA is written with IBM numbering zero on left and`
		39285c	`+ N-1 on right. The loop index is converted to "i" to match the algorithm`
		39285c	`+ for clarity of matching the C code to the algorithm in the ISA. */`
		39285c	`+`
		39285c	`+ if (imm == 0b00) { // big endian expansion`
		39285c	`+ /* If IMM=0b00000, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement an`
		39285c	`+ expansion of the leftmost halfword elements of a source vector into`
		39285c	`+ the halfword elements of a result vector specified by the halfword-`
		39285c	`+ element mask in VSR[VRB+32].`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 8; index++) {`
		39285c	`+ i = 7 - index;`
		39285c	`+`
		39285c	`+ shift_by = i*16;`
		39285c	`+`
		39285c	`+ if ( i >= 4) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 15;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ // half-word i, byte 0`
		39285c	`+ result[half_sel] \|= (2*j + 0x0) << (shift_by+8);`
		39285c	`+ // half-word i, byte 1`
		39285c	`+ result[half_sel] \|= (2*j + 0x1) << shift_by;`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (2*index + 0x10) << (shift_by+8);`
		39285c	`+ result[half_sel] \|= (2*index + 0x11) << shift_by;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b01) { // big endian compression`
		39285c	`+ /* If IMM=0b00001, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement a`
		39285c	`+ compression of the sparse halfword elements in a source vector`
		39285c	`+ specified by the halfword-element mask in VSR[VRB+32] into the`
		39285c	`+ leftmost halfword elements of a result vector.`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 8; index++) {`
		39285c	`+ i = 7 - index;`
		39285c	`+`
		39285c	`+ shift_by = i*16;`
		39285c	`+`
		39285c	`+ if ( i >= 4) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 15;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j >= 4) {`
		39285c	`+ // half-word i, byte 0`
		39285c	`+ result[1] \|= (2*index + 0x0) << ((7 - j)*16 + 8);`
		39285c	`+ // half-word i, byte 1`
		39285c	`+ result[1] \|= (2*index + 0x1) << ((7 - j)*16);`
		39285c	`+ } else {`
		39285c	`+ // half-word i, byte 0`
		39285c	`+ result[0] \|= (2*index + 0x0) << ((3 - j)*16 + 8);`
		39285c	`+ // half-word i, byte 1`
		39285c	`+ result[0] \|= (2*index + 0x1) << ((3 - j)*16);`
		39285c	`+ }`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b10) { //little-endian expansion`
		39285c	`+ /* If IMM=0b00010, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement an`
		39285c	`+ expansion of the rightmost halfword elements of a source vector into`
		39285c	`+ the halfword elements of a result vector specified by the halfword-`
		39285c	`+ element mask in VSR[VRB+32].`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 8; index++) {`
		39285c	`+ i = index;`
		39285c	`+ shift_by = i*16;`
		39285c	`+`
		39285c	`+ if ( i >= 4) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 15;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ // half-word i, byte 0`
		39285c	`+ result[half_sel] \|= (2*j + 0x00) << shift_by;`
		39285c	`+ // half-word i, byte 1`
		39285c	`+ result[half_sel] \|= (2*j + 0x01) << (shift_by+8);`
		39285c	`+ j++;`
		39285c	`+`
		39285c	`+ } else {`
		39285c	`+ // half-word i, byte 0`
		39285c	`+ result[half_sel] \|= (2*index + 0x10) << shift_by;`
		39285c	`+ // half-word i, byte 1`
		39285c	`+ result[half_sel] \|= (2*index + 0x11) << (shift_by+8);`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b11) { //little-endian compression`
		39285c	`+ /* If IMM=0b00011, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement a`
		39285c	`+ compression of the sparse halfword elements in a source vector`
		39285c	`+ specified by the halfword-element mask in VSR[VRB+32] into the`
		39285c	`+ rightmost halfword elements of a result vector. */`
		39285c	`+ for( index = 0; index < 8; index++) {`
		39285c	`+ i = index;`
		39285c	`+ shift_by = i*16;`
		39285c	`+`
		39285c	`+ if ( i >= 4) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 15;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j >= 4) {`
		39285c	`+ // half-word j, byte 0`
		39285c	`+ result[0] \|= (2*index + 0x0) << ((j-4)*16);`
		39285c	`+ // half-word j, byte 1`
		39285c	`+ result[0] \|= (2*index + 0x1) << ((j-4)*16+8);`
		39285c	`+ } else {`
		39285c	`+ // half-word j, byte 0`
		39285c	`+ result[1] \|= (2*index + 0x0) << (j*16);`
		39285c	`+ // half-word j, byte 1`
		39285c	`+ result[1] \|= (2*index + 0x1) << ((j*16)+8);`
		39285c	`+ }`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else {`
		39285c	`+ vex_printf("ERROR, vector_gen_pvc_hword_dirty_mask_helper, imm value %u not supported.\n",`
		39285c	`+ imm);`
		39285c	`+ vassert(0);`
		39285c	`+ }`
		39285c	`+ write_VSX_entry( gst, reg_offset, result);`
		39285c	`+}`
		39285c	`+`
		39285c	`+/* CALLED FROM GENERATED CODE */`
		39285c	`+void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi, ULong src_lo,`
		39285c	`+ UInt reg_offset, UInt imm ) {`
		39285c	`+ /* The function computes the 128-bit result then writes it directly`
		39285c	`+ into the guest state VSX register. */`
		39285c	`+ UInt i, shift_by, sel_shift_by, half_sel;`
		39285c	`+ ULong index, src, result[2];`
		39285c	`+ ULong j;`
		39285c	`+`
		39285c	`+ result[0] = 0;`
		39285c	`+ result[1] = 0;`
		39285c	`+ j = 0;`
		39285c	`+`
		39285c	`+ /* The algorithm in the ISA is written with IBM numbering zero on left and`
		39285c	`+ N-1 on right. The loop index is converted to "i" to match the algorithm`
		39285c	`+ for clarity of matching the C code to the algorithm in the ISA. */`
		39285c	`+`
		39285c	`+ if (imm == 0b00) { // big endian expansion`
		39285c	`+ /* If IMM=0b00000, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement an`
		39285c	`+ expansion of the leftmost word elements of a source vector into the`
		39285c	`+ word elements of a result vector specified by the word-element mask`
		39285c	`+ in VSR[VRB+32].`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 4; index++) {`
		39285c	`+ i = 3 - index;`
		39285c	`+`
		39285c	`+ shift_by = i*32;`
		39285c	`+`
		39285c	`+ if ( i >= 2) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 31;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ result[half_sel] \|= (4*j+0) << (shift_by+24); // word i, byte 0`
		39285c	`+ result[half_sel] \|= (4*j+1) << (shift_by+16); // word i, byte 1`
		39285c	`+ result[half_sel] \|= (4*j+2) << (shift_by+8); // word i, byte 2`
		39285c	`+ result[half_sel] \|= (4*j+3) << shift_by; // word i, byte 3`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (4*index + 0x10) << (shift_by+24);`
		39285c	`+ result[half_sel] \|= (4*index + 0x11) << (shift_by+16);`
		39285c	`+ result[half_sel] \|= (4*index + 0x12) << (shift_by+8);`
		39285c	`+ result[half_sel] \|= (4*index + 0x13) << shift_by;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b01) { // big endian compression`
		39285c	`+ /* If IMM=0b00001, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement a`
		39285c	`+ compression of the sparse word elements in a source vector specified`
		39285c	`+ by the word-element mask in VSR[VRB+32] into the leftmost word`
		39285c	`+ elements of a result vector.`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 4; index++) {`
		39285c	`+ i = 3 - index;`
		39285c	`+`
		39285c	`+ shift_by = i*32;`
		39285c	`+`
		39285c	`+ if ( i >= 2) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 31;`
		39285c	`+`
		39285c	`+ if (((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j >= 2) {`
		39285c	`+ // word j, byte 0`
		39285c	`+ result[1] \|= (4*index+0) << ((3 - j)*32 + 24);`
		39285c	`+ // word j, byte 1`
		39285c	`+ result[1] \|= (4*index+1) << ((3 - j)*32 + 16);`
		39285c	`+ // word j, byte 2`
		39285c	`+ result[1] \|= (4*index+2) << ((3 - j)*32 + 8);`
		39285c	`+ // word j, byte 3`
		39285c	`+ result[1] \|= (4*index+3) << ((3 - j)*32 + 0);`
		39285c	`+ } else {`
		39285c	`+ result[0] \|= (4*index+0) << ((1 - j)*32 + 24);`
		39285c	`+ result[0] \|= (4*index+1) << ((1 - j)*32 + 16);`
		39285c	`+ result[0] \|= (4*index+2) << ((1 - j)*32 + 8);`
		39285c	`+ result[0] \|= (4*index+3) << ((1 - j)*32 + 0);`
		39285c	`+ }`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b10) { //little-endian expansion`
		39285c	`+ /* If IMM=0b00010, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement an`
		39285c	`+ expansion of the rightmost word elements of a source vector into the`
		39285c	`+ word elements of a result vector specified by the word-element mask`
		39285c	`+ in VSR[VRB+32].`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 4; index++) {`
		39285c	`+ i = index;`
		39285c	`+`
		39285c	`+ shift_by = i*32;`
		39285c	`+`
		39285c	`+ if ( i >= 2) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 31;`
		39285c	`+`
		39285c	`+ if (((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ result[half_sel] \|= (4*j+0) << (shift_by + 0); // word j, byte 0`
		39285c	`+ result[half_sel] \|= (4*j+1) << (shift_by + 8); // word j, byte 1`
		39285c	`+ result[half_sel] \|= (4*j+2) << (shift_by + 16); // word j, byte 2`
		39285c	`+ result[half_sel] \|= (4*j+3) << (shift_by + 24); // word j, byte 3`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (4*index + 0x10) << (shift_by + 0);`
		39285c	`+ result[half_sel] \|= (4*index + 0x11) << (shift_by + 8);`
		39285c	`+ result[half_sel] \|= (4*index + 0x12) << (shift_by + 16);`
		39285c	`+ result[half_sel] \|= (4*index + 0x13) << (shift_by + 24);`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b11) { //little-endian compression`
		39285c	`+ /* If IMM=0b00011, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement a`
		39285c	`+ compression of the sparse word elements in a source vector specified`
		39285c	`+ by the word-element mask in VSR[VRB+32] into the rightmost word`
		39285c	`+ elements of a result vector. */`
		39285c	`+ for( index = 0; index < 4; index++) {`
		39285c	`+ i =index;`
		39285c	`+`
		39285c	`+ shift_by = i*32;`
		39285c	`+`
		39285c	`+ if ( i >= 2) {`
		39285c	`+ src = src_hi;`
		39285c	`+ shift_by = shift_by - 64;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = shift_by + 31;`
		39285c	`+`
		39285c	`+ if (((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j >= 2){`
		39285c	`+ // word j, byte 0`
		39285c	`+ result[0] \|= (4*index + 0x0) << ((j-2)*32+0);`
		39285c	`+ // word j, byte 1`
		39285c	`+ result[0] \|= (4*index + 0x1) << ((j-2)*32+8);`
		39285c	`+ // word j, byte 2`
		39285c	`+ result[0] \|= (4*index + 0x2) << ((j-2)*32+16);`
		39285c	`+ // word j, byte 3`
		39285c	`+ result[0] \|= (4*index + 0x3) << ((j-2)*32+24);`
		39285c	`+ } else {`
		39285c	`+ result[1] \|= (4*index + 0x0) << (j*32+0);`
		39285c	`+ result[1] \|= (4*index + 0x1) << (j*32+8);`
		39285c	`+ result[1] \|= (4*index + 0x2) << (j*32+16);`
		39285c	`+ result[1] \|= (4*index + 0x3) << (j*32+24);`
		39285c	`+ }`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+ } else {`
		39285c	`+ vex_printf("ERROR, vector_gen_pvc_word_mask_dirty_helper, imm value %u not supported.\n",`
		39285c	`+ imm);`
		39285c	`+ vassert(0);`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ write_VSX_entry( gst, reg_offset, result);`
		39285c	`+}`
		39285c	`+`
		39285c	`+/* CALLED FROM GENERATED CODE */`
		39285c	`+void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`+ ULong src_hi, ULong src_lo,`
		39285c	`+ UInt reg_offset, UInt imm ) {`
		39285c	`+ /* The function computes the 128-bit result then writes it directly`
		39285c	`+ into the guest state VSX register. */`
		39285c	`+ UInt sel_shift_by, half_sel;`
		39285c	`+ ULong index, src, result[2];`
		39285c	`+ ULong j, i;`
		39285c	`+`
		39285c	`+ result[0] = 0;`
		39285c	`+ result[1] = 0;`
		39285c	`+ j = 0;`
		39285c	`+`
		39285c	`+ /* The algorithm in the ISA is written with IBM numbering zero on left and`
		39285c	`+ N-1 on right. The loop index is converted to "i" to match the algorithm`
		39285c	`+ for clarity of matching the C code to the algorithm in the ISA. */`
		39285c	`+`
		39285c	`+ if (imm == 0b00) { // big endian expansion`
		39285c	`+ /* If IMM=0b00000, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement an`
		39285c	`+ expansion of the leftmost doubleword elements of a source vector into`
		39285c	`+ the doubleword elements of a result vector specified by the`
		39285c	`+ doubleword-element mask in VSR[VRB+32].`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 2; index++) {`
		39285c	`+ i = 1 - index;`
		39285c	`+`
		39285c	`+ if ( i == 1) {`
		39285c	`+ src = src_hi;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = 63;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ result[half_sel] \|= (8*j + 0x0) << 56; // dword i, byte 0`
		39285c	`+ result[half_sel] \|= (8*j + 0x1) << 48; // dword i, byte 1`
		39285c	`+ result[half_sel] \|= (8*j + 0x2) << 40; // dword i, byte 2`
		39285c	`+ result[half_sel] \|= (8*j + 0x3) << 32; // dword i, byte 3`
		39285c	`+ result[half_sel] \|= (8*j + 0x4) << 24; // dword i, byte 4`
		39285c	`+ result[half_sel] \|= (8*j + 0x5) << 16; // dword i, byte 5`
		39285c	`+ result[half_sel] \|= (8*j + 0x6) << 8; // dword i, byte 6`
		39285c	`+ result[half_sel] \|= (8*j + 0x7) << 0; // dword i, byte 7`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (8*index + 0x10) << 56;`
		39285c	`+ result[half_sel] \|= (8*index + 0x11) << 48;`
		39285c	`+ result[half_sel] \|= (8*index + 0x12) << 40;`
		39285c	`+ result[half_sel] \|= (8*index + 0x13) << 32;`
		39285c	`+ result[half_sel] \|= (8*index + 0x14) << 24;`
		39285c	`+ result[half_sel] \|= (8*index + 0x15) << 16;`
		39285c	`+ result[half_sel] \|= (8*index + 0x16) << 8;`
		39285c	`+ result[half_sel] \|= (8*index + 0x17) << 0;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+ } else if (imm == 0b01) { // big endian compression`
		39285c	`+ /* If IMM=0b00001, let pcv be the permute control vector required to`
		39285c	`+ enable a left-indexed permute (vperm or xxperm) to implement a`
		39285c	`+ compression of the sparse doubleword elements in a source vector`
		39285c	`+ specified by the doubleword-element mask in VSR[VRB+32] into the`
		39285c	`+ leftmost doubleword elements of a result vector.`
		39285c	`+ */`
		39285c	`+ for( index = 0; index < 2; index++) {`
		39285c	`+ i = 1 - index;`
		39285c	`+`
		39285c	`+ if ( i == 1) {`
		39285c	`+ src = src_hi;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = 63;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j == 1) {`
		39285c	`+ result[1] \|= (8*index + 0x0) << 56; // double-word j, byte 0`
		39285c	`+ result[1] \|= (8*index + 0x1) << 48; // double-word j, byte 1`
		39285c	`+ result[1] \|= (8*index + 0x2) << 40; // double-word j, byte 2`
		39285c	`+ result[1] \|= (8*index + 0x3) << 32; // double-word j, byte 3`
		39285c	`+ result[1] \|= (8*index + 0x4) << 24; // double-word j, byte 4`
		39285c	`+ result[1] \|= (8*index + 0x5) << 16; // double-word j, byte 5`
		39285c	`+ result[1] \|= (8*index + 0x6) << 8; // double-word j, byte 6`
		39285c	`+ result[1] \|= (8*index + 0x7) << 0; // double-word j, byte 7`
		39285c	`+ } else {`
		39285c	`+ result[0] \|= (8*index + 0x0) << 56; // double-word j, byte 0`
		39285c	`+ result[0] \|= (8*index + 0x1) << 48; // double-word j, byte 1`
		39285c	`+ result[0] \|= (8*index + 0x2) << 40; // double-word j, byte 2`
		39285c	`+ result[0] \|= (8*index + 0x3) << 32; // double-word j, byte 3`
		39285c	`+ result[0] \|= (8*index + 0x4) << 24; // double-word j, byte 4`
		39285c	`+ result[0] \|= (8*index + 0x5) << 16; // double-word j, byte 5`
		39285c	`+ result[0] \|= (8*index + 0x6) << 8; // double-word j, byte 6`
		39285c	`+ result[0] \|= (8*index + 0x7) << 0; // double-word j, byte 7`
		39285c	`+ }`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+ } else if (imm == 0b10) { //little-endian expansion`
		39285c	`+ /* If IMM=0b00010, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement an`
		39285c	`+ expansion of the rightmost doubleword elements of a source vector`
		39285c	`+ into the doubleword elements of a result vector specified by the`
		39285c	`+ doubleword-element mask in VSR[VRB+32].`
		39285c	`+ */`
		39285c	`+`
		39285c	`+ for( index = 0; index < 2; index++) {`
		39285c	`+ i = index;`
		39285c	`+`
		39285c	`+ if ( i == 1) {`
		39285c	`+ src = src_hi;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = 63;`
		39285c	`+`
		39285c	`+ if ( ((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ result[half_sel] \|= (8*j+0) << 0; // double-word i, byte 0`
		39285c	`+ result[half_sel] \|= (8*j+1) << 8; // double-word i, byte 1`
		39285c	`+ result[half_sel] \|= (8*j+2) << 16; // double-word i, byte 2`
		39285c	`+ result[half_sel] \|= (8*j+3) << 24; // double-word i, byte 3`
		39285c	`+ result[half_sel] \|= (8*j+4) << 32; // double-word i, byte 4`
		39285c	`+ result[half_sel] \|= (8*j+5) << 40; // double-word i, byte 5`
		39285c	`+ result[half_sel] \|= (8*j+6) << 48; // double-word i, byte 6`
		39285c	`+ result[half_sel] \|= (8*j+7) << 56; // double-word i, byte 7`
		39285c	`+ j++;`
		39285c	`+ } else {`
		39285c	`+ result[half_sel] \|= (8*index + 0x10) << 0;`
		39285c	`+ result[half_sel] \|= (8*index + 0x11) << 8;`
		39285c	`+ result[half_sel] \|= (8*index + 0x12) << 16;`
		39285c	`+ result[half_sel] \|= (8*index + 0x13) << 24;`
		39285c	`+ result[half_sel] \|= (8*index + 0x14) << 32;`
		39285c	`+ result[half_sel] \|= (8*index + 0x15) << 40;`
		39285c	`+ result[half_sel] \|= (8*index + 0x16) << 48;`
		39285c	`+ result[half_sel] \|= (8*index + 0x17) << 56;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ } else if (imm == 0b11) { //little-endian compression`
		39285c	`+ /* If IMM=0b00011, let pcv be the permute control vector required to`
		39285c	`+ enable a right-indexed permute (vpermr or xxpermr) to implement a`
		39285c	`+ compression of the sparse doubleword elements in a source vector`
		39285c	`+ specified by the doubleword-element mask in VSR[VRB+32] into the`
		39285c	`+ rightmost doubleword elements of a result vector. */`
		39285c	`+ for( index = 0; index < 2; index++) {`
		39285c	`+ i = index;`
		39285c	`+`
		39285c	`+ if ( i == 1) {`
		39285c	`+ src = src_hi;`
		39285c	`+ half_sel = 0;`
		39285c	`+ } else {`
		39285c	`+ src = src_lo;`
		39285c	`+ half_sel = 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ sel_shift_by = 63;`
		39285c	`+`
		39285c	`+ if (((src >> sel_shift_by) & 0x1) == 1) {`
		39285c	`+ if (j == 1) {`
		39285c	`+ result[0] \|= (8*index + 0x0) << 0; // double-word j, byte 0`
		39285c	`+ result[0] \|= (8*index + 0x1) << 8; // double-word j, byte 1`
		39285c	`+ result[0] \|= (8*index + 0x2) << 16; // double-word j, byte 2`
		39285c	`+ result[0] \|= (8*index + 0x3) << 24; // double-word j, byte 3`
		39285c	`+ result[0] \|= (8*index + 0x4) << 32; // double-word j, byte 4`
		39285c	`+ result[0] \|= (8*index + 0x5) << 40; // double-word j, byte 5`
		39285c	`+ result[0] \|= (8*index + 0x6) << 48; // double-word j, byte 6`
		39285c	`+ result[0] \|= (8*index + 0x7) << 56; // double-word j, byte 7`
		39285c	`+ } else {`
		39285c	`+ result[1] \|= (8*index + 0x0) << 0;`
		39285c	`+ result[1] \|= (8*index + 0x1) << 8;`
		39285c	`+ result[1] \|= (8*index + 0x2) << 16;`
		39285c	`+ result[1] \|= (8*index + 0x3) << 24;`
		39285c	`+ result[1] \|= (8*index + 0x4) << 32;`
		39285c	`+ result[1] \|= (8*index + 0x5) << 40;`
		39285c	`+ result[1] \|= (8*index + 0x6) << 48;`
		39285c	`+ result[1] \|= (8*index + 0x7) << 56;`
		39285c	`+ }`
		39285c	`+ j++;`
		39285c	`+ }`
		39285c	`+ }`
		39285c	`+ } else {`
		39285c	`+ vex_printf("ERROR, vector_gen_pvc_dword_mask_helper, imm value %u not supported.\n",`
		39285c	`+ imm);`
		39285c	`+ vassert(0);`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ write_VSX_entry( gst, reg_offset, result);`
		39285c	`+}`
		39285c
		39285c	`/------------------------------------------------/`
		39285c	`/---- VSX Matrix signed integer GER functions ---/`
		39285c	`diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c`
		39285c	`index bcabf69dd..354be6b53 100644`
		39285c	`--- a/VEX/priv/guest_ppc_toIR.c`
		39285c	`+++ b/VEX/priv/guest_ppc_toIR.c`
		39285c	`@@ -3322,6 +3322,7 @@ static IRExpr * locate_vector_ele_eq ( IRTemp src, IRExpr *value,`
		39285c	`#define DFORM_IMMASK 0xffffffff`
		39285c	`#define DSFORM_IMMASK 0xfffffffc`
		39285c	`#define DQFORM_IMMASK 0xfffffff0`
		39285c	`+#define DA8LSFORM_IMMASK 0x3fffffff // Algebraic 8LS Dform`
		39285c
		39285c	`#define ISA_3_1_PREFIX_CHECK if (prefix) {if (!allow_isa_3_1) goto decode_noIsa3_1;}`
		39285c
		39285c	`@@ -6109,6 +6110,87 @@ static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* vbi,`
		39285c	`stmt( IRStmt_Dirty(d) );`
		39285c	`}`
		39285c
		39285c	`+static void vector_gen_pvc_mask ( const VexAbiInfo* vbi,`
		39285c	`+ IRExpr *src, UInt IMM,`
		39285c	`+ UInt opc2, UInt VSX_addr ) {`
		39285c	`+ /* The function takes a 128-bit source and an immediate value. The function`
		39285c	`+ calls a helper to execute the xxgenpcvbm, xxgenpcvhm, xxgenpcvwm,`
		39285c	`+ xxgenpcvdm instruction. The instructions are not practical to do with`
		39285c	`+ Iops. The instruction is implemented with a dirty helper that`
		39285c	`+ calculates the 128-bit result and writes it directly into the guest`
		39285c	`+ state VSX register.`
		39285c	`+ */`
		39285c	`+ IRTemp src_hi = newTemp( Ity_I64);`
		39285c	`+ IRTemp src_lo = newTemp( Ity_I64);`
		39285c	`+`
		39285c	`+ IRDirty* d;`
		39285c	`+`
		39285c	`+ vassert( (VSX_addr >= 0) && (VSX_addr < 64) );`
		39285c	`+ UInt reg_offset = offsetofPPCGuestState( guest_VSR0 )`
		39285c	`+ + sizeof(U128) * VSX_addr;`
		39285c	`+`
		39285c	`+ assign( src_hi, unop( Iop_V128HIto64, src ) );`
		39285c	`+ assign( src_lo, unop( Iop_V128to64, src ) );`
		39285c	`+`
		39285c	`+ IRExpr** args = mkIRExprVec_5(`
		39285c	`+ IRExpr_GSPTR(),`
		39285c	`+ mkexpr( src_hi ),`
		39285c	`+ mkexpr( src_lo ),`
		39285c	`+ mkU32( reg_offset ),`
		39285c	`+ mkU64( IMM ) );`
		39285c	`+`
		39285c	`+ switch( opc2 ) {`
		39285c	`+ case 0x394: // xxgenpcvbm`
		39285c	`+ d = unsafeIRDirty_0_N (`
		39285c	`+ 0 /regparms/,`
		39285c	`+ "vector_gen_pvc_byte_mask_dirty_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &vector_gen_pvc_byte_mask_dirty_helper ),`
		39285c	`+ args);`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ case 0x395: // xxgenpcvhm`
		39285c	`+ d = unsafeIRDirty_0_N (`
		39285c	`+ 0 /regparms/,`
		39285c	`+ "vector_gen_pvc_hword_mask_dirty_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &vector_gen_pvc_hword_mask_dirty_helper ),`
		39285c	`+ args);`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ case 0x3B4: // xxgenpcvwm`
		39285c	`+ d = unsafeIRDirty_0_N (`
		39285c	`+ 0 /regparms/,`
		39285c	`+ "vector_gen_pvc_word_mask_dirty_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &vector_gen_pvc_word_mask_dirty_helper ),`
		39285c	`+ args);`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ case 0x3B5: // xxgenpcvdm`
		39285c	`+ d = unsafeIRDirty_0_N (`
		39285c	`+ 0 /regparms/,`
		39285c	`+ "vector_gen_pvc_dword_mask_dirty_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &vector_gen_pvc_dword_mask_dirty_helper ),`
		39285c	`+ args);`
		39285c	`+ break;`
		39285c	`+ default:`
		39285c	`+ vex_printf("ERROR: Unkown instruction = %u in vector_gen_pvc_mask()\n",`
		39285c	`+ opc2);`
		39285c	`+ return;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ d->nFxState = 1;`
		39285c	`+ vex_bzero(&d->fxState, sizeof(d->fxState));`
		39285c	`+ d->fxState[0].fx = Ifx_Modify;`
		39285c	`+ d->fxState[0].size = sizeof(U128);`
		39285c	`+ d->fxState[0].offset = reg_offset;`
		39285c	`+`
		39285c	`+ /* execute the dirty call, side-effecting guest state */`
		39285c	`+ stmt( IRStmt_Dirty(d) );`
		39285c	`+}`
		39285c	`+`
		39285c	`static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr vA, IRExpr vB ) {`
		39285c	`/* This function does an unsigned compare of two V128 values. The`
		39285c	`* function is for use in 32-bit mode only as it is expensive. The`
		39285c	`@@ -35227,6 +35309,54 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,`
		39285c	`return True;`
		39285c	`}`
		39285c
		39285c	`+static Bool dis_vector_generate_pvc_from_mask ( UInt prefix,`
		39285c	`+ UInt theInstr,`
		39285c	`+ const VexAbiInfo* vbi )`
		39285c	`+{`
		39285c	`+ UChar XT_addr = ifieldRegXT(theInstr);`
		39285c	`+ UChar vB_addr = ifieldRegB(theInstr);`
		39285c	`+ IRTemp vB = newTemp( Ity_V128 );`
		39285c	`+ UInt opc2 = ifieldOPClo10(theInstr);`
		39285c	`+ UInt IMM = IFIELD(theInstr, (31-15), 5); // bits[11:15]`
		39285c	`+`
		39285c	`+ assign( vB, getVReg( vB_addr ) );`
		39285c	`+`
		39285c	`+ switch( opc2 ) {`
		39285c	`+ case 0x394:`
		39285c	`+ DIP("xxgenpcvbm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);`
		39285c	`+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and`
		39285c	`+ write it to the VSX result register. */`
		39285c	`+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ case 0x395:`
		39285c	`+ DIP("xxgenpcvhm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);`
		39285c	`+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and`
		39285c	`+ write it to the VSX result register. */`
		39285c	`+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ case 0x3B4:`
		39285c	`+ DIP("xxgenpcvwm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);`
		39285c	`+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and`
		39285c	`+ write it to the VSX result register. */`
		39285c	`+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ case 0x3B5:`
		39285c	`+ DIP("xxgenpcvdm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);`
		39285c	`+ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and`
		39285c	`+ write it to the VSX result register. */`
		39285c	`+ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );`
		39285c	`+ break;`
		39285c	`+`
		39285c	`+ default:`
		39285c	`+ return False;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ return True;`
		39285c	`+}`
		39285c	`+`
		39285c	`static Int dis_nop_prefix ( UInt prefix, UInt theInstr )`
		39285c	`{`
		39285c	`Bool is_prefix = prefix_instruction( prefix );`
		39285c	`@@ -35748,14 +35878,9 @@ DisResult disInstr_PPC_WRK (`
		39285c	`}`
		39285c	`goto decode_failure;`
		39285c
		39285c	`- case 0x31: // lfsu, stxv`
		39285c	`+ case 0x31: // lfsu`
		39285c	`if (!allow_F) goto decode_noF;`
		39285c	`- if (prefix_instruction( prefix )) { // stxv`
		39285c	`- if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;`
		39285c	`- if (dis_fp_pair_prefix( prefix, theInstr )) goto decode_success;`
		39285c	`- } else { // lfsu`
		39285c	`- if (dis_fp_load( prefix, theInstr )) goto decode_success;`
		39285c	`- }`
		39285c	`+ if (dis_fp_load( prefix, theInstr )) goto decode_success;`
		39285c	`goto decode_failure;`
		39285c
		39285c	`case 0x32:`
		39285c	`@@ -35842,7 +35967,6 @@ DisResult disInstr_PPC_WRK (`
		39285c	`case 0x39: // pld, lxsd, lxssp, lfdp`
		39285c	`{`
		39285c	`UInt opc2tmp = ifieldOPC0o2(theInstr);`
		39285c	`-`
		39285c	`if (!allow_F) goto decode_noF;`
		39285c	`if (prefix_instruction( prefix )) { // pld`
		39285c	`if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;`
		39285c	`@@ -36125,12 +36249,6 @@ DisResult disInstr_PPC_WRK (`
		39285c	`goto decode_failure;`
		39285c	`}`
		39285c
		39285c	`- /* The vsxOpc2 returned is the "normalized" value, representing the`
		39285c	`- * instructions secondary opcode as taken from the standard secondary`
		39285c	`- * opcode field [21:30] (IBM notatition), even if the actual field`
		39285c	`- * is non-standard. These normalized values are given in the opcode`
		39285c	`- * appendices of the ISA 2.06 document.`
		39285c	`- */`
		39285c	`if ( ( opc2 == 0x168 ) && ( IFIELD( theInstr, 19, 2 ) == 0 ) )// xxspltib`
		39285c	`{`
		39285c	`/* This is a special case of the XX1 form where the RA, RB`
		39285c	`@@ -36153,6 +36271,23 @@ DisResult disInstr_PPC_WRK (`
		39285c	`goto decode_failure;`
		39285c	`}`
		39285c
		39285c	`+ if ( ( opc2 == 0x394 ) \|\| // xxgenpcvbm`
		39285c	`+ ( opc2 == 0x395 ) \|\| // xxgenpcvhm`
		39285c	`+ ( opc2 == 0x3B4 ) \|\| // xxgenpcvwm`
		39285c	`+ ( opc2 == 0x3B5 ) ) { // xxgenpcvdm`
		39285c	`+ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;`
		39285c	`+ if (dis_vector_generate_pvc_from_mask( prefix, theInstr,`
		39285c	`+ abiinfo ))`
		39285c	`+ goto decode_success;`
		39285c	`+ goto decode_failure;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ /* The vsxOpc2 returned is the "normalized" value, representing the`
		39285c	`+ * instruction's secondary opcode as taken from the standard secondary`
		39285c	`+ * opcode field [21:30] (IBM notation), even if the actual field`
		39285c	`+ * is non-standard. These normalized values are given in the opcode`
		39285c	`+ * appendices of the ISA 2.06 document.`
		39285c	`+ */`
		39285c	`vsxOpc2 = get_VSX60_opc2(opc2, theInstr);`
		39285c
		39285c	`switch (vsxOpc2) {`
		39285c	`commit 078f89e99b6f62e043f6138c6a7ae238befc1f2a`
		39285c	`Author: Carl Love <cel@us.ibm.com>`
		39285c	`Date: Fri Feb 26 15:46:55 2021 -0600`
		39285c
		39285c	`PPC64: Reduced-Precision - bfloat16 Outer Product & Format Conversion Operations`
		39285c
		39285c	`Add support for:`
		39285c
		39285c	`pmxvbf16ger2 Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update)`
		39285c	`pmxvbf16ger2pp Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive`
		39285c	`multiply, Positive accumulate`
		39285c	`pmxvbf16ger2pn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive`
		39285c	`multiply, Negative accumulate`
		39285c	`pmxvbf16ger2np Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative`
		39285c	`multiply, Positive accumulate`
		39285c	`pmxvbf16ger2nn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative`
		39285c	`multiply, Negative accumulate`
		39285c	`xvbf16ger2 VSX Vector bfloat16 GER (Rank-2 Update)`
		39285c	`xvbf16ger2pp VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive`
		39285c	`accumulate`
		39285c	`xvbf16ger2pn VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative`
		39285c	`accumulate`
		39285c	`xvbf16ger2np VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive`
		39285c	`accumulate`
		39285c	`xvbf16ger2nn VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative`
		39285c	`accumulate`
		39285c	`xvcvbf16sp VSX Vector Convert bfloat16 to Single-Precision format`
		39285c	`xvcvspbf16 VSX Vector Convert with round Single-Precision to bfloat16 format`
		39285c
		39285c	`diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h`
		39285c	`index 54ce923a9..d36d6c07d 100644`
		39285c	`--- a/VEX/priv/guest_ppc_defs.h`
		39285c	`+++ b/VEX/priv/guest_ppc_defs.h`
		39285c	`@@ -150,6 +150,8 @@ extern ULong convert_to_zoned_helper( ULong src_hi, ULong src_low,`
		39285c	`ULong return_upper );`
		39285c	`extern ULong convert_to_national_helper( ULong src, ULong return_upper );`
		39285c	`extern ULong convert_from_zoned_helper( ULong src_hi, ULong src_low );`
		39285c	`+extern ULong convert_from_floattobf16_helper( ULong src );`
		39285c	`+extern ULong convert_from_bf16tofloat_helper( ULong src );`
		39285c	`extern ULong convert_from_national_helper( ULong src_hi, ULong src_low );`
		39285c	`extern ULong generate_C_FPCC_helper( ULong size, ULong src_hi, ULong src );`
		39285c	`extern ULong extract_bits_under_mask_helper( ULong src, ULong mask,`
		39285c	`@@ -201,6 +203,11 @@ extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`#define XVF16GER2PN 0b10010010`
		39285c	`#define XVF16GER2NP 0b01010010`
		39285c	`#define XVF16GER2NN 0b11010010`
		39285c	`+#define XVBF16GER2 0b00110011`
		39285c	`+#define XVBF16GER2PP 0b00110010`
		39285c	`+#define XVBF16GER2PN 0b10110010`
		39285c	`+#define XVBF16GER2NP 0b01110010`
		39285c	`+#define XVBF16GER2NN 0b11110010`
		39285c	`#define XVF32GER 0b00011011`
		39285c	`#define XVF32GERPP 0b00011010`
		39285c	`#define XVF32GERPN 0b10011010`
		39285c	`diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c`
		39285c	`index 75497abb9..6bcee966d 100644`
		39285c	`--- a/VEX/priv/guest_ppc_helpers.c`
		39285c	`+++ b/VEX/priv/guest_ppc_helpers.c`
		39285c	`@@ -1905,6 +1905,125 @@ static Double conv_f16_to_double( ULong input )`
		39285c	`# endif`
		39285c	`}`
		39285c
		39285c	`+#define BF16_SIGN_MASK 0x8000`
		39285c	`+#define BF16_EXP_MASK 0x7F80`
		39285c	`+#define BF16_FRAC_MASK 0x007F`
		39285c	`+#define BF16_BIAS 127`
		39285c	`+#define BF16_MAX_UNBIASED_EXP 127`
		39285c	`+#define BF16_MIN_UNBIASED_EXP -126`
		39285c	`+#define FLOAT_SIGN_MASK 0x80000000`
		39285c	`+#define FLOAT_EXP_MASK 0x7F800000`
		39285c	`+#define FLOAT_FRAC_MASK 0x007FFFFF`
		39285c	`+#define FLOAT_FRAC_BIT8 0x00008000`
		39285c	`+#define FLOAT_BIAS 127`
		39285c	`+`
		39285c	`+static Float conv_bf16_to_float( UInt input )`
		39285c	`+{`
		39285c	`+ /* input is 16-bit bfloat.`
		39285c	`+ bias +127, exponent 8-bits, fraction 7-bits`
		39285c	`+`
		39285c	`+ output is 32-bit float.`
		39285c	`+ bias +127, exponent 8-bits, fraction 23-bits`
		39285c	`+ */`
		39285c	`+`
		39285c	`+ UInt input_exp, input_fraction, unbiased_exp;`
		39285c	`+ UInt output_exp, output_fraction;`
		39285c	`+ UInt sign;`
		39285c	`+ union convert_t conv;`
		39285c	`+`
		39285c	`+ sign = (UInt)(input & BF16_SIGN_MASK);`
		39285c	`+ input_exp = input & BF16_EXP_MASK;`
		39285c	`+ unbiased_exp = (input_exp >> 7) - (UInt)BF16_BIAS;`
		39285c	`+ input_fraction = input & BF16_FRAC_MASK;`
		39285c	`+`
		39285c	`+ if (((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) &&`
		39285c	`+ (input_fraction != 0)) {`
		39285c	`+ /* input is NaN or SNaN, exp all 1's, fraction != 0 */`
		39285c	`+ output_exp = FLOAT_EXP_MASK;`
		39285c	`+ output_fraction = input_fraction;`
		39285c	`+`
		39285c	`+ } else if(((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) &&`
		39285c	`+ ( input_fraction == 0)) {`
		39285c	`+ /* input is infinity, exp all 1's, fraction = 0 */`
		39285c	`+ output_exp = FLOAT_EXP_MASK;`
		39285c	`+ output_fraction = 0;`
		39285c	`+`
		39285c	`+ } else if((input_exp == 0) && (input_fraction == 0)) {`
		39285c	`+ /* input is zero */`
		39285c	`+ output_exp = 0;`
		39285c	`+ output_fraction = 0;`
		39285c	`+`
		39285c	`+ } else if((input_exp == 0) && (input_fraction != 0)) {`
		39285c	`+ /* input is denormal */`
		39285c	`+ output_fraction = input_fraction;`
		39285c	`+ output_exp = (-(Int)BF16_BIAS + (Int)FLOAT_BIAS ) << 23;`
		39285c	`+`
		39285c	`+ } else {`
		39285c	`+ /* result is normal */`
		39285c	`+ output_exp = (unbiased_exp + FLOAT_BIAS) << 23;`
		39285c	`+ output_fraction = input_fraction;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ conv.u32 = sign << (31 - 15) \| output_exp \| (output_fraction << (23-7));`
		39285c	`+ return conv.f;`
		39285c	`+}`
		39285c	`+`
		39285c	`+static UInt conv_float_to_bf16( UInt input )`
		39285c	`+{`
		39285c	`+ /* input is 32-bit float stored as unsigned 32-bit.`
		39285c	`+ bias +127, exponent 8-bits, fraction 23-bits`
		39285c	`+`
		39285c	`+ output is 16-bit bfloat.`
		39285c	`+ bias +127, exponent 8-bits, fraction 7-bits`
		39285c	`+`
		39285c	`+ If the unbiased exponent of the input is greater than the max floating`
		39285c	`+ point unbiased exponent value, the result of the floating point 16-bit`
		39285c	`+ value is infinity.`
		39285c	`+ */`
		39285c	`+`
		39285c	`+ UInt input_exp, input_fraction;`
		39285c	`+ UInt output_exp, output_fraction;`
		39285c	`+ UInt result, sign;`
		39285c	`+`
		39285c	`+ sign = input & FLOAT_SIGN_MASK;`
		39285c	`+ input_exp = input & FLOAT_EXP_MASK;`
		39285c	`+ input_fraction = input & FLOAT_FRAC_MASK;`
		39285c	`+`
		39285c	`+ if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) &&`
		39285c	`+ (input_fraction != 0)) {`
		39285c	`+ /* input is NaN or SNaN, exp all 1's, fraction != 0 */`
		39285c	`+ output_exp = BF16_EXP_MASK;`
		39285c	`+ output_fraction = (ULong)input_fraction >> (23 - 7);`
		39285c	`+ } else if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) &&`
		39285c	`+ ( input_fraction == 0)) {`
		39285c	`+ /* input is infinity, exp all 1's, fraction = 0 */`
		39285c	`+ output_exp = BF16_EXP_MASK;`
		39285c	`+ output_fraction = 0;`
		39285c	`+ } else if ((input_exp == 0) && (input_fraction == 0)) {`
		39285c	`+ /* input is zero */`
		39285c	`+ output_exp = 0;`
		39285c	`+ output_fraction = 0;`
		39285c	`+ } else if ((input_exp == 0) && (input_fraction != 0)) {`
		39285c	`+ /* input is denormal */`
		39285c	`+ output_exp = 0;`
		39285c	`+ output_fraction = (ULong)input_fraction >> (23 - 7);`
		39285c	`+ } else {`
		39285c	`+ /* result is normal */`
		39285c	`+ output_exp = (input_exp - BF16_BIAS + FLOAT_BIAS) >> (23 - 7);`
		39285c	`+ output_fraction = (ULong)input_fraction >> (23 - 7);`
		39285c	`+`
		39285c	`+ /* Round result. Look at the 8th bit position of the 32-bit floating`
		39285c	`+ point fraction. The bfloat16 fraction is only 7 bits wide so if the 8th`
		39285c	`+ bit of the F32 is a 1 we need to round up by adding 1 to the output`
		39285c	`+ fraction. */`
		39285c	`+ if ((input_fraction & FLOAT_FRAC_BIT8) == FLOAT_FRAC_BIT8)`
		39285c	`+ /* Round the bfloat16 fraction up by 1 */`
		39285c	`+ output_fraction = output_fraction + 1;`
		39285c	`+ }`
		39285c	`+`
		39285c	`+ result = sign >> (31 - 15) \| output_exp \| output_fraction;`
		39285c	`+ return result;`
		39285c	`+}`
		39285c
		39285c	`static Float conv_double_to_float( Double src )`
		39285c	`{`
		39285c	`@@ -1942,6 +2061,36 @@ static Float negate_float( Float input )`
		39285c	`return -input;`
		39285c	`}`
		39285c
		39285c	`+/* This C-helper takes a vector of two 32-bit floating point values`
		39285c	`+ * and returns a vector containing two 16-bit bfloats.`
		39285c	`+ input: word0 word1`
		39285c	`+ output 0x0 hword1 0x0 hword3`
		39285c	`+ Called from generated code.`
		39285c	`+ */`
		39285c	`+ULong convert_from_floattobf16_helper( ULong src ) {`
		39285c	`+ ULong resultHi, resultLo;`
		39285c	`+`
		39285c	`+ resultHi = (ULong)conv_float_to_bf16( (UInt)(src >> 32));`
		39285c	`+ resultLo = (ULong)conv_float_to_bf16( (UInt)(src & 0xFFFFFFFF));`
		39285c	`+ return (resultHi << 32) \| resultLo;`
		39285c	`+`
		39285c	`+}`
		39285c	`+`
		39285c	`+/* This C-helper takes a vector of two 16-bit bfloating point values`
		39285c	`+ * and returns a vector containing two 32-bit floats.`
		39285c	`+ input: 0x0 hword1 0x0 hword3`
		39285c	`+ output: word0 word1`
		39285c	`+ */`
		39285c	`+ULong convert_from_bf16tofloat_helper( ULong src ) {`
		39285c	`+ ULong result;`
		39285c	`+ union convert_t conv;`
		39285c	`+ conv.f = conv_bf16_to_float( (UInt)(src >> 32) );`
		39285c	`+ result = (ULong) conv.u32;`
		39285c	`+ conv.f = conv_bf16_to_float( (UInt)(src & 0xFFFFFFFF));`
		39285c	`+ result = (result << 32) \| (ULong) conv.u32;`
		39285c	`+ return result;`
		39285c	`+ }`
		39285c	`+`
		39285c	`void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`UInt offset_ACC,`
		39285c	`ULong srcA_hi, ULong srcA_lo,`
		39285c	`@@ -2002,24 +2151,44 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask);`
		39285c	`}`
		39285c
		39285c	`+ /* Note the isa is not consistent in the src naming. Will use the`
		39285c	`+ naming src10, src11, src20, src21 used with xvf16ger2 instructions.`
		39285c	`+ */`
		39285c	`for( j = 0; j < 4; j++) {`
		39285c	`if (((pmsk >> 1) & 0x1) == 0) {`
		39285c	`src10 = 0;`
		39285c	`src20 = 0;`
		39285c	`} else {`
		39285c	`- src10 = conv_f16_to_double((ULong)srcA_word[i][0]);`
		39285c	`- src20 = conv_f16_to_double((ULong)srcB_word[j][0]);`
		39285c	`+ if (( inst == XVF16GER2 ) \|\| ( inst == XVF16GER2PP )`
		39285c	`+ \|\| ( inst == XVF16GER2PN ) \|\| ( inst == XVF16GER2NP )`
		39285c	`+ \|\| ( inst == XVF16GER2NN )) {`
		39285c	`+ src10 = conv_f16_to_double((ULong)srcA_word[i][0]);`
		39285c	`+ src20 = conv_f16_to_double((ULong)srcB_word[j][0]);`
		39285c	`+ } else {`
		39285c	`+ /* Input is in bfloat format, result is stored in the`
		39285c	`+ "traditional" 64-bit float format. */`
		39285c	`+ src10 = (double)conv_bf16_to_float((ULong)srcA_word[i][0]);`
		39285c	`+ src20 = (double)conv_bf16_to_float((ULong)srcB_word[j][0]);`
		39285c	`+ }`
		39285c	`}`
		39285c
		39285c	`if ((pmsk & 0x1) == 0) {`
		39285c	`src11 = 0;`
		39285c	`src21 = 0;`
		39285c	`} else {`
		39285c	`- src11 = conv_f16_to_double((ULong)srcA_word[i][1]);`
		39285c	`- src21 = conv_f16_to_double((ULong)srcB_word[j][1]);`
		39285c	`+ if (( inst == XVF16GER2 ) \|\| ( inst == XVF16GER2PP )`
		39285c	`+ \|\| ( inst == XVF16GER2PN ) \|\| ( inst == XVF16GER2NP )`
		39285c	`+ \|\| ( inst == XVF16GER2NN )) {`
		39285c	`+ src11 = conv_f16_to_double((ULong)srcA_word[i][1]);`
		39285c	`+ src21 = conv_f16_to_double((ULong)srcB_word[j][1]);`
		39285c	`+ } else {`
		39285c	`+ /* Input is in bfloat format, result is stored in the`
		39285c	`+ "traditional" 64-bit float format. */`
		39285c	`+ src11 = (double)conv_bf16_to_float((ULong)srcA_word[i][1]);`
		39285c	`+ src21 = (double)conv_bf16_to_float((ULong)srcB_word[j][1]);`
		39285c	`+ }`
		39285c	`}`
		39285c
		39285c	`-`
		39285c	`prod = src10 * src20;`
		39285c	`msum = prod + src11 * src21;`
		39285c
		39285c	`@@ -2027,26 +2196,26 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`/* Note, we do not track the exception handling bits`
		39285c	`ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */`
		39285c
		39285c	`- if ( inst == XVF16GER2 )`
		39285c	`+ if (( inst == XVF16GER2 ) \|\| ( inst == XVBF16GER2 ) )`
		39285c	`result[j] = reinterpret_float_as_int(`
		39285c	`conv_double_to_float(msum) );`
		39285c
		39285c	`- else if ( inst == XVF16GER2PP )`
		39285c	`+ else if (( inst == XVF16GER2PP ) \|\| (inst == XVBF16GER2PP ))`
		39285c	`result[j] = reinterpret_float_as_int(`
		39285c	`conv_double_to_float(msum)`
		39285c	`+ acc_word[j] );`
		39285c
		39285c	`- else if ( inst == XVF16GER2PN )`
		39285c	`+ else if (( inst == XVF16GER2PN ) \|\| ( inst == XVBF16GER2PN ))`
		39285c	`result[j] = reinterpret_float_as_int(`
		39285c	`conv_double_to_float(msum)`
		39285c	`+ negate_float( acc_word[j] ) );`
		39285c
		39285c	`- else if ( inst == XVF16GER2NP )`
		39285c	`+ else if (( inst == XVF16GER2NP ) \|\| ( inst == XVBF16GER2NP ))`
		39285c	`result[j] = reinterpret_float_as_int(`
		39285c	`conv_double_to_float( negate_double( msum ) )`
		39285c	`+ acc_word[j] );`
		39285c
		39285c	`- else if ( inst == XVF16GER2NN )`
		39285c	`+ else if (( inst == XVF16GER2NN ) \|\| ( inst == XVBF16GER2NN ))`
		39285c	`result[j] = reinterpret_float_as_int(`
		39285c	`conv_double_to_float( negate_double( msum ) )`
		39285c	`+ negate_float( acc_word[j] ) );`
		39285c	`diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c`
		39285c	`index 354be6b53..20553a539 100644`
		39285c	`--- a/VEX/priv/guest_ppc_toIR.c`
		39285c	`+++ b/VEX/priv/guest_ppc_toIR.c`
		39285c	`@@ -5688,6 +5688,57 @@ static IRExpr * convert_from_national ( const VexAbiInfo* vbi, IRExpr *src ) {`
		39285c	`return mkexpr( result );`
		39285c	`}`
		39285c
		39285c	`+static IRExpr * vector_convert_floattobf16 ( const VexAbiInfo* vbi,`
		39285c	`+ IRExpr *src ) {`
		39285c	`+ /* The function takes a 128-bit value containing four 32-bit floats and`
		39285c	`+ returns a 128-bit value containing four 16-bit bfloats in the lower`
		39285c	`+ halfwords. */`
		39285c	`+`
		39285c	`+ IRTemp resultHi = newTemp( Ity_I64);`
		39285c	`+ IRTemp resultLo = newTemp( Ity_I64);`
		39285c	`+`
		39285c	`+ assign( resultHi,`
		39285c	`+ mkIRExprCCall( Ity_I64, 0 /regparms/,`
		39285c	`+ "vector_convert_floattobf16_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &convert_from_floattobf16_helper ),`
		39285c	`+ mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) );`
		39285c	`+`
		39285c	`+ assign( resultLo,`
		39285c	`+ mkIRExprCCall( Ity_I64, 0 /regparms/,`
		39285c	`+ "vector_convert_floattobf16_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &convert_from_floattobf16_helper ),`
		39285c	`+ mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) );`
		39285c	`+`
		39285c	`+ return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) );`
		39285c	`+}`
		39285c	`+`
		39285c	`+static IRExpr * vector_convert_bf16tofloat ( const VexAbiInfo* vbi,`
		39285c	`+ IRExpr *src ) {`
		39285c	`+ /* The function takes a 128-bit value containing four 16-bit bfloats in`
		39285c	`+ the lower halfwords and returns a 128-bit value containing four`
		39285c	`+ 32-bit floats. */`
		39285c	`+ IRTemp resultHi = newTemp( Ity_I64);`
		39285c	`+ IRTemp resultLo = newTemp( Ity_I64);`
		39285c	`+`
		39285c	`+ assign( resultHi,`
		39285c	`+ mkIRExprCCall( Ity_I64, 0 /regparms/,`
		39285c	`+ "vector_convert_bf16tofloat_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &convert_from_bf16tofloat_helper ),`
		39285c	`+ mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) );`
		39285c	`+`
		39285c	`+ assign( resultLo,`
		39285c	`+ mkIRExprCCall( Ity_I64, 0 /regparms/,`
		39285c	`+ "vector_convert_bf16tofloat_helper",`
		39285c	`+ fnptr_to_fnentry( vbi,`
		39285c	`+ &convert_from_bf16tofloat_helper ),`
		39285c	`+ mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) );`
		39285c	`+`
		39285c	`+ return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) );`
		39285c	`+}`
		39285c	`+`
		39285c	`static IRExpr * popcnt64 ( const VexAbiInfo* vbi,`
		39285c	`IRExpr *src ){`
		39285c	`/* The function takes a 64-bit source and counts the number of bits in the`
		39285c	`@@ -5936,6 +5987,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi,`
		39285c	`case XVI16GER2:`
		39285c	`case XVI16GER2S:`
		39285c	`case XVF16GER2:`
		39285c	`+ case XVBF16GER2:`
		39285c	`case XVF32GER:`
		39285c	`AT_fx = Ifx_Write;`
		39285c	`break;`
		39285c	`@@ -5943,6 +5995,10 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi,`
		39285c	`case XVI8GER4PP:`
		39285c	`case XVI16GER2PP:`
		39285c	`case XVI16GER2SPP:`
		39285c	`+ case XVBF16GER2PP:`
		39285c	`+ case XVBF16GER2PN:`
		39285c	`+ case XVBF16GER2NP:`
		39285c	`+ case XVBF16GER2NN:`
		39285c	`case XVF16GER2PP:`
		39285c	`case XVF16GER2PN:`
		39285c	`case XVF16GER2NP:`
		39285c	`@@ -23899,6 +23955,24 @@ dis_vxs_misc( UInt prefix, UInt theInstr, const VexAbiInfo* vbi, UInt opc2,`
		39285c	`mkexpr( sub_element1 ),`
		39285c	`mkexpr( sub_element0 ) ) ) );`
		39285c
		39285c	`+ } else if ((inst_select == 16) && !prefix) {`
		39285c	`+ IRTemp result = newTemp(Ity_V128);`
		39285c	`+ UChar xT_addr = ifieldRegXT ( theInstr );`
		39285c	`+ UChar xB_addr = ifieldRegXB ( theInstr );`
		39285c	`+ /* Convert 16-bit bfloat to 32-bit float, not a prefix inst */`
		39285c	`+ DIP("xvcvbf16sp v%u,v%u\n", xT_addr, xB_addr);`
		39285c	`+ assign( result, vector_convert_bf16tofloat( vbi, mkexpr( vB ) ) );`
		39285c	`+ putVSReg( XT, mkexpr( result) );`
		39285c	`+`
		39285c	`+ } else if ((inst_select == 17) && !prefix) {`
		39285c	`+ IRTemp result = newTemp(Ity_V128);`
		39285c	`+ UChar xT_addr = ifieldRegXT ( theInstr );`
		39285c	`+ UChar xB_addr = ifieldRegXB ( theInstr );`
		39285c	`+ /* Convert 32-bit float to 16-bit bfloat, not a prefix inst */`
		39285c	`+ DIP("xvcvspbf16 v%u,v%u\n", xT_addr, xB_addr);`
		39285c	`+ assign( result, vector_convert_floattobf16( vbi, mkexpr( vB ) ) );`
		39285c	`+ putVSReg( XT, mkexpr( result) );`
		39285c	`+`
		39285c	`} else if (inst_select == 23) {`
		39285c	`DIP("xxbrd v%u, v%u\n", (UInt)XT, (UInt)XB);`
		39285c
		39285c	`@@ -34956,6 +35030,41 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,`
		39285c	`getVSReg( rB_addr ), AT,`
		39285c	`( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`break;`
		39285c	`+ case XVBF16GER2:`
		39285c	`+ DIP("xvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ), AT,`
		39285c	`+ ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2PP:`
		39285c	`+ DIP("xvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ), AT,`
		39285c	`+ ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2PN:`
		39285c	`+ DIP("xvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ), AT,`
		39285c	`+ ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2NP:`
		39285c	`+ DIP("xvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ), AT,`
		39285c	`+ ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2NN:`
		39285c	`+ DIP("xvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ), AT,`
		39285c	`+ ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`case XVF32GER:`
		39285c	`DIP("xvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,`
		39285c	`@@ -35106,6 +35215,61 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,`
		39285c	`AT,`
		39285c	`( (MASKS << 9 ) \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`break;`
		39285c	`+ case XVBF16GER2:`
		39285c	`+ PMSK = IFIELD( prefix, 14, 2);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ),`
		39285c	`+ AT, ( (MASKS << 9 )`
		39285c	`+ \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2PP:`
		39285c	`+ PMSK = IFIELD( prefix, 14, 2);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ),`
		39285c	`+ AT, ( (MASKS << 9 )`
		39285c	`+ \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2PN:`
		39285c	`+ PMSK = IFIELD( prefix, 14, 2);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ),`
		39285c	`+ AT, ( (MASKS << 9 )`
		39285c	`+ \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2NP:`
		39285c	`+ PMSK = IFIELD( prefix, 14, 2);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ),`
		39285c	`+ AT, ( (MASKS << 9 )`
		39285c	`+ \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVBF16GER2NN:`
		39285c	`+ PMSK = IFIELD( prefix, 14, 2);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`+ getVSReg( rA_addr ),`
		39285c	`+ getVSReg( rB_addr ),`
		39285c	`+ AT, ( (MASKS << 9 )`
		39285c	`+ \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`case XVF16GER2:`
		39285c	`PMSK = IFIELD( prefix, 14, 2);`
		39285c	`XMSK = IFIELD( prefix, 4, 4);`
		39285c	`@@ -36181,6 +36345,11 @@ DisResult disInstr_PPC_WRK (`
		39285c	`(opc2 == XVI4GER8PP) \|\| // xvi4ger8pp`
		39285c	`(opc2 == XVI8GER4) \|\| // xvi8ger4`
		39285c	`(opc2 == XVI8GER4PP) \|\| // xvi8ger4pp`
		39285c	`+ (opc2 == XVBF16GER2) \|\| // xvbf16ger2`
		39285c	`+ (opc2 == XVBF16GER2PP) \|\| // xvbf16ger2pp`
		39285c	`+ (opc2 == XVBF16GER2PN) \|\| // xvbf16ger2pn`
		39285c	`+ (opc2 == XVBF16GER2NP) \|\| // xvbf16ger2np`
		39285c	`+ (opc2 == XVBF16GER2NN) \|\| // xvbf16ger2nn`
		39285c	`(opc2 == XVF16GER2) \|\| // xvf16ger2`
		39285c	`(opc2 == XVF16GER2PP) \|\| // xvf16ger2pp`
		39285c	`(opc2 == XVF16GER2PN) \|\| // xvf16ger2pn`
		39285c	`commit e09fdaf569b975717465ed8043820d0198d4d47d`
		39285c	`Author: Carl Love <cel@us.ibm.com>`
		39285c	`Date: Fri Feb 26 16:05:12 2021 -0600`
		39285c
		39285c	`PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations`
		39285c
		39285c	`Add support for:`
		39285c
		39285c	`pmxvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update), Prefixed`
		39285c	`Masked`
		39285c	`pmxvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive`
		39285c	`multiply, Positive accumulate), Prefixed Masked`
		39285c	`pmxvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with`
		39285c	`Saturation (Positive multiply, Positive accumulate), Prefixed Masked`
		39285c	`xvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update)`
		39285c	`xvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive`
		39285c	`multiply, Positive accumulate)`
		39285c	`xvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with`
		39285c	`Saturation (Positive multiply, Positive accumulate)`
		39285c
		39285c	`diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c`
		39285c	`index 6bcee966d..d8131eb60 100644`
		39285c	`--- a/VEX/priv/guest_ppc_helpers.c`
		39285c	`+++ b/VEX/priv/guest_ppc_helpers.c`
		39285c	`@@ -1446,16 +1446,16 @@ static UInt exts4( UInt src)`
		39285c	`return src & 0xF; /* make sure high order bits are zero */`
		39285c	`}`
		39285c
		39285c	`-static UInt exts8( UInt src)`
		39285c	`+static ULong exts8( UInt src)`
		39285c	`{`
		39285c	`- /* Input is an 8-bit value. Extend bit 7 to bits [31:8] */`
		39285c	`+ /* Input is an 8-bit value. Extend bit 7 to bits [63:8] */`
		39285c	`if (( src >> 7 ) & 0x1)`
		39285c	`- return src \| 0xFFFFFF00; /* sign bit is a 1, extend */`
		39285c	`+ return src \| 0xFFFFFFFFFFFFFF00ULL; /* sign bit is a 1, extend */`
		39285c	`else`
		39285c	`return src & 0xFF; /* make sure high order bits are zero */`
		39285c	`}`
		39285c
		39285c	`-static UInt extz8( UInt src)`
		39285c	`+static ULong extz8( UInt src)`
		39285c	`{`
		39285c	`/* Input is an 8-bit value. Extend src on the left with zeros. */`
		39285c	`return src & 0xFF; /* make sure high order bits are zero */`
		39285c	`@@ -1662,12 +1662,12 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`ULong srcB_hi, ULong srcB_lo,`
		39285c	`UInt masks_inst )`
		39285c	`{`
		39285c	`- UInt i, j, mask, sum, inst, acc_entry, prefix_inst;`
		39285c	`+ UInt i, j, mask, inst, acc_entry, prefix_inst;`
		39285c
		39285c	`UInt srcA_bytes[4][4]; /* word, byte */`
		39285c	`UInt srcB_bytes[4][4]; /* word, byte */`
		39285c	`UInt acc_word[4];`
		39285c	`- UInt prod0, prod1, prod2, prod3;`
		39285c	`+ ULong prod0, prod1, prod2, prod3, sum;`
		39285c	`UInt result[4];`
		39285c	`UInt pmsk = 0;`
		39285c	`UInt xmsk = 0;`
		39285c	`@@ -1742,10 +1742,13 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`sum = prod0 + prod1 + prod2 + prod3;`
		39285c
		39285c	`if ( inst == XVI8GER4 )`
		39285c	`- result[j] = sum;`
		39285c	`+ result[j] = chop64to32( sum );`
		39285c
		39285c	`else if ( inst == XVI8GER4PP )`
		39285c	`- result[j] = sum + acc_word[j];`
		39285c	`+ result[j] = chop64to32( sum + acc_word[j] );`
		39285c	`+`
		39285c	`+ else if ( inst == XVI8GER4SPP )`
		39285c	`+ result[j] = clampS64toS32(sum + acc_word[j]);`
		39285c
		39285c	`} else {`
		39285c	`result[j] = 0;`
		39285c	`@@ -1821,7 +1824,7 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`else`
		39285c	`prod1 = exts16to64( srcA_word[i][1] )`
		39285c	`* exts16to64( srcB_word[j][1] );`
		39285c	`- /* sum is UInt so the result is choped to 32-bits */`
		39285c	`+`
		39285c	`sum = prod0 + prod1;`
		39285c
		39285c	`if ( inst == XVI16GER2 )`
		39285c	`@@ -1830,13 +1833,11 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst,`
		39285c	`else if ( inst == XVI16GER2S )`
		39285c	`result[j] = clampS64toS32( sum );`
		39285c
		39285c	`- else if ( inst == XVI16GER2PP ) {`
		39285c	`+ else if ( inst == XVI16GER2PP )`
		39285c	`result[j] = chop64to32( sum + acc_word[j] );`
		39285c	`- }`
		39285c
		39285c	`- else if ( inst == XVI16GER2SPP ) {`
		39285c	`+ else if ( inst == XVI16GER2SPP )`
		39285c	`result[j] = clampS64toS32( sum + acc_word[j] );`
		39285c	`- }`
		39285c
		39285c	`} else {`
		39285c	`result[j] = 0;`
		39285c	`diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c`
		39285c	`index 20553a539..e54f0f389 100644`
		39285c	`--- a/VEX/priv/guest_ppc_toIR.c`
		39285c	`+++ b/VEX/priv/guest_ppc_toIR.c`
		39285c	`@@ -5993,6 +5993,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi,`
		39285c	`break;`
		39285c	`case XVI4GER8PP:`
		39285c	`case XVI8GER4PP:`
		39285c	`+ case XVI8GER4SPP:`
		39285c	`case XVI16GER2PP:`
		39285c	`case XVI16GER2SPP:`
		39285c	`case XVBF16GER2PP:`
		39285c	`@@ -34983,6 +34984,12 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,`
		39285c	`getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`AT, ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`break;`
		39285c	`+ case XVI8GER4SPP:`
		39285c	`+ DIP("xvi8ger4spp %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,`
		39285c	`+ getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`+ AT, ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`case XVI16GER2S:`
		39285c	`DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,`
		39285c	`@@ -34995,6 +35002,19 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,`
		39285c	`getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`AT, ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`break;`
		39285c	`+ case XVI16GER2:`
		39285c	`+ DIP("xvi16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,`
		39285c	`+ getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`+ AT, ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVI16GER2PP:`
		39285c	`+ DIP("xvi16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,`
		39285c	`+ getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`+ AT, ( ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+`
		39285c	`case XVF16GER2:`
		39285c	`DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);`
		39285c	`vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,`
		39285c	`@@ -35193,6 +35213,39 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,`
		39285c	`AT,`
		39285c	`( (MASKS << 9 ) \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`break;`
		39285c	`+ case XVI8GER4SPP:`
		39285c	`+ PMSK = IFIELD( prefix, 12, 4);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvi8ger4spp %u,r%u, r%u,%u,%u,%u\n",`
		39285c	`+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,`
		39285c	`+ getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`+ AT,`
		39285c	`+ ( (MASKS << 9 ) \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVI16GER2:`
		39285c	`+ PMSK = IFIELD( prefix, 12, 4);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvi16ger2 %u,r%u, r%u,%u,%u,%u\n",`
		39285c	`+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,`
		39285c	`+ getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`+ AT,`
		39285c	`+ ( (MASKS << 9 ) \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`+ case XVI16GER2PP:`
		39285c	`+ PMSK = IFIELD( prefix, 12, 4);`
		39285c	`+ XMSK = IFIELD( prefix, 4, 4);`
		39285c	`+ YMSK = IFIELD( prefix, 0, 4);`
		39285c	`+ DIP("pmxvi16ger2pp %u,r%u, r%u,%u,%u,%u\n",`
		39285c	`+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);`
		39285c	`+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,`
		39285c	`+ getVSReg( rA_addr ), getVSReg( rB_addr ),`
		39285c	`+ AT,`
		39285c	`+ ( (MASKS << 9 ) \| ( inst_prefix << 8 ) \| XO ) );`
		39285c	`+ break;`
		39285c	`case XVI16GER2S:`
		39285c	`PMSK = IFIELD( prefix, 14, 2);`
		39285c	`XMSK = IFIELD( prefix, 4, 4);`
		39285c	`@@ -36345,6 +36398,9 @@ DisResult disInstr_PPC_WRK (`
		39285c	`(opc2 == XVI4GER8PP) \|\| // xvi4ger8pp`
		39285c	`(opc2 == XVI8GER4) \|\| // xvi8ger4`
		39285c	`(opc2 == XVI8GER4PP) \|\| // xvi8ger4pp`
		39285c	`+ (opc2 == XVI8GER4SPP) \|\| // xvi8ger4spp`
		39285c	`+ (opc2 == XVI16GER2) \|\| // xvi16ger2`
		39285c	`+ (opc2 == XVI16GER2PP) \|\| // xvi16ger2pp`
		39285c	`(opc2 == XVBF16GER2) \|\| // xvbf16ger2`
		39285c	`(opc2 == XVBF16GER2PP) \|\| // xvbf16ger2pp`
		39285c	`(opc2 == XVBF16GER2PN) \|\| // xvbf16ger2pn`

rpms / gcc-toolset-11-valgrind

Source Code

Blame SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch