Tree - rpms/java-1.8.0-openjdk

rpms / java-1.8.0-openjdk

Blame SOURCES/8145913-pr3466-rh1498309.patch

Blob History Raw

		a42b25	`# HG changeset patch`
		a42b25	`# User mdoerr`
		a42b25	`# Date 1507750779 -3600`
		a42b25	`# Wed Oct 11 20:39:39 2017 +0100`
		a42b25	`# Node ID 92f0dbe76a13992cc27188e0f68e4b1771c7004a`
		a42b25	`# Parent 542c122b1d7d30c29189565248074aa28f21ae58`
		a42b25	`8145913, PR3466, RH1498309: PPC64: add Montgomery multiply intrinsic`
		a42b25	`Reviewed-by: aph, goetz`
		a42b25
		a42b25	`diff --git a/src/cpu/ppc/vm/assembler_ppc.hpp b/src/cpu/ppc/vm/assembler_ppc.hpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/assembler_ppc.hpp`
		ecbadd	`@@ -1179,6 +1179,8 @@`
		a42b25	`inline void mullw_( Register d, Register a, Register b);`
		a42b25	`inline void mulhw( Register d, Register a, Register b);`
		a42b25	`inline void mulhw_( Register d, Register a, Register b);`
		a42b25	`+ inline void mulhwu( Register d, Register a, Register b);`
		a42b25	`+ inline void mulhwu_(Register d, Register a, Register b);`
		a42b25	`inline void mulhd( Register d, Register a, Register b);`
		a42b25	`inline void mulhd_( Register d, Register a, Register b);`
		a42b25	`inline void mulhdu( Register d, Register a, Register b);`
		a42b25	`diff --git a/src/cpu/ppc/vm/assembler_ppc.inline.hpp b/src/cpu/ppc/vm/assembler_ppc.inline.hpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/assembler_ppc.inline.hpp`
		a42b25	`@@ -109,6 +109,8 @@`
		a42b25	`inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }`
		a42b25	`inline void Assembler::mulhw( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }`
		a42b25	`inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }`
		a42b25	`+inline void Assembler::mulhwu( Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }`
		a42b25	`+inline void Assembler::mulhwu_(Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }`
		a42b25	`inline void Assembler::mulhd( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }`
		a42b25	`inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }`
		a42b25	`inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }`
		a42b25	`diff --git a/src/cpu/ppc/vm/c2_init_ppc.cpp b/src/cpu/ppc/vm/c2_init_ppc.cpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/c2_init_ppc.cpp`
		a42b25	`@@ -45,4 +45,10 @@`
		a42b25	`FLAG_SET_ERGO(bool, InsertEndGroupPPC64, true);`
		a42b25	`}`
		a42b25	`}`
		a42b25	`+`
		a42b25	`+ if (OptimizeFill) {`
		a42b25	`+ warning("OptimizeFill is not supported on this CPU.");`
		a42b25	`+ FLAG_SET_DEFAULT(OptimizeFill, false);`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`}`
		a42b25	`diff --git a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp`
		a42b25	`@@ -42,6 +42,8 @@`
		a42b25	`#include "opto/runtime.hpp"`
		a42b25	`#endif`
		a42b25
		a42b25	`+#include <alloca.h>`
		a42b25	`+`
		a42b25	`#define __ masm->`
		a42b25
		a42b25	`#ifdef PRODUCT`
		a42b25	`@@ -3269,3 +3271,245 @@`
		a42b25	`return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_bytes/wordSize,`
		a42b25	`oop_maps, true);`
		a42b25	`}`
		a42b25	`+`
		a42b25	`+`
		a42b25	`+//------------------------------Montgomery multiplication------------------------`
		a42b25	`+//`
		a42b25	`+`
		a42b25	`+// Subtract 0:b from carry:a. Return carry.`
		a42b25	`+static unsigned long`
		a42b25	`+sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {`
		a42b25	`+ long i = 0;`
		a42b25	`+ unsigned long tmp, tmp2;`
		a42b25	`+ __asm__ __volatile__ (`
		a42b25	`+ "subfc %[tmp], %[tmp], %[tmp] \n" // pre-set CA`
		a42b25	`+ "mtctr %[len] \n"`
		a42b25	`+ "0: \n"`
		a42b25	`+ "ldx %[tmp], %[i], %[a] \n"`
		a42b25	`+ "ldx %[tmp2], %[i], %[b] \n"`
		a42b25	`+ "subfe %[tmp], %[tmp2], %[tmp] \n" // subtract extended`
		a42b25	`+ "stdx %[tmp], %[i], %[a] \n"`
		a42b25	`+ "addi %[i], %[i], 8 \n"`
		a42b25	`+ "bdnz 0b \n"`
		a42b25	`+ "addme %[tmp], %[carry] \n" // carry + CA - 1`
		a42b25	`+ : [i]"+b"(i), [tmp]"=&r"(tmp), [tmp2]"=&r"(tmp2)`
		a42b25	`+ : [a]"r"(a), [b]"r"(b), [carry]"r"(carry), [len]"r"(len)`
		a42b25	`+ : "ctr", "xer", "memory"`
		a42b25	`+ );`
		a42b25	`+ return tmp;`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+// Multiply (unsigned) Long A by Long B, accumulating the double-`
		a42b25	`+// length result into the accumulator formed of T0, T1, and T2.`
		a42b25	`+inline void MACC(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {`
		a42b25	`+ unsigned long hi, lo;`
		a42b25	`+ __asm__ __volatile__ (`
		a42b25	`+ "mulld %[lo], %[A], %[B] \n"`
		a42b25	`+ "mulhdu %[hi], %[A], %[B] \n"`
		a42b25	`+ "addc %[T0], %[T0], %[lo] \n"`
		a42b25	`+ "adde %[T1], %[T1], %[hi] \n"`
		a42b25	`+ "addze %[T2], %[T2] \n"`
		a42b25	`+ : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)`
		a42b25	`+ : [A]"r"(A), [B]"r"(B)`
		a42b25	`+ : "xer"`
		a42b25	`+ );`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+// As above, but add twice the double-length result into the`
		a42b25	`+// accumulator.`
		a42b25	`+inline void MACC2(unsigned long A, unsigned long B, unsigned long &T0, unsigned long &T1, unsigned long &T2) {`
		a42b25	`+ unsigned long hi, lo;`
		a42b25	`+ __asm__ __volatile__ (`
		a42b25	`+ "mulld %[lo], %[A], %[B] \n"`
		a42b25	`+ "mulhdu %[hi], %[A], %[B] \n"`
		a42b25	`+ "addc %[T0], %[T0], %[lo] \n"`
		a42b25	`+ "adde %[T1], %[T1], %[hi] \n"`
		a42b25	`+ "addze %[T2], %[T2] \n"`
		a42b25	`+ "addc %[T0], %[T0], %[lo] \n"`
		a42b25	`+ "adde %[T1], %[T1], %[hi] \n"`
		a42b25	`+ "addze %[T2], %[T2] \n"`
		a42b25	`+ : [hi]"=&r"(hi), [lo]"=&r"(lo), [T0]"+r"(T0), [T1]"+r"(T1), [T2]"+r"(T2)`
		a42b25	`+ : [A]"r"(A), [B]"r"(B)`
		a42b25	`+ : "xer"`
		a42b25	`+ );`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+// Fast Montgomery multiplication. The derivation of the algorithm is`
		a42b25	`+// in "A Cryptographic Library for the Motorola DSP56000,`
		a42b25	`+// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237".`
		a42b25	`+static void`
		a42b25	`+montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],`
		a42b25	`+ unsigned long m[], unsigned long inv, int len) {`
		a42b25	`+ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator`
		a42b25	`+ int i;`
		a42b25	`+`
		a42b25	`+ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");`
		a42b25	`+`
		a42b25	`+ for (i = 0; i < len; i++) {`
		a42b25	`+ int j;`
		a42b25	`+ for (j = 0; j < i; j++) {`
		a42b25	`+ MACC(a[j], b[i-j], t0, t1, t2);`
		a42b25	`+ MACC(m[j], n[i-j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ MACC(a[i], b[0], t0, t1, t2);`
		a42b25	`+ m[i] = t0 * inv;`
		a42b25	`+ MACC(m[i], n[0], t0, t1, t2);`
		a42b25	`+`
		a42b25	`+ assert(t0 == 0, "broken Montgomery multiply");`
		a42b25	`+`
		a42b25	`+ t0 = t1; t1 = t2; t2 = 0;`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`+ for (i = len; i < 2*len; i++) {`
		a42b25	`+ int j;`
		a42b25	`+ for (j = i-len+1; j < len; j++) {`
		a42b25	`+ MACC(a[j], b[i-j], t0, t1, t2);`
		a42b25	`+ MACC(m[j], n[i-j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ m[i-len] = t0;`
		a42b25	`+ t0 = t1; t1 = t2; t2 = 0;`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`+ while (t0) {`
		a42b25	`+ t0 = sub(m, n, t0, len);`
		a42b25	`+ }`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+// Fast Montgomery squaring. This uses asymptotically 25% fewer`
		a42b25	`+// multiplies so it should be up to 25% faster than Montgomery`
		a42b25	`+// multiplication. However, its loop control is more complex and it`
		a42b25	`+// may actually run slower on some machines.`
		a42b25	`+static void`
		a42b25	`+montgomery_square(unsigned long a[], unsigned long n[],`
		a42b25	`+ unsigned long m[], unsigned long inv, int len) {`
		a42b25	`+ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator`
		a42b25	`+ int i;`
		a42b25	`+`
		a42b25	`+ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");`
		a42b25	`+`
		a42b25	`+ for (i = 0; i < len; i++) {`
		a42b25	`+ int j;`
		a42b25	`+ int end = (i+1)/2;`
		a42b25	`+ for (j = 0; j < end; j++) {`
		a42b25	`+ MACC2(a[j], a[i-j], t0, t1, t2);`
		a42b25	`+ MACC(m[j], n[i-j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ if ((i & 1) == 0) {`
		a42b25	`+ MACC(a[j], a[j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ for (; j < i; j++) {`
		a42b25	`+ MACC(m[j], n[i-j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ m[i] = t0 * inv;`
		a42b25	`+ MACC(m[i], n[0], t0, t1, t2);`
		a42b25	`+`
		a42b25	`+ assert(t0 == 0, "broken Montgomery square");`
		a42b25	`+`
		a42b25	`+ t0 = t1; t1 = t2; t2 = 0;`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`+ for (i = len; i < 2*len; i++) {`
		a42b25	`+ int start = i-len+1;`
		a42b25	`+ int end = start + (len - start)/2;`
		a42b25	`+ int j;`
		a42b25	`+ for (j = start; j < end; j++) {`
		a42b25	`+ MACC2(a[j], a[i-j], t0, t1, t2);`
		a42b25	`+ MACC(m[j], n[i-j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ if ((i & 1) == 0) {`
		a42b25	`+ MACC(a[j], a[j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ for (; j < len; j++) {`
		a42b25	`+ MACC(m[j], n[i-j], t0, t1, t2);`
		a42b25	`+ }`
		a42b25	`+ m[i-len] = t0;`
		a42b25	`+ t0 = t1; t1 = t2; t2 = 0;`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`+ while (t0) {`
		a42b25	`+ t0 = sub(m, n, t0, len);`
		a42b25	`+ }`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+// The threshold at which squaring is advantageous was determined`
		a42b25	`+// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.`
		a42b25	`+// Doesn't seem to be relevant for Power8 so we use the same value.`
		a42b25	`+#define MONTGOMERY_SQUARING_THRESHOLD 64`
		a42b25	`+`
		a42b25	`+// Copy len longwords from s to d, word-swapping as we go. The`
		a42b25	`+// destination array is reversed.`
		a42b25	`+static void reverse_words(unsigned long *s, unsigned long *d, int len) {`
		a42b25	`+ d += len;`
		a42b25	`+ while(len-- > 0) {`
		a42b25	`+ d--;`
		a42b25	`+ unsigned long s_val = *s;`
		a42b25	`+ // Swap words in a longword on little endian machines.`
		a42b25	`+#ifdef VM_LITTLE_ENDIAN`
		a42b25	`+ s_val = (s_val << 32) | (s_val >> 32);`
		a42b25	`+#endif`
		a42b25	`+ *d = s_val;`
		a42b25	`+ s++;`
		a42b25	`+ }`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,`
		a42b25	`+ jint len, jlong inv,`
		a42b25	`+ jint *m_ints) {`
		a42b25	`+ assert(len % 2 == 0, "array length in montgomery_multiply must be even");`
		a42b25	`+ int longwords = len/2;`
		a42b25	`+ assert(longwords > 0, "unsupported");`
		a42b25	`+`
		a42b25	`+ // Make very sure we don't use so much space that the stack might`
		a42b25	`+ // overflow. 512 jints corresponds to an 16384-bit integer and`
		a42b25	`+ // will use here a total of 8k bytes of stack space.`
		a42b25	`+ int total_allocation = longwords * sizeof (unsigned long) * 4;`
		a42b25	`+ guarantee(total_allocation <= 8192, "must be");`
		a42b25	`+ unsigned long *scratch = (unsigned long *)alloca(total_allocation);`
		a42b25	`+`
		a42b25	`+ // Local scratch arrays`
		a42b25	`+ unsigned long`
		a42b25	`+ *a = scratch + 0 * longwords,`
		a42b25	`+ *b = scratch + 1 * longwords,`
		a42b25	`+ *n = scratch + 2 * longwords,`
		a42b25	`+ *m = scratch + 3 * longwords;`
		a42b25	`+`
		a42b25	`+ reverse_words((unsigned long *)a_ints, a, longwords);`
		a42b25	`+ reverse_words((unsigned long *)b_ints, b, longwords);`
		a42b25	`+ reverse_words((unsigned long *)n_ints, n, longwords);`
		a42b25	`+`
		a42b25	`+ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);`
		a42b25	`+`
		a42b25	`+ reverse_words(m, (unsigned long *)m_ints, longwords);`
		a42b25	`+}`
		a42b25	`+`
		a42b25	`+void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,`
		a42b25	`+ jint len, jlong inv,`
		a42b25	`+ jint *m_ints) {`
		a42b25	`+ assert(len % 2 == 0, "array length in montgomery_square must be even");`
		a42b25	`+ int longwords = len/2;`
		a42b25	`+ assert(longwords > 0, "unsupported");`
		a42b25	`+`
		a42b25	`+ // Make very sure we don't use so much space that the stack might`
		a42b25	`+ // overflow. 512 jints corresponds to an 16384-bit integer and`
		a42b25	`+ // will use here a total of 6k bytes of stack space.`
		a42b25	`+ int total_allocation = longwords * sizeof (unsigned long) * 3;`
		a42b25	`+ guarantee(total_allocation <= 8192, "must be");`
		a42b25	`+ unsigned long *scratch = (unsigned long *)alloca(total_allocation);`
		a42b25	`+`
		a42b25	`+ // Local scratch arrays`
		a42b25	`+ unsigned long`
		a42b25	`+ *a = scratch + 0 * longwords,`
		a42b25	`+ *n = scratch + 1 * longwords,`
		a42b25	`+ *m = scratch + 2 * longwords;`
		a42b25	`+`
		a42b25	`+ reverse_words((unsigned long *)a_ints, a, longwords);`
		a42b25	`+ reverse_words((unsigned long *)n_ints, n, longwords);`
		a42b25	`+`
		a42b25	`+ if (len >= MONTGOMERY_SQUARING_THRESHOLD) {`
		a42b25	`+ ::montgomery_square(a, n, m, (unsigned long)inv, longwords);`
		a42b25	`+ } else {`
		a42b25	`+ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`+ reverse_words(m, (unsigned long *)m_ints, longwords);`
		a42b25	`+}`
		a42b25	`diff --git a/src/cpu/ppc/vm/stubGenerator_ppc.cpp b/src/cpu/ppc/vm/stubGenerator_ppc.cpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/stubGenerator_ppc.cpp`
		9a353b	`@@ -2518,6 +2518,15 @@`
		a42b25	`generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry,`
		a42b25	`&StubRoutines::_safefetchN_fault_pc,`
		a42b25	`&StubRoutines::_safefetchN_continuation_pc);`
		9a353b	`+`
		a42b25	`+ if (UseMontgomeryMultiplyIntrinsic) {`
		a42b25	`+ StubRoutines::_montgomeryMultiply`
		a42b25	`+ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);`
		a42b25	`+ }`
		a42b25	`+ if (UseMontgomerySquareIntrinsic) {`
		a42b25	`+ StubRoutines::_montgomerySquare`
		a42b25	`+ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);`
		a42b25	`+ }`
		a42b25
		9a353b	`if (UseAESIntrinsics) {`
		9a353b	`StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();`
		a42b25	`diff --git a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/templateInterpreter_ppc.cpp`
		a42b25	`@@ -265,7 +265,7 @@`
		a42b25	`__ cmpdi(CCR0, Rmdo, 0);`
		a42b25	`__ beq(CCR0, no_mdo);`
		a42b25
		a42b25	`- // Increment backedge counter in the MDO.`
		a42b25	`+ // Increment invocation counter in the MDO.`
		a42b25	`const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());`
		a42b25	`__ lwz(Rscratch2, mdo_bc_offs, Rmdo);`
		a42b25	`__ addi(Rscratch2, Rscratch2, increment);`
		a42b25	`@@ -277,12 +277,12 @@`
		a42b25	`}`
		a42b25
		a42b25	`// Increment counter in MethodCounters*.`
		a42b25	`- const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());`
		a42b25	`+ const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());`
		a42b25	`__ bind(no_mdo);`
		a42b25	`__ get_method_counters(R19_method, R3_counters, done);`
		a42b25	`- __ lwz(Rscratch2, mo_bc_offs, R3_counters);`
		a42b25	`+ __ lwz(Rscratch2, mo_ic_offs, R3_counters);`
		a42b25	`__ addi(Rscratch2, Rscratch2, increment);`
		a42b25	`- __ stw(Rscratch2, mo_bc_offs, R3_counters);`
		a42b25	`+ __ stw(Rscratch2, mo_ic_offs, R3_counters);`
		a42b25	`__ load_const_optimized(Rscratch1, mask, R0);`
		a42b25	`__ and_(Rscratch1, Rscratch2, Rscratch1);`
		a42b25	`__ beq(CCR0, *overflow);`
		a42b25	`diff --git a/src/cpu/ppc/vm/vm_version_ppc.cpp b/src/cpu/ppc/vm/vm_version_ppc.cpp`
		a42b25	`--- openjdk/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp`
		a42b25	`+++ openjdk/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp`
		ecbadd	`@@ -177,6 +177,12 @@`
		a42b25	`FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);`
		a42b25	`}`
		a42b25
		a42b25	`+ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {`
		a42b25	`+ UseMontgomeryMultiplyIntrinsic = true;`
		a42b25	`+ }`
		a42b25	`+ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {`
		a42b25	`+ UseMontgomerySquareIntrinsic = true;`
		a42b25	`+ }`
		a42b25	`}`
		a42b25
		a42b25	`void VM_Version::print_features() {`
		a42b25	`diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp`
		a42b25	`--- openjdk/hotspot/src/share/vm/opto/library_call.cpp`
		a42b25	`+++ openjdk/hotspot/src/share/vm/opto/library_call.cpp`
		ecbadd	`@@ -6031,11 +6031,21 @@`
		a42b25	`Node* n_start = array_element_address(n, intcon(0), n_elem);`
		a42b25	`Node* m_start = array_element_address(m, intcon(0), m_elem);`
		a42b25
		a42b25	`- Node* call = make_runtime_call(RC_LEAF,`
		a42b25	`- OptoRuntime::montgomeryMultiply_Type(),`
		a42b25	`- stubAddr, stubName, TypePtr::BOTTOM,`
		a42b25	`- a_start, b_start, n_start, len, inv, top(),`
		a42b25	`- m_start);`
		a42b25	`+ Node* call = NULL;`
		a42b25	`+ if (CCallingConventionRequiresIntsAsLongs) {`
		a42b25	`+ Node* len_I2L = ConvI2L(len);`
		a42b25	`+ call = make_runtime_call(RC_LEAF,`
		a42b25	`+ OptoRuntime::montgomeryMultiply_Type(),`
		a42b25	`+ stubAddr, stubName, TypePtr::BOTTOM,`
		a42b25	`+ a_start, b_start, n_start, len_I2L XTOP, inv,`
		a42b25	`+ top(), m_start);`
		a42b25	`+ } else {`
		a42b25	`+ call = make_runtime_call(RC_LEAF,`
		a42b25	`+ OptoRuntime::montgomeryMultiply_Type(),`
		a42b25	`+ stubAddr, stubName, TypePtr::BOTTOM,`
		a42b25	`+ a_start, b_start, n_start, len, inv, top(),`
		a42b25	`+ m_start);`
		a42b25	`+ }`
		a42b25	`set_result(m);`
		a42b25	`}`
		a42b25
		ecbadd	`@@ -6085,11 +6095,22 @@`
		a42b25	`Node* n_start = array_element_address(n, intcon(0), n_elem);`
		a42b25	`Node* m_start = array_element_address(m, intcon(0), m_elem);`
		a42b25
		a42b25	`- Node* call = make_runtime_call(RC_LEAF,`
		a42b25	`- OptoRuntime::montgomerySquare_Type(),`
		a42b25	`- stubAddr, stubName, TypePtr::BOTTOM,`
		a42b25	`- a_start, n_start, len, inv, top(),`
		a42b25	`- m_start);`
		a42b25	`+ Node* call = NULL;`
		a42b25	`+ if (CCallingConventionRequiresIntsAsLongs) {`
		a42b25	`+ Node* len_I2L = ConvI2L(len);`
		a42b25	`+ call = make_runtime_call(RC_LEAF,`
		a42b25	`+ OptoRuntime::montgomerySquare_Type(),`
		a42b25	`+ stubAddr, stubName, TypePtr::BOTTOM,`
		a42b25	`+ a_start, n_start, len_I2L XTOP, inv, top(),`
		a42b25	`+ m_start);`
		a42b25	`+ } else {`
		a42b25	`+ call = make_runtime_call(RC_LEAF,`
		a42b25	`+ OptoRuntime::montgomerySquare_Type(),`
		a42b25	`+ stubAddr, stubName, TypePtr::BOTTOM,`
		a42b25	`+ a_start, n_start, len, inv, top(),`
		a42b25	`+ m_start);`
		a42b25	`+ }`
		a42b25	`+`
		a42b25	`set_result(m);`
		a42b25	`}`
		a42b25
		a42b25	`diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp`
		a42b25	`--- openjdk/hotspot/src/share/vm/opto/runtime.cpp`
		a42b25	`+++ openjdk/hotspot/src/share/vm/opto/runtime.cpp`
		ecbadd	`@@ -1005,12 +1005,20 @@`
		a42b25	`// create input type (domain)`
		a42b25	`int num_args = 7;`
		a42b25	`int argcnt = num_args;`
		a42b25	`+ if (CCallingConventionRequiresIntsAsLongs) {`
		a42b25	`+ argcnt++; // additional placeholder`
		a42b25	`+ }`
		a42b25	`const Type** fields = TypeTuple::fields(argcnt);`
		a42b25	`int argp = TypeFunc::Parms;`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // a`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // b`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // n`
		a42b25	`- fields[argp++] = TypeInt::INT; // len`
		a42b25	`+ if (CCallingConventionRequiresIntsAsLongs) {`
		a42b25	`+ fields[argp++] = TypeLong::LONG; // len`
		a42b25	`+ fields[argp++] = TypeLong::HALF; // placeholder`
		a42b25	`+ } else {`
		a42b25	`+ fields[argp++] = TypeInt::INT; // len`
		a42b25	`+ }`
		a42b25	`fields[argp++] = TypeLong::LONG; // inv`
		a42b25	`fields[argp++] = Type::HALF;`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // result`
		ecbadd	`@@ -1029,11 +1037,19 @@`
		a42b25	`// create input type (domain)`
		a42b25	`int num_args = 6;`
		a42b25	`int argcnt = num_args;`
		a42b25	`+ if (CCallingConventionRequiresIntsAsLongs) {`
		a42b25	`+ argcnt++; // additional placeholder`
		a42b25	`+ }`
		a42b25	`const Type** fields = TypeTuple::fields(argcnt);`
		a42b25	`int argp = TypeFunc::Parms;`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // a`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // n`
		a42b25	`- fields[argp++] = TypeInt::INT; // len`
		a42b25	`+ if (CCallingConventionRequiresIntsAsLongs) {`
		a42b25	`+ fields[argp++] = TypeLong::LONG; // len`
		a42b25	`+ fields[argp++] = TypeLong::HALF; // placeholder`
		a42b25	`+ } else {`
		a42b25	`+ fields[argp++] = TypeInt::INT; // len`
		a42b25	`+ }`
		a42b25	`fields[argp++] = TypeLong::LONG; // inv`
		a42b25	`fields[argp++] = Type::HALF;`
		a42b25	`fields[argp++] = TypePtr::NOTNULL; // result`

rpms / java-1.8.0-openjdk

Source Code

Blame SOURCES/8145913-pr3466-rh1498309.patch