Blame SOURCES/8132051-aarch64.patch

f8968f
# HG changeset patch
f8968f
# User aph
f8968f
# Date 1460374398 0
f8968f
#      Mon Apr 11 11:33:18 2016 +0000
f8968f
# Node ID 388e9d0905e69727a15a94f825bdde17e2ed96d6
f8968f
# Parent  e2b90ce9a1d12eae1a8edbd34eacd9a9674e315b
f8968f
8132051: Better byte behavior
f8968f
Reviewed-by: adinn
f8968f
f8968f
diff --git a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
f8968f
--- openjdk/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
f8968f
+++ openjdk/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp
f8968f
@@ -337,7 +337,7 @@
f8968f
     length.load_item();
f8968f
 
f8968f
   }
f8968f
-  if (needs_store_check) {
f8968f
+  if (needs_store_check || x->check_boolean()) {
f8968f
     value.load_item();
f8968f
   } else {
f8968f
     value.load_for_store(x->elt_type());
f8968f
@@ -386,7 +386,8 @@
f8968f
     // Seems to be a precise
f8968f
     post_barrier(LIR_OprFact::address(array_addr), value.result());
f8968f
   } else {
f8968f
-    __ move(value.result(), array_addr, null_check_info);
f8968f
+    LIR_Opr result = maybe_mask_boolean(x, array.result(), value.result(), null_check_info);
f8968f
+    __ move(result, array_addr, null_check_info);
f8968f
   }
f8968f
 }
f8968f
 
f8968f
diff --git a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp
f8968f
--- openjdk/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp
f8968f
+++ openjdk/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.cpp
f8968f
@@ -41,7 +41,43 @@
f8968f
 #include "runtime/thread.inline.hpp"
f8968f
 
f8968f
 
f8968f
-// Implementation of InterpreterMacroAssembler
f8968f
+void InterpreterMacroAssembler::narrow(Register result) {
f8968f
+  // Narrow the int value in result to the result type recorded for the method in the current interpreter frame.
f8968f
+  // Get method->_constMethod->_result_type (uses rscratch1 as a temporary)
f8968f
+  ldr(rscratch1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
f8968f
+  ldr(rscratch1, Address(rscratch1, Method::const_offset()));
f8968f
+  ldrb(rscratch1, Address(rscratch1, ConstMethod::result_type_offset()));
f8968f
+
f8968f
+  Label done, notBool, notByte, notChar;
f8968f
+
f8968f
+  // common case first: a T_INT result is left untouched
f8968f
+  cmpw(rscratch1, T_INT);
f8968f
+  br(Assembler::EQ, done);
f8968f
+
f8968f
+  // Mask integer result to narrower return type; T_BOOLEAN keeps only bit 0.
f8968f
+  cmpw(rscratch1, T_BOOLEAN);
f8968f
+  br(Assembler::NE, notBool);
f8968f
+  andw(result, result, 0x1);
f8968f
+  b(done);
f8968f
+
f8968f
+  bind(notBool);
f8968f
+  cmpw(rscratch1, T_BYTE);
f8968f
+  br(Assembler::NE, notByte);
f8968f
+  sbfx(result, result, 0, 8);  // sign-extend byte (low 8 bits)
f8968f
+  b(done);
f8968f
+
f8968f
+  bind(notByte);
f8968f
+  cmpw(rscratch1, T_CHAR);
f8968f
+  br(Assembler::NE, notChar);
f8968f
+  ubfx(result, result, 0, 16);  // zero-extend char (clears the upper 16 bits)
f8968f
+  b(done);
f8968f
+
f8968f
+  bind(notChar);  // no further type check: every remaining result type takes this path
f8968f
+  sbfx(result, result, 0, 16);     // sign-extend short
f8968f
+
f8968f
+  // Nothing to do for T_INT
f8968f
+  bind(done);
f8968f
+}
f8968f
 
f8968f
 #ifndef CC_INTERP
f8968f
 
f8968f
@@ -79,6 +115,7 @@
f8968f
                verify_oop(r0, state);               break;
f8968f
     case ltos: ldr(r0, val_addr);                   break;
f8968f
     case btos:                                   // fall through
f8968f
+    case ztos:                                   // fall through
f8968f
     case ctos:                                   // fall through
f8968f
     case stos:                                   // fall through
f8968f
     case itos: ldrw(r0, val_addr);                  break;
f8968f
@@ -312,6 +349,7 @@
f8968f
   switch (state) {
f8968f
   case atos: pop_ptr();                 break;
f8968f
   case btos:
f8968f
+  case ztos:
f8968f
   case ctos:
f8968f
   case stos:
f8968f
   case itos: pop_i();                   break;
f8968f
@@ -329,6 +367,7 @@
f8968f
   switch (state) {
f8968f
   case atos: push_ptr();                break;
f8968f
   case btos:
f8968f
+  case ztos:
f8968f
   case ctos:
f8968f
   case stos:
f8968f
   case itos: push_i();                  break;
f8968f
diff --git a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp
f8968f
--- openjdk/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.hpp
f8968f
+++ openjdk/hotspot/src/cpu/aarch64/vm/interp_masm_aarch64.hpp
f8968f
@@ -252,6 +252,9 @@
f8968f
   void update_mdp_by_constant(Register mdp_in, int constant);
f8968f
   void update_mdp_for_ret(Register return_bci);
f8968f
 
f8968f
+  // narrow int return value
f8968f
+  void narrow(Register result);
f8968f
+
f8968f
   void profile_taken_branch(Register mdp, Register bumped_count);
f8968f
   void profile_not_taken_branch(Register mdp);
f8968f
   void profile_call(Register mdp);
f8968f
diff --git a/src/cpu/aarch64/vm/templateTable_aarch64.cpp b/src/cpu/aarch64/vm/templateTable_aarch64.cpp
f8968f
--- openjdk/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp
f8968f
+++ openjdk/hotspot/src/cpu/aarch64/vm/templateTable_aarch64.cpp
f8968f
@@ -233,6 +233,7 @@
f8968f
   switch (bc) {
f8968f
   case Bytecodes::_fast_aputfield:
f8968f
   case Bytecodes::_fast_bputfield:
f8968f
+  case Bytecodes::_fast_zputfield:
f8968f
   case Bytecodes::_fast_cputfield:
f8968f
   case Bytecodes::_fast_dputfield:
f8968f
   case Bytecodes::_fast_fputfield:
f8968f
@@ -1072,6 +1073,18 @@
f8968f
   // r1: index
f8968f
   // r3: array
f8968f
   index_check(r3, r1); // prefer index in r1
f8968f
+
f8968f
+  // Need to check whether array is boolean or byte
f8968f
+  // since both types share the bastore bytecode.
f8968f
+  __ load_klass(r2, r3);
f8968f
+  __ ldrw(r2, Address(r2, Klass::layout_helper_offset()));
f8968f
+  int diffbit = Klass::layout_helper_boolean_diffbit();
f8968f
+  __ andw(rscratch1, r2, diffbit);
f8968f
+  Label L_skip;
f8968f
+  __ cbzw(rscratch1, L_skip);
f8968f
+  __ andw(r0, r0, 1);  // if it is a T_BOOLEAN array, mask the stored value to 0/1
f8968f
+  __ bind(L_skip);
f8968f
+
f8968f
   __ lea(rscratch1, Address(r3, r1, Address::uxtw(0)));
f8968f
   __ strb(r0, Address(rscratch1,
f8968f
 		      arrayOopDesc::base_offset_in_bytes(T_BYTE)));
f8968f
@@ -2186,6 +2199,13 @@
f8968f
   if (_desc->bytecode() == Bytecodes::_return)
f8968f
     __ membar(MacroAssembler::StoreStore);
f8968f
 
f8968f
+  // Narrow result if state is itos but result type is smaller.
f8968f
+  // Need to narrow in the return bytecode rather than in generate_return_entry
f8968f
+  // since compiled code callers expect the result to already be narrowed.
f8968f
+  if (state == itos) {
f8968f
+    __ narrow(r0);
f8968f
+  }
f8968f
+
f8968f
   __ remove_activation(state);
f8968f
   __ ret(lr);
f8968f
 }
f8968f
@@ -2395,7 +2415,7 @@
f8968f
 
f8968f
   const Address field(obj, off);
f8968f
 
f8968f
-  Label Done, notByte, notInt, notShort, notChar,
f8968f
+  Label Done, notByte, notBool, notInt, notShort, notChar,
f8968f
               notLong, notFloat, notObj, notDouble;
f8968f
 
f8968f
   // x86 uses a shift and mask or wings it with a shift plus assert
f8968f
@@ -2415,6 +2435,20 @@
f8968f
   __ b(Done);
f8968f
 
f8968f
   __ bind(notByte);
f8968f
+  __ cmp(flags, ztos);
f8968f
+  __ br(Assembler::NE, notBool);
f8968f
+
f8968f
+  // ztos (same code as btos)
f8968f
+  __ ldrsb(r0, field);
f8968f
+  __ push(ztos);
f8968f
+  // Rewrite bytecode to be faster
f8968f
+  if (!is_static) {
f8968f
+    // Use btos rewriting; no truncation to the true/false bit is needed for getfield.
f8968f
+    patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
f8968f
+  }
f8968f
+  __ b(Done);
f8968f
+
f8968f
+  __ bind(notBool);
f8968f
   __ cmp(flags, atos);
f8968f
   __ br(Assembler::NE, notObj);
f8968f
   // atos
f8968f
@@ -2606,7 +2640,7 @@
f8968f
   // field address
f8968f
   const Address field(obj, off);
f8968f
 
f8968f
-  Label notByte, notInt, notShort, notChar,
f8968f
+  Label notByte, notBool, notInt, notShort, notChar,
f8968f
         notLong, notFloat, notObj, notDouble;
f8968f
 
f8968f
   // x86 uses a shift and mask or wings it with a shift plus assert
f8968f
@@ -2628,6 +2662,22 @@
f8968f
   }
f8968f
 
f8968f
   __ bind(notByte);
f8968f
+  __ cmp(flags, ztos);
f8968f
+  __ br(Assembler::NE, notBool);
f8968f
+
f8968f
+  // ztos
f8968f
+  {
f8968f
+    __ pop(ztos);
f8968f
+    if (!is_static) pop_and_check_object(obj);
f8968f
+    __ andw(r0, r0, 0x1);
f8968f
+    __ strb(r0, field);
f8968f
+    if (!is_static) {
f8968f
+      patch_bytecode(Bytecodes::_fast_zputfield, bc, r1, true, byte_no);
f8968f
+    }
f8968f
+    __ b(Done);
f8968f
+  }
f8968f
+
f8968f
+  __ bind(notBool);
f8968f
   __ cmp(flags, atos);
f8968f
   __ br(Assembler::NE, notObj);
f8968f
 
f8968f
@@ -2778,6 +2828,7 @@
f8968f
     switch (bytecode()) {          // load values into the jvalue object
f8968f
     case Bytecodes::_fast_aputfield: __ push_ptr(r0); break;
f8968f
     case Bytecodes::_fast_bputfield: // fall through
f8968f
+    case Bytecodes::_fast_zputfield: // fall through
f8968f
     case Bytecodes::_fast_sputfield: // fall through
f8968f
     case Bytecodes::_fast_cputfield: // fall through
f8968f
     case Bytecodes::_fast_iputfield: __ push_i(r0); break;
f8968f
@@ -2803,6 +2854,7 @@
f8968f
     switch (bytecode()) {             // restore tos values
f8968f
     case Bytecodes::_fast_aputfield: __ pop_ptr(r0); break;
f8968f
     case Bytecodes::_fast_bputfield: // fall through
f8968f
+    case Bytecodes::_fast_zputfield: // fall through
f8968f
     case Bytecodes::_fast_sputfield: // fall through
f8968f
     case Bytecodes::_fast_cputfield: // fall through
f8968f
     case Bytecodes::_fast_iputfield: __ pop_i(r0); break;
f8968f
@@ -2858,6 +2910,9 @@
f8968f
   case Bytecodes::_fast_iputfield:
f8968f
     __ strw(r0, field);
f8968f
     break;
f8968f
+  case Bytecodes::_fast_zputfield:
f8968f
+    __ andw(r0, r0, 0x1);  // boolean is true if LSB is 1
f8968f
+    // fall through to bputfield
f8968f
   case Bytecodes::_fast_bputfield:
f8968f
     __ strb(r0, field);
f8968f
     break;