|
|
0e3697 |
commit 87c504d3b293ebe6d36f3b50696cd307b02b0daa
|
|
|
0e3697 |
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
|
|
|
0e3697 |
Date: Tue Jun 19 21:23:39 2018 +0000
|
|
|
0e3697 |
|
|
|
0e3697 |
2018-06-19 Aaron Sawdey <acsawdey@linux.ibm.com>
|
|
|
0e3697 |
|
|
|
0e3697 |
* config/rs6000/rs6000-string.c (select_block_compare_mode): Check
|
|
|
0e3697 |
TARGET_EFFICIENT_OVERLAPPING_UNALIGNED here instead of in caller.
|
|
|
0e3697 |
(do_and3, do_and3_mask, do_cmpb3, do_rotl3): New functions.
|
|
|
0e3697 |
(expand_block_compare): Change select_block_compare_mode call.
|
|
|
0e3697 |
(expand_strncmp_align_check): Use new functions, fix comment.
|
|
|
0e3697 |
(emit_final_str_compare_gpr): New function.
|
|
|
0e3697 |
(expand_strn_compare): Refactor and clean up code.
|
|
|
0e3697 |
* config/rs6000/vsx.md (vsx_mov<mode>_64bit): Remove *.
|
|
|
0e3697 |
|
|
|
0e3697 |
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@261769 138bc75d-0d04-0410-961f-82ee72b054a4
|
|
|
0e3697 |
|
|
|
0e3697 |
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
|
|
|
0e3697 |
index 632d3359711..f9dd54eb639 100644
|
|
|
0e3697 |
--- a/gcc/config/rs6000/rs6000-string.c
|
|
|
0e3697 |
+++ b/gcc/config/rs6000/rs6000-string.c
|
|
|
0e3697 |
@@ -266,6 +266,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
|
|
0e3697 |
else if (bytes == GET_MODE_SIZE (QImode))
|
|
|
0e3697 |
return QImode;
|
|
|
0e3697 |
else if (bytes < GET_MODE_SIZE (SImode)
|
|
|
0e3697 |
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
|
|
0e3697 |
&& offset >= GET_MODE_SIZE (SImode) - bytes)
|
|
|
0e3697 |
/* This matches the case were we have SImode and 3 bytes
|
|
|
0e3697 |
and offset >= 1 and permits us to move back one and overlap
|
|
|
0e3697 |
@@ -273,6 +274,7 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
|
|
|
0e3697 |
unwanted bytes off of the input. */
|
|
|
0e3697 |
return SImode;
|
|
|
0e3697 |
else if (word_mode_ok && bytes < UNITS_PER_WORD
|
|
|
0e3697 |
+ && TARGET_EFFICIENT_OVERLAPPING_UNALIGNED
|
|
|
0e3697 |
&& offset >= UNITS_PER_WORD-bytes)
|
|
|
0e3697 |
/* Similarly, if we can use DImode it will get matched here and
|
|
|
0e3697 |
can do an overlapping read that ends at the end of the block. */
|
|
|
0e3697 |
@@ -408,6 +410,54 @@ do_add3 (rtx dest, rtx src1, rtx src2)
|
|
|
0e3697 |
emit_insn (gen_addsi3 (dest, src1, src2));
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
+/* Emit an and of the proper mode for DEST.
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ DEST is the destination register for the and.
|
|
|
0e3697 |
+ SRC1 is the first and input.
|
|
|
0e3697 |
+ SRC2 is the second and input.
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ Computes DEST = SRC1&SRC2. */
|
|
|
0e3697 |
+static void
|
|
|
0e3697 |
+do_and3 (rtx dest, rtx src1, rtx src2)
|
|
|
0e3697 |
+{
|
|
|
0e3697 |
+ if (GET_MODE (dest) == DImode)
|
|
|
0e3697 |
+ emit_insn (gen_anddi3 (dest, src1, src2));
|
|
|
0e3697 |
+ else
|
|
|
0e3697 |
+ emit_insn (gen_andsi3 (dest, src1, src2));
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+/* Emit a cmpb of the proper mode for DEST.
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ DEST is the destination register for the cmpb.
|
|
|
0e3697 |
+ SRC1 is the first input.
|
|
|
0e3697 |
+ SRC2 is the second input.
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ Computes cmpb of SRC1, SRC2. */
|
|
|
0e3697 |
+static void
|
|
|
0e3697 |
+do_cmpb3 (rtx dest, rtx src1, rtx src2)
|
|
|
0e3697 |
+{
|
|
|
0e3697 |
+ if (GET_MODE (dest) == DImode)
|
|
|
0e3697 |
+ emit_insn (gen_cmpbdi3 (dest, src1, src2));
|
|
|
0e3697 |
+ else
|
|
|
0e3697 |
+ emit_insn (gen_cmpbsi3 (dest, src1, src2));
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+/* Emit a rotl of the proper mode for DEST.
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ DEST is the destination register for the rotate.
|
|
|
0e3697 |
+ SRC1 is the value to be rotated.
|
|
|
0e3697 |
+ SRC2 is the rotate amount.
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ Computes DEST = SRC1 rotated left by SRC2. */
|
|
|
0e3697 |
+static void
|
|
|
0e3697 |
+do_rotl3 (rtx dest, rtx src1, rtx src2)
|
|
|
0e3697 |
+{
|
|
|
0e3697 |
+ if (GET_MODE (dest) == DImode)
|
|
|
0e3697 |
+ emit_insn (gen_rotldi3 (dest, src1, src2));
|
|
|
0e3697 |
+ else
|
|
|
0e3697 |
+ emit_insn (gen_rotlsi3 (dest, src1, src2));
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
/* Generate rtl for a load, shift, and compare of less than a full word.
|
|
|
0e3697 |
|
|
|
0e3697 |
LOAD_MODE is the machine mode for the loads.
|
|
|
0e3697 |
@@ -1395,11 +1445,8 @@ expand_block_compare (rtx operands[])
|
|
|
0e3697 |
while (bytes > 0)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
unsigned int align = compute_current_alignment (base_align, offset);
|
|
|
0e3697 |
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
|
|
0e3697 |
- load_mode = select_block_compare_mode (offset, bytes, align,
|
|
|
0e3697 |
- word_mode_ok);
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
- load_mode = select_block_compare_mode (0, bytes, align, word_mode_ok);
|
|
|
0e3697 |
+ load_mode = select_block_compare_mode (offset, bytes,
|
|
|
0e3697 |
+ align, word_mode_ok);
|
|
|
0e3697 |
load_mode_size = GET_MODE_SIZE (load_mode);
|
|
|
0e3697 |
if (bytes >= load_mode_size)
|
|
|
0e3697 |
cmp_bytes = load_mode_size;
|
|
|
0e3697 |
@@ -1627,22 +1674,19 @@ expand_block_compare (rtx operands[])
|
|
|
0e3697 |
return true;
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
-/* Generate alignment check and branch code to set up for
|
|
|
0e3697 |
+/* Generate page crossing check and branch code to set up for
|
|
|
0e3697 |
strncmp when we don't have DI alignment.
|
|
|
0e3697 |
STRNCMP_LABEL is the label to branch if there is a page crossing.
|
|
|
0e3697 |
- SRC is the string pointer to be examined.
|
|
|
0e3697 |
+ SRC_ADDR is the string address to be examined.
|
|
|
0e3697 |
BYTES is the max number of bytes to compare. */
|
|
|
0e3697 |
static void
|
|
|
0e3697 |
-expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
|
|
0e3697 |
+expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, strncmp_label);
|
|
|
0e3697 |
- rtx src_check = copy_addr_to_reg (XEXP (src, 0));
|
|
|
0e3697 |
- if (GET_MODE (src_check) == SImode)
|
|
|
0e3697 |
- emit_insn (gen_andsi3 (src_check, src_check, GEN_INT (0xfff)));
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
- emit_insn (gen_anddi3 (src_check, src_check, GEN_INT (0xfff)));
|
|
|
0e3697 |
+ rtx src_pgoff = gen_reg_rtx (GET_MODE (src_addr));
|
|
|
0e3697 |
+ do_and3 (src_pgoff, src_addr, GEN_INT (0xfff));
|
|
|
0e3697 |
rtx cond = gen_reg_rtx (CCmode);
|
|
|
0e3697 |
- emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_check,
|
|
|
0e3697 |
+ emit_move_insn (cond, gen_rtx_COMPARE (CCmode, src_pgoff,
|
|
|
0e3697 |
GEN_INT (4096 - bytes)));
|
|
|
0e3697 |
|
|
|
0e3697 |
rtx cmp_rtx = gen_rtx_GE (VOIDmode, cond, const0_rtx);
|
|
|
0e3697 |
@@ -1654,6 +1698,76 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src, HOST_WIDE_INT bytes)
|
|
|
0e3697 |
LABEL_NUSES (strncmp_label) += 1;
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
+/* Generate the final sequence that identifies the differing
|
|
|
0e3697 |
+ byte and generates the final result, taking into account
|
|
|
0e3697 |
+ zero bytes:
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ cmpb cmpb_result1, src1, src2
|
|
|
0e3697 |
+ cmpb cmpb_result2, src1, zero
|
|
|
0e3697 |
+ orc cmpb_result1, cmpb_result1, cmpb_result2
|
|
|
0e3697 |
+ cntlzd get bit of first zero/diff byte
|
|
|
0e3697 |
+ addi convert for rldcl use
|
|
|
0e3697 |
+ rldcl rldcl extract diff/zero byte
|
|
|
0e3697 |
+ subf subtract for final result
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ STR1 is the reg rtx for data from string 1.
|
|
|
0e3697 |
+ STR2 is the reg rtx for data from string 2.
|
|
|
0e3697 |
+ RESULT is the reg rtx for the comparison result. */
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+static void
|
|
|
0e3697 |
+emit_final_str_compare_gpr (rtx str1, rtx str2, rtx result)
|
|
|
0e3697 |
+{
|
|
|
0e3697 |
+ machine_mode m = GET_MODE (str1);
|
|
|
0e3697 |
+ rtx cmpb_diff = gen_reg_rtx (m);
|
|
|
0e3697 |
+ rtx cmpb_zero = gen_reg_rtx (m);
|
|
|
0e3697 |
+ rtx rot_amt = gen_reg_rtx (m);
|
|
|
0e3697 |
+ rtx zero_reg = gen_reg_rtx (m);
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ rtx rot1_1 = gen_reg_rtx (m);
|
|
|
0e3697 |
+ rtx rot1_2 = gen_reg_rtx (m);
|
|
|
0e3697 |
+ rtx rot2_1 = gen_reg_rtx (m);
|
|
|
0e3697 |
+ rtx rot2_2 = gen_reg_rtx (m);
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ if (m == SImode)
|
|
|
0e3697 |
+ {
|
|
|
0e3697 |
+ emit_insn (gen_cmpbsi3 (cmpb_diff, str1, str2));
|
|
|
0e3697 |
+ emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
|
|
0e3697 |
+ emit_insn (gen_cmpbsi3 (cmpb_zero, str1, zero_reg));
|
|
|
0e3697 |
+ emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
|
|
0e3697 |
+ emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
|
|
0e3697 |
+ emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
|
|
0e3697 |
+ emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
|
|
0e3697 |
+ emit_insn (gen_rotlsi3 (rot1_1, str1,
|
|
|
0e3697 |
+ gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
+ emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
|
|
0e3697 |
+ emit_insn (gen_rotlsi3 (rot2_1, str2,
|
|
|
0e3697 |
+ gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
+ emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
|
|
0e3697 |
+ emit_insn (gen_subsi3 (result, rot1_2, rot2_2));
|
|
|
0e3697 |
+ }
|
|
|
0e3697 |
+ else if (m == DImode)
|
|
|
0e3697 |
+ {
|
|
|
0e3697 |
+ emit_insn (gen_cmpbdi3 (cmpb_diff, str1, str2));
|
|
|
0e3697 |
+ emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
|
|
0e3697 |
+ emit_insn (gen_cmpbdi3 (cmpb_zero, str1, zero_reg));
|
|
|
0e3697 |
+ emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
|
|
0e3697 |
+ emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
|
|
0e3697 |
+ emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
|
|
0e3697 |
+ emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
|
|
0e3697 |
+ emit_insn (gen_rotldi3 (rot1_1, str1,
|
|
|
0e3697 |
+ gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
+ emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
|
|
0e3697 |
+ emit_insn (gen_rotldi3 (rot2_1, str2,
|
|
|
0e3697 |
+ gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
+ emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
|
|
0e3697 |
+ emit_insn (gen_subdi3 (result, rot1_2, rot2_2));
|
|
|
0e3697 |
+ }
|
|
|
0e3697 |
+ else
|
|
|
0e3697 |
+ gcc_unreachable ();
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ return;
|
|
|
0e3697 |
+}
|
|
|
0e3697 |
+
|
|
|
0e3697 |
/* Expand a string compare operation with length, and return
|
|
|
0e3697 |
true if successful. Return false if we should let the
|
|
|
0e3697 |
compiler generate normal code, probably a strncmp call.
|
|
|
0e3697 |
@@ -1684,8 +1798,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
align_rtx = operands[4];
|
|
|
0e3697 |
}
|
|
|
0e3697 |
unsigned HOST_WIDE_INT cmp_bytes = 0;
|
|
|
0e3697 |
- rtx src1 = orig_src1;
|
|
|
0e3697 |
- rtx src2 = orig_src2;
|
|
|
0e3697 |
+ rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
|
|
|
0e3697 |
+ rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
|
|
|
0e3697 |
|
|
|
0e3697 |
/* If we have a length, it must be constant. This simplifies things
|
|
|
0e3697 |
a bit as we don't have to generate code to check if we've exceeded
|
|
|
0e3697 |
@@ -1698,8 +1812,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
return false;
|
|
|
0e3697 |
|
|
|
0e3697 |
unsigned int base_align = UINTVAL (align_rtx);
|
|
|
0e3697 |
- int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
|
|
0e3697 |
- int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
|
|
0e3697 |
+ unsigned int align1 = MEM_ALIGN (orig_src1) / BITS_PER_UNIT;
|
|
|
0e3697 |
+ unsigned int align2 = MEM_ALIGN (orig_src2) / BITS_PER_UNIT;
|
|
|
0e3697 |
|
|
|
0e3697 |
/* targetm.slow_unaligned_access -- don't do unaligned stuff. */
|
|
|
0e3697 |
if (targetm.slow_unaligned_access (word_mode, align1)
|
|
|
0e3697 |
@@ -1751,8 +1865,9 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
rtx final_move_label = gen_label_rtx ();
|
|
|
0e3697 |
rtx final_label = gen_label_rtx ();
|
|
|
0e3697 |
rtx begin_compare_label = NULL;
|
|
|
0e3697 |
+ unsigned int required_align = 8;
|
|
|
0e3697 |
|
|
|
0e3697 |
- if (base_align < 8)
|
|
|
0e3697 |
+ if (base_align < required_align)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
/* Generate code that checks distance to 4k boundary for this case. */
|
|
|
0e3697 |
begin_compare_label = gen_label_rtx ();
|
|
|
0e3697 |
@@ -1775,14 +1890,14 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
}
|
|
|
0e3697 |
else
|
|
|
0e3697 |
{
|
|
|
0e3697 |
- align_test = ROUND_UP (align_test, 8);
|
|
|
0e3697 |
- base_align = 8;
|
|
|
0e3697 |
+ align_test = ROUND_UP (align_test, required_align);
|
|
|
0e3697 |
+ base_align = required_align;
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
- if (align1 < 8)
|
|
|
0e3697 |
- expand_strncmp_align_check (strncmp_label, src1, align_test);
|
|
|
0e3697 |
- if (align2 < 8)
|
|
|
0e3697 |
- expand_strncmp_align_check (strncmp_label, src2, align_test);
|
|
|
0e3697 |
+ if (align1 < required_align)
|
|
|
0e3697 |
+ expand_strncmp_align_check (strncmp_label, src1_addr, align_test);
|
|
|
0e3697 |
+ if (align2 < required_align)
|
|
|
0e3697 |
+ expand_strncmp_align_check (strncmp_label, src2_addr, align_test);
|
|
|
0e3697 |
|
|
|
0e3697 |
/* Now generate the following sequence:
|
|
|
0e3697 |
- branch to begin_compare
|
|
|
0e3697 |
@@ -1799,25 +1914,13 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
|
|
|
0e3697 |
emit_label (strncmp_label);
|
|
|
0e3697 |
|
|
|
0e3697 |
- if (!REG_P (XEXP (src1, 0)))
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
|
|
0e3697 |
- src1 = replace_equiv_address (src1, src1_reg);
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- if (!REG_P (XEXP (src2, 0)))
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
|
|
0e3697 |
- src2 = replace_equiv_address (src2, src2_reg);
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
-
|
|
|
0e3697 |
if (no_length)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
|
|
|
0e3697 |
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
|
|
0e3697 |
target, LCT_NORMAL, GET_MODE (target),
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
|
|
|
0e3697 |
+ force_reg (Pmode, src1_addr), Pmode,
|
|
|
0e3697 |
+ force_reg (Pmode, src2_addr), Pmode);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
else
|
|
|
0e3697 |
{
|
|
|
0e3697 |
@@ -1830,8 +1933,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
|
|
|
0e3697 |
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
|
|
0e3697 |
target, LCT_NORMAL, GET_MODE (target),
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
|
|
0e3697 |
+ force_reg (Pmode, src1_addr), Pmode,
|
|
|
0e3697 |
+ force_reg (Pmode, src2_addr), Pmode,
|
|
|
0e3697 |
len_rtx, Pmode);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
@@ -1847,12 +1950,12 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
rtx tmp_reg_src1 = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
rtx tmp_reg_src2 = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
|
|
|
0e3697 |
- /* Generate sequence of ld/ldbrx, cmpb to compare out
|
|
|
0e3697 |
+ /* Generate a sequence of GPR or VEC/VSX instructions to compare out
|
|
|
0e3697 |
to the length specified. */
|
|
|
0e3697 |
unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
|
|
|
0e3697 |
while (bytes_to_compare > 0)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
- /* Compare sequence:
|
|
|
0e3697 |
+ /* GPR compare sequence:
|
|
|
0e3697 |
check each 8B with: ld/ld cmpd bne
|
|
|
0e3697 |
If equal, use rldicr/cmpb to check for zero byte.
|
|
|
0e3697 |
cleanup code at end:
|
|
|
0e3697 |
@@ -1866,13 +1969,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
|
|
|
0e3697 |
The last compare can branch around the cleanup code if the
|
|
|
0e3697 |
result is zero because the strings are exactly equal. */
|
|
|
0e3697 |
+
|
|
|
0e3697 |
unsigned int align = compute_current_alignment (base_align, offset);
|
|
|
0e3697 |
- if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
|
|
|
0e3697 |
- load_mode = select_block_compare_mode (offset, bytes_to_compare, align,
|
|
|
0e3697 |
- word_mode_ok);
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
- load_mode = select_block_compare_mode (0, bytes_to_compare, align,
|
|
|
0e3697 |
- word_mode_ok);
|
|
|
0e3697 |
+ load_mode = select_block_compare_mode (offset, bytes_to_compare,
|
|
|
0e3697 |
+ align, word_mode_ok);
|
|
|
0e3697 |
load_mode_size = GET_MODE_SIZE (load_mode);
|
|
|
0e3697 |
if (bytes_to_compare >= load_mode_size)
|
|
|
0e3697 |
cmp_bytes = load_mode_size;
|
|
|
0e3697 |
@@ -1895,25 +1995,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
rid of the extra bytes. */
|
|
|
0e3697 |
cmp_bytes = bytes_to_compare;
|
|
|
0e3697 |
|
|
|
0e3697 |
- src1 = adjust_address (orig_src1, load_mode, offset);
|
|
|
0e3697 |
- src2 = adjust_address (orig_src2, load_mode, offset);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- if (!REG_P (XEXP (src1, 0)))
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
|
|
0e3697 |
- src1 = replace_equiv_address (src1, src1_reg);
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- set_mem_size (src1, load_mode_size);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- if (!REG_P (XEXP (src2, 0)))
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
|
|
0e3697 |
- src2 = replace_equiv_address (src2, src2_reg);
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- set_mem_size (src2, load_mode_size);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- do_load_for_compare (tmp_reg_src1, src1, load_mode);
|
|
|
0e3697 |
- do_load_for_compare (tmp_reg_src2, src2, load_mode);
|
|
|
0e3697 |
+ rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
|
|
|
0e3697 |
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
|
|
|
0e3697 |
+ rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
|
|
|
0e3697 |
+ do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
|
|
|
0e3697 |
|
|
|
0e3697 |
/* We must always left-align the data we read, and
|
|
|
0e3697 |
clear any bytes to the right that are beyond the string.
|
|
|
0e3697 |
@@ -1926,16 +2011,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
/* Rotate left first. */
|
|
|
0e3697 |
rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
|
|
|
0e3697 |
- if (word_mode == DImode)
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_rotldi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
|
|
0e3697 |
- emit_insn (gen_rotldi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_rotlsi3 (tmp_reg_src1, tmp_reg_src1, sh));
|
|
|
0e3697 |
- emit_insn (gen_rotlsi3 (tmp_reg_src2, tmp_reg_src2, sh));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
+ do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
|
|
|
0e3697 |
+ do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
if (cmp_bytes < word_mode_size)
|
|
|
0e3697 |
@@ -1944,16 +2021,8 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
turned into a rldicr instruction. */
|
|
|
0e3697 |
HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
|
|
0e3697 |
rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
|
|
0e3697 |
- if (word_mode == DImode)
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_anddi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
|
|
0e3697 |
- emit_insn (gen_anddi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_andsi3_mask (tmp_reg_src1, tmp_reg_src1, mask));
|
|
|
0e3697 |
- emit_insn (gen_andsi3_mask (tmp_reg_src2, tmp_reg_src2, mask));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
+ do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
|
|
|
0e3697 |
+ do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
/* Cases to handle. A and B are chunks of the two strings.
|
|
|
0e3697 |
@@ -2010,31 +2079,16 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
|
|
|
0e3697 |
rtx condz = gen_reg_rtx (CCmode);
|
|
|
0e3697 |
rtx zero_reg = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- if (word_mode == SImode)
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
|
|
0e3697 |
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
|
|
0e3697 |
- if (cmp_bytes < word_mode_size)
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- /* Don't want to look at zero bytes past end. */
|
|
|
0e3697 |
- HOST_WIDE_INT mb =
|
|
|
0e3697 |
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
|
|
0e3697 |
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
|
|
0e3697 |
- emit_insn (gen_andsi3_mask (cmpb_zero, cmpb_zero, mask));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
+ emit_move_insn (zero_reg, GEN_INT (0));
|
|
|
0e3697 |
+ do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
|
|
|
0e3697 |
+
|
|
|
0e3697 |
+ if (cmp_bytes < word_mode_size)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
|
|
0e3697 |
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
|
|
0e3697 |
- if (cmp_bytes < word_mode_size)
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- /* Don't want to look at zero bytes past end. */
|
|
|
0e3697 |
- HOST_WIDE_INT mb =
|
|
|
0e3697 |
- BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
|
|
0e3697 |
- rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
|
|
0e3697 |
- emit_insn (gen_anddi3_mask (cmpb_zero, cmpb_zero, mask));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
+ /* Don't want to look at zero bytes past end. */
|
|
|
0e3697 |
+ HOST_WIDE_INT mb =
|
|
|
0e3697 |
+ BITS_PER_UNIT * (word_mode_size - cmp_bytes);
|
|
|
0e3697 |
+ rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
|
|
|
0e3697 |
+ do_and3 (cmpb_zero, cmpb_zero, mask);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
|
|
|
0e3697 |
@@ -2054,22 +2108,10 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
if (equality_compare_rest)
|
|
|
0e3697 |
{
|
|
|
0e3697 |
/* Update pointers past what has been compared already. */
|
|
|
0e3697 |
- src1 = adjust_address (orig_src1, load_mode, offset);
|
|
|
0e3697 |
- src2 = adjust_address (orig_src2, load_mode, offset);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- if (!REG_P (XEXP (src1, 0)))
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- rtx src1_reg = copy_addr_to_reg (XEXP (src1, 0));
|
|
|
0e3697 |
- src1 = replace_equiv_address (src1, src1_reg);
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- set_mem_size (src1, load_mode_size);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- if (!REG_P (XEXP (src2, 0)))
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- rtx src2_reg = copy_addr_to_reg (XEXP (src2, 0));
|
|
|
0e3697 |
- src2 = replace_equiv_address (src2, src2_reg);
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- set_mem_size (src2, load_mode_size);
|
|
|
0e3697 |
+ rtx src1 = force_reg (Pmode,
|
|
|
0e3697 |
+ gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset)));
|
|
|
0e3697 |
+ rtx src2 = force_reg (Pmode,
|
|
|
0e3697 |
+ gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset)));
|
|
|
0e3697 |
|
|
|
0e3697 |
/* Construct call to strcmp/strncmp to compare the rest of the string. */
|
|
|
0e3697 |
if (no_length)
|
|
|
0e3697 |
@@ -2077,8 +2119,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
tree fun = builtin_decl_explicit (BUILT_IN_STRCMP);
|
|
|
0e3697 |
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
|
|
0e3697 |
target, LCT_NORMAL, GET_MODE (target),
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src2, 0)), Pmode);
|
|
|
0e3697 |
+ src1, Pmode, src2, Pmode);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
else
|
|
|
0e3697 |
{
|
|
|
0e3697 |
@@ -2087,9 +2128,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
tree fun = builtin_decl_explicit (BUILT_IN_STRNCMP);
|
|
|
0e3697 |
emit_library_call_value (XEXP (DECL_RTL (fun), 0),
|
|
|
0e3697 |
target, LCT_NORMAL, GET_MODE (target),
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src1, 0)), Pmode,
|
|
|
0e3697 |
- force_reg (Pmode, XEXP (src2, 0)), Pmode,
|
|
|
0e3697 |
- len_rtx, Pmode);
|
|
|
0e3697 |
+ src1, Pmode, src2, Pmode, len_rtx, Pmode);
|
|
|
0e3697 |
}
|
|
|
0e3697 |
|
|
|
0e3697 |
rtx fin_ref = gen_rtx_LABEL_REF (VOIDmode, final_label);
|
|
|
0e3697 |
@@ -2102,63 +2141,7 @@ expand_strn_compare (rtx operands[], int no_length)
|
|
|
0e3697 |
if (cleanup_label)
|
|
|
0e3697 |
emit_label (cleanup_label);
|
|
|
0e3697 |
|
|
|
0e3697 |
- /* Generate the final sequence that identifies the differing
|
|
|
0e3697 |
- byte and generates the final result, taking into account
|
|
|
0e3697 |
- zero bytes:
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- cmpb cmpb_result1, src1, src2
|
|
|
0e3697 |
- cmpb cmpb_result2, src1, zero
|
|
|
0e3697 |
- orc cmpb_result1, cmp_result1, cmpb_result2
|
|
|
0e3697 |
- cntlzd get bit of first zero/diff byte
|
|
|
0e3697 |
- addi convert for rldcl use
|
|
|
0e3697 |
- rldcl rldcl extract diff/zero byte
|
|
|
0e3697 |
- subf subtract for final result
|
|
|
0e3697 |
- */
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- rtx cmpb_diff = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- rtx cmpb_zero = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- rtx rot_amt = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- rtx zero_reg = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- rtx rot1_1 = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- rtx rot1_2 = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- rtx rot2_1 = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
- rtx rot2_2 = gen_reg_rtx (word_mode);
|
|
|
0e3697 |
-
|
|
|
0e3697 |
- if (word_mode == SImode)
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_cmpbsi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
|
|
0e3697 |
- emit_insn (gen_movsi (zero_reg, GEN_INT (0)));
|
|
|
0e3697 |
- emit_insn (gen_cmpbsi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
|
|
0e3697 |
- emit_insn (gen_one_cmplsi2 (cmpb_diff,cmpb_diff));
|
|
|
0e3697 |
- emit_insn (gen_iorsi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
|
|
0e3697 |
- emit_insn (gen_clzsi2 (rot_amt, cmpb_diff));
|
|
|
0e3697 |
- emit_insn (gen_addsi3 (rot_amt, rot_amt, GEN_INT (8)));
|
|
|
0e3697 |
- emit_insn (gen_rotlsi3 (rot1_1, tmp_reg_src1,
|
|
|
0e3697 |
- gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
- emit_insn (gen_andsi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
|
|
0e3697 |
- emit_insn (gen_rotlsi3 (rot2_1, tmp_reg_src2,
|
|
|
0e3697 |
- gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
- emit_insn (gen_andsi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
|
|
0e3697 |
- emit_insn (gen_subsi3 (result_reg, rot1_2, rot2_2));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
- else
|
|
|
0e3697 |
- {
|
|
|
0e3697 |
- emit_insn (gen_cmpbdi3 (cmpb_diff, tmp_reg_src1, tmp_reg_src2));
|
|
|
0e3697 |
- emit_insn (gen_movdi (zero_reg, GEN_INT (0)));
|
|
|
0e3697 |
- emit_insn (gen_cmpbdi3 (cmpb_zero, tmp_reg_src1, zero_reg));
|
|
|
0e3697 |
- emit_insn (gen_one_cmpldi2 (cmpb_diff,cmpb_diff));
|
|
|
0e3697 |
- emit_insn (gen_iordi3 (cmpb_diff, cmpb_diff, cmpb_zero));
|
|
|
0e3697 |
- emit_insn (gen_clzdi2 (rot_amt, cmpb_diff));
|
|
|
0e3697 |
- emit_insn (gen_adddi3 (rot_amt, rot_amt, GEN_INT (8)));
|
|
|
0e3697 |
- emit_insn (gen_rotldi3 (rot1_1, tmp_reg_src1,
|
|
|
0e3697 |
- gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
- emit_insn (gen_anddi3_mask (rot1_2, rot1_1, GEN_INT (0xff)));
|
|
|
0e3697 |
- emit_insn (gen_rotldi3 (rot2_1, tmp_reg_src2,
|
|
|
0e3697 |
- gen_lowpart (SImode, rot_amt)));
|
|
|
0e3697 |
- emit_insn (gen_anddi3_mask (rot2_2, rot2_1, GEN_INT (0xff)));
|
|
|
0e3697 |
- emit_insn (gen_subdi3 (result_reg, rot1_2, rot2_2));
|
|
|
0e3697 |
- }
|
|
|
0e3697 |
+ emit_final_str_compare_gpr (tmp_reg_src1, tmp_reg_src2, result_reg);
|
|
|
0e3697 |
|
|
|
0e3697 |
emit_label (final_move_label);
|
|
|
0e3697 |
emit_insn (gen_movsi (target,
|
|
|
0e3697 |
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
|
|
|
0e3697 |
index 0fc77aa18b0..e6921e96a3d 100644
|
|
|
0e3697 |
--- a/gcc/config/rs6000/vsx.md
|
|
|
0e3697 |
+++ b/gcc/config/rs6000/vsx.md
|
|
|
0e3697 |
@@ -1210,7 +1210,7 @@
|
|
|
0e3697 |
;; VSX store VSX load VSX move VSX->GPR GPR->VSX LQ (GPR)
|
|
|
0e3697 |
;; STQ (GPR) GPR load GPR store GPR move XXSPLTIB VSPLTISW
|
|
|
0e3697 |
;; VSX 0/-1 GPR 0/-1 VMX const GPR const LVX (VMX) STVX (VMX)
|
|
|
0e3697 |
-(define_insn "*vsx_mov<mode>_64bit"
|
|
|
0e3697 |
+(define_insn "vsx_mov<mode>_64bit"
|
|
|
0e3697 |
[(set (match_operand:VSX_M 0 "nonimmediate_operand"
|
|
|
0e3697 |
"=ZwO, <VSa>, <VSa>, r, we, ?wQ,
|
|
|
0e3697 |
?&r, ??r, ??Y, , wo, v,
|