commit c7a833caa029b84ad579c3fabe006a80f718d7e1
Author: acsawdey <acsawdey@138bc75d-0d04-0410-961f-82ee72b054a4>
Date:   Thu Aug 2 18:11:54 2018 +0000

    2018-07-31  Aaron Sawdey  <acsawdey@linux.ibm.com>

            * config/rs6000/rs6000-string.c (select_block_compare_mode): Move test
            for word_mode_ok here instead of passing as argument.
            (expand_block_compare): Change select_block_compare_mode() call.
            (expand_strncmp_gpr_sequence): New function.
            (expand_strn_compare): Make use of expand_strncmp_gpr_sequence.

    git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@263273 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index f9dd54eb639..451e9ed33da 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -238,13 +238,11 @@ do_load_for_compare (rtx reg, rtx mem, machine_mode mode)
 
    OFFSET is the current read offset from the beginning of the block.
    BYTES is the number of bytes remaining to be read.
-   ALIGN is the minimum alignment of the memory blocks being compared in bytes.
-   WORD_MODE_OK indicates using WORD_MODE is allowed, else SImode is
-   the largest allowable mode.  */
+   ALIGN is the minimum alignment of the memory blocks being compared in bytes.  */
 static machine_mode
 select_block_compare_mode (unsigned HOST_WIDE_INT offset,
                            unsigned HOST_WIDE_INT bytes,
-                           unsigned HOST_WIDE_INT align, bool word_mode_ok)
+                           unsigned HOST_WIDE_INT align)
 {
   /* First see if we can do a whole load unit
      as that will be more efficient than a larger load + shift.  */
@@ -257,6 +255,11 @@ select_block_compare_mode (unsigned HOST_WIDE_INT offset,
   /* The most we can read without potential page crossing.  */
   unsigned HOST_WIDE_INT maxread = ROUND_UP (bytes, align);
 
+  /* If we have an LE target without ldbrx and word_mode is DImode,
+     then we must avoid using word_mode.  */
+  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
+                       && word_mode == DImode);
+
   if (word_mode_ok && bytes >= UNITS_PER_WORD)
     return word_mode;
   else if (bytes == GET_MODE_SIZE (SImode))
@@ -1382,16 +1385,11 @@ expand_block_compare (rtx operands[])
   else
     cond = gen_reg_rtx (CCmode);
 
-  /* If we have an LE target without ldbrx and word_mode is DImode,
-     then we must avoid using word_mode.  */
-  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
-                       && word_mode == DImode);
-
   /* Strategy phase.  How many ops will this take and should we expand it?  */
 
   unsigned HOST_WIDE_INT offset = 0;
   machine_mode load_mode =
-    select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
+    select_block_compare_mode (offset, bytes, base_align);
   unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
 
   /* We don't want to generate too much code.  The loop code can take
@@ -1445,8 +1443,7 @@ expand_block_compare (rtx operands[])
   while (bytes > 0)
     {
       unsigned int align = compute_current_alignment (base_align, offset);
-      load_mode = select_block_compare_mode (offset, bytes,
-                                             align, word_mode_ok);
+      load_mode = select_block_compare_mode (offset, bytes, align);
       load_mode_size = GET_MODE_SIZE (load_mode);
       if (bytes >= load_mode_size)
         cmp_bytes = load_mode_size;
@@ -1698,6 +1695,189 @@ expand_strncmp_align_check (rtx strncmp_label, rtx src_addr, HOST_WIDE_INT bytes
   LABEL_NUSES (strncmp_label) += 1;
 }
 
+/* Generate the sequence of compares for strcmp/strncmp using gpr instructions.
+   BYTES_TO_COMPARE is the number of bytes to be compared.
+   BASE_ALIGN is the smaller of the alignment of the two strings.
+   ORIG_SRC1 is the unmodified rtx for the first string.
+   ORIG_SRC2 is the unmodified rtx for the second string.
+   TMP_REG_SRC1 is the register for loading the first string.
+   TMP_REG_SRC2 is the register for loading the second string.
+   RESULT_REG is the rtx for the result register.
+   EQUALITY_COMPARE_REST is a flag to indicate we need to make a cleanup call
+   to strcmp/strncmp if we have equality at the end of the inline comparison.
+   CLEANUP_LABEL is rtx for a label we generate if we need code to clean up
+   and generate the final comparison result.
+   FINAL_MOVE_LABEL is rtx for a label we can branch to when we can just
+   set the final result.  */
+static void
+expand_strncmp_gpr_sequence(unsigned HOST_WIDE_INT bytes_to_compare,
+                            unsigned int base_align,
+                            rtx orig_src1, rtx orig_src2,
+                            rtx tmp_reg_src1, rtx tmp_reg_src2, rtx result_reg,
+                            bool equality_compare_rest, rtx &cleanup_label,
+                            rtx final_move_label)
+{
+  unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
+  machine_mode load_mode;
+  unsigned int load_mode_size;
+  unsigned HOST_WIDE_INT cmp_bytes = 0;
+  unsigned HOST_WIDE_INT offset = 0;
+  rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
+  rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
+
+  while (bytes_to_compare > 0)
+    {
+      /* GPR compare sequence:
+         check each 8B with: ld/ld cmpd bne
+         If equal, use rldicr/cmpb to check for zero byte.
+         cleanup code at end:
+         cmpb          get byte that differs
+         cmpb          look for zero byte
+         orc           combine
+         cntlzd        get bit of first zero/diff byte
+         subfic        convert for rldcl use
+         rldcl rldcl   extract diff/zero byte
+         subf          subtract for final result
+
+         The last compare can branch around the cleanup code if the
+         result is zero because the strings are exactly equal.  */
+
+      unsigned int align = compute_current_alignment (base_align, offset);
+      load_mode = select_block_compare_mode (offset, bytes_to_compare, align);
+      load_mode_size = GET_MODE_SIZE (load_mode);
+      if (bytes_to_compare >= load_mode_size)
+        cmp_bytes = load_mode_size;
+      else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
+        {
+          /* Move this load back so it doesn't go past the end.
+             P8/P9 can do this efficiently.  */
+          unsigned int extra_bytes = load_mode_size - bytes_to_compare;
+          cmp_bytes = bytes_to_compare;
+          if (extra_bytes < offset)
+            {
+              offset -= extra_bytes;
+              cmp_bytes = load_mode_size;
+              bytes_to_compare = cmp_bytes;
+            }
+        }
+      else
+        /* P7 and earlier can't do the overlapping load trick fast,
+           so this forces a non-overlapping load and a shift to get
+           rid of the extra bytes.  */
+        cmp_bytes = bytes_to_compare;
+
+      rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
+      do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
+      rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
+      do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
+
+      /* We must always left-align the data we read, and
+         clear any bytes to the right that are beyond the string.
+         Otherwise the cmpb sequence won't produce the correct
+         results.  The beginning of the compare will be done
+         with word_mode so will not have any extra shifts or
+         clear rights.  */
+
+      if (load_mode_size < word_mode_size)
+        {
+          /* Rotate left first.  */
+          rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
+          do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
+          do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
+        }
+
+      if (cmp_bytes < word_mode_size)
+        {
+          /* Now clear right.  This plus the rotate can be
+             turned into a rldicr instruction.  */
+          HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+          rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+          do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
+          do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
+        }
+
+      /* Cases to handle.  A and B are chunks of the two strings.
+         1: Not end of comparison:
+            A != B: branch to cleanup code to compute result.
+            A == B: check for 0 byte, next block if not found.
+         2: End of the inline comparison:
+            A != B: branch to cleanup code to compute result.
+            A == B: check for 0 byte, call strcmp/strncmp
+         3: compared requested N bytes:
+            A == B: branch to result 0.
+            A != B: cleanup code to compute result.  */
+
+      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
+
+      rtx dst_label;
+      if (remain > 0 || equality_compare_rest)
+        {
+          /* Branch to cleanup code, otherwise fall through to do
+             more compares.  */
+          if (!cleanup_label)
+            cleanup_label = gen_label_rtx ();
+          dst_label = cleanup_label;
+        }
+      else
+        /* Branch to end and produce result of 0.  */
+        dst_label = final_move_label;
+
+      rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
+      rtx cond = gen_reg_rtx (CCmode);
+
+      /* Always produce the 0 result, it is needed if
+         cmpb finds a 0 byte in this chunk.  */
+      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
+      rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
+
+      rtx cmp_rtx;
+      if (remain == 0 && !equality_compare_rest)
+        cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
+      else
+        cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
+
+      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
+                                         lab_ref, pc_rtx);
+      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+      JUMP_LABEL (j) = dst_label;
+      LABEL_NUSES (dst_label) += 1;
+
+      if (remain > 0 || equality_compare_rest)
+        {
+          /* Generate a cmpb to test for a 0 byte and branch
+             to final result if found.  */
+          rtx cmpb_zero = gen_reg_rtx (word_mode);
+          rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
+          rtx condz = gen_reg_rtx (CCmode);
+          rtx zero_reg = gen_reg_rtx (word_mode);
+          emit_move_insn (zero_reg, GEN_INT (0));
+          do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
+
+          if (cmp_bytes < word_mode_size)
+            {
+              /* Don't want to look at zero bytes past end.  */
+              HOST_WIDE_INT mb =
+                BITS_PER_UNIT * (word_mode_size - cmp_bytes);
+              rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
+              do_and3 (cmpb_zero, cmpb_zero, mask);
+            }
+
+          emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
+          rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
+          rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
+                                             lab_ref_fin, pc_rtx);
+          rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
+          JUMP_LABEL (j2) = final_move_label;
+          LABEL_NUSES (final_move_label) += 1;
+
+        }
+
+      offset += cmp_bytes;
+      bytes_to_compare -= cmp_bytes;
+    }
+
+}
+
 /* Generate the final sequence that identifies the differing
    byte and generates the final result, taking into account
    zero bytes:
@@ -1797,7 +1977,7 @@ expand_strn_compare (rtx operands[], int no_length)
       bytes_rtx = operands[3];
       align_rtx = operands[4];
     }
-  unsigned HOST_WIDE_INT cmp_bytes = 0;
+
   rtx src1_addr = force_reg (Pmode, XEXP (orig_src1, 0));
   rtx src2_addr = force_reg (Pmode, XEXP (orig_src2, 0));
 
@@ -1822,11 +2002,6 @@ expand_strn_compare (rtx operands[], int no_length)
 
   gcc_assert (GET_MODE (target) == SImode);
 
-  /* If we have an LE target without ldbrx and word_mode is DImode,
-     then we must avoid using word_mode.  */
-  int word_mode_ok = !(!BYTES_BIG_ENDIAN && !TARGET_LDBRX
-                       && word_mode == DImode);
-
   unsigned int word_mode_size = GET_MODE_SIZE (word_mode);
 
   unsigned HOST_WIDE_INT offset = 0;
@@ -1839,7 +2014,7 @@ expand_strn_compare (rtx operands[], int no_length)
     bytes = UINTVAL (bytes_rtx);
 
   machine_mode load_mode =
-    select_block_compare_mode (offset, bytes, base_align, word_mode_ok);
+    select_block_compare_mode (0, bytes, base_align);
   unsigned int load_mode_size = GET_MODE_SIZE (load_mode);
   compare_length = rs6000_string_compare_inline_limit * load_mode_size;
 
@@ -1867,6 +2042,8 @@ expand_strn_compare (rtx operands[], int no_length)
   rtx begin_compare_label = NULL;
   unsigned int required_align = 8;
 
+  required_align = 8;
+
   if (base_align < required_align)
     {
       /* Generate code that checks distance to 4k boundary for this case.  */
@@ -1952,159 +2129,15 @@ expand_strn_compare (rtx operands[], int no_length)
 
   /* Generate a sequence of GPR or VEC/VSX instructions to compare out
      to the length specified.  */
-  unsigned HOST_WIDE_INT bytes_to_compare = compare_length;
-  while (bytes_to_compare > 0)
-    {
-      /* GPR compare sequence:
-         check each 8B with: ld/ld cmpd bne
-         If equal, use rldicr/cmpb to check for zero byte.
-         cleanup code at end:
-         cmpb          get byte that differs
-         cmpb          look for zero byte
-         orc           combine
-         cntlzd        get bit of first zero/diff byte
-         subfic        convert for rldcl use
-         rldcl rldcl   extract diff/zero byte
-         subf          subtract for final result
-
-         The last compare can branch around the cleanup code if the
-         result is zero because the strings are exactly equal.  */
-
-      unsigned int align = compute_current_alignment (base_align, offset);
-      load_mode = select_block_compare_mode (offset, bytes_to_compare,
-                                             align, word_mode_ok);
-      load_mode_size = GET_MODE_SIZE (load_mode);
-      if (bytes_to_compare >= load_mode_size)
-        cmp_bytes = load_mode_size;
-      else if (TARGET_EFFICIENT_OVERLAPPING_UNALIGNED)
-        {
-          /* Move this load back so it doesn't go past the end.
-             P8/P9 can do this efficiently.  */
-          unsigned int extra_bytes = load_mode_size - bytes_to_compare;
-          cmp_bytes = bytes_to_compare;
-          if (extra_bytes < offset)
-            {
-              offset -= extra_bytes;
-              cmp_bytes = load_mode_size;
-              bytes_to_compare = cmp_bytes;
-            }
-        }
-      else
-        /* P7 and earlier can't do the overlapping load trick fast,
-           so this forces a non-overlapping load and a shift to get
-           rid of the extra bytes.  */
-        cmp_bytes = bytes_to_compare;
-
-      rtx addr1 = gen_rtx_PLUS (Pmode, src1_addr, GEN_INT (offset));
-      do_load_for_compare_from_addr (load_mode, tmp_reg_src1, addr1, orig_src1);
-      rtx addr2 = gen_rtx_PLUS (Pmode, src2_addr, GEN_INT (offset));
-      do_load_for_compare_from_addr (load_mode, tmp_reg_src2, addr2, orig_src2);
-
-      /* We must always left-align the data we read, and
-         clear any bytes to the right that are beyond the string.
-         Otherwise the cmpb sequence won't produce the correct
-         results.  The beginning of the compare will be done
-         with word_mode so will not have any extra shifts or
-         clear rights.  */
-
-      if (load_mode_size < word_mode_size)
-        {
-          /* Rotate left first.  */
-          rtx sh = GEN_INT (BITS_PER_UNIT * (word_mode_size - load_mode_size));
-          do_rotl3 (tmp_reg_src1, tmp_reg_src1, sh);
-          do_rotl3 (tmp_reg_src2, tmp_reg_src2, sh);
-        }
-
-      if (cmp_bytes < word_mode_size)
-        {
-          /* Now clear right.  This plus the rotate can be
-             turned into a rldicr instruction.  */
-          HOST_WIDE_INT mb = BITS_PER_UNIT * (word_mode_size - cmp_bytes);
-          rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
-          do_and3 (tmp_reg_src1, tmp_reg_src1, mask);
-          do_and3 (tmp_reg_src2, tmp_reg_src2, mask);
-        }
-
-      /* Cases to handle.  A and B are chunks of the two strings.
-         1: Not end of comparison:
-            A != B: branch to cleanup code to compute result.
-            A == B: check for 0 byte, next block if not found.
-         2: End of the inline comparison:
-            A != B: branch to cleanup code to compute result.
-            A == B: check for 0 byte, call strcmp/strncmp
-         3: compared requested N bytes:
-            A == B: branch to result 0.
-            A != B: cleanup code to compute result.  */
-
-      unsigned HOST_WIDE_INT remain = bytes_to_compare - cmp_bytes;
-
-      rtx dst_label;
-      if (remain > 0 || equality_compare_rest)
-        {
-          /* Branch to cleanup code, otherwise fall through to do
-             more compares.  */
-          if (!cleanup_label)
-            cleanup_label = gen_label_rtx ();
-          dst_label = cleanup_label;
-        }
-      else
-        /* Branch to end and produce result of 0.  */
-        dst_label = final_move_label;
-
-      rtx lab_ref = gen_rtx_LABEL_REF (VOIDmode, dst_label);
-      rtx cond = gen_reg_rtx (CCmode);
-
-      /* Always produce the 0 result, it is needed if
-         cmpb finds a 0 byte in this chunk.  */
-      rtx tmp = gen_rtx_MINUS (word_mode, tmp_reg_src1, tmp_reg_src2);
-      rs6000_emit_dot_insn (result_reg, tmp, 1, cond);
-
-      rtx cmp_rtx;
-      if (remain == 0 && !equality_compare_rest)
-        cmp_rtx = gen_rtx_EQ (VOIDmode, cond, const0_rtx);
-      else
-        cmp_rtx = gen_rtx_NE (VOIDmode, cond, const0_rtx);
-
-      rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmp_rtx,
-                                         lab_ref, pc_rtx);
-      rtx j = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
-      JUMP_LABEL (j) = dst_label;
-      LABEL_NUSES (dst_label) += 1;
-
-      if (remain > 0 || equality_compare_rest)
-        {
-          /* Generate a cmpb to test for a 0 byte and branch
-             to final result if found.  */
-          rtx cmpb_zero = gen_reg_rtx (word_mode);
-          rtx lab_ref_fin = gen_rtx_LABEL_REF (VOIDmode, final_move_label);
-          rtx condz = gen_reg_rtx (CCmode);
-          rtx zero_reg = gen_reg_rtx (word_mode);
-          emit_move_insn (zero_reg, GEN_INT (0));
-          do_cmpb3 (cmpb_zero, tmp_reg_src1, zero_reg);
-
-          if (cmp_bytes < word_mode_size)
-            {
-              /* Don't want to look at zero bytes past end.  */
-              HOST_WIDE_INT mb =
-                BITS_PER_UNIT * (word_mode_size - cmp_bytes);
-              rtx mask = GEN_INT (HOST_WIDE_INT_M1U << mb);
-              do_and3 (cmpb_zero, cmpb_zero, mask);
-            }
-
-          emit_move_insn (condz, gen_rtx_COMPARE (CCmode, cmpb_zero, zero_reg));
-          rtx cmpnz_rtx = gen_rtx_NE (VOIDmode, condz, const0_rtx);
-          rtx ifelse = gen_rtx_IF_THEN_ELSE (VOIDmode, cmpnz_rtx,
-                                             lab_ref_fin, pc_rtx);
-          rtx j2 = emit_jump_insn (gen_rtx_SET (pc_rtx, ifelse));
-          JUMP_LABEL (j2) = final_move_label;
-          LABEL_NUSES (final_move_label) += 1;
-
-        }
-
-      offset += cmp_bytes;
-      bytes_to_compare -= cmp_bytes;
-    }
-
+  expand_strncmp_gpr_sequence(compare_length, base_align,
+                              orig_src1, orig_src2,
+                              tmp_reg_src1, tmp_reg_src2,
+                              result_reg,
+                              equality_compare_rest,
+                              cleanup_label, final_move_label);
+
+  offset = compare_length;
+
   if (equality_compare_rest)
     {
       /* Update pointers past what has been compared already.  */