|
|
d8d6ff |
2014-09-25 Jakub Jelinek <jakub@redhat.com>
|
|
|
d8d6ff |
|
|
|
d8d6ff |
PR tree-optimization/63341
|
|
|
d8d6ff |
* tree-vectorizer.h (vect_create_data_ref_ptr,
|
|
|
d8d6ff |
vect_create_addr_base_for_vector_ref): Add another tree argument
|
|
|
d8d6ff |
defaulting to NULL_TREE.
|
|
|
d8d6ff |
* tree-vect-data-refs.c (vect_create_data_ref_ptr): Add byte_offset
|
|
|
d8d6ff |
argument, pass it down to vect_create_addr_base_for_vector_ref.
|
|
|
d8d6ff |
(vect_create_addr_base_for_vector_ref): Add byte_offset argument,
|
|
|
d8d6ff |
add that to base_offset too if non-NULL.
|
|
|
d8d6ff |
* tree-vect-stmts.c (vectorizable_load): Add byte_offset variable,
|
|
|
d8d6ff |
for dr_explicit_realign_optimized set it to vector byte size
|
|
|
d8d6ff |
- 1 instead of setting offset, pass byte_offset down to
|
|
|
d8d6ff |
vect_create_data_ref_ptr.
|
|
|
d8d6ff |
|
|
|
d8d6ff |
* gcc.dg/vect/pr63341-1.c: New test.
|
|
|
d8d6ff |
* gcc.dg/vect/pr63341-2.c: New test.
|
|
|
d8d6ff |
|
|
|
d8d6ff |
--- gcc/tree-vectorizer.h (revision 215586)
|
|
|
d8d6ff |
+++ gcc/tree-vectorizer.h (revision 215587)
|
|
|
d8d6ff |
@@ -931,7 +931,8 @@ extern tree vect_check_gather (gimple, l
|
|
|
d8d6ff |
extern bool vect_analyze_data_refs (loop_vec_info, bb_vec_info, int *);
|
|
|
d8d6ff |
extern tree vect_create_data_ref_ptr (gimple, tree, struct loop *, tree,
|
|
|
d8d6ff |
tree *, gimple_stmt_iterator *,
|
|
|
d8d6ff |
- gimple *, bool, bool *);
|
|
|
d8d6ff |
+ gimple *, bool, bool *,
|
|
|
d8d6ff |
+ tree = NULL_TREE);
|
|
|
d8d6ff |
extern tree bump_vector_ptr (tree, gimple, gimple_stmt_iterator *, gimple, tree);
|
|
|
d8d6ff |
extern tree vect_create_destination_var (tree, tree);
|
|
|
d8d6ff |
extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
|
|
|
d8d6ff |
@@ -949,7 +950,8 @@ extern void vect_record_grouped_load_vec
|
|
|
d8d6ff |
extern int vect_get_place_in_interleaving_chain (gimple, gimple);
|
|
|
d8d6ff |
extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
|
|
|
d8d6ff |
extern tree vect_create_addr_base_for_vector_ref (gimple, gimple_seq *,
|
|
|
d8d6ff |
- tree, struct loop *);
|
|
|
d8d6ff |
+ tree, struct loop *,
|
|
|
d8d6ff |
+ tree = NULL_TREE);
|
|
|
d8d6ff |
|
|
|
d8d6ff |
/* In tree-vect-loop.c. */
|
|
|
d8d6ff |
/* FORNOW: Used in tree-parloops.c. */
|
|
|
d8d6ff |
--- gcc/tree-vect-data-refs.c (revision 215586)
|
|
|
d8d6ff |
+++ gcc/tree-vect-data-refs.c (revision 215587)
|
|
|
d8d6ff |
@@ -3553,6 +3553,9 @@ vect_get_new_vect_var (tree type, enum v
|
|
|
d8d6ff |
is as follows:
|
|
|
d8d6ff |
if LOOP=i_loop: &in (relative to i_loop)
|
|
|
d8d6ff |
if LOOP=j_loop: &in+i*2B (relative to j_loop)
|
|
|
d8d6ff |
+ BYTE_OFFSET: Optional, defaulted to NULL. If supplied, it is added to the
|
|
|
d8d6ff |
+ initial address. Unlike OFFSET, which is the number of elements to
|
|
|
d8d6ff |
+ be added, BYTE_OFFSET is measured in bytes.
|
|
|
d8d6ff |
|
|
|
d8d6ff |
Output:
|
|
|
d8d6ff |
1. Return an SSA_NAME whose value is the address of the memory location of
|
|
|
d8d6ff |
@@ -3566,7 +3569,8 @@ tree
|
|
|
d8d6ff |
vect_create_addr_base_for_vector_ref (gimple stmt,
|
|
|
d8d6ff |
gimple_seq *new_stmt_list,
|
|
|
d8d6ff |
tree offset,
|
|
|
d8d6ff |
- struct loop *loop)
|
|
|
d8d6ff |
+ struct loop *loop,
|
|
|
d8d6ff |
+ tree byte_offset)
|
|
|
d8d6ff |
{
|
|
|
d8d6ff |
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
|
|
d8d6ff |
struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
|
|
|
d8d6ff |
@@ -3628,6 +3632,16 @@ vect_create_addr_base_for_vector_ref (gi
|
|
|
d8d6ff |
base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
|
|
|
d8d6ff |
gimple_seq_add_seq (new_stmt_list, seq);
|
|
|
d8d6ff |
}
|
|
|
d8d6ff |
+ if (byte_offset)
|
|
|
d8d6ff |
+ {
|
|
|
d8d6ff |
+ tree tmp = create_tmp_var (sizetype, "offset");
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+ byte_offset = fold_convert (sizetype, byte_offset);
|
|
|
d8d6ff |
+ base_offset = fold_build2 (PLUS_EXPR, sizetype,
|
|
|
d8d6ff |
+ base_offset, byte_offset);
|
|
|
d8d6ff |
+ base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
|
|
|
d8d6ff |
+ gimple_seq_add_seq (new_stmt_list, seq);
|
|
|
d8d6ff |
+ }
|
|
|
d8d6ff |
|
|
|
d8d6ff |
/* base + base_offset */
|
|
|
d8d6ff |
if (loop_vinfo)
|
|
|
d8d6ff |
@@ -3692,6 +3706,10 @@ vect_create_addr_base_for_vector_ref (gi
|
|
|
d8d6ff |
5. BSI: location where the new stmts are to be placed if there is no loop
|
|
|
d8d6ff |
6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
|
|
|
d8d6ff |
pointing to the initial address.
|
|
|
d8d6ff |
+ 7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
|
|
|
d8d6ff |
+ to the initial address accessed by the data-ref in STMT. This is
|
|
|
d8d6ff |
+ similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
|
|
|
d8d6ff |
+ in bytes.
|
|
|
d8d6ff |
|
|
|
d8d6ff |
Output:
|
|
|
d8d6ff |
1. Declare a new ptr to vector_type, and have it point to the base of the
|
|
|
d8d6ff |
@@ -3705,6 +3723,8 @@ vect_create_addr_base_for_vector_ref (gi
|
|
|
d8d6ff |
initial_address = &a[init];
|
|
|
d8d6ff |
if OFFSET is supplied:
|
|
|
d8d6ff |
initial_address = &a[init + OFFSET];
|
|
|
d8d6ff |
+ if BYTE_OFFSET is supplied:
|
|
|
d8d6ff |
+ initial_address = &a[init] + BYTE_OFFSET;
|
|
|
d8d6ff |
|
|
|
d8d6ff |
Return the initial_address in INITIAL_ADDRESS.
|
|
|
d8d6ff |
|
|
|
d8d6ff |
@@ -3722,7 +3742,7 @@ tree
|
|
|
d8d6ff |
vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
|
|
|
d8d6ff |
tree offset, tree *initial_address,
|
|
|
d8d6ff |
gimple_stmt_iterator *gsi, gimple *ptr_incr,
|
|
|
d8d6ff |
- bool only_init, bool *inv_p)
|
|
|
d8d6ff |
+ bool only_init, bool *inv_p, tree byte_offset)
|
|
|
d8d6ff |
{
|
|
|
d8d6ff |
const char *base_name;
|
|
|
d8d6ff |
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
|
|
d8d6ff |
@@ -3881,10 +3901,10 @@ vect_create_data_ref_ptr (gimple stmt, t
|
|
|
d8d6ff |
/* (2) Calculate the initial address of the aggregate-pointer, and set
|
|
|
d8d6ff |
the aggregate-pointer to point to it before the loop. */
|
|
|
d8d6ff |
|
|
|
d8d6ff |
- /* Create: (&(base[init_val+offset]) in the loop preheader. */
|
|
|
d8d6ff |
+ /* Create: (&(base[init_val+offset])+byte_offset) in the loop preheader. */
|
|
|
d8d6ff |
|
|
|
d8d6ff |
new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
|
|
|
d8d6ff |
- offset, loop);
|
|
|
d8d6ff |
+ offset, loop, byte_offset);
|
|
|
d8d6ff |
if (new_stmt_list)
|
|
|
d8d6ff |
{
|
|
|
d8d6ff |
if (pe)
|
|
|
d8d6ff |
--- gcc/tree-vect-stmts.c (revision 215586)
|
|
|
d8d6ff |
+++ gcc/tree-vect-stmts.c (revision 215587)
|
|
|
d8d6ff |
@@ -4319,6 +4319,7 @@ vectorizable_load (gimple stmt, gimple_s
|
|
|
d8d6ff |
int i, j, group_size;
|
|
|
d8d6ff |
tree msq = NULL_TREE, lsq;
|
|
|
d8d6ff |
tree offset = NULL_TREE;
|
|
|
d8d6ff |
+ tree byte_offset = NULL_TREE;
|
|
|
d8d6ff |
tree realignment_token = NULL_TREE;
|
|
|
d8d6ff |
gimple phi = NULL;
|
|
|
d8d6ff |
vec<tree> dr_chain = vNULL;
|
|
|
d8d6ff |
@@ -4934,7 +4935,8 @@ vectorizable_load (gimple stmt, gimple_s
|
|
|
d8d6ff |
if (alignment_support_scheme == dr_explicit_realign_optimized)
|
|
|
d8d6ff |
{
|
|
|
d8d6ff |
phi = SSA_NAME_DEF_STMT (msq);
|
|
|
d8d6ff |
- offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
|
|
|
d8d6ff |
+ byte_offset = size_binop (MINUS_EXPR, TYPE_SIZE_UNIT (vectype),
|
|
|
d8d6ff |
+ size_one_node);
|
|
|
d8d6ff |
}
|
|
|
d8d6ff |
}
|
|
|
d8d6ff |
else
|
|
|
d8d6ff |
@@ -4955,7 +4957,8 @@ vectorizable_load (gimple stmt, gimple_s
|
|
|
d8d6ff |
if (j == 0)
|
|
|
d8d6ff |
dataref_ptr = vect_create_data_ref_ptr (first_stmt, aggr_type, at_loop,
|
|
|
d8d6ff |
offset, &dummy, gsi,
|
|
|
d8d6ff |
- &ptr_incr, false, &inv_p);
|
|
|
d8d6ff |
+ &ptr_incr, false, &inv_p,
|
|
|
d8d6ff |
+ byte_offset);
|
|
|
d8d6ff |
else
|
|
|
d8d6ff |
dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
|
|
|
d8d6ff |
TYPE_SIZE_UNIT (aggr_type));
|
|
|
d8d6ff |
--- gcc/testsuite/gcc.dg/vect/pr63341-1.c (revision 0)
|
|
|
d8d6ff |
+++ gcc/testsuite/gcc.dg/vect/pr63341-1.c (revision 215587)
|
|
|
d8d6ff |
@@ -0,0 +1,32 @@
|
|
|
d8d6ff |
+/* PR tree-optimization/63341 */
|
|
|
d8d6ff |
+/* { dg-do run } */
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+#include "tree-vect.h"
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+typedef union U { unsigned short s; unsigned char c; } __attribute__((packed)) U;
|
|
|
d8d6ff |
+struct S { char e __attribute__((aligned (64))); U s[32]; };
|
|
|
d8d6ff |
+struct S t = {0, {{1}, {2}, {3}, {4}, {5}, {6}, {7}, {8},
|
|
|
d8d6ff |
+ {9}, {10}, {11}, {12}, {13}, {14}, {15}, {16},
|
|
|
d8d6ff |
+ {17}, {18}, {19}, {20}, {21}, {22}, {23}, {24},
|
|
|
d8d6ff |
+ {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}}};
|
|
|
d8d6ff |
+unsigned short d[32] = { 1 };
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+__attribute__((noinline, noclone)) void
|
|
|
d8d6ff |
+foo ()
|
|
|
d8d6ff |
+{
|
|
|
d8d6ff |
+ int i;
|
|
|
d8d6ff |
+ for (i = 0; i < 32; i++)
|
|
|
d8d6ff |
+ d[i] = t.s[i].s;
|
|
|
d8d6ff |
+ if (__builtin_memcmp (d, t.s, sizeof d))
|
|
|
d8d6ff |
+ abort ();
|
|
|
d8d6ff |
+}
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+int
|
|
|
d8d6ff |
+main ()
|
|
|
d8d6ff |
+{
|
|
|
d8d6ff |
+ check_vect ();
|
|
|
d8d6ff |
+ foo ();
|
|
|
d8d6ff |
+ return 0;
|
|
|
d8d6ff |
+}
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+/* { dg-final { cleanup-tree-dump "vect" } } */
|
|
|
d8d6ff |
--- gcc/testsuite/gcc.dg/vect/pr63341-2.c (revision 0)
|
|
|
d8d6ff |
+++ gcc/testsuite/gcc.dg/vect/pr63341-2.c (revision 215587)
|
|
|
d8d6ff |
@@ -0,0 +1,35 @@
|
|
|
d8d6ff |
+/* PR tree-optimization/63341 */
|
|
|
d8d6ff |
+/* { dg-do run } */
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+#include "tree-vect.h"
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+typedef union U { unsigned short s; unsigned char c; } __attribute__((packed)) U;
|
|
|
d8d6ff |
+struct S { char e __attribute__((aligned (64))); U s[32]; };
|
|
|
d8d6ff |
+struct S t = {0, {{0x5010}, {0x5111}, {0x5212}, {0x5313}, {0x5414}, {0x5515}, {0x5616}, {0x5717},
|
|
|
d8d6ff |
+ {0x5818}, {0x5919}, {0x5a1a}, {0x5b1b}, {0x5c1c}, {0x5d1d}, {0x5e1e}, {0x5f1f},
|
|
|
d8d6ff |
+ {0x6020}, {0x6121}, {0x6222}, {0x6323}, {0x6424}, {0x6525}, {0x6626}, {0x6727},
|
|
|
d8d6ff |
+ {0x6828}, {0x6929}, {0x6a2a}, {0x6b2b}, {0x6c2c}, {0x6d2d}, {0x6e2e}, {0x6f2f}}};
|
|
|
d8d6ff |
+unsigned short d[32] = { 1 };
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+__attribute__((noinline, noclone)) void
|
|
|
d8d6ff |
+foo ()
|
|
|
d8d6ff |
+{
|
|
|
d8d6ff |
+ int i;
|
|
|
d8d6ff |
+ for (i = 0; i < 32; i++)
|
|
|
d8d6ff |
+ d[i] = t.s[i].s + 4;
|
|
|
d8d6ff |
+ for (i = 0; i < 32; i++)
|
|
|
d8d6ff |
+ if (d[i] != t.s[i].s + 4)
|
|
|
d8d6ff |
+ abort ();
|
|
|
d8d6ff |
+ else
|
|
|
d8d6ff |
+ asm volatile ("" : : : "memory");
|
|
|
d8d6ff |
+}
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+int
|
|
|
d8d6ff |
+main ()
|
|
|
d8d6ff |
+{
|
|
|
d8d6ff |
+ check_vect ();
|
|
|
d8d6ff |
+ foo ();
|
|
|
d8d6ff |
+ return 0;
|
|
|
d8d6ff |
+}
|
|
|
d8d6ff |
+
|
|
|
d8d6ff |
+/* { dg-final { cleanup-tree-dump "vect" } } */
|