Index: gcc/testsuite/gcc.dg/vec-scal-opt2.c
===================================================================
--- gcc/testsuite/gcc.dg/vec-scal-opt2.c (revision 0)
+++ gcc/testsuite/gcc.dg/vec-scal-opt2.c (revision 0)
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-veclower" } */
+
+#define vidx(type, vec, idx) (*((type *) &(vec) + idx))
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+int main (int argc, char *argv[]) {
+   vector(8, short) v0 = {argc,1,2,3,4,5,6,7};
+   vector(8, short) v1 = {2,2,2,2,2,2,2,2};
+   vector(8, short) r1;
+
+   r1 = v0 >> v1;
+
+   return vidx(short, r1, 0);
+}
+
+/* { dg-final { scan-tree-dump-times ">> 2" 1 "veclower" } } */
+/* { dg-final { cleanup-tree-dump "veclower" } } */
Index: gcc/testsuite/gcc.dg/vec-scal-opt.c
===================================================================
--- gcc/testsuite/gcc.dg/vec-scal-opt.c (revision 0)
+++ gcc/testsuite/gcc.dg/vec-scal-opt.c (revision 0)
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-veclower" } */
+
+#define vidx(type, vec, idx) (*((type *) &(vec) + idx))
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+short k;
+
+int main (int argc, char *argv[]) {
+   k = argc;
+   vector(8, short) v0 = {argc,1,2,3,4,5,6,7};
+   vector(8, short) v2 = {k, k,k,k,k,k,k,k};
+   vector(8, short) r1;
+
+   r1 = v0 >> v2;
+
+   return vidx(short, r1, 0);
+}
+
+/* { dg-final { scan-tree-dump-times ">> k.\[0-9_\]*" 1 "veclower" } } */
+/* { dg-final { cleanup-tree-dump "veclower" } } */
Index: gcc/testsuite/gcc.dg/vec-scal-opt1.c
===================================================================
--- gcc/testsuite/gcc.dg/vec-scal-opt1.c (revision 0)
+++ gcc/testsuite/gcc.dg/vec-scal-opt1.c (revision 0)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-veclower" } */
+
+#define vidx(type, vec, idx) (*((type *) &(vec) + idx))
+#define vector(elcount, type) \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+short k;
+
+int main (int argc, char *argv[]) {
+   vector(8, short) v0 = {argc,1,2,3,4,5,6,7};
+   vector(8, short) r1;
+
+   r1 = v0 >> (vector(8, short)){2,2,2,2,2,2,2,2};
+
+   return vidx(short, r1, 0);
+}
+
+/* { dg-final { scan-tree-dump-times ">> 2" 1 "veclower" } } */
+/* { dg-final { cleanup-tree-dump "veclower" } } */
Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c (revision 166249)
+++ gcc/tree-vect-generic.c (working copy)
@@ -284,6 +284,61 @@ expand_vector_addition (gimple_stmt_iter
                                     a, b, code);
 }
 
+/* Check whether VEC is a uniform vector: a VECTOR_CST or CONSTRUCTOR
+   whose elements are all equal and whose element count matches
+   TYPE_VECTOR_SUBPARTS of its type.  On success store the common
+   element in *ELEMENT and return true; otherwise leave *ELEMENT set
+   to NULL_TREE and return false.  */
+static bool
+uniform_vector_p (tree vec, tree *element)
+{
+  tree first, t;
+  unsigned HOST_WIDE_INT i, nelts;
+
+  *element = NULL_TREE;
+
+  if (vec == NULL_TREE)
+    return false;
+
+  if (TREE_CODE (vec) == VECTOR_CST)
+    {
+      t = TREE_VECTOR_CST_ELTS (vec);
+      /* An empty element list has no first element to compare with.  */
+      if (t == NULL_TREE)
+        return false;
+
+      first = TREE_VALUE (t);
+      nelts = 1;
+      for (t = TREE_CHAIN (t); t; t = TREE_CHAIN (t), nelts++)
+        if (!operand_equal_p (first, TREE_VALUE (t), 0))
+          return false;
+    }
+  else if (TREE_CODE (vec) == CONSTRUCTOR)
+    {
+      first = NULL_TREE;
+      nelts = 0;
+      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
+        {
+          if (i == 0)
+            first = t;
+          /* Compare structurally, as for VECTOR_CST; pointer identity
+             would reject equal constants that are distinct nodes.  */
+          else if (!operand_equal_p (first, t, 0))
+            return false;
+          nelts++;
+        }
+    }
+  else
+    return false;
+
+  /* Require exactly one explicit value per element of the vector type.  */
+  if (nelts != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
+    return false;
+
+  *element = first;
+  return true;
+}
+
 static tree
 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
                          gimple assign, enum tree_code code)
@@ -392,7 +447,7 @@ expand_vector_operations_1 (gimple_stmt_
   tree lhs, rhs1, rhs2 = NULL, type, compute_type;
   enum tree_code code;
   enum machine_mode compute_mode;
-  optab op;
+  optab op = NULL;
   enum gimple_rhs_class rhs_class;
   tree new_rhs;
 
@@ -434,18 +489,45 @@ expand_vector_operations_1 (gimple_stmt_
       || code == LROTATE_EXPR
       || code == RROTATE_EXPR)
     {
-      /* If the 2nd argument is vector, we need a vector/vector shift */
+      bool vector_scalar_shift;
+      op = optab_for_tree_code (code, type, optab_scalar);
+
+      /* Record whether a vector/scalar shift is supported.  */
+      vector_scalar_shift = !(!op
+                              || optab_handler (op, TYPE_MODE (type))
+                                 == CODE_FOR_nothing);
+
+      /* If the 2nd argument is vector, we need a vector/vector shift,
+         unless all the elements of the second vector are the same.  */
       if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
-        op = optab_for_tree_code (code, type, optab_vector);
-      else
-        {
-          /* Try for a vector/scalar shift, and if we don't have one, see if we
-             have a vector/vector shift */
-          op = optab_for_tree_code (code, type, optab_scalar);
-          if (!op
-              || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-            op = optab_for_tree_code (code, type, optab_vector);
-        }
+        {
+          tree first, var = NULL_TREE;
+          gimple def_stmt;
+
+          /* Check whether we have vector {x,x,x,x} where x
+             could be a scalar variable or a constant.  Transform
+             vector {x,x,x,x} ==> vector scalar.  */
+          if (vector_scalar_shift
+              && ((TREE_CODE (rhs2) == VECTOR_CST && (var = rhs2))
+                  ||
+                  (TREE_CODE (rhs2) == SSA_NAME
+                   && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
+                   && gimple_assign_single_p (def_stmt)
+                   && (var = gimple_assign_rhs1 (def_stmt))))
+              && uniform_vector_p (var, &first))
+            {
+              gimple_assign_set_rhs2 (stmt, first);
+              update_stmt (stmt);
+              rhs2 = first;
+            }
+          else
+            op = optab_for_tree_code (code, type, optab_vector);
+        }
+
+      /* Try for a vector/scalar shift, and if we don't have one, see if we
+         have a vector/vector shift */
+      else if (!vector_scalar_shift)
+        op = optab_for_tree_code (code, type, optab_vector);
     }
   else
     op = optab_for_tree_code (code, type, optab_default);
Index: gcc/passes.c
===================================================================
--- gcc/passes.c (revision 166249)
+++ gcc/passes.c (working copy)
@@ -736,7 +736,6 @@ init_optimization_passes (void)
   NEXT_PASS (pass_refactor_eh);
   NEXT_PASS (pass_lower_eh);
   NEXT_PASS (pass_build_cfg);
-  NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_warn_function_return);
   NEXT_PASS (pass_build_cgraph_edges);
   NEXT_PASS (pass_inline_parameters);
@@ -755,6 +754,7 @@ init_optimization_passes (void)
 
           NEXT_PASS (pass_referenced_vars);
           NEXT_PASS (pass_build_ssa);
+          NEXT_PASS (pass_lower_vector);
           NEXT_PASS (pass_early_warn_uninitialized);
           /* Note that it is not strictly necessary to schedule an early
              inline pass here.  However, some test cases (e.g.,