diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index acc0cfe5f94..df33509c6e4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22079,30 +22079,42 @@ aarch64_expand_vector_init (rtx target, rtx vals)
      and matches[X][1] with the count of duplicate elements (if X is the
      earliest element which has duplicates).  */
 
-  if (n_var == n_elts && n_elts <= 16)
+  /* matches[] only has room for 16 lanes, so scan nothing (which leaves
+     maxv at zero) if the vector has more elements than that.  */
+  const int n_scan = n_elts <= 16 ? n_elts : 0;
+  int matches[16][2] = {0};
+  for (int i = 0; i < n_scan; i++)
     {
-      int matches[16][2] = {0};
-      for (int i = 0; i < n_elts; i++)
+      for (int j = 0; j <= i; j++)
 	{
-	  for (int j = 0; j <= i; j++)
+	  if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
 	    {
-	      if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
-		{
-		  matches[i][0] = j;
-		  matches[j][1]++;
-		  break;
-		}
+	      matches[i][0] = j;
+	      matches[j][1]++;
+	      break;
 	    }
 	}
-      int maxelement = 0;
-      int maxv = 0;
-      for (int i = 0; i < n_elts; i++)
-	if (matches[i][1] > maxv)
-	  {
-	    maxelement = i;
-	    maxv = matches[i][1];
-	  }
+    }
+  int maxelement = 0;
+  int maxv = 0;
+  for (int i = 0; i < n_scan; i++)
+    if (matches[i][1] > maxv)
+      {
+	maxelement = i;
+	maxv = matches[i][1];
+      }
+
+  /* Take the path below when n_var == n_elts (as before), or when one
+     element that is neither a CONST_INT nor a CONST_DOUBLE fills at least
+     four fifths of the lanes, rounded down (no floating point needed).  */
+  rtx max_elem = XVECEXP (vals, 0, maxelement);
+  if (n_elts <= 16
+      && ((n_var == n_elts)
+	  || (maxv >= n_elts * 4 / 5
+	      && !CONST_INT_P (max_elem)
+	      && !CONST_DOUBLE_P (max_elem))))
+    {
 
       /* Create a duplicate of the most common element, unless all elements
 	 are equally useless to us, in which case just immediately set the
 	 vector register using the first element.  */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-18.c b/gcc/testsuite/gcc.target/aarch64/vec-init-18.c
new file mode 100644
index 00000000000..e20b813559e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-18.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** f1_s16:
+**	...
+**	dup	v[0-9]+\.8h, w[0-9]+
+**	movi	v[0-9]+\.4h, 0x1
+**	ins	v[0-9]+\.h\[7\], v[0-9]+\.h\[0\]
+**	...
+**	ret
+*/
+
+int16x8_t f1_s16(int16_t x)
+{
+  return (int16x8_t) {x, x, x, x, x, x, x, 1};
+}
+
+/*
+** f2_s16:
+**	...
+**	dup	v[0-9]+\.8h, w[0-9]+
+**	movi	v[0-9]+\.4h, 0x1
+**	movi	v[0-9]+\.4h, 0x2
+**	ins	v[0-9]+\.h\[6\], v[0-9]+\.h\[0\]
+**	ins	v[0-9]+\.h\[7\], v[0-9]+\.h\[0\]
+**	...
+**	ret
+*/
+
+int16x8_t f2_s16(int16_t x)
+{
+  return (int16x8_t) { x, x, x, x, x, x, 1, 2 };
+}
+
+/*
+** f3_s16:
+**	...
+**	movi	v[0-9]+\.8h, 0x1
+**	ins	v[0-9]+\.h\[0\], w0
+**	ins	v[0-9]+\.h\[1\], w0
+**	ins	v[0-9]+\.h\[2\], w0
+**	...
+**	ret
+*/
+
+int16x8_t f3_s16(int16_t x)
+{
+  return (int16x8_t) {x, x, x, 1, 1, 1, 1, 1};
+}