diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index 258d3c6377f..58e3dc8b2e9 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -716,6 +716,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) } sc->args[i].orig_type = base_type; sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; + sc->args[i].vector_type = adj.type; } if (node->definition) diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c new file mode 100644 index 00000000000..ffaabb30d1e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd +TYPE __attribute__((noinline)) +foo (TYPE a) +{ + return a + 1; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noinline)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(a[i]) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noinline)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(a[i]) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c new file mode 100644 index 00000000000..a503ef85238 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c new file mode 100644 index 00000000000..6563879df71 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=short. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c new file mode 100644 index 00000000000..6c5e69482e5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=char. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c new file mode 100644 index 00000000000..6690844deae --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c new file mode 100644 index 00000000000..e7b35a6a2dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=int64. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c new file mode 100644 index 00000000000..6f5d374a417 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd uniform(b) +TYPE __attribute__((noinline)) +foo (TYPE a, TYPE b) +{ + return a + b; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noinline)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(a[i], 1) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noinline)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(a[i], 1) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c new file mode 100644 index 00000000000..1e2c3ab11b3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c new file mode 100644 index 00000000000..007001de669 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=short. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c new file mode 100644 index 00000000000..abb85a4ceee --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=char. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c new file mode 100644 index 00000000000..2c1d8a659bd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c new file mode 100644 index 00000000000..582e690304f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=int64. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c new file mode 100644 index 00000000000..750a3f92b62 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd uniform(b) +TYPE __attribute__((noinline)) +foo (TYPE b, TYPE a) +{ + return a + b; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noinline)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(1, a[i]) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noinline)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(1, a[i]) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c new file mode 100644 index 00000000000..a77ccf3bfcc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c new file mode 100644 index 00000000000..bee5f338abe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=short. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c new file mode 100644 index 00000000000..a749edefdd7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=char. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c new file mode 100644 index 00000000000..061e0dc2621 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c new file mode 100644 index 00000000000..a3037f5809a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=int64. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 1c8e1a45234..82b21add802 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -122,6 +122,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-dse.h" #include "tree-vectorizer.h" #include "tree-eh.h" +#include "cgraph.h" /* Only handle PHIs with no more arguments unless we are asked to by simd pragma. */ @@ -1054,7 +1055,8 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, A statement is if-convertible if: - it is an if-convertible GIMPLE_ASSIGN, - it is a GIMPLE_LABEL or a GIMPLE_COND, - - it is builtins call. */ + - it is builtins call. + - it is a call to a function with a SIMD clone. */ static bool if_convertible_stmt_p (gimple *stmt, vec refs) @@ -1074,13 +1076,19 @@ if_convertible_stmt_p (gimple *stmt, vec refs) tree fndecl = gimple_call_fndecl (stmt); if (fndecl) { + /* We can vectorize some builtins and functions with SIMD + clones. */ int flags = gimple_call_flags (stmt); + struct cgraph_node *node = cgraph_node::get (fndecl); if ((flags & ECF_CONST) && !(flags & ECF_LOOPING_CONST_OR_PURE) - /* We can only vectorize some builtins at the moment, - so restrict if-conversion to those. */ && fndecl_built_in_p (fndecl)) return true; + else if (node && node->simd_clones != NULL) + { + need_to_predicate = true; + return true; + } } return false; } @@ -2614,6 +2622,31 @@ predicate_statements (loop_p loop) gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi)); update_stmt (stmt); } + + /* Add a predicate parameter to functions that have a SIMD clone. + This will cause the vectorizer to match the "in branch" clone + variants because they also have the extra parameter, and serves + to build the mask vector in a natural way. */ + gcall *call = dyn_cast (gsi_stmt (gsi)); + if (call && !gimple_call_internal_p (call)) + { + tree orig_fndecl = gimple_call_fndecl (call); + int orig_nargs = gimple_call_num_args (call); + auto_vec args; + for (int i=0; i < orig_nargs; i++) + args.safe_push (gimple_call_arg (call, i)); + args.safe_push (cond); + + /* Replace the call with a new one that has the extra + parameter. The FUNCTION_DECL remains unchanged so that + the vectorizer can find the SIMD clones. This call will + either be deleted or replaced at that time, so the + mismatch is short-lived and we can live with it. */ + gcall *new_call = gimple_build_call_vec (orig_fndecl, args); + gimple_call_set_lhs (new_call, gimple_call_lhs (call)); + gsi_replace (&gsi, new_call, true); + } + lhs = gimple_get_lhs (gsi_stmt (gsi)); if (lhs && TREE_CODE (lhs) == SSA_NAME) ssa_names.add (lhs); diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index f582d238984..2214d216c15 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -4049,16 +4049,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, || thisarginfo.dt == vect_external_def) gcc_assert (thisarginfo.vectype == NULL_TREE); else - { - gcc_assert (thisarginfo.vectype != NULL_TREE); - if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "vector mask arguments are not supported\n"); - return false; - } - } + gcc_assert (thisarginfo.vectype != NULL_TREE); /* For linear arguments, the analyze phase should have saved the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ @@ -4151,9 +4142,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, if (target_badness < 0) continue; this_badness += target_badness * 512; - /* FORNOW: Have to add code to add the mask argument. */ - if (n->simdclone->inbranch) - continue; for (i = 0; i < nargs; i++) { switch (n->simdclone->args[i].arg_type) @@ -4191,7 +4179,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, i = -1; break; case SIMD_CLONE_ARG_TYPE_MASK: - gcc_unreachable (); + break; } if (i == (size_t) -1) break; @@ -4217,18 +4205,55 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; for (i = 0; i < nargs; i++) - if ((arginfo[i].dt == vect_constant_def - || arginfo[i].dt == vect_external_def) - && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) - { - tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); - arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, - slp_node); - if (arginfo[i].vectype == NULL - || !constant_multiple_p (bestn->simdclone->simdlen, - simd_clone_subparts (arginfo[i].vectype))) + { + if ((arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def) + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); + arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, + slp_node); + if (arginfo[i].vectype == NULL + || !constant_multiple_p (bestn->simdclone->simdlen, + simd_clone_subparts (arginfo[i].vectype))) + return false; + } + + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR + && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "vector mask arguments are not supported.\n"); return false; - } + } + + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK + && bestn->simdclone->mask_mode == VOIDmode + && (simd_clone_subparts (bestn->simdclone->args[i].vector_type) + != simd_clone_subparts (arginfo[i].vectype))) + { + /* FORNOW we only have partial support for vector-type masks that + can't hold all of simdlen. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "in-branch vector clones are not yet" + " supported for mismatched vector sizes.\n"); + return false; + } + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK + && bestn->simdclone->mask_mode != VOIDmode) + { + /* FORNOW don't support integer-type masks. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "in-branch vector clones are not yet" + " supported for integer mask modes.\n"); + return false; + } + } fndecl = bestn->decl; nunits = bestn->simdclone->simdlen; @@ -4417,6 +4442,65 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, } } break; + case SIMD_CLONE_ARG_TYPE_MASK: + atype = bestn->simdclone->args[i].vector_type; + if (bestn->simdclone->mask_mode != VOIDmode) + { + /* FORNOW: this is disabled above. */ + gcc_unreachable (); + } + else + { + tree elt_type = TREE_TYPE (atype); + tree one = fold_convert (elt_type, integer_one_node); + tree zero = fold_convert (elt_type, integer_zero_node); + o = vector_unroll_factor (nunits, + simd_clone_subparts (atype)); + for (m = j * o; m < (j + 1) * o; m++) + { + if (simd_clone_subparts (atype) + < simd_clone_subparts (arginfo[i].vectype)) + { + /* The mask type has fewer elements than simdlen. */ + + /* FORNOW */ + gcc_unreachable (); + } + else if (simd_clone_subparts (atype) + == simd_clone_subparts (arginfo[i].vectype)) + { + /* The SIMD clone function has the same number of + elements as the current function. */ + if (m == 0) + { + vect_get_vec_defs_for_operand (vinfo, stmt_info, + o * ncopies, + op, + &vec_oprnds[i]); + vec_oprnds_i[i] = 0; + } + vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; + vec_oprnd0 + = build3 (VEC_COND_EXPR, atype, vec_oprnd0, + build_vector_from_val (atype, one), + build_vector_from_val (atype, zero)); + gassign *new_stmt + = gimple_build_assign (make_ssa_name (atype), + vec_oprnd0); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + else + { + /* The mask type has more elements than simdlen. */ + + /* FORNOW */ + gcc_unreachable (); + } + } + } + break; case SIMD_CLONE_ARG_TYPE_UNIFORM: vargs.safe_push (op); break;