vect: inbranch SIMD clones There has been support for generating "inbranch" SIMD clones for a long time, but nothing actually uses them (as far as I can see). This patch add supports for a sub-set of possible cases (those using mask_mode == VOIDmode). The other cases fail to vectorize, just as before, so there should be no regressions. The sub-set of support should cover all cases needed by amdgcn, at present. gcc/ChangeLog: * internal-fn.cc (expand_MASK_CALL): New. * internal-fn.def (MASK_CALL): New. * internal-fn.h (expand_MASK_CALL): New prototype. * omp-simd-clone.cc (simd_clone_adjust_argument_types): Set vector_type for mask arguments also. * tree-if-conv.cc: Include cgraph.h. (if_convertible_stmt_p): Do if conversions for calls to SIMD calls. (predicate_statements): Convert functions to IFN_MASK_CALL. * tree-vect-loop.cc (vect_get_datarefs_in_loop): Recognise IFN_MASK_CALL as a SIMD function call. * tree-vect-stmts.cc (vectorizable_simd_clone_call): Handle IFN_MASK_CALL as an inbranch SIMD function call. Generate the mask vector arguments. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-simd-clone-16.c: New test. * gcc.dg/vect/vect-simd-clone-16b.c: New test. * gcc.dg/vect/vect-simd-clone-16c.c: New test. * gcc.dg/vect/vect-simd-clone-16d.c: New test. * gcc.dg/vect/vect-simd-clone-16e.c: New test. * gcc.dg/vect/vect-simd-clone-16f.c: New test. * gcc.dg/vect/vect-simd-clone-17.c: New test. * gcc.dg/vect/vect-simd-clone-17b.c: New test. * gcc.dg/vect/vect-simd-clone-17c.c: New test. * gcc.dg/vect/vect-simd-clone-17d.c: New test. * gcc.dg/vect/vect-simd-clone-17e.c: New test. * gcc.dg/vect/vect-simd-clone-17f.c: New test. * gcc.dg/vect/vect-simd-clone-18.c: New test. * gcc.dg/vect/vect-simd-clone-18b.c: New test. * gcc.dg/vect/vect-simd-clone-18c.c: New test. * gcc.dg/vect/vect-simd-clone-18d.c: New test. * gcc.dg/vect/vect-simd-clone-18e.c: New test. * gcc.dg/vect/vect-simd-clone-18f.c: New test. diff --git a/gcc/internal-fn.cc b/gcc/internal-fn.cc index 9471f543191..d9e11bfc62a 100644 --- a/gcc/internal-fn.cc +++ b/gcc/internal-fn.cc @@ -4527,3 +4527,10 @@ void expand_ASSUME (internal_fn, gcall *) { } + +void +expand_MASK_CALL (internal_fn, gcall *) +{ + /* This IFN should only exist between ifcvt and vect passes. */ + gcc_unreachable (); +} diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def index 61516dab66d..301c3780659 100644 --- a/gcc/internal-fn.def +++ b/gcc/internal-fn.def @@ -466,6 +466,9 @@ DEF_INTERNAL_FN (TRAP, ECF_CONST | ECF_LEAF | ECF_NORETURN DEF_INTERNAL_FN (ASSUME, ECF_CONST | ECF_LEAF | ECF_NOTHROW | ECF_LOOPING_CONST_OR_PURE, NULL) +/* For if-conversion of inbranch SIMD clones. */ +DEF_INTERNAL_FN (MASK_CALL, ECF_NOVOPS, NULL) + #undef DEF_INTERNAL_INT_FN #undef DEF_INTERNAL_FLT_FN #undef DEF_INTERNAL_FLT_FLOATN_FN diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h index 21b1ce43df6..ced92c041bb 100644 --- a/gcc/internal-fn.h +++ b/gcc/internal-fn.h @@ -244,6 +244,7 @@ extern void expand_SHUFFLEVECTOR (internal_fn, gcall *); extern void expand_SPACESHIP (internal_fn, gcall *); extern void expand_TRAP (internal_fn, gcall *); extern void expand_ASSUME (internal_fn, gcall *); +extern void expand_MASK_CALL (internal_fn, gcall *); extern bool vectorized_internal_fn_supported_p (internal_fn, tree); diff --git a/gcc/omp-simd-clone.cc b/gcc/omp-simd-clone.cc index 21d69aa8747..afb7d99747b 100644 --- a/gcc/omp-simd-clone.cc +++ b/gcc/omp-simd-clone.cc @@ -937,6 +937,7 @@ simd_clone_adjust_argument_types (struct cgraph_node *node) } sc->args[i].orig_type = base_type; sc->args[i].arg_type = SIMD_CLONE_ARG_TYPE_MASK; + sc->args[i].vector_type = adj.type; } if (node->definition) diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c new file mode 100644 index 00000000000..ffaabb30d1e --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd +TYPE __attribute__((noinline)) +foo (TYPE a) +{ + return a + 1; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noinline)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(a[i]) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noinline)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(a[i]) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c new file mode 100644 index 00000000000..a503ef85238 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c new file mode 100644 index 00000000000..6563879df71 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16c.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=short. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c new file mode 100644 index 00000000000..6c5e69482e5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16d.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=char. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c new file mode 100644 index 00000000000..6690844deae --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16e.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c new file mode 100644 index 00000000000..e7b35a6a2dc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-16f.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-16.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 7 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=int64. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c new file mode 100644 index 00000000000..6f5d374a417 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd uniform(b) +TYPE __attribute__((noinline)) +foo (TYPE a, TYPE b) +{ + return a + b; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noinline)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(a[i], 1) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noinline)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(a[i], 1) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c new file mode 100644 index 00000000000..1e2c3ab11b3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c new file mode 100644 index 00000000000..007001de669 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17c.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=short. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c new file mode 100644 index 00000000000..abb85a4ceee --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17d.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=char. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c new file mode 100644 index 00000000000..2c1d8a659bd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17e.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c new file mode 100644 index 00000000000..582e690304f --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-17f.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-17.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=int64. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c new file mode 100644 index 00000000000..750a3f92b62 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18.c @@ -0,0 +1,89 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +/* Test that simd inbranch clones work correctly. */ + +#ifndef TYPE +#define TYPE int +#endif + +/* A simple function that will be cloned. */ +#pragma omp declare simd uniform(b) +TYPE __attribute__((noinline)) +foo (TYPE b, TYPE a) +{ + return a + b; +} + +/* Check that "inbranch" clones are called correctly. */ + +void __attribute__((noinline)) +masked (TYPE * __restrict a, TYPE * __restrict b, int size) +{ + #pragma omp simd + for (int i = 0; i < size; i++) + b[i] = a[i]<1 ? foo(1, a[i]) : a[i]; +} + +/* Check that "inbranch" works when there might be unrolling. */ + +void __attribute__((noinline)) +masked_fixed (TYPE * __restrict a, TYPE * __restrict b) +{ + #pragma omp simd + for (int i = 0; i < 128; i++) + b[i] = a[i]<1 ? foo(1, a[i]) : a[i]; +} + +/* Validate the outputs. */ + +void +check_masked (TYPE *b, int size) +{ + for (int i = 0; i < size; i++) + if (((TYPE)i < 1 && b[i] != (TYPE)(i + 1)) + || ((TYPE)i >= 1 && b[i] != (TYPE)i)) + { + __builtin_printf ("error at %d\n", i); + __builtin_exit (1); + } +} + +int +main () +{ + TYPE a[1024]; + TYPE b[1024]; + + for (int i = 0; i < 1024; i++) + a[i] = i; + + masked_fixed (a, b); + check_masked (b, 128); + + /* Test various sizes to cover machines with different vectorization + factors. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size); + check_masked (b, size); + } + + /* Test sizes that might exercise the partial vector code-path. */ + for (int size = 8; size <= 1024; size *= 2) + { + masked (a, b, size-4); + check_masked (b, size-4); + } + + return 0; +} + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c new file mode 100644 index 00000000000..a77ccf3bfcc --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18b.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE float +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c new file mode 100644 index 00000000000..bee5f338abe --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18c.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE short +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=short. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c new file mode 100644 index 00000000000..a749edefdd7 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18d.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE char +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=char. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 16 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c new file mode 100644 index 00000000000..061e0dc2621 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18e.c @@ -0,0 +1,14 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE double +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 19 "optimized" { target x86_64-*-* } } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ diff --git a/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c new file mode 100644 index 00000000000..a3037f5809a --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-simd-clone-18f.c @@ -0,0 +1,16 @@ +/* { dg-require-effective-target vect_simd_clones } */ +/* { dg-additional-options "-fopenmp-simd -fdump-tree-optimized" } */ +/* { dg-additional-options "-mavx" { target avx_runtime } } */ + +#define TYPE __INT64_TYPE__ +#include "vect-simd-clone-18.c" + +/* Ensure the the in-branch simd clones are used on targets that support + them. These counts include all call and definitions. */ + +/* { dg-final { scan-tree-dump-times "simdclone" 6 "optimized" { target amdgcn-*-* } } } */ +/* TODO: aarch64 */ + +/* Fails to use in-branch clones for TYPE=int64. */ +/* { dg-skip-if "" { x86_64-*-* } { "-flto" } { "" } } */ +/* { dg-final { scan-tree-dump-times "simdclone" 18 "optimized" { target x86_64-*-* } } } */ diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 64b20b4a9e1..127ee873d25 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -123,6 +123,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-dse.h" #include "tree-vectorizer.h" #include "tree-eh.h" +#include "cgraph.h" /* For lang_hooks.types.type_for_mode. */ #include "langhooks.h" @@ -1063,7 +1064,8 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, A statement is if-convertible if: - it is an if-convertible GIMPLE_ASSIGN, - it is a GIMPLE_LABEL or a GIMPLE_COND, - - it is builtins call. */ + - it is builtins call. + - it is a call to a function with a SIMD clone. */ static bool if_convertible_stmt_p (gimple *stmt, vec refs) @@ -1083,13 +1085,23 @@ if_convertible_stmt_p (gimple *stmt, vec refs) tree fndecl = gimple_call_fndecl (stmt); if (fndecl) { + /* We can vectorize some builtins and functions with SIMD + "inbranch" clones. */ int flags = gimple_call_flags (stmt); + struct cgraph_node *node = cgraph_node::get (fndecl); if ((flags & ECF_CONST) && !(flags & ECF_LOOPING_CONST_OR_PURE) - /* We can only vectorize some builtins at the moment, - so restrict if-conversion to those. */ && fndecl_built_in_p (fndecl)) return true; + else if (node && node->simd_clones != NULL) + /* Ensure that at least one clone can be "inbranch". */ + for (struct cgraph_node *n = node->simd_clones; n != NULL; + n = n->simdclone->next_clone) + if (n->simdclone->inbranch) + { + need_to_predicate = true; + return true; + } } return false; } @@ -2603,6 +2615,29 @@ predicate_statements (loop_p loop) gimple_assign_set_rhs1 (stmt, ifc_temp_var (type, rhs, &gsi)); update_stmt (stmt); } + + /* Convert functions that have a SIMD clone to IFN_MASK_CALL. This + will cause the vectorizer to match the "in branch" clone variants, + and serves to build the mask vector in a natural way. */ + gcall *call = dyn_cast (gsi_stmt (gsi)); + if (call && !gimple_call_internal_p (call)) + { + tree orig_fn = gimple_call_fn (call); + int orig_nargs = gimple_call_num_args (call); + auto_vec args; + args.safe_push (orig_fn); + for (int i=0; i < orig_nargs; i++) + args.safe_push (gimple_call_arg (call, i)); + args.safe_push (cond); + + /* Replace the call with a IFN_MASK_CALL that has the extra + condition parameter. */ + gcall *new_call = gimple_build_call_internal_vec (IFN_MASK_CALL, + args); + gimple_call_set_lhs (new_call, gimple_call_lhs (call)); + gsi_replace (&gsi, new_call, true); + } + lhs = gimple_get_lhs (gsi_stmt (gsi)); if (lhs && TREE_CODE (lhs) == SSA_NAME) ssa_names.add (lhs); diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index aacbb12580c..60efd4d7525 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -2121,6 +2121,15 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, if (is_gimple_call (stmt) && loop->safelen) { tree fndecl = gimple_call_fndecl (stmt), op; + if (fndecl == NULL_TREE + && gimple_call_internal_p (stmt) + && gimple_call_internal_fn (stmt) == IFN_MASK_CALL) + { + fndecl = gimple_call_arg (stmt, 0); + gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR); + fndecl = TREE_OPERAND (fndecl, 0); + gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL); + } if (fndecl != NULL_TREE) { cgraph_node *node = cgraph_node::get (fndecl); diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc index 5485da58b38..0310c80c79d 100644 --- a/gcc/tree-vect-stmts.cc +++ b/gcc/tree-vect-stmts.cc @@ -3987,6 +3987,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, size_t i, nargs; tree lhs, rtype, ratype; vec *ret_ctor_elts = NULL; + int arg_offset = 0; /* Is STMT a vectorizable call? */ gcall *stmt = dyn_cast (stmt_info->stmt); @@ -3994,6 +3995,16 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; fndecl = gimple_call_fndecl (stmt); + if (fndecl == NULL_TREE + && gimple_call_internal_p (stmt) + && gimple_call_internal_fn (stmt) == IFN_MASK_CALL) + { + fndecl = gimple_call_arg (stmt, 0); + gcc_checking_assert (TREE_CODE (fndecl) == ADDR_EXPR); + fndecl = TREE_OPERAND (fndecl, 0); + gcc_checking_assert (TREE_CODE (fndecl) == FUNCTION_DECL); + arg_offset = 1; + } if (fndecl == NULL_TREE) return false; @@ -4024,7 +4035,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; /* Process function arguments. */ - nargs = gimple_call_num_args (stmt); + nargs = gimple_call_num_args (stmt) - arg_offset; /* Bail out if the function has zero arguments. */ if (nargs == 0) @@ -4042,7 +4053,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, thisarginfo.op = NULL_TREE; thisarginfo.simd_lane_linear = false; - op = gimple_call_arg (stmt, i); + op = gimple_call_arg (stmt, i + arg_offset); if (!vect_is_simple_use (op, vinfo, &thisarginfo.dt, &thisarginfo.vectype) || thisarginfo.dt == vect_uninitialized_def) @@ -4057,16 +4068,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, || thisarginfo.dt == vect_external_def) gcc_assert (thisarginfo.vectype == NULL_TREE); else - { - gcc_assert (thisarginfo.vectype != NULL_TREE); - if (VECTOR_BOOLEAN_TYPE_P (thisarginfo.vectype)) - { - if (dump_enabled_p ()) - dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, - "vector mask arguments are not supported\n"); - return false; - } - } + gcc_assert (thisarginfo.vectype != NULL_TREE); /* For linear arguments, the analyze phase should have saved the base and step in STMT_VINFO_SIMD_CLONE_INFO. */ @@ -4159,9 +4161,6 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, if (target_badness < 0) continue; this_badness += target_badness * 512; - /* FORNOW: Have to add code to add the mask argument. */ - if (n->simdclone->inbranch) - continue; for (i = 0; i < nargs; i++) { switch (n->simdclone->args[i].arg_type) @@ -4169,7 +4168,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, case SIMD_CLONE_ARG_TYPE_VECTOR: if (!useless_type_conversion_p (n->simdclone->args[i].orig_type, - TREE_TYPE (gimple_call_arg (stmt, i)))) + TREE_TYPE (gimple_call_arg (stmt, i + arg_offset)))) i = -1; else if (arginfo[i].dt == vect_constant_def || arginfo[i].dt == vect_external_def @@ -4199,7 +4198,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, i = -1; break; case SIMD_CLONE_ARG_TYPE_MASK: - gcc_unreachable (); + break; } if (i == (size_t) -1) break; @@ -4225,18 +4224,55 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, return false; for (i = 0; i < nargs; i++) - if ((arginfo[i].dt == vect_constant_def - || arginfo[i].dt == vect_external_def) - && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) - { - tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); - arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, - slp_node); - if (arginfo[i].vectype == NULL - || !constant_multiple_p (bestn->simdclone->simdlen, - simd_clone_subparts (arginfo[i].vectype))) + { + if ((arginfo[i].dt == vect_constant_def + || arginfo[i].dt == vect_external_def) + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { + tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i + arg_offset)); + arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type, + slp_node); + if (arginfo[i].vectype == NULL + || !constant_multiple_p (bestn->simdclone->simdlen, + simd_clone_subparts (arginfo[i].vectype))) + return false; + } + + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR + && VECTOR_BOOLEAN_TYPE_P (bestn->simdclone->args[i].vector_type)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "vector mask arguments are not supported.\n"); return false; - } + } + + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK + && bestn->simdclone->mask_mode == VOIDmode + && (simd_clone_subparts (bestn->simdclone->args[i].vector_type) + != simd_clone_subparts (arginfo[i].vectype))) + { + /* FORNOW we only have partial support for vector-type masks that + can't hold all of simdlen. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "in-branch vector clones are not yet" + " supported for mismatched vector sizes.\n"); + return false; + } + if (bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_MASK + && bestn->simdclone->mask_mode != VOIDmode) + { + /* FORNOW don't support integer-type masks. */ + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, + vect_location, + "in-branch vector clones are not yet" + " supported for integer mask modes.\n"); + return false; + } + } fndecl = bestn->decl; nunits = bestn->simdclone->simdlen; @@ -4326,7 +4362,7 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, { unsigned int k, l, m, o; tree atype; - op = gimple_call_arg (stmt, i); + op = gimple_call_arg (stmt, i + arg_offset); switch (bestn->simdclone->args[i].arg_type) { case SIMD_CLONE_ARG_TYPE_VECTOR: @@ -4425,6 +4461,65 @@ vectorizable_simd_clone_call (vec_info *vinfo, stmt_vec_info stmt_info, } } break; + case SIMD_CLONE_ARG_TYPE_MASK: + atype = bestn->simdclone->args[i].vector_type; + if (bestn->simdclone->mask_mode != VOIDmode) + { + /* FORNOW: this is disabled above. */ + gcc_unreachable (); + } + else + { + tree elt_type = TREE_TYPE (atype); + tree one = fold_convert (elt_type, integer_one_node); + tree zero = fold_convert (elt_type, integer_zero_node); + o = vector_unroll_factor (nunits, + simd_clone_subparts (atype)); + for (m = j * o; m < (j + 1) * o; m++) + { + if (simd_clone_subparts (atype) + < simd_clone_subparts (arginfo[i].vectype)) + { + /* The mask type has fewer elements than simdlen. */ + + /* FORNOW */ + gcc_unreachable (); + } + else if (simd_clone_subparts (atype) + == simd_clone_subparts (arginfo[i].vectype)) + { + /* The SIMD clone function has the same number of + elements as the current function. */ + if (m == 0) + { + vect_get_vec_defs_for_operand (vinfo, stmt_info, + o * ncopies, + op, + &vec_oprnds[i]); + vec_oprnds_i[i] = 0; + } + vec_oprnd0 = vec_oprnds[i][vec_oprnds_i[i]++]; + vec_oprnd0 + = build3 (VEC_COND_EXPR, atype, vec_oprnd0, + build_vector_from_val (atype, one), + build_vector_from_val (atype, zero)); + gassign *new_stmt + = gimple_build_assign (make_ssa_name (atype), + vec_oprnd0); + vect_finish_stmt_generation (vinfo, stmt_info, + new_stmt, gsi); + vargs.safe_push (gimple_assign_lhs (new_stmt)); + } + else + { + /* The mask type has more elements than simdlen. */ + + /* FORNOW */ + gcc_unreachable (); + } + } + } + break; case SIMD_CLONE_ARG_TYPE_UNIFORM: vargs.safe_push (op); break;