> On Fri, 2024-01-05 at 11:02 +0000, Tamar Christina wrote: > > Ok, so something like: > > > > > > ([istarget loongarch*-*-*] && > > > > ([check_effective_target_loongarch_sx] || > > > > [check_effective_target_hard_float])) > > ? > > We don't need "[check_effective_target_loongarch_sx] ||" because SIMD > requires hard float. > Cool, thanks! -- Hi All, currently GCC does not treat IFN_COPYSIGN the same as the copysign tree expr. The latter has a libcall fallback and the IFN can only do optabs. Because of this the change I made to optimize copysign only works if the target has implemented the optab, but it should work for those that have the libcall too. More annoyingly, if a target has vector versions of ABS and NEG but not COPYSIGN then the change made them lose vectorization. The proper fix for this is to treat the IFN the same as the tree EXPR and to enhance expand_COPYSIGN to also support vector calls. I have such a patch for GCC 15 but it's quite big and too invasive for stage-4. As such this is a minimal fix: just don't apply the transformation, and leave targets which don't have the optab unoptimized. The target list for check_effective_target_ifn_copysign was obtained by grepping for copysign and looking at the optab. Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. Tests were also run on x86_64-pc-linux-gnu with -m32 and no longer fail. Ok for master? Thanks, Tamar gcc/ChangeLog: PR tree-optimization/112468 * doc/sourcebuild.texi: Document ifn_copysign. * match.pd: Only apply transformation if target supports the IFN. gcc/testsuite/ChangeLog: PR tree-optimization/112468 * gcc.dg/fold-copysign-1.c: Modify tests based on if target supports IFN_COPYSIGN. * gcc.dg/pr55152-2.c: Likewise. * gcc.dg/tree-ssa/abs-4.c: Likewise. * gcc.dg/tree-ssa/backprop-6.c: Likewise. * gcc.dg/tree-ssa/copy-sign-2.c: Likewise. * gcc.dg/tree-ssa/mult-abs-2.c: Likewise. * lib/target-supports.exp (check_effective_target_ifn_copysign): New. 
--- inline copy of patch --- diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index 4be67daedb20d394857c02739389cabf23c0d533..f4847dafe65cbbf8c9de34905f614ef6957658b4 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2664,6 +2664,10 @@ Target requires a command line argument to enable a SIMD instruction set. @item xorsign Target supports the xorsign optab expansion. +@item ifn_copysign +Target supports the IFN_COPYSIGN optab expansion for both scalar and vector +types. + @end table @subsubsection Environment attributes diff --git a/gcc/match.pd b/gcc/match.pd index d57e29bfe1d68afd4df4dda20fecc2405ff05332..87d13e7e3e1aa6d89119142b614890dc4729b521 100644 --- a/gcc/match.pd +++ b/gcc/match.pd @@ -1159,13 +1159,22 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) (simplify (copysigns @0 REAL_CST@1) (if (!REAL_VALUE_NEGATIVE (TREE_REAL_CST (@1))) - (abs @0)))) + (abs @0) +#if GIMPLE + (if (!direct_internal_fn_supported_p (IFN_COPYSIGN, type, + OPTIMIZE_FOR_BOTH)) + (negate (abs @0))) +#endif + ))) +#if GIMPLE /* Transform fneg (fabs (X)) -> copysign (X, -1). */ (simplify (negate (abs @0)) - (IFN_COPYSIGN @0 { build_minus_one_cst (type); })) - + (if (direct_internal_fn_supported_p (IFN_COPYSIGN, type, + OPTIMIZE_FOR_BOTH)) + (IFN_COPYSIGN @0 { build_minus_one_cst (type); }))) +#endif /* copysign(copysign(x, y), z) -> copysign(x, z). 
*/ (for copysigns (COPYSIGN_ALL) (simplify diff --git a/gcc/testsuite/gcc.dg/fold-copysign-1.c b/gcc/testsuite/gcc.dg/fold-copysign-1.c index f9cafd14ab05f5e8ab2f6f68e62801d21c2df6a6..96b80c733794fffada1b08274ef39cc8f6e442ce 100644 --- a/gcc/testsuite/gcc.dg/fold-copysign-1.c +++ b/gcc/testsuite/gcc.dg/fold-copysign-1.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-cddce1" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ double foo (double x) { @@ -12,5 +13,7 @@ double bar (double x) return __builtin_copysign (x, minuszero); } -/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" } } */ -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" } } */ +/* { dg-final { scan-tree-dump-times "__builtin_copysign" 1 "cddce1" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "cddce1" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= -" 1 "cddce1" { target { ! ifn_copysign } } } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 2 "cddce1" { target { ! 
ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/pr55152-2.c b/gcc/testsuite/gcc.dg/pr55152-2.c index 605f202ed6bc7aa8fe921457b02ff0b88cc63ce6..24068cffa4a8e2807ba7d16c4ed3def4f736e797 100644 --- a/gcc/testsuite/gcc.dg/pr55152-2.c +++ b/gcc/testsuite/gcc.dg/pr55152-2.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -ffinite-math-only -fno-signed-zeros -fstrict-overflow -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ double g (double a) { @@ -10,5 +11,6 @@ int f(int a) return (a<-a)?a:-a; } -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" } } */ -/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" } } */ +/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "ABS_EXPR" 2 "optimized" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c index e1b825f37f69ac3c4666b3a52d733368805ad31d..80fa448df1259c7dba406797f4198205783a2fba 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/abs-4.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O1 -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ /* PR tree-optimization/109829 */ float abs_f(float x) { return __builtin_signbit(x) ? x : -x; } @@ -9,6 +10,8 @@ long double abs_ld(long double x) { return __builtin_signbit(x) ? x : -x; } /* __builtin_signbit(x) ? x : -x. 
Should be convert into - ABS_EXP */ /* { dg-final { scan-tree-dump-not "signbit" "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= -" 1 "optimized"} } */ -/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= -" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= \.COPYSIGN" 2 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "= ABS_EXPR" 3 "optimized" { target { ! ifn_copysign } } } } */ +/* { dg-final { scan-tree-dump-times "= -" 3 "optimized" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c index c3a138642d6ff7be984e91fa1343cb2718db7ae1..4087ba93018bb71710102eb379460bc760020081 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/backprop-6.c @@ -1,5 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O -fdump-tree-backprop-details" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ void start (void *); void end (void *); @@ -26,6 +27,8 @@ TEST_FUNCTION (float, f) TEST_FUNCTION (double, ) TEST_FUNCTION (long double, l) -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" } } */ -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" } } */ -/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 4 "backprop" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = \.COPYSIGN} 2 "backprop" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 1 "backprop" { target ifn_copysign } } } */ 
+/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = -} 6 "backprop" { target { ! ifn_copysign } } } } */ +/* { dg-final { scan-tree-dump-times {Deleting[^\n]* = ABS_EXPR <} 3 "backprop" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c index e5d565c4b9832c00106588ef411fbd8c292a5cad..e43bc315bef2bd11c11cfd2685f5088e792b7bf7 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/copy-sign-2.c @@ -1,4 +1,5 @@ /* { dg-options "-O2 -ffast-math -fdump-tree-optimized" } */ +/* { dg-additional-options "-msse -mfpmath=sse" { target { { i?86-*-* x86_64-*-* } && ilp32 } } } */ /* { dg-do compile } */ float f(float x) { @@ -10,5 +11,6 @@ float f1(float x) float t = __builtin_copysignf (1.0f, -x); return x * t; } -/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized"} } */ -/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized"} } */ +/* { dg-final { scan-tree-dump-times "ABS" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times ".COPYSIGN" 1 "optimized" { target ifn_copysign } } } */ +/* { dg-final { scan-tree-dump-times "ABS" 2 "optimized" { target { ! ifn_copysign } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c index a22896b21c8b5a4d5d8e28bd8ae0db896e63ade0..675127cfe56b2e9aa9d4c06e2bdce62b59545a08 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/mult-abs-2.c @@ -34,5 +34,5 @@ float i1(float x) { return x * (x <= 0.f ? 
1.f : -1.f); } -/* { dg-final { scan-tree-dump-times "ABS" 4 "gimple"} } */ -/* { dg-final { scan-tree-dump-times "\.COPYSIGN" 4 "gimple"} } */ + +/* { dg-final { scan-tree-dump-times "ABS" 8 "gimple" } } */ diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 7f13ff0ca565efdf19065811f3301db897329073..f0765a14fb78f2267f54f5ae79a86f4ab644152b 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -7830,6 +7830,30 @@ proc check_effective_target_xorsign { } { || [istarget aarch64*-*-*] || [istarget arm*-*-*] }}] } +# Return 1 if the target plus current options supports folding of +# copysign into IFN_COPYSIGN. +# +# This won't change for different subtargets so cache the result. + +proc check_effective_target_ifn_copysign { } { + return [check_cached_effective_target_indexed ifn_copysign { + expr { + (([istarget i?86-*-*] || [istarget x86_64-*-*]) + && [is-effective-target sse]) + || ([istarget loongarch*-*-*] + && [check_effective_target_hard_float]) + || ([istarget powerpc*-*-*] + && ![istarget powerpc-*-linux*paired*]) + || [istarget alpha*-*-*] + || [istarget aarch64*-*-*] + || [is-effective-target arm_neon] + || ([istarget s390*-*-*] + && [check_effective_target_s390_vx]) + || ([istarget riscv*-*-*] + && [check_effective_target_hard_float]) + }}] +} + # Return 1 if the target plus current options supports a vector # widening summation of *short* args into *int* result, 0 otherwise. #