* [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
@ 2019-03-21 9:57 Przemyslaw Wirkus
2019-03-22 12:47 ` Richard Sandiford
0 siblings, 1 reply; 8+ messages in thread
From: Przemyslaw Wirkus @ 2019-03-21 9:57 UTC (permalink / raw)
To: gcc-patches; +Cc: nd, Richard Earnshaw, James Greenhalgh, Marcus Shawcroft
[-- Attachment #1: Type: text/plain, Size: 1992 bytes --]
Hi all,
Vectorise __builtin_signbit (v4sf) with an unsigned shift-right vector
instruction.
Bootstrapped and tested on aarch64-none-linux-gnu.
Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
Before patch:
foo:
adrp x3, in // 37 [c=4 l=4] *movdi_aarch64/12
adrp x2, out // 40 [c=4 l=4] *movdi_aarch64/12
add x3, x3, :lo12:in // 39 [c=4 l=4] add_losym_di
add x2, x2, :lo12:out // 42 [c=4 l=4] add_losym_di
mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr w1, [x3, x0] // 10 [c=16 l=4] *zero_extendsidi2_aarch64/1
and w1, w1, -2147483648 // 11 [c=4 l=4] andsi3/1
str w1, [x2, x0] // 16 [c=4 l=4] *movsi_aarch64/8
add x0, x0, 4 // 17 [c=4 l=4] *adddi3_aarch64/0
cmp x0, 4096 // 19 [c=4 l=4] cmpdi/1
bne .L2 // 20 [c=4 l=4] condjump
ret // 50 [c=0 l=4] *do_return
After patch:
foo:
adrp x2, in // 36 [c=4 l=4] *movdi_aarch64/12
adrp x1, out // 39 [c=4 l=4] *movdi_aarch64/12
add x2, x2, :lo12:in // 38 [c=4 l=4] add_losym_di
add x1, x1, :lo12:out // 41 [c=4 l=4] add_losym_di
mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr q0, [x2, x0] // 10 [c=8 l=4] *aarch64_simd_movv4sf/0
ushr v0.4s, v0.4s, 31 // 11 [c=12 l=4] aarch64_simd_lshrv4si
str q0, [x1, x0] // 15 [c=4 l=4] *aarch64_simd_movv4si/2
add x0, x0, 16 // 16 [c=4 l=4] *adddi3_aarch64/0
cmp x0, 4096 // 18 [c=4 l=4] cmpdi/1
bne .L2 // 19 [c=4 l=4] condjump
ret // 49 [c=0 l=4] *do_return
Thanks,
Przemyslaw
gcc/ChangeLog:
2019-03-20 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* config/aarch64/aarch64-builtins.c
(aarch64_builtin_vectorized_function): Added CASE_CFN_SIGNBIT.
* config/aarch64/aarch64-simd-builtins.def (signbit):
Extend to V4SF mode.
* config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
defined.
gcc/testsuite/ChangeLog:
2019-02-28 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* gcc.target/aarch64/signbitv4sf.c: New test.
[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 3257 bytes --]
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 04063e5ed134d2e64487db23b8fa7794817b2739..86f8345848abd1515cef61824db525dc26ec9bdb 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1709,6 +1709,13 @@ aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
return aarch64_builtin_decls[builtin];
}
+ CASE_CFN_SIGNBIT:
+ {
+ if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_signbitv4sf];
+ else
+ return NULL_TREE;
+ }
case CFN_BUILT_IN_BSWAP16:
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 17bb0c4869b12ede2fc51a8f89d841ded8fac230..d568f0ba4e61febf0590b22789b006f3bfe11ccd 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -324,6 +324,9 @@
VAR1 (UNOP, rint, 2, hf)
VAR1 (UNOP, round, 2, hf)
+ /* Implemented by signbit<mode>2 pattern */
+ VAR1 (UNOP, signbit, 2, v4sf)
+
/* Implemented by l<fcvt_pattern><su_optab><VQDF:mode><vcvt_target>2. */
VAR1 (UNOP, lbtruncv4hf, 2, v4hi)
VAR1 (UNOP, lbtruncv8hf, 2, v8hi)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index be6c27d319a1ca6fee581d8f8856a4dff8f4a060..87e2a58649c3e5d490c499115cf6b7495d448c29 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -915,6 +915,21 @@
[(set_attr "type" "neon_ins<q>")]
)
+(define_expand "signbitv4sf2"
+ [(use (match_operand:V4SI 0 "register_operand"))
+ (use (match_operand:V4SF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+ int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+ rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+ shift_amount);
+ operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+ emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+ shift_vector));
+ DONE;
+})
+
(define_insn "aarch64_simd_lshr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+ for (int i = 0; i < N; i++)
+ out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+ foo ();
+
+ for (int i = 0; i < N; i++)
+ {
+ if (in[i] >= 0.0 && out[i])
+ abort ();
+ if (in[i] < 0.0 && !out[i])
+ abort ();
+ }
+
+ return 0;
+}
+
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-03-21 9:57 [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64 Przemyslaw Wirkus
@ 2019-03-22 12:47 ` Richard Sandiford
2019-05-03 8:46 ` Przemyslaw Wirkus
0 siblings, 1 reply; 8+ messages in thread
From: Richard Sandiford @ 2019-03-22 12:47 UTC (permalink / raw)
To: Przemyslaw Wirkus
Cc: gcc-patches, nd, Richard Earnshaw, James Greenhalgh, Marcus Shawcroft
Hi,
Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> Hi all,
>
> Vectorise __builtin_signbit (v4sf) with unsigned shift right vector
> instruction.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Assembly output for:
> $ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
>
> Before patch:
>
> foo:
> adrp x3, in // 37 [c=4 l=4] *movdi_aarch64/12
> adrp x2, out // 40 [c=4 l=4] *movdi_aarch64/12
> add x3, x3, :lo12:in // 39 [c=4 l=4] add_losym_di
> add x2, x2, :lo12:out // 42 [c=4 l=4] add_losym_di
> mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
> .p2align 3,,7
> .L2:
> ldr w1, [x3, x0] // 10 [c=16 l=4] *zero_extendsidi2_aarch64/1
> and w1, w1, -2147483648 // 11 [c=4 l=4] andsi3/1
> str w1, [x2, x0] // 16 [c=4 l=4] *movsi_aarch64/8
> add x0, x0, 4 // 17 [c=4 l=4] *adddi3_aarch64/0
> cmp x0, 4096 // 19 [c=4 l=4] cmpdi/1
> bne .L2 // 20 [c=4 l=4] condjump
> ret // 50 [c=0 l=4] *do_return
>
> After patch:
>
> foo:
> adrp x2, in // 36 [c=4 l=4] *movdi_aarch64/12
> adrp x1, out // 39 [c=4 l=4] *movdi_aarch64/12
> add x2, x2, :lo12:in // 38 [c=4 l=4] add_losym_di
> add x1, x1, :lo12:out // 41 [c=4 l=4] add_losym_di
> mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
> .p2align 3,,7
> .L2:
> ldr q0, [x2, x0] // 10 [c=8 l=4] *aarch64_simd_movv4sf/0
> ushr v0.4s, v0.4s, 31 // 11 [c=12 l=4] aarch64_simd_lshrv4si
> str q0, [x1, x0] // 15 [c=4 l=4] *aarch64_simd_movv4si/2
> add x0, x0, 16 // 16 [c=4 l=4] *adddi3_aarch64/0
> cmp x0, 4096 // 18 [c=4 l=4] cmpdi/1
> bne .L2 // 19 [c=4 l=4] condjump
> ret // 49 [c=0 l=4] *do_return
>
> Thanks,
> Przemyslaw
>
> gcc/ChangeLog:
>
> 2019-03-20 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
>
> * config/aarch64/aarch64-builtins.c
> (aarch64_builtin_vectorized_function): Added CASE_CFN_SIGNBIT.
> * config/aarch64/aarch64-simd-builtins.def: (signbit)
> Extend to V4SF mode.
> * config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
> defined.
I think it'd be better to add a new IFN_SIGNBIT internal function
that maps to signbit_optab. That way the compiler will know what
the vector function does and there'll be no need to add a new
built-in function.
Thanks,
Richard
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-03-22 12:47 ` Richard Sandiford
@ 2019-05-03 8:46 ` Przemyslaw Wirkus
2019-05-04 9:23 ` Richard Sandiford
0 siblings, 1 reply; 8+ messages in thread
From: Przemyslaw Wirkus @ 2019-05-03 8:46 UTC (permalink / raw)
To: Richard Sandiford
Cc: nd, Richard Earnshaw, James Greenhalgh, Marcus Shawcroft, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 445 bytes --]
Hi Richard,
New patch adds a new IFN_SIGNBIT internal function that maps
to signbit_optab.
gcc/ChangeLog:
2019-05-05 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* gcc/internal-fn.def (SIGNBIT): New.
* gcc/config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
defined.
gcc/testsuite/ChangeLog:
2019-05-05 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
* gcc/testsuite/gcc.target/aarch64/signbitv4sf.c: New test.
[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 2581 bytes --]
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..3374ce95b912cceaca49660df0579467f758974d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
[(set_attr "type" "neon_ins<q>")]
)
+(define_expand "signbitv4sf2"
+ [(use (match_operand:V4SI 0 "register_operand"))
+ (use (match_operand:V4SF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+ int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
+ rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
+ shift_amount);
+ operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
+
+ emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
+ shift_vector));
+ DONE;
+})
+
(define_insn "aarch64_simd_lshr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..aa06a5df1dbb3e295355d485b39963127a828b68
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,35 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+ for (int i = 0; i < N; i++)
+ out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+ foo ();
+
+ for (int i = 0; i < N; i++)
+ {
+ if (in[i] >= 0.0 && out[i])
+ abort ();
+ if (in[i] < 0.0 && !out[i])
+ abort ();
+ }
+
+ return 0;
+}
+
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-05-03 8:46 ` Przemyslaw Wirkus
@ 2019-05-04 9:23 ` Richard Sandiford
2019-05-13 10:53 ` Przemyslaw Wirkus
0 siblings, 1 reply; 8+ messages in thread
From: Richard Sandiford @ 2019-05-04 9:23 UTC (permalink / raw)
To: Przemyslaw Wirkus
Cc: nd, Richard Earnshaw, James Greenhalgh, Marcus Shawcroft, gcc-patches
Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> Hi Richard,
> New patch adds a new IFN_SIGNBIT internal function that maps
> to signbit_optab.
Thanks.
> gcc/ChangeLog:
>
> 2019-05-05 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
>
> * gcc/internal-fn.def (SIGNBIT): New.
> * gcc/config/aarch64/aarch64-simd.md (signbitv4sf2): New expand
> defined.
Sorry for the nitpicks (I'm not really a fan of ChangeLogs), but:
the filenames are relative to the changelog file, so no "gcc/" here and
> gcc/testsuite/ChangeLog:
>
> 2019-05-05 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com>
>
> * gcc/testsuite/gcc.target/aarch64/signbitv4sf.c: New test.
no "gcc/testsuite/" here.
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index e3852c5d182b70978d7603225fce55c0b8ee2894..3374ce95b912cceaca49660df0579467f758974d 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -935,6 +935,21 @@
> [(set_attr "type" "neon_ins<q>")]
> )
>
> +(define_expand "signbitv4sf2"
> + [(use (match_operand:V4SI 0 "register_operand"))
> + (use (match_operand:V4SF 1 "register_operand"))]
> + "TARGET_SIMD"
> +{
> + int shift_amount = GET_MODE_UNIT_BITSIZE (V4SImode) - 1;
> + rtx shift_vector = aarch64_simd_gen_const_vector_dup (V4SImode,
> + shift_amount);
> + operands[1] = lowpart_subreg (V4SImode, operands[1], V4SFmode);
> +
> + emit_insn (gen_aarch64_simd_lshrv4si (operands[0], operands[1],
> + shift_vector));
Formatting nit: argument should be indented to the column after the
innermost unclosed "(".
> + DONE;
> +})
> +
Looks good, but I think it can be generalised to handle v2sf if you use:
- :VDQSF instead of :V4SF
- <MODE> instead of other instances of V4SF (and <mode> instead of v4sf)
- <V_INT_EQUIV> instead of V4SI (and <v_int_equiv> instead of v4si)
E.g. this will handle SLP instances like:
void
f (int *i, float *f)
{
i[0] = __builtin_signbitf (f[0]);
i[1] = __builtin_signbitf (f[1]);
}
It could also be used for epilogue loop vectorisation, if we ever
turn that on by default for AArch64.
Thanks,
Richard
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-05-04 9:23 ` Richard Sandiford
@ 2019-05-13 10:53 ` Przemyslaw Wirkus
2019-05-14 8:08 ` Richard Sandiford
0 siblings, 1 reply; 8+ messages in thread
From: Przemyslaw Wirkus @ 2019-05-13 10:53 UTC (permalink / raw)
To: Richard Sandiford
Cc: nd, Richard Earnshaw, James Greenhalgh, Marcus Shawcroft, gcc-patches
[-- Attachment #1: Type: text/plain, Size: 2543 bytes --]
Hi all,
Vectorise __builtin_signbit (v2sf, v4sf) with an unsigned shift-right vector
instruction.
Bootstrapped and tested on aarch64-none-linux-gnu.
Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp
Before patch:
foo:
ldp w2, w1, [x1] // 37 [c=0 l=4] *load_pair_zero_extendsidi2_aarch64/0
and w2, w2, -2147483648 // 8 [c=4 l=4] andsi3/1
and w1, w1, -2147483648 // 12 [c=4 l=4] andsi3/1
stp w2, w1, [x0] // 38 [c=0 l=4] store_pair_sw_sisi/0
ret // 32 [c=0 l=4] *do_return
After patch:
foo:
ldr d0, [x1] // 7 [c=8 l=4] *aarch64_simd_movv2sf/0
ushr v0.2s, v0.2s, 31 // 8 [c=12 l=4] aarch64_simd_lshrv2si
str d0, [x0] // 9 [c=4 l=4] *aarch64_simd_movv2si/2
ret // 28 [c=0 l=4] *do_return
Assembly output for:
$ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
Before patch:
foo:
adrp x3, in // 38 [c=4 l=4] *movdi_aarch64/12
adrp x2, out // 41 [c=4 l=4] *movdi_aarch64/12
add x3, x3, :lo12:in // 40 [c=4 l=4] add_losym_di
add x2, x2, :lo12:out // 43 [c=4 l=4] add_losym_di
mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr w1, [x3, x0] // 10 [c=16 l=4] *zero_extendsidi2_aarch64/1
and w1, w1, -2147483648 // 11 [c=4 l=4] andsi3/1
str w1, [x2, x0] // 16 [c=4 l=4] *movsi_aarch64/8
add x0, x0, 4 // 17 [c=4 l=4] *adddi3_aarch64/0
cmp x0, 4096 // 19 [c=4 l=4] cmpdi/1
bne .L2 // 20 [c=4 l=4] condjump
ret // 51 [c=0 l=4] *do_return
After patch:
foo:
adrp x2, in // 37 [c=4 l=4] *movdi_aarch64/12
adrp x1, out // 40 [c=4 l=4] *movdi_aarch64/12
add x2, x2, :lo12:in // 39 [c=4 l=4] add_losym_di
add x1, x1, :lo12:out // 42 [c=4 l=4] add_losym_di
mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
.p2align 3,,7
.L2:
ldr q0, [x2, x0] // 10 [c=8 l=4] *aarch64_simd_movv4sf/0
ushr v0.4s, v0.4s, 31 // 11 [c=12 l=4] aarch64_simd_lshrv4si
str q0, [x1, x0] // 15 [c=4 l=4] *aarch64_simd_movv4si/2
add x0, x0, 16 // 16 [c=4 l=4] *adddi3_aarch64/0
cmp x0, 4096 // 18 [c=4 l=4] cmpdi/1
bne .L2 // 19 [c=4 l=4] condjump
ret // 50 [c=0 l=4] *do_return
OK for Trunk ?
Thanks,
Przemyslaw
gcc/ChangeLog:
2019-05-13 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com\>
* internal-fn.def (SIGNBIT): New.
* config/aarch64/aarch64-simd.md (signbitv2sf2): New expand
defined.
(signbitv4sf2): Likewise.
gcc/testsuite/ChangeLog:
2019-05-13 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com\>
* gcc.target/aarch64/signbitv4sf.c: New test.
* gcc.target/aarch64/signbitv2sf.c: New test.
[-- Attachment #2: patch.txt --]
[-- Type: text/plain, Size: 3729 bytes --]
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e3852c5d182b70978d7603225fce55c0b8ee2894..8f7227327cb960fb34c7b88e1bf283f8f17a3be9 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -935,6 +935,21 @@
[(set_attr "type" "neon_ins<q>")]
)
+(define_expand "signbit<mode>2"
+ [(use (match_operand:<V_INT_EQUIV> 0 "register_operand"))
+ (use (match_operand:VDQSF 1 "register_operand"))]
+ "TARGET_SIMD"
+{
+ int shift_amount = GET_MODE_UNIT_BITSIZE (<V_INT_EQUIV>mode) - 1;
+ rtx shift_vector = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
+ shift_amount);
+ operands[1] = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
+
+ emit_insn (gen_aarch64_simd_lshr<v_int_equiv> (operands[0], operands[1],
+ shift_vector));
+ DONE;
+})
+
(define_insn "aarch64_simd_lshr<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index e370eaa84767839c827b6ebd0c86303bcc36fa54..016301a58d83d7128817824d7c7ef92825c7e03e 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -217,6 +217,7 @@ DEF_INTERNAL_FLT_FN (LOG10, ECF_CONST, log10, unary)
DEF_INTERNAL_FLT_FN (LOG1P, ECF_CONST, log1p, unary)
DEF_INTERNAL_FLT_FN (LOG2, ECF_CONST, log2, unary)
DEF_INTERNAL_FLT_FN (LOGB, ECF_CONST, logb, unary)
+DEF_INTERNAL_FLT_FN (SIGNBIT, ECF_CONST, signbit, unary)
DEF_INTERNAL_FLT_FN (SIGNIFICAND, ECF_CONST, significand, unary)
DEF_INTERNAL_FLT_FN (SIN, ECF_CONST, sin, unary)
DEF_INTERNAL_FLT_FN (SINH, ECF_CONST, sinh, unary)
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..2587bfedd538f30a018cf827ea57cd583b2fa084
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv2sf.c
@@ -0,0 +1,40 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 8
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo (int *i, float *f)
+{
+ i[0] = __builtin_signbit (f[0]);
+ i[1] = __builtin_signbit (f[1]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.2s, v[0-9]+.2s, 31} } } */
+
+int
+main ()
+{
+ int i;
+
+ foo (out, in);
+ foo (out + 2, in + 2);
+ foo (out + 4, in + 4);
+ foo (out + 6, in + 6);
+
+ for (i = 0; i < N; i++)
+ {
+ if (in[i] >= 0.0 && out[i])
+ abort ();
+ if (in[i] < 0.0 && !out[i])
+ abort ();
+ }
+
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
new file mode 100644
index 0000000000000000000000000000000000000000..18cffdc7d5b2701a1bbf23f9f7d27b7a31568758
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/signbitv4sf.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-additional-options "-O3 --save-temps" } */
+
+extern void abort ();
+
+#define N 1024
+float in[N] = {1.0, -1.0, -2.0, 3.0, -5.0, -8.0, 13.0, 21.0};
+int out[N];
+
+void
+foo ()
+{
+ int i;
+ for (i = 0; i < N; i++)
+ out[i] = __builtin_signbit (in[i]);
+}
+
+/* { dg-final { scan-assembler-not {-2147483648} } } */
+/* { dg-final { scan-assembler {\tushr\tv[0-9]+.4s, v[0-9]+.4s, 31} } } */
+
+int
+main ()
+{
+ int i;
+
+ foo ();
+
+ for (i = 0; i < N; i++)
+ {
+ if (in[i] >= 0.0 && out[i])
+ abort ();
+ if (in[i] < 0.0 && !out[i])
+ abort ();
+ }
+
+ return 0;
+}
+
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-05-13 10:53 ` Przemyslaw Wirkus
@ 2019-05-14 8:08 ` Richard Sandiford
2019-05-14 8:14 ` Jakub Jelinek
0 siblings, 1 reply; 8+ messages in thread
From: Richard Sandiford @ 2019-05-14 8:08 UTC (permalink / raw)
To: Przemyslaw Wirkus
Cc: nd, Richard Earnshaw, James Greenhalgh, Marcus Shawcroft, gcc-patches
Przemyslaw Wirkus <Przemyslaw.Wirkus@arm.com> writes:
> Hi all,
>
> Vectorise __builtin_signbit (v2sf, v4sf) with unsigned shift right vector
> instruction.
>
> Bootstrapped and tested on aarch64-none-linux-gnu.
>
> Assembly output for:
> $ aarch64-elf-gcc -S -O3 signbitv2sf.c -dp
>
> Before patch:
>
> foo:
> ldp w2, w1, [x1] // 37 [c=0 l=4] *load_pair_zero_extendsidi2_aarch64/0
> and w2, w2, -2147483648 // 8 [c=4 l=4] andsi3/1
> and w1, w1, -2147483648 // 12 [c=4 l=4] andsi3/1
> stp w2, w1, [x0] // 38 [c=0 l=4] store_pair_sw_sisi/0
> ret // 32 [c=0 l=4] *do_return
>
> After patch:
>
> foo:
> ldr d0, [x1] // 7 [c=8 l=4] *aarch64_simd_movv2sf/0
> ushr v0.2s, v0.2s, 31 // 8 [c=12 l=4] aarch64_simd_lshrv2si
> str d0, [x0] // 9 [c=4 l=4] *aarch64_simd_movv2si/2
> ret // 28 [c=0 l=4] *do_return
>
> Assembly output for:
> $ aarch64-elf-gcc -S -O3 signbitv4sf.c -dp
>
> Before patch:
>
> foo:
> adrp x3, in // 38 [c=4 l=4] *movdi_aarch64/12
> adrp x2, out // 41 [c=4 l=4] *movdi_aarch64/12
> add x3, x3, :lo12:in // 40 [c=4 l=4] add_losym_di
> add x2, x2, :lo12:out // 43 [c=4 l=4] add_losym_di
> mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
> .p2align 3,,7
> .L2:
> ldr w1, [x3, x0] // 10 [c=16 l=4] *zero_extendsidi2_aarch64/1
> and w1, w1, -2147483648 // 11 [c=4 l=4] andsi3/1
> str w1, [x2, x0] // 16 [c=4 l=4] *movsi_aarch64/8
> add x0, x0, 4 // 17 [c=4 l=4] *adddi3_aarch64/0
> cmp x0, 4096 // 19 [c=4 l=4] cmpdi/1
> bne .L2 // 20 [c=4 l=4] condjump
> ret // 51 [c=0 l=4] \*do_return
>
> After patch:
>
> foo:
> adrp x2, in // 37 [c=4 l=4] *movdi_aarch64/12
> adrp x1, out // 40 [c=4 l=4] *movdi_aarch64/12
> add x2, x2, :lo12:in // 39 [c=4 l=4] add_losym_di
> add x1, x1, :lo12:out // 42 [c=4 l=4] add_losym_di
> mov x0, 0 // 3 [c=4 l=4] *movdi_aarch64/3
> .p2align 3,,7
> .L2:
> ldr q0, [x2, x0] // 10 [c=8 l=4] *aarch64_simd_movv4sf/0
> ushr v0.4s, v0.4s, 31 // 11 [c=12 l=4] aarch64_simd_lshrv4si
> str q0, [x1, x0] // 15 [c=4 l=4] *aarch64_simd_movv4si/2
> add x0, x0, 16 // 16 [c=4 l=4] *adddi3_aarch64/0
> cmp x0, 4096 // 18 [c=4 l=4] cmpdi/1
> bne .L2 // 19 [c=4 l=4] condjump
> ret // 50 [c=0 l=4] *do_return
>
> OK for Trunk ?
>
> Thanks,
> Przemyslaw
>
> gcc/ChangeLog:
>
> 2019-05-13 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com\>
>
> * internal-fn.def (SIGNBIT): New.
> * config/aarch64/aarch64-simd.md (signbitv2sf2): New expand
> defined.
> (signbitv4sf2): Likewise.
>
> gcc/testsuite/ChangeLog:
>
> 2019-05-13 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com\>
>
> * gcc.target/aarch64/signbitv4sf.c: New test.
> * gcc.target/aarch64/signbitv2sf.c: New test.
Thanks, applied as r271149.
Richard
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-05-14 8:08 ` Richard Sandiford
@ 2019-05-14 8:14 ` Jakub Jelinek
2019-05-14 8:39 ` Przemyslaw Wirkus
0 siblings, 1 reply; 8+ messages in thread
From: Jakub Jelinek @ 2019-05-14 8:14 UTC (permalink / raw)
To: Przemyslaw Wirkus, nd, Richard Earnshaw, James Greenhalgh,
Marcus Shawcroft, gcc-patches, richard.sandiford
On Tue, May 14, 2019 at 09:08:28AM +0100, Richard Sandiford wrote:
> > 2019-05-13 Przemyslaw Wirkus <przemyslaw.wirkus@arm.com\>
What is that backslash in \> doing in the ChangeLog entries?
Jakub
^ permalink raw reply [flat|nested] 8+ messages in thread
* Re: [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64
2019-05-14 8:14 ` Jakub Jelinek
@ 2019-05-14 8:39 ` Przemyslaw Wirkus
0 siblings, 0 replies; 8+ messages in thread
From: Przemyslaw Wirkus @ 2019-05-14 8:39 UTC (permalink / raw)
To: Jakub Jelinek
Cc: Richard Earnshaw, James Greenhalgh, Marcus Shawcroft,
gcc-patches, Richard Sandiford, nd
> What is that backslash in \> doing in the ChangeLog entries?
> Jakub
My bad: the tool I use for code review inserted the backslash because it wrongly
assumed I wanted Markdown. An early sign that machines want to take over, I guess...
I promise I will be more diligent next time.
kind regards
Przemyslaw
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2019-05-14 8:39 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-21 9:57 [PATCH][GCC][AArch64] Vectorise __builtin_signbit on aarch64 Przemyslaw Wirkus
2019-03-22 12:47 ` Richard Sandiford
2019-05-03 8:46 ` Przemyslaw Wirkus
2019-05-04 9:23 ` Richard Sandiford
2019-05-13 10:53 ` Przemyslaw Wirkus
2019-05-14 8:08 ` Richard Sandiford
2019-05-14 8:14 ` Jakub Jelinek
2019-05-14 8:39 ` Przemyslaw Wirkus
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).