Hi All, This turns an inversion of the sign bit + arithmetic right shift into a comparison with 0. i.e. void fun1(int32_t *x, int n) { for (int i = 0; i < (n & -16); i++) x[i] = (-x[i]) >> 31; } now generates: .L3: ldr q0, [x0] cmgt v0.4s, v0.4s, #0 str q0, [x0], 16 cmp x0, x1 bne .L3 instead of: .L3: ldr q0, [x0] neg v0.4s, v0.4s sshr v0.4s, v0.4s, 31 str q0, [x0], 16 cmp x0, x1 bne .L3 Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-simd.md (*aarch64_simd_neg_ashr): New. * config/aarch64/predicates.md (aarch64_simd_shift_imm_vec_signbit): New. gcc/testsuite/ChangeLog: * gcc.target/aarch64/signbit-1.c: New test. --- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index 0045b100c6af1c007293ee26506199868be90e9f..9d936428b438c95b56614c94081d7e2ebc47d89f 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -1137,6 +1137,18 @@ (define_insn "aarch64_simd_ashr" [(set_attr "type" "neon_compare,neon_shift_imm")] ) +;; Additional opt when we negate the sign bit and then shift right +(define_insn "*aarch64_simd_neg_ashr" + [(set (match_operand:VDQ_I 0 "register_operand" "=w") + (ashiftrt:VDQ_I + (neg:VDQ_I + (match_operand:VDQ_I 1 "register_operand" "w")) + (match_operand:VDQ_I 2 "aarch64_simd_shift_imm_vec_signbit" "D1")))] + "TARGET_SIMD" + "cmgt\t%0., %1., #0" + [(set_attr "type" "neon_compare_zero")] +) + (define_insn "*aarch64_simd_sra" [(set (match_operand:VDQ_I 0 "register_operand" "=w") (plus:VDQ_I diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md index 7fd4f9e7d06d3082d6f3047290f0446789e1d0d2..12e7d35da154b10f0190274d0279cab313563455 100644 --- a/gcc/config/aarch64/predicates.md +++ b/gcc/config/aarch64/predicates.md @@ -545,6 +545,12 @@ (define_predicate "aarch64_simd_shift_imm_offset_di" (and (match_code "const_int") (match_test "IN_RANGE (INTVAL (op), 1, 64)"))) +(define_predicate "aarch64_simd_shift_imm_vec_signbit" + (and (match_code "const_vector") + (match_test "aarch64_const_vec_all_same_in_range_p (op, + GET_MODE_UNIT_BITSIZE (mode) - 1, + GET_MODE_UNIT_BITSIZE (mode) - 1)"))) + (define_predicate "aarch64_simd_shift_imm_vec_exact_top" (and (match_code "const_vector") (match_test "aarch64_const_vec_all_same_in_range_p (op, diff --git a/gcc/testsuite/gcc.target/aarch64/signbit-1.c b/gcc/testsuite/gcc.target/aarch64/signbit-1.c new file mode 100644 index 0000000000000000000000000000000000000000..3ebfb0586f37de29cf58635b27fe48503714447e --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/signbit-1.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O3 --save-temps" } */ + +#include + +void fun1(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 31; +} + +void fun2(int32_t *x, int n) +{ + for (int i = 0; i < (n & -16); i++) + x[i] = (-x[i]) >> 30; +} + +/* { dg-final { scan-assembler-times {\tcmgt\t} 1 } } */ --