Hi All, This adds an RTL pattern for when two NARROWB instructions are being combined with a PACK. The second NARROWB is then transformed into a NARROWT. For the example: void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n) { for (int i = 0; i < (n & -16); i+=1) pixel[i] += (pixel[i] * level) / 0xff; } we generate: addhnb z6.b, z0.h, z4.h addhnb z5.b, z1.h, z4.h addhnb z0.b, z0.h, z6.h addhnt z0.b, z1.h, z5.h add z0.b, z0.b, z2.b instead of: addhnb z6.b, z1.h, z4.h addhnb z5.b, z0.h, z4.h addhnb z1.b, z1.h, z6.h addhnb z0.b, z0.h, z5.h uzp1 z0.b, z0.b, z1.b add z0.b, z0.b, z2.b Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: * config/aarch64/aarch64-sve2.md (*aarch64_sve_pack_<sve_int_op><mode>): New. * config/aarch64/iterators.md (binary_top): New. gcc/testsuite/ChangeLog: * gcc.dg/vect/vect-div-bitmask-4.c: New test. * gcc.target/aarch64/sve2/div-by-bitmask_2.c: New test. --- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md index ab5dcc369481311e5bd68a1581265e1ce99b4b0f..0ee46c8b0d43467da4a6b98ad3c41e5d05d8cf38 100644 --- a/gcc/config/aarch64/aarch64-sve2.md +++ b/gcc/config/aarch64/aarch64-sve2.md @@ -1600,6 +1600,25 @@ (define_insn "@aarch64_sve_<sve_int_op><mode>" "<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
) +(define_insn_and_split "*aarch64_sve_pack_<sve_int_op><mode>" + [(set (match_operand:<VNARROW> 0 "register_operand" "=w") + (unspec:<VNARROW> + [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w") + (subreg:SVE_FULL_HSDI (unspec:<VNARROW> + [(match_operand:SVE_FULL_HSDI 2 "register_operand" "w") + (match_operand:SVE_FULL_HSDI 3 "register_operand" "w")] + SVE2_INT_BINARY_NARROWB) 0)] + UNSPEC_PACK))] + "TARGET_SVE2" + "#" + "&& true" + [(const_int 0)] +{ + rtx tmp = lowpart_subreg (<VNARROW>mode, operands[1], <MODE>mode); + emit_insn (gen_aarch64_sve (<SVE2_INT_BINARY_NARROWB:binary_top>, <MODE>mode, + operands[0], tmp, operands[2], operands[3])); +}) + ;; ------------------------------------------------------------------------- ;; ---- [INT] Narrowing right shifts ;; ------------------------------------------------------------------------- diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 0dd9dc66f7ccd78acacb759662d0cd561cd5b4ef..37d8161a33b1c399d80be82afa67613a087389d4 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -3589,6 +3589,11 @@ (define_int_attr brk_op [(UNSPEC_BRKA "a") (UNSPEC_BRKB "b") (define_int_attr sve_pred_op [(UNSPEC_PFIRST "pfirst") (UNSPEC_PNEXT "pnext")]) +(define_int_attr binary_top [(UNSPEC_ADDHNB "UNSPEC_ADDHNT") + (UNSPEC_RADDHNB "UNSPEC_RADDHNT") + (UNSPEC_RSUBHNB "UNSPEC_RSUBHNT") + (UNSPEC_SUBHNB "UNSPEC_SUBHNT")]) + (define_int_attr sve_int_op [(UNSPEC_ADCLB "adclb") (UNSPEC_ADCLT "adclt") (UNSPEC_ADDHNB "addhnb") diff --git a/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c new file mode 100644 index 0000000000000000000000000000000000000000..0df08bda6fd3e33280307ea15c82dd9726897cfd --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/vect-div-bitmask-4.c @@ -0,0 +1,26 @@ +/* { dg-require-effective-target vect_int } */ +/* { dg-additional-options "-fno-vect-cost-model" { target aarch64*-*-* } } */ + +#include <stdint.h> +#include "tree-vect.h" + +#define N 50 +#define TYPE uint32_t + +__attribute__((noipa, noinline, 
optimize("O1"))) +void fun1(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} + +__attribute__((noipa, noinline, optimize("O3"))) +void fun2(TYPE* restrict pixel, TYPE level, int n) +{ + for (int i = 0; i < n; i+=1) + pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} + +#include "vect-div-bitmask.h" + +/* { dg-final { scan-tree-dump-not "vect_recog_divmod_pattern: detected" "vect" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c new file mode 100644 index 0000000000000000000000000000000000000000..cddcebdf15ecaa9dc515f58cdbced36c8038db1b --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/div-by-bitmask_2.c @@ -0,0 +1,56 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O2 -std=c99" } */ +/* { dg-final { check-function-bodies "**" "" "" { target { le } } } } */ + +#include <stdint.h> + +/* +** draw_bitmap1: +** ... +** addhnb z6.b, z0.h, z4.h +** addhnb z5.b, z1.h, z4.h +** addhnb z0.b, z0.h, z6.h +** addhnt z0.b, z1.h, z5.h +** ... +*/ +void draw_bitmap1(uint8_t* restrict pixel, uint8_t level, int n) +{ + for (int i = 0; i < (n & -16); i+=1) + pixel[i] += (pixel[i] * level) / 0xff; +} + +void draw_bitmap2(uint8_t* restrict pixel, uint8_t level, int n) +{ + for (int i = 0; i < (n & -16); i+=1) + pixel[i] += (pixel[i] * level) / 0xfe; +} + +/* +** draw_bitmap3: +** ... +** addhnb z6.h, z0.s, z4.s +** addhnb z5.h, z1.s, z4.s +** addhnb z0.h, z0.s, z6.s +** addhnt z0.h, z1.s, z5.s +** ... +*/ +void draw_bitmap3(uint16_t* restrict pixel, uint16_t level, int n) +{ + for (int i = 0; i < (n & -16); i+=1) + pixel[i] += (pixel[i] * level) / 0xffffU; +} + +/* +** draw_bitmap4: +** ... +** addhnb z6.s, z0.d, z4.d +** addhnb z5.s, z1.d, z4.d +** addhnb z0.s, z0.d, z6.d +** addhnt z0.s, z1.d, z5.d +** ... 
+*/ +void draw_bitmap4(uint32_t* restrict pixel, uint32_t level, int n) +{ + for (int i = 0; i < (n & -16); i+=1) + pixel[i] += (pixel[i] * (uint64_t)level) / 0xffffffffUL; +} --