* [PATCH]AArch64 sve2: Fix expansion of division [PR107830] @ 2022-11-23 14:24 Tamar Christina 2022-11-23 16:17 ` Richard Sandiford 0 siblings, 1 reply; 4+ messages in thread From: Tamar Christina @ 2022-11-23 14:24 UTC (permalink / raw) To: gcc-patches Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov, richard.sandiford [-- Attachment #1: Type: text/plain, Size: 2094 bytes --] Hi All, SVE has an actual division optab, and when using -Os we don't optimize the division away. This means that we need to distinguish between a div which we can optimize and one we cannot even during expansion. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR target/107830 * config/aarch64/aarch64.cc (aarch64_vectorize_can_special_div_by_constant): Check validity during codegen phase as well. gcc/testsuite/ChangeLog: PR target/107830 * gcc.target/aarch64/sve2/pr107830.c: New test. --- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4176d7b046a126664360596b6db79a43e77ff76a..bee23625807af95d5ec15ad45702961b2d7ab55d 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24322,12 +24322,15 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; + wide_int val = wi::add (cst, 1); + int pow = wi::exact_log2 (val); + bool valid_p = pow == (int)(element_precision (vectype) / 2); + /* SVE actually has a div operator, we we may have gotten here through + that route. 
*/ if (in0 == NULL_RTX && in1 == NULL_RTX) - { - wide_int val = wi::add (cst, 1); - int pow = wi::exact_log2 (val); - return pow == (int)(element_precision (vectype) / 2); - } + return valid_p; + else if (!valid_p) + return false; if (!VECTOR_TYPE_P (vectype)) return false; diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c new file mode 100644 index 0000000000000000000000000000000000000000..6d8ee3615fdb0083dbde1e45a2826fb681726139 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-additional-options "-Os -fopenmp" } */ + +void +f2 (int *a) +{ + unsigned int i; + +#pragma omp simd + for (i = 0; i < 4; ++i) + a[i / 3] -= 4; +} -- [-- Attachment #2: rb16645.patch --] [-- Type: text/plain, Size: 1493 bytes --] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4176d7b046a126664360596b6db79a43e77ff76a..bee23625807af95d5ec15ad45702961b2d7ab55d 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24322,12 +24322,15 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; + wide_int val = wi::add (cst, 1); + int pow = wi::exact_log2 (val); + bool valid_p = pow == (int)(element_precision (vectype) / 2); + /* SVE actually has a div operator, we we may have gotten here through + that route. 
*/ if (in0 == NULL_RTX && in1 == NULL_RTX) - { - wide_int val = wi::add (cst, 1); - int pow = wi::exact_log2 (val); - return pow == (int)(element_precision (vectype) / 2); - } + return valid_p; + else if (!valid_p) + return false; if (!VECTOR_TYPE_P (vectype)) return false; diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c new file mode 100644 index 0000000000000000000000000000000000000000..6d8ee3615fdb0083dbde1e45a2826fb681726139 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-additional-options "-Os -fopenmp" } */ + +void +f2 (int *a) +{ + unsigned int i; + +#pragma omp simd + for (i = 0; i < 4; ++i) + a[i / 3] -= 4; +} ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH]AArch64 sve2: Fix expansion of division [PR107830] 2022-11-23 14:24 [PATCH]AArch64 sve2: Fix expansion of division [PR107830] Tamar Christina @ 2022-11-23 16:17 ` Richard Sandiford 2022-11-24 18:35 ` Tamar Christina 0 siblings, 1 reply; 4+ messages in thread From: Richard Sandiford @ 2022-11-23 16:17 UTC (permalink / raw) To: Tamar Christina Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov Tamar Christina <tamar.christina@arm.com> writes: > Hi All, > > SVE has an actual division optab, and when using -Os we don't > optimize the division away. This means that we need to distinguish > between a div which we can optimize and one we cannot even during > expansion. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR target/107830 > * config/aarch64/aarch64.cc > (aarch64_vectorize_can_special_div_by_constant): Check validity during > codegen phase as well. > > gcc/testsuite/ChangeLog: > > PR target/107830 > * gcc.target/aarch64/sve2/pr107830.c: New test. > > --- inline copy of patch -- > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 4176d7b046a126664360596b6db79a43e77ff76a..bee23625807af95d5ec15ad45702961b2d7ab55d 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -24322,12 +24322,15 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) > return false; > > + wide_int val = wi::add (cst, 1); > + int pow = wi::exact_log2 (val); > + bool valid_p = pow == (int)(element_precision (vectype) / 2); > + /* SVE actually has a div operator, we we may have gotten here through > + that route. 
*/ > if (in0 == NULL_RTX && in1 == NULL_RTX) > - { > - wide_int val = wi::add (cst, 1); > - int pow = wi::exact_log2 (val); > - return pow == (int)(element_precision (vectype) / 2); > - } > + return valid_p; > + else if (!valid_p) > + return false; Is this equivalent to: int pow = wi::exact_log2 (cst + 1); if (pow != (int) (element_precision (vectype) / 2)) return false; /* We can use the optimized pattern. */ if (in0 == NULL_RTX && in1 == NULL_RTX) return true; ? If so, I'd find that slightly easier to follow, but I realise it's personal taste. OK with that change if it works and you agree. While looking at this, I noticed that we ICE for: void f(unsigned short *restrict p1, unsigned int *restrict p2) { for (int i = 0; i < 16; ++i) { p1[i] /= 0xff; p2[i] += 1; } } for -march=armv8-a+sve2 -msve-vector-bits=512. I guess we need to filter out partial modes or (better) add support for them. Adding support for them probably requires changes to the underlying ADDHNB pattern. Thanks, Richard > if (!VECTOR_TYPE_P (vectype)) > return false; > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c > new file mode 100644 > index 0000000000000000000000000000000000000000..6d8ee3615fdb0083dbde1e45a2826fb681726139 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target fopenmp } */ > +/* { dg-additional-options "-Os -fopenmp" } */ > + > +void > +f2 (int *a) > +{ > + unsigned int i; > + > +#pragma omp simd > + for (i = 0; i < 4; ++i) > + a[i / 3] -= 4; > +} ^ permalink raw reply [flat|nested] 4+ messages in thread
* RE: [PATCH]AArch64 sve2: Fix expansion of division [PR107830] 2022-11-23 16:17 ` Richard Sandiford @ 2022-11-24 18:35 ` Tamar Christina 2022-11-24 18:44 ` Richard Sandiford 0 siblings, 1 reply; 4+ messages in thread From: Tamar Christina @ 2022-11-24 18:35 UTC (permalink / raw) To: Richard Sandiford Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov [-- Attachment #1: Type: text/plain, Size: 6064 bytes --] > -----Original Message----- > From: Richard Sandiford <richard.sandiford@arm.com> > Sent: Wednesday, November 23, 2022 4:18 PM > To: Tamar Christina <Tamar.Christina@arm.com> > Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw > <Richard.Earnshaw@arm.com>; Marcus Shawcroft > <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> > Subject: Re: [PATCH]AArch64 sve2: Fix expansion of division [PR107830] > > Tamar Christina <tamar.christina@arm.com> writes: > > Hi All, > > > > SVE has an actual division optab, and when using -Os we don't optimize > > the division away. This means that we need to distinguish between a > > div which we can optimize and one we cannot even during expansion. > > > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > > > Ok for master? > > > > Thanks, > > Tamar > > > > gcc/ChangeLog: > > > > PR target/107830 > > * config/aarch64/aarch64.cc > > (aarch64_vectorize_can_special_div_by_constant): Check validity > during > > codegen phase as well. > > > > gcc/testsuite/ChangeLog: > > > > PR target/107830 > > * gcc.target/aarch64/sve2/pr107830.c: New test. 
> > > > --- inline copy of patch -- > > diff --git a/gcc/config/aarch64/aarch64.cc > > b/gcc/config/aarch64/aarch64.cc index > > > 4176d7b046a126664360596b6db79a43e77ff76a..bee23625807af95d5ec15ad45 > 702 > > 961b2d7ab55d 100644 > > --- a/gcc/config/aarch64/aarch64.cc > > +++ b/gcc/config/aarch64/aarch64.cc > > @@ -24322,12 +24322,15 @@ > aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > > if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) > > return false; > > > > + wide_int val = wi::add (cst, 1); > > + int pow = wi::exact_log2 (val); > > + bool valid_p = pow == (int)(element_precision (vectype) / 2); > > + /* SVE actually has a div operator, we we may have gotten here through > > + that route. */ > > if (in0 == NULL_RTX && in1 == NULL_RTX) > > - { > > - wide_int val = wi::add (cst, 1); > > - int pow = wi::exact_log2 (val); > > - return pow == (int)(element_precision (vectype) / 2); > > - } > > + return valid_p; > > + else if (!valid_p) > > + return false; > > Is this equivalent to: > > int pow = wi::exact_log2 (cst + 1); > if (pow != (int) (element_precision (vectype) / 2)) > return false; > > /* We can use the optimized pattern. */ > if (in0 == NULL_RTX && in1 == NULL_RTX) > return true; > > ? If so, I'd find that slightly easier to follow, but I realise it's personal taste. > OK with that change if it works and you agree. > > While looking at this, I noticed that we ICE for: > > void f(unsigned short *restrict p1, unsigned int *restrict p2) > { > for (int i = 0; i < 16; ++i) > { > p1[i] /= 0xff; > p2[i] += 1; > } > } > > for -march=armv8-a+sve2 -msve-vector-bits=512. I guess we need to filter > out partial modes or (better) add support for them. Adding support for them > probably requires changes to the underlying ADDHNB pattern. I've prevented the ice by checking if the expansion for the mode exists. I'd like to defer adding partial support because when I tried I had to modify some iterators as well and need to check that it's safe to do so. 
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR target/107830 * config/aarch64/aarch64.cc (aarch64_vectorize_can_special_div_by_constant): Check validity during codegen phase as well. gcc/testsuite/ChangeLog: PR target/107830 * gcc.target/aarch64/sve2/pr107830-1.c: New test. * gcc.target/aarch64/sve2/pr107830-2.c: New test. --- inline copy of patch ---- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4176d7b046a126664360596b6db79a43e77ff76a..02aa1f34ac6155b877340d788c6d151b7c8d8bcd 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24322,12 +24322,18 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; + wide_int val = wi::add (cst, 1); + int pow = wi::exact_log2 (val); + auto insn_code = maybe_code_for_aarch64_bitmask_udiv3 (TYPE_MODE (vectype)); + /* SVE actually has a div operator, we may have gotten here through + that route. */ + if (pow != (int)(element_precision (vectype) / 2) + || insn_code == CODE_FOR_nothing) + return false; + + /* We can use the optimized pattern. 
*/ if (in0 == NULL_RTX && in1 == NULL_RTX) - { - wide_int val = wi::add (cst, 1); - int pow = wi::exact_log2 (val); - return pow == (int)(element_precision (vectype) / 2); - } + return true; if (!VECTOR_TYPE_P (vectype)) return false; diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c new file mode 100644 index 0000000000000000000000000000000000000000..6d8ee3615fdb0083dbde1e45a2826fb681726139 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-additional-options "-Os -fopenmp" } */ + +void +f2 (int *a) +{ + unsigned int i; + +#pragma omp simd + for (i = 0; i < 4; ++i) + a[i / 3] -= 4; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c new file mode 100644 index 0000000000000000000000000000000000000000..4ec45d7a47e591263fa9acbfc7e6d3297e10a109 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -msve-vector-bits=512" } */ + +void f(unsigned short *restrict p1, unsigned int *restrict p2) +{ + for (int i = 0; i < 16; ++i) + { + p1[i] /= 0xff; + p2[i] += 1; + } +} + [-- Attachment #2: rb16645.patch --] [-- Type: application/octet-stream, Size: 2195 bytes --] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 4176d7b046a126664360596b6db79a43e77ff76a..02aa1f34ac6155b877340d788c6d151b7c8d8bcd 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24322,12 +24322,18 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; + wide_int val = wi::add (cst, 1); + int pow = wi::exact_log2 (val); + auto insn_code = maybe_code_for_aarch64_bitmask_udiv3 (TYPE_MODE (vectype)); + /* SVE actually has a div operator, we 
may have gotten here through + that route. */ + if (pow != (int)(element_precision (vectype) / 2) + || insn_code == CODE_FOR_nothing) + return false; + + /* We can use the optimized pattern. */ if (in0 == NULL_RTX && in1 == NULL_RTX) - { - wide_int val = wi::add (cst, 1); - int pow = wi::exact_log2 (val); - return pow == (int)(element_precision (vectype) / 2); - } + return true; if (!VECTOR_TYPE_P (vectype)) return false; diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c new file mode 100644 index 0000000000000000000000000000000000000000..6d8ee3615fdb0083dbde1e45a2826fb681726139 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target fopenmp } */ +/* { dg-additional-options "-Os -fopenmp" } */ + +void +f2 (int *a) +{ + unsigned int i; + +#pragma omp simd + for (i = 0; i < 4; ++i) + a[i / 3] -= 4; +} diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c new file mode 100644 index 0000000000000000000000000000000000000000..4ec45d7a47e591263fa9acbfc7e6d3297e10a109 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O3 -msve-vector-bits=512" } */ + +void f(unsigned short *restrict p1, unsigned int *restrict p2) +{ + for (int i = 0; i < 16; ++i) + { + p1[i] /= 0xff; + p2[i] += 1; + } +} + ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH]AArch64 sve2: Fix expansion of division [PR107830] 2022-11-24 18:35 ` Tamar Christina @ 2022-11-24 18:44 ` Richard Sandiford 0 siblings, 0 replies; 4+ messages in thread From: Richard Sandiford @ 2022-11-24 18:44 UTC (permalink / raw) To: Tamar Christina Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov Tamar Christina <Tamar.Christina@arm.com> writes: >> -----Original Message----- >> From: Richard Sandiford <richard.sandiford@arm.com> >> Sent: Wednesday, November 23, 2022 4:18 PM >> To: Tamar Christina <Tamar.Christina@arm.com> >> Cc: gcc-patches@gcc.gnu.org; nd <nd@arm.com>; Richard Earnshaw >> <Richard.Earnshaw@arm.com>; Marcus Shawcroft >> <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov <Kyrylo.Tkachov@arm.com> >> Subject: Re: [PATCH]AArch64 sve2: Fix expansion of division [PR107830] >> >> Tamar Christina <tamar.christina@arm.com> writes: >> > Hi All, >> > >> > SVE has an actual division optab, and when using -Os we don't optimize >> > the division away. This means that we need to distinguish between a >> > div which we can optimize and one we cannot even during expansion. >> > >> > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> > >> > Ok for master? >> > >> > Thanks, >> > Tamar >> > >> > gcc/ChangeLog: >> > >> > PR target/107830 >> > * config/aarch64/aarch64.cc >> > (aarch64_vectorize_can_special_div_by_constant): Check validity >> during >> > codegen phase as well. >> > >> > gcc/testsuite/ChangeLog: >> > >> > PR target/107830 >> > * gcc.target/aarch64/sve2/pr107830.c: New test. 
>> > >> > --- inline copy of patch -- >> > diff --git a/gcc/config/aarch64/aarch64.cc >> > b/gcc/config/aarch64/aarch64.cc index >> > >> 4176d7b046a126664360596b6db79a43e77ff76a..bee23625807af95d5ec15ad45 >> 702 >> > 961b2d7ab55d 100644 >> > --- a/gcc/config/aarch64/aarch64.cc >> > +++ b/gcc/config/aarch64/aarch64.cc >> > @@ -24322,12 +24322,15 @@ >> aarch64_vectorize_can_special_div_by_constant (enum tree_code code, >> > if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) >> > return false; >> > >> > + wide_int val = wi::add (cst, 1); >> > + int pow = wi::exact_log2 (val); >> > + bool valid_p = pow == (int)(element_precision (vectype) / 2); >> > + /* SVE actually has a div operator, we we may have gotten here through >> > + that route. */ >> > if (in0 == NULL_RTX && in1 == NULL_RTX) >> > - { >> > - wide_int val = wi::add (cst, 1); >> > - int pow = wi::exact_log2 (val); >> > - return pow == (int)(element_precision (vectype) / 2); >> > - } >> > + return valid_p; >> > + else if (!valid_p) >> > + return false; >> >> Is this equivalent to: >> >> int pow = wi::exact_log2 (cst + 1); >> if (pow != (int) (element_precision (vectype) / 2)) >> return false; >> >> /* We can use the optimized pattern. */ >> if (in0 == NULL_RTX && in1 == NULL_RTX) >> return true; >> >> ? If so, I'd find that slightly easier to follow, but I realise it's personal taste. >> OK with that change if it works and you agree. >> >> While looking at this, I noticed that we ICE for: >> >> void f(unsigned short *restrict p1, unsigned int *restrict p2) >> { >> for (int i = 0; i < 16; ++i) >> { >> p1[i] /= 0xff; >> p2[i] += 1; >> } >> } >> >> for -march=armv8-a+sve2 -msve-vector-bits=512. I guess we need to filter >> out partial modes or (better) add support for them. Adding support for them >> probably requires changes to the underlying ADDHNB pattern. > > I've prevented the ice by checking if the expansion for the mode exists. 
I'd like to > defer adding partial support because when I tried I had to modify some iterators > as well and need to check that it's safe to do so. Sounds good. > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > gcc/ChangeLog: > > PR target/107830 > * config/aarch64/aarch64.cc > (aarch64_vectorize_can_special_div_by_constant): Check validity during > codegen phase as well. > > gcc/testsuite/ChangeLog: > > PR target/107830 > * gcc.target/aarch64/sve2/pr107830-1.c: New test. > * gcc.target/aarch64/sve2/pr107830-2.c: New test. > > --- inline copy of patch ---- > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 4176d7b046a126664360596b6db79a43e77ff76a..02aa1f34ac6155b877340d788c6d151b7c8d8bcd 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -24322,12 +24322,18 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) > return false; > > + wide_int val = wi::add (cst, 1); > + int pow = wi::exact_log2 (val); Does the: int pow = wi::exact_log2 (cst + 1); I suggested above not work? That seems easier to read IMO, since there are no other uses of "val". > + auto insn_code = maybe_code_for_aarch64_bitmask_udiv3 (TYPE_MODE (vectype)); > + /* SVE actually has a div operator, we may have gotten here through > + that route. */ > + if (pow != (int)(element_precision (vectype) / 2) Formatting nit: should be a space after "(int)". OK with those changes, thanks. Richard > + || insn_code == CODE_FOR_nothing) > + return false; > + > + /* We can use the optimized pattern. 
*/ > if (in0 == NULL_RTX && in1 == NULL_RTX) > - { > - wide_int val = wi::add (cst, 1); > - int pow = wi::exact_log2 (val); > - return pow == (int)(element_precision (vectype) / 2); > - } > + return true; > > if (!VECTOR_TYPE_P (vectype)) > return false; > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..6d8ee3615fdb0083dbde1e45a2826fb681726139 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-1.c > @@ -0,0 +1,13 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target fopenmp } */ > +/* { dg-additional-options "-Os -fopenmp" } */ > + > +void > +f2 (int *a) > +{ > + unsigned int i; > + > +#pragma omp simd > + for (i = 0; i < 4; ++i) > + a[i / 3] -= 4; > +} > diff --git a/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c > new file mode 100644 > index 0000000000000000000000000000000000000000..4ec45d7a47e591263fa9acbfc7e6d3297e10a109 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/sve2/pr107830-2.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O3 -msve-vector-bits=512" } */ > + > +void f(unsigned short *restrict p1, unsigned int *restrict p2) > +{ > + for (int i = 0; i < 16; ++i) > + { > + p1[i] /= 0xff; > + p2[i] += 1; > + } > +} > + ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2022-11-24 18:44 UTC | newest] Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-11-23 14:24 [PATCH]AArch64 sve2: Fix expansion of division [PR107830] Tamar Christina 2022-11-23 16:17 ` Richard Sandiford 2022-11-24 18:35 ` Tamar Christina 2022-11-24 18:44 ` Richard Sandiford
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).