* [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] @ 2022-12-08 16:39 Tamar Christina 2022-12-08 17:00 ` Richard Earnshaw 0 siblings, 1 reply; 5+ messages in thread From: Tamar Christina @ 2022-12-08 16:39 UTC (permalink / raw) To: gcc-patches Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov, richard.sandiford [-- Attachment #1: Type: text/plain, Size: 2538 bytes --] Hi All, At -O0 (as opposed to e.g. volatile) we can get into a situation where the in0 and result RTL arguments passed to the division function are memory locations instead of registers. We could reject these early on by checking that the gimple values are GIMPLE registers, but I think it's better to handle them. As such, I force them to registers, emit a move to the memory locations, and leave it up to reload to handle. This fixes the ICE and still allows the optimization in these cases, which improves the code quality a lot. Bootstrapped and regtested on aarch64-none-linux-gnu with no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR target/107988 * config/aarch64/aarch64.cc (aarch64_vectorize_can_special_div_by_constant): Ensure input and output RTL are registers. gcc/testsuite/ChangeLog: PR target/107988 * gcc.target/aarch64/pr107988-1.c: New test. 
--- inline copy of patch -- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fcccce90e1b241fcb3aa97025225 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24337,12 +24337,27 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if (!VECTOR_TYPE_P (vectype)) return false; + if (!REG_P (in0)) + in0 = force_reg (GET_MODE (in0), in0); + gcc_assert (output); - if (!*output) - *output = gen_reg_rtx (TYPE_MODE (vectype)); + rtx res = NULL_RTX; + + /* Once e get to this point we cannot reject the RTL, if it's not a reg then + Create a new reg and write the result to the output afterwards. */ + if (!*output || !REG_P (*output)) + res = gen_reg_rtx (TYPE_MODE (vectype)); + else + res = *output; + + emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), res, in0, in1)); + + if (*output && res != *output) + emit_move_insn (*output, res); + else + *output = res; - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); return true; } diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c new file mode 100644 index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O0" } */ +typedef unsigned short __attribute__((__vector_size__ (16))) V; + +V +foo (V v) +{ + v /= 255; + return v; +} -- [-- Attachment #2: rb16679.patch --] [-- Type: text/plain, Size: 1609 bytes --] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fcccce90e1b241fcb3aa97025225 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24337,12 +24337,27 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if 
(!VECTOR_TYPE_P (vectype)) return false; + if (!REG_P (in0)) + in0 = force_reg (GET_MODE (in0), in0); + gcc_assert (output); - if (!*output) - *output = gen_reg_rtx (TYPE_MODE (vectype)); + rtx res = NULL_RTX; + + /* Once e get to this point we cannot reject the RTL, if it's not a reg then + Create a new reg and write the result to the output afterwards. */ + if (!*output || !REG_P (*output)) + res = gen_reg_rtx (TYPE_MODE (vectype)); + else + res = *output; + + emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), res, in0, in1)); + + if (*output && res != *output) + emit_move_insn (*output, res); + else + *output = res; - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); return true; } diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c new file mode 100644 index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O0" } */ +typedef unsigned short __attribute__((__vector_size__ (16))) V; + +V +foo (V v) +{ + v /= 255; + return v; +} ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] 2022-12-08 16:39 [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] Tamar Christina @ 2022-12-08 17:00 ` Richard Earnshaw 2022-12-09 7:08 ` Richard Sandiford 0 siblings, 1 reply; 5+ messages in thread From: Richard Earnshaw @ 2022-12-08 17:00 UTC (permalink / raw) To: Tamar Christina, gcc-patches Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov, richard.sandiford On 08/12/2022 16:39, Tamar Christina via Gcc-patches wrote: > Hi All, > > At -O0 (as opposed to e.g. volatile) we can get into the situation where the > in0 and result RTL arguments passed to the division function are memory > locations instead of registers. I think we could reject these early on by > checking that the gimple values are GIMPLE registers, but I think it's better to > handle it. > > As such I force them to registers and emit a move to the memory locations and > leave it up to reload to handle. This fixes the ICE and still allows the > optimization in these cases, which improves the code quality a lot. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? > > Thanks, > Tamar > > > > gcc/ChangeLog: > > PR target/107988 > * config/aarch64/aarch64.cc > (aarch64_vectorize_can_special_div_by_constant): Ensure input and output > RTL are registers. > > gcc/testsuite/ChangeLog: > > PR target/107988 > * gcc.target/aarch64/pr107988-1.c: New test. 
> > --- inline copy of patch -- > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fcccce90e1b241fcb3aa97025225 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -24337,12 +24337,27 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > if (!VECTOR_TYPE_P (vectype)) > return false; > > + if (!REG_P (in0)) > + in0 = force_reg (GET_MODE (in0), in0); > + > gcc_assert (output); > > - if (!*output) > - *output = gen_reg_rtx (TYPE_MODE (vectype)); > + rtx res = NULL_RTX; > + > + /* Once e get to this point we cannot reject the RTL, if it's not a reg then > + Create a new reg and write the result to the output afterwards. */ > + if (!*output || !REG_P (*output)) > + res = gen_reg_rtx (TYPE_MODE (vectype)); > + else > + res = *output; Why not write rtx res = *output if (!res || !REG_P (res)) res = gen_reg_rtx... then you don't need either the else clause or the dead NULL_RTX assignment. > + > + emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), res, in0, in1)); > + > + if (*output && res != *output) > + emit_move_insn (*output, res); > + else > + *output = res; > > - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); > return true; > } > > diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O0" } */ > +typedef unsigned short __attribute__((__vector_size__ (16))) V; > + > +V > +foo (V v) > +{ > + v /= 255; > + return v; > +} > > > > Otherwise OK. R. ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] 2022-12-08 17:00 ` Richard Earnshaw @ 2022-12-09 7:08 ` Richard Sandiford 2022-12-14 11:18 ` Tamar Christina 0 siblings, 1 reply; 5+ messages in thread From: Richard Sandiford @ 2022-12-09 7:08 UTC (permalink / raw) To: Richard Earnshaw Cc: Tamar Christina, gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, Kyrylo.Tkachov Richard Earnshaw <Richard.Earnshaw@foss.arm.com> writes: > On 08/12/2022 16:39, Tamar Christina via Gcc-patches wrote: >> Hi All, >> >> At -O0 (as opposed to e.g. volatile) we can get into the situation where the >> in0 and result RTL arguments passed to the division function are memory >> locations instead of registers. I think we could reject these early on by >> checking that the gimple values are GIMPLE registers, but I think it's better to >> handle it. >> >> As such I force them to registers and emit a move to the memory locations and >> leave it up to reload to handle. This fixes the ICE and still allows the >> optimization in these cases, which improves the code quality a lot. >> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> >> Ok for master? >> >> Thanks, >> Tamar >> >> >> >> gcc/ChangeLog: >> >> PR target/107988 >> * config/aarch64/aarch64.cc >> (aarch64_vectorize_can_special_div_by_constant): Ensure input and output >> RTL are registers. >> >> gcc/testsuite/ChangeLog: >> >> PR target/107988 >> * gcc.target/aarch64/pr107988-1.c: New test. 
>> >> --- inline copy of patch -- >> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc >> index b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fcccce90e1b241fcb3aa97025225 100644 >> --- a/gcc/config/aarch64/aarch64.cc >> +++ b/gcc/config/aarch64/aarch64.cc >> @@ -24337,12 +24337,27 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, >> if (!VECTOR_TYPE_P (vectype)) >> return false; >> >> + if (!REG_P (in0)) >> + in0 = force_reg (GET_MODE (in0), in0); >> + >> gcc_assert (output); >> >> - if (!*output) >> - *output = gen_reg_rtx (TYPE_MODE (vectype)); >> + rtx res = NULL_RTX; >> + >> + /* Once e get to this point we cannot reject the RTL, if it's not a reg then >> + Create a new reg and write the result to the output afterwards. */ >> + if (!*output || !REG_P (*output)) >> + res = gen_reg_rtx (TYPE_MODE (vectype)); >> + else >> + res = *output; > > Why not write > rtx res = *output > if (!res || !REG_P (res)) > res = gen_reg_rtx... > > then you don't need either the else clause or the dead NULL_RTX assignment. I'd prefer that we use the expand_insn interface, which already has logic for coercing inputs and outputs to predicates. Something like: machine_mode mode = TYPE_MODE (vectype); unsigned int flags = aarch64_classify_vector_mode (mode); if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; ... expand_operand ops[3]; create_output_operand (&ops[0], *output, mode); create_input_operand (&ops[1], in0, mode); create_fixed_operand (&ops[2], in1); expand_insn (insn_code, 3, ops); *output = ops[0].value; return true; On this function: why do we have the VECTOR_TYPE_P condition in: /* We can use the optimized pattern. */ if (in0 == NULL_RTX && in1 == NULL_RTX) return true; if (!VECTOR_TYPE_P (vectype)) return false; ? It seems odd to be returning false after we have decided (in the non-generating case) that everything is OK. 
When would we see a vector mode that has an associated division instruction (checked above this), and yet not have a vector type? Thanks, Richard >> + >> + emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), res, in0, in1)); >> + >> + if (*output && res != *output) >> + emit_move_insn (*output, res); >> + else >> + *output = res; >> >> - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); >> return true; >> } >> >> diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c >> new file mode 100644 >> index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c >> @@ -0,0 +1,10 @@ >> +/* { dg-do compile } */ >> +/* { dg-additional-options "-O0" } */ >> +typedef unsigned short __attribute__((__vector_size__ (16))) V; >> + >> +V >> +foo (V v) >> +{ >> + v /= 255; >> + return v; >> +} >> >> >> >> > > Otherwise OK. > > R. ^ permalink raw reply [flat|nested] 5+ messages in thread
* RE: [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] 2022-12-09 7:08 ` Richard Sandiford @ 2022-12-14 11:18 ` Tamar Christina 2022-12-14 13:50 ` Richard Sandiford 0 siblings, 1 reply; 5+ messages in thread From: Tamar Christina @ 2022-12-14 11:18 UTC (permalink / raw) To: Richard Sandiford, Richard Earnshaw Cc: gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov [-- Attachment #1: Type: text/plain, Size: 5918 bytes --] > -----Original Message----- > From: Richard Sandiford <richard.sandiford@arm.com> > Sent: Friday, December 9, 2022 7:08 AM > To: Richard Earnshaw <Richard.Earnshaw@foss.arm.com> > Cc: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org; > nd <nd@arm.com>; Richard Earnshaw <Richard.Earnshaw@arm.com>; > Marcus Shawcroft <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov > <Kyrylo.Tkachov@arm.com> > Subject: Re: [PATCH]AArch64 div-by-255, ensure that arguments are > registers. [PR107988] > > Richard Earnshaw <Richard.Earnshaw@foss.arm.com> writes: > > On 08/12/2022 16:39, Tamar Christina via Gcc-patches wrote: > >> Hi All, > >> > >> At -O0 (as opposed to e.g. volatile) we can get into the situation > >> where the > >> in0 and result RTL arguments passed to the division function are > >> memory locations instead of registers. I think we could reject these > >> early on by checking that the gimple values are GIMPLE registers, but > >> I think it's better to handle it. > >> > >> As such I force them to registers and emit a move to the memory > >> locations and leave it up to reload to handle. This fixes the ICE > >> and still allows the optimization in these cases, which improves the code > quality a lot. > >> > >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > >> > >> Ok for master? 
> >> > >> Thanks, > >> Tamar > >> > >> > >> > >> gcc/ChangeLog: > >> > >> PR target/107988 > >> * config/aarch64/aarch64.cc > >> (aarch64_vectorize_can_special_div_by_constant): Ensure input and > output > >> RTL are registers. > >> > >> gcc/testsuite/ChangeLog: > >> > >> PR target/107988 > >> * gcc.target/aarch64/pr107988-1.c: New test. > >> > >> --- inline copy of patch -- > >> diff --git a/gcc/config/aarch64/aarch64.cc > >> b/gcc/config/aarch64/aarch64.cc index > >> > b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fcccce90e1b241f > >> cb3aa97025225 100644 > >> --- a/gcc/config/aarch64/aarch64.cc > >> +++ b/gcc/config/aarch64/aarch64.cc > >> @@ -24337,12 +24337,27 @@ > aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > >> if (!VECTOR_TYPE_P (vectype)) > >> return false; > >> > >> + if (!REG_P (in0)) > >> + in0 = force_reg (GET_MODE (in0), in0); > >> + > >> gcc_assert (output); > >> > >> - if (!*output) > >> - *output = gen_reg_rtx (TYPE_MODE (vectype)); > >> + rtx res = NULL_RTX; > >> + > >> + /* Once e get to this point we cannot reject the RTL, if it's not a reg > then > >> + Create a new reg and write the result to the output afterwards. > >> + */ if (!*output || !REG_P (*output)) > >> + res = gen_reg_rtx (TYPE_MODE (vectype)); else > >> + res = *output; > > > > Why not write > > rtx res = *output > > if (!res || !REG_P (res)) > > res = gen_reg_rtx... > > > > then you don't need either the else clause or the dead NULL_RTX > assignment. > > I'd prefer that we use the expand_insn interface, which already has logic for > coercing inputs and outputs to predicates. Something like: > > machine_mode mode = TYPE_MODE (vectype); > unsigned int flags = aarch64_classify_vector_mode (mode); > if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) > return false; > > ... 
> > expand_operand ops[3]; > create_output_operand (&ops[0], *output, mode); > create_input_operand (&ops[1], in0, mode); > create_fixed_operand (&ops[2], in1); > expand_insn (insn_code, 3, ops); > *output = ops[0].value; > return true; > > On this function: why do we have the VECTOR_TYPE_P condition in: > It was left over after checking for optabs support. It's superfluous now. Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. Ok for master? Thanks, Tamar gcc/ChangeLog: PR target/107988 * config/aarch64/aarch64.cc (aarch64_vectorize_can_special_div_by_constant): Ensure input and output RTL are registers. gcc/testsuite/ChangeLog: PR target/107988 * gcc.target/aarch64/pr107988-1.c: New test. --- inline copy of patch --- diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 7bb0b7602ff6474410059494dd86b7be1621dde5..e1f34ef5da170ef11727e0c99a5bd42849c5d185 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24395,7 +24395,8 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, || !TYPE_UNSIGNED (vectype)) return false; - unsigned int flags = aarch64_classify_vector_mode (TYPE_MODE (vectype)); + machine_mode mode = TYPE_MODE (vectype); + unsigned int flags = aarch64_classify_vector_mode (mode); if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; @@ -24411,15 +24412,14 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if (in0 == NULL_RTX && in1 == NULL_RTX) return true; - if (!VECTOR_TYPE_P (vectype)) - return false; - gcc_assert (output); - if (!*output) - *output = gen_reg_rtx (TYPE_MODE (vectype)); - - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); + expand_operand ops[3]; + create_output_operand (&ops[0], *output, mode); + create_input_operand (&ops[1], in0, mode); + create_fixed_operand (&ops[2], in1); + expand_insn (insn_code, 3, ops); + *output = ops[0].value; return true; } diff --git 
a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c new file mode 100644 index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O0" } */ +typedef unsigned short __attribute__((__vector_size__ (16))) V; + +V +foo (V v) +{ + v /= 255; + return v; +} [-- Attachment #2: rb16679.patch --] [-- Type: application/octet-stream, Size: 1752 bytes --] diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc index 7bb0b7602ff6474410059494dd86b7be1621dde5..e1f34ef5da170ef11727e0c99a5bd42849c5d185 100644 --- a/gcc/config/aarch64/aarch64.cc +++ b/gcc/config/aarch64/aarch64.cc @@ -24395,7 +24395,8 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, || !TYPE_UNSIGNED (vectype)) return false; - unsigned int flags = aarch64_classify_vector_mode (TYPE_MODE (vectype)); + machine_mode mode = TYPE_MODE (vectype); + unsigned int flags = aarch64_classify_vector_mode (mode); if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) return false; @@ -24411,15 +24412,14 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, if (in0 == NULL_RTX && in1 == NULL_RTX) return true; - if (!VECTOR_TYPE_P (vectype)) - return false; - gcc_assert (output); - if (!*output) - *output = gen_reg_rtx (TYPE_MODE (vectype)); - - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); + expand_operand ops[3]; + create_output_operand (&ops[0], *output, mode); + create_input_operand (&ops[1], in0, mode); + create_fixed_operand (&ops[2], in1); + expand_insn (insn_code, 3, ops); + *output = ops[0].value; return true; } diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c new file mode 100644 index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 --- 
/dev/null +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c @@ -0,0 +1,10 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-O0" } */ +typedef unsigned short __attribute__((__vector_size__ (16))) V; + +V +foo (V v) +{ + v /= 255; + return v; +} ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] 2022-12-14 11:18 ` Tamar Christina @ 2022-12-14 13:50 ` Richard Sandiford 0 siblings, 0 replies; 5+ messages in thread From: Richard Sandiford @ 2022-12-14 13:50 UTC (permalink / raw) To: Tamar Christina Cc: Richard Earnshaw, gcc-patches, nd, Richard Earnshaw, Marcus Shawcroft, Kyrylo Tkachov Tamar Christina <Tamar.Christina@arm.com> writes: >> -----Original Message----- >> From: Richard Sandiford <richard.sandiford@arm.com> >> Sent: Friday, December 9, 2022 7:08 AM >> To: Richard Earnshaw <Richard.Earnshaw@foss.arm.com> >> Cc: Tamar Christina <Tamar.Christina@arm.com>; gcc-patches@gcc.gnu.org; >> nd <nd@arm.com>; Richard Earnshaw <Richard.Earnshaw@arm.com>; >> Marcus Shawcroft <Marcus.Shawcroft@arm.com>; Kyrylo Tkachov >> <Kyrylo.Tkachov@arm.com> >> Subject: Re: [PATCH]AArch64 div-by-255, ensure that arguments are >> registers. [PR107988] >> >> Richard Earnshaw <Richard.Earnshaw@foss.arm.com> writes: >> > On 08/12/2022 16:39, Tamar Christina via Gcc-patches wrote: >> >> Hi All, >> >> >> >> At -O0 (as opposed to e.g. volatile) we can get into the situation >> >> where the >> >> in0 and result RTL arguments passed to the division function are >> >> memory locations instead of registers. I think we could reject these >> >> early on by checking that the gimple values are GIMPLE registers, but >> >> I think it's better to handle it. >> >> >> >> As such I force them to registers and emit a move to the memory >> >> locations and leave it up to reload to handle. This fixes the ICE >> >> and still allows the optimization in these cases, which improves the code >> quality a lot. >> >> >> >> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. >> >> >> >> Ok for master? 
>> >> >> >> Thanks, >> >> Tamar >> >> >> >> >> >> >> >> gcc/ChangeLog: >> >> >> >> PR target/107988 >> >> * config/aarch64/aarch64.cc >> >> (aarch64_vectorize_can_special_div_by_constant): Ensure input and >> output >> >> RTL are registers. >> >> >> >> gcc/testsuite/ChangeLog: >> >> >> >> PR target/107988 >> >> * gcc.target/aarch64/pr107988-1.c: New test. >> >> >> >> --- inline copy of patch -- >> >> diff --git a/gcc/config/aarch64/aarch64.cc >> >> b/gcc/config/aarch64/aarch64.cc index >> >> >> b8dc3f070c8afc47c85fa18768c4da92c774338f..9f96424993c4fcccce90e1b241f >> >> cb3aa97025225 100644 >> >> --- a/gcc/config/aarch64/aarch64.cc >> >> +++ b/gcc/config/aarch64/aarch64.cc >> >> @@ -24337,12 +24337,27 @@ >> aarch64_vectorize_can_special_div_by_constant (enum tree_code code, >> >> if (!VECTOR_TYPE_P (vectype)) >> >> return false; >> >> >> >> + if (!REG_P (in0)) >> >> + in0 = force_reg (GET_MODE (in0), in0); >> >> + >> >> gcc_assert (output); >> >> >> >> - if (!*output) >> >> - *output = gen_reg_rtx (TYPE_MODE (vectype)); >> >> + rtx res = NULL_RTX; >> >> + >> >> + /* Once e get to this point we cannot reject the RTL, if it's not a reg >> then >> >> + Create a new reg and write the result to the output afterwards. >> >> + */ if (!*output || !REG_P (*output)) >> >> + res = gen_reg_rtx (TYPE_MODE (vectype)); else >> >> + res = *output; >> > >> > Why not write >> > rtx res = *output >> > if (!res || !REG_P (res)) >> > res = gen_reg_rtx... >> > >> > then you don't need either the else clause or the dead NULL_RTX >> assignment. >> >> I'd prefer that we use the expand_insn interface, which already has logic for >> coercing inputs and outputs to predicates. Something like: >> >> machine_mode mode = TYPE_MODE (vectype); >> unsigned int flags = aarch64_classify_vector_mode (mode); >> if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) >> return false; >> >> ... 
>> >> expand_operand ops[3]; >> create_output_operand (&ops[0], *output, mode); >> create_input_operand (&ops[1], in0, mode); >> create_fixed_operand (&ops[2], in1); >> expand_insn (insn_code, 3, ops); >> *output = ops[0].value; >> return true; >> >> On this function: why do we have the VECTOR_TYPE_P condition in: >> > > It was left over after checking for optabs support. It's superfluous now. > > Bootstrapped Regtested on aarch64-none-linux-gnu and no issues. > > Ok for master? OK, thanks. Richard > Thanks, > Tamar > > gcc/ChangeLog: > > PR target/107988 > * config/aarch64/aarch64.cc > (aarch64_vectorize_can_special_div_by_constant): Ensure input and output > RTL are registers. > > gcc/testsuite/ChangeLog: > > PR target/107988 > * gcc.target/aarch64/pr107988-1.c: New test. > > --- inline copy of patch --- > > diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc > index 7bb0b7602ff6474410059494dd86b7be1621dde5..e1f34ef5da170ef11727e0c99a5bd42849c5d185 100644 > --- a/gcc/config/aarch64/aarch64.cc > +++ b/gcc/config/aarch64/aarch64.cc > @@ -24395,7 +24395,8 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > || !TYPE_UNSIGNED (vectype)) > return false; > > - unsigned int flags = aarch64_classify_vector_mode (TYPE_MODE (vectype)); > + machine_mode mode = TYPE_MODE (vectype); > + unsigned int flags = aarch64_classify_vector_mode (mode); > if ((flags & VEC_ANY_SVE) && !TARGET_SVE2) > return false; > > @@ -24411,15 +24412,14 @@ aarch64_vectorize_can_special_div_by_constant (enum tree_code code, > if (in0 == NULL_RTX && in1 == NULL_RTX) > return true; > > - if (!VECTOR_TYPE_P (vectype)) > - return false; > - > gcc_assert (output); > > - if (!*output) > - *output = gen_reg_rtx (TYPE_MODE (vectype)); > - > - emit_insn (gen_aarch64_bitmask_udiv3 (TYPE_MODE (vectype), *output, in0, in1)); > + expand_operand ops[3]; > + create_output_operand (&ops[0], *output, mode); > + create_input_operand (&ops[1], in0, mode); > + 
create_fixed_operand (&ops[2], in1); > + expand_insn (insn_code, 3, ops); > + *output = ops[0].value; > return true; > } > > diff --git a/gcc/testsuite/gcc.target/aarch64/pr107988-1.c b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c > new file mode 100644 > index 0000000000000000000000000000000000000000..c4fd290271b738345173b569bdc58c092fba7fe9 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/aarch64/pr107988-1.c > @@ -0,0 +1,10 @@ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O0" } */ > +typedef unsigned short __attribute__((__vector_size__ (16))) V; > + > +V > +foo (V v) > +{ > + v /= 255; > + return v; > +} ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2022-12-14 13:50 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2022-12-08 16:39 [PATCH]AArch64 div-by-255, ensure that arguments are registers. [PR107988] Tamar Christina 2022-12-08 17:00 ` Richard Earnshaw 2022-12-09 7:08 ` Richard Sandiford 2022-12-14 11:18 ` Tamar Christina 2022-12-14 13:50 ` Richard Sandiford
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).