* [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] @ 2023-02-08 5:08 HAO CHEN GUI 2023-02-20 2:10 ` Ping " HAO CHEN GUI 2023-05-22 9:40 ` Kewen.Lin 0 siblings, 2 replies; 5+ messages in thread From: HAO CHEN GUI @ 2023-02-08 5:08 UTC (permalink / raw) To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner Hi, The logical operations for TImode are split after the reload pass right now. Some potential optimizations are missed because the split is too late. This patch removes TImode from the "AND", "IOR", "XOR" and "NOT" expanders so that these logical operations can be split at the expand pass. The new test case illustrates the optimization. Two test cases of pr92398 are merged into one as all sub-targets generate the same sequence of instructions with the patch. Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. Thanks Gui Haochen ChangeLog 2023-02-08 Haochen Gui <guihaoc@linux.ibm.com> gcc/ PR target/100694 * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit vector types. (and<mode>3): Replace BOOL_128 with BOOL_128_V. (ior<mode>3): Likewise. (xor<mode>3): Likewise. (one_cmpl<mode>2 expander): New expander with BOOL_128_V. (one_cmpl<mode>2 insn_and_split): Rename to ... (*one_cmpl<mode>2): ... this. gcc/testsuite/ PR target/100694 * gcc.target/powerpc/pr100694.c: New. * gcc.target/powerpc/pr92398.c: New. * gcc.target/powerpc/pr92398.h: Remove. * gcc.target/powerpc/pr92398.p9-.c: Remove. * gcc.target/powerpc/pr92398.p9+.c: Remove. 
patch.diff diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 4bd1dfd3da9..455b7329643 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI (V2DF "TARGET_ALTIVEC") (V1TI "TARGET_ALTIVEC")]) +;; Mode iterator for logical operations on 128-bit vector types +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") + (V8HI "TARGET_ALTIVEC") + (V4SI "TARGET_ALTIVEC") + (V4SF "TARGET_ALTIVEC") + (V2DI "TARGET_ALTIVEC") + (V2DF "TARGET_ALTIVEC") + (V1TI "TARGET_ALTIVEC")]) + ;; For the GPRs we use 3 constraints for register outputs, two that are the ;; same as the output register, and a third where the output register is an ;; early clobber, so we don't have to deal with register overlaps. For the @@ -7135,23 +7144,23 @@ (define_expand "subti3" ;; 128-bit logical operations expanders (define_expand "and<mode>3" - [(set (match_operand:BOOL_128 0 "vlogical_operand") - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") - (match_operand:BOOL_128 2 "vlogical_operand")))] + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") + (match_operand:BOOL_128_V 2 "vlogical_operand")))] "" "") (define_expand "ior<mode>3" - [(set (match_operand:BOOL_128 0 "vlogical_operand") - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") - (match_operand:BOOL_128 2 "vlogical_operand")))] + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") + (match_operand:BOOL_128_V 2 "vlogical_operand")))] "" "") (define_expand "xor<mode>3" - [(set (match_operand:BOOL_128 0 "vlogical_operand") - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") - (match_operand:BOOL_128 2 "vlogical_operand")))] + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") + (match_operand:BOOL_128_V 2 
"vlogical_operand")))] "" "") @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" (const_string "16")))]) ;; 128-bit one's complement -(define_insn_and_split "one_cmpl<mode>2" +(define_expand "one_cmpl<mode>2" +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") + (not:BOOL_128_V + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] + "" + "") + +(define_insn_and_split "*one_cmpl<mode>2" [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") (not:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c b/gcc/testsuite/gcc.target/powerpc/pr100694.c new file mode 100644 index 00000000000..96a895d6c44 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ + +/* It just needs two std and one blr. */ +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo) +{ + unsigned __int128 i = hi; + i <<= 64; + i |= lo; + *res = i; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c b/gcc/testsuite/gcc.target/powerpc/pr92398.c new file mode 100644 index 00000000000..7d6201cc5bb --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c @@ -0,0 +1,12 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target int128 } */ +/* { dg-options "-O2" } */ +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ + +/* All platforms should generate the same instructions: not;not;std;std. 
*/ +void bar (__int128_t *dst, __int128_t src) +{ + *dst = ~src; +} + diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h b/gcc/testsuite/gcc.target/powerpc/pr92398.h deleted file mode 100644 index 5a4a8bcab80..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h +++ /dev/null @@ -1,17 +0,0 @@ -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. - The two files have the tests for the number of instructions generated for - P9- versus P9+. - - store generates difference instructions as below: - P9+: mtvsrdd;xxlnot;stxv. - P8/P7/P6 LE: not;not;std;std. - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. */ - -void -bar (__int128_t *dst, __int128_t src) -{ - *dst = ~src; -} - diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c deleted file mode 100644 index 72dd1d9a274..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c +++ /dev/null @@ -1,12 +0,0 @@ -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ -/* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2 -mvsx" } */ - -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ -/* { dg-final { scan-assembler-not {\mld\M} } } */ -/* { dg-final { scan-assembler-not {\mnot\M} } } */ - -/* Source code for the test in pr92398.h */ -#include "pr92398.h" diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c deleted file mode 100644 index bd7fa98af51..00000000000 --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c +++ /dev/null @@ -1,10 +0,0 @@ -/* { dg-do compile { target { lp64 && {! 
has_arch_pwr9} } } } */ -/* { dg-require-effective-target powerpc_vsx_ok } */ -/* { dg-options "-O2 -mvsx" } */ - -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ - -/* Source code for the test in pr92398.h */ -#include "pr92398.h" - ^ permalink raw reply [flat|nested] 5+ messages in thread
* Ping [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] 2023-02-08 5:08 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] HAO CHEN GUI @ 2023-02-20 2:10 ` HAO CHEN GUI 2023-04-24 5:35 ` Ping^2 " HAO CHEN GUI 2023-05-22 9:40 ` Kewen.Lin 1 sibling, 1 reply; 5+ messages in thread From: HAO CHEN GUI @ 2023-02-20 2:10 UTC (permalink / raw) To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner Hi, Gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html Gui Haochen Thanks 在 2023/2/8 13:08, HAO CHEN GUI 写道: > Hi, > The logical operations for TImode is split after reload pass right now. Some > potential optimizations miss as the split is too late. This patch removes > TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical > operations can be split at expand pass. The new test case illustrates the > optimization. > > Two test cases of pr92398 are merged into one as all sub-targets generates > the same sequence of instructions with the patch. > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > > Thanks > Gui Haochen > > > ChangeLog > 2023-02-08 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/100694 > * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit > vector types. > (and<mode>3): Replace BOOL_128 with BOOL_128_V. > (ior<mode>3): Likewise. > (xor<mode>3): Likewise. > (one_cmpl<mode>2 expander): New expander with BOOL_128_V. > (one_cmpl<mode>2 insn_and_split): Rename to ... > (*one_cmpl<mode>2): ... this. > > gcc/testsuite/ > PR target/100694 > * gcc.target/powerpc/pr100694.c: New. > * gcc.target/powerpc/pr92398.c: New. > * gcc.target/powerpc/pr92398.h: Remove. > * gcc.target/powerpc/pr92398.p9-.c: Remove. > * gcc.target/powerpc/pr92398.p9+.c: Remove. 
> > > patch.diff > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 4bd1dfd3da9..455b7329643 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI > (V2DF "TARGET_ALTIVEC") > (V1TI "TARGET_ALTIVEC")]) > > +;; Mode iterator for logical operations on 128-bit vector types > +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") > + (V8HI "TARGET_ALTIVEC") > + (V4SI "TARGET_ALTIVEC") > + (V4SF "TARGET_ALTIVEC") > + (V2DI "TARGET_ALTIVEC") > + (V2DF "TARGET_ALTIVEC") > + (V1TI "TARGET_ALTIVEC")]) > + > ;; For the GPRs we use 3 constraints for register outputs, two that are the > ;; same as the output register, and a third where the output register is an > ;; early clobber, so we don't have to deal with register overlaps. For the > @@ -7135,23 +7144,23 @@ (define_expand "subti3" > ;; 128-bit logical operations expanders > > (define_expand "and<mode>3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] > "" > "") > > (define_expand "ior<mode>3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] > "" > "") > > (define_expand "xor<mode>3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 
"vlogical_operand") > + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] > "" > "") > > @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" > (const_string "16")))]) > > ;; 128-bit one's complement > -(define_insn_and_split "one_cmpl<mode>2" > +(define_expand "one_cmpl<mode>2" > +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") > + (not:BOOL_128_V > + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] > + "" > + "") > + > +(define_insn_and_split "*one_cmpl<mode>2" > [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") > (not:BOOL_128 > (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] > diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c b/gcc/testsuite/gcc.target/powerpc/pr100694.c > new file mode 100644 > index 00000000000..96a895d6c44 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ > + > +/* It just needs two std and one blr. */ > +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo) > +{ > + unsigned __int128 i = hi; > + i <<= 64; > + i |= lo; > + *res = i; > +} > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c b/gcc/testsuite/gcc.target/powerpc/pr92398.c > new file mode 100644 > index 00000000000..7d6201cc5bb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ > + > +/* All platforms should generate the same instructions: not;not;std;std. 
*/ > +void bar (__int128_t *dst, __int128_t src) > +{ > + *dst = ~src; > +} > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h b/gcc/testsuite/gcc.target/powerpc/pr92398.h > deleted file mode 100644 > index 5a4a8bcab80..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h > +++ /dev/null > @@ -1,17 +0,0 @@ > -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. > - The two files have the tests for the number of instructions generated for > - P9- versus P9+. > - > - store generates difference instructions as below: > - P9+: mtvsrdd;xxlnot;stxv. > - P8/P7/P6 LE: not;not;std;std. > - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. > - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. > - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. */ > - > -void > -bar (__int128_t *dst, __int128_t src) > -{ > - *dst = ~src; > -} > - > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c > deleted file mode 100644 > index 72dd1d9a274..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c > +++ /dev/null > @@ -1,12 +0,0 @@ > -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ > -/* { dg-require-effective-target powerpc_vsx_ok } */ > -/* { dg-options "-O2 -mvsx" } */ > - > -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ > -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ > -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ > -/* { dg-final { scan-assembler-not {\mld\M} } } */ > -/* { dg-final { scan-assembler-not {\mnot\M} } } */ > - > -/* Source code for the test in pr92398.h */ > -#include "pr92398.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c > deleted file mode 100644 > index bd7fa98af51..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c > +++ /dev/null > @@ -1,10 +0,0 @@ > -/* { dg-do compile { target { lp64 && {! 
has_arch_pwr9} } } } */ > -/* { dg-require-effective-target powerpc_vsx_ok } */ > -/* { dg-options "-O2 -mvsx" } */ > - > -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ > -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ > - > -/* Source code for the test in pr92398.h */ > -#include "pr92398.h" > - ^ permalink raw reply [flat|nested] 5+ messages in thread
* Ping^2 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] 2023-02-20 2:10 ` Ping " HAO CHEN GUI @ 2023-04-24 5:35 ` HAO CHEN GUI 2024-05-08 1:52 ` Ping^3 " HAO CHEN GUI 0 siblings, 1 reply; 5+ messages in thread From: HAO CHEN GUI @ 2023-04-24 5:35 UTC (permalink / raw) To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner Hi, Gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html Thanks Gui Haochen 在 2023/2/20 10:10, HAO CHEN GUI 写道: > Hi, > Gently ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html > > Gui Haochen > Thanks > > 在 2023/2/8 13:08, HAO CHEN GUI 写道: >> Hi, >> The logical operations for TImode is split after reload pass right now. Some >> potential optimizations miss as the split is too late. This patch removes >> TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical >> operations can be split at expand pass. The new test case illustrates the >> optimization. >> >> Two test cases of pr92398 are merged into one as all sub-targets generates >> the same sequence of instructions with the patch. >> >> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >> >> Thanks >> Gui Haochen >> >> >> ChangeLog >> 2023-02-08 Haochen Gui <guihaoc@linux.ibm.com> >> >> gcc/ >> PR target/100694 >> * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit >> vector types. >> (and<mode>3): Replace BOOL_128 with BOOL_128_V. >> (ior<mode>3): Likewise. >> (xor<mode>3): Likewise. >> (one_cmpl<mode>2 expander): New expander with BOOL_128_V. >> (one_cmpl<mode>2 insn_and_split): Rename to ... >> (*one_cmpl<mode>2): ... this. >> >> gcc/testsuite/ >> PR target/100694 >> * gcc.target/powerpc/pr100694.c: New. >> * gcc.target/powerpc/pr92398.c: New. >> * gcc.target/powerpc/pr92398.h: Remove. >> * gcc.target/powerpc/pr92398.p9-.c: Remove. >> * gcc.target/powerpc/pr92398.p9+.c: Remove. 
>> >> >> patch.diff >> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >> index 4bd1dfd3da9..455b7329643 100644 >> --- a/gcc/config/rs6000/rs6000.md >> +++ b/gcc/config/rs6000/rs6000.md >> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI >> (V2DF "TARGET_ALTIVEC") >> (V1TI "TARGET_ALTIVEC")]) >> >> +;; Mode iterator for logical operations on 128-bit vector types >> +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") >> + (V8HI "TARGET_ALTIVEC") >> + (V4SI "TARGET_ALTIVEC") >> + (V4SF "TARGET_ALTIVEC") >> + (V2DI "TARGET_ALTIVEC") >> + (V2DF "TARGET_ALTIVEC") >> + (V1TI "TARGET_ALTIVEC")]) >> + >> ;; For the GPRs we use 3 constraints for register outputs, two that are the >> ;; same as the output register, and a third where the output register is an >> ;; early clobber, so we don't have to deal with register overlaps. For the >> @@ -7135,23 +7144,23 @@ (define_expand "subti3" >> ;; 128-bit logical operations expanders >> >> (define_expand "and<mode>3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >> "" >> "") >> >> (define_expand "ior<mode>3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >> "" >> "") >> >> (define_expand "xor<mode>3" >> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >> - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >> - (match_operand:BOOL_128 2 "vlogical_operand")))] >> + 
[(set (match_operand:BOOL_128_V 0 "vlogical_operand") >> + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >> "" >> "") >> >> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" >> (const_string "16")))]) >> >> ;; 128-bit one's complement >> -(define_insn_and_split "one_cmpl<mode>2" >> +(define_expand "one_cmpl<mode>2" >> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >> + (not:BOOL_128_V >> + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >> + "" >> + "") >> + >> +(define_insn_and_split "*one_cmpl<mode>2" >> [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >> (not:BOOL_128 >> (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c b/gcc/testsuite/gcc.target/powerpc/pr100694.c >> new file mode 100644 >> index 00000000000..96a895d6c44 >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c >> @@ -0,0 +1,14 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target int128 } */ >> +/* { dg-options "-O2" } */ >> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ >> + >> +/* It just needs two std and one blr. 
*/ >> +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo) >> +{ >> + unsigned __int128 i = hi; >> + i <<= 64; >> + i |= lo; >> + *res = i; >> +} >> + >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c b/gcc/testsuite/gcc.target/powerpc/pr92398.c >> new file mode 100644 >> index 00000000000..7d6201cc5bb >> --- /dev/null >> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c >> @@ -0,0 +1,12 @@ >> +/* { dg-do compile } */ >> +/* { dg-require-effective-target int128 } */ >> +/* { dg-options "-O2" } */ >> +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ >> +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ >> + >> +/* All platforms should generate the same instructions: not;not;std;std. */ >> +void bar (__int128_t *dst, __int128_t src) >> +{ >> + *dst = ~src; >> +} >> + >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h b/gcc/testsuite/gcc.target/powerpc/pr92398.h >> deleted file mode 100644 >> index 5a4a8bcab80..00000000000 >> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h >> +++ /dev/null >> @@ -1,17 +0,0 @@ >> -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. >> - The two files have the tests for the number of instructions generated for >> - P9- versus P9+. >> - >> - store generates difference instructions as below: >> - P9+: mtvsrdd;xxlnot;stxv. >> - P8/P7/P6 LE: not;not;std;std. >> - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. >> - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. >> - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. 
*/ >> - >> -void >> -bar (__int128_t *dst, __int128_t src) >> -{ >> - *dst = ~src; >> -} >> - >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >> deleted file mode 100644 >> index 72dd1d9a274..00000000000 >> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >> +++ /dev/null >> @@ -1,12 +0,0 @@ >> -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ >> -/* { dg-require-effective-target powerpc_vsx_ok } */ >> -/* { dg-options "-O2 -mvsx" } */ >> - >> -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ >> -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ >> -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ >> -/* { dg-final { scan-assembler-not {\mld\M} } } */ >> -/* { dg-final { scan-assembler-not {\mnot\M} } } */ >> - >> -/* Source code for the test in pr92398.h */ >> -#include "pr92398.h" >> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >> deleted file mode 100644 >> index bd7fa98af51..00000000000 >> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >> +++ /dev/null >> @@ -1,10 +0,0 @@ >> -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */ >> -/* { dg-require-effective-target powerpc_vsx_ok } */ >> -/* { dg-options "-O2 -mvsx" } */ >> - >> -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ >> -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ >> - >> -/* Source code for the test in pr92398.h */ >> -#include "pr92398.h" >> - ^ permalink raw reply [flat|nested] 5+ messages in thread
* Ping^3 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] 2023-04-24 5:35 ` Ping^2 " HAO CHEN GUI @ 2024-05-08 1:52 ` HAO CHEN GUI 0 siblings, 0 replies; 5+ messages in thread From: HAO CHEN GUI @ 2024-05-08 1:52 UTC (permalink / raw) To: gcc-patches; +Cc: Segher Boessenkool, David, Kewen.Lin, Peter Bergner Hi, As now it's stage-1, gently ping this: https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html Gui Haochen Thanks 在 2023/4/24 13:35, HAO CHEN GUI 写道: > Hi, > Gently ping this: > https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html > > Thanks > Gui Haochen > > 在 2023/2/20 10:10, HAO CHEN GUI 写道: >> Hi, >> Gently ping this: >> https://gcc.gnu.org/pipermail/gcc-patches/2023-February/611550.html >> >> Gui Haochen >> Thanks >> >> 在 2023/2/8 13:08, HAO CHEN GUI 写道: >>> Hi, >>> The logical operations for TImode is split after reload pass right now. Some >>> potential optimizations miss as the split is too late. This patch removes >>> TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical >>> operations can be split at expand pass. The new test case illustrates the >>> optimization. >>> >>> Two test cases of pr92398 are merged into one as all sub-targets generates >>> the same sequence of instructions with the patch. >>> >>> Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. >>> >>> Thanks >>> Gui Haochen >>> >>> >>> ChangeLog >>> 2023-02-08 Haochen Gui <guihaoc@linux.ibm.com> >>> >>> gcc/ >>> PR target/100694 >>> * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit >>> vector types. >>> (and<mode>3): Replace BOOL_128 with BOOL_128_V. >>> (ior<mode>3): Likewise. >>> (xor<mode>3): Likewise. >>> (one_cmpl<mode>2 expander): New expander with BOOL_128_V. >>> (one_cmpl<mode>2 insn_and_split): Rename to ... >>> (*one_cmpl<mode>2): ... this. >>> >>> gcc/testsuite/ >>> PR target/100694 >>> * gcc.target/powerpc/pr100694.c: New. 
>>> * gcc.target/powerpc/pr92398.c: New. >>> * gcc.target/powerpc/pr92398.h: Remove. >>> * gcc.target/powerpc/pr92398.p9-.c: Remove. >>> * gcc.target/powerpc/pr92398.p9+.c: Remove. >>> >>> >>> patch.diff >>> diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md >>> index 4bd1dfd3da9..455b7329643 100644 >>> --- a/gcc/config/rs6000/rs6000.md >>> +++ b/gcc/config/rs6000/rs6000.md >>> @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI >>> (V2DF "TARGET_ALTIVEC") >>> (V1TI "TARGET_ALTIVEC")]) >>> >>> +;; Mode iterator for logical operations on 128-bit vector types >>> +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") >>> + (V8HI "TARGET_ALTIVEC") >>> + (V4SI "TARGET_ALTIVEC") >>> + (V4SF "TARGET_ALTIVEC") >>> + (V2DI "TARGET_ALTIVEC") >>> + (V2DF "TARGET_ALTIVEC") >>> + (V1TI "TARGET_ALTIVEC")]) >>> + >>> ;; For the GPRs we use 3 constraints for register outputs, two that are the >>> ;; same as the output register, and a third where the output register is an >>> ;; early clobber, so we don't have to deal with register overlaps. 
For the >>> @@ -7135,23 +7144,23 @@ (define_expand "subti3" >>> ;; 128-bit logical operations expanders >>> >>> (define_expand "and<mode>3" >>> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >>> - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >>> - (match_operand:BOOL_128 2 "vlogical_operand")))] >>> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >>> + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >>> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >>> "" >>> "") >>> >>> (define_expand "ior<mode>3" >>> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >>> - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >>> - (match_operand:BOOL_128 2 "vlogical_operand")))] >>> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >>> + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >>> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >>> "" >>> "") >>> >>> (define_expand "xor<mode>3" >>> - [(set (match_operand:BOOL_128 0 "vlogical_operand") >>> - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") >>> - (match_operand:BOOL_128 2 "vlogical_operand")))] >>> + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") >>> + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") >>> + (match_operand:BOOL_128_V 2 "vlogical_operand")))] >>> "" >>> "") >>> >>> @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" >>> (const_string "16")))]) >>> >>> ;; 128-bit one's complement >>> -(define_insn_and_split "one_cmpl<mode>2" >>> +(define_expand "one_cmpl<mode>2" >>> +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >>> + (not:BOOL_128_V >>> + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] >>> + "" >>> + "") >>> + >>> +(define_insn_and_split "*one_cmpl<mode>2" >>> [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") >>> (not:BOOL_128 >>> (match_operand:BOOL_128 1 "vlogical_operand" 
"<BOOL_REGS_UNARY>")))] >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c b/gcc/testsuite/gcc.target/powerpc/pr100694.c >>> new file mode 100644 >>> index 00000000000..96a895d6c44 >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c >>> @@ -0,0 +1,14 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target int128 } */ >>> +/* { dg-options "-O2" } */ >>> +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ >>> + >>> +/* It just needs two std and one blr. */ >>> +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo) >>> +{ >>> + unsigned __int128 i = hi; >>> + i <<= 64; >>> + i |= lo; >>> + *res = i; >>> +} >>> + >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c b/gcc/testsuite/gcc.target/powerpc/pr92398.c >>> new file mode 100644 >>> index 00000000000..7d6201cc5bb >>> --- /dev/null >>> +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c >>> @@ -0,0 +1,12 @@ >>> +/* { dg-do compile } */ >>> +/* { dg-require-effective-target int128 } */ >>> +/* { dg-options "-O2" } */ >>> +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ >>> +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ >>> + >>> +/* All platforms should generate the same instructions: not;not;std;std. */ >>> +void bar (__int128_t *dst, __int128_t src) >>> +{ >>> + *dst = ~src; >>> +} >>> + >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h b/gcc/testsuite/gcc.target/powerpc/pr92398.h >>> deleted file mode 100644 >>> index 5a4a8bcab80..00000000000 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h >>> +++ /dev/null >>> @@ -1,17 +0,0 @@ >>> -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. >>> - The two files have the tests for the number of instructions generated for >>> - P9- versus P9+. >>> - >>> - store generates difference instructions as below: >>> - P9+: mtvsrdd;xxlnot;stxv. >>> - P8/P7/P6 LE: not;not;std;std. >>> - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. 
>>> - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. >>> - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. */ >>> - >>> -void >>> -bar (__int128_t *dst, __int128_t src) >>> -{ >>> - *dst = ~src; >>> -} >>> - >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >>> deleted file mode 100644 >>> index 72dd1d9a274..00000000000 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c >>> +++ /dev/null >>> @@ -1,12 +0,0 @@ >>> -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ >>> -/* { dg-require-effective-target powerpc_vsx_ok } */ >>> -/* { dg-options "-O2 -mvsx" } */ >>> - >>> -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ >>> -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ >>> -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ >>> -/* { dg-final { scan-assembler-not {\mld\M} } } */ >>> -/* { dg-final { scan-assembler-not {\mnot\M} } } */ >>> - >>> -/* Source code for the test in pr92398.h */ >>> -#include "pr92398.h" >>> diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >>> deleted file mode 100644 >>> index bd7fa98af51..00000000000 >>> --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c >>> +++ /dev/null >>> @@ -1,10 +0,0 @@ >>> -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */ >>> -/* { dg-require-effective-target powerpc_vsx_ok } */ >>> -/* { dg-options "-O2 -mvsx" } */ >>> - >>> -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ >>> -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ >>> - >>> -/* Source code for the test in pr92398.h */ >>> -#include "pr92398.h" >>> - ^ permalink raw reply [flat|nested] 5+ messages in thread
* Re: [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] 2023-02-08 5:08 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] HAO CHEN GUI 2023-02-20 2:10 ` Ping " HAO CHEN GUI @ 2023-05-22 9:40 ` Kewen.Lin 1 sibling, 0 replies; 5+ messages in thread From: Kewen.Lin @ 2023-05-22 9:40 UTC (permalink / raw) To: HAO CHEN GUI; +Cc: Segher Boessenkool, David, Peter Bergner, gcc-patches Hi Haochen, on 2023/2/8 13:08, HAO CHEN GUI wrote: > Hi, > The logical operations for TImode is split after reload pass right now. Some > potential optimizations miss as the split is too late. This patch removes > TImode from "AND", "IOR", "XOR" and "NOT" expander so that these logical > operations can be split at expand pass. The new test case illustrates the > optimization. > > Two test cases of pr92398 are merged into one as all sub-targets generates > the same sequence of instructions with the patch. IIUC, this can also help PR target/93123. Add it to the PR marker too if so. This patch aligns with what the other ports do, so I think it's good, but note that it can regress some cases like: ``` vector unsigned __int128 test(unsigned __int128 *a, unsigned __int128 *b, unsigned __int128 *c, unsigned __int128 *d) { unsigned __int128 t1 = *a | *b; unsigned __int128 t2 = *c & *d; unsigned __int128 t3 = t1 ^ t2; return (vector unsigned __int128)t3; } ``` w/o the proposed patch: lxv 32,0(5) lxv 0,0(6) lxv 45,0(3) lxv 33,0(4) xxland 32,32,0 vor 2,1,13 vxor 2,2,0 vs. w/ this patch: ld 9,8(6) ld 8,0(5) ld 10,8(5) ld 0,0(6) ld 11,0(3) ld 6,8(3) ld 5,0(4) ld 7,8(4) and 8,8,0 and 10,10,9 or 9,5,11 xor 9,9,8 or 8,7,6 xor 8,8,10 mtvsrdd 34,8,9 We could get the optimal insn sequence before, but fail to with the proposed patch. Apparently, for now we don't have any support for getting the operation back into vector form when that is beneficial. I guess the cases in PR100694 and PR93123 are the dominant ones and the regressed case is a corner case.
So we can probably install this patch first and open a bug for further enhancement. Segher, what do you think of this? BR, Kewen > > Bootstrapped and tested on powerpc64-linux BE and LE with no regressions. > > Thanks > Gui Haochen > > > ChangeLog > 2023-02-08 Haochen Gui <guihaoc@linux.ibm.com> > > gcc/ > PR target/100694> * config/rs6000/rs6000.md (BOOL_128_V): New mode iterator for 128-bit > vector types. > (and<mode>3): Replace BOOL_128 with BOOL_128_V. > (ior<mode>3): Likewise. > (xor<mode>3): Likewise. > (one_cmpl<mode>2 expander): New expander with BOOL_128_V. > (one_cmpl<mode>2 insn_and_split): Rename to ... > (*one_cmpl<mode>2): ... this. > > gcc/testsuite/ > PR target/100694 > * gcc.target/powerpc/pr100694.c: New. > * gcc.target/powerpc/pr92398.c: New. > * gcc.target/powerpc/pr92398.h: Remove. > * gcc.target/powerpc/pr92398.p9-.c: Remove. > * gcc.target/powerpc/pr92398.p9+.c: Remove. > > > patch.diff > diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md > index 4bd1dfd3da9..455b7329643 100644 > --- a/gcc/config/rs6000/rs6000.md > +++ b/gcc/config/rs6000/rs6000.md > @@ -743,6 +743,15 @@ (define_mode_iterator BOOL_128 [TI > (V2DF "TARGET_ALTIVEC") > (V1TI "TARGET_ALTIVEC")]) > > +;; Mode iterator for logical operations on 128-bit vector types > +(define_mode_iterator BOOL_128_V [(V16QI "TARGET_ALTIVEC") > + (V8HI "TARGET_ALTIVEC") > + (V4SI "TARGET_ALTIVEC") > + (V4SF "TARGET_ALTIVEC") > + (V2DI "TARGET_ALTIVEC") > + (V2DF "TARGET_ALTIVEC") > + (V1TI "TARGET_ALTIVEC")]) > + > ;; For the GPRs we use 3 constraints for register outputs, two that are the > ;; same as the output register, and a third where the output register is an > ;; early clobber, so we don't have to deal with register overlaps. 
For the > @@ -7135,23 +7144,23 @@ (define_expand "subti3" > ;; 128-bit logical operations expanders > > (define_expand "and<mode>3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (and:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (and:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] > "" > "") > > (define_expand "ior<mode>3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (ior:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (ior:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] > "" > "") > > (define_expand "xor<mode>3" > - [(set (match_operand:BOOL_128 0 "vlogical_operand") > - (xor:BOOL_128 (match_operand:BOOL_128 1 "vlogical_operand") > - (match_operand:BOOL_128 2 "vlogical_operand")))] > + [(set (match_operand:BOOL_128_V 0 "vlogical_operand") > + (xor:BOOL_128_V (match_operand:BOOL_128_V 1 "vlogical_operand") > + (match_operand:BOOL_128_V 2 "vlogical_operand")))] > "" > "") > > @@ -7449,7 +7458,14 @@ (define_insn_and_split "*eqv<mode>3_internal2" > (const_string "16")))]) > > ;; 128-bit one's complement > -(define_insn_and_split "one_cmpl<mode>2" > +(define_expand "one_cmpl<mode>2" > +[(set (match_operand:BOOL_128_V 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") > + (not:BOOL_128_V > + (match_operand:BOOL_128_V 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] > + "" > + "") > + > +(define_insn_and_split "*one_cmpl<mode>2" > [(set (match_operand:BOOL_128 0 "vlogical_operand" "=<BOOL_REGS_OUTPUT>") > (not:BOOL_128 > (match_operand:BOOL_128 1 "vlogical_operand" "<BOOL_REGS_UNARY>")))] > diff --git a/gcc/testsuite/gcc.target/powerpc/pr100694.c 
b/gcc/testsuite/gcc.target/powerpc/pr100694.c > new file mode 100644 > index 00000000000..96a895d6c44 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr100694.c > @@ -0,0 +1,14 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times {(?n)^\s+[a-z]} 3 } } */ > + > +/* It just needs two std and one blr. */ > +void foo (unsigned __int128* res, unsigned long long hi, unsigned long long lo) > +{ > + unsigned __int128 i = hi; > + i <<= 64; > + i |= lo; > + *res = i; > +} > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.c b/gcc/testsuite/gcc.target/powerpc/pr92398.c > new file mode 100644 > index 00000000000..7d6201cc5bb > --- /dev/null > +++ b/gcc/testsuite/gcc.target/powerpc/pr92398.c > @@ -0,0 +1,12 @@ > +/* { dg-do compile } */ > +/* { dg-require-effective-target int128 } */ > +/* { dg-options "-O2" } */ > +/* { dg-final { scan-assembler-times {\mnot\M} 2 } } */ > +/* { dg-final { scan-assembler-times {\mstd\M} 2 } } */ > + > +/* All platforms should generate the same instructions: not;not;std;std. */ > +void bar (__int128_t *dst, __int128_t src) > +{ > + *dst = ~src; > +} > + > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.h b/gcc/testsuite/gcc.target/powerpc/pr92398.h > deleted file mode 100644 > index 5a4a8bcab80..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr92398.h > +++ /dev/null > @@ -1,17 +0,0 @@ > -/* This test code is included into pr92398.p9-.c and pr92398.p9+.c. > - The two files have the tests for the number of instructions generated for > - P9- versus P9+. > - > - store generates difference instructions as below: > - P9+: mtvsrdd;xxlnot;stxv. > - P8/P7/P6 LE: not;not;std;std. > - P8 BE: mtvsrd;mtvsrd;xxpermdi;xxlnor;stxvd2x. > - P7/P6 BE: std;std;addi;lxvd2x;xxlnor;stxvd2x. > - P9+ and P9- LE are expected, P6/P7/P8 BE are unexpected. 
*/ > - > -void > -bar (__int128_t *dst, __int128_t src) > -{ > - *dst = ~src; > -} > - > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c > deleted file mode 100644 > index 72dd1d9a274..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c > +++ /dev/null > @@ -1,12 +0,0 @@ > -/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */ > -/* { dg-require-effective-target powerpc_vsx_ok } */ > -/* { dg-options "-O2 -mvsx" } */ > - > -/* { dg-final { scan-assembler-times {\mmtvsrdd\M} 1 } } */ > -/* { dg-final { scan-assembler-times {\mxxlnor\M} 1 } } */ > -/* { dg-final { scan-assembler-times {\mstxv\M} 1 } } */ > -/* { dg-final { scan-assembler-not {\mld\M} } } */ > -/* { dg-final { scan-assembler-not {\mnot\M} } } */ > - > -/* Source code for the test in pr92398.h */ > -#include "pr92398.h" > diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c > deleted file mode 100644 > index bd7fa98af51..00000000000 > --- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c > +++ /dev/null > @@ -1,10 +0,0 @@ > -/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */ > -/* { dg-require-effective-target powerpc_vsx_ok } */ > -/* { dg-options "-O2 -mvsx" } */ > - > -/* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */ > -/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} && has_arch_pwr8 } && be } } } } */ > - > -/* Source code for the test in pr92398.h */ > -#include "pr92398.h" > - ^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2024-05-08 2:18 UTC | newest] Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2023-02-08 5:08 [PATCH, rs6000] Split TImode for logical operations in expand pass [PR100694] HAO CHEN GUI 2023-02-20 2:10 ` Ping " HAO CHEN GUI 2023-04-24 5:35 ` Ping^2 " HAO CHEN GUI 2024-05-08 1:52 ` Ping^3 " HAO CHEN GUI 2023-05-22 9:40 ` Kewen.Lin
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as the URLs for read-only IMAP folder(s) and NNTP newsgroup(s).