[PATCH, rs6000] optimization for vec_reve builtin [PR100868]

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
@ 2021-09-08  6:42 HAO CHEN GUI
  2021-09-12 15:50 ` Bill Schmidt
  2021-10-11  5:32 ` Ping^1 " HAO CHEN GUI
  0 siblings, 2 replies; 8+ messages in thread
From: HAO CHEN GUI @ 2021-09-08  6:42 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, Bill Schmidt

Hi,

   This patch optimizes the vec_reve builtin on rs6000. For V2DI and
V2DF, it is implemented with xxswapd on all targets. For V16QI, V8HI, V4SI
and V4SF, it is implemented with a quadword byte reverse followed by a
halfword/word byte reverse when p9_vector is defined.

   Bootstrapped and tested on powerpc64le-linux with no regressions. Is 
this okay for trunk? Any recommendations? Thanks a lot.


ChangeLog

2021-09-08 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
         * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K):
         Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw
         for v4si or v4sf when p9_vector is defined.
         (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by
         xxswapd.

gcc/testsuite/
         * gcc.target/powerpc/vec_reve_1.c: New test.
         * gcc.target/powerpc/vec_reve_2.c: Likewise.


patch.diff

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1351dafbc41..a1698ce85c0 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
    DONE;
  })

-;; Vector reverse elements
+;; Vector reverse elements for V16QI V8HI V4SI V4SF
  (define_expand "altivec_vreve<mode>2"
-  [(set (match_operand:VEC_A 0 "register_operand" "=v")
-       (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
+  [(set (match_operand:VEC_K 0 "register_operand" "=v")
+       (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
                       UNSPEC_VREVEV))]
    "TARGET_ALTIVEC"
  {
+  if (TARGET_P9_VECTOR)
+    {
+      if (<MODE>mode == V16QImode)
+       emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
+      else if (<MODE>mode == V8HImode)
+       {
+         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+                                            <MODE>mode, 0);
+         rtx temp = gen_reg_rtx (V1TImode);
+         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
+                                            V1TImode, 0);
+         emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
+       }
+      else /* V4SI and V4SF.  */
+       {
+         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+                                            <MODE>mode, 0);
+         rtx temp = gen_reg_rtx (V1TImode);
+         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
+                                            V1TImode, 0);
+         if (<MODE>mode == V4SImode)
+           emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
+         else
+           emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
+       }
+      DONE;
+    }
+
    int i, j, size, num_elements;
    rtvec v = rtvec_alloc (16);
    rtx mask = gen_reg_rtx (V16QImode);
@@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2"
    DONE;
  })

+;; Vector reverse elements for V2DI V2DF
+(define_expand "altivec_vreve<mode>2"
+  [(set (match_operand:VEC_64 0 "register_operand" "=v")
+       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
+                     UNSPEC_VREVEV))]
+  "TARGET_ALTIVEC"
+{
+  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
  ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
  ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
  (define_insn "altivec_lvlx"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
new file mode 100644
index 00000000000..83a9206758b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O2 -maltivec" } */
+
+#include <altivec.h>
+
+vector double foo1 (vector double a)
+{
+   return vec_reve (a);
+}
+
+vector long long foo2 (vector long long a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
new file mode 100644
index 00000000000..b6dd33d6d79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
@@ -0,0 +1,28 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */
+
+#include <altivec.h>
+
+vector int foo1 (vector int a)
+{
+   return vec_reve (a);
+}
+
+vector float foo2 (vector float a)
+{
+   return vec_reve (a);
+}
+
+vector short foo3 (vector short a)
+{
+   return vec_reve (a);
+}
+
+vector char foo4 (vector char a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
  2021-09-08  6:42 [PATCH, rs6000] optimization for vec_reve builtin [PR100868] HAO CHEN GUI
@ 2021-09-12 15:50 ` Bill Schmidt
  2021-09-12 19:38   ` Segher Boessenkool
  2021-10-11  5:32 ` Ping^1 " HAO CHEN GUI
  1 sibling, 1 reply; 8+ messages in thread
From: Bill Schmidt @ 2021-09-12 15:50 UTC (permalink / raw)
  To: HAO CHEN GUI, gcc-patches; +Cc: Segher Boessenkool

Hi Haochen,

On 9/8/21 1:42 AM, HAO CHEN GUI wrote:
> Hi,
>
>     The patch optimized for vec_reve builtin on rs6000. For V2DI and
> V2DF, it is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI
> and V4SF, it is implemented by quadword byte reverse plus halfword/word
> byte reverse when p9_vector is defined.
>
>     Bootstrapped and tested on powerpc64le-linux with no regressions. Is
> this okay for trunk? Any recommendations? Thanks a lot.
>
>
> ChangeLog
>
> 2021-09-08 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
>           * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K):
>           Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw
>           for v4si or v4sf when p9_vector is defined.
>           (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by
>           xxswapd.
>
> gcc/testsuite/
>           * gcc.target/powerpc/vec_reve_1.c: New test.
>           * gcc.target/powerpc/vec_reve_2.c: Likewise.
>
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 1351dafbc41..a1698ce85c0 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
>      DONE;
>    })
>
> -;; Vector reverse elements
> +;; Vector reverse elements for V16QI V8HI V4SI V4SF
>    (define_expand "altivec_vreve<mode>2"
> -  [(set (match_operand:VEC_A 0 "register_operand" "=v")
> -       (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
> +  [(set (match_operand:VEC_K 0 "register_operand" "=v")
> +       (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
>                         UNSPEC_VREVEV))]
>      "TARGET_ALTIVEC"
>    {
> +  if (TARGET_P9_VECTOR)
> +    {
> +      if (<MODE>mode == V16QImode)
> +       emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
> +      else if (<MODE>mode == V8HImode)
> +       {
> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
> +                                            <MODE>mode, 0);
> +         rtx temp = gen_reg_rtx (V1TImode);
> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
> +                                            V1TImode, 0);
> +         emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
> +       }
> +      else /* V4SI and V4SF.  */
> +       {
> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
> +                                            <MODE>mode, 0);
> +         rtx temp = gen_reg_rtx (V1TImode);
> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
> +                                            V1TImode, 0);
> +         if (<MODE>mode == V4SImode)
> +           emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
> +         else
> +           emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
> +       }
> +      DONE;
> +    }
> +
>      int i, j, size, num_elements;
>      rtvec v = rtvec_alloc (16);
>      rtx mask = gen_reg_rtx (V16QImode);
> @@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2"
>      DONE;
>    })
>
> +;; Vector reverse elements for V2DI V2DF
> +(define_expand "altivec_vreve<mode>2"
> +  [(set (match_operand:VEC_64 0 "register_operand" "=v")
> +       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
> +                     UNSPEC_VREVEV))]
> +  "TARGET_ALTIVEC"


"TARGET_VSX" for V2DI and V2DF.  (This is the only good reason for
splitting this into two patterns; you need different criteria.)

> +{
> +  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
> +  DONE;
> +})
> +
>    ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
>    ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
>    (define_insn "altivec_lvlx"
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
> new file mode 100644
> index 00000000000..83a9206758b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
> @@ -0,0 +1,16 @@
> +/* { dg-require-effective-target powerpc_altivec_ok } */


powerpc_vsx_ok to handle vector double and vector long long.

> +/* { dg-options "-O2 -maltivec" } */

-mvsx

Looks okay to me with those things fixed.  Maintainers?

Thanks for the patch!
Bill

> +
> +#include <altivec.h>
> +
> +vector double foo1 (vector double a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector long long foo2 (vector long long a)
> +{
> +   return vec_reve (a);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
> new file mode 100644
> index 00000000000..b6dd33d6d79
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
> @@ -0,0 +1,28 @@
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */
> +
> +#include <altivec.h>
> +
> +vector int foo1 (vector int a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector float foo2 (vector float a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector short foo3 (vector short a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector char foo4 (vector char a)
> +{
> +   return vec_reve (a);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
  2021-09-12 15:50 ` Bill Schmidt
@ 2021-09-12 19:38   ` Segher Boessenkool
  0 siblings, 0 replies; 8+ messages in thread
From: Segher Boessenkool @ 2021-09-12 19:38 UTC (permalink / raw)
  To: Bill Schmidt; +Cc: HAO CHEN GUI, gcc-patches

Hi!

On Sun, Sep 12, 2021 at 10:50:17AM -0500, Bill Schmidt wrote:
> On 9/8/21 1:42 AM, HAO CHEN GUI wrote:
> >+;; Vector reverse elements for V2DI V2DF
> >+(define_expand "altivec_vreve<mode>2"
> >+  [(set (match_operand:VEC_64 0 "register_operand" "=v")
> >+       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" 
> >"v")]
> >+                     UNSPEC_VREVEV))]
> >+  "TARGET_ALTIVEC"

(Your quoted text is mangled again)

> "TARGET_VSX" for V2DI and V2DF.  (This is the only good reason for
> splitting this into two patterns; you need different criteria.)

The *good* reason for splitting the pattern is they have completely
different expansions as well.  Which is why I asked for it.

(I'll review the patch later).


Segher

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Ping^1 [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
  2021-09-08  6:42 [PATCH, rs6000] optimization for vec_reve builtin [PR100868] HAO CHEN GUI
  2021-09-12 15:50 ` Bill Schmidt
@ 2021-10-11  5:32 ` HAO CHEN GUI
  2021-10-12 13:55   ` Bill Schmidt
  1 sibling, 1 reply; 8+ messages in thread
From: HAO CHEN GUI @ 2021-10-11  5:32 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, Bill Schmidt

Hi,

      Gentle ping this:

https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579038.html

Thanks

On 8/9/2021 下午 2:42, HAO CHEN GUI wrote:
> Hi,
>
>   The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it is implemented by quadword byte reverse plus halfword/word byte reverse when p9_vector is defined.
>
>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>
>
> ChangeLog
>
> 2021-09-08 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
>         * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K):
>         Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw
>         for v4si or v4sf when p9_vector is defined.
>         (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by
>         xxswapd.
>
> gcc/testsuite/
>         * gcc.target/powerpc/vec_reve_1.c: New test.
>         * gcc.target/powerpc/vec_reve_2.c: Likewise.
>
>
> patch.diff
>
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 1351dafbc41..a1698ce85c0 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
>    DONE;
>  })
>
> -;; Vector reverse elements
> +;; Vector reverse elements for V16QI V8HI V4SI V4SF
>  (define_expand "altivec_vreve<mode>2"
> -  [(set (match_operand:VEC_A 0 "register_operand" "=v")
> -       (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
> +  [(set (match_operand:VEC_K 0 "register_operand" "=v")
> +       (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
>                       UNSPEC_VREVEV))]
>    "TARGET_ALTIVEC"
>  {
> +  if (TARGET_P9_VECTOR)
> +    {
> +      if (<MODE>mode == V16QImode)
> +       emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
> +      else if (<MODE>mode == V8HImode)
> +       {
> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
> +                                            <MODE>mode, 0);
> +         rtx temp = gen_reg_rtx (V1TImode);
> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
> +                                            V1TImode, 0);
> +         emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
> +       }
> +      else /* V4SI and V4SF.  */
> +       {
> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
> +                                            <MODE>mode, 0);
> +         rtx temp = gen_reg_rtx (V1TImode);
> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
> +                                            V1TImode, 0);
> +         if (<MODE>mode == V4SImode)
> +           emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
> +         else
> +           emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
> +       }
> +      DONE;
> +    }
> +
>    int i, j, size, num_elements;
>    rtvec v = rtvec_alloc (16);
>    rtx mask = gen_reg_rtx (V16QImode);
> @@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2"
>    DONE;
>  })
>
> +;; Vector reverse elements for V2DI V2DF
> +(define_expand "altivec_vreve<mode>2"
> +  [(set (match_operand:VEC_64 0 "register_operand" "=v")
> +       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
> +                     UNSPEC_VREVEV))]
> +  "TARGET_ALTIVEC"
> +{
> +  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
> +  DONE;
> +})
> +
>  ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
>  ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
>  (define_insn "altivec_lvlx"
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
> new file mode 100644
> index 00000000000..83a9206758b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
> @@ -0,0 +1,16 @@
> +/* { dg-require-effective-target powerpc_altivec_ok } */
> +/* { dg-options "-O2 -maltivec" } */
> +
> +#include <altivec.h>
> +
> +vector double foo1 (vector double a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector long long foo2 (vector long long a)
> +{
> +   return vec_reve (a);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
> new file mode 100644
> index 00000000000..b6dd33d6d79
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
> @@ -0,0 +1,28 @@
> +/* { dg-require-effective-target powerpc_p9vector_ok } */
> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */
> +
> +#include <altivec.h>
> +
> +vector int foo1 (vector int a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector float foo2 (vector float a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector short foo3 (vector short a)
> +{
> +   return vec_reve (a);
> +}
> +
> +vector char foo4 (vector char a)
> +{
> +   return vec_reve (a);
> +}
> +
> +/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
> +/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
> +/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: Ping^1 [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
  2021-10-11  5:32 ` Ping^1 " HAO CHEN GUI
@ 2021-10-12 13:55   ` Bill Schmidt
  0 siblings, 0 replies; 8+ messages in thread
From: Bill Schmidt @ 2021-10-12 13:55 UTC (permalink / raw)
  To: HAO CHEN GUI, gcc-patches; +Cc: Segher Boessenkool

Hi Hao Chen,

On 10/11/21 12:32 AM, HAO CHEN GUI wrote:
> Hi,
>
>      Gentle ping this:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2021-September/579038.html
>
> Thanks
>
> On 8/9/2021 下午 2:42, HAO CHEN GUI wrote:
>> Hi,
>>
>>   The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it is implemented by quadword byte reverse plus halfword/word byte reverse when p9_vector is defined.
>>
>>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>>
>> ChangeLog
>>
>> 2021-09-08 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>>         * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K):
>>         Use xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw
>>         for v4si or v4sf when p9_vector is defined.
>>         (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by
>>         xxswapd.
>>
>> gcc/testsuite/
>>         * gcc.target/powerpc/vec_reve_1.c: New test.
>>         * gcc.target/powerpc/vec_reve_2.c: Likewise.
>>
>>
>> patch.diff
>>
>> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
>> index 1351dafbc41..a1698ce85c0 100644
>> --- a/gcc/config/rs6000/altivec.md
>> +++ b/gcc/config/rs6000/altivec.md
>> @@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
>>    DONE;
>>  })
>>
>> -;; Vector reverse elements
>> +;; Vector reverse elements for V16QI V8HI V4SI V4SF
>>  (define_expand "altivec_vreve<mode>2"
>> -  [(set (match_operand:VEC_A 0 "register_operand" "=v")
>> -       (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
>> +  [(set (match_operand:VEC_K 0 "register_operand" "=v")
>> +       (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
>>                       UNSPEC_VREVEV))]
>>    "TARGET_ALTIVEC"
>>  {
>> +  if (TARGET_P9_VECTOR)
>> +    {
>> +      if (<MODE>mode == V16QImode)
>> +       emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
>> +      else if (<MODE>mode == V8HImode)
>> +       {
>> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
>> +                                            <MODE>mode, 0);
>> +         rtx temp = gen_reg_rtx (V1TImode);
>> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
>> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
>> +                                            V1TImode, 0);
>> +         emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
>> +       }
>> +      else /* V4SI and V4SF.  */
>> +       {
>> +         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
>> +                                            <MODE>mode, 0);
>> +         rtx temp = gen_reg_rtx (V1TImode);
>> +         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
>> +         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
>> +                                            V1TImode, 0);
>> +         if (<MODE>mode == V4SImode)
>> +           emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
>> +         else
>> +           emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
>> +       }
>> +      DONE;
>> +    }
>> +
>>    int i, j, size, num_elements;
>>    rtvec v = rtvec_alloc (16);
>>    rtx mask = gen_reg_rtx (V16QImode);
>> @@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2"
>>    DONE;
>>  })
>>
>> +;; Vector reverse elements for V2DI V2DF
>> +(define_expand "altivec_vreve<mode>2"
>> +  [(set (match_operand:VEC_64 0 "register_operand" "=v")
>> +       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
>> +                     UNSPEC_VREVEV))]
>> +  "TARGET_ALTIVEC"
>> +{
>> +  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
>> +  DONE;
>> +})
>> +
>>  ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
>>  ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
>>  (define_insn "altivec_lvlx"
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
>> new file mode 100644
>> index 00000000000..83a9206758b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
>> @@ -0,0 +1,16 @@
>> +/* { dg-require-effective-target powerpc_altivec_ok } */
>> +/* { dg-options "-O2 -maltivec" } */
>> +
>> +#include <altivec.h>
>> +
>> +vector double foo1 (vector double a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector long long foo2 (vector long long a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
>> diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
>> new file mode 100644
>> index 00000000000..b6dd33d6d79
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
>> @@ -0,0 +1,28 @@
>> +/* { dg-require-effective-target powerpc_p9vector_ok } */
>> +/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */

One nit here -- you don't need -maltivec as it's redundant with -mdejagnu-cpu=power9.

Looks fine to me with or without that fixed.  Recommend maintainers approve.

Thanks for the improvement!
Bill
>> +
>> +#include <altivec.h>
>> +
>> +vector int foo1 (vector int a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector float foo2 (vector float a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector short foo3 (vector short a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +vector char foo4 (vector char a)
>> +{
>> +   return vec_reve (a);
>> +}
>> +
>> +/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
>> +/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
>> +/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */
>>


^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
  2021-11-22  2:56 ` David Edelsohn
@ 2021-11-23  8:38   ` HAO CHEN GUI
  0 siblings, 0 replies; 8+ messages in thread
From: HAO CHEN GUI @ 2021-11-23  8:38 UTC (permalink / raw)
  To: David Edelsohn; +Cc: gcc-patches, Segher Boessenkool, Bill Schmidt

Thanks for your review. Committed as r12-5463.

On 22/11/2021 上午 10:56, David Edelsohn wrote:
> On Wed, Nov 17, 2021 at 3:28 AM HAO CHEN GUI <guihaoc@linux.ibm.com> wrote:
>> Hi,
>>
>>   The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it is implemented by quadword byte reverse plus halfword/word byte reverse when p9_vector is set.
>>
>>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>>
>> ChangeLog
>> 2021-11-17 Haochen Gui <guihaoc@linux.ibm.com>
>>
>> gcc/
>>         * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K): Use
>>         xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw for v4si
>>         or v4sf when p9_vector is set.
>>         (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by xxswapd.
>>
>> gcc/testsuite/
>>         * gcc.target/powerpc/vec_reve_1.c: New test.
>>         * gcc.target/powerpc/vec_reve_2.c: Likewise.
> This is okay.
>
> Please don't send a message that contains the patch as both an inline
> message and as an attachment.
>
> Thanks, David

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
  2021-11-17  8:28 HAO CHEN GUI
@ 2021-11-22  2:56 ` David Edelsohn
  2021-11-23  8:38   ` HAO CHEN GUI
  0 siblings, 1 reply; 8+ messages in thread
From: David Edelsohn @ 2021-11-22  2:56 UTC (permalink / raw)
  To: HAO CHEN GUI; +Cc: gcc-patches, Segher Boessenkool, Bill Schmidt

On Wed, Nov 17, 2021 at 3:28 AM HAO CHEN GUI <guihaoc@linux.ibm.com> wrote:
>
> Hi,
>
>   The patch optimized for vec_reve builtin on rs6000. For V2DI and V2DF, it is implemented by xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it is implemented by quadword byte reverse plus halfword/word byte reverse when p9_vector is set.
>
>   Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.
>
> ChangeLog
> 2021-11-17 Haochen Gui <guihaoc@linux.ibm.com>
>
> gcc/
>         * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K): Use
>         xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw for v4si
>         or v4sf when p9_vector is set.
>         (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by xxswapd.
>
> gcc/testsuite/
>         * gcc.target/powerpc/vec_reve_1.c: New test.
>         * gcc.target/powerpc/vec_reve_2.c: Likewise.

This is okay.

Please don't send a message that contains the patch as both an inline
message and as an attachment.

Thanks, David

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [PATCH, rs6000] optimization for vec_reve builtin [PR100868]
@ 2021-11-17  8:28 HAO CHEN GUI
  2021-11-22  2:56 ` David Edelsohn
  0 siblings, 1 reply; 8+ messages in thread
From: HAO CHEN GUI @ 2021-11-17  8:28 UTC (permalink / raw)
  To: gcc-patches; +Cc: Segher Boessenkool, David, Bill Schmidt

[-- Attachment #1: Type: text/plain, Size: 4849 bytes --]

Hi,

  This patch optimizes the vec_reve builtin on rs6000. For V2DI and V2DF, it is implemented with xxswapd on all targets. For V16QI, V8HI, V4SI and V4SF, it is implemented with a quadword byte reverse followed by a halfword/word byte reverse when p9_vector is set.

  Bootstrapped and tested on powerpc64le-linux with no regressions. Is this okay for trunk? Any recommendations? Thanks a lot.

ChangeLog
2021-11-17 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
        * config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K): Use
        xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw for v4si
        or v4sf when p9_vector is set.
        (altivec_vreve<mode>2 for VEC_64): Defined. Implemented by xxswapd.

gcc/testsuite/
        * gcc.target/powerpc/vec_reve_1.c: New test.
        * gcc.target/powerpc/vec_reve_2.c: Likewise.

patch.diff
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1351dafbc41..a1698ce85c0 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4049,13 +4049,43 @@ (define_expand "altivec_negv4sf2"
   DONE;
 })

-;; Vector reverse elements
+;; Vector reverse elements for V16QI V8HI V4SI V4SF
 (define_expand "altivec_vreve<mode>2"
-  [(set (match_operand:VEC_A 0 "register_operand" "=v")
-       (unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
+  [(set (match_operand:VEC_K 0 "register_operand" "=v")
+       (unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
                      UNSPEC_VREVEV))]
   "TARGET_ALTIVEC"
 {
+  if (TARGET_P9_VECTOR)
+    {
+      if (<MODE>mode == V16QImode)
+       emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
+      else if (<MODE>mode == V8HImode)
+       {
+         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+                                            <MODE>mode, 0);
+         rtx temp = gen_reg_rtx (V1TImode);
+         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
+                                            V1TImode, 0);
+         emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
+       }
+      else /* V4SI and V4SF.  */
+       {
+         rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+                                            <MODE>mode, 0);
+         rtx temp = gen_reg_rtx (V1TImode);
+         emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+         rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
+                                            V1TImode, 0);
+         if (<MODE>mode == V4SImode)
+           emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
+         else
+           emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
+       }
+      DONE;
+    }
+
   int i, j, size, num_elements;
   rtvec v = rtvec_alloc (16);
   rtx mask = gen_reg_rtx (V16QImode);
@@ -4074,6 +4104,17 @@ (define_expand "altivec_vreve<mode>2"
   DONE;
 })

+;; Vector reverse elements for V2DI V2DF
+(define_expand "altivec_vreve<mode>2"
+  [(set (match_operand:VEC_64 0 "register_operand" "=v")
+       (unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
+                     UNSPEC_VREVEV))]
+  "TARGET_ALTIVEC"
+{
+  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
 ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
 (define_insn "altivec_lvlx"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
new file mode 100644
index 00000000000..83a9206758b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
@@ -0,0 +1,16 @@
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O2 -maltivec" } */
+
+#include <altivec.h>
+
+vector double foo1 (vector double a)
+{
+   return vec_reve (a);
+}
+
+vector long long foo2 (vector long long a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
new file mode 100644
index 00000000000..b6dd33d6d79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
@@ -0,0 +1,28 @@
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2 -maltivec" } */
+
+#include <altivec.h>
+
+vector int foo1 (vector int a)
+{
+   return vec_reve (a);
+}
+
+vector float foo2 (vector float a)
+{
+   return vec_reve (a);
+}
+
+vector short foo3 (vector short a)
+{
+   return vec_reve (a);
+}
+
+vector char foo4 (vector char a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */

[-- Attachment #2: ChangeLog.txt --]
[-- Type: text/plain, Size: 396 bytes --]

2021-11-17 Haochen Gui <guihaoc@linux.ibm.com>

gcc/
	* config/rs6000/altivec.md (altivec_vreve<mode>2 for VEC_K): Use
	xxbrq for v16qi, xxbrq + xxbrh for v8hi and xxbrq + xxbrw for v4si
	or v4sf when p9_vector is set.
	(altivec_vreve<mode>2 for VEC_64): New expander, implemented by xxswapd.

gcc/testsuite/
	* gcc.target/powerpc/vec_reve_1.c: New test.
	* gcc.target/powerpc/vec_reve_2.c: Likewise.

[-- Attachment #3: patch.diff.txt --]
[-- Type: text/plain, Size: 3731 bytes --]

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 93d237156d5..480db032495 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -4029,12 +4029,43 @@ (define_expand "altivec_vreveti2"
   DONE;
 })
 
+;; Vector reverse elements for V16QI V8HI V4SI V4SF
 (define_expand "altivec_vreve<mode>2"
-  [(set (match_operand:VEC_A 0 "register_operand" "=v")
-	(unspec:VEC_A [(match_operand:VEC_A 1 "register_operand" "v")]
+  [(set (match_operand:VEC_K 0 "register_operand" "=v")
+	(unspec:VEC_K [(match_operand:VEC_K 1 "register_operand" "v")]
 		      UNSPEC_VREVEV))]
   "TARGET_ALTIVEC"
 {
+  if (TARGET_P9_VECTOR)
+    {
+      if (<MODE>mode == V16QImode)
+	emit_insn (gen_p9_xxbrq_v16qi (operands[0], operands[1]));
+      else if (<MODE>mode == V8HImode)
+	{
+	  rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+					     <MODE>mode, 0);
+	  rtx temp = gen_reg_rtx (V1TImode);
+	  emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+	  rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
+					     V1TImode, 0);
+	  emit_insn (gen_p9_xxbrh_v8hi (operands[0], subreg2));
+	}
+      else /* V4SI and V4SF.  */
+	{
+	  rtx subreg1 = simplify_gen_subreg (V1TImode, operands[1],
+					     <MODE>mode, 0);
+	  rtx temp = gen_reg_rtx (V1TImode);
+	  emit_insn (gen_p9_xxbrq_v1ti (temp, subreg1));
+	  rtx subreg2 = simplify_gen_subreg (<MODE>mode, temp,
+					     V1TImode, 0);
+	  if (<MODE>mode == V4SImode)
+	    emit_insn (gen_p9_xxbrw_v4si (operands[0], subreg2));
+	  else
+	    emit_insn (gen_p9_xxbrw_v4sf (operands[0], subreg2));
+	}
+      DONE;
+    }
+
   int i, j, size, num_elements;
   rtvec v = rtvec_alloc (16);
   rtx mask = gen_reg_rtx (V16QImode);
@@ -4053,6 +4084,17 @@ (define_expand "altivec_vreve<mode>2"
   DONE;
 })
 
+;; Vector reverse elements for V2DI V2DF
+(define_expand "altivec_vreve<mode>2"
+  [(set (match_operand:VEC_64 0 "register_operand" "=v")
+	(unspec:VEC_64 [(match_operand:VEC_64 1 "register_operand" "v")]
+		      UNSPEC_VREVEV))]
+  "TARGET_ALTIVEC"
+{
+  emit_insn (gen_xxswapd_<mode> (operands[0], operands[1]));
+  DONE;
+})
+
 ;; Vector SIMD PEM v2.06c defines LVLX, LVLXL, LVRX, LVRXL,
 ;; STVLX, STVLXL, STVVRX, STVRXL are available only on Cell.
 (define_insn "altivec_lvlx"
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
new file mode 100644
index 00000000000..120c318ddfa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_altivec_ok } */
+/* { dg-options "-O2 -maltivec" } */
+
+#include <altivec.h>
+
+vector double foo1 (vector double a)
+{
+   return vec_reve (a);
+}
+
+vector long long foo2 (vector long long a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxpermdi\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
new file mode 100644
index 00000000000..966193951c3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vec_reve_2.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
+/* { dg-options "-mdejagnu-cpu=power9 -O2" } */
+
+#include <altivec.h>
+
+vector int foo1 (vector int a)
+{
+   return vec_reve (a);
+}
+
+vector float foo2 (vector float a)
+{
+   return vec_reve (a);
+}
+
+vector short foo3 (vector short a)
+{
+   return vec_reve (a);
+}
+
+vector char foo4 (vector char a)
+{
+   return vec_reve (a);
+}
+
+/* { dg-final { scan-assembler-times {\mxxbrq\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mxxbrw\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mxxbrh\M} 1 } } */

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2021-11-23  8:38 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-08  6:42 [PATCH, rs6000] optimization for vec_reve builtin [PR100868] HAO CHEN GUI
2021-09-12 15:50 ` Bill Schmidt
2021-09-12 19:38   ` Segher Boessenkool
2021-10-11  5:32 ` Ping^1 " HAO CHEN GUI
2021-10-12 13:55   ` Bill Schmidt
2021-11-17  8:28 HAO CHEN GUI
2021-11-22  2:56 ` David Edelsohn
2021-11-23  8:38   ` HAO CHEN GUI

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).