From: Andreas Krebbel <krebbel@linux.ibm.com>
To: gcc-patches@gcc.gnu.org
Subject: [PATCH 4/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vector merge
Date: Thu, 29 Jul 2021 09:37:29 +0200 [thread overview]
Message-ID: <20210729073730.23208-5-krebbel@linux.ibm.com> (raw)
In-Reply-To: <20210729073730.23208-1-krebbel@linux.ibm.com>
This patch implements the TARGET_VECTORIZE_VEC_PERM_CONST target hook in
the IBM Z backend. The initial implementation only exploits the vector
merge instructions (vmrh/vmrl), but there is more to come.
gcc/ChangeLog:
* config/s390/s390.c (MAX_VECT_LEN): Define macro.
(struct expand_vec_perm_d): Define struct.
(expand_perm_with_merge): New function.
(vectorize_vec_perm_const_1): New function.
(s390_vectorize_vec_perm_const): New function.
(TARGET_VECTORIZE_VEC_PERM_CONST): Define target macro.
gcc/testsuite/ChangeLog:
* gcc.target/s390/vector/perm-merge.c: New test.
* gcc.target/s390/vector/vec-types.h: New file.
---
gcc/config/s390/s390.c | 108 ++++++++++++++++++
.../gcc.target/s390/vector/perm-merge.c | 104 +++++++++++++++++
.../gcc.target/s390/vector/vec-types.h | 35 ++++++
3 files changed, 247 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/s390/vector/perm-merge.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/vec-types.h
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index b1a9ca9d8aa..684241b00b8 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -16928,6 +16928,110 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
return after_md_seq;
}
+#define MAX_VECT_LEN 16 /* Capacity of expand_vec_perm_d::perm.  */
+/* Description of one constant vector permutation to be expanded.  */
+struct expand_vec_perm_d
+{
+ rtx target, op0, op1; /* Destination and the two source operands.  */
+ unsigned char perm[MAX_VECT_LEN]; /* Selector; entries are < 2 * nelt.  */
+ machine_mode vmode; /* Vector mode of the permutation.  */
+ unsigned char nelt; /* GET_MODE_NUNITS (vmode).  */
+ bool testing_p; /* Set when target is NULL_RTX: check only, emit nothing.  */
+};
+
+/* Try to expand the constant vector permutation described by D using
+   the vector merge instructions vmrh and vmrl.  Return true if the
+   selector matches one of the two merge patterns.  */
+static bool
+expand_perm_with_merge (const struct expand_vec_perm_d &d)
+{
+  /* The merge patterns pair up elements, so NELT has to be even.  */
+  if (d.nelt % 2 != 0)
+    return false;
+
+  /* Merge high: element I / 2 of op0 for even I, of op1 for odd I.
+     For V4SI the selector is { 0, 4, 1, 5 }.  */
+  bool merge_hi_p = true;
+  for (int i = 0; merge_hi_p && i < d.nelt; i++)
+    {
+      const int expected = i / 2 + (i % 2) * d.nelt;
+      merge_hi_p = d.perm[i] == expected;
+    }
+
+  /* Merge low: the same pattern starting at the middle of each
+     operand.  For V4SI the selector is { 2, 6, 3, 7 }.  It only has
+     to be checked if merge high did not match.  */
+  bool merge_lo_p = !merge_hi_p;
+  for (int i = 0; merge_lo_p && i < d.nelt; i++)
+    {
+      const int expected = (i + d.nelt) / 2 + (i % 2) * d.nelt;
+      merge_lo_p = d.perm[i] == expected;
+    }
+
+  if (!merge_hi_p && !merge_lo_p)
+    return false;
+
+  /* When only testing, report success without emitting anything.  */
+  if (d.testing_p)
+    return true;
+
+  /* Exactly one flag is set here; MERGE_HI_P picks the variant.  */
+  s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
+
+  return true;
+}
+
+/* Pick an expansion strategy for the constant vector permutation
+   described by D.  Vector merge is the only strategy tried here.
+   Return true if D could be handled, false otherwise.  */
+static bool
+vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
+{
+  /* D.testing_p is honoured by the strategy itself.  */
+  const bool expanded_p = expand_perm_with_merge (d);
+
+  return expanded_p;
+}
+
+/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.  Return true if the constant
+   permutation SEL of OP0 and OP1 in VMODE can be expanded.  Instructions
+   are only emitted if TARGET is not NULL_RTX; otherwise this only checks.  */
+
+bool
+s390_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
+                               const vec_perm_indices &sel)
+{
+  struct expand_vec_perm_d d;
+  unsigned int i, nelt;
+
+  /* Only 16-byte vector modes supported by the target are handled.  */
+  if (!s390_vector_mode_supported_p (vmode) || GET_MODE_SIZE (vmode) != 16)
+    return false;
+
+  d.target = target;
+  d.op0 = op0;
+  d.op1 = op1;
+
+  d.vmode = vmode;
+  gcc_assert (VECTOR_MODE_P (d.vmode));
+  d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
+  d.testing_p = target == NULL_RTX;
+
+  gcc_assert (target == NULL_RTX || REG_P (target));
+  gcc_assert (sel.length () == nelt);
+
+  /* Copy the selector; every index has to address an element of the
+     concatenation of OP0 and OP1.  */
+  for (i = 0; i < nelt; i++)
+    {
+      unsigned char e = sel[i];
+      gcc_assert (e < 2 * nelt);
+      d.perm[i] = e;
+    }
+
+  return vectorize_vec_perm_const_1 (d);
+}
+
/* Initialize GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -17238,6 +17342,10 @@ s390_md_asm_adjust (vec<rtx> &outputs, vec<rtx> &inputs,
#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST s390_md_asm_adjust
+#undef TARGET_VECTORIZE_VEC_PERM_CONST
+#define TARGET_VECTORIZE_VEC_PERM_CONST s390_vectorize_vec_perm_const
+
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-s390.h"
diff --git a/gcc/testsuite/gcc.target/s390/vector/perm-merge.c b/gcc/testsuite/gcc.target/s390/vector/perm-merge.c
new file mode 100644
index 00000000000..51b23ddd886
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/perm-merge.c
@@ -0,0 +1,104 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps" } */
+/* { dg-do run { target { s390_z14_hw } } } */
+
+/* { dg-final { scan-assembler-times "\tvmrhb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlb\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrhh\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrlh\t" 2 } } */
+/* { dg-final { scan-assembler-times "\tvmrhf\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlf\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrhg\t" 3 } } */
+/* { dg-final { scan-assembler-times "\tvmrlg\t" 3 } } */
+
+#include "vec-types.h"
+/* GEN_MERGE_<N> (VEC_TYPE, HILO, A) defines merge_<HILO>_<VEC_TYPE> (a, b), which interleaves N/2 elements each of a and b starting at index A.  */
+#define GEN_MERGE_2(VEC_TYPE, HILO, A) \
+ VEC_TYPE __attribute__((noinline)) \
+ merge_##HILO##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \
+ return (VEC_TYPE){ a[0+A], b[0+A] }; }
+
+#define GEN_MERGE_4(VEC_TYPE, HILO, A) \
+ VEC_TYPE __attribute__((noinline)) \
+ merge_##HILO##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \
+ return (VEC_TYPE){ a[0+A], b[0+A], a[1+A], b[1+A] }; }
+
+#define GEN_MERGE_8(VEC_TYPE, HILO, A) \
+ VEC_TYPE __attribute__((noinline)) \
+ merge_##HILO##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \
+ return (VEC_TYPE){ a[0+A], b[0+A], a[1+A], b[1+A], a[2+A], b[2+A], a[3+A], b[3+A] }; }
+
+#define GEN_MERGE_16(VEC_TYPE, HILO, A) \
+ VEC_TYPE __attribute__((noinline)) \
+ merge_##HILO##_##VEC_TYPE(VEC_TYPE a, VEC_TYPE b) { \
+ return (VEC_TYPE){ a[0+A], b[0+A], a[1+A], b[1+A], a[2+A], b[2+A], a[3+A], b[3+A], \
+ a[4+A], b[4+A], a[5+A], b[5+A], a[6+A], b[6+A], a[7+A], b[7+A]}; }
+
+/* A is 0 for the merge_h_* variants and half the element count for merge_l_*.  */
+GEN_MERGE_16(v16qi, l, 8)
+GEN_MERGE_16(v16qi, h, 0)
+GEN_MERGE_16(uv16qi, l, 8)
+GEN_MERGE_16(uv16qi, h, 0)
+
+GEN_MERGE_8(v8hi, l, 4)
+GEN_MERGE_8(v8hi, h, 0)
+GEN_MERGE_8(uv8hi, l, 4)
+GEN_MERGE_8(uv8hi, h, 0)
+
+GEN_MERGE_4(v4si, l, 2)
+GEN_MERGE_4(v4si, h, 0)
+GEN_MERGE_4(uv4si, l, 2)
+GEN_MERGE_4(uv4si, h, 0)
+
+GEN_MERGE_4(v4sf, l, 2)
+GEN_MERGE_4(v4sf, h, 0)
+
+GEN_MERGE_2(v2di, l, 1)
+GEN_MERGE_2(v2di, h, 0)
+GEN_MERGE_2(uv2di, l, 1)
+GEN_MERGE_2(uv2di, h, 0)
+
+GEN_MERGE_2(v2df, l, 1)
+GEN_MERGE_2(v2df, h, 0)
+
+/* Run-time checks: a holds 0 .. elts-1 and b holds elts .. 2*elts-1, so each result element must equal the index it was taken from in the concatenation of a and b.  */
+#define CHECK_MERGE_LO(VEC_TYPE, SRC1, SRC2) \
+ { \
+ VEC_TYPE v = merge_l_##VEC_TYPE ((SRC1), (SRC2)); \
+ int elts = sizeof(v) / sizeof(v[0]); \
+ for (int i = 0; i < elts; i++) \
+ if (v[i] != (i + elts) / 2 + (i % 2) * elts) \
+ __builtin_abort(); \
+ }
+
+#define CHECK_MERGE_HI(VEC_TYPE, SRC1, SRC2) \
+ { \
+ VEC_TYPE v = merge_h_##VEC_TYPE ((SRC1), (SRC2)); \
+ int elts = sizeof(v) / sizeof(v[0]); \
+ for (int i = 0; i < elts; i++) \
+ if (v[i] != i / 2 + (i % 2) * elts) \
+ __builtin_abort(); \
+ }
+
+#define CHECK_MERGE(VEC_TYPE) \
+ { \
+ VEC_TYPE a = GEN_SEQ_VEC (VEC_TYPE, 0); \
+ VEC_TYPE b = GEN_SEQ_VEC (VEC_TYPE, sizeof(VEC_TYPE) / sizeof(a[0])); \
+ CHECK_MERGE_LO (VEC_TYPE, a, b); \
+ CHECK_MERGE_HI (VEC_TYPE, a, b); \
+ }
+
+int
+main ()
+{ /* Exercise merge low/high for each vector type; CHECK_MERGE aborts on a mismatch.  */
+ CHECK_MERGE(v16qi);
+ CHECK_MERGE(uv16qi);
+ CHECK_MERGE(v8hi);
+ CHECK_MERGE(uv8hi);
+ CHECK_MERGE(v4si);
+ CHECK_MERGE(uv4si);
+ CHECK_MERGE(v4sf);
+ CHECK_MERGE(v2di);
+ CHECK_MERGE(uv2di);
+ CHECK_MERGE(v2df);
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/vec-types.h b/gcc/testsuite/gcc.target/s390/vector/vec-types.h
new file mode 100644
index 00000000000..b7ffbe73321
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/vec-types.h
@@ -0,0 +1,35 @@
+#ifndef VEC_TYPES_H
+#define VEC_TYPES_H 1
+
+typedef __attribute__((vector_size(16))) signed char v16qi;
+typedef __attribute__((vector_size(16))) unsigned char uv16qi;
+
+typedef __attribute__((vector_size(16))) signed short v8hi;
+typedef __attribute__((vector_size(16))) unsigned short uv8hi;
+
+typedef __attribute__((vector_size(16))) signed int v4si;
+typedef __attribute__((vector_size(16))) unsigned int uv4si;
+
+typedef __attribute__((vector_size(16))) signed long long v2di;
+typedef __attribute__((vector_size(16))) unsigned long long uv2di;
+
+#if __SIZEOF_INT128__ == 16
+typedef __attribute__((vector_size(16))) __int128_t v1ti;
+#endif
+
+typedef __attribute__((vector_size(16))) double v2df;
+typedef __attribute__((vector_size(16))) long double v1tf;
+
+#if __ARCH__ >= 12
+typedef __attribute__((vector_size(16))) float v4sf;
+#endif
+/* Yield a VEC_TYPE whose element I is I + ADDEND, converted to the element type.  */
+#define GEN_SEQ_VEC(VEC_TYPE, ADDEND) \
+ ({ VEC_TYPE dummy; \
+ const int elts = sizeof(VEC_TYPE) / sizeof(dummy[0]); \
+ typeof(dummy[0]) __attribute__((aligned(8))) ar[elts]; \
+ for (int i = 0; i < elts; i++) \
+ ar[i] = (typeof(dummy[0]))(i + (ADDEND)); \
+ *(VEC_TYPE*)ar;})
+
+#endif /* VEC_TYPES_H */
--
2.31.1
next prev parent reply other threads:[~2021-07-29 7:37 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-07-29 7:37 [PATCH 0/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST Andreas Krebbel
2021-07-29 7:37 ` [PATCH 1/5] IBM Z: Get rid of vec merge unspec Andreas Krebbel
2021-07-29 7:37 ` [PATCH 2/5] IBM Z: Get rid of vpdi unspec Andreas Krebbel
2021-07-29 7:37 ` [PATCH 3/5] IBM Z: Remove redundant V_HW_64 mode iterator Andreas Krebbel
2021-07-29 7:37 ` Andreas Krebbel [this message]
2021-07-29 7:37 ` [PATCH 5/5] IBM Z: Implement TARGET_VECTORIZE_VEC_PERM_CONST for vpdi Andreas Krebbel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210729073730.23208-5-krebbel@linux.ibm.com \
--to=krebbel@linux.ibm.com \
--cc=gcc-patches@gcc.gnu.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).