public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-3180] rs6000: Make some BIFs vectorized on P10
@ 2021-08-27 5:15 Kewen Lin
0 siblings, 0 replies; only message in thread
From: Kewen Lin @ 2021-08-27 5:15 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:26f5ea5e141cf1e40289dbc73ac21e85ad39fa57
commit r12-3180-g26f5ea5e141cf1e40289dbc73ac21e85ad39fa57
Author: Kewen Lin <linkw@linux.ibm.com>
Date: Thu Aug 26 20:23:58 2021 -0500
rs6000: Make some BIFs vectorized on P10
This patch adds support that lets the vectorizer vectorize the
scalar versions of some built-in functions on Power10.
gcc/ChangeLog:
* config/rs6000/rs6000.c (rs6000_builtin_md_vectorized_function): Add
support for built-in functions MISC_BUILTIN_DIVWE, MISC_BUILTIN_DIVWEU,
MISC_BUILTIN_DIVDE, MISC_BUILTIN_DIVDEU, P10_BUILTIN_CFUGED,
P10_BUILTIN_CNTLZDM, P10_BUILTIN_CNTTZDM, P10_BUILTIN_PDEPD and
P10_BUILTIN_PEXTD on Power10.
gcc/testsuite/ChangeLog:
* gcc.target/powerpc/dive-vectorize-1.c: New test.
* gcc.target/powerpc/dive-vectorize-1.h: New test.
* gcc.target/powerpc/dive-vectorize-2.c: New test.
* gcc.target/powerpc/dive-vectorize-2.h: New test.
* gcc.target/powerpc/dive-vectorize-run-1.c: New test.
* gcc.target/powerpc/dive-vectorize-run-2.c: New test.
* gcc.target/powerpc/p10-bifs-vectorize-1.c: New test.
* gcc.target/powerpc/p10-bifs-vectorize-1.h: New test.
* gcc.target/powerpc/p10-bifs-vectorize-run-1.c: New test.
Diff:
---
gcc/config/rs6000/rs6000.c | 53 ++++++++++++++++++++
.../gcc.target/powerpc/dive-vectorize-1.c | 11 +++++
.../gcc.target/powerpc/dive-vectorize-1.h | 22 +++++++++
.../gcc.target/powerpc/dive-vectorize-2.c | 13 +++++
.../gcc.target/powerpc/dive-vectorize-2.h | 22 +++++++++
.../gcc.target/powerpc/dive-vectorize-run-1.c | 54 +++++++++++++++++++++
.../gcc.target/powerpc/dive-vectorize-run-2.c | 56 ++++++++++++++++++++++
.../gcc.target/powerpc/p10-bifs-vectorize-1.c | 16 +++++++
.../gcc.target/powerpc/p10-bifs-vectorize-1.h | 40 ++++++++++++++++
.../gcc.target/powerpc/p10-bifs-vectorize-run-1.c | 48 +++++++++++++++++++
10 files changed, 335 insertions(+)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 05fb6aad4b0..d02c1b63a24 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -5793,6 +5793,59 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
default:
break;
}
+
+ machine_mode in_vmode = TYPE_MODE (type_in);
+ machine_mode out_vmode = TYPE_MODE (type_out);
+
+ /* Power10 supported vectorized built-in functions. */
+ if (TARGET_POWER10
+ && in_vmode == out_vmode
+ && VECTOR_UNIT_ALTIVEC_OR_VSX_P (in_vmode))
+ {
+ machine_mode exp_mode = DImode;
+ machine_mode exp_vmode = V2DImode;
+ enum rs6000_builtins bif;
+ switch (fn)
+ {
+ case MISC_BUILTIN_DIVWE:
+ case MISC_BUILTIN_DIVWEU:
+ exp_mode = SImode;
+ exp_vmode = V4SImode;
+ if (fn == MISC_BUILTIN_DIVWE)
+ bif = P10V_BUILTIN_DIVES_V4SI;
+ else
+ bif = P10V_BUILTIN_DIVEU_V4SI;
+ break;
+ case MISC_BUILTIN_DIVDE:
+ case MISC_BUILTIN_DIVDEU:
+ if (fn == MISC_BUILTIN_DIVDE)
+ bif = P10V_BUILTIN_DIVES_V2DI;
+ else
+ bif = P10V_BUILTIN_DIVEU_V2DI;
+ break;
+ case P10_BUILTIN_CFUGED:
+ bif = P10V_BUILTIN_VCFUGED;
+ break;
+ case P10_BUILTIN_CNTLZDM:
+ bif = P10V_BUILTIN_VCLZDM;
+ break;
+ case P10_BUILTIN_CNTTZDM:
+ bif = P10V_BUILTIN_VCTZDM;
+ break;
+ case P10_BUILTIN_PDEPD:
+ bif = P10V_BUILTIN_VPDEPD;
+ break;
+ case P10_BUILTIN_PEXTD:
+ bif = P10V_BUILTIN_VPEXTD;
+ break;
+ default:
+ return NULL_TREE;
+ }
+
+ if (in_mode == exp_mode && in_vmode == exp_vmode)
+ return rs6000_builtin_decls[bif];
+ }
+
return NULL_TREE;
}
\f
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
new file mode 100644
index 00000000000..84f1b0a88f2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if signed/unsigned int extended divisions get vectorized. */
+
+#include "dive-vectorize-1.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvdivesw\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveuw\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
new file mode 100644
index 00000000000..119f637b46b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-1.h
@@ -0,0 +1,22 @@
+#define N 128
+
+typedef signed int si;
+typedef unsigned int ui;
+
+si si_a[N], si_b[N], si_c[N];
+ui ui_a[N], ui_b[N], ui_c[N];
+
+__attribute__ ((noipa)) void
+test_divwe ()
+{
+ for (int i = 0; i < N; i++)
+ si_c[i] = __builtin_divwe (si_a[i], si_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_divweu ()
+{
+ for (int i = 0; i < N; i++)
+ ui_c[i] = __builtin_divweu (ui_a[i], ui_b[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
new file mode 100644
index 00000000000..13d768d748c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* We scan for vdive*d which are only supported on 64-bit env. */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if signed/unsigned long long extended divisions get vectorized. */
+
+#include "dive-vectorize-2.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvdivesd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvdiveud\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
new file mode 100644
index 00000000000..1cab56b2e0b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-2.h
@@ -0,0 +1,22 @@
+#define N 128
+
+typedef signed long long sLL;
+typedef unsigned long long uLL;
+
+sLL sll_a[N], sll_b[N], sll_c[N];
+uLL ull_a[N], ull_b[N], ull_c[N];
+
+__attribute__ ((noipa)) void
+test_divde ()
+{
+ for (int i = 0; i < N; i++)
+ sll_c[i] = __builtin_divde (sll_a[i], sll_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_divdeu ()
+{
+ for (int i = 0; i < N; i++)
+ ull_c[i] = __builtin_divdeu (ull_a[i], ull_b[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
new file mode 100644
index 00000000000..dab112c2ee6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-1.c
@@ -0,0 +1,54 @@
+/* { dg-do run } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#include "dive-vectorize-1.h"
+
+/* Check if test cases with signed/unsigned int extended division
+ vectorization run successfully. */
+
+/* Use optimize (1) to keep the check functions from being vectorized. */
+
+__attribute__ ((optimize (1))) void
+check_divwe ()
+{
+ test_divwe ();
+ for (int i = 0; i < N; i++)
+ {
+ si exp = __builtin_divwe (si_a[i], si_b[i]);
+ if (exp != si_c[i])
+ __builtin_abort ();
+ }
+}
+
+__attribute__ ((optimize (1))) void
+check_divweu ()
+{
+ test_divweu ();
+ for (int i = 0; i < N; i++)
+ {
+ ui exp = __builtin_divweu (ui_a[i], ui_b[i]);
+ if (exp != ui_c[i])
+ __builtin_abort ();
+ }
+}
+
+int
+main ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ si_a[i] = 0x10 * (i * 3 + 2);
+ si_b[i] = 0x7890 * (i * 3 + 1);
+ ui_a[i] = 0x234 * (i * 11 + 3) - 0xcd * (i * 5 - 7);
+ ui_b[i] = 0x6078 * (i * 7 + 3) + 0xef * (i * 7 - 11);
+ if (si_b[i] == 0 || ui_b[i] == 0)
+ __builtin_abort ();
+ }
+
+ check_divwe ();
+ check_divweu ();
+
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
new file mode 100644
index 00000000000..eb761497def
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/dive-vectorize-run-2.c
@@ -0,0 +1,56 @@
+/* { dg-do run } */
+/* The checked bifs are only supported on 64-bit env. */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#include "dive-vectorize-2.h"
+
+/* Check if test cases with signed/unsigned long long extended division
+ vectorization run successfully. */
+
+/* Use optimize (1) to keep the check functions from being vectorized. */
+
+__attribute__ ((optimize (1))) void
+check_divde ()
+{
+ test_divde ();
+ for (int i = 0; i < N; i++)
+ {
+ sLL exp = __builtin_divde (sll_a[i], sll_b[i]);
+ if (exp != sll_c[i])
+ __builtin_abort ();
+ }
+}
+
+__attribute__ ((optimize (1))) void
+check_divdeu ()
+{
+ test_divdeu ();
+ for (int i = 0; i < N; i++)
+ {
+ uLL exp = __builtin_divdeu (ull_a[i], ull_b[i]);
+ if (exp != ull_c[i])
+ __builtin_abort ();
+ }
+}
+
+int
+main ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ sll_a[i] = 0x102 * (i * 3 + 2);
+ sll_b[i] = 0x789ab * (i * 3 + 1);
+ ull_a[i] = 0x2345 * (i * 11 + 3) - 0xcd1 * (i * 5 - 7);
+ ull_b[i] = 0x6078e * (i * 7 + 3) + 0xefa * (i * 7 - 11);
+ if (sll_b[i] == 0 || ull_b[i] == 0)
+ __builtin_abort ();
+ }
+
+ check_divde ();
+ check_divdeu ();
+
+ return 0;
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
new file mode 100644
index 00000000000..fdbb9ebd61b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* The instructions we scan for are only supported on 64-bit env. */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model -fno-unroll-loops -fdump-tree-vect-details" } */
+
+/* Test if some Power10 built-in functions get vectorized. */
+
+#include "p10-bifs-vectorize-1.h"
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 5 "vect" } } */
+/* { dg-final { scan-assembler-times {\mvcfuged\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvclzdm\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvctzdm\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvpdepd\M} 1 } } */
+/* { dg-final { scan-assembler-times {\mvpextd\M} 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
new file mode 100644
index 00000000000..80b7aacf810
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-1.h
@@ -0,0 +1,40 @@
+#define N 32
+
+typedef unsigned long long uLL;
+uLL ull_a[N], ull_b[N], ull_c[N];
+
+__attribute__ ((noipa)) void
+test_cfuged ()
+{
+ for (int i = 0; i < N; i++)
+ ull_c[i] = __builtin_cfuged (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_cntlzdm ()
+{
+ for (int i = 0; i < N; i++)
+ ull_c[i] = __builtin_cntlzdm (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_cnttzdm ()
+{
+ for (int i = 0; i < N; i++)
+ ull_c[i] = __builtin_cnttzdm (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_pdepd ()
+{
+ for (int i = 0; i < N; i++)
+ ull_c[i] = __builtin_pdepd (ull_a[i], ull_b[i]);
+}
+
+__attribute__ ((noipa)) void
+test_pextd ()
+{
+ for (int i = 0; i < N; i++)
+ ull_c[i] = __builtin_pextd (ull_a[i], ull_b[i]);
+}
+
diff --git a/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
new file mode 100644
index 00000000000..828fbe1f9a7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/p10-bifs-vectorize-run-1.c
@@ -0,0 +1,48 @@
+/* { dg-do run } */
+/* The checked bifs are only supported on 64-bit env. */
+/* { dg-require-effective-target lp64 } */
+/* { dg-require-effective-target power10_hw } */
+/* { dg-options "-mdejagnu-cpu=power10 -O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#include "p10-bifs-vectorize-1.h"
+
+/* Check if the vectorized built-in functions run as expected. */
+
+/* Use optimize (1) to keep the check functions from being vectorized. */
+
+#define CHECK(name) \
+ __attribute__ ((optimize (1))) void check_##name () \
+ { \
+ test_##name (); \
+ for (int i = 0; i < N; i++) \
+ { \
+ uLL exp = __builtin_##name (ull_a[i], ull_b[i]); \
+ if (exp != ull_c[i]) \
+ __builtin_abort (); \
+ } \
+ }
+
+CHECK (cfuged)
+CHECK (cntlzdm)
+CHECK (cnttzdm)
+CHECK (pdepd)
+CHECK (pextd)
+
+int
+main ()
+{
+ for (int i = 0; i < N; i++)
+ {
+ ull_a[i] = 0x789a * (i * 11 - 5) - 0xcd1 * (i * 5 - 7);
+ ull_b[i] = 0xfedc * (i * 7 + 3) + 0x467 * (i * 7 - 11);
+ }
+
+ check_cfuged ();
+ check_cntlzdm ();
+ check_cnttzdm ();
+ check_pdepd ();
+ check_pextd ();
+
+ return 0;
+}
+
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2021-08-27 5:15 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-27 5:15 [gcc r12-3180] rs6000: Make some BIFs vectorized on P10 Kewen Lin
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).