* [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores.
@ 2024-05-22 9:30 Tamar Christina
2024-05-28 9:49 ` Tamar Christina
2024-05-30 20:16 ` Richard Sandiford
0 siblings, 2 replies; 4+ messages in thread
From: Tamar Christina @ 2024-05-22 9:30 UTC (permalink / raw)
To: gcc-patches
Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, ktkachov, richard.sandiford
[-- Attachment #1: Type: text/plain, Size: 7637 bytes --]
Hi All,
This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
It is kept off for generic codegen.
Note the reason for the +sve even though they are in aarch64-sve.exp is if the
testsuite is ran with a forced SVE off option, e.g. -march=armv8-a+nosve then
the intrinsics end up being disabled because the -march is preferred over the
-mcpu even though the -mcpu comes later.
This prevents the tests from failing in such runs.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pred_clobber_1.c: New test.
* gcc.target/aarch64/sve/pred_clobber_2.c: New test.
* gcc.target/aarch64/sve/pred_clobber_3.c: New test.
* gcc.target/aarch64/sve/pred_clobber_4.c: New test.
* gcc.target/aarch64/sve/pred_clobber_5.c: New test.
---
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..63f0669abd23d45c0ffd77c53859a098a21e0192
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2 -ffixed-p1 -ffixed-p2 -ffixed-p3 -ffixed-p4 -ffixed-p5 -ffixed-p6 -ffixed-p7 -ffixed-p8 -ffixed-p9 -ffixed-p10 -ffixed-p11 -ffixed-p12 -ffixed-p12 -ffixed-p13 -ffixed-p14 -ffixed-p15" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
--
[-- Attachment #2: rb18356.patch --]
[-- Type: text/x-diff, Size: 6443 bytes --]
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..63f0669abd23d45c0ffd77c53859a098a21e0192
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2 -ffixed-p1 -ffixed-p2 -ffixed-p3 -ffixed-p4 -ffixed-p5 -ffixed-p6 -ffixed-p7 -ffixed-p8 -ffixed-p9 -ffixed-p10 -ffixed-p11 -ffixed-p12 -ffixed-p12 -ffixed-p13 -ffixed-p14 -ffixed-p15" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores.
2024-05-22 9:30 [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores Tamar Christina
@ 2024-05-28 9:49 ` Tamar Christina
2024-05-30 20:16 ` Richard Sandiford
1 sibling, 0 replies; 4+ messages in thread
From: Tamar Christina @ 2024-05-28 9:49 UTC (permalink / raw)
To: gcc-patches
Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, ktkachov, richard.sandiford
[-- Attachment #1: Type: text/plain, Size: 6660 bytes --]
Hi All,
This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
It is kept off for generic codegen.
Note the reason for the +sve even though they are in aarch64-sve.exp is if the
testsuite is ran with a forced SVE off option, e.g. -march=armv8-a+nosve then
the intrinsics end up being disabled because the -march is preferred over the
-mcpu even though the -mcpu comes later.
This prevents the tests from failing in such runs.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
* config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pred_clobber_1.c: New test.
* gcc.target/aarch64/sve/pred_clobber_2.c: New test.
* gcc.target/aarch64/sve/pred_clobber_3.c: New test.
* gcc.target/aarch64/sve/pred_clobber_4.c: New test.
---
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
--
[-- Attachment #2: rb18356.patch --]
[-- Type: text/x-diff, Size: 5520 bytes --]
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores.
2024-05-22 9:30 [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores Tamar Christina
2024-05-28 9:49 ` Tamar Christina
@ 2024-05-30 20:16 ` Richard Sandiford
1 sibling, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2024-05-30 20:16 UTC (permalink / raw)
To: Tamar Christina
Cc: gcc-patches, nd, Richard.Earnshaw, Marcus.Shawcroft, ktkachov
Tamar Christina <tamar.christina@arm.com> writes:
> Hi All,
>
> This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
> It is kept off for generic codegen.
>
> Note the reason for the +sve even though they are in aarch64-sve.exp is if the
> testsuite is ran with a forced SVE off option, e.g. -march=armv8-a+nosve then
> the intrinsics end up being disabled because the -march is preferred over the
> -mcpu even though the -mcpu comes later.
>
> This prevents the tests from failing in such runs.
IMO we should just skip aarch64-sve.exp if the options explicitly disable
SVE. But that's separate work. I'll try it once this patch is in.
> Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
>
> Ok for master?
>
> Thanks,
> Tamar
>
> gcc/ChangeLog:
>
> * config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
> AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
> * config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
> AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
> * config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
> AARCH64_EXTRA_TUNE_AVOID_PRED_RMW.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/aarch64/sve/pred_clobber_1.c: New test.
> * gcc.target/aarch64/sve/pred_clobber_2.c: New test.
> * gcc.target/aarch64/sve/pred_clobber_3.c: New test.
> * gcc.target/aarch64/sve/pred_clobber_4.c: New test.
>
> ---
> diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
> index 7e799bbe762fe862e31befed50e54040a7fd1f2f..be9a48ac3adc097f967c217fe09dcac194d7d14f 100644
> --- a/gcc/config/aarch64/tuning_models/neoversen2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversen2.h
> @@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
> (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
> | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
> - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
> + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
> &generic_prefetch_tune,
> AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
> AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
> index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..0fc41ce6a41b3135fa06d2bda1f517fdf4f8dbcf 100644
> --- a/gcc/config/aarch64/tuning_models/neoversev1.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev1.h
> @@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
> (AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
> | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> - | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
> + | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
> + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
> &generic_prefetch_tune,
> AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
> AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
> diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
> index bc01ed767c9b690504eb98456402df5d9d64eee3..f76e4ef358f7dfb9c7d7b470ea7240eaa2120f8e 100644
> --- a/gcc/config/aarch64/tuning_models/neoversev2.h
> +++ b/gcc/config/aarch64/tuning_models/neoversev2.h
> @@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
> (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
> | AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
> | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
> - | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
> + | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
> + | AARCH64_EXTRA_TUNE_AVOID_PRED_RMW), /* tune_flags. */
> &generic_prefetch_tune,
> AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
> AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=neoverse-n2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p([1-9][0-9]?).b, all
Might be better to make this p([1-3]), so that we disallow any registers
that would cause a spill.
OK with that change, thanks.
Richard
> +** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=neoverse-v2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p([1-9][0-9]?).b, all
> +** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
> @@ -0,0 +1,23 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=neoverse-v1" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p([1-9][0-9]?).b, all
> +** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
> +
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
> new file mode 100644
> index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
> @@ -0,0 +1,22 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +/* { dg-final { check-function-bodies "**" "" } } */
> +
> +#pragma GCC target "+sve"
> +
> +#include <arm_sve.h>
> +
> +extern void use(svbool_t);
> +
> +/*
> +** foo:
> +** ...
> +** ptrue p0.b, all
> +** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
> +** ...
> +*/
> +void foo (svuint16_t a, uint16_t b)
> +{
> + svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
> + use (p0);
> +}
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 0/4]AArch64: support conditional early clobbers on certain operations.
@ 2024-05-15 10:28 Tamar Christina
2024-05-15 10:29 ` [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores Tamar Christina
0 siblings, 1 reply; 4+ messages in thread
From: Tamar Christina @ 2024-05-15 10:28 UTC (permalink / raw)
To: gcc-patches
Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, ktkachov, richard.sandiford
[-- Attachment #1: Type: text/plain, Size: 1613 bytes --]
Hi All,
Some Neoverse Software Optimization Guides (SWoG) have a clause that state
that for predicated operations that also produce a predicate it is preferred
that the codegen should use a different register for the destination than that
of the input predicate in order to avoid a performance overhead.
This of course has the problem that it increases register pressure and so should
be done with care. Additionally not all micro-architectures have this
consideration and so it shouldn't be done as a default thing.
The patch series adds support for doing conditional early clobbers through a
combination of new alternatives and attributes to control their availability.
On high register pressure we also use LRA's costing to prefer not to use the
alternative and instead just use the tie as this is preferable to a reload.
Concretely this patch series does:
> aarch64-none-elf-gcc -O3 -g0 -S -o - pred-clobber.c -mcpu=neoverse-n2
foo:
mov z31.h, w0
ptrue p3.b, all
cmplo p0.h, p3/z, z0.h, z31.h
b use
> aarch64-none-elf-gcc -O3 -g0 -S -o - pred-clobber.c -mcpu=neoverse-n1+sve
foo:
mov z31.h, w0
ptrue p0.b, all
cmplo p0.h, p0/z, z0.h, z31.h
b use
> aarch64-none-elf-gcc -O3 -g0 -S -o - pred-clobber.c -mcpu=neoverse-n2 -ffixed-p[1-15]
foo:
mov z31.h, w0
ptrue p0.b, all
cmplo p0.h, p0/z, z0.h, z31.h
b use
Testcases for the changes are in the last patch of the series.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Thanks,
Tamar
---
--
[-- Attachment #2: rb18359.patch --]
[-- Type: text/x-diff, Size: 0 bytes --]
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores.
2024-05-15 10:28 [PATCH 0/4]AArch64: support conditional early clobbers on certain operations Tamar Christina
@ 2024-05-15 10:29 ` Tamar Christina
0 siblings, 0 replies; 4+ messages in thread
From: Tamar Christina @ 2024-05-15 10:29 UTC (permalink / raw)
To: gcc-patches
Cc: nd, Richard.Earnshaw, Marcus.Shawcroft, ktkachov, richard.sandiford
[-- Attachment #1: Type: text/plain, Size: 7715 bytes --]
Hi All,
This enables the new tuning flag for Neoverse V1, Neoverse V2 and Neoverse N2.
It is kept off for generic codegen.
Note the reason for the +sve even though they are in aarch64-sve.exp is if the
testsuite is ran with a forced SVE off option, e.g. -march=armv8-a+nosve then
the intrinsics end up being disabled because the -march is preferred over the
-mcpu even though the -mcpu comes later.
This prevents the tests from failing in such runs.
Bootstrapped Regtested on aarch64-none-linux-gnu and no issues.
Ok for master?
Thanks,
Tamar
gcc/ChangeLog:
* config/aarch64/tuning_models/neoversen2.h (neoversen2_tunings): Add
AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST.
* config/aarch64/tuning_models/neoversev1.h (neoversev1_tunings): Add
AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST.
* config/aarch64/tuning_models/neoversev2.h (neoversev2_tunings): Add
AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/sve/pred_clobber_1.c: New test.
* gcc.target/aarch64/sve/pred_clobber_2.c: New test.
* gcc.target/aarch64/sve/pred_clobber_3.c: New test.
* gcc.target/aarch64/sve/pred_clobber_4.c: New test.
* gcc.target/aarch64/sve/pred_clobber_5.c: New test.
---
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762fe862e31befed50e54040a7fd1f2f..0d8f3f6be67f3583b00473bef97ea3ae4fcea4ec 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..d28d0b1c0498ed250b0a93ca69720fe10c65c93d 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b690504eb98456402df5d9d64eee3..3b2f9797bd777e73ca9c21501fa97448d96cb65e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..63f0669abd23d45c0ffd77c53859a098a21e0192
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2 -ffixed-p1 -ffixed-p2 -ffixed-p3 -ffixed-p4 -ffixed-p5 -ffixed-p6 -ffixed-p7 -ffixed-p8 -ffixed-p9 -ffixed-p10 -ffixed-p11 -ffixed-p12 -ffixed-p12 -ffixed-p13 -ffixed-p14 -ffixed-p15" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
--
[-- Attachment #2: rb18356.patch --]
[-- Type: text/x-diff, Size: 6482 bytes --]
diff --git a/gcc/config/aarch64/tuning_models/neoversen2.h b/gcc/config/aarch64/tuning_models/neoversen2.h
index 7e799bbe762fe862e31befed50e54040a7fd1f2f..0d8f3f6be67f3583b00473bef97ea3ae4fcea4ec 100644
--- a/gcc/config/aarch64/tuning_models/neoversen2.h
+++ b/gcc/config/aarch64/tuning_models/neoversen2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversen2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev1.h b/gcc/config/aarch64/tuning_models/neoversev1.h
index 9363f2ad98a5279cc99f2f9b1509ba921d582e84..d28d0b1c0498ed250b0a93ca69720fe10c65c93d 100644
--- a/gcc/config/aarch64/tuning_models/neoversev1.h
+++ b/gcc/config/aarch64/tuning_models/neoversev1.h
@@ -227,7 +227,8 @@ static const struct tune_params neoversev1_tunings =
(AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
- | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index bc01ed767c9b690504eb98456402df5d9d64eee3..3b2f9797bd777e73ca9c21501fa97448d96cb65e 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -236,7 +236,8 @@ static const struct tune_params neoversev2_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_CSE_SVE_VL_CONSTANTS
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
- | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
+ | AARCH64_EXTRA_TUNE_EARLY_CLOBBER_SVE_PRED_DEST), /* tune_flags. */
&generic_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
new file mode 100644
index 0000000000000000000000000000000000000000..934a00a38531c5fd4139d99ff33414904b2c104f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_1.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
new file mode 100644
index 0000000000000000000000000000000000000000..58badb66a43b1ac50eeec153b9cac44fc831b145
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_2.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..c67c2bd3422e0bb0c694b5fe0adf0d83e4d967c6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_3.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-v1" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p([1-9][0-9]?).b, all
+** cmplo p0.h, p\1/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0120afe5d523eff8297fadd4fc4c678676413d6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_4.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
new file mode 100644
index 0000000000000000000000000000000000000000..63f0669abd23d45c0ffd77c53859a098a21e0192
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pred_clobber_5.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=neoverse-n2 -ffixed-p1 -ffixed-p2 -ffixed-p3 -ffixed-p4 -ffixed-p5 -ffixed-p6 -ffixed-p7 -ffixed-p8 -ffixed-p9 -ffixed-p10 -ffixed-p11 -ffixed-p12 -ffixed-p12 -ffixed-p13 -ffixed-p14 -ffixed-p15" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+#pragma GCC target "+sve"
+
+#include <arm_sve.h>
+
+extern void use(svbool_t);
+
+/*
+** foo:
+** ...
+** ptrue p0.b, all
+** cmplo p0.h, p0/z, z0.h, z[0-9]+.h
+** ...
+*/
+void foo (svuint16_t a, uint16_t b)
+{
+ svbool_t p0 = svcmplt_n_u16 (svptrue_b16 (), a, b);
+ use (p0);
+}
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-05-30 20:16 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-22 9:30 [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores Tamar Christina
2024-05-28 9:49 ` Tamar Christina
2024-05-30 20:16 ` Richard Sandiford
-- strict thread matches above, loose matches on Subject: below --
2024-05-15 10:28 [PATCH 0/4]AArch64: support conditional early clobbers on certain operations Tamar Christina
2024-05-15 10:29 ` [PATCH 4/4]AArch64: enable new predicate tuning for Neoverse cores Tamar Christina
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).