From: Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org>
To: gcc Patches <gcc-patches@gcc.gnu.org>,
Richard Sandiford <richard.sandiford@arm.com>
Subject: [aarch64] Code-gen for vector initialization involving constants
Date: Fri, 3 Feb 2023 12:46:33 +0530 [thread overview]
Message-ID: <CAAgBjMnwGk4fOc3PTM_agTXXFvt=767a3-AWOfSr23Xja6K81w@mail.gmail.com> (raw)
[-- Attachment #1: Type: text/plain, Size: 1478 bytes --]
Hi Richard,
While digging through aarch64_expand_vector_init, I noticed that it gives
priority to loading a constant first:
/* Initialise a vector which is part-variable. We want to first try
to build those lanes which are constant in the most efficient way we
can. */
which results in suboptimal code-gen for the following case:
int16x8_t f_s16(int16_t x)
{
return (int16x8_t) { x, x, x, x, x, x, x, 1 };
}
code-gen trunk:
f_s16:
movi v0.8h, 0x1
ins v0.h[0], w0
ins v0.h[1], w0
ins v0.h[2], w0
ins v0.h[3], w0
ins v0.h[4], w0
ins v0.h[5], w0
ins v0.h[6], w0
ret
The attached patch tweaks the following condition:
if (n_var == n_elts && n_elts <= 16)
{
...
}
to pass if maxv >= 80% of n_elts, with 80% being an
arbitrary "high enough" threshold. The intent is to dup
the most frequently repeated variable if its repetition count
is "high enough" and then insert the constants, which should be "better" than
loading the constant first and inserting the variables, as in the above case.
Alternatively, I suppose we could remove the threshold and, for constants,
generate both sequences and check which one is more
efficient?
code-gen with patch:
f_s16:
dup v0.8h, w0
movi v1.4h, 0x1
ins v0.h[7], v1.h[0]
ret
The patch is lightly tested to verify that the vec[t]-init-*.c tests pass,
with bootstrap+test
in progress.
Does this look OK?
Thanks,
Prathamesh
[-- Attachment #2: gnu-780-2.txt --]
[-- Type: text/plain, Size: 2851 bytes --]
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index acc0cfe5f94..df33509c6e4 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22079,30 +22079,36 @@ aarch64_expand_vector_init (rtx target, rtx vals)
and matches[X][1] with the count of duplicate elements (if X is the
earliest element which has duplicates). */
- if (n_var == n_elts && n_elts <= 16)
+ int matches[16][2] = {0};
+ for (int i = 0; i < n_elts; i++)
{
- int matches[16][2] = {0};
- for (int i = 0; i < n_elts; i++)
+ for (int j = 0; j <= i; j++)
{
- for (int j = 0; j <= i; j++)
+ if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
{
- if (rtx_equal_p (XVECEXP (vals, 0, i), XVECEXP (vals, 0, j)))
- {
- matches[i][0] = j;
- matches[j][1]++;
- break;
- }
+ matches[i][0] = j;
+ matches[j][1]++;
+ break;
}
}
- int maxelement = 0;
- int maxv = 0;
- for (int i = 0; i < n_elts; i++)
- if (matches[i][1] > maxv)
- {
- maxelement = i;
- maxv = matches[i][1];
- }
+ }
+ int maxelement = 0;
+ int maxv = 0;
+ for (int i = 0; i < n_elts; i++)
+ if (matches[i][1] > maxv)
+ {
+ maxelement = i;
+ maxv = matches[i][1];
+ }
+
+ rtx max_elem = XVECEXP (vals, 0, maxelement);
+ if (n_elts <= 16
+ && ((n_var == n_elts)
+ || (maxv >= (int)(0.8 * n_elts)
+ && !CONST_INT_P (max_elem)
+ && !CONST_DOUBLE_P (max_elem))))
+ {
/* Create a duplicate of the most common element, unless all elements
are equally useless to us, in which case just immediately set the
vector register using the first element. */
diff --git a/gcc/testsuite/gcc.target/aarch64/vec-init-18.c b/gcc/testsuite/gcc.target/aarch64/vec-init-18.c
new file mode 100644
index 00000000000..e20b813559e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/vec-init-18.c
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-O3" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#include <arm_neon.h>
+
+/*
+** f1_s16:
+** ...
+** dup v[0-9]+\.8h, w[0-9]+
+** movi v[0-9]+\.4h, 0x1
+** ins v[0-9]+\.h\[7\], v[0-9]+\.h\[0\]
+** ...
+** ret
+*/
+
+int16x8_t f1_s16(int16_t x)
+{
+ return (int16x8_t) {x, x, x, x, x, x, x, 1};
+}
+
+/*
+** f2_s16:
+** ...
+** dup v[0-9]+\.8h, w[0-9]+
+** movi v[0-9]+\.4h, 0x1
+** movi v[0-9]+\.4h, 0x2
+** ins v[0-9]+\.h\[6\], v[0-9]+\.h\[0\]
+** ins v[0-9]+\.h\[7\], v[0-9]+\.h\[0\]
+** ...
+** ret
+*/
+
+int16x8_t f2_s16(int16_t x)
+{
+ return (int16x8_t) { x, x, x, x, x, x, 1, 2 };
+}
+
+/*
+** f3_s16:
+** ...
+** movi v[0-9]+\.8h, 0x1
+** ins v[0-9]+\.h\[0\], w0
+** ins v[0-9]+\.h\[1\], w0
+** ins v[0-9]+\.h\[2\], w0
+** ...
+** ret
+*/
+
+int16x8_t f3_s16(int16_t x)
+{
+ return (int16x8_t) {x, x, x, 1, 1, 1, 1, 1};
+}
next reply other threads:[~2023-02-03 7:17 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-02-03 7:16 Prathamesh Kulkarni [this message]
2023-02-13 6:28 ` Prathamesh Kulkarni
2023-04-03 18:12 ` Prathamesh Kulkarni
2023-04-25 10:59 ` Richard Sandiford
2023-05-02 5:41 ` Prathamesh Kulkarni
2023-05-02 9:25 ` Richard Sandiford
2023-05-02 10:22 ` Prathamesh Kulkarni
2023-05-02 12:02 ` Richard Sandiford
2023-05-02 12:38 ` Prathamesh Kulkarni
2023-05-02 12:52 ` Richard Sandiford
2023-05-03 11:28 ` Prathamesh Kulkarni
2023-05-11 19:15 ` Richard Sandiford
2023-05-15 14:09 ` Prathamesh Kulkarni
2023-05-15 18:59 ` Richard Sandiford
2023-05-17 15:23 ` Prathamesh Kulkarni
2023-05-18 8:07 ` Richard Sandiford
2023-05-18 14:41 ` Prathamesh Kulkarni
2023-05-18 16:34 ` Richard Sandiford
2023-05-19 10:56 ` Prathamesh Kulkarni
2023-05-22 8:48 ` Richard Sandiford
2023-05-24 9:29 ` Prathamesh Kulkarni
2023-05-24 10:10 ` Richard Sandiford
2023-05-24 19:13 ` Prathamesh Kulkarni
2023-05-24 19:58 ` Richard Sandiford
2023-05-25 6:47 ` Prathamesh Kulkarni
2023-05-25 7:34 ` Richard Sandiford
2023-05-25 9:56 ` Prathamesh Kulkarni
2023-05-26 3:04 ` Prathamesh Kulkarni
2023-05-30 18:53 ` Richard Sandiford
2023-06-12 17:52 ` Prathamesh Kulkarni
2023-05-24 19:50 ` Prathamesh Kulkarni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAAgBjMnwGk4fOc3PTM_agTXXFvt=767a3-AWOfSr23Xja6K81w@mail.gmail.com' \
--to=prathamesh.kulkarni@linaro.org \
--cc=gcc-patches@gcc.gnu.org \
--cc=richard.sandiford@arm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).