public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Hongyu Wang <hongyu.wang@intel.com>
To: gcc-patches@gcc.gnu.org
Cc: hongtao.liu@intel.com, ubizjak@gmail.com, hubicka@ucw.cz,
	vmakarov@redhat.com, jakub@redhat.com,
	Kong Lingling <lingling.kong@intel.com>
Subject: [PATCH 06/13] [APX EGPR] Map reg/mem constraints in inline asm to non-EGPR constraint.
Date: Thu, 31 Aug 2023 16:20:17 +0800	[thread overview]
Message-ID: <20230831082024.314097-7-hongyu.wang@intel.com> (raw)
In-Reply-To: <20230831082024.314097-1-hongyu.wang@intel.com>

From: Kong Lingling <lingling.kong@intel.com>

In inline asm, we do not know whether the insn can use EGPR, so disable EGPR
usage by default by mapping the common reg/mem constraints to non-EGPR
constraints. Use the flag -mapx-inline-asm-use-gpr32 to enable EGPR usage
for inline asm.

gcc/ChangeLog:

	* config/i386/i386.cc (INCLUDE_STRING): Add include for
	ix86_md_asm_adjust.
	(ix86_md_asm_adjust): When APX EGPR enabled without specifying the
	target option, map reg/mem constraints to non-EGPR constraints.
	* config/i386/i386.opt: Add option mapx-inline-asm-use-gpr32.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/apx-inline-gpr-norex2.c: New test.
---
 gcc/config/i386/i386.cc                       |  44 +++++++
 gcc/config/i386/i386.opt                      |   5 +
 .../gcc.target/i386/apx-inline-gpr-norex2.c   | 107 ++++++++++++++++++
 3 files changed, 156 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c

diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index d26d9ab0d9d..9460ebbfda4 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public License
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+#define INCLUDE_STRING
 #define IN_TARGET_CODE 1
 
 #include "config.h"
@@ -23077,6 +23078,49 @@ ix86_md_asm_adjust (vec<rtx> &outputs, vec<rtx> & /*inputs*/,
   bool saw_asm_flag = false;
 
   start_sequence ();
+  /* TODO: For now only the general r/m constraints are mapped to non-EGPR
+     constraints; the remaining usable constraints will be mapped later.  */
+  if (TARGET_APX_EGPR && !ix86_apx_inline_asm_use_gpr32)
+    {
+      /* Map "r" constraint in inline asm to "h" that disallows r16-r31
+	 and replace only r, exclude Br and Yr.  */
+      for (unsigned i = 0; i < constraints.length (); i++)
+	{
+	  std::string *s = new std::string (constraints[i]);
+	  size_t pos = s->find ('r');
+	  while (pos != std::string::npos)
+	    {
+	      if (pos > 0
+		  && (s->at (pos - 1) == 'Y' || s->at (pos - 1) == 'B'))
+		pos = s->find ('r', pos + 1);
+	      else
+		{
+		  s->replace (pos, 1, "h");
+		  constraints[i] = (const char*) s->c_str ();
+		  break;
+		}
+	    }
+	}
+      /* Also map "m/memory/Bm" constraint that may use GPR32, replace them with
+	 "Bt/Bt/BT".  */
+      for (unsigned i = 0; i < constraints.length (); i++)
+	{
+	  std::string *s = new std::string (constraints[i]);
+	  size_t pos = s->find ("m");
+	  size_t pos2 = s->find ("memory");
+	  if (pos != std::string::npos)
+	    {
+	      if (pos > 0 && (s->at (pos - 1) == 'B'))
+		  s->replace (pos - 1, 2, "BT");
+	      else if (pos2 != std::string::npos)
+		  s->replace (pos, 6, "Bt");
+	      else
+		  s->replace (pos, 1, "Bt");
+	      constraints[i] = (const char*) s->c_str ();
+	    }
+	}
+     }
+
   for (unsigned i = 0, n = outputs.length (); i < n; ++i)
     {
       const char *con = constraints[i];
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 1ee4d90186e..5c8d3a207e3 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1335,3 +1335,8 @@ Enum(apx_features) String(ndd) Value(apx_ndd) Set(4)
 
 EnumValue
 Enum(apx_features) String(all) Value(apx_all) Set(1)
+
+mapx-inline-asm-use-gpr32
+Target Var(ix86_apx_inline_asm_use_gpr32) Init(0)
+Enable GPR32 in inline asm when APX_EGPR is enabled; do not
+map reg or mem constraints in inline asm to GPR16.
diff --git a/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c b/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
new file mode 100644
index 00000000000..21534450045
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/apx-inline-gpr-norex2.c
@@ -0,0 +1,107 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mapxf -m64 -march=skylake-avx512 -DDTYPE32" } */
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#ifdef DTYPE32
+typedef u32 DTYPE;
+#define byteswap byteswapu32
+#endif
+
+#define R(x,n) ( (x >> n) | (x << (32 - n)))
+
+#define S0(x) (R(x, 2) ^ R(x,13) ^ R(x,22))
+#define S1(x) (R(x, 6) ^ R(x,11) ^ R(x,25))
+
+#define TT(a,b,c,d,e,f,g,h,x,K)                 \
+{                                                        \
+    tmp1 = h + S1(e) + (g ^ (e & (f ^ g))) + K + x;                \
+    tmp2 = S0(a) + ((a & b) | (c & (a | b)));                           \
+    h  = tmp1 + tmp2;                                    \
+    d += tmp1;                                           \
+}
+
+static inline u32 byteswapu32(u32 x)
+{
+  x = (x & 0x0000FFFF) << 16 | (x & 0xFFFF0000) >> 16;
+  x = (x & 0x00FF00FF) << 8 | (x & 0xFF00FF00) >> 8;  
+  return x;
+}
+
+void foo (DTYPE in[16], DTYPE out[8], const DTYPE C[16])
+{
+    DTYPE tmp1 = 0, tmp2 = 0, a, b, c, d, e, f, g, h;
+    DTYPE w0, w1, w2, w3, w4, w5, w6, w7,
+	w8, w9, w10, w11, w12, w13, w14, w15;
+    w0  = byteswap(in[0]);
+    w1  = byteswap(in[1]);
+    w2  = byteswap(in[2]);
+    w3  = byteswap(in[3]);
+    w4  = byteswap(in[4]);
+    w5  = byteswap(in[5]);
+    w6  = byteswap(in[6]);
+    w7  = byteswap(in[7]);
+    w8  = byteswap(in[8]);
+    w9  = byteswap(in[9]);
+    w10 = byteswap(in[10]);
+    w11 = byteswap(in[11]);
+    w12 = byteswap(in[12]);
+    w13 = byteswap(in[13]);
+    w14 = byteswap(in[14]);
+    w15 = byteswap(in[15]);
+    a = out[0];
+    b = out[1];
+    c = out[2];
+    d = out[3];
+    e = out[4];
+    f = out[5];
+    g = out[6];
+    h = out[7];
+    
+    TT(a, b, c, d, e, f, g, h,  w0, C[0]);
+    TT(h, a, b, c, d, e, f, g,  w1, C[1]);
+    TT(g, h, a, b, c, d, e, f,  w2, C[2]);
+    TT(f, g, h, a, b, c, d, e,  w3, C[3]);
+    TT(e, f, g, h, a, b, c, d,  w4, C[4]);
+    TT(d, e, f, g, h, a, b, c,  w5, C[5]);
+    TT(c, d, e, f, g, h, a, b,  w6, C[6]);
+    TT(b, c, d, e, f, g, h, a,  w7, C[7]);
+    TT(a, b, c, d, e, f, g, h,  w8, C[8]);
+    TT(h, a, b, c, d, e, f, g,  w9, C[9]);
+    TT(g, h, a, b, c, d, e, f, w10, C[10]);
+    TT(f, g, h, a, b, c, d, e, w11, C[11]);
+    TT(e, f, g, h, a, b, c, d, w12, C[12]);
+    TT(d, e, f, g, h, a, b, c, w13, C[13]);
+    TT(c, d, e, f, g, h, a, b, w14, C[14]);
+    TT(b, c, d, e, f, g, h, a, w15, C[15]);
+
+    out[0] += a;
+    out[1] += b;
+    out[2] += c;
+    out[3] += d;
+    out[4] += e;
+    out[5] += f;
+    out[6] += g;
+    out[7] += h;
+
+    __asm__ __volatile__ ("test_asm_xmm %0, %%rax" : : "Yr" (out[7]) : "rax");
+    __asm__ __volatile__ ("test_asm_Brr %0, %%rax" : : "Brr" (w14) : "rbx");
+    __asm__ __volatile__ ("test_asm_rBr %0, %%rax" : : "rBr" (w13) : "rbx");
+    __asm__ __volatile__ ("test_asm_r %0, %%rax" : : "r" (w15) : "rbx");
+    __asm__ __volatile__ ("test_asm_m %0, %%rax" : : "m" (out[0]) : "rbx");
+    __asm__ __volatile__ ("test_asm_mem %0, %%rax" : : "memory" (out[1]) : "rbx");
+}
+
+/* { dg-final { scan-assembler-not "knot" } } */
+/* { dg-final { scan-assembler-not "kxor" } } */
+/* { dg-final { scan-assembler-not "kor" } } */
+/* { dg-final { scan-assembler-not "kandn" } } */
+/* { dg-final { scan-assembler-times "test_asm_xmm %xmm5, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_Brr %r15d, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_rBr %r14d, %rax" 1 } } */
+/* { dg-final { scan-assembler-times "test_asm_r %r13d, %rax" 1 } } */
+/* { dg-final { scan-assembler-not "test_asm_rBr %r31d, %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_r %r30d, %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_m \\(%r29d\\), %rax" } } */
+/* { dg-final { scan-assembler-not "test_asm_mem \\(%r28d\\), %rax" } } */
-- 
2.31.1


  parent reply	other threads:[~2023-08-31  8:20 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-08-31  8:20 [PATCH 00/13] [RFC] Support Intel APX EGPR Hongyu Wang
2023-08-31  8:20 ` [PATCH 01/13] [APX EGPR] middle-end: Add insn argument to base_reg_class Hongyu Wang
2023-08-31 10:15   ` Uros Bizjak
2023-09-01  9:07     ` Hongyu Wang
2023-09-06 19:43       ` Vladimir Makarov
2023-09-07  6:23         ` Uros Bizjak
2023-09-07 12:13           ` Vladimir Makarov
2023-09-08 17:03   ` Vladimir Makarov
2023-09-10  4:49     ` Hongyu Wang
2023-09-14 12:09       ` Vladimir Makarov
2023-08-31  8:20 ` [PATCH 02/13] [APX EGPR] middle-end: Add index_reg_class with insn argument Hongyu Wang
2023-08-31  8:20 ` [PATCH 03/13] [APX_EGPR] Initial support for APX_F Hongyu Wang
2023-08-31  8:20 ` [PATCH 04/13] [APX EGPR] Add 16 new integer general purpose registers Hongyu Wang
2023-08-31  8:20 ` [PATCH 05/13] [APX EGPR] Add register and memory constraints that disallow EGPR Hongyu Wang
2023-08-31  8:20 ` Hongyu Wang [this message]
2023-08-31  9:17   ` [PATCH 06/13] [APX EGPR] Map reg/mem constraints in inline asm to non-EGPR constraint Jakub Jelinek
2023-08-31 10:00     ` Uros Bizjak
2023-09-01  9:04       ` Hongyu Wang
2023-09-01  9:38         ` Uros Bizjak
2023-09-01 10:35           ` Hongtao Liu
2023-09-01 11:27             ` Uros Bizjak
2023-09-04  0:28               ` Hongtao Liu
2023-09-04  8:57                 ` Uros Bizjak
2023-09-04  9:10                   ` Hongtao Liu
2023-09-01 11:03       ` Richard Sandiford
2023-09-04  1:03         ` Hongtao Liu
2023-09-01  9:04     ` Hongyu Wang
2023-08-31  8:20 ` [PATCH 07/13] [APX EGPR] Add backend hook for base_reg_class/index_reg_class Hongyu Wang
2023-08-31  8:20 ` [PATCH 08/13] [APX EGPR] Handle GPR16 only vector move insns Hongyu Wang
2023-08-31  9:43   ` Jakub Jelinek
2023-09-01  9:07     ` Hongyu Wang
2023-09-01  9:20       ` Jakub Jelinek
2023-09-01 11:34         ` Hongyu Wang
2023-09-01 11:41           ` Jakub Jelinek
2023-08-31  8:20 ` [PATCH 09/13] [APX EGPR] Handle legacy insn that only support GPR16 (1/5) Hongyu Wang
2023-08-31 10:06   ` Uros Bizjak
2023-08-31  8:20 ` [PATCH 10/13] [APX EGPR] Handle legacy insns that only support GPR16 (2/5) Hongyu Wang
2023-08-31  8:20 ` [PATCH 11/13] [APX EGPR] Handle legacy insns that only support GPR16 (3/5) Hongyu Wang
2023-08-31  9:26   ` Richard Biener
2023-08-31  9:28     ` Richard Biener
2023-09-01  9:03       ` Hongyu Wang
2023-09-01 10:38       ` Hongtao Liu
2023-08-31  9:31     ` Jakub Jelinek
2023-08-31  8:20 ` [PATCH 12/13] [APX_EGPR] Handle legacy insns that only support GPR16 (4/5) Hongyu Wang
2023-08-31  8:20 ` [PATCH 13/13] [APX EGPR] Handle vex insns that only support GPR16 (5/5) Hongyu Wang
2023-08-31  9:19 ` [PATCH 00/13] [RFC] Support Intel APX EGPR Richard Biener
2023-09-01  8:55   ` Hongyu Wang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230831082024.314097-7-hongyu.wang@intel.com \
    --to=hongyu.wang@intel.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hongtao.liu@intel.com \
    --cc=hubicka@ucw.cz \
    --cc=jakub@redhat.com \
    --cc=lingling.kong@intel.com \
    --cc=ubizjak@gmail.com \
    --cc=vmakarov@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).