public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-9171] x86: Properly implement AMX-TILE load/store intrinsics
@ 2024-02-26  4:26 H.J. Lu
  0 siblings, 0 replies; only message in thread
From: H.J. Lu @ 2024-02-26  4:26 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4972f97a265c574d51e20373ddefd66576051e5c

commit r14-9171-g4972f97a265c574d51e20373ddefd66576051e5c
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Sun Feb 25 10:21:04 2024 -0800

    x86: Properly implement AMX-TILE load/store intrinsics
    
    ldtilecfg and sttilecfg take a 512-bit (64-byte) memory block.  With
    _tile_loadconfig implemented as
    
    extern __inline void
    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
    _tile_loadconfig (const void *__config)
    {
      __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
    }
    
    GCC sees:
    
    (parallel [
      (asm_operands/v ("ldtilecfg   %X0") ("") 0
       [(mem/f/c:DI (plus:DI (reg/f:DI 77 virtual-stack-vars)
                             (const_int -64 [0xffffffffffffffc0])) [1 MEM[(const void * *)&tile_data]+0 S8 A128])]
       [(asm_input:DI ("m"))]
       (clobber (reg:CC 17 flags))])
    
    and the memory operand size is only 8 bytes (DImode, S8).  As a result,
    the remaining 56 bytes of the configuration block are ignored by GCC.
    Implement ldtilecfg and sttilecfg intrinsics with a pointer to XImode
    to honor the full 512-bit (64-byte) memory block.
    
    gcc/ChangeLog:
    
            PR target/114098
            * config/i386/amxtileintrin.h (_tile_loadconfig): Use
            __builtin_ia32_ldtilecfg.
            (_tile_storeconfig): Use __builtin_ia32_sttilecfg.
            * config/i386/i386-builtin.def (BDESC): Add
            __builtin_ia32_ldtilecfg and __builtin_ia32_sttilecfg.
            * config/i386/i386-expand.cc (ix86_expand_builtin): Handle
            IX86_BUILTIN_LDTILECFG and IX86_BUILTIN_STTILECFG.
            * config/i386/i386.md (ldtilecfg): New pattern.
            (sttilecfg): Likewise.
    
    gcc/testsuite/ChangeLog:
    
            PR target/114098
            * gcc.target/i386/amxtile-4.c: New test.

Diff:
---
 gcc/config/i386/amxtileintrin.h           |  4 +--
 gcc/config/i386/i386-builtin.def          |  4 +++
 gcc/config/i386/i386-expand.cc            | 19 +++++++++++
 gcc/config/i386/i386.md                   | 24 ++++++++++++++
 gcc/testsuite/gcc.target/i386/amxtile-4.c | 52 +++++++++++++++++++++++++++++++
 5 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/amxtileintrin.h b/gcc/config/i386/amxtileintrin.h
index d1a26e0fea5b..5081b3264984 100644
--- a/gcc/config/i386/amxtileintrin.h
+++ b/gcc/config/i386/amxtileintrin.h
@@ -39,14 +39,14 @@ extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_loadconfig (const void *__config)
 {
-  __asm__ volatile ("ldtilecfg\t%X0" :: "m" (*((const void **)__config)));
+  __builtin_ia32_ldtilecfg (__config);
 }
 
 extern __inline void
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _tile_storeconfig (void *__config)
 {
-  __asm__ volatile ("sttilecfg\t%X0" : "=m" (*((void **)__config)));
+  __builtin_ia32_sttilecfg (__config);
 }
 
 extern __inline void
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 729355230b86..ab73e20121aa 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -126,6 +126,10 @@ BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__b
 BDESC (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xrstors64", IX86_BUILTIN_XRSTORS64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
 BDESC (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_64BIT, 0, CODE_FOR_nothing, "__builtin_ia32_xsavec64", IX86_BUILTIN_XSAVEC64, UNKNOWN, (int) VOID_FTYPE_PVOID_INT64)
 
+/* LDTILECFG and STTILECFG.  */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_nothing, "__builtin_ia32_ldtilecfg", IX86_BUILTIN_LDTILECFG, UNKNOWN, (int) VOID_FTYPE_PCVOID)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_AMX_TILE, CODE_FOR_nothing, "__builtin_ia32_sttilecfg", IX86_BUILTIN_STTILECFG, UNKNOWN, (int) VOID_FTYPE_PVOID)
+
 /* SSE */
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_movv4sf_internal, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF)
 BDESC (OPTION_MASK_ISA_SSE, 0, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF)
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index a4d3369f01b0..c98e0f81f0c3 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -14152,6 +14152,25 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
 	emit_insn (pat);
       return 0;
 
+    case IX86_BUILTIN_LDTILECFG:
+    case IX86_BUILTIN_STTILECFG:
+      arg0 = CALL_EXPR_ARG (exp, 0);
+      op0 = expand_normal (arg0);
+
+      if (!address_operand (op0, VOIDmode))
+	{
+	  op0 = convert_memory_address (Pmode, op0);
+	  op0 = copy_addr_to_reg (op0);
+	}
+      op0 = gen_rtx_MEM (XImode, op0);
+      if (fcode == IX86_BUILTIN_LDTILECFG)
+	icode = CODE_FOR_ldtilecfg;
+      else
+	icode = CODE_FOR_sttilecfg;
+      pat = GEN_FCN (icode) (op0);
+      emit_insn (pat);
+      return 0;
+
     case IX86_BUILTIN_LLWPCB:
       arg0 = CALL_EXPR_ARG (exp, 0);
       op0 = expand_normal (arg0);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6a26d966a0e3..df97a2d6270d 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -353,6 +353,10 @@
   ;; For USER_MSR support
   UNSPECV_URDMSR
   UNSPECV_UWRMSR
+
+  ;; For AMX-TILE
+  UNSPECV_LDTILECFG
+  UNSPECV_STTILECFG
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -28152,6 +28156,26 @@
   [(set_attr "prefix" "vex")
    (set_attr "type" "other")])
 
+(define_insn "ldtilecfg"
+  [(unspec_volatile [(match_operand:XI 0 "memory_operand" "m")]
+            UNSPECV_LDTILECFG)]
+  "TARGET_AMX_TILE"
+  "ldtilecfg\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "memory" "load")
+   (set_attr "mode" "XI")])
+
+(define_insn "sttilecfg"
+  [(set (match_operand:XI 0 "memory_operand" "=m")
+        (unspec_volatile:XI [(const_int 0)] UNSPECV_STTILECFG))]
+  "TARGET_AMX_TILE"
+  "sttilecfg\t%0"
+  [(set_attr "type" "other")
+   (set_attr "prefix" "maybe_evex")
+   (set_attr "memory" "store")
+   (set_attr "mode" "XI")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")
diff --git a/gcc/testsuite/gcc.target/i386/amxtile-4.c b/gcc/testsuite/gcc.target/i386/amxtile-4.c
new file mode 100644
index 000000000000..6b49cdeeb509
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/amxtile-4.c
@@ -0,0 +1,52 @@
+/* PR target/114098 */
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mamx-tile" } */
+
+#include <stdint.h>
+#include <x86intrin.h>
+
+#define MAX_ROWS 16
+#define MAX_COLS 64
+#define MAX 1024
+#define STRIDE 64
+
+typedef struct __tile_config
+{
+  uint8_t palette_id;
+  uint8_t start_row;
+  uint8_t reserved_0[14];
+  uint16_t colsb[16];
+  uint8_t rows[16];
+} __tilecfg __attribute__ ((aligned (64)));
+
+/* Initialize tile config */
+static void
+init_tile_config (__tilecfg *tileinfo)
+{
+  int i;
+  tileinfo->palette_id = 1;
+  tileinfo->start_row = 0;
+
+  for (i = 0; i < 1; ++i)
+  {
+    tileinfo->colsb[i] = MAX_ROWS;
+    tileinfo->rows[i] = MAX_ROWS;
+  }
+
+  for (i = 1; i < 4; ++i)
+  {
+    tileinfo->colsb[i] = MAX_COLS;
+    tileinfo->rows[i] = MAX_ROWS;
+  }
+
+  _tile_loadconfig (tileinfo);
+}
+
+void
+enable_amx (void)
+{
+  __tilecfg tile_data = {0};
+  init_tile_config (&tile_data);
+}
+
+/* { dg-final { scan-assembler-times "pxor\[^\n\]*%xmm" 1 } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-02-26  4:26 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-02-26  4:26 [gcc r14-9171] x86: Properly implement AMX-TILE load/store intrinsics H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).