public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386] Introduce support for PKU instructions.
@ 2015-12-18  7:16 Kirill Yukhin
  2015-12-20 10:56 ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: Kirill Yukhin @ 2015-12-18  7:16 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

Hello,
The patch at the bottom introduces support for the Intel PKRU instructions:
rdpkru and wrpkru.
It is pretty straightforward, so I hope it is still suitable for v6.

Names for the new intrinsics will appear shortly in a new revision of the SDM.

Bootstrapped & regtested.

Is it ok for trunk?

gcc/
	* common/config/i386/i386-common.c (OPTION_MASK_ISA_PKU_SET): New.
	(OPTION_MASK_ISA_PKU_UNSET): Ditto.
	(ix86_handle_option): Handle OPT_mpku.
	* config.gcc: Add pkuintrin.h to i[34567]86-*-* and x86_64-*-*
	targets.
	* config/i386/cpuid.h (bit_PKU): New.
	(bit_OSPKE): Ditto.
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect PKU feature.
	* config/i386/i386-c.c (ix86_target_macros_internal): Handle PKU ISA
	flag.
	* config/i386/i386.c (ix86_target_string): Add "-mpku" to
	ix86_target_opts.
	(ix86_option_override_internal): Define PTA_PKU, mention new key
	in skylake-avx512. Handle new ISA bits.
	(ix86_valid_target_attribute_inner_p): Add "pku".
	(enum ix86_builtins): Add IX86_BUILTIN_RDPKRU and IX86_BUILTIN_WRPKRU.
	(builtin_description bdesc_special_args[]): Add new built-ins.
	* config/i386/i386.h (define TARGET_PKU): New.
	(define TARGET_PKU_P): Ditto.
	* config/i386/i386.md (define_c_enum "unspec"): Add UNSPEC_PKU.
	(define_c_enum "unspecv"): Add UNSPECV_PKU.
	(define_expand "rdpkru"): New.
	(define_insn "rdpkru_2"): Ditto.
	(define_expand "wrpkru"): Ditto.
	(define_insn "wrpkru_2"): Ditto.
	* config/i386/i386.opt (mpku): Ditto.
	* config/i386/pkuintrin.h: New file.
	* config/i386/x86intrin.h: Include pkuintrin.h
	* doc/extend.texi: Describe new built-ins.
	* doc/invoke.texi: Describe new switches.

gcc/testsuite/
	* g++.dg/other/i386-2.C: Add -mpku.
	* g++.dg/other/i386-3.C: Ditto.
	* gcc.target/i386/rdpku-1.c: New test.
	* gcc.target/i386/sse-12.c: Add -mpku.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/wrpku-1.c: New test.

--
Thanks, K

commit ebd39dd557ddd0d1aae344655f1bd69673477865
Author: Kirill Yukhin <kirill.yukhin@intel.com>
Date:   Wed Dec 16 10:52:37 2015 +0300

    PKU. Initial support.

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index a9d2208..6039e04 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -129,6 +129,7 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
 #define OPTION_MASK_ISA_MWAITX_SET OPTION_MASK_ISA_MWAITX
 #define OPTION_MASK_ISA_CLZERO_SET OPTION_MASK_ISA_CLZERO
+#define OPTION_MASK_ISA_PKU_SET OPTION_MASK_ISA_PKU
 
 /* Define a set of ISAs which aren't available when a given ISA is
    disabled.  MMX and SSE ISAs are handled separately.  */
@@ -190,6 +191,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_CLWB_UNSET OPTION_MASK_ISA_CLWB
 #define OPTION_MASK_ISA_MWAITX_UNSET OPTION_MASK_ISA_MWAITX
 #define OPTION_MASK_ISA_CLZERO_UNSET OPTION_MASK_ISA_CLZERO
+#define OPTION_MASK_ISA_PKU_UNSET OPTION_MASK_ISA_PKU
 
 /* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
    as -mno-sse4.1. */
@@ -962,6 +964,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mpku:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_PKU_UNSET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_PKU_UNSET;
+	}
+      return true;
+
 
   /* Comes from final.c -- no real reason to change it.  */
 #define MAX_CODE_ALIGN 16
diff --git a/gcc/config.gcc b/gcc/config.gcc
index 882e413..4fd6d8b 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -372,7 +372,8 @@ i[34567]86-*-*)
 		       xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
 		       avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h
 		       avx512ifmaintrin.h avx512ifmavlintrin.h avx512vbmiintrin.h
-		       avx512vbmivlintrin.h clwbintrin.h pcommitintrin.h mwaitxintrin.h clzerointrin.h"
+		       avx512vbmivlintrin.h clwbintrin.h pcommitintrin.h
+		       mwaitxintrin.h clzerointrin.h pkuintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -393,7 +394,8 @@ x86_64-*-*)
 		       xsavesintrin.h avx512dqintrin.h avx512bwintrin.h
 		       avx512vlintrin.h avx512vlbwintrin.h avx512vldqintrin.h
 		       avx512ifmaintrin.h avx512ifmavlintrin.h avx512vbmiintrin.h
-		       avx512vbmivlintrin.h clwbintrin.h pcommitintrin.h mwaitxintrin.h clzerointrin.h"
+		       avx512vbmivlintrin.h clwbintrin.h pcommitintrin.h
+		       mwaitxintrin.h clzerointrin.h pkuintrin.h"
 	;;
 ia64-*-*)
 	extra_headers=ia64intrin.h
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index fccdf1f..05cdc80 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -95,6 +95,8 @@
 /* %ecx */
 #define bit_PREFETCHWT1	  (1 << 0)
 #define bit_AVX512VBMI	(1 << 1)
+#define bit_PKU	(1 << 3)
+#define bit_OSPKE	(1 << 4)
 
 /* XFEATURE_ENABLED_MASK register bits (%eax == 13, %ecx == 0) */
 #define bit_BNDREGS     (1 << 3)
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 8ec1e40..b91d38a 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -414,7 +414,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_avx512dq = 0, has_avx512bw = 0, has_avx512vl = 0;
   unsigned int has_avx512vbmi = 0, has_avx512ifma = 0, has_clwb = 0;
   unsigned int has_pcommit = 0, has_mwaitx = 0;
-  unsigned int has_clzero = 0;
+  unsigned int has_clzero = 0, has_pku = 0;
 
   bool arch;
 
@@ -501,7 +501,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       has_avx512vl = ebx & bit_AVX512IFMA;
 
       has_prefetchwt1 = ecx & bit_PREFETCHWT1;
-      has_avx512vl = ecx & bit_AVX512VBMI;
+      has_avx512vbmi = ecx & bit_AVX512VBMI;
+      has_pku = ecx & bit_OSPKE;
     }
 
   if (max_level >= 13)
@@ -971,6 +972,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *pcommit = has_pcommit ? " -mpcommit" : " -mno-pcommit";
       const char *mwaitx  = has_mwaitx  ? " -mmwaitx"  : " -mno-mwaitx"; 
       const char *clzero  = has_clzero  ? " -mclzero"  : " -mno-clzero";
+      const char *pku = has_pku ? " -mpku" : " -mno-pku";
       options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
 			sse4a, cx16, sahf, movbe, aes, sha, pclmul,
 			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
@@ -980,7 +982,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 			avx512cd, avx512pf, prefetchwt1, clflushopt,
 			xsavec, xsaves, avx512dq, avx512bw, avx512vl,
 			avx512ifma, avx512vbmi, clwb, pcommit, mwaitx,
-			clzero, NULL);
+			clzero, pku, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 0f3c3ec..5429f6e 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -441,6 +441,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__MWAITX__");
   if (isa_flag & OPTION_MASK_ISA_CLZERO)
     def_or_undef (parse_in, "__CLZERO__");
+  if (isa_flag & OPTION_MASK_ISA_PKU)
+    def_or_undef (parse_in, "__PKU__");
   if (TARGET_IAMCU)
     {
       def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index cecea24..d813a96 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -3755,6 +3755,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mpcommit",	OPTION_MASK_ISA_PCOMMIT },
     { "-mmwaitx",	OPTION_MASK_ISA_MWAITX  },
     { "-mclzero",	OPTION_MASK_ISA_CLZERO  },
+    { "-mpku",		OPTION_MASK_ISA_PKU  },
   };
 
   /* Flag options.  */
@@ -4310,6 +4311,7 @@ ix86_option_override_internal (bool main_args_p,
 #define PTA_MWAITX		(HOST_WIDE_INT_1 << 57)
 #define PTA_CLZERO		(HOST_WIDE_INT_1 << 58)
 #define PTA_NO_80387		(HOST_WIDE_INT_1 << 59)
+#define PTA_PKU		(HOST_WIDE_INT_1 << 60)
 
 #define PTA_CORE2 \
   (PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 \
@@ -4331,7 +4333,7 @@ ix86_option_override_internal (bool main_args_p,
   (PTA_BROADWELL | PTA_CLFLUSHOPT | PTA_XSAVEC | PTA_XSAVES)
 #define PTA_SKYLAKE_AVX512 \
   (PTA_SKYLAKE | PTA_AVX512F | PTA_AVX512CD | PTA_AVX512VL \
-   | PTA_AVX512BW | PTA_AVX512DQ)
+   | PTA_AVX512BW | PTA_AVX512DQ | PTA_PKU)
 #define PTA_KNL \
   (PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER | PTA_AVX512F | PTA_AVX512CD)
 #define PTA_BONNELL \
@@ -4934,6 +4936,9 @@ ix86_option_override_internal (bool main_args_p,
 	if (processor_alias_table[i].flags & PTA_MWAITX
 	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_MWAITX))
 	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_MWAITX;
+	if (processor_alias_table[i].flags & PTA_PKU
+	    && !(opts->x_ix86_isa_flags_explicit & OPTION_MASK_ISA_PKU))
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_PKU;
 
 	if (!(opts_set->x_target_flags & MASK_80387))
 	  {
@@ -5930,6 +5935,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("pcommit",	OPT_mpcommit),
     IX86_ATTR_ISA ("mwaitx",	OPT_mmwaitx),
     IX86_ATTR_ISA ("clzero",    OPT_mclzero),
+    IX86_ATTR_ISA ("pku",	OPT_mpku),
 
     /* enum options */
     IX86_ATTR_ENUM ("fpmath=",	OPT_mfpmath_),
@@ -32283,6 +32289,10 @@ enum ix86_builtins
   IX86_BUILTIN_READ_FLAGS,
   IX86_BUILTIN_WRITE_FLAGS,
 
+  /* PKU instructions.  */
+  IX86_BUILTIN_RDPKRU,
+  IX86_BUILTIN_WRPKRU,
+
   IX86_BUILTIN_MAX
 };
 
@@ -32788,6 +32798,10 @@ static const struct builtin_description bdesc_special_args[] =
 
   /* PCOMMIT.  */
   { OPTION_MASK_ISA_PCOMMIT, CODE_FOR_pcommit, "__builtin_ia32_pcommit", IX86_BUILTIN_PCOMMIT, UNKNOWN, (int) VOID_FTYPE_VOID },
+
+  /* RDPKRU and WRPKRU.  */
+  { OPTION_MASK_ISA_PKU, CODE_FOR_rdpkru,  "__builtin_ia32_rdpkru", IX86_BUILTIN_RDPKRU, UNKNOWN, (int) UNSIGNED_FTYPE_VOID },
+  { OPTION_MASK_ISA_PKU, CODE_FOR_wrpkru,  "__builtin_ia32_wrpkru", IX86_BUILTIN_WRPKRU, UNKNOWN, (int) VOID_FTYPE_UNSIGNED }
 };
 
 /* Builtins with variable number of arguments.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index e69c9cc..7e6548b 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -158,6 +158,8 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_CLWB_P(x)	TARGET_ISA_CLWB_P(x)
 #define TARGET_MWAITX	TARGET_ISA_MWAITX
 #define TARGET_MWAITX_P(x)	TARGET_ISA_MWAITX_P(x)
+#define TARGET_PKU	TARGET_ISA_PKU
+#define TARGET_PKU_P(x)	TARGET_ISA_PKU_P(x)
 
 #define TARGET_LP64	TARGET_ABI_64
 #define TARGET_LP64_P(x)	TARGET_ABI_64_P(x)
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..a2a44ad 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -193,6 +193,9 @@
   UNSPEC_BNDCU
   UNSPEC_BNDCN
   UNSPEC_MPX_FENCE
+
+  ;; For RDPKRU support
+  UNSPEC_PKU
 ])
 
 (define_c_enum "unspecv" [
@@ -268,6 +271,9 @@
   ;; For CLZERO support
   UNSPECV_CLZERO
 
+  ;; For WRPKRU support
+  UNSPECV_PKU
+
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -19287,6 +19293,50 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "<MODE>")])
 
+(define_expand "rdpkru"
+  [(set (match_operand:SI 0 "register_operand")
+	(unspec:SI [(const_int 0)] UNSPEC_PKU))
+   (set (reg:SI CX_REG)
+	(const_int 0))
+   (clobber (reg:SI DX_REG))]
+  "TARGET_PKU"
+{
+  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
+  emit_insn (gen_rdpkru_2 (operands[0]));
+  DONE;
+})
+
+(define_insn "rdpkru_2"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec:SI [(const_int 0)] UNSPEC_PKU))
+   (clobber (reg:SI DX_REG))
+   (use (reg:SI CX_REG))]
+  "TARGET_PKU"
+  "rdpkru"
+  [(set_attr "type" "other")])
+
+(define_expand "wrpkru"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand")] UNSPECV_PKU)
+   (set (reg:SI CX_REG)
+	(const_int 0))
+   (set (reg:SI DX_REG)
+	(const_int 0))]
+  "TARGET_PKU"
+{
+  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
+  emit_move_insn (gen_rtx_REG (SImode, DX_REG), CONST0_RTX (SImode));
+  emit_insn (gen_wrpkru_2 (operands[0]));
+  DONE;
+})
+
+(define_insn "wrpkru_2"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] UNSPECV_PKU)
+   (use (reg:SI CX_REG))
+   (use (reg:SI DX_REG))]
+  "TARGET_PKU"
+  "wrpkru"
+  [(set_attr "type" "other")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 730b753..5f83b3a 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -876,6 +876,10 @@ mclzero
 Target Report Mask(ISA_CLZERO) Var(ix86_isa_flags) Save
 Support CLZERO built-in functions and code generation.
 
+mpku
+Target Report Mask(ISA_PKU) Var(ix86_isa_flags) Save
+Support PKU built-in functions and code generation.
+
 mstack-protector-guard=
 Target RejectNegative Joined Enum(stack_protector_guard) Var(ix86_stack_protector_guard) Init(SSP_TLS)
 Use given stack-protector guard.
diff --git a/gcc/config/i386/pkuintrin.h b/gcc/config/i386/pkuintrin.h
new file mode 100644
index 0000000..c810585
--- /dev/null
+++ b/gcc/config/i386/pkuintrin.h
@@ -0,0 +1,56 @@
+/* Copyright (C) 2015 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#if !defined _X86INTRIN_H_INCLUDED
+# error "Never use <pkuintrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef _PKUINTRIN_H_INCLUDED
+#define _PKUINTRIN_H_INCLUDED
+
+#ifndef __PKU__
+#pragma GCC push_options
+#pragma GCC target("pku")
+#define __DISABLE_PKU__
+#endif /* __PKU__ */
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_rdpkru_u32(void)
+{
+  return __builtin_ia32_rdpkru ();
+}
+
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_wrpkru(unsigned int key)
+{
+  return __builtin_ia32_wrpkru (key);
+}
+
+#ifdef __DISABLE_PKU__
+#undef __DISABLE_PKU__
+#pragma GCC pop_options
+#endif /* __DISABLE_PKU__ */
+
+#endif /* _PKUINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 9b292b3..c8819eb 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -95,6 +95,8 @@
 
 #include <clzerointrin.h>
 
+#include <pkuintrin.h>
+
 #endif /* __iamcu__ */
 
 #endif /* _X86INTRIN_H_INCLUDED */
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 883d9b3..4578925 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -18343,6 +18343,13 @@ All of them generate the machine instruction that is part of the name.
 void __builtin_i32_clzero (void *)
 @end smallexample
 
+The following built-in functions are available when @option{-mpku} is used.
+They generate reads and writes to PKRU.
+@smallexample
+void __builtin_ia32_wrpkru (unsigned int)
+unsigned int __builtin_ia32_rdpkru ()
+@end smallexample
+
 @node x86 transactional memory intrinsics
 @subsection x86 Transactional Memory Intrinsics
 
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9b3e2fe..f627d45 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1103,7 +1103,8 @@ See RS/6000 and PowerPC Options.
 -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
 -mprefetchwt1 -mclflushopt -mxsavec -mxsaves @gol
 -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
--mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx -mmwaitx -mclzero -mthreads @gol
+-mbmi2 -mfxsr -mxsave -mxsaveopt -mrtm -mlwp -mmpx -mmwaitx -mclzero
+-mpku -mthreads @gol
 -mms-bitfields -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
 -mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
@@ -22628,7 +22629,7 @@ AVX512CD instruction set support.
 
 @item skylake-avx512
 Intel Skylake Server CPU with 64-bit extensions, MOVBE, MMX, SSE, SSE2, SSE3,
-SSSE3, SSE4.1, SSE4.2, POPCNT, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
+SSSE3, SSE4.1, SSE4.2, POPCNT, PKU, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, FMA,
 BMI, BMI2, F16C, RDSEED, ADCX, PREFETCHW, CLFLUSHOPT, XSAVEC, XSAVES, AVX512F,
 AVX512VL, AVX512BW, AVX512DQ and AVX512CD instruction set support.
 
@@ -23250,11 +23251,13 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @need 200
 @itemx -mclzero
 @opindex mclzero
+@itemx -mpku
+@opindex mpku
 These switches enable the use of instructions in the MMX, SSE,
 SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD,
 SHA, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA, SSE4A, FMA4, XOP, LWP, ABM,
 AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA AVX512VBMI, BMI, BMI2, FXSR,
-XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX or 3DNow!@:
+XSAVE, XSAVEOPT, LZCNT, RTM, MPX, MWAITX, PKU or 3DNow!@:
 extended instruction sets.  Each has a corresponding @option{-mno-} option
 to disable use of these instructions.
 
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index 99caa10..74b3cb8 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,9 +1,9 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt  -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mclwb -mpcommit -mmwaitx -mclzero" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt  -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mclwb -mpcommit -mmwaitx -mclzero -mpku" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
-   popcntintrin.h, fmaintrin.h and mm_malloc.h.h are usable with 
+   popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h.h are usable with
    -O -pedantic-errors.  */
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 49b4484..04ea372 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,9 +1,9 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mclwb -mpcommit -mmwaitx -mclzero" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mclwb -mpcommit -mmwaitx -mclzero -mpku" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
-   popcntintrin.h, fmaintrin.h and mm_malloc.h are usable with
+   popcntintrin.h, fmaintrin.h, pkuintrin.h and mm_malloc.h are usable with
    -O -fkeep-inline-functions.  */
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/rdpku-1.c b/gcc/testsuite/gcc.target/i386/rdpku-1.c
new file mode 100644
index 0000000..044301c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/rdpku-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mpku -O2" } */
+/* { dg-final { scan-assembler "rdpkru\n" } } */
+
+#include <x86intrin.h>
+
+unsigned extern
+rdpku_test (void)
+{
+  return _rdpkru_u32 ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 8b7ef6d..af15946 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
    popcntintrin.h and mm_malloc.h are usable
    with -O -std=c89 -pedantic-errors.  */
 /* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mclwb -mpcommit -mmwaitx -mclzero" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512ifma -mclwb -mpcommit -mmwaitx -mclzero -mpku" } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 0592370..1144e5d 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mclwb -mpcommit -mmwaitx -mclzero" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512ifma -mclwb -mpcommit -mmwaitx -mclzero -mpku" } */
 
 #include <mm_malloc.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 72017f5..9b15515 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -699,7 +699,7 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
 
 /* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT/FMA). */
 #ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt,xsavec,xsaves,clflushopt,clwb,pcommit")
+#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt,fma,rdseed,prfchw,adx,fxsr,xsaveopt,xsavec,xsaves,clflushopt,clwb,pcommit,pku")
 #endif
 #include <x86intrin.h>
 /* xopintrin.h */
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 4561354..d29d2d7 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -594,6 +594,6 @@
 #define __builtin_ia32_extracti64x2_256_mask(A, E, C, D) __builtin_ia32_extracti64x2_256_mask(A, 1, C, D)
 #define __builtin_ia32_extractf64x2_256_mask(A, E, C, D) __builtin_ia32_extractf64x2_256_mask(A, 1, C, D)
 
-#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,clwb,pcommit,mwaitx,clzero")
+#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,clwb,pcommit,mwaitx,clzero,pku")
 
 #include <x86intrin.h>
diff --git a/gcc/testsuite/gcc.target/i386/wrpku-1.c b/gcc/testsuite/gcc.target/i386/wrpku-1.c
new file mode 100644
index 0000000..44a4b72
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/wrpku-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-mpku -O2" } */
+/* { dg-final { scan-assembler "wrpkru\n" } } */
+
+#include <x86intrin.h>
+
+void extern
+wrpku_test (unsigned int key)
+{
+  _wrpkru (key);
+}

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386] Introduce support for PKU instructions.
  2015-12-18  7:16 [PATCH, i386] Introduce support for PKU instructions Kirill Yukhin
@ 2015-12-20 10:56 ` Uros Bizjak
  2015-12-22 15:43   ` Kirill Yukhin
  0 siblings, 1 reply; 4+ messages in thread
From: Uros Bizjak @ 2015-12-20 10:56 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: GCC Patches

On Fri, Dec 18, 2015 at 8:15 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello,
> Patch in the bottom introduces support Intel PKRU instructions:
> rdpkru and wrpkru.
> It is pretty straight-forward, so I hope it is still suitable for v6.
>
> Names for new intrinsics will appear shortly in new revision of SDM.
>
> Bootstrapped & regtested.
>
> Is it ok for trunk?

The patch mostly looks OK, but the md patterns are written the wrong
way. Please see the comments below.

> gcc/
>         * common/config/i386/i386-common.c (OPTION_MASK_ISA_PKU_SET): New.
>         (OPTION_MASK_ISA_PKU_UNSET): Ditto.
>         (ix86_handle_option): Handle OPT_mpku.
>         * config.gcc: Add pkuintrin.h to i[34567]86-*-* and x86_64-*-*
>         targets.
>         * config/i386/cpuid.h (host_detect_local_cpu): Detect PKU feature.
>         * config/i386/i386-c.c (ix86_target_macros_internal): Handle PKU ISA
>         flag.
>         * config/i386/i386.c (ix86_target_string): Add "-mpku" to
>         ix86_target_opts.
>         (ix86_option_override_internal): Define PTA_PKU, mention new key
>         in skylake-avx512. Handle new ISA bits.
>         (ix86_valid_target_attribute_inner_p): Add "pku".
>         (enum ix86_builtins): Add IX86_BUILTIN_RDPKRU and IX86_BUILTIN_WRPKRU.
>         (builtin_description bdesc_special_args[]): Add new built-ins.
>         * config/i386/i386.h (define TARGET_PKU): New.
>         (define TARGET_PKU_P): Ditto.
>         * config/i386/i386.md (define_c_enum "unspec"): Add UNSPEC_PKU.
>         (define_c_enum "unspecv"): Add UNSPECV_PKU.
>         (define_expand "rdpkru"): New.
>         (define_insn "rdpkru_2"): Ditto.
>         (define_expand "wrpkru"): Ditto.
>         (define_insn "wrpkru_2"): Ditto.
>         * config/i386/i386.opt (mpku): Ditto.
>         * config/i386/pkuintrin.h: New file.
>         * config/i386/x86intrin.h: Include pkuintrin.h
>         * doc/extend.texi: Describe new built-ins.
>         * doc/invoke.texi: Describe new switches.
>
> gcc/testsuite/
>         * g++.dg/other/i386-2.C: Add -mpku.
>         * g++.dg/other/i386-3.C: Ditto.
>         * gcc.target/i386/rdpku-1.c: New test.
>         * gcc.target/i386/sse-12.c: Add -mpku.
>         * gcc.target/i386/sse-13.c: Ditto..
>         * gcc.target/i386/sse-22.c: Ditto..
>         * gcc.target/i386/sse-33.c: Ditto..
>         * gcc.target/i386/wrpku-1.c: New test.
>

> +(define_expand "rdpkru"
> +  [(set (match_operand:SI 0 "register_operand")
> +       (unspec:SI [(const_int 0)] UNSPEC_PKU))
> +   (set (reg:SI CX_REG)
> +       (const_int 0))
> +   (clobber (reg:SI DX_REG))]
> +  "TARGET_PKU"
> +{
> +  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
> +  emit_insn (gen_rdpkru_2 (operands[0]));
> +  DONE;
> +})

You should use "parallel" to emit an insn with several parallel
expressions. So, in the preparation statements, move const0 into a
pseudo, so that the RA will later use the correct register. And please
leave it to the expander to emit the pattern.

> +(define_insn "rdpkru_2"
> +  [(set (match_operand:SI 0 "register_operand" "=a")
> +       (unspec:SI [(const_int 0)] UNSPEC_PKU))
> +   (clobber (reg:SI DX_REG))
> +   (use (reg:SI CX_REG))]
> +  "TARGET_PKU"
> +  "rdpkru"
> +  [(set_attr "type" "other")])

Please do not use explicit hard registers. There are appropriate
single-reg constraints available for use. Without seeing the
documentation, I think the above should look like:

(define_insn "*rdpkru"
  [(set (match_operand:SI 0 "register_operand" "=a")
       (unspec:SI [(match_operand:SI 1 "register_operand" "c")] UNSPEC_PKU))
   (clobber (match_operand:SI 2 "register_operand" "=d"))]
  "TARGET_PKU"
  "rdpkru"
  [(set_attr "type" "other")])

> +(define_expand "wrpkru"
> +  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand")] UNSPECV_PKU)
> +   (set (reg:SI CX_REG)
> +       (const_int 0))
> +   (set (reg:SI DX_REG)
> +       (const_int 0))]
> +  "TARGET_PKU"
> +{
> +  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
> +  emit_move_insn (gen_rtx_REG (SImode, DX_REG), CONST0_RTX (SImode));
> +  emit_insn (gen_wrpkru_2 (operands[0]));
> +  DONE;
> +})
> +
> +(define_insn "wrpkru_2"
> +  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] UNSPECV_PKU)
> +   (use (reg:SI CX_REG))
> +   (use (reg:SI DX_REG))]
> +  "TARGET_PKU"
> +  "wrpkru"
> +  [(set_attr "type" "other")])
>
Please move all input operands to the inside of the unspec. It also
looks like this pattern is missing a clobber, as in the rdpkru
pattern above.

Uros.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386] Introduce support for PKU instructions.
  2015-12-20 10:56 ` Uros Bizjak
@ 2015-12-22 15:43   ` Kirill Yukhin
  2015-12-23 11:43     ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: Kirill Yukhin @ 2015-12-22 15:43 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

Hello Uroš,
I (hopefully) fixed all of the inputs, thanks!

The updated patch for i386.md is at the bottom;
the rest of the patch is the same.

Bootstrap in progress. New tests pass.

Is it OK for trunk if the bootstrap passes?

On 20 Dec 11:56, Uros Bizjak wrote:
> > +(define_expand "rdpkru"
> > +  [(set (match_operand:SI 0 "register_operand")
> > +       (unspec:SI [(const_int 0)] UNSPEC_PKU))
> > +   (set (reg:SI CX_REG)
> > +       (const_int 0))
> > +   (clobber (reg:SI DX_REG))]
> > +  "TARGET_PKU"
> > +{
> > +  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
> > +  emit_insn (gen_rdpkru_2 (operands[0]));
> > +  DONE;
> > +})
> 
> You should use "parallel" to emit insn with several parallel
> expressions. So, in the preparation statements, you move const0 to a
> pseudo, so the RA will later use correct register. And please leave to
> the expander to emit the pattern.
> 
> > +(define_insn "rdpkru_2"
> > +  [(set (match_operand:SI 0 "register_operand" "=a")
> > +       (unspec:SI [(const_int 0)] UNSPEC_PKU))
> > +   (clobber (reg:SI DX_REG))
> > +   (use (reg:SI CX_REG))]
> > +  "TARGET_PKU"
> > +  "rdpkru"
> > +  [(set_attr "type" "other")])
> 
> Please do not use explicit hard registers. There are appropriate
> single-reg constraints available for use. Without seeing the
> documentation, I think the above should look like:
> 
> (define_insn "*rdpkru"
>   [(set (match_operand:SI 0 "register_operand" "=a")
>        (unspec:SI [(match_operand:SI 1 "register_operand" "c")] UNSPEC_PKU))
>    (clobber (rmatch_operand "register_operand "=d"))
>   "TARGET_PKU"
>   "rdpkru"
>   [(set_attr "type" "other")])
> 
> > +(define_expand "wrpkru"
> > +  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand")] UNSPECV_PKU)
> > +   (set (reg:SI CX_REG)
> > +       (const_int 0))
> > +   (set (reg:SI DX_REG)
> > +       (const_int 0))]
> > +  "TARGET_PKU"
> > +{
> > +  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
> > +  emit_move_insn (gen_rtx_REG (SImode, DX_REG), CONST0_RTX (SImode));
> > +  emit_insn (gen_wrpkru_2 (operands[0]));
> > +  DONE;
> > +})
> > +
> > +(define_insn "wrpkru_2"
> > +  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")] UNSPECV_PKU)
> > +   (use (reg:SI CX_REG))
> > +   (use (reg:SI DX_REG))]
> > +  "TARGET_PKU"
> > +  "wrpkru"
> > +  [(set_attr "type" "other")])
> >
> Please move all input operands to the insisde of the unspec, but it
> looks that this pattern is missing clobber, as in the above rdpkru
> pattern.
This insn does not clobber any register.

> 
> Uros.

--
Thanks, K

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 49b2216..f427ae3 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -193,6 +193,9 @@
   UNSPEC_BNDCU
   UNSPEC_BNDCN
   UNSPEC_MPX_FENCE
+
+  ;; For RDPKRU support
+  UNSPEC_PKU
 ])

 (define_c_enum "unspecv" [
@@ -268,6 +271,9 @@
   ;; For CLZERO support
   UNSPECV_CLZERO

+  ;; For WRPKRU support
+  UNSPECV_PKU
+
 ])

 ;; Constants to represent rounding modes in the ROUND instruction
@@ -19287,6 +19293,47 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "<MODE>")])

+(define_expand "rdpkru"
+  [(set (match_operand:SI 2 "register_operand") (const_int 0))
+   (parallel [(set (match_operand:SI 0 "register_operand")
+                  (unspec:SI [(match_dup 2)] UNSPEC_PKU))
+             (clobber (match_operand:SI 1 "register_operand"))])]
+  "TARGET_PKU"
+{
+  operands[1] = gen_reg_rtx (SImode);
+  operands[2] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*rdpkru_2"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+       (unspec:SI [(match_operand:SI 2 "register_operand" "c")] UNSPEC_PKU))
+   (clobber (match_operand:SI 1 "register_operand" "=d"))]
+  "TARGET_PKU"
+  "rdpkru"
+  [(set_attr "type" "other")])
+
+(define_expand "wrpkru"
+  [(set (match_operand:SI 1 "register_operand")
+       (const_int 0))
+   (set (match_operand:SI 2 "register_operand")
+       (const_int 0))
+   (unspec_volatile:SI [(match_operand:SI 0 "register_operand")
+                       (match_dup 1)
+                       (match_dup 2)] UNSPECV_PKU)]
+  "TARGET_PKU"
+{
+  operands[1] = gen_reg_rtx (SImode);
+  operands[2] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*wrpkru_2"
+  [(unspec_volatile:SI [(match_operand:SI 0 "register_operand" "a")
+                       (match_operand:SI 1 "register_operand" "c")
+                       (match_operand:SI 2 "register_operand" "d")] UNSPECV_PKU)]
+  "TARGET_PKU"
+  "wrpkru"
+  [(set_attr "type" "other")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386] Introduce support for PKU instructions.
  2015-12-22 15:43   ` Kirill Yukhin
@ 2015-12-23 11:43     ` Uros Bizjak
  0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2015-12-23 11:43 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 2263 bytes --]

On Tue, Dec 22, 2015 at 4:43 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello Uroš,
> I (hopefully fixed all of inputs, thanks!
>
> Updated patch for i386.md in the bottom,
> rest patch is the same.
>
> Bootstrap in progress. New tests pass.
>
> Is it ok for trunk if bootstrap will pass?
>
> On 20 Dec 11:56, Uros Bizjak wrote:
>> > +(define_expand "rdpkru"
>> > +  [(set (match_operand:SI 0 "register_operand")
>> > +       (unspec:SI [(const_int 0)] UNSPEC_PKU))
>> > +   (set (reg:SI CX_REG)
>> > +       (const_int 0))
>> > +   (clobber (reg:SI DX_REG))]
>> > +  "TARGET_PKU"
>> > +{
>> > +  emit_move_insn (gen_rtx_REG (SImode, CX_REG), CONST0_RTX (SImode));
>> > +  emit_insn (gen_rdpkru_2 (operands[0]));
>> > +  DONE;
>> > +})
>>
>> You should use "parallel" to emit insn with several parallel
>> expressions. So, in the preparation statements, you move const0 to a
>> pseudo, so the RA will later use correct register. And please leave to
>> the expander to emit the pattern.
>>
>> > +(define_insn "rdpkru_2"
>> > +  [(set (match_operand:SI 0 "register_operand" "=a")
>> > +       (unspec:SI [(const_int 0)] UNSPEC_PKU))
>> > +   (clobber (reg:SI DX_REG))
>> > +   (use (reg:SI CX_REG))]
>> > +  "TARGET_PKU"
>> > +  "rdpkru"
>> > +  [(set_attr "type" "other")])
>>
>> Please do not use explicit hard registers. There are appropriate
>> single-reg constraints available for use. Without seeing the
>> documentation, I think the above should look like:
>>
>> (define_insn "*rdpkru"
>>   [(set (match_operand:SI 0 "register_operand" "=a")
>>        (unspec:SI [(match_operand:SI 1 "register_operand" "c")] UNSPEC_PKU))
>>    (clobber (rmatch_operand "register_operand "=d"))
>>   "TARGET_PKU"
>>   "rdpkru"
>>   [(set_attr "type" "other")])

According to the SDM, rdpkru moves zero to the %edx register. Let's be
precise and model this. Also, since the rdpkru insn accesses hidden state
(PKRU, which is not modelled properly in the pattern), it should also be
marked as unspec_volatile. I took the liberty of rewriting the i386.md
changes in the attached patch.

Your patch with the attached i386.md changes is OK for mainline, after
additional bootstrap and regression test.

Thanks,
Uros.

[-- Attachment #2: pku.diff --]
[-- Type: text/plain, Size: 1655 bytes --]

Index: i386.md
===================================================================
--- i386.md	(revision 231927)
+++ i386.md	(working copy)
@@ -268,6 +268,8 @@
   ;; For CLZERO support
   UNSPECV_CLZERO
 
+  ;; For RDPKRU and WRPKRU support
+  UNSPECV_PKU
 ])
 
 ;; Constants to represent rounding modes in the ROUND instruction
@@ -19320,6 +19322,48 @@
   [(set_attr "type" "imov")
    (set_attr "mode" "<MODE>")])
 
+;; RDPKRU and WRPKRU
+
+(define_expand "rdpkru"
+  [(parallel
+     [(set (match_operand:SI 0 "register_operand")
+	   (unspec_volatile:SI [(match_dup 1)] UNSPECV_PKU))
+      (set (match_dup 2) (const_int 0))])]
+  "TARGET_PKU"
+{
+  operands[1] = force_reg (SImode, const0_rtx);
+  operands[2] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*rdpkru"
+  [(set (match_operand:SI 0 "register_operand" "=a")
+	(unspec_volatile:SI [(match_operand:SI 2 "register_operand" "c")]
+			    UNSPECV_PKU))
+   (set (match_operand:SI 1 "register_operand" "=d")
+	(const_int 0))]
+  "TARGET_PKU"
+  "rdpkru"
+  [(set_attr "type" "other")])
+
+(define_expand "wrpkru"
+  [(unspec_volatile:SI
+     [(match_operand:SI 0 "register_operand")
+      (match_dup 1) (match_dup 2)] UNSPECV_PKU)]
+  "TARGET_PKU"
+{
+  operands[1] = force_reg (SImode, const0_rtx);
+  operands[2] = force_reg (SImode, const0_rtx);
+})
+
+(define_insn "*wrpkru"
+  [(unspec_volatile:SI
+     [(match_operand:SI 0 "register_operand" "a")
+      (match_operand:SI 1 "register_operand" "d")
+      (match_operand:SI 2 "register_operand" "c")] UNSPECV_PKU)]
+  "TARGET_PKU"
+  "wrpkru"
+  [(set_attr "type" "other")])
+
 (include "mmx.md")
 (include "sse.md")
 (include "sync.md")

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-12-23 11:43 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-18  7:16 [PATCH, i386] Introduce support for PKU instructions Kirill Yukhin
2015-12-20 10:56 ` Uros Bizjak
2015-12-22 15:43   ` Kirill Yukhin
2015-12-23 11:43     ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).