public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, testsuite, i386] BMI2 support for GCC
@ 2011-08-19 12:27 Kirill Yukhin
  2011-08-19 13:26 ` H.J. Lu
  0 siblings, 1 reply; 27+ messages in thread
From: Kirill Yukhin @ 2011-08-19 12:27 UTC (permalink / raw)
  To: gcc-patches List, Uros Bizjak, H.J. Lu

[-- Attachment #1: Type: text/plain, Size: 549 bytes --]

Hi guys,
I've prepared a patch which enables the BMI2 extensions in GCC.

It conforms (hopefully) to the spec, which can be found at [1].

I am attaching the following files:
 - bmi2.gcc.patch: a bunch of changes to GCC
 - ChangeLog: the entry for the ChangeLog in GCC's root directory
 - ChangeLog.testsuite: the entry for the ChangeLog in GCC's test suite

Bootstrap passes.
Make-check shows no new failures, and my new compile-time tests pass.
Under a simulator, make-check passes all of my new tests.

Is it OK for trunk?

[1] - http://software.intel.com/file/36945

Thanks, K

[-- Attachment #2: bmi2.gcc.patch --]
[-- Type: application/octet-stream, Size: 52585 bytes --]

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index b201835..99643d6 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
 
 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
@@ -137,6 +138,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
@@ -395,6 +397,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mbmi2:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI2_UNSET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_UNSET;
+	}
+      return true;
+
     case OPT_mtbm:
       if (value)
 	{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index b92ce3d..30cce99 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -352,7 +352,7 @@ i[34567]86-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       lzcntintrin.h bmiintrin.h tbmintrin.h"
+		       lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -364,7 +364,7 @@ x86_64-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       lzcntintrin.h bmiintrin.h tbmintrin.h"
+		       lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
new file mode 100644
index 0000000..f3ffa52
--- /dev/null
+++ b/gcc/config/i386/bmi2intrin.h
@@ -0,0 +1,81 @@
+/* Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __BMI2__
+# error "BMI2 instruction set not enabled"
+#endif /* __BMI2__ */
+
+#ifndef _BMI2INTRIN_H_INCLUDED
+#define _BMI2INTRIN_H_INCLUDED
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_bzhi_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_pdep_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_pext_si (__X, __Y);
+}
+
+#ifdef  __x86_64__
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_bzhi_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_pdep_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_pext_di (__X, __Y);
+}
+
+#endif /* __x86_64__  */
+
+#endif /* _BMI2INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index d53743f..5da8fd2 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -67,6 +67,7 @@
 #define bit_FSGSBASE	(1 << 0)
 #define bit_BMI		(1 << 3)
 #define bit_AVX2	(1 << 5)
+#define bit_BMI2	(1 << 8)
 
 #if defined(__i386__) && defined(__PIC__)
 /* %ebx may be the PIC register.  */
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index b7a1f52..8107ece 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -396,7 +396,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
   unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
-  unsigned int has_bmi = 0, has_tbm = 0, has_lzcnt = 0;
+  unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
 
   bool arch;
 
@@ -475,6 +475,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 
       has_bmi = ebx & bit_BMI;
       has_avx2 = ebx & bit_AVX2;
+      has_bmi2 = ebx & bit_BMI2;
     }
 
   if (!arch)
@@ -715,6 +716,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
       const char *xop = has_xop ? " -mxop" : " -mno-xop";
       const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
+      const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
       const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
       const char *avx = has_avx ? " -mavx" : " -mno-avx";
       const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
@@ -723,8 +725,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
 
       options = concat (options, cx16, sahf, movbe, ase, pclmul,
-			popcnt, abm, lwp, fma, fma4, xop, bmi, tbm,
-			avx2, avx, sse4_2, sse4_1, lzcnt, NULL);
+			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
+			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5c1dfe6..d4b0b08 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -273,6 +273,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__ABM__");
   if (isa_flag & OPTION_MASK_ISA_BMI)
     def_or_undef (parse_in, "__BMI__");
+  if (isa_flag & OPTION_MASK_ISA_BMI2)
+    def_or_undef (parse_in, "__BMI2__");
   if (isa_flag & OPTION_MASK_ISA_LZCNT)
     def_or_undef (parse_in, "__LZCNT__");
   if (isa_flag & OPTION_MASK_ISA_TBM)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 53c5944..e8f229a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2664,6 +2664,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mmmx",		OPTION_MASK_ISA_MMX },
     { "-mabm",		OPTION_MASK_ISA_ABM },
     { "-mbmi",		OPTION_MASK_ISA_BMI },
+    { "-mbmi2", 	OPTION_MASK_ISA_BMI2 },
     { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
     { "-mtbm",		OPTION_MASK_ISA_TBM },
     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
@@ -2921,6 +2922,7 @@ ix86_option_override_internal (bool main_args_p)
 #define PTA_TBM		 	(HOST_WIDE_INT_1 << 28)
 #define PTA_XOP		 	(HOST_WIDE_INT_1 << 29)
 #define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
+#define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -2978,8 +2980,8 @@ ix86_option_override_internal (bool main_args_p)
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
 	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
-	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_LZCNT | PTA_FMA
-	| PTA_MOVBE},
+	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
+        | PTA_FMA | PTA_MOVBE},
       {"atom", PROCESSOR_ATOM, CPU_ATOM,
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
@@ -3300,6 +3302,9 @@ ix86_option_override_internal (bool main_args_p)
 	if (processor_alias_table[i].flags & PTA_TBM
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
 	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
+	if (processor_alias_table[i].flags & PTA_BMI2
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
+	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
 	if (processor_alias_table[i].flags & PTA_CX16
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
 	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
@@ -4053,6 +4058,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
     IX86_ATTR_ISA ("abm",	OPT_mabm),
     IX86_ATTR_ISA ("bmi",	OPT_mbmi),
+    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
     IX86_ATTR_ISA ("aes",	OPT_maes),
@@ -13285,6 +13291,7 @@ put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
    If CODE is 't', pretend the mode is V8SFmode.
    If CODE is 'h', pretend the reg is the 'high' byte register.
    If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+   If CODE is 'N', print the half mode high register.
    If CODE is 'd', duplicate the operand for AVX instruction.
  */
 
@@ -13294,6 +13301,15 @@ print_reg (rtx x, int code, FILE *file)
   const char *reg;
   bool duplicated = code == 'd' && TARGET_AVX;
 
+  if (code == 'N')
+    {
+      enum machine_mode mode = GET_MODE (x);
+      enum machine_mode half_mode = mode == TImode ? DImode : SImode;
+      x = simplify_gen_subreg (half_mode, x, mode,
+			       GET_MODE_SIZE (half_mode));
+      code = 0;
+    }
+
   gcc_assert (x == pc_rtx
 	      || (REGNO (x) != ARG_POINTER_REGNUM
 		  && REGNO (x) != FRAME_POINTER_REGNUM
@@ -13472,6 +13488,7 @@ get_some_local_dynamic_name (void)
    t --  likewise, print the V8SFmode name of the register.
    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
    y -- print "st(0)" instead of "st" as a register.
+   N -- print the half mode high register.
    d -- print duplicated register operand for AVX instruction.
    D -- print condition for SSE cmp instruction.
    P -- if PIC, print an @PLT suffix.
@@ -13678,6 +13695,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
 	case 'h':
 	case 't':
 	case 'y':
+	case 'N':
 	case 'x':
 	case 'X':
 	case 'P':
@@ -15745,8 +15763,20 @@ ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
     }
   else
     {
-      clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-      emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
+      rtx insn;
+      if (code == ROTATERT
+	  && TARGET_BMI2
+	  && !optimize_function_for_size_p (cfun)
+	  && ((mode == SImode) || (mode == DImode && TARGET_64BIT))
+	  && CONST_INT_P (src2) ) {
+        /* We are generating a RORX instruction: the destination register
+	   is unconstrained and the flags are not affected.  */
+	insn = op;
+      } else {
+	clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
+	insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob));
+      }
+      emit_insn (insn);
     }
 
   /* Fix up the destination if needed.  */
@@ -24072,6 +24102,13 @@ enum ix86_builtins
   IX86_BUILTIN_BEXTRI32,
   IX86_BUILTIN_BEXTRI64,
 
+  /* BMI2 instructions. */
+  IX86_BUILTIN_BZHI32,
+  IX86_BUILTIN_BZHI64,
+  IX86_BUILTIN_PDEP32,
+  IX86_BUILTIN_PDEP64,
+  IX86_BUILTIN_PEXT32,
+  IX86_BUILTIN_PEXT64,
 
   /* FSGSBASE instructions.  */
   IX86_BUILTIN_RDFSBASE32,
@@ -25046,6 +25083,14 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
+
+  /* BMI2 */
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
 };
 
 /* FMA4 and XOP.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f8a35ba..47442a0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -62,6 +62,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_ROUND	OPTION_ISA_ROUND
 #define TARGET_ABM	OPTION_ISA_ABM
 #define TARGET_BMI	OPTION_ISA_BMI
+#define TARGET_BMI2	OPTION_ISA_BMI2
 #define TARGET_LZCNT	OPTION_ISA_LZCNT
 #define TARGET_TBM	OPTION_ISA_TBM
 #define TARGET_POPCNT	OPTION_ISA_POPCNT
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index e7ae397..3fa6b5e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -236,6 +236,11 @@
 
   ;; For RDRAND support
   UNSPEC_RDRAND
+
+  ;; For BMI2 support
+  UNSPEC_BZHI
+  UNSPEC_PDEP
+  UNSPEC_PEXT
 ])
 
 (define_c_enum "unspecv" [
@@ -751,14 +756,17 @@
 ;; Base name for insn mnemonic.
 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
 
+;; Mapping of shift operators
+(define_code_iterator any_shift [ashift lshiftrt ashiftrt])
+
 ;; Mapping of shift-right operators
 (define_code_iterator any_shiftrt [lshiftrt ashiftrt])
 
 ;; Base name for define_insn
-(define_code_attr shiftrt_insn [(lshiftrt "lshr") (ashiftrt "ashr")])
+(define_code_attr shift_insn [(ashift "ashl") (lshiftrt "lshr") (ashiftrt "ashr")])
 
 ;; Base name for insn mnemonic.
-(define_code_attr shiftrt [(lshiftrt "shr") (ashiftrt "sar")])
+(define_code_attr shift [(ashift "shl") (lshiftrt "shr") (ashiftrt "sar")])
 
 ;; Mapping of rotate operators
 (define_code_iterator any_rotate [rotate rotatert])
@@ -777,6 +785,8 @@
 
 ;; Used in signed and unsigned widening multiplications.
 (define_code_iterator any_extend [sign_extend zero_extend])
+(define_code_attr any_extend [(sign_extend "SIGN_EXTEND")
+			      (zero_extend "ZERO_EXTEND")])
 
 ;; Various insn prefixes for signed and unsigned operations.
 (define_code_attr u [(sign_extend "") (zero_extend "u")
@@ -6837,7 +6847,17 @@
 		       (match_operand:DWIH 1 "nonimmediate_operand" ""))
 		     (any_extend:<DWI>
 		       (match_operand:DWIH 2 "register_operand" ""))))
-	      (clobber (reg:CC FLAGS_REG))])])
+	      (clobber (reg:CC FLAGS_REG))])]
+  ""
+{
+  if (TARGET_BMI2 && <any_extend> == ZERO_EXTEND)
+    {
+      emit_insn (gen_bmi2_umul<mode><dwi>3_1 (operands[0],
+					      operands[1],
+					      operands[2]));
+      DONE;
+    }
+})
 
 (define_expand "<u>mulqihi3"
   [(parallel [(set (match_operand:HI 0 "register_operand" "")
@@ -6849,6 +6869,24 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_QIMODE_MATH")
 
+(define_insn "bmi2_umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "register_operand" "d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))]
+  "TARGET_BMI2"
+{
+  if (<MODE>mode == DImode)
+    return "mulx\t{%2, %q0, %N0|%N0, %q0, %2}";
+  else
+    return "mulx\t{%2, %k0, %N0|%N0, %k0, %2}";
+}
+  [(set_attr "type" "imul")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<u>mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
 	(mult:<DWI>
@@ -9587,15 +9625,43 @@
 
 ;; See comment above `ashl<mode>3' about how this works.
 
-(define_expand "<shiftrt_insn><mode>3"
+(define_expand "<shift_insn><mode>3"
   [(set (match_operand:SDWIM 0 "<shift_operand>" "")
 	(any_shiftrt:SDWIM (match_operand:SDWIM 1 "<shift_operand>" "")
 			   (match_operand:QI 2 "nonmemory_operand" "")))]
   ""
   "ix86_expand_binary_operator (<CODE>, <MODE>mode, operands); DONE;")
 
+;; Update pattern if BMI2 is available
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(any_shift:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "")
+	  (subreg:QI
+	      (match_operand:SI 2 "register_operand" "") 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && !reload_completed"
+  [(set (match_dup 0)
+        (any_shift:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  if (can_create_pseudo_p () && <MODE>mode != SImode) {
+    rtx tmp = gen_rtx_REG (<MODE>mode, 0);
+    emit_insn (gen_extendsidi2 (tmp, operands[2]));
+    operands[2] = tmp;
+  }
+})
+
+(define_insn "*bmi2_<shift_insn><mode>3"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r")
+	(any_shift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			 (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "<shift>x\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "mode" "<MODE>")])
+
 ;; Avoid useless masking of count operand.
-(define_insn_and_split "*<shiftrt_insn><mode>3_mask"
+(define_insn_and_split "*<shift_insn><mode>3_mask"
   [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm")
 	(any_shiftrt:SWI48
 	  (match_operand:SWI48 1 "nonimmediate_operand" "0")
@@ -9621,7 +9687,7 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "<MODE>")])
 
-(define_insn_and_split "*<shiftrt_insn><mode>3_doubleword"
+(define_insn_and_split "*<shift_insn><mode>3_doubleword"
   [(set (match_operand:DWI 0 "register_operand" "=r")
 	(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
 			 (match_operand:QI 2 "nonmemory_operand" "<S>c")))
@@ -9630,7 +9696,7 @@
   "#"
   "(optimize && flag_peephole2) ? epilogue_completed : reload_completed"
   [(const_int 0)]
-  "ix86_split_<shiftrt_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
+  "ix86_split_<shift_insn> (operands, NULL_RTX, <MODE>mode); DONE;"
   [(set_attr "type" "multi")])
 
 ;; By default we don't ask for a scratch register, because when DWImode
@@ -9647,7 +9713,7 @@
    (match_dup 3)]
   "TARGET_CMOVE"
   [(const_int 0)]
-  "ix86_split_<shiftrt_insn> (operands, operands[3], <DWI>mode); DONE;")
+  "ix86_split_<shift_insn> (operands, operands[3], <DWI>mode); DONE;")
 
 (define_insn "x86_64_shrd"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
@@ -9763,7 +9829,7 @@
   DONE;
 })
 
-(define_insn "*<shiftrt_insn><mode>3_1"
+(define_insn "*<shift_insn><mode>3_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
 	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
 			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
@@ -9772,9 +9838,9 @@
 {
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
+    return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9786,7 +9852,7 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_1_zext"
+(define_insn "*<shift_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
 	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
@@ -9796,9 +9862,9 @@
 {
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shift>{l}\t%k0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shift>{l}\t{%2, %k0|%k0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9810,7 +9876,7 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*<shiftrt_insn>qi3_1_slp"
+(define_insn "*<shift_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
 	(any_shiftrt:QI (match_dup 0)
 			(match_operand:QI 1 "nonmemory_operand" "cI")))
@@ -9822,9 +9888,9 @@
 {
   if (operands[1] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{b}\t%0";
+    return "<shift>{b}\t%0";
   else
-    return "<shiftrt>{b}\t{%1, %0|%0, %1}";
+    return "<shift>{b}\t{%1, %0|%0, %1}";
 }
   [(set_attr "type" "ishift1")
    (set (attr "length_immediate")
@@ -9839,7 +9905,7 @@
 ;; This pattern can't accept a variable shift count, since shifts by
 ;; zero don't affect the flags.  We assume that shifts by constant
 ;; zero are optimized away.
-(define_insn "*<shiftrt_insn><mode>3_cmp"
+(define_insn "*<shift_insn><mode>3_cmp"
   [(set (reg FLAGS_REG)
 	(compare
 	  (any_shiftrt:SWI
@@ -9857,9 +9923,9 @@
 {
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
+    return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9871,7 +9937,7 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_cmp_zext"
+(define_insn "*<shift_insn>si3_cmp_zext"
   [(set (reg FLAGS_REG)
 	(compare
 	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
@@ -9889,9 +9955,9 @@
 {
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shift>{l}\t%k0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shift>{l}\t{%2, %k0|%k0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9903,7 +9969,7 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
-(define_insn "*<shiftrt_insn><mode>3_cconly"
+(define_insn "*<shift_insn><mode>3_cconly"
   [(set (reg FLAGS_REG)
 	(compare
 	  (any_shiftrt:SWI
@@ -9919,9 +9985,9 @@
 {
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
+    return "<shift>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    return "<shift>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -10060,6 +10126,15 @@
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=r")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			(match_operand:QI 2 "immediate_operand" "c<S>")))]
+  "TARGET_BMI2 && ix86_binary_operator_ok (ROTATERT, <MODE>mode, operands)"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotate")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<rotate_insn><mode>3_1"
   [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
 	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
@@ -12346,6 +12421,42 @@
   "xor{b}\t{%h0, %b0|%b0, %h0}"
   [(set_attr "length" "2")
    (set_attr "mode" "HI")])
+
+;; BMI2 instructions.
+(define_insn "bmi2_bzhi_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_BZHI))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2"
+  "bzhi\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi2_pdep_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_PDEP))]
+  "TARGET_BMI2"
+  "pdep\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi2_pext_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_PEXT))]
+  "TARGET_BMI2"
+  "pext\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 \f
 ;; Thread-local storage patterns for ELF.
 ;;
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 54d7af1..8e4d51b 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -493,6 +493,10 @@ mbmi
 Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
 Support BMI built-in functions and code generation
 
+mbmi2
+Target Report Mask(ISA_BMI2) Var(ix86_isa_flags) Save
+Support BMI2 built-in functions and code generation
+
 mlzcnt
 Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save
 Support LZCNT built-in function and code generation
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 88456f9..e01ecd2 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -81,6 +81,10 @@
 #include <bmiintrin.h>
 #endif
 
+#ifdef __BMI2__
+#include <bmi2intrin.h>
+#endif
+
 #ifdef __TBM__
 #include <tbmintrin.h>
 #endif
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 786c18d..1900276 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9693,6 +9693,17 @@ unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
 unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long);
 @end smallexample
 
+The following built-in functions are available when @option{-mbmi2} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+unsigned int _bzhi_u32 (unsigned int, unsigned int);
+unsigned int _pdep_u32 (unsigned int, unsigned int);
+unsigned int _pext_u32 (unsigned int, unsigned int);
+unsigned long long _bzhi_u64 (unsigned long long, unsigned long long);
+unsigned long long _pdep_u64 (unsigned long long, unsigned long long);
+unsigned long long _pext_u64 (unsigned long long, unsigned long long);
+@end smallexample
+
 The following built-in functions are available when @option{-mlzcnt} is used.
 All of them generate the machine instruction that is part of the name.
 @smallexample
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fdc3297..acf30e3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -607,7 +607,7 @@ Objective-C and Objective-C++ Dialects}.
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
 -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
 -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
--mlwp -mthreads  -mno-align-stringops  -minline-all-stringops @gol
+-mbmi2 -mlwp -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
 -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
@@ -12697,7 +12697,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mabm
 @itemx -mno-abm
 @itemx -mbmi
+@itemx -mbmi2
 @itemx -mno-bmi
+@itemx -mno-bmi2
 @itemx -mlzcnt
 @itemx -mno-lzcnt
 @itemx -mtbm
@@ -12709,8 +12711,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex m3dnow
 @opindex mno-3dnow
 These switches enable or disable the use of instructions in the MMX, SSE,
-SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA,
-SSE4A, FMA4, XOP, LWP, ABM, BMI, LZCNT or 3DNow!@: extended instruction sets.
+SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C,
+FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT or 3DNow!@: extended
+instruction sets.
 These extensions are also available as built-in functions: see
 @ref{X86 Built-in Functions}, for details of the functions enabled and
 disabled by these switches.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index ed183c7..5f2eaf9 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 626f972..76d4d19 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c
new file mode 100644
index 0000000..aae2353
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c
@@ -0,0 +1,37 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+/* Reference BZHI: clear the (32 - l) most significant bits of A one at a
+   time.  The mask is built from 1u because 1 << 31 overflows int.  */
+__attribute__((noinline))
+unsigned
+calc_bzhi_u32 (unsigned a, int l)
+{
+  unsigned res = a;
+  int i;
+  for (i=0; i<32-l; ++i)
+    res &= ~(1u << (31 - i));
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0f;
+  unsigned res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_bzhi_u32 (src, i * 2);
+    res = _bzhi_u32 (src, i * 2);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c
new file mode 100644
index 0000000..79e47a4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c
@@ -0,0 +1,5 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+#include "bmi2-bzhi32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_bzhi_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c
new file mode 100644
index 0000000..8db29db
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c
@@ -0,0 +1,37 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+/* Reference BZHI: clear the (64 - l) most significant bits of A one at a
+   time.  The mask is built from 1ULL because 1LL << 63 overflows.  */
+__attribute__((noinline))
+unsigned long long
+calc_bzhi_u64 (unsigned long long a, int l)
+{
+  unsigned long long res = a;
+  int i;
+  for (i=0; i<64-l; ++i)
+    res &= ~(1ULL << (63 - i));
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0ff;
+  unsigned long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_bzhi_u64 (src, i * 2);
+    res = _bzhi_u64 (src, i * 2);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c
new file mode 100644
index 0000000..dc4a94c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-bzhi64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_bzhi_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-check.h b/gcc/testsuite/gcc.target/i386/bmi2-check.h
new file mode 100644
index 0000000..5ffce44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-check.h
@@ -0,0 +1,49 @@
+/* Common driver for the BMI2 run tests.  Each test defines bmi2_test;
+   main calls it only when the host CPU reports BMI2 support.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cpuid.h"
+
+static void bmi2_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+  bmi2_test ();
+}
+
+int
+main ()
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  /* Leaf 7 is only meaningful when the CPU reports a maximum basic leaf
+     of at least 7; querying an unsupported leaf may return data that
+     belongs to a different leaf.  */
+  if (__get_cpuid_max (0, NULL) < 7)
+    {
+#ifdef DEBUG
+      printf ("SKIPPED\n");
+#endif
+      return 0;
+    }
+
+  __cpuid_count (7, 0,  eax, ebx, ecx, edx);
+
+  /* Run BMI2 test only if host has BMI2 support.  */
+  if (ebx & bit_BMI2)
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+    }
+#ifdef DEBUG
+  else
+    printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c
new file mode 100644
index 0000000..e1d49de
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c
@@ -0,0 +1,37 @@
+/* { dg-do run { target { bmi2 && { ia32 } } } } */
+/* { dg-options "-mbmi2 -Ofast" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_mul_u32 (unsigned a, unsigned b)
+{
+  unsigned long long res = 0;
+  volatile unsigned dummy = 0;
+  int i;
+  for (i=0; i<b; ++i)
+    res += (unsigned long long)(dummy? 0 : a);
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned a = 0xce7ace0;
+  unsigned b = 0xfacefff;
+  unsigned long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    a = a * (i + 1);
+    b = b / (i + 1);
+
+    res_ref = calc_mul_u32 (a, b);
+    res = (unsigned long long)a * b;
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c
new file mode 100644
index 0000000..cf3bb08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-mulx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_umulsidi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c
new file mode 100644
index 0000000..ded3dc1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -Ofast" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned __int128
+calc_mul_u64 (unsigned long long a, unsigned long long b)
+{
+  unsigned __int128 res = 0;
+  volatile unsigned dummy = 0;
+  int i;
+  for (i=0; i<b; ++i) {
+    /* Block loop opts  */
+    res += (unsigned __int128)(dummy? 0 : a);
+  }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long a = 0xce7ace0ce7ace0;
+  unsigned long long b = 0xface;
+  unsigned __int128 res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    a = a * (i + 1);
+    b = b / (i + 1);
+
+    res_ref = calc_mul_u64 (a, b);
+    res = (unsigned __int128)a * b;
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c
new file mode 100644
index 0000000..592d713
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-mulx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_umulditi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c
new file mode 100644
index 0000000..e44a968
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+/* Reference PDEP: deposit the low-order bits of A, in order, at the
+   positions of the set bits of MASK.  Shifts use 1u so that bit 31 can
+   be reached without overflowing int.  */
+__attribute__((noinline))
+unsigned
+calc_pdep_u32 (unsigned a, int mask)
+{
+  unsigned res = 0;
+  int i, k = 0;
+
+  for (i=0; i<32; ++i)
+    if (mask & (1u << i)) {
+      res |= ((a & (1u << k)) >> k) << i;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7acc;
+  unsigned res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pdep_u32 (src, i*3);
+    res = _pdep_u32 (src, i*3);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c
new file mode 100644
index 0000000..87888fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pdep32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pdep_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c
new file mode 100644
index 0000000..c0074fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c
@@ -0,0 +1,41 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+/* Reference PDEP: deposit the low-order bits of A, in order, at the
+   positions of the set bits of MASK.  Shifts use 1ULL so that bit 63
+   can be reached without overflowing long long.  */
+__attribute__((noinline))
+unsigned long long
+calc_pdep_u64 (unsigned long long a, unsigned long long mask)
+{
+  unsigned long long res = 0;
+  unsigned long long i, k = 0;
+
+  for (i=0; i<64; ++i)
+    if (mask & (1ULL << i)) {
+      res |= ((a & (1ULL << k)) >> k) << i;
+      ++k;
+    }
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned long long i;
+  unsigned long long src = 0xce7acce7acce7ac;
+  unsigned long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pdep_u64 (src, ~(i * 3));
+    res = _pdep_u64 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c
new file mode 100644
index 0000000..8163c40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pdep64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pdep_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c
new file mode 100644
index 0000000..f21029f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+/* Reference PEXT: gather the bits of A found at the set-bit positions of
+   MASK into the low-order bits of the result.  Shifts use 1u so that
+   bit 31 can be reached without overflowing int.  */
+__attribute__((noinline))
+unsigned
+calc_pext_u32 (unsigned a, unsigned mask)
+{
+  unsigned res = 0;
+  int i, k = 0;
+
+  for (i=0; i<32; ++i)
+    if (mask & (1u << i)) {
+      res |= ((a & (1u << i)) >> i) << k;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7acc;
+  unsigned res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pext_u32 (src, ~(i * 3));
+    res = _pext_u32 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c
new file mode 100644
index 0000000..c4a6dee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pext32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pext_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c
new file mode 100644
index 0000000..bad0584
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c
@@ -0,0 +1,42 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+/* Reference PEXT: gather the bits of A found at the set-bit positions of
+   MASK into the low-order bits of the result.  Shifts use 1ULL so that
+   bit 63 can be reached without overflowing long long.  */
+__attribute__((noinline))
+unsigned long long
+calc_pext_u64 (unsigned long long a, unsigned long long mask)
+{
+  unsigned long long res = 0;
+  int i, k = 0;
+
+  for (i=0; i<64; ++i)
+    if (mask & (1ULL << i)) {
+      res |= ((a & (1ULL << i)) >> i) << k;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned long long i;
+  unsigned long long src = 0xce7acce7acce7ac;
+  unsigned long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pext_u64 (src, ~(i * 3));
+    res = _pext_u64 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c
new file mode 100644
index 0000000..aaf06c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pext64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pext_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c
new file mode 100644
index 0000000..84618e3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_rorx_u32 (unsigned a, int l)
+{
+  unsigned res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res = (res >> 1) | ((res&1)<< 31);
+
+  return res;
+}
+
+#define SHIFT_VAL 0x0e
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0;
+  unsigned res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_rorx_u32 (src, SHIFT_VAL);
+    res = (src >> SHIFT_VAL) | (src << (32 - SHIFT_VAL));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c
new file mode 100644
index 0000000..bb3b28d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-rorx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_rorxsi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c
new file mode 100644
index 0000000..7dc722a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_rorx_u64 (unsigned long long a, int l)
+{
+  unsigned long long res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res = (res >> 1) | ((res&1)<< 63);
+
+  return res;
+}
+
+#define SHIFT_VAL 0x1e
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0;
+  unsigned long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i+1);
+
+    res_ref = calc_rorx_u64 (src, SHIFT_VAL);
+    res = (src >> SHIFT_VAL) | (src << (64 - SHIFT_VAL));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c
new file mode 100644
index 0000000..2a7a7a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-rorx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_rorxdi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c
new file mode 100644
index 0000000..2bbf016
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+/* Reference right shift of a signed value: shift A by one bit, L times.  */
+__attribute__((noinline))
+int
+calc_sarx_u32 (int a, int l)
+{
+  int res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  int src = 0xfce7ace0;
+  int res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_sarx_u32 (src, i + 1);
+    res = src >> (i + 1);
+
+#ifdef DEBUG
+    printf ("%x %x\n", res_ref, res);
+#endif
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c
new file mode 100644
index 0000000..f10d60b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-sarx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashrsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c
new file mode 100644
index 0000000..0bb13c0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+long long
+calc_sarx_u64 (long long a, int l)
+{
+  long long res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  long long src = 0xfce7ace0ce7ace0;
+  long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i+1);
+
+    res_ref = calc_sarx_u64 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c
new file mode 100644
index 0000000..bcf0fd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-sarx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashrdi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c
new file mode 100644
index 0000000..3f35047
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+int
+calc_shlx_u32 (int a, int l)
+{
+  int res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res <<= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  int src = 0xfce7ace0;
+  int res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shlx_u32 (src, i + 1);
+    res = src << (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c
new file mode 100644
index 0000000..215e5d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shlx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashlsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c
new file mode 100644
index 0000000..17f0c67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+/* Reference logical right shift: shift A right by one bit, L times.  */
+__attribute__((noinline))
+unsigned
+calc_shrx_u32 (unsigned a, int l)
+{
+  unsigned res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0;
+  unsigned res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shrx_u32 (src, i + 1);
+    res = src >> (i + 1);
+
+#ifdef DEBUG
+    printf ("%x %x\n", res_ref, res);
+#endif
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c
new file mode 100644
index 0000000..24c53d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shrx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_lshrsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c
new file mode 100644
index 0000000..022baa9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_shrx_u64 (unsigned long long a, int l)
+{
+  unsigned long long res = a;
+  int i;
+  for (i=0; i<l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0;
+  unsigned long long res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    src = src * (i+1);
+
+    res_ref = calc_shrx_u64 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c
new file mode 100644
index 0000000..7830439
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shrx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_lshrdi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 167b79b..cff8a9a 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -206,6 +206,17 @@ proc check_effective_target_bmi { } {
     } "-mbmi" ]
 }
 
+# Return 1 if bmi2 instructions can be compiled.
+proc check_effective_target_bmi2 { } {
+    return [check_no_compiler_messages bmi2 object {
+	unsigned int
+	_bzhi_u32 (unsigned int __X, unsigned int __Y)
+	{
+	    return __builtin_ia32_bzhi_si (__X, __Y);
+	}
+    } "-mbmi2" ]
+}
+
 # If the linker used understands -M <mapfile>, pass it to clear hardware
 # capabilities set by the Sun assembler.
 set clearcap_ldflags "-Wl,-M,$srcdir/$subdir/clearcap.map"

[-- Attachment #3: ChangeLog --]
[-- Type: application/octet-stream, Size: 3015 bytes --]

2011-08-18  Kirill Yukhin  <kirill.yukhin@intel.com>

	* common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET):
	New.
	(OPTION_MASK_ISA_BMI2_UNSET): Likewise.
	(ix86_handle_option): Handle OPT_mbmi2 case.
	* config.gcc (i[34567]86-*-*): Add bmi2intrin.h.
	(x86_64-*-*): Likewise.
	* config/i386/bmi2intrin.h: New file.
	* config/i386/cpuid.h (bit_BMI2): New.
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect
	BMI2 feature.
	* config/i386/i386-c.c (ix86_target_macros_internal): Define
	__BMI2__ if needed.
	* config/i386/i386.c (ix86_option_override_internal): Handle
	BMI2 option, extend core-avx2.
	(ix86_valid_target_attribute_inner_p): Likewise.
	(print_reg): New code.
	(ix86_print_operand): Likewise.
	(ix86_expand_binary_operator): Generate pattern for BMI2's
	RORX.
	(ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
	IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
	IX86_BUILTIN_PEXT64.
	(bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
	IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
	IX86_BUILTIN_PEXT64.
	* config/i386/i386.h (TARGET_BMI2): New.
	* config/i386/i386.md (UNSPEC_BZHI): New.
	(UNSPEC_PDEP): Likewise.
	(UNSPEC_PEXT): Likewise.
	(define_code_iterator any_shift): New.
	(define_code_attr shiftrt_insn): Rename to ...
	(define_code_attr shift_insn): ... this.
	(define_code_attr shiftrt): Likewise.
	(define_code_attr shift): Likewise.
	(any_extend): Update.
	(define_insn "bmi2_umul<mode><dwi>3_1"): New.
	(define_expand "<u>mul<mode><dwi>3"): Update.
	(define_expand "<shiftrt_insn><mode>3"): Rename to ...
	(define_expand "<shift_insn><mode>3"): ... this.
	(define_split)<any_shift, BMI2>: New.
	(define_insn "*bmi2_<shift_insn><mode>3"): Likewise.
	(define_insn_and_split "*<shiftrt_insn><mode>3_mask"): Rename
	to ...
	(define_insn_and_split "*<shift_insn><mode>3_mask"): ... this.
	(define_insn_and_split "*<shiftrt_insn><mode>3_doubleword"):
	Rename to ...
	(define_insn_and_split "*<shift_insn><mode>3_doubleword"):
	... this, update.
	(define_peephole2) <CMOVE>: Update.
	(define_insn "*<shiftrt_insn><mode>3_1"): Rename to ...
	(define_insn "*<shift_insn><mode>3_1"): ... this. Update.
	(define_insn "*<shiftrt_insn>si3_1_zext"): Likewise.
	(define_insn "*<shift_insn>si3_1_zext"): Likewise.
	(define_insn "*<shiftrt_insn><mode>3_cmp"): Likewise.
	(define_insn "*<shift_insn><mode>3_cmp"): Likewise.
	(define_insn "*<shiftrt_insn>si3_cmp_zext"): Likewise.
	(define_insn "*<shift_insn>si3_cmp_zext"): Likewise.
	(define_insn "*<shiftrt_insn><mode>3_cconly"): Likewise.
	(define_insn "*<shift_insn><mode>3_cconly"): Likewise.
	(define_insn "*bmi2_rorx<mode>3_1"): New.
	(define_insn "bmi2_bzhi_<mode>3"): Likewise.
	(define_insn "bmi2_pdep_<mode>3"): Likewise.
	(define_insn "bmi2_pext_<mode>3"): Likewise.
	* config/i386/i386.opt (mbmi2): New.
	* config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__
	is defined.
	* doc/extend.texi: Document BMI2 built-in functions.
	* doc/invoke.texi: Document -mbmi2.

[-- Attachment #4: ChangeLog.testsuite --]
[-- Type: application/octet-stream, Size: 1690 bytes --]

2011-08-18  Kirill Yukhin  <kirill.yukhin@intel.com>

	* g++.dg/other/i386-2.C: Add -mbmi2 check.
	* g++.dg/other/i386-3.C: Likewise.
	* gcc.target/i386/bmi2-bzhi32-1.c: New testcase.
	* gcc.target/i386/bmi2-bzhi32-1a.c: Likewise.
	* gcc.target/i386/bmi2-bzhi64-1.c: Likewise.
	* gcc.target/i386/bmi2-bzhi64-1a.c: Likewise.
	* gcc.target/i386/bmi2-mulx32-1.c: Likewise.
	* gcc.target/i386/bmi2-mulx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-mulx64-1.c: Likewise.
	* gcc.target/i386/bmi2-mulx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-pdep32-1.c: Likewise.
	* gcc.target/i386/bmi2-pdep32-1a.c: Likewise.
	* gcc.target/i386/bmi2-pdep64-1.c: Likewise.
	* gcc.target/i386/bmi2-pdep64-1a.c: Likewise.
	* gcc.target/i386/bmi2-pext32-1.c: Likewise.
	* gcc.target/i386/bmi2-pext32-1a.c: Likewise.
	* gcc.target/i386/bmi2-pext64-1.c: Likewise.
	* gcc.target/i386/bmi2-pext64-1a.c: Likewise.
	* gcc.target/i386/bmi2-rorx32-1.c: Likewise.
	* gcc.target/i386/bmi2-rorx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-rorx64-1.c: Likewise.
	* gcc.target/i386/bmi2-rorx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-sarx32-1.c: Likewise.
	* gcc.target/i386/bmi2-sarx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-sarx64-1.c: Likewise.
	* gcc.target/i386/bmi2-sarx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-shlx32-1.c: Likewise.
	* gcc.target/i386/bmi2-shlx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-shlx64-1.c: Likewise.
	* gcc.target/i386/bmi2-shlx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-shrx32-1.c: Likewise.
	* gcc.target/i386/bmi2-shrx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-shrx64-1.c: Likewise.
	* gcc.target/i386/bmi2-shrx64-1a.c: Likewise.
	* gcc.target/i386/i386.exp (check_effective_target_bmi2): New.

^ permalink raw reply	[flat|nested] 27+ messages in thread

end of thread, other threads:[~2011-08-21 11:02 UTC | newest]

Thread overview: 27+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-19 12:27 [PATCH, testsuite, i386] BMI2 support for GCC Kirill Yukhin
2011-08-19 13:26 ` H.J. Lu
2011-08-19 14:05   ` Kirill Yukhin
2011-08-19 14:13     ` Jakub Jelinek
2011-08-19 14:34       ` Kirill Yukhin
2011-08-19 14:38         ` H.J. Lu
2011-08-19 14:38           ` Kirill Yukhin
2011-08-19 14:47             ` H.J. Lu
2011-08-19 15:22               ` Kirill Yukhin
2011-08-19 15:36                 ` Kirill Yukhin
2011-08-20 20:05                 ` Uros Bizjak
2011-08-20 21:52                   ` Uros Bizjak
2011-08-20 22:04                     ` H.J. Lu
2011-08-20 22:26                       ` Uros Bizjak
2011-08-20 22:56                         ` H.J. Lu
2011-08-21 15:01                       ` Uros Bizjak
2011-08-20 22:39                     ` Richard Henderson
2011-08-21  1:37                       ` Uros Bizjak
2011-08-21 13:23                         ` Jakub Jelinek
2011-08-20 23:50                     ` Richard Henderson
2011-08-20 23:58                       ` H.J. Lu
2011-08-21  0:11                         ` H.J. Lu
2011-08-21  4:24                           ` Richard Henderson
2011-08-21  9:14                             ` Uros Bizjak
2011-08-21  9:39                               ` H.J. Lu
2011-08-21  9:49                                 ` Richard Henderson
2011-08-21 11:14                       ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).