public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
@ 2011-08-21 18:23 Uros Bizjak
  2011-08-22  9:54 ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2011-08-21 18:23 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, Kirill Yukhin, H.J. Lu, gcc-patches List

[-- Attachment #1: Type: text/plain, Size: 2329 bytes --]

Hello!

This is the third version of BMI2 support that includes generation of
mulx, rorx, <shift>x part. This patch includes all comments on
previous version, splits all insn post-reload, uses "enable" attribute
and avoids new register modifiers. As a compromise (see previous
posts), the mulx insn is now split post-reload into pattern that
separates outputs (so, post-reload passes can do their job more
effectively), with the hope that someday other DWI patterns will be
rewritten in the same way.

2011-08-21  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (type): Add imulx, ishiftx and rotatex.
	(length_immediate): Handle imulx, ishiftx and rotatex.
	(imm_disp): Ditto.
	(isa): Add bmi2.
	(enabled): Handle bmi2.
	(w): New mode attribute.

	(*mul<mode><dwi>3): Split from *<u>mul<mode><dwi>3.
	(*umul<mode><dwi>3): Ditto.  Add imulx BMI2 alternative.
	(*bmi2_umulditi3_1): New insn pattern.
	(*bmi2_umulsidi3_1): Ditto.
	(*umul<mode><dwi>3 splitter): New splitter to avoid flags dependency.

	(*bmi2_ashl<mode>3_1): New insn pattern.
	(*ashl<mode>3_1): Add ishiftx BMI2 alternative.
	(*ashl<mode>3_1 splitter): New splitter to avoid flags dependency.
	(*bmi2_ashlsi3_1_zext): New insn pattern.
	(*ashlsi3_1_zext): Add ishiftx BMI2 alternative.
	(*ashlsi3_1_zext splitter): New splitter to avoid flags dependency.

	(*bmi2_<shiftrt_insn><mode>3_1): New insn pattern.
	(*<shiftrt_insn><mode>3_1): Add ishiftx BMI2 alternative.
	(*<shiftrt_insn><mode>3_1 splitter): New splitter to avoid
	flags dependency.
	(*bmi2_<shiftrt_insn>si3_1_zext): New insn pattern.
	(*<shiftrt_insn>si3_1_zext): Add ishiftx BMI2 alternative.
	(*<shiftrt_insn>si3_1_zext splitter): New splitter to avoid
	flags dependency.

	(*bmi2_rorx<mode>3_1): New insn pattern.
	(*<rotate_insn><mode>3_1): Add rotatex BMI2 alternative.
	(*rotate<mode>3_1 splitter): New splitter to avoid flags dependency.
	(*rotatert<mode>3_1 splitter): Ditto.
	(*bmi2_rorxsi3_1_zext): New insn pattern.
	(*<rotate_insn>si3_1_zext): Add rotatex BMI2 alternative.
	(*rotatesi3_1_zext  splitter): New splitter to avoid flags dependency.
	(*rotatertsi3_1_zext splitter): Ditto.

The patch was bootstrapped and fully tested on x86_64-pc-linux-gnu
{,-m32}. Kirill, please add other stuff, re-test it on simulator and
re-post complete patch with (fixed) testcases.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 21856 bytes --]

Index: i386/i386.md
===================================================================
--- i386/i386.md	(revision 177939)
+++ i386/i386.md	(working copy)
@@ -377,7 +377,7 @@
 (define_attr "type"
   "other,multi,
    alu,alu1,negnot,imov,imovx,lea,
-   incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+   incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,bitmanip,
@@ -410,12 +410,12 @@
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
-                          bitmanip")
+                          bitmanip,imulx")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
-	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
-			  imul,icmp,push,pop")
+	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
+			  rotate,rotatex,rotate1,imul,icmp,push,pop")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
 	 (eq_attr "type" "imov,test")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
@@ -675,7 +675,7 @@
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 1 "immediate_operand" "")))
 	   (const_string "true")
-	 (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+	 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 2 "immediate_operand" "")))
 	   (const_string "true")
@@ -699,12 +699,13 @@
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,noavx,avx"
+(define_attr "isa" "base,noavx,avx,bmi2"
   (const_string "base"))
 
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
 	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
+	 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
 	]
 	(const_int 1)))
 
@@ -947,6 +948,9 @@
 ;; Instruction suffix for REX 64bit operators.
 (define_mode_attr rex64suffix [(SI "") (DI "{q}")])
 
+;; Register prefix for word mode.
+(define_mode_attr w [(SI "k") (DI "q")])
+
 ;; This mode iterator allows :P to be used for patterns that operate on
 ;; pointer-sized quantities.  Exactly one of the two alternatives will match.
 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
@@ -6849,16 +6853,102 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_QIMODE_MATH")
 
-(define_insn "*<u>mul<mode><dwi>3_1"
+(define_insn "*bmi2_umulditi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI
+	  (match_operand:DI 2 "nonimmediate_operand" "%d")
+	  (match_operand:DI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 1 "register_operand" "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (mult:TI (zero_extend:TI (match_dup 2))
+		     (zero_extend:TI (match_dup 3)))
+	    (const_int 64))))]
+  "TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "DI")])
+
+(define_insn "*bmi2_umulsidi3_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(mult:SI
+	  (match_operand:SI 2 "nonimmediate_operand" "%d")
+	  (match_operand:SI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 1 "register_operand" "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (zero_extend:DI (match_dup 2))
+		     (zero_extend:DI (match_dup 3)))
+	    (const_int 32))))]
+  "!TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=A,r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "nonimmediate_operand" "%0,d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   mul{<imodesuffix>}\t%2
+   #"
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "imul,imulx")
+   (set_attr "length_immediate" "0,*")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "alternative" "0")
+		 (if_then_else (eq_attr "cpu" "athlon")
+		   (const_string "vector")
+		   (const_string "double"))]
+	      (const_string "*")))
+   (set_attr "amdfam10_decode" "double,*")
+   (set_attr "bdver1_decode" "direct,*")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Convert mul to the mulx pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:<DWI> 0 "register_operand" "")
+       (mult:<DWI>
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 1 "nonimmediate_operand" ""))
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 2 "nonimmediate_operand" ""))))
+  (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI2 && reload_completed"
+  [(parallel [(set (match_dup 3)
+		   (mult:DWIH (match_dup 1) (match_dup 2)))
+	      (set (match_dup 4)
+		   (truncate:DWIH
+		     (lshiftrt:<DWI> 
+		       (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
+				   (zero_extend:<DWI> (match_dup 2)))
+	    	       (match_dup 5))))])]
+{
+  operands[3] = gen_lowpart (<MODE>mode, operands[0]);
+  operands[4] = gen_highpart (<MODE>mode, operands[0]);
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+})
+
+(define_insn "*mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
 	(mult:<DWI>
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "<sgnprefix>mul{<imodesuffix>}\t%2"
+  "imul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
    (set_attr "length_immediate" "0")
    (set (attr "athlon_decode")
@@ -9056,16 +9146,26 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*bmi2_ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		      (match_operand:QI 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "salx\t{%<w>2, %1, %0|%0, %1, %<w>2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9081,9 +9181,12 @@
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		          (const_int 0))
 		      (match_operand 0 "register_operand" ""))
@@ -9102,17 +9205,38 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		     (match_operand:QI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "salx\t{%k2, %1, %k0|%k0, %1, %k2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
-		     (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
+		     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9127,9 +9251,12 @@
 	return "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		     (const_int 0))
 		 (match_operand 2 "const1_operand" ""))
@@ -9147,6 +9274,17 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))])
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
@@ -9763,20 +9901,38 @@
   DONE;
 })
 
+(define_insn "*bmi2_<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			   (match_operand:QI 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "<shiftrt>x\t{%<w>2, %1, %0|%0, %1, %<w>2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<shiftrt_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
-  else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{<imodesuffix>}\t%0";
+      else
+	return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand" "")
@@ -9786,19 +9942,82 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_1_zext"
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			   (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_<shiftrt_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
-			  (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			  (match_operand:QI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "<shiftrt>x\t{%k2, %1, %k0|%k0, %1, %k2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{l}\t%k0";
+      else
+	return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+			  (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shiftrt>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9808,7 +10027,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<shiftrt_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10060,42 +10279,151 @@
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			(match_operand:QI 2 "immediate_operand" "<S>")))]
+  "TARGET_BMI2"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<rotate_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{<imodesuffix>}\t%0";
-  else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{<imodesuffix>}\t%0";
+      else
+	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand" "")
-	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-		(const_int 0)))
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<rotate_insn>si3_1_zext"
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			(match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_rorxsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		       (match_operand:QI 2 "immediate_operand" "I"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
-    if (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{l}\t%k0";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{l}\t%k0";
+      else
+	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		       (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "rotate")
    (set (attr "length_immediate")
@@ -10105,7 +10433,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<rotate_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-21 18:23 [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part Uros Bizjak
@ 2011-08-22  9:54 ` Uros Bizjak
  2011-08-23 11:35   ` Kirill Yukhin
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2011-08-22  9:54 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Henderson, Kirill Yukhin, H.J. Lu, gcc-patches List

[-- Attachment #1: Type: text/plain, Size: 759 bytes --]

On Sun, Aug 21, 2011 at 1:39 PM, Uros Bizjak <ubizjak@gmail.com> wrote:

> This is the third version of BMI2 support that includes generation of
> mulx, rorx, <shift>x part. This patch includes all comments on
> previous version, splits all insn post-reload, uses "enable" attribute
> and avoids new register modifiers. As a compromise (see previous
> posts), the mulx insn is now split post-reload into pattern that
> separates outputs (so, post-reload passes can do their job more
> effectively), with the hope that someday other DWI patterns will be
> rewritten in the same way.

A small update that removes the need for "w" mode attribute. We can
convert count register to the correct mode in a splitter.

Re-tested on x86_64-pc-linux-gnu {,-m32}.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 21627 bytes --]

Index: i386.md
===================================================================
--- i386.md	(revision 177949)
+++ i386.md	(working copy)
@@ -377,7 +377,7 @@
 (define_attr "type"
   "other,multi,
    alu,alu1,negnot,imov,imovx,lea,
-   incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+   incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,bitmanip,
@@ -410,12 +410,12 @@
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
-                          bitmanip")
+                          bitmanip,imulx")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
-	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
-			  imul,icmp,push,pop")
+	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
+			  rotate,rotatex,rotate1,imul,icmp,push,pop")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
 	 (eq_attr "type" "imov,test")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
@@ -675,7 +675,7 @@
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 1 "immediate_operand" "")))
 	   (const_string "true")
-	 (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+	 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 2 "immediate_operand" "")))
 	   (const_string "true")
@@ -699,12 +699,13 @@
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,noavx,avx"
+(define_attr "isa" "base,noavx,avx,bmi2"
   (const_string "base"))
 
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
 	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
+	 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
 	]
 	(const_int 1)))
 
@@ -6844,16 +6845,102 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_QIMODE_MATH")
 
-(define_insn "*<u>mul<mode><dwi>3_1"
+(define_insn "*bmi2_umulditi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI
+	  (match_operand:DI 2 "nonimmediate_operand" "%d")
+	  (match_operand:DI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 1 "register_operand" "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (mult:TI (zero_extend:TI (match_dup 2))
+		     (zero_extend:TI (match_dup 3)))
+	    (const_int 64))))]
+  "TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "DI")])
+
+(define_insn "*bmi2_umulsidi3_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(mult:SI
+	  (match_operand:SI 2 "nonimmediate_operand" "%d")
+	  (match_operand:SI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 1 "register_operand" "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (zero_extend:DI (match_dup 2))
+		     (zero_extend:DI (match_dup 3)))
+	    (const_int 32))))]
+  "!TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=A,r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "nonimmediate_operand" "%0,d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   mul{<imodesuffix>}\t%2
+   #"
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "imul,imulx")
+   (set_attr "length_immediate" "0,*")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "alternative" "0")
+		 (if_then_else (eq_attr "cpu" "athlon")
+		   (const_string "vector")
+		   (const_string "double"))]
+	      (const_string "*")))
+   (set_attr "amdfam10_decode" "double,*")
+   (set_attr "bdver1_decode" "direct,*")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Convert mul to the mulx pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:<DWI> 0 "register_operand" "")
+       (mult:<DWI>
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 1 "nonimmediate_operand" ""))
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 2 "nonimmediate_operand" ""))))
+  (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI2 && reload_completed"
+  [(parallel [(set (match_dup 3)
+		   (mult:DWIH (match_dup 1) (match_dup 2)))
+	      (set (match_dup 4)
+		   (truncate:DWIH
+		     (lshiftrt:<DWI> 
+		       (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
+				   (zero_extend:<DWI> (match_dup 2)))
+	    	       (match_dup 5))))])]
+{
+  operands[3] = gen_lowpart (<MODE>mode, operands[0]);
+  operands[4] = gen_highpart (<MODE>mode, operands[0]);
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+})
+
+(define_insn "*mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
 	(mult:<DWI>
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "<sgnprefix>mul{<imodesuffix>}\t%2"
+  "imul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
    (set_attr "length_immediate" "0")
    (set (attr "athlon_decode")
@@ -9051,16 +9138,26 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*bmi2_ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		      (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "salx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9076,9 +9173,12 @@
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		          (const_int 0))
 		      (match_operand 0 "register_operand" ""))
@@ -9097,17 +9197,39 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*bmi2_ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		     (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "salx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
-		     (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
+		     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9122,9 +9244,12 @@
 	return "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		     (const_int 0))
 		 (match_operand 2 "const1_operand" ""))
@@ -9142,6 +9267,18 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
@@ -9758,20 +9895,38 @@
   DONE;
 })
 
+(define_insn "*bmi2_<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			   (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "<shiftrt>x\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<shiftrt_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
-  else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{<imodesuffix>}\t%0";
+      else
+	return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand" "")
@@ -9781,19 +9936,84 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_1_zext"
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			   (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*bmi2_<shiftrt_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
-			  (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			  (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "<shiftrt>x\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{l}\t%k0";
+      else
+	return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+			  (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shiftrt>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9803,7 +10023,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<shiftrt_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10055,42 +10275,151 @@
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			(match_operand:QI 2 "immediate_operand" "<S>")))]
+  "TARGET_BMI2"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<rotate_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{<imodesuffix>}\t%0";
-  else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{<imodesuffix>}\t%0";
+      else
+	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand" "")
-	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-		(const_int 0)))
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<rotate_insn>si3_1_zext"
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			(match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_rorxsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		       (match_operand:QI 2 "immediate_operand" "I"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
-    if (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{l}\t%k0";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{l}\t%k0";
+      else
+	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		       (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+  	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "rotate")
    (set (attr "length_immediate")
@@ -10100,7 +10429,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<rotate_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-22  9:54 ` Uros Bizjak
@ 2011-08-23 11:35   ` Kirill Yukhin
  2011-08-23 12:21     ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Kirill Yukhin @ 2011-08-23 11:35 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, H.J. Lu, gcc-patches List

[-- Attachment #1: Type: text/plain, Size: 1244 bytes --]

Hi,
I've slightly updated mulx split to avoid ICE.
Updated patch, ChangeLog entry (with Uros's contribution) and
ChangeLog.testsuite entry are attached.

Bootstrapped and make-checked.

Tests all pass under simulator (expept one, but it is simulator issue).

Uros, you asked if BMI2 is inherited from BMI. The answer is no, these
2 extensions are not connected.

Is is OK?

--
Thanks, K

On Mon, Aug 22, 2011 at 12:45 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Sun, Aug 21, 2011 at 1:39 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>
>> This is the third version of BMI2 support that includes generation of
>> mulx, rorx, <shift>x part. This patch includes all comments on
>> previous version, splits all insn post-reload, uses "enable" attribute
>> and avoids new register modifiers. As a compromise (see previous
>> posts), the mulx insn is now split post-reload into pattern that
>> separates outputs (so, post-reload passes can do their job more
>> effectively), with the hope that someday other DWI patterns will be
>> rewritten in the same way.
>
> A small update that removes the need for "w" mode attribute. We can
> convert count register to the correct mode in a splitter.
>
> Re-tested on x86_64-pc-linux-gnu {,-m32}.
>
> Uros.
>

[-- Attachment #2: ChangeLog --]
[-- Type: application/octet-stream, Size: 3271 bytes --]

2011-08-21  Uros Bizjak  <ubizjak@gmail.com>

       * config/i386/i386.md (type): Add imulx, ishiftx and rotatex.
       (length_immediate): Handle imulx, ishiftx and rotatex.
       (imm_disp): Ditto.
       (isa): Add bmi2.
       (enabled): Handle bmi2.
       (w): New mode attribute.

       (*mul<mode><dwi>3): Split from *<u>mul<mode><dwi>3.
       (*umul<mode><dwi>3): Ditto.  Add imulx BMI2 alternative.
       (*bmi2_umulditi3_1): New insn pattern.
       (*bmi2_umulsidi3_1): Ditto.
       (*umul<mode><dwi>3 splitter): New splitter to avoid flags dependency.

       (*bmi2_ashl<mode>3_1): New insn pattern.
       (*ashl<mode>3_1): Add ishiftx BMI2 alternative.
       (*ashl<mode>3_1 splitter): New splitter to avoid flags dependency.
       (*bmi2_ashlsi3_1_zext): New insn pattern.
       (*ashlsi3_1_zext): Add ishiftx BMI2 alternative.
       (*ashlsi3_1_zext splitter): New splitter to avoid flags dependency.

       (*bmi2_<shiftrt_insn><mode>3_1): New insn pattern.
       (*<shiftrt_insn><mode>3_1): Add ishiftx BMI2 alternative.
       (*<shiftrt_insn><mode>3_1 splitter): New splitter to avoid
       flags dependency.
       (*bmi2_<shiftrt_insn>si3_1_zext): New insn pattern.
       (*<shiftrt_insn>si3_1_zext): Add ishiftx BMI2 alternative.
       (*<shiftrt_insn>si3_1_zext splitter): New splitter to avoid
       flags dependency.

       (*bmi2_rorx<mode>3_1): New insn pattern.
       (*<rotate_insn><mode>3_1): Add rotatex BMI2 alternative.
       (*rotate<mode>3_1 splitter): New splitter to avoid flags dependency.
       (*rotatert<mode>3_1 splitter): Ditto.
       (*bmi2_rorxsi3_1_zext): New insn pattern.
       (*<rotate_insn>si3_1_zext): Add rotatex BMI2 alternative.
       (*rotatesi3_1_zext  splitter): New splitter to avoid flags dependency.
       (*rotatertsi3_1_zext splitter): Ditto.

2011-08-21  Kirill Yukhin  <kirill.yukhin@intel.com>

	* common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET):
	New.
	(OPTION_MASK_ISA_BMI2_UNSET): Likewise.
	(ix86_handle_option): Handle OPT_mbmi2 case.
	* config.gcc (i[34567]86-*-*): Add bmi2intrin.h.
	(x86_64-*-*): Likewise.
	* config/i386/bmi2intrin.h: New file.
	* config/i386/cpuid.h (bit_BMI2): New.
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect
	BMI2 feature.
	* config/i386/i386-c.c (ix86_target_macros_internal): Define
	__BMI2_ if needed.
	* config/i386/i386.c (ix86_option_override_internal): Handle
	BMI2 option, extend core-avx2.
	(ix86_valid_target_attribute_inner_p): Likewise.
	(print_reg): New code.
	(ix86_print_operand): Likewise.
	(ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
	IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
	IX86_BUILTIN_PEXT64.
	(bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
	IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
	IX86_BUILTIN_PEXT64.
	* config/i386/i386.h (TARGET_BMI2): New.
	* config/i386/i386.md (UNSPEC_PDEP): New.
	(UNSPEC_PEXT): Likewise.
	(*bmi2_bzhi_<mode>3): Likewise.
	(*bmi2_pdep_<mode>3): Likewise.
	(*bmi2_pext_<mode>3): Likewise.
	* config/i386/i386.opt (mbmi2): New.
	* config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__
	is defined.
	* doc/extend.texi: Document BMI2 built-in functions.
	* doc/invoke.texi: Document -mbmi2.

[-- Attachment #3: ChangeLog.testsuite --]
[-- Type: application/octet-stream, Size: 1690 bytes --]

2011-08-18  Kirill Yukhin  <kirill.yukhin@intel.com>

	* g++.dg/other/i386-2.C: Add -mbmi2 check.
	* g++.dg/other/i386-3.C: Likewise.
	* gcc.target/i386/bmi2-bzhi32-1.c: New testcase.
	* gcc.target/i386/bmi2-bzhi32-1a.c: Likewise.
	* gcc.target/i386/bmi2-bzhi64-1.c: Likewise.
	* gcc.target/i386/bmi2-bzhi64-1a.c: Likewise.
	* gcc.target/i386/bmi2-mulx32-1.c: Likewise.
	* gcc.target/i386/bmi2-mulx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-mulx64-1.c: Likewise.
	* gcc.target/i386/bmi2-mulx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-pdep32-1.c: Likewise.
	* gcc.target/i386/bmi2-pdep32-1a.c: Likewise.
	* gcc.target/i386/bmi2-pdep64-1.c: Likewise.
	* gcc.target/i386/bmi2-pdep64-1a.c: Likewise.
	* gcc.target/i386/bmi2-pext32-1.c: Likewise.
	* gcc.target/i386/bmi2-pext32-1a.c: Likewise.
	* gcc.target/i386/bmi2-pext64-1.c: Likewise.
	* gcc.target/i386/bmi2-pext64-1a.c: Likewise.
	* gcc.target/i386/bmi2-rorx32-1.c: Likewise.
	* gcc.target/i386/bmi2-rorx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-rorx64-1.c: Likewise.
	* gcc.target/i386/bmi2-rorx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-sarx32-1.c: Likewise.
	* gcc.target/i386/bmi2-sarx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-sarx64-1.c: Likewise.
	* gcc.target/i386/bmi2-sarx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-shlx32-1.c: Likewise.
	* gcc.target/i386/bmi2-shlx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-shlx64-1.c: Likewise.
	* gcc.target/i386/bmi2-shlx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-shrx32-1.c: Likewise.
	* gcc.target/i386/bmi2-shrx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-shrx64-1.c: Likewise.
	* gcc.target/i386/bmi2-shrx64-1a.c: Likewise.
	* gcc.target/i386/i386.exp (check_effective_target_bmi2): New.

[-- Attachment #4: bmi2-8.gcc.patch --]
[-- Type: application/octet-stream, Size: 62215 bytes --]

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index b201835..99643d6 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
 
 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
@@ -137,6 +138,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
@@ -395,6 +397,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mbmi2:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI2_UNSET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_UNSET;
+	}
+      return true;
+
     case OPT_mtbm:
       if (value)
 	{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index b8addaf..67aae86 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -352,7 +352,8 @@ i[34567]86-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
+		       lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
+		       avx2intrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -364,7 +365,8 @@ x86_64-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
+		       lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
+		       avx2intrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
new file mode 100644
index 0000000..f3ffa52
--- /dev/null
+++ b/gcc/config/i386/bmi2intrin.h
@@ -0,0 +1,81 @@
+/* Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __BMI2__
+# error "BMI2 instruction set not enabled"
+#endif /* __BMI2__ */
+
+#ifndef _BMI2INTRIN_H_INCLUDED
+#define _BMI2INTRIN_H_INCLUDED
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_bzhi_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_pdep_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_pext_si (__X, __Y);
+}
+
+#ifdef  __x86_64__
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_bzhi_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_pdep_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_pext_di (__X, __Y);
+}
+
+#endif /* __x86_64__  */
+
+#endif /* _BMI2INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index d53743f..5da8fd2 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -67,6 +67,7 @@
 #define bit_FSGSBASE	(1 << 0)
 #define bit_BMI		(1 << 3)
 #define bit_AVX2	(1 << 5)
+#define bit_BMI2	(1 << 8)
 
 #if defined(__i386__) && defined(__PIC__)
 /* %ebx may be the PIC register.  */
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index b7a1f52..8107ece 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -396,7 +396,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
   unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
-  unsigned int has_bmi = 0, has_tbm = 0, has_lzcnt = 0;
+  unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
 
   bool arch;
 
@@ -475,6 +475,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 
       has_bmi = ebx & bit_BMI;
       has_avx2 = ebx & bit_AVX2;
+      has_bmi2 = ebx & bit_BMI2;
     }
 
   if (!arch)
@@ -715,6 +716,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
       const char *xop = has_xop ? " -mxop" : " -mno-xop";
       const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
+      const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
       const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
       const char *avx = has_avx ? " -mavx" : " -mno-avx";
       const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
@@ -723,8 +725,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
 
       options = concat (options, cx16, sahf, movbe, ase, pclmul,
-			popcnt, abm, lwp, fma, fma4, xop, bmi, tbm,
-			avx2, avx, sse4_2, sse4_1, lzcnt, NULL);
+			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
+			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5c1dfe6..d4b0b08 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -273,6 +273,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__ABM__");
   if (isa_flag & OPTION_MASK_ISA_BMI)
     def_or_undef (parse_in, "__BMI__");
+  if (isa_flag & OPTION_MASK_ISA_BMI2)
+    def_or_undef (parse_in, "__BMI2__");
   if (isa_flag & OPTION_MASK_ISA_LZCNT)
     def_or_undef (parse_in, "__LZCNT__");
   if (isa_flag & OPTION_MASK_ISA_TBM)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ef02673..61f2c5c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2664,6 +2664,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mmmx",		OPTION_MASK_ISA_MMX },
     { "-mabm",		OPTION_MASK_ISA_ABM },
     { "-mbmi",		OPTION_MASK_ISA_BMI },
+    { "-mbmi2", 	OPTION_MASK_ISA_BMI2 },
     { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
     { "-mtbm",		OPTION_MASK_ISA_TBM },
     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
@@ -2921,6 +2922,7 @@ ix86_option_override_internal (bool main_args_p)
 #define PTA_TBM		 	(HOST_WIDE_INT_1 << 28)
 #define PTA_XOP		 	(HOST_WIDE_INT_1 << 29)
 #define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
+#define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -2978,8 +2980,8 @@ ix86_option_override_internal (bool main_args_p)
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
 	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
-	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_LZCNT | PTA_FMA
-	| PTA_MOVBE},
+	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
+        | PTA_FMA | PTA_MOVBE},
       {"atom", PROCESSOR_ATOM, CPU_ATOM,
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
@@ -3300,6 +3302,9 @@ ix86_option_override_internal (bool main_args_p)
 	if (processor_alias_table[i].flags & PTA_TBM
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
 	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
+	if (processor_alias_table[i].flags & PTA_BMI2
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
+	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
 	if (processor_alias_table[i].flags & PTA_CX16
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
 	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
@@ -4053,6 +4058,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
     IX86_ATTR_ISA ("abm",	OPT_mabm),
     IX86_ATTR_ISA ("bmi",	OPT_mbmi),
+    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
     IX86_ATTR_ISA ("aes",	OPT_maes),
@@ -24242,6 +24248,13 @@ enum ix86_builtins
   IX86_BUILTIN_BEXTRI32,
   IX86_BUILTIN_BEXTRI64,
 
+  /* BMI2 instructions. */
+  IX86_BUILTIN_BZHI32,
+  IX86_BUILTIN_BZHI64,
+  IX86_BUILTIN_PDEP32,
+  IX86_BUILTIN_PDEP64,
+  IX86_BUILTIN_PEXT32,
+  IX86_BUILTIN_PEXT64,
 
   /* FSGSBASE instructions.  */
   IX86_BUILTIN_RDFSBASE32,
@@ -25375,6 +25388,14 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
+
+  /* BMI2 */
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
 };
 
 /* FMA4 and XOP.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f8a35ba..47442a0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -62,6 +62,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_ROUND	OPTION_ISA_ROUND
 #define TARGET_ABM	OPTION_ISA_ABM
 #define TARGET_BMI	OPTION_ISA_BMI
+#define TARGET_BMI2	OPTION_ISA_BMI2
 #define TARGET_LZCNT	OPTION_ISA_LZCNT
 #define TARGET_TBM	OPTION_ISA_TBM
 #define TARGET_POPCNT	OPTION_ISA_POPCNT
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d343fc2..28872ef 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -244,6 +244,10 @@
 
   ;; For RDRAND support
   UNSPEC_RDRAND
+
+  ;; For BMI2 support
+  UNSPEC_PDEP
+  UNSPEC_PEXT
 ])
 
 (define_c_enum "unspecv" [
@@ -385,7 +389,7 @@
 (define_attr "type"
   "other,multi,
    alu,alu1,negnot,imov,imovx,lea,
-   incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+   incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,bitmanip,
@@ -418,12 +422,12 @@
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
-                          bitmanip")
+                          bitmanip,imulx")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
-	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
-			  imul,icmp,push,pop")
+	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
+			  rotate,rotatex,rotate1,imul,icmp,push,pop")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
 	 (eq_attr "type" "imov,test")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
@@ -683,7 +687,7 @@
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 1 "immediate_operand" "")))
 	   (const_string "true")
-	 (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+	 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 2 "immediate_operand" "")))
 	   (const_string "true")
@@ -707,12 +711,13 @@
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,noavx,avx"
+(define_attr "isa" "base,noavx,avx,bmi2"
   (const_string "base"))
 
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
 	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
+	 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
 	]
 	(const_int 1)))
 
@@ -6853,16 +6858,103 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_QIMODE_MATH")
 
-(define_insn "*<u>mul<mode><dwi>3_1"
+(define_insn "*bmi2_umulditi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI
+	  (match_operand:DI 2 "nonimmediate_operand" "%d")
+	  (match_operand:DI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 1 "register_operand" "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (mult:TI (zero_extend:TI (match_dup 2))
+		     (zero_extend:TI (match_dup 3)))
+	    (const_int 64))))]
+  "TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "DI")])
+
+(define_insn "*bmi2_umulsidi3_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(mult:SI
+	  (match_operand:SI 2 "nonimmediate_operand" "%d")
+	  (match_operand:SI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 1 "register_operand" "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (zero_extend:DI (match_dup 2))
+		     (zero_extend:DI (match_dup 3)))
+	    (const_int 32))))]
+  "!TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=A,r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "nonimmediate_operand" "%0,d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   mul{<imodesuffix>}\t%2
+   #"
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "imul,imulx")
+   (set_attr "length_immediate" "0,*")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "alternative" "0")
+		 (if_then_else (eq_attr "cpu" "athlon")
+		   (const_string "vector")
+		   (const_string "double"))]
+	      (const_string "*")))
+   (set_attr "amdfam10_decode" "double,*")
+   (set_attr "bdver1_decode" "direct,*")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Convert mul to the mulx pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:<DWI> 0 "register_operand" "")
+       (mult:<DWI>
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 1 "register_operand" ""))
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 2 "nonimmediate_operand" ""))))
+  (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI2 && reload_completed
+  && true_regnum (operands[1]) == DX_REG"
+  [(parallel [(set (match_dup 3)
+		   (mult:DWIH (match_dup 1) (match_dup 2)))
+	      (set (match_dup 4)
+		   (truncate:DWIH
+		     (lshiftrt:<DWI>
+		       (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
+				   (zero_extend:<DWI> (match_dup 2)))
+		       (match_dup 5))))])]
+{
+  operands[3] = gen_lowpart (<MODE>mode, operands[0]);
+  operands[4] = gen_highpart (<MODE>mode, operands[0]);
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+})
+
+(define_insn "*mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
 	(mult:<DWI>
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "<sgnprefix>mul{<imodesuffix>}\t%2"
+  "imul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
    (set_attr "length_immediate" "0")
    (set (attr "athlon_decode")
@@ -9060,16 +9152,26 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*bmi2_ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		      (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "shlx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9085,9 +9187,12 @@
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		          (const_int 0))
 		      (match_operand 0 "register_operand" ""))
@@ -9106,17 +9211,39 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*bmi2_ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		     (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "shlx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
-		     (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
+		     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9131,9 +9258,12 @@
 	return "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		     (const_int 0))
 		 (match_operand 2 "const1_operand" ""))
@@ -9151,6 +9281,18 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
@@ -9767,20 +9909,38 @@
   DONE;
 })
 
+(define_insn "*bmi2_<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			   (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "<shiftrt>x\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<shiftrt_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
-  else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{<imodesuffix>}\t%0";
+      else
+	return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand" "")
@@ -9790,19 +9950,84 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_1_zext"
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			   (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*bmi2_<shiftrt_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
-			  (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			  (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "<shiftrt>x\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{l}\t%k0";
+      else
+	return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+			  (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shiftrt>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9812,7 +10037,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<shiftrt_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10064,42 +10289,151 @@
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			(match_operand:QI 2 "immediate_operand" "<S>")))]
+  "TARGET_BMI2"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<rotate_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{<imodesuffix>}\t%0";
-  else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{<imodesuffix>}\t%0";
+      else
+	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand" "")
-	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-		(const_int 0)))
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<rotate_insn>si3_1_zext"
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			(match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_rorxsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		       (match_operand:QI 2 "immediate_operand" "I"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
-    if (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{l}\t%k0";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{l}\t%k0";
+      else
+	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		       (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "rotate")
    (set (attr "length_immediate")
@@ -10109,7 +10443,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<rotate_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -11951,6 +12285,41 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+;; BMI2 instructions.
+(define_insn "bmi2_bzhi_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		   (lshiftrt:SWI48 (const_int -1)
+				   (match_operand:SWI48 2 "register_operand" "r"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2"
+  "bzhi\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi2_pdep_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_PDEP))]
+  "TARGET_BMI2"
+  "pdep\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi2_pext_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_PEXT))]
+  "TARGET_BMI2"
+  "pext\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 ;; TBM instructions.
 (define_insn "tbm_bextri_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
@@ -12350,6 +12719,7 @@
   "xor{b}\t{%h0, %b0|%b0, %h0}"
   [(set_attr "length" "2")
    (set_attr "mode" "HI")])
+
 \f
 ;; Thread-local storage patterns for ELF.
 ;;
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 54d7af1..8e4d51b 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -493,6 +493,10 @@ mbmi
 Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
 Support BMI built-in functions and code generation
 
+mbmi2
+Target Report Mask(ISA_BMI2) Var(ix86_isa_flags) Save
+Support BMI2 built-in functions and code generation
+
 mlzcnt
 Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save
 Support LZCNT built-in function and code generation
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 88456f9..e01ecd2 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -81,6 +81,10 @@
 #include <bmiintrin.h>
 #endif
 
+#ifdef __BMI2__
+#include <bmi2intrin.h>
+#endif
+
 #ifdef __TBM__
 #include <tbmintrin.h>
 #endif
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 29c02b8..0b36336 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9883,6 +9883,17 @@ unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
 unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long);
 @end smallexample
 
+The following built-in functions are available when @option{-mbmi2} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+unsigned int _bzhi_u32 (unsigned int, unsigned int)
+unsigned int _pdep_u32 (unsigned int, unsigned int)
+unsigned int _pext_u32 (unsigned int, unsigned int)
+unsigned long long _bzhi_u64 (unsigned long long, unsigned long long)
+unsigned long long _pdep_u64 (unsigned long long, unsigned long long)
+unsigned long long _pext_u64 (unsigned long long, unsigned long long)
+@end smallexample
+
 The following built-in functions are available when @option{-mlzcnt} is used.
 All of them generate the machine instruction that is part of the name.
 @smallexample
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fdc3297..acf30e3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -607,7 +607,7 @@ Objective-C and Objective-C++ Dialects}.
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
 -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
 -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
--mlwp -mthreads  -mno-align-stringops  -minline-all-stringops @gol
+-mbmi2 -mlwp -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
 -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
@@ -12697,7 +12697,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mabm
 @itemx -mno-abm
 @itemx -mbmi
+@itemx -mbmi2
 @itemx -mno-bmi
+@itemx -mno-bmi2
 @itemx -mlzcnt
 @itemx -mno-lzcnt
 @itemx -mtbm
@@ -12709,8 +12711,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex m3dnow
 @opindex mno-3dnow
 These switches enable or disable the use of instructions in the MMX, SSE,
-SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA,
-SSE4A, FMA4, XOP, LWP, ABM, BMI, LZCNT or 3DNow!@: extended instruction sets.
+SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C,
+FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT or 3DNow!
+@: extended instruction sets.
 These extensions are also available as built-in functions: see
 @ref{X86 Built-in Functions}, for details of the functions enabled and
 disabled by these switches.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index ed183c7..5f2eaf9 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 626f972..76d4d19 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c
new file mode 100644
index 0000000..68df8b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_bzhi_u32 (unsigned a, int l)
+{
+  unsigned res = a;
+  int i;
+  for (i = 0; i < 32 - l; ++i)
+    res &= ~(1 << (31 - i));
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0f;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_bzhi_u32 (src, i * 2);
+    res = _bzhi_u32 (src, i * 2);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c
new file mode 100644
index 0000000..05be7a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-bzhi32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_bzhi_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c
new file mode 100644
index 0000000..1ffe135
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_bzhi_u64 (unsigned long long a, int l)
+{
+  unsigned long long res = a;
+  int i;
+  for (i = 0; i < 64 - l; ++i)
+    res &= ~(1LL << (63 - i));
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0ff;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_bzhi_u64 (src, i * 2);
+    res = _bzhi_u64 (src, i * 2);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c
new file mode 100644
index 0000000..dc4a94c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-bzhi64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_bzhi_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-check.h b/gcc/testsuite/gcc.target/i386/bmi2-check.h
new file mode 100644
index 0000000..5ffce44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-check.h
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cpuid.h"
+
+static void bmi2_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+  bmi2_test ();
+}
+
+int
+main ()
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  __cpuid_count (7, 0,  eax, ebx, ecx, edx);
+
+  /* Run BMI2 test only if host has BMI2 support.  */
+  if (ebx & bit_BMI2)
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+    }
+#ifdef DEBUG
+  else
+    printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c
new file mode 100644
index 0000000..a90ff1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { bmi2 && { ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_mul_u32 (unsigned volatile a, unsigned b)
+{
+  unsigned long long res = 0;
+  int i;
+  for (i = 0; i < b; ++i)
+    res += a;
+
+  return res;
+}
+
+__attribute__((noinline))
+unsigned long long
+gen_mulx (unsigned a, unsigned b)
+{
+  unsigned long long res;
+
+  res = (unsigned long long)a * b;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned a = 0xce7ace0;
+  unsigned b = 0xfacefff;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    a = a * (i + 1);
+    b = b / (i + 1);
+
+    res_ref = calc_mul_u32 (a, b);
+    res = gen_mulx (a, b);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c
new file mode 100644
index 0000000..cf3bb08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-mulx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_umulsidi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c
new file mode 100644
index 0000000..6844946
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned __int128
+calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
+{
+  unsigned __int128 res = 0;
+  int i;
+  for (i = 0; i < b; ++i)
+    res += (unsigned __int128) a;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long a = 0xce7ace0ce7ace0;
+  unsigned long long b = 0xface;
+  unsigned __int128 res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    a = a * (i + 1);
+    b = b / (i + 1);
+
+    res_ref = calc_mul_u64 (a, b);
+    res = (unsigned __int128) a * b;
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c
new file mode 100644
index 0000000..592d713
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-mulx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_umulditi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c
new file mode 100644
index 0000000..5aecf57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_pdep_u32 (unsigned a, int mask)
+{
+  unsigned res = 0;
+  int i, k = 0;
+
+  for (i = 0; i < 32; ++i)
+    if (mask & (1 << i)) {
+      res |= ((a & (1 << k)) >> k) << i;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7acc;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pdep_u32 (src, i * 3);
+    res = _pdep_u32 (src, i * 3);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c
new file mode 100644
index 0000000..87888fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pdep32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pdep_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c
new file mode 100644
index 0000000..f718b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_pdep_u64 (unsigned long long a, unsigned long long mask)
+{
+  unsigned long long res = 0;
+  unsigned long long i, k = 0;
+
+  for (i = 0; i < 64; ++i)
+    if (mask & (1LL << i)) {
+      res |= ((a & (1LL << k)) >> k) << i;
+      ++k;
+    }
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned long long i;
+  unsigned long long src = 0xce7acce7acce7ac;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pdep_u64 (src, ~(i * 3));
+    res = _pdep_u64 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c
new file mode 100644
index 0000000..8163c40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pdep64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pdep_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c
new file mode 100644
index 0000000..7fe7837
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_pext_u32 (unsigned a, unsigned mask)
+{
+  unsigned res = 0;
+  int i, k = 0;
+
+  for (i = 0; i < 32; ++i)
+    if (mask & (1 << i)) {
+      res |= ((a & (1 << i)) >> i) << k;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7acc;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pext_u32 (src, ~(i * 3));
+    res = _pext_u32 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c
new file mode 100644
index 0000000..c4a6dee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pext32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pext_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c
new file mode 100644
index 0000000..6850749
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_pext_u64 (unsigned long long a, unsigned long long mask)
+{
+  unsigned long long res = 0;
+  int i, k = 0;
+
+  for (i = 0; i < 64; ++i)
+    if (mask & (1LL << i)) {
+      res |= ((a & (1LL << i)) >> i) << k;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned long long i;
+  unsigned long long src = 0xce7acce7acce7ac;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pext_u64 (src, ~(i * 3));
+    res = _pext_u64 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c
new file mode 100644
index 0000000..aaf06c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pext64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pext_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c
new file mode 100644
index 0000000..d7f6f3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_rorx_u32 (unsigned a, int l)
+{
+  unsigned volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res = (res >> 1) | ((res & 1) << 31);
+
+  return res;
+}
+
+#define SHIFT_VAL 0x0e
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_rorx_u32 (src, SHIFT_VAL);
+    res = (src >> SHIFT_VAL) | (src << (32 - SHIFT_VAL));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c
new file mode 100644
index 0000000..bb3b28d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-rorx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_rorxsi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c
new file mode 100644
index 0000000..ccd60c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_rorx_u64 (unsigned long long a, int l)
+{
+  unsigned long long volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res = (res >> 1) | ((res&1)<< 63);
+
+  return res;
+}
+
+#define SHIFT_VAL 0x1e
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_rorx_u64 (src, SHIFT_VAL);
+    res = (src >> SHIFT_VAL) | (src << (64 - SHIFT_VAL));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c
new file mode 100644
index 0000000..2a7a7a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-rorx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_rorxdi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c
new file mode 100644
index 0000000..8224b6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+int
+calc_sarx_u32 (int a, int l)
+{
+  int volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  int src = 0xfce7ace0;
+  int res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_sarx_u32 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c
new file mode 100644
index 0000000..f10d60b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-sarx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashrsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c
new file mode 100644
index 0000000..a43b202
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+long long
+calc_sarx_u64 (long long a, int l)
+{
+  long long volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  long long src = 0xfce7ace0ce7ace0;
+  long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_sarx_u64 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c
new file mode 100644
index 0000000..bcf0fd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-sarx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashrdi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c
new file mode 100644
index 0000000..0bf9702
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+int
+calc_shlx_u32 (int a, int l)
+{
+  int volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res <<= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  int src = 0xfce7ace0;
+  int res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shlx_u32 (src, i + 1);
+    res = src << (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c
new file mode 100644
index 0000000..215e5d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shlx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashlsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c
new file mode 100644
index 0000000..2d2ec15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_shrx_u32 (unsigned a, int l)
+{
+  unsigned volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shrx_u32 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c
new file mode 100644
index 0000000..24c53d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shrx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_lshrsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c
new file mode 100644
index 0000000..81d232e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_shrx_u64 (unsigned long long a, int l)
+{
+  unsigned long long volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shrx_u64 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c
new file mode 100644
index 0000000..7830439
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shrx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_lshrdi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 167b79b..cff8a9a 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -206,6 +206,17 @@ proc check_effective_target_bmi { } {
     } "-mbmi" ]
 }
 
+# Return 1 if bmi2 instructions can be compiled.
+proc check_effective_target_bmi2 { } {
+    return [check_no_compiler_messages bmi2 object {
+	unsigned int
+	_bzhi_u32 (unsigned int __X, unsigned int __Y)
+	{
+	    return __builtin_ia32_bzhi_si (__X, __Y);
+	}
+    } "-mbmi2" ]
+}
+
 # If the linker used understands -M <mapfile>, pass it to clear hardware
 # capabilities set by the Sun assembler.
 set clearcap_ldflags "-Wl,-M,$srcdir/$subdir/clearcap.map"

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-23 11:35   ` Kirill Yukhin
@ 2011-08-23 12:21     ` Uros Bizjak
  2011-08-23 17:01       ` Kirill Yukhin
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2011-08-23 12:21 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: Jakub Jelinek, Richard Henderson, H.J. Lu, gcc-patches List

On Tue, Aug 23, 2011 at 1:07 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hi,
> I've slightly updated mulx split to avoid ICE.
> Updated patch, ChangeLog entry (with Uros's contribution) and
> ChangeLog.testsuite entry are attached.
>
> Bootstrapped and make-checked.
>
> Tests all pass under simulator (expept one, but it is simulator issue).
>
> Uros, you asked if BMI2 is inherited from BMI. The answer is no, these
> 2 extensions are not connected.
>
> Is is OK?

+{
+  operands[3] = gen_lowpart (<MODE>mode, operands[0]);
+  operands[4] = gen_highpart (<MODE>mode, operands[0]);
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+})

Please change this part to:

{
  split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);

  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
})

Please also add -mbmi2 to gcc.target/i386/sse-{12,13,14,22,23}.c files.

Please also change some entries in the ChangeLog to:

	* config/i386/i386-c.c (ix86_target_macros_internal):
	Conditionally define __BMI2__.
	* config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2.
	Handle BMI2 option.
	(ix86_valid_target_attribute_inner_p): Handle BMI2 option.

OK with these changes.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-23 12:21     ` Uros Bizjak
@ 2011-08-23 17:01       ` Kirill Yukhin
  2011-08-23 17:06         ` Uros Bizjak
  0 siblings, 1 reply; 8+ messages in thread
From: Kirill Yukhin @ 2011-08-23 17:01 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, H.J. Lu, gcc-patches List

[-- Attachment #1: Type: text/plain, Size: 1585 bytes --]

Hi,
thanks. I've applied your inputs.

Updated patch, ChangeLog, testsuite/ChangeLog are attached.

Are they ok now?

--
Thanks, K

On Tue, Aug 23, 2011 at 3:25 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Tue, Aug 23, 2011 at 1:07 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
>> Hi,
>> I've slightly updated mulx split to avoid ICE.
>> Updated patch, ChangeLog entry (with Uros's contribution) and
>> ChangeLog.testsuite entry are attached.
>>
>> Bootstrapped and make-checked.
>>
>> Tests all pass under simulator (expept one, but it is simulator issue).
>>
>> Uros, you asked if BMI2 is inherited from BMI. The answer is no, these
>> 2 extensions are not connected.
>>
>> Is is OK?
>
> +{
> +  operands[3] = gen_lowpart (<MODE>mode, operands[0]);
> +  operands[4] = gen_highpart (<MODE>mode, operands[0]);
> +  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
> +})
>
> Please change this part to:
>
> {
>  split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
>
>  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
> })
>
> Please also add -mbmi2 to gcc.target/i386/sse-{12,13,14,22,23}.c files.
>
> Please also change some entries in the ChangeLog to:
>
>        * config/i386/i386-c.c (ix86_target_macros_internal):
>        Conditionally define __BMI2__.
>        * config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2.
>        Handle BMI2 option.
>        (ix86_valid_target_attribute_inner_p): Handle BMI2 option.
>
> OK with these changes.
>
> Thanks,
> Uros.
>

[-- Attachment #2: bmi2-9.gcc.patch --]
[-- Type: application/octet-stream, Size: 65905 bytes --]

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index b201835..99643d6 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3.  If not see
   (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
 
 #define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
 #define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
@@ -137,6 +138,7 @@ along with GCC; see the file COPYING3.  If not see
 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
 #define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
 #define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
@@ -395,6 +397,19 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
+    case OPT_mbmi2:
+      if (value)
+	{
+	  opts->x_ix86_isa_flags |= OPTION_MASK_ISA_BMI2_SET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_SET;
+	}
+      else
+	{
+	  opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_BMI2_UNSET;
+	  opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_BMI2_UNSET;
+	}
+      return true;
+
     case OPT_mtbm:
       if (value)
 	{
diff --git a/gcc/config.gcc b/gcc/config.gcc
index b8addaf..67aae86 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -352,7 +352,8 @@ i[34567]86-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
+		       lzcntintrin.h bmiintrin.h bmi2intrin.h tbmintrin.h
+		       avx2intrin.h"
 	;;
 x86_64-*-*)
 	cpu_type=i386
@@ -364,7 +365,8 @@ x86_64-*-*)
 		       nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
 		       immintrin.h x86intrin.h avxintrin.h xopintrin.h
 		       ia32intrin.h cross-stdarg.h lwpintrin.h popcntintrin.h
-		       lzcntintrin.h bmiintrin.h tbmintrin.h avx2intrin.h"
+		       lzcntintrin.h bmiintrin.h tbmintrin.h bmi2intrin.h
+		       avx2intrin.h"
 	need_64bit_hwint=yes
 	;;
 ia64-*-*)
diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
new file mode 100644
index 0000000..f3ffa52
--- /dev/null
+++ b/gcc/config/i386/bmi2intrin.h
@@ -0,0 +1,81 @@
+/* Copyright (C) 2010, 2011 Free Software Foundation, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   Under Section 7 of GPL version 3, you are granted additional
+   permissions described in the GCC Runtime Library Exception, version
+   3.1, as published by the Free Software Foundation.
+
+   You should have received a copy of the GNU General Public License and
+   a copy of the GCC Runtime Library Exception along with this program;
+   see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+   <http://www.gnu.org/licenses/>.  */
+
+#ifndef _X86INTRIN_H_INCLUDED
+# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
+#endif
+
+#ifndef __BMI2__
+# error "BMI2 instruction set not enabled"
+#endif /* __BMI2__ */
+
+#ifndef _BMI2INTRIN_H_INCLUDED
+#define _BMI2INTRIN_H_INCLUDED
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_bzhi_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_pdep_si (__X, __Y);
+}
+
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u32 (unsigned int __X, unsigned int __Y)
+{
+  return __builtin_ia32_pext_si (__X, __Y);
+}
+
+#ifdef  __x86_64__
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bzhi_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_bzhi_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pdep_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_pdep_di (__X, __Y);
+}
+
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_pext_u64 (unsigned long long __X, unsigned long long __Y)
+{
+  return __builtin_ia32_pext_di (__X, __Y);
+}
+
+#endif /* __x86_64__  */
+
+#endif /* _BMI2INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index d53743f..5da8fd2 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -67,6 +67,7 @@
 #define bit_FSGSBASE	(1 << 0)
 #define bit_BMI		(1 << 3)
 #define bit_AVX2	(1 << 5)
+#define bit_BMI2	(1 << 8)
 
 #if defined(__i386__) && defined(__PIC__)
 /* %ebx may be the PIC register.  */
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index b7a1f52..8107ece 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -396,7 +396,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_popcnt = 0, has_aes = 0, has_avx = 0, has_avx2 = 0;
   unsigned int has_pclmul = 0, has_abm = 0, has_lwp = 0;
   unsigned int has_fma = 0, has_fma4 = 0, has_xop = 0;
-  unsigned int has_bmi = 0, has_tbm = 0, has_lzcnt = 0;
+  unsigned int has_bmi = 0, has_bmi2 = 0, has_tbm = 0, has_lzcnt = 0;
 
   bool arch;
 
@@ -475,6 +475,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
 
       has_bmi = ebx & bit_BMI;
       has_avx2 = ebx & bit_AVX2;
+      has_bmi2 = ebx & bit_BMI2;
     }
 
   if (!arch)
@@ -715,6 +716,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *fma4 = has_fma4 ? " -mfma4" : " -mno-fma4";
       const char *xop = has_xop ? " -mxop" : " -mno-xop";
       const char *bmi = has_bmi ? " -mbmi" : " -mno-bmi";
+      const char *bmi2 = has_bmi2 ? " -mbmi2" : " -mno-bmi2";
       const char *tbm = has_tbm ? " -mtbm" : " -mno-tbm";
       const char *avx = has_avx ? " -mavx" : " -mno-avx";
       const char *avx2 = has_avx2 ? " -mavx2" : " -mno-avx2";
@@ -723,8 +725,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
       const char *lzcnt = has_lzcnt ? " -mlzcnt" : " -mno-lzcnt";
 
       options = concat (options, cx16, sahf, movbe, ase, pclmul,
-			popcnt, abm, lwp, fma, fma4, xop, bmi, tbm,
-			avx2, avx, sse4_2, sse4_1, lzcnt, NULL);
+			popcnt, abm, lwp, fma, fma4, xop, bmi, bmi2,
+			tbm, avx, avx2, sse4_2, sse4_1, lzcnt, NULL);
     }
 
 done:
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 5c1dfe6..d4b0b08 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -273,6 +273,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
     def_or_undef (parse_in, "__ABM__");
   if (isa_flag & OPTION_MASK_ISA_BMI)
     def_or_undef (parse_in, "__BMI__");
+  if (isa_flag & OPTION_MASK_ISA_BMI2)
+    def_or_undef (parse_in, "__BMI2__");
   if (isa_flag & OPTION_MASK_ISA_LZCNT)
     def_or_undef (parse_in, "__LZCNT__");
   if (isa_flag & OPTION_MASK_ISA_TBM)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ef02673..61f2c5c 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2664,6 +2664,7 @@ ix86_target_string (HOST_WIDE_INT isa, int flags, const char *arch,
     { "-mmmx",		OPTION_MASK_ISA_MMX },
     { "-mabm",		OPTION_MASK_ISA_ABM },
     { "-mbmi",		OPTION_MASK_ISA_BMI },
+    { "-mbmi2", 	OPTION_MASK_ISA_BMI2 },
     { "-mlzcnt",	OPTION_MASK_ISA_LZCNT },
     { "-mtbm",		OPTION_MASK_ISA_TBM },
     { "-mpopcnt",	OPTION_MASK_ISA_POPCNT },
@@ -2921,6 +2922,7 @@ ix86_option_override_internal (bool main_args_p)
 #define PTA_TBM		 	(HOST_WIDE_INT_1 << 28)
 #define PTA_XOP		 	(HOST_WIDE_INT_1 << 29)
 #define PTA_AVX2		(HOST_WIDE_INT_1 << 30)
+#define PTA_BMI2	 	(HOST_WIDE_INT_1 << 31)
 /* if this reaches 64, need to widen struct pta flags below */
 
   static struct pta
@@ -2978,8 +2980,8 @@ ix86_option_override_internal (bool main_args_p)
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AVX | PTA_AVX2
 	| PTA_CX16 | PTA_POPCNT | PTA_AES | PTA_PCLMUL | PTA_FSGSBASE
-	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_LZCNT | PTA_FMA
-	| PTA_MOVBE},
+	| PTA_RDRND | PTA_F16C | PTA_BMI | PTA_BMI2 | PTA_LZCNT
+        | PTA_FMA | PTA_MOVBE},
       {"atom", PROCESSOR_ATOM, CPU_ATOM,
 	PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
 	| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE},
@@ -3300,6 +3302,9 @@ ix86_option_override_internal (bool main_args_p)
 	if (processor_alias_table[i].flags & PTA_TBM
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_TBM))
 	  ix86_isa_flags |= OPTION_MASK_ISA_TBM;
+	if (processor_alias_table[i].flags & PTA_BMI2
+	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_BMI2))
+	  ix86_isa_flags |= OPTION_MASK_ISA_BMI2;
 	if (processor_alias_table[i].flags & PTA_CX16
 	    && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
 	  ix86_isa_flags |= OPTION_MASK_ISA_CX16;
@@ -4053,6 +4058,7 @@ ix86_valid_target_attribute_inner_p (tree args, char *p_strings[],
     IX86_ATTR_ISA ("3dnow",	OPT_m3dnow),
     IX86_ATTR_ISA ("abm",	OPT_mabm),
     IX86_ATTR_ISA ("bmi",	OPT_mbmi),
+    IX86_ATTR_ISA ("bmi2",	OPT_mbmi2),
     IX86_ATTR_ISA ("lzcnt",	OPT_mlzcnt),
     IX86_ATTR_ISA ("tbm",	OPT_mtbm),
     IX86_ATTR_ISA ("aes",	OPT_maes),
@@ -24242,6 +24248,13 @@ enum ix86_builtins
   IX86_BUILTIN_BEXTRI32,
   IX86_BUILTIN_BEXTRI64,
 
+  /* BMI2 instructions. */
+  IX86_BUILTIN_BZHI32,
+  IX86_BUILTIN_BZHI64,
+  IX86_BUILTIN_PDEP32,
+  IX86_BUILTIN_PDEP64,
+  IX86_BUILTIN_PEXT32,
+  IX86_BUILTIN_PEXT64,
 
   /* FSGSBASE instructions.  */
   IX86_BUILTIN_RDFSBASE32,
@@ -25375,6 +25388,14 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtph2ps256, "__builtin_ia32_vcvtph2ps256", IX86_BUILTIN_CVTPH2PS256, UNKNOWN, (int) V8SF_FTYPE_V8HI },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph, "__builtin_ia32_vcvtps2ph", IX86_BUILTIN_CVTPS2PH, UNKNOWN, (int) V8HI_FTYPE_V4SF_INT },
   { OPTION_MASK_ISA_F16C, CODE_FOR_vcvtps2ph256, "__builtin_ia32_vcvtps2ph256", IX86_BUILTIN_CVTPS2PH256, UNKNOWN, (int) V8HI_FTYPE_V8SF_INT },
+
+  /* BMI2 */
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_si3, "__builtin_ia32_bzhi_si", IX86_BUILTIN_BZHI32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_bzhi_di3, "__builtin_ia32_bzhi_di", IX86_BUILTIN_BZHI64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_si3, "__builtin_ia32_pdep_si", IX86_BUILTIN_PDEP32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pdep_di3, "__builtin_ia32_pdep_di", IX86_BUILTIN_PDEP64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_si3, "__builtin_ia32_pext_si", IX86_BUILTIN_PEXT32, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
+  { OPTION_MASK_ISA_BMI2, CODE_FOR_bmi2_pext_di3, "__builtin_ia32_pext_di", IX86_BUILTIN_PEXT64, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
 };
 
 /* FMA4 and XOP.  */
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f8a35ba..47442a0 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -62,6 +62,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
 #define TARGET_ROUND	OPTION_ISA_ROUND
 #define TARGET_ABM	OPTION_ISA_ABM
 #define TARGET_BMI	OPTION_ISA_BMI
+#define TARGET_BMI2	OPTION_ISA_BMI2
 #define TARGET_LZCNT	OPTION_ISA_LZCNT
 #define TARGET_TBM	OPTION_ISA_TBM
 #define TARGET_POPCNT	OPTION_ISA_POPCNT
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d343fc2..3ce2a01 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -244,6 +244,10 @@
 
   ;; For RDRAND support
   UNSPEC_RDRAND
+
+  ;; For BMI2 support
+  UNSPEC_PDEP
+  UNSPEC_PEXT
 ])
 
 (define_c_enum "unspecv" [
@@ -385,7 +389,7 @@
 (define_attr "type"
   "other,multi,
    alu,alu1,negnot,imov,imovx,lea,
-   incdec,ishift,ishift1,rotate,rotate1,imul,idiv,
+   incdec,ishift,ishiftx,ishift1,rotate,rotatex,rotate1,imul,imulx,idiv,
    icmp,test,ibr,setcc,icmov,
    push,pop,call,callv,leave,
    str,bitmanip,
@@ -418,12 +422,12 @@
 ;; The (bounding maximum) length of an instruction immediate.
 (define_attr "length_immediate" ""
   (cond [(eq_attr "type" "incdec,setcc,icmov,str,lea,other,multi,idiv,leave,
-                          bitmanip")
+                          bitmanip,imulx")
 	   (const_int 0)
 	 (eq_attr "unit" "i387,sse,mmx")
 	   (const_int 0)
-	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,rotate,ishift1,rotate1,
-			  imul,icmp,push,pop")
+	 (eq_attr "type" "alu,alu1,negnot,imovx,ishift,ishiftx,ishift1,
+			  rotate,rotatex,rotate1,imul,icmp,push,pop")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, true)")
 	 (eq_attr "type" "imov,test")
 	   (symbol_ref "ix86_attr_length_immediate_default (insn, false)")
@@ -683,7 +687,7 @@
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 1 "immediate_operand" "")))
 	   (const_string "true")
-	 (and (eq_attr "type" "alu,ishift,rotate,imul,idiv")
+	 (and (eq_attr "type" "alu,ishift,ishiftx,rotate,rotatex,imul,idiv")
 	      (and (match_operand 0 "memory_displacement_operand" "")
 		   (match_operand 2 "immediate_operand" "")))
 	   (const_string "true")
@@ -707,12 +711,13 @@
 (define_attr "movu" "0,1" (const_string "0"))
 
 ;; Used to control the "enabled" attribute on a per-instruction basis.
-(define_attr "isa" "base,noavx,avx"
+(define_attr "isa" "base,noavx,avx,bmi2"
   (const_string "base"))
 
 (define_attr "enabled" ""
   (cond [(eq_attr "isa" "noavx") (symbol_ref "!TARGET_AVX")
 	 (eq_attr "isa" "avx") (symbol_ref "TARGET_AVX")
+	 (eq_attr "isa" "bmi2") (symbol_ref "TARGET_BMI2")
 	]
 	(const_int 1)))
 
@@ -6853,16 +6858,103 @@
 	      (clobber (reg:CC FLAGS_REG))])]
   "TARGET_QIMODE_MATH")
 
-(define_insn "*<u>mul<mode><dwi>3_1"
+(define_insn "*bmi2_umulditi3_1"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(mult:DI
+	  (match_operand:DI 2 "nonimmediate_operand" "%d")
+	  (match_operand:DI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:DI 1 "register_operand" "=r")
+	(truncate:DI
+	  (lshiftrt:TI
+	    (mult:TI (zero_extend:TI (match_dup 2))
+		     (zero_extend:TI (match_dup 3)))
+	    (const_int 64))))]
+  "TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "DI")])
+
+(define_insn "*bmi2_umulsidi3_1"
+  [(set (match_operand:SI 0 "register_operand" "=r")
+	(mult:SI
+	  (match_operand:SI 2 "nonimmediate_operand" "%d")
+	  (match_operand:SI 3 "nonimmediate_operand" "rm")))
+   (set (match_operand:SI 1 "register_operand" "=r")
+	(truncate:SI
+	  (lshiftrt:DI
+	    (mult:DI (zero_extend:DI (match_dup 2))
+		     (zero_extend:DI (match_dup 3)))
+	    (const_int 32))))]
+  "!TARGET_64BIT && TARGET_BMI2
+   && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "mulx\t{%3, %0, %1|%1, %0, %3}"
+  [(set_attr "type" "imulx")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*umul<mode><dwi>3_1"
+  [(set (match_operand:<DWI> 0 "register_operand" "=A,r")
+	(mult:<DWI>
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 1 "nonimmediate_operand" "%0,d"))
+	  (zero_extend:<DWI>
+	    (match_operand:DWIH 2 "nonimmediate_operand" "rm,rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
+  "@
+   mul{<imodesuffix>}\t%2
+   #"
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "imul,imulx")
+   (set_attr "length_immediate" "0,*")
+   (set (attr "athlon_decode")
+	(cond [(eq_attr "alternative" "0")
+		 (if_then_else (eq_attr "cpu" "athlon")
+		   (const_string "vector")
+		   (const_string "double"))]
+	      (const_string "*")))
+   (set_attr "amdfam10_decode" "double,*")
+   (set_attr "bdver1_decode" "direct,*")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "<MODE>")])
+
+;; Convert mul to the mulx pattern to avoid flags dependency.
+(define_split
+ [(set (match_operand:<DWI> 0 "register_operand" "")
+       (mult:<DWI>
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 1 "register_operand" ""))
+	 (zero_extend:<DWI>
+	   (match_operand:DWIH 2 "nonimmediate_operand" ""))))
+  (clobber (reg:CC FLAGS_REG))]
+ "TARGET_BMI2 && reload_completed
+  && true_regnum (operands[1]) == DX_REG"
+  [(parallel [(set (match_dup 3)
+		   (mult:DWIH (match_dup 1) (match_dup 2)))
+	      (set (match_dup 4)
+		   (truncate:DWIH
+		     (lshiftrt:<DWI>
+		       (mult:<DWI> (zero_extend:<DWI> (match_dup 1))
+				   (zero_extend:<DWI> (match_dup 2)))
+		       (match_dup 5))))])]
+{
+  split_double_mode (<DWI>mode, &operands[0], 1, &operands[3], &operands[4]);
+
+  operands[5] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+})
+
+(define_insn "*mul<mode><dwi>3_1"
   [(set (match_operand:<DWI> 0 "register_operand" "=A")
 	(mult:<DWI>
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 1 "nonimmediate_operand" "%0"))
-	  (any_extend:<DWI>
+	  (sign_extend:<DWI>
 	    (match_operand:DWIH 2 "nonimmediate_operand" "rm"))))
    (clobber (reg:CC FLAGS_REG))]
   "!(MEM_P (operands[1]) && MEM_P (operands[2]))"
-  "<sgnprefix>mul{<imodesuffix>}\t%2"
+  "imul{<imodesuffix>}\t%2"
   [(set_attr "type" "imul")
    (set_attr "length_immediate" "0")
    (set (attr "athlon_decode")
@@ -9060,16 +9152,26 @@
   [(set_attr "type" "ishift")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*bmi2_ashl<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		      (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "shlx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ashl<mode>3_1"
-  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
-	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l")
-		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r,r")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "0,l,rm")
+		      (match_operand:QI 2 "nonmemory_operand" "c<S>,M,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9085,9 +9187,12 @@
 	return "sal{<imodesuffix>}\t{%2, %0|%0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		          (const_int 0))
 		      (match_operand 0 "register_operand" ""))
@@ -9106,17 +9211,39 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(ashift:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*bmi2_ashlsi3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		     (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "shlx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
 (define_insn "*ashlsi3_1_zext"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
+  [(set (match_operand:DI 0 "register_operand" "=r,r,r")
 	(zero_extend:DI
-	  (ashift:SI (match_operand:SI 1 "register_operand" "0,l")
-		     (match_operand:QI 2 "nonmemory_operand" "cI,M"))))
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "0,l,rm")
+		     (match_operand:QI 2 "nonmemory_operand" "cI,M,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (ASHIFT, SImode, operands)"
 {
   switch (get_attr_type (insn))
     {
     case TYPE_LEA:
+    case TYPE_ISHIFTX:
       return "#";
 
     case TYPE_ALU:
@@ -9131,9 +9258,12 @@
 	return "sal{l}\t{%2, %k0|%k0, %2}";
     }
 }
-  [(set (attr "type")
+  [(set_attr "isa" "base,base,bmi2")
+   (set (attr "type")
      (cond [(eq_attr "alternative" "1")
 	      (const_string "lea")
+	    (eq_attr "alternative" "2")
+	      (const_string "ishiftx")
             (and (ne (symbol_ref "TARGET_DOUBLE_WITH_ADD")
 		     (const_int 0))
 		 (match_operand 2 "const1_operand" ""))
@@ -9151,6 +9281,18 @@
        (const_string "*")))
    (set_attr "mode" "SI")])
 
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (ashift:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (ashift:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
 (define_insn "*ashlhi3_1"
   [(set (match_operand:HI 0 "nonimmediate_operand" "=rm")
 	(ashift:HI (match_operand:HI 1 "nonimmediate_operand" "0")
@@ -9767,20 +9909,38 @@
   DONE;
 })
 
+(define_insn "*bmi2_<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			   (match_operand:SWI48 2 "register_operand" "r")))]
+  "TARGET_BMI2"
+  "<shiftrt>x\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<shiftrt_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_shiftrt:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_shiftrt:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,r")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{<imodesuffix>}\t%0";
-  else
-    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{<imodesuffix>}\t%0";
+      else
+	return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "ishift")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
    (set (attr "length_immediate")
      (if_then_else
        (and (match_operand 2 "const1_operand" "")
@@ -9790,19 +9950,84 @@
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<shiftrt_insn>si3_1_zext"
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(any_shiftrt:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			   (match_operand:QI 2 "register_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(any_shiftrt:SWI48 (match_dup 1) (match_dup 2)))]
+  "operands[2] = gen_lowpart (<MODE>mode, operands[2]);")
+
+(define_insn "*bmi2_<shiftrt_insn>si3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_shiftrt:SI (match_operand:SI 1 "register_operand" "0")
-			  (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+			  (match_operand:SI 2 "register_operand" "r"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "<shiftrt>x\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "ishiftx")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<shiftrt_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			  (match_operand:QI 2 "nonmemory_operand" "cI,r"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ISHIFTX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<shiftrt>{l}\t%k0";
+      else
+	return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "ishift,ishiftx")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (match_operand 2 "const1_operand" "")
+	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		(const_int 0)))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert shift to the shiftx pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (any_shiftrt:SI (match_operand:SI 1 "nonimmediate_operand" "")
+			  (match_operand:QI 2 "register_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (any_shiftrt:SI (match_dup 1) (match_dup 2))))]
+  "operands[2] = gen_lowpart (SImode, operands[2]);")
+
+(define_insn "*<shiftrt_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_shiftrt:SWI12
+	  (match_operand:SWI12 1 "nonimmediate_operand" "0")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
   if (operands[2] == const1_rtx
       && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<shiftrt>{l}\t%k0";
+    return "<shiftrt>{<imodesuffix>}\t%0";
   else
-    return "<shiftrt>{l}\t{%2, %k0|%k0, %2}";
+    return "<shiftrt>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "ishift")
    (set (attr "length_immediate")
@@ -9812,7 +10037,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<shiftrt_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -10064,42 +10289,151 @@
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
 
+(define_insn "*bmi2_rorx<mode>3_1"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+			(match_operand:QI 2 "immediate_operand" "<S>")))]
+  "TARGET_BMI2"
+  "rorx\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<rotate_insn><mode>3_1"
-  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m")
-	(any_rotate:SWI (match_operand:SWI 1 "nonimmediate_operand" "0")
-			(match_operand:QI 2 "nonmemory_operand" "c<S>")))
+  [(set (match_operand:SWI48 0 "nonimmediate_operand" "=rm,r")
+	(any_rotate:SWI48
+	  (match_operand:SWI48 1 "nonimmediate_operand" "0,rm")
+	  (match_operand:QI 2 "nonmemory_operand" "c<S>,<S>")))
    (clobber (reg:CC FLAGS_REG))]
   "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
 {
-  if (operands[2] == const1_rtx
-      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{<imodesuffix>}\t%0";
-  else
-    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{<imodesuffix>}\t%0";
+      else
+	return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
 }
-  [(set_attr "type" "rotate")
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
    (set (attr "length_immediate")
      (if_then_else
-       (and (match_operand 2 "const1_operand" "")
-	    (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
-		(const_int 0)))
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "<MODE>")])
 
-(define_insn "*<rotate_insn>si3_1_zext"
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotate:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+		      (match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:SWI48 0 "register_operand" "")
+	(rotatert:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "")
+			(match_operand:QI 2 "immediate_operand" "")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(rotatert:SWI48 (match_dup 1) (match_dup 2)))])
+
+(define_insn "*bmi2_rorxsi3_1_zext"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(zero_extend:DI
-	  (any_rotate:SI (match_operand:SI 1 "register_operand" "0")
-			 (match_operand:QI 2 "nonmemory_operand" "cI"))))
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "rm")
+		       (match_operand:QI 2 "immediate_operand" "I"))))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "rorx\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "rotatex")
+   (set_attr "mode" "SI")])
+
+(define_insn "*<rotate_insn>si3_1_zext"
+  [(set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI
+	  (any_rotate:SI (match_operand:SI 1 "nonimmediate_operand" "0,rm")
+			 (match_operand:QI 2 "nonmemory_operand" "cI,I"))))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT && ix86_binary_operator_ok (<CODE>, SImode, operands)"
 {
-    if (operands[2] == const1_rtx
-	&& (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
-    return "<rotate>{l}\t%k0";
+  switch (get_attr_type (insn))
+    {
+    case TYPE_ROTATEX:
+      return "#";
+
+    default:
+      if (operands[2] == const1_rtx
+	  && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+	return "<rotate>{l}\t%k0";
+      else
+	return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    }
+}
+  [(set_attr "isa" "base,bmi2")
+   (set_attr "type" "rotate,rotatex")
+   (set (attr "length_immediate")
+     (if_then_else
+       (and (eq_attr "type" "rotate")
+	    (and (match_operand 2 "const1_operand" "")
+		 (ne (symbol_ref "TARGET_SHIFT1 || optimize_function_for_size_p (cfun)")
+		     (const_int 0))))
+       (const_string "0")
+       (const_string "*")))
+   (set_attr "mode" "SI")])
+
+;; Convert rotate to the rotatex pattern to avoid flags dependency.
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotate:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		     (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))]
+{
+  operands[2]
+    = GEN_INT (GET_MODE_BITSIZE (SImode) - INTVAL (operands[2]));
+})
+
+(define_split
+  [(set (match_operand:DI 0 "register_operand" "")
+	(zero_extend:DI
+	  (rotatert:SI (match_operand:SI 1 "nonimmediate_operand" "")
+		       (match_operand:QI 2 "immediate_operand" ""))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2 && reload_completed"
+  [(set (match_dup 0)
+	(zero_extend:DI (rotatert:SI (match_dup 1) (match_dup 2))))])
+
+(define_insn "*<rotate_insn><mode>3_1"
+  [(set (match_operand:SWI12 0 "nonimmediate_operand" "=<r>m")
+	(any_rotate:SWI12 (match_operand:SWI12 1 "nonimmediate_operand" "0")
+			  (match_operand:QI 2 "nonmemory_operand" "c<S>")))
+   (clobber (reg:CC FLAGS_REG))]
+  "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+{
+  if (operands[2] == const1_rtx
+      && (TARGET_SHIFT1 || optimize_function_for_size_p (cfun)))
+    return "<rotate>{<imodesuffix>}\t%0";
   else
-    return "<rotate>{l}\t{%2, %k0|%k0, %2}";
+    return "<rotate>{<imodesuffix>}\t{%2, %0|%0, %2}";
 }
   [(set_attr "type" "rotate")
    (set (attr "length_immediate")
@@ -10109,7 +10443,7 @@
 		(const_int 0)))
        (const_string "0")
        (const_string "*")))
-   (set_attr "mode" "SI")])
+   (set_attr "mode" "<MODE>")])
 
 (define_insn "*<rotate_insn>qi3_1_slp"
   [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
@@ -11951,6 +12285,41 @@
   [(set_attr "type" "bitmanip")
    (set_attr "mode" "<MODE>")])
 
+;; BMI2 instructions.
+(define_insn "bmi2_bzhi_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(and:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
+		   (lshiftrt:SWI48 (const_int -1)
+				   (match_operand:SWI48 2 "register_operand" "r"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_BMI2"
+  "bzhi\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi2_pdep_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_PDEP))]
+  "TARGET_BMI2"
+  "pdep\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
+(define_insn "bmi2_pext_<mode>3"
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+        (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "rm")
+                       (match_operand:SWI48 2 "register_operand" "r")]
+                       UNSPEC_PEXT))]
+  "TARGET_BMI2"
+  "pext\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 ;; TBM instructions.
 (define_insn "tbm_bextri_<mode>"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
@@ -12350,6 +12719,7 @@
   "xor{b}\t{%h0, %b0|%b0, %h0}"
   [(set_attr "length" "2")
    (set_attr "mode" "HI")])
+
 \f
 ;; Thread-local storage patterns for ELF.
 ;;
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 54d7af1..8e4d51b 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -493,6 +493,10 @@ mbmi
 Target Report Mask(ISA_BMI) Var(ix86_isa_flags) Save
 Support BMI built-in functions and code generation
 
+mbmi2
+Target Report Mask(ISA_BMI2) Var(ix86_isa_flags) Save
+Support BMI2 built-in functions and code generation
+
 mlzcnt
 Target Report Mask(ISA_LZCNT) Var(ix86_isa_flags) Save
 Support LZCNT built-in function and code generation
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 88456f9..e01ecd2 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -81,6 +81,10 @@
 #include <bmiintrin.h>
 #endif
 
+#ifdef __BMI2__
+#include <bmi2intrin.h>
+#endif
+
 #ifdef __TBM__
 #include <tbmintrin.h>
 #endif
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 29c02b8..0b36336 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -9883,6 +9883,17 @@ unsigned int __builtin_ia32_bextr_u32(unsigned int, unsigned int);
 unsigned long long __builtin_ia32_bextr_u64 (unsigned long long, unsigned long long);
 @end smallexample
 
+The following built-in functions are available when @option{-mbmi2} is used.
+All of them generate the machine instruction that is part of the name.
+@smallexample
+unsigned int _bzhi_u32 (unsigned int, unsigned int)
+unsigned int _pdep_u32 (unsigned int, unsigned int)
+unsigned int _pext_u32 (unsigned int, unsigned int)
+unsigned long long _bzhi_u64 (unsigned long long, unsigned long long)
+unsigned long long _pdep_u64 (unsigned long long, unsigned long long)
+unsigned long long _pext_u64 (unsigned long long, unsigned long long)
+@end smallexample
+
 The following built-in functions are available when @option{-mlzcnt} is used.
 All of them generate the machine instruction that is part of the name.
 @smallexample
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index fdc3297..acf30e3 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -607,7 +607,7 @@ Objective-C and Objective-C++ Dialects}.
 -mmmx  -msse  -msse2 -msse3 -mssse3 -msse4.1 -msse4.2 -msse4 -mavx @gol
 -mavx2 -maes -mpclmul -mfsgsbase -mrdrnd -mf16c -mfma @gol
 -msse4a -m3dnow -mpopcnt -mabm -mbmi -mtbm -mfma4 -mxop -mlzcnt @gol
--mlwp -mthreads  -mno-align-stringops  -minline-all-stringops @gol
+-mbmi2 -mlwp -mthreads  -mno-align-stringops  -minline-all-stringops @gol
 -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
 -mpush-args  -maccumulate-outgoing-args  -m128bit-long-double @gol
 -m96bit-long-double  -mregparm=@var{num}  -msseregparm @gol
@@ -12697,7 +12697,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @itemx -mabm
 @itemx -mno-abm
 @itemx -mbmi
+@itemx -mbmi2
 @itemx -mno-bmi
+@itemx -mno-bmi2
 @itemx -mlzcnt
 @itemx -mno-lzcnt
 @itemx -mtbm
@@ -12709,8 +12711,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
 @opindex m3dnow
 @opindex mno-3dnow
 These switches enable or disable the use of instructions in the MMX, SSE,
-SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C, FMA,
-SSE4A, FMA4, XOP, LWP, ABM, BMI, LZCNT or 3DNow!@: extended instruction sets.
+SSE2, SSE3, SSSE3, SSE4.1, AVX, AVX2, AES, PCLMUL, FSGSBASE, RDRND, F16C,
+FMA, SSE4A, FMA4, XOP, LWP, ABM, BMI, BMI2, LZCNT or 3DNow!
+@: extended instruction sets.
 These extensions are also available as built-in functions: see
 @ref{X86 Built-in Functions}, for details of the functions enabled and
 disabled by these switches.
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C
index ed183c7..5f2eaf9 100644
--- a/gcc/testsuite/g++.dg/other/i386-2.C
+++ b/gcc/testsuite/g++.dg/other/i386-2.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C
index 626f972..76d4d19 100644
--- a/gcc/testsuite/g++.dg/other/i386-3.C
+++ b/gcc/testsuite/g++.dg/other/i386-3.C
@@ -1,5 +1,5 @@
 /* { dg-do compile { target i?86-*-* x86_64-*-* } } */
-/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
    xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c
new file mode 100644
index 0000000..68df8b7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_bzhi_u32 (unsigned a, int l)
+{
+  unsigned res = a;
+  int i;
+  for (i = 0; i < 32 - l; ++i)
+    res &= ~(1 << (31 - i));
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0f;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_bzhi_u32 (src, i * 2);
+    res = _bzhi_u32 (src, i * 2);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c
new file mode 100644
index 0000000..05be7a8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-bzhi32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_bzhi_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c
new file mode 100644
index 0000000..1ffe135
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1.c
@@ -0,0 +1,35 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_bzhi_u64 (unsigned long long a, int l)
+{
+  unsigned long long res = a;
+  int i;
+  for (i = 0; i < 64 - l; ++i)
+    res &= ~(1LL << (63 - i));
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0ff;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_bzhi_u64 (src, i * 2);
+    res = _bzhi_u64 (src, i * 2);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c
new file mode 100644
index 0000000..dc4a94c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-bzhi64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-bzhi64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_bzhi_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-check.h b/gcc/testsuite/gcc.target/i386/bmi2-check.h
new file mode 100644
index 0000000..5ffce44
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-check.h
@@ -0,0 +1,36 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cpuid.h"
+
+static void bmi2_test (void);
+
+static void
+__attribute__ ((noinline))
+do_test (void)
+{
+  bmi2_test ();
+}
+
+int
+main ()
+{
+  unsigned int eax, ebx, ecx, edx;
+
+  __cpuid_count (7, 0,  eax, ebx, ecx, edx);
+
+  /* Run BMI2 test only if host has BMI2 support.  */
+  if (ebx & bit_BMI2)
+    {
+      do_test ();
+#ifdef DEBUG
+      printf ("PASSED\n");
+#endif
+    }
+#ifdef DEBUG
+  else
+    printf ("SKIPPED\n");
+#endif
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c
new file mode 100644
index 0000000..a90ff1a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1.c
@@ -0,0 +1,47 @@
+/* { dg-do run { target { bmi2 && { ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_mul_u32 (unsigned volatile a, unsigned b)
+{
+  unsigned long long res = 0;
+  int i;
+  for (i = 0; i < b; ++i)
+    res += a;
+
+  return res;
+}
+
+__attribute__((noinline))
+unsigned long long
+gen_mulx (unsigned a, unsigned b)
+{
+  unsigned long long res;
+
+  res = (unsigned long long)a * b;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned a = 0xce7ace0;
+  unsigned b = 0xfacefff;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    a = a * (i + 1);
+    b = b / (i + 1);
+
+    res_ref = calc_mul_u32 (a, b);
+    res = gen_mulx (a, b);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c
new file mode 100644
index 0000000..cf3bb08
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-mulx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_umulsidi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c
new file mode 100644
index 0000000..6844946
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned __int128
+calc_mul_u64 (unsigned long long volatile a, unsigned long long b)
+{
+  unsigned __int128 res = 0;
+  int i;
+  for (i = 0; i < b; ++i)
+    res += (unsigned __int128) a;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long a = 0xce7ace0ce7ace0;
+  unsigned long long b = 0xface;
+  unsigned __int128 res, res_ref;
+
+  for (i=0; i<5; ++i) {
+    a = a * (i + 1);
+    b = b / (i + 1);
+
+    res_ref = calc_mul_u64 (a, b);
+    res = (unsigned __int128) a * b;
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c
new file mode 100644
index 0000000..592d713
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-mulx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-mulx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_umulditi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c
new file mode 100644
index 0000000..5aecf57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_pdep_u32 (unsigned a, int mask)
+{
+  unsigned res = 0;
+  int i, k = 0;
+
+  for (i = 0; i < 32; ++i)
+    if (mask & (1 << i)) {
+      res |= ((a & (1 << k)) >> k) << i;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7acc;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pdep_u32 (src, i * 3);
+    res = _pdep_u32 (src, i * 3);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c
new file mode 100644
index 0000000..87888fc
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pdep32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pdep_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c
new file mode 100644
index 0000000..f718b2f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1.c
@@ -0,0 +1,38 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_pdep_u64 (unsigned long long a, unsigned long long mask)
+{
+  unsigned long long res = 0;
+  unsigned long long i, k = 0;
+
+  for (i = 0; i < 64; ++i)
+    if (mask & (1LL << i)) {
+      res |= ((a & (1LL << k)) >> k) << i;
+      ++k;
+    }
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned long long i;
+  unsigned long long src = 0xce7acce7acce7ac;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pdep_u64 (src, ~(i * 3));
+    res = _pdep_u64 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c
new file mode 100644
index 0000000..8163c40
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pdep64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pdep64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pdep_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c
new file mode 100644
index 0000000..7fe7837
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_pext_u32 (unsigned a, unsigned mask)
+{
+  unsigned res = 0;
+  int i, k = 0;
+
+  for (i = 0; i < 32; ++i)
+    if (mask & (1 << i)) {
+      res |= ((a & (1 << i)) >> i) << k;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7acc;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pext_u32 (src, ~(i * 3));
+    res = _pext_u32 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c
new file mode 100644
index 0000000..c4a6dee
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pext32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pext_si3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c
new file mode 100644
index 0000000..6850749
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1.c
@@ -0,0 +1,39 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2" } */
+
+#include <x86intrin.h>
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_pext_u64 (unsigned long long a, unsigned long long mask)
+{
+  unsigned long long res = 0;
+  int i, k = 0;
+
+  for (i = 0; i < 64; ++i)
+    if (mask & (1LL << i)) {
+      res |= ((a & (1LL << i)) >> i) << k;
+      ++k;
+    }
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned long long i;
+  unsigned long long src = 0xce7acce7acce7ac;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_pext_u64 (src, ~(i * 3));
+    res = _pext_u64 (src, ~(i * 3));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c
new file mode 100644
index 0000000..aaf06c1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-pext64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-pext64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_pext_di3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c
new file mode 100644
index 0000000..d7f6f3b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_rorx_u32 (unsigned a, int l)
+{
+  unsigned volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res = (res >> 1) | ((res & 1) << 31);
+
+  return res;
+}
+
+#define SHIFT_VAL 0x0e
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_rorx_u32 (src, SHIFT_VAL);
+    res = (src >> SHIFT_VAL) | (src << (32 - SHIFT_VAL));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c
new file mode 100644
index 0000000..bb3b28d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-rorx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_rorxsi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c
new file mode 100644
index 0000000..ccd60c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1.c
@@ -0,0 +1,36 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_rorx_u64 (unsigned long long a, int l)
+{
+  unsigned long long volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res = (res >> 1) | ((res&1)<< 63);
+
+  return res;
+}
+
+#define SHIFT_VAL 0x1e
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_rorx_u64 (src, SHIFT_VAL);
+    res = (src >> SHIFT_VAL) | (src << (64 - SHIFT_VAL));
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c
new file mode 100644
index 0000000..2a7a7a0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-rorx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-rorx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_rorxdi3_1" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c
new file mode 100644
index 0000000..8224b6f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+int
+calc_sarx_u32 (int a, int l)
+{
+  int volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  int src = 0xfce7ace0;
+  int res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_sarx_u32 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c
new file mode 100644
index 0000000..f10d60b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-sarx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashrsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c
new file mode 100644
index 0000000..a43b202
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+long long
+calc_sarx_u64 (long long a, int l)
+{
+  long long volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  long long src = 0xfce7ace0ce7ace0;
+  long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_sarx_u64 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c
new file mode 100644
index 0000000..bcf0fd4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-sarx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-sarx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashrdi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c
new file mode 100644
index 0000000..0bf9702
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+int
+calc_shlx_u32 (int a, int l)
+{
+  int volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res <<= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  int src = 0xfce7ace0;
+  int res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shlx_u32 (src, i + 1);
+    res = src << (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c
new file mode 100644
index 0000000..215e5d3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shlx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shlx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_ashlsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c
new file mode 100644
index 0000000..2d2ec15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned
+calc_shrx_u32 (unsigned a, int l)
+{
+  unsigned volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned src = 0xce7ace0;
+  unsigned res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shrx_u32 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c
new file mode 100644
index 0000000..24c53d4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx32-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shrx32-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_lshrsi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c
new file mode 100644
index 0000000..81d232e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1.c
@@ -0,0 +1,34 @@
+/* { dg-do run { target { bmi2 && { ! ia32 } } } } */
+/* { dg-options "-mbmi2 -O2 -dp" } */
+
+#include "bmi2-check.h"
+
+__attribute__((noinline))
+unsigned long long
+calc_shrx_u64 (unsigned long long a, int l)
+{
+  unsigned long long volatile res = a;
+  int i;
+  for (i = 0; i < l; ++i)
+    res >>= 1;
+
+  return res;
+}
+
+static void
+bmi2_test ()
+{
+  unsigned i;
+  unsigned long long src = 0xce7ace0ce7ace0;
+  unsigned long long res, res_ref;
+
+  for (i = 0; i < 5; ++i) {
+    src = src * (i + 1);
+
+    res_ref = calc_shrx_u64 (src, i + 1);
+    res = src >> (i + 1);
+
+    if (res != res_ref)
+      abort();
+  }
+}
diff --git a/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c
new file mode 100644
index 0000000..7830439
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/bmi2-shrx64-1a.c
@@ -0,0 +1,6 @@
+/* { dg-do compile { target { ! ia32 } } } */
+/* { dg-options "-O2 -mbmi2 -dp" } */
+
+#include "bmi2-shrx64-1.c"
+
+/* { dg-final { scan-assembler-times "bmi2_lshrdi3" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/i386.exp b/gcc/testsuite/gcc.target/i386/i386.exp
index 167b79b..cff8a9a 100644
--- a/gcc/testsuite/gcc.target/i386/i386.exp
+++ b/gcc/testsuite/gcc.target/i386/i386.exp
@@ -206,6 +206,17 @@ proc check_effective_target_bmi { } {
     } "-mbmi" ]
 }
 
+# Return 1 if bmi2 instructions can be compiled.
+proc check_effective_target_bmi2 { } {
+    return [check_no_compiler_messages bmi2 object {
+	unsigned int
+	_bzhi_u32 (unsigned int __X, unsigned int __Y)
+	{
+	    return __builtin_ia32_bzhi_si (__X, __Y);
+	}
+    } "-mbmi2" ]
+}
+
 # If the linker used understands -M <mapfile>, pass it to clear hardware
 # capabilities set by the Sun assembler.
 set clearcap_ldflags "-Wl,-M,$srcdir/$subdir/clearcap.map"
diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c
index 59e659e..db94983 100644
--- a/gcc/testsuite/gcc.target/i386/sse-12.c
+++ b/gcc/testsuite/gcc.target/i386/sse-12.c
@@ -3,7 +3,7 @@
    popcntintrin.h and mm_malloc.h are usable
    with -O -std=c89 -pedantic-errors.  */
 /* { dg-do compile } */
-/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 #include <x86intrin.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 836272d..6b02df7 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 #include <mm_malloc.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c
index af42781..e5216b1 100644
--- a/gcc/testsuite/gcc.target/i386/sse-14.c
+++ b/gcc/testsuite/gcc.target/i386/sse-14.c
@@ -1,5 +1,5 @@
 /* { dg-do compile } */
-/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
+/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c" } */
 
 #include <mm_malloc.h>
 
diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c
index 0a7af03..4660ba0 100644
--- a/gcc/testsuite/gcc.target/i386/sse-22.c
+++ b/gcc/testsuite/gcc.target/i386/sse-22.c
@@ -46,7 +46,7 @@
 
 
 #ifndef DIFFERENT_PRAGMAS
-#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c")
+#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c")
 #endif
 
 /* Following intrinsics require immediate arguments.  They
@@ -220,9 +220,9 @@ test_2 (_mm_clmulepi64_si128, __m128i, __m128i, __m128i, 1)
 #endif
 #include <popcntintrin.h>
 
-/* x86intrin.h (FMA4/XOP/LWP/BMI/TBM/LZCNT). */
+/* x86intrin.h (FMA4/XOP/LWP/BMI/BMI2/TBM/LZCNT). */
 #ifdef DIFFERENT_PRAGMAS
-#pragma GCC target ("fma4,xop,lwp,bmi,tbm,lzcnt")
+#pragma GCC target ("fma4,xop,lwp,bmi,bmi2,tbm,lzcnt")
 #endif
 #include <x86intrin.h>
 /* xopintrin.h */
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 8d0c3233..b2e5255 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -147,7 +147,7 @@
 #define __builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextr_u32 (X, 1)
 #define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextr_u64 (X, 1)
 
-#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,tbm,lwp,fsgsbase,rdrnd,f16c")
+#pragma GCC target ("sse4a,3dnow,avx,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c")
 #include <wmmintrin.h>
 #include <smmintrin.h>
 #include <mm3dnow.h>

[-- Attachment #3: ChangeLog --]
[-- Type: application/octet-stream, Size: 3285 bytes --]

2011-08-21  Uros Bizjak  <ubizjak@gmail.com>

       * config/i386/i386.md (type): Add imulx, ishiftx and rotatex.
       (length_immediate): Handle imulx, ishiftx and rotatex.
       (imm_disp): Ditto.
       (isa): Add bmi2.
       (enabled): Handle bmi2.
       (w): New mode attribute.

       (*mul<mode><dwi>3): Split from *<u>mul<mode><dwi>3.
       (*umul<mode><dwi>3): Ditto.  Add imulx BMI2 alternative.
       (*bmi2_umulditi3_1): New insn pattern.
       (*bmi2_umulsidi3_1): Ditto.
       (*umul<mode><dwi>3 splitter): New splitter to avoid flags dependency.

       (*bmi2_ashl<mode>3_1): New insn pattern.
       (*ashl<mode>3_1): Add ishiftx BMI2 alternative.
       (*ashl<mode>3_1 splitter): New splitter to avoid flags dependency.
       (*bmi2_ashlsi3_1_zext): New insn pattern.
       (*ashlsi3_1_zext): Add ishiftx BMI2 alternative.
       (*ashlsi3_1_zext splitter): New splitter to avoid flags dependency.

       (*bmi2_<shiftrt_insn><mode>3_1): New insn pattern.
       (*<shiftrt_insn><mode>3_1): Add ishiftx BMI2 alternative.
       (*<shiftrt_insn><mode>3_1 splitter): New splitter to avoid
       flags dependency.
       (*bmi2_<shiftrt_insn>si3_1_zext): New insn pattern.
       (*<shiftrt_insn>si3_1_zext): Add ishiftx BMI2 alternative.
       (*<shiftrt_insn>si3_1_zext splitter): New splitter to avoid
       flags dependency.

       (*bmi2_rorx<mode>3_1): New insn pattern.
       (*<rotate_insn><mode>3_1): Add rotatex BMI2 alternative.
       (*rotate<mode>3_1 splitter): New splitter to avoid flags dependency.
       (*rotatert<mode>3_1 splitter): Ditto.
       (*bmi2_rorxsi3_1_zext): New insn pattern.
       (*<rotate_insn>si3_1_zext): Add rotatex BMI2 alternative.
       (*rotatesi3_1_zext  splitter): New splitter to avoid flags dependency.
       (*rotatertsi3_1_zext splitter): Ditto.

2011-08-21  Kirill Yukhin  <kirill.yukhin@intel.com>

	* common/config/i386/i386-common.c (OPTION_MASK_ISA_BMI2_SET):
	New.
	(OPTION_MASK_ISA_BMI2_UNSET): Likewise.
	(ix86_handle_option): Handle OPT_mbmi2 case.
	* config.gcc (i[34567]86-*-*): Add bmi2intrin.h.
	(x86_64-*-*): Likewise.
	* config/i386/bmi2intrin.h: New file.
	* config/i386/cpuid.h (bit_BMI2): New.
	* config/i386/driver-i386.c (host_detect_local_cpu): Detect
	BMI2 feature.
	* config/i386/i386-c.c (ix86_target_macros_internal):
	Conditionally define __BMI2__.
	* config/i386/i386.c (ix86_option_override_internal): Define PTA_BMI2.
	Handle BMI2 option.
	(ix86_valid_target_attribute_inner_p): Handle BMI2 option.
	(print_reg): New code.
	(ix86_print_operand): Likewise.
	(ix86_builtins): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
	IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
	IX86_BUILTIN_PEXT64.
	(bdesc_args): Add IX86_BUILTIN_BZHI32, IX86_BUILTIN_BZHI64,
	IX86_BUILTIN_PDEP32, IX86_BUILTIN_PDEP64, IX86_BUILTIN_PEXT32,
	IX86_BUILTIN_PEXT64.
	* config/i386/i386.h (TARGET_BMI2): New.
	* config/i386/i386.md (UNSPEC_PDEP): New.
	(UNSPEC_PEXT): Likewise.
	(*bmi2_bzhi_<mode>3): Likewise.
	(*bmi2_pdep_<mode>3): Likewise.
	(*bmi2_pext_<mode>3): Likewise.
	* config/i386/i386.opt (mbmi2): New.
	* config/i386/x86intrin.h: Include bmi2intrin.h when __BMI2__
	is defined.
	* doc/extend.texi: Document BMI2 built-in functions.
	* doc/invoke.texi: Document -mbmi2.

[-- Attachment #4: ChangeLog.testsuite --]
[-- Type: application/octet-stream, Size: 1885 bytes --]

2011-08-18  Kirill Yukhin  <kirill.yukhin@intel.com>

	* g++.dg/other/i386-2.C: Add -mbmi2 check.
	* g++.dg/other/i386-3.C: Likewise.
	* gcc.target/i386/bmi2-bzhi32-1.c: New testcase.
	* gcc.target/i386/bmi2-bzhi32-1a.c: Likewise.
	* gcc.target/i386/bmi2-bzhi64-1.c: Likewise.
	* gcc.target/i386/bmi2-bzhi64-1a.c: Likewise.
	* gcc.target/i386/bmi2-mulx32-1.c: Likewise.
	* gcc.target/i386/bmi2-mulx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-mulx64-1.c: Likewise.
	* gcc.target/i386/bmi2-mulx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-pdep32-1.c: Likewise.
	* gcc.target/i386/bmi2-pdep32-1a.c: Likewise.
	* gcc.target/i386/bmi2-pdep64-1.c: Likewise.
	* gcc.target/i386/bmi2-pdep64-1a.c: Likewise.
	* gcc.target/i386/bmi2-pext32-1.c: Likewise.
	* gcc.target/i386/bmi2-pext32-1a.c: Likewise.
	* gcc.target/i386/bmi2-pext64-1.c: Likewise.
	* gcc.target/i386/bmi2-pext64-1a.c: Likewise.
	* gcc.target/i386/bmi2-rorx32-1.c: Likewise.
	* gcc.target/i386/bmi2-rorx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-rorx64-1.c: Likewise.
	* gcc.target/i386/bmi2-rorx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-sarx32-1.c: Likewise.
	* gcc.target/i386/bmi2-sarx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-sarx64-1.c: Likewise.
	* gcc.target/i386/bmi2-sarx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-shlx32-1.c: Likewise.
	* gcc.target/i386/bmi2-shlx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-shlx64-1.c: Likewise.
	* gcc.target/i386/bmi2-shlx64-1a.c: Likewise.
	* gcc.target/i386/bmi2-shrx32-1.c: Likewise.
	* gcc.target/i386/bmi2-shrx32-1a.c: Likewise.
	* gcc.target/i386/bmi2-shrx64-1.c: Likewise.
	* gcc.target/i386/bmi2-shrx64-1a.c: Likewise.
	* gcc.target/i386/i386.exp (check_effective_target_bmi2): New.
	* gcc.target/i386/sse-12.c: Add BMI2.
	* gcc.target/i386/sse-13.c: Likewise.
	* gcc.target/i386/sse-14.c: Likewise.
	* gcc.target/i386/sse-22.c: Likewise.
	* gcc.target/i386/sse-23.c: Likewise.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-23 17:01       ` Kirill Yukhin
@ 2011-08-23 17:06         ` Uros Bizjak
  2011-08-23 17:52           ` Kirill Yukhin
  0 siblings, 1 reply; 8+ messages in thread
From: Uros Bizjak @ 2011-08-23 17:06 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: Jakub Jelinek, Richard Henderson, H.J. Lu, gcc-patches List

On Tue, Aug 23, 2011 at 6:22 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:

> thanks. I've applied your inputs.
>
> Updated patch, ChangeLog, testsuite/ChangeLog are attached.
>
> Are they ok now?

OK for mainline.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-23 17:06         ` Uros Bizjak
@ 2011-08-23 17:52           ` Kirill Yukhin
  2011-08-23 18:43             ` H.J. Lu
  0 siblings, 1 reply; 8+ messages in thread
From: Kirill Yukhin @ 2011-08-23 17:52 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, H.J. Lu, gcc-patches List

Great! Thanks.

Could anybody please commit that?

K

On Tue, Aug 23, 2011 at 8:53 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Tue, Aug 23, 2011 at 6:22 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
>
>> thanks. I've applied your inputs.
>>
>> Updated patch, ChangeLog, testsuite/ChangeLog are attached.
>>
>> Are they ok now?
>
> OK for mainline.
>
> Thanks,
> Uros.
>

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part
  2011-08-23 17:52           ` Kirill Yukhin
@ 2011-08-23 18:43             ` H.J. Lu
  0 siblings, 0 replies; 8+ messages in thread
From: H.J. Lu @ 2011-08-23 18:43 UTC (permalink / raw)
  To: Kirill Yukhin
  Cc: Uros Bizjak, Jakub Jelinek, Richard Henderson, gcc-patches List

On Tue, Aug 23, 2011 at 9:55 AM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Great! Thanks.
>
> Could anybody please commit that?

Done.

Thanks.

> K
>
> On Tue, Aug 23, 2011 at 8:53 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Tue, Aug 23, 2011 at 6:22 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
>>
>>> thanks. I've applied your inputs.
>>>
>>> Updated patch, ChangeLog, testsuite/ChangeLog are attached.
>>>
>>> Are they ok now?
>>
>> OK for mainline.
>>
>> Thanks,
>> Uros.
>>
>



-- 
H.J.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2011-08-23 17:02 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-21 18:23 [PATCH v3, i386] BMI2 support for GCC, mulx, rorx, <shift>x part Uros Bizjak
2011-08-22  9:54 ` Uros Bizjak
2011-08-23 11:35   ` Kirill Yukhin
2011-08-23 12:21     ` Uros Bizjak
2011-08-23 17:01       ` Kirill Yukhin
2011-08-23 17:06         ` Uros Bizjak
2011-08-23 17:52           ` Kirill Yukhin
2011-08-23 18:43             ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).