public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [AVX]: More AVX updates
@ 2008-05-23 18:48 H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2008-05-23 18:48 UTC (permalink / raw)
  To: gcc-patches

Hi,

I am checking this patch into the AVX branch. Now we can generate
pure AVX code for all intrinsics in 64-bit mode.

Thanks.


H.J.
---
2008-05-23  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.md (maxmin): New.
	(*avx_<code><mode>3): Likewise.
	(*avx_ieee_smin<mode>3): Likewise.
	(*avx_ieee_smax<mode>3): Likewise.
	(*movdi_1_rex64): Support AVX.

	* config/i386/mmx.md (*movv2sf_internal_rex64_avx): New.
	(*movv2sf_internal_avx): Likewise.

	* config/i386/sse.md (*avx_vm<plusminus_insn><mode>3): New.
	(*avx_div<mode>3): Likewise.
	(*avx_<code><mode>3_finite): Likewise.
	(*avx_<code><mode>3): Likewise.
	(*avx_vm<code><mode>3): Likewise.
	(*avx_pmaddwd): Likewise.
	(*avx_<code><mode>3): Likewise.
	(<code><mode>3): Check flag_finite_math_only.
	(*avx_<code><mode>3): Moved before "*<code><mode>3".
	(sse_stmxcsr): Support AVX.
	(abs<mode>2): Likewise.
	
2008-05-23  Joey Ye  <joey.ye@intel.com>

	* config/i386/i386.md (*movsf_1): Insert the missing 'v'.
	(fix_trunc<mode>di_sse): Support AVX.
	(fix_trunc<mode>si_sse): Likewise.
	(*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit): Likewise.

diff -x LAST_UPDATED -x REVISION -x gcc_update -x .svn -upr -upr ../../gcc-avx/gcc/config/i386/i386.md gcc/gcc/config/i386/i386.md
--- ../../gcc-avx/gcc/config/i386/i386.md	2008-05-23 10:35:13.000000000 -0700
+++ gcc/gcc/config/i386/i386.md	2008-05-23 10:38:06.000000000 -0700
@@ -626,6 +626,9 @@
 ;; Mapping of unsigned max and min
 (define_code_iterator umaxmin [umax umin])
 
+;; Mapping of signed/unsigned max and min
+(define_code_iterator maxmin [smax smin umax umin])
+
 ;; Base name for integer and FP insn mnemonic
 (define_code_attr maxminiprefix [(smax "maxs") (smin "mins")
 				 (umax "maxu") (umin "minu")])
@@ -2336,6 +2339,14 @@
 	return "movdq2q\t{%1, %0|%0, %1}";
 
     case TYPE_SSEMOV:
+      if (TARGET_AVX)
+	{
+	  if (get_attr_mode (insn) == MODE_TI)
+	    return "vmovdqa\t{%1, %0|%0, %1}";
+	  else
+	    return "vmovq\t{%1, %0|%0, %1}";
+	}
+
       if (get_attr_mode (insn) == MODE_TI)
 	return "movdqa\t{%1, %0|%0, %1}";
       /* FALLTHRU */
@@ -2348,6 +2359,8 @@
       return "movq\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
+      return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+
     case TYPE_MMXADD:
       return "pxor\t%0, %0";
 
@@ -2386,6 +2399,10 @@
 	   (const_string "imov")))
    (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
    (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "11,12,13,14,15,16")
+       (const_string "maybe_vex")
+       (const_string "orig")))
    (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,DI,DI,DI,TI,TI,DI,DI,DI,DI,DI,DI")])
 
 ;; Stores and loads of ax to arbitrary constant address.
@@ -2727,7 +2744,7 @@
       if (get_attr_mode (insn) == MODE_TI)
 	return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
       else
-	return TARGET_AVX ? "xorps\t%0, %0, %0": "xorps\t%0, %0";
+	return TARGET_AVX ? "vxorps\t%0, %0, %0": "xorps\t%0, %0";
     case 6:
       if (get_attr_mode (insn) == MODE_V4SF)
 	return TARGET_AVX ? "vmovaps\t{%1, %0|%0, %1}"
@@ -4707,8 +4724,10 @@
 	(fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
   "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}\"
+                       : \"cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")])
@@ -4718,8 +4737,11 @@
 	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
   "SSE_FLOAT_MODE_P (<MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX 
+            ? \"vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\"
+            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")])
@@ -5389,8 +5411,13 @@
   "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
    && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX
+       ? \"vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t\"
+         \"{%1, %0, %0|%0, %0, %1}\"
+       : \"cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t\"
+         \"{%1, %0|%0, %1}\";"
   [(set_attr "type" "sseicvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "double,direct")
    (set_attr "amdfam10_decode" "vector,double")
@@ -19863,6 +19890,17 @@
 ;; Since both the tree-level MAX_EXPR and the rtl-level SMAX operator
 ;; are undefined in this condition, we're certain this is correct.
 
+(define_insn "*avx_<code><mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(smaxmin:MODEF
+	  (match_operand:MODEF 1 "nonimmediate_operand" "%x")
+	  (match_operand:MODEF 2 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "v<maxminfprefix>s<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<code><mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "=x")
 	(smaxmin:MODEF
@@ -19879,6 +19917,18 @@
 ;; Their operands are not commutative, and thus they may be used in the
 ;; presence of -0.0 and NaN.
 
+(define_insn "*avx_ieee_smin<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "x")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MIN))]
+  "TARGET_AVX && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "vmins<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ieee_smin<mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "=x")
 	(unspec:MODEF
@@ -19890,6 +19940,18 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*avx_ieee_smax<mode>3"
+  [(set (match_operand:MODEF 0 "register_operand" "=x")
+	(unspec:MODEF
+	  [(match_operand:MODEF 1 "register_operand" "0")
+	   (match_operand:MODEF 2 "nonimmediate_operand" "xm")]
+	 UNSPEC_IEEE_MAX))]
+  "TARGET_AVX && SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
+  "vmaxs<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*ieee_smax<mode>3"
   [(set (match_operand:MODEF 0 "register_operand" "=x")
 	(unspec:MODEF
diff -x LAST_UPDATED -x REVISION -x gcc_update -x .svn -upr -upr ../../gcc-avx/gcc/config/i386/mmx.md gcc/gcc/config/i386/mmx.md
--- ../../gcc-avx/gcc/config/i386/mmx.md	2008-05-23 10:35:13.000000000 -0700
+++ gcc/gcc/config/i386/mmx.md	2008-05-23 10:38:06.000000000 -0700
@@ -176,6 +176,35 @@
   DONE;
 })
 
+(define_insn "*movv2sf_internal_rex64_avx"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand"
+				"=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
+        (match_operand:V2SF 1 "vector_move_operand"
+				"Cr ,m ,C  ,!?ym,!y,Y2,!y,C,x,m,x,x,r"))]
+  "TARGET_64BIT && TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+    mov{q}\t{%1, %0|%0, %1}
+    mov{q}\t{%1, %0|%0, %1}
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
+    movdq2q\t{%1, %0|%0, %1}
+    movq2dq\t{%1, %0|%0, %1}
+    vxorps\t%0, %0, %0
+    vmovaps\t{%1, %0|%0, %1}
+    vmovlps\t{%1, %0, %0|%0, %0, %1}
+    vmovlps\t{%1, %0|%0, %1}
+    vmovq\t{%1, %0|%0, %1}
+    vmovq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,ssemov,sselog1,ssemov,ssemov,ssemov,ssemov")
+   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "7,8,9,10,11,12")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
 (define_insn "*movv2sf_internal_rex64"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
 				"=rm,r ,!?y,!?y ,m ,!y,Y2,x,x,x,m,r,x")
@@ -201,6 +230,33 @@
    (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*,*")
    (set_attr "mode" "DI,DI,DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
 
+(define_insn "*movv2sf_internal_avx"
+  [(set (match_operand:V2SF 0 "nonimmediate_operand"
+			"=!?y,!?y ,m  ,!y ,*Y2,*x,*x,*x,m ,r  ,m")
+        (match_operand:V2SF 1 "vector_move_operand"
+			"C   ,!?ym,!?y,*Y2,!y ,C ,*x,m ,*x,irm,r"))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
+    movdq2q\t{%1, %0|%0, %1}
+    movq2dq\t{%1, %0|%0, %1}
+    vxorps\t%0, %0, %0
+    vmovaps\t{%1, %0|%0, %1}
+    vmovlps\t{%1, %0, %0|%0, %0, %1}
+    vmovlps\t{%1, %0|%0, %1}
+    #
+    #"
+  [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,*,*")
+   (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI,DI,DI,DI,DI,V4SF,V4SF,V2SF,V2SF,DI,DI")])
+
 (define_insn "*movv2sf_internal"
   [(set (match_operand:V2SF 0 "nonimmediate_operand"
 			"=!?y,!?y ,m  ,!y ,*Y2,*x,*x,*x,m ,r  ,m")
diff -x LAST_UPDATED -x REVISION -x gcc_update -x .svn -upr -upr ../../gcc-avx/gcc/config/i386/sse.md gcc/gcc/config/i386/sse.md
--- ../../gcc-avx/gcc/config/i386/sse.md	2008-05-23 10:35:13.000000000 -0700
+++ gcc/gcc/config/i386/sse.md	2008-05-23 10:38:06.000000000 -0700
@@ -514,6 +514,20 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*avx_vm<plusminus_insn><mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (plusminus:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "x")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "TARGET_AVX && SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "v<plusminus_mnemonic>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_insn "<sse>_vm<plusminus_insn><mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(vec_merge:SSEMODEF2P
@@ -652,6 +666,17 @@
   "TARGET_SSE2"
   "")
 
+(define_insn "*avx_div<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(div:SSEMODEF2P
+	  (match_operand:SSEMODEF2P 1 "register_operand" "x")
+	  (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX && SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "vdivp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssediv")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "<sse>_div<mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(div:SSEMODEF2P
@@ -902,18 +927,11 @@
 	  (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
 	  (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
   "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
-  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
-
-(define_insn "*avx_<code><mode>3"
-  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
-	(smaxmin:AVXMODEF2P
-	  (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
-	  (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
-  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
-  "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseadd")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "<avxvecmode>")])
+{
+  if (!flag_finite_math_only)
+    operands[1] = force_reg (<MODE>mode, operands[1]);
+  ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+})
 
 (define_expand "<code><mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
@@ -927,6 +945,18 @@
   ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
 })
 
+(define_insn "*avx_<code><mode>3_finite"
+  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+	(smaxmin:AVXMODEF2P
+	  (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+	  (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "AVX_VEC_FLOAT_MODE_P (<MODE>mode) && flag_finite_math_only
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "v<maxminfprefix>p<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*<code><mode>3_finite"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(smaxmin:SSEMODEF2P
@@ -938,6 +968,17 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*avx_<code><mode>3"
+  [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
+	(smaxmin:AVXMODEF2P
+	  (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
+	  (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
+  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "v<maxminfprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<avxvecmode>")])
+
 (define_insn "*<code><mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(smaxmin:SSEMODEF2P
@@ -948,6 +989,20 @@
   [(set_attr "type" "sseadd")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*avx_vm<code><mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (smaxmin:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "x")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+	 (match_dup 1)
+	 (const_int 1)))]
+  "TARGET_AVX && SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "v<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sse")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_insn "<sse>_vm<code><mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(vec_merge:SSEMODEF2P
@@ -4817,6 +4872,43 @@
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
 
+(define_insn "*avx_pmaddwd"
+  [(set (match_operand:V4SI 0 "register_operand" "=x")
+	(plus:V4SI
+	  (mult:V4SI
+	    (sign_extend:V4SI
+	      (vec_select:V4HI
+		(match_operand:V8HI 1 "nonimmediate_operand" "%x")
+		(parallel [(const_int 0)
+			   (const_int 2)
+			   (const_int 4)
+			   (const_int 6)])))
+	    (sign_extend:V4SI
+	      (vec_select:V4HI
+		(match_operand:V8HI 2 "nonimmediate_operand" "xm")
+		(parallel [(const_int 0)
+			   (const_int 2)
+			   (const_int 4)
+			   (const_int 6)]))))
+	  (mult:V4SI
+	    (sign_extend:V4SI
+	      (vec_select:V4HI (match_dup 1)
+		(parallel [(const_int 1)
+			   (const_int 3)
+			   (const_int 5)
+			   (const_int 7)])))
+	    (sign_extend:V4SI
+	      (vec_select:V4HI (match_dup 2)
+		(parallel [(const_int 1)
+			   (const_int 3)
+			   (const_int 5)
+			   (const_int 7)]))))))]
+  "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
+  "vpmaddwd\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_insn "*sse2_pmaddwd"
   [(set (match_operand:V4SI 0 "register_operand" "=x")
 	(plus:V4SI
@@ -5343,6 +5435,17 @@
   operands[1] = gen_lowpart (TImode, operands[1]);
 })
 
+(define_insn "*avx_<code><mode>3"
+  [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
+	(maxmin:SSEMODE124
+	  (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
+	  (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
+  "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
+  "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sseiadd")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "TI")])
+
 (define_expand "<code>v16qi3"
   [(set (match_operand:V16QI 0 "register_operand" "")
 	(umaxmin:V16QI
@@ -7520,8 +7623,10 @@
   [(set (match_operand:SI 0 "memory_operand" "=m")
 	(unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
   "TARGET_SSE"
-  "stmxcsr\t%0"
+  "* return TARGET_AVX ? \"vstmxcsr\t%0\"
+                       : \"stmxcsr\t%0\";"
   [(set_attr "type" "sse")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "store")])
 
 (define_expand "sse_sfence"
@@ -8596,10 +8701,12 @@
   [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
 	(abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSSE3"
-  "pabs<ssevecsize>\t{%1, %0|%0, %1}";
+  "* return TARGET_AVX ? \"vpabs<ssevecsize>\t{%1, %0|%0, %1}\"
+		       : \"pabs<ssevecsize>\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "sselog1")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "TI")])
 
 (define_insn "abs<mode>2"

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-25  9:02     ` H.J. Lu
@ 2008-05-26 15:17       ` H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2008-05-26 15:17 UTC (permalink / raw)
  To: Uros Bizjak, Joey Ye, Xuepeng Guo; +Cc: GCC Patches

On Sat, May 24, 2008 at 06:42:54PM -0700, H.J. Lu wrote:
> On Sat, May 24, 2008 at 10:19:06AM -0700, H.J. Lu wrote:
> > On Fri, May 23, 2008 at 02:48:45PM -0700, H.J. Lu wrote:
> > > Hi Uros,
> > > 
> > > How about this patch?
> > > 
> > 
> > Here is the updated patch. I added %v as prefix and %d to
> > print_reg. We can add AVX support to most SSE patterns directly.
> > 
> > 
> > H.J.
> > ---
> > 2008-05-24  H.J. Lu  <hongjiu.lu@intel.com>
> > 
> > 	* config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
> > 	define.  Use ASM_OUTPUT_AVX_PREFIX.
> > 
> > 	* config/i386/i386.c (print_reg): Handle 'd' to duplicate
> > 	the operand.
> > 	(print_operand): Handle 'd'.
> > 
> > 	* config/i386/i386.h (ASM_OUTPUT_AVX_PREFIX): New.
> > 	(ASM_OUTPUT_OPCODE): Likewise.
> > 
> > 	* config/i386/i386.md (*movdi_2): Support AVX.
> > 	(*movdf_nointeger): Likewise.
> > 
> > 	* config/i386/mmx.md (*mov<mode>_internal_rex64_avx): Removed.
> > 	(*mov<mode>_internal_rex64): Support AVX.
> > 
> > 	* config/i386/sse.md (*avx_storehps): Removed.
> > 	(sse_storehps): Support AVX.
> > 	(*vec_dupv2df): Remove AVX support.
> > 
> 
> We should print duplicated register operand only for AVX instruction.
> Here is the updated patch.
> 
> 

I am checking this patch into AVX branch to use "%v" and "%d0" for
AVX support in i386.md.

Thanks.


H.J.
---
2008-05-26  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/i386.md (*movsi_1): Use "%v" and "%d0" for AVX
	support.
	(*movsf_1): Likewise.
	(*movdf_integer_rex64): Likewise.
	(*extendsfdf2_mixed): Likewise.
	(*truncdfsf_fast_mixed): Likewise.
	(*truncdfsf_fast_sse): Likewise.
	(*truncdfsf_mixed): Likewise.
	(fix_trunc<mode>di_sse): Likewise.
	(*extendsfdf2_sse): Likewise.
	(fix_trunc<mode>si_sse): Likewise.
	(*float<SSEMODEI24:mode><MODEF:mode>2_sse_interunit): Likewise.
	(*rcpsf2_sse): Likewise.
	(*rsqrtsf2_sse): Likewise.
	(*sqrt<mode>2_sse): Likewise.
	(sse4_1_round<mode>2): Likewise.

Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 2763)
+++ config/i386/i386.md	(working copy)
@@ -1509,26 +1509,20 @@
     {
     case TYPE_SSELOG1:
       if (get_attr_mode (insn) == MODE_TI)
-        return TARGET_AVX ? "vpxor\t%0, %0, %0"
-                          : "pxor\t%0, %0";
-      return TARGET_AVX ? "vxorps\t%0, %0, %0"
-                        : "xorps\t%0, %0";
+        return "%vpxor\t%0, %d0";
+      return "%vxorps\t%0, %d0";
 
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
 	{
 	case MODE_TI:
-	  return TARGET_AVX ? "vmovdqa\t{%1, %0|%0, %1}"
-	                    : "movdqa\t{%1, %0|%0, %1}";
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_V4SF:
-	  return TARGET_AVX ? "vmovaps\t{%1, %0|%0, %1}"
-	                    : "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_SI:
-          return TARGET_AVX ? "vmovd\t{%1, %0|%0, %1}"
-                            : "movd\t{%1, %0|%0, %1}";
+          return "%vmovd\t{%1, %0|%0, %1}";
 	case MODE_SF:
-          return TARGET_AVX ? "vmovss\t{%1, %0|%0, %1}"
-                            : "movss\t{%1, %0|%0, %1}";
+          return "%vmovss\t{%1, %0|%0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2364,7 +2358,7 @@
       return "movq\t{%1, %0|%0, %1}";
 
     case TYPE_SSELOG1:
-      return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+      return "%vpxor\t%0, %d0";
 
     case TYPE_MMXADD:
       return "pxor\t%0, %0";
@@ -2747,16 +2741,14 @@
       return "mov{l}\t{%1, %0|%0, %1}";
     case 5:
       if (get_attr_mode (insn) == MODE_TI)
-	return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+	return "%vpxor\t%0, %d0";
       else
-	return TARGET_AVX ? "vxorps\t%0, %0, %0": "xorps\t%0, %0";
+	return "%vxorps\t%0, %d0";
     case 6:
       if (get_attr_mode (insn) == MODE_V4SF)
-	return TARGET_AVX ? "vmovaps\t{%1, %0|%0, %1}"
-			  : "movaps\t{%1, %0|%0, %1}";
+	return "%vmovaps\t{%1, %0|%0, %1}";
       else
-	return TARGET_AVX ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
-			  : "movss\t{%1, %0|%0, %1}";
+	return "%vmovss\t{%1, %d0|%d0, %1}";
     case 7:
       if (TARGET_AVX)
 	return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
@@ -2764,14 +2756,12 @@
       else
 	return "movss\t{%1, %0|%0, %1}";
     case 8:
-      return TARGET_AVX ? "vmovss\t{%1, %0|%0, %1}"
-			: "movss\t{%1, %0|%0, %1}";
+      return "%vmovss\t{%1, %0|%0, %1}";
 
     case 9: case 10: case 14: case 15:
       return "movd\t{%1, %0|%0, %1}";
     case 12: case 13:
-      return TARGET_AVX ? "vmovd\t{%1, %0|%0, %1}"
-			: "movd\t{%1, %0|%0, %1}";
+      return "%vmovd\t{%1, %0|%0, %1}";
 
     case 11:
       return "movq\t{%1, %0|%0, %1}";
@@ -3077,11 +3067,11 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
+	  return "%vxorps\t%0, %d0";
 	case MODE_V2DF:
-	  return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
+	  return "%vxorpd\t%0, %d0";
 	case MODE_TI:
-	  return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
+	  return "%vpxor\t%0, %d0";
 	default:
 	  gcc_unreachable ();
 	}
@@ -3091,17 +3081,13 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return TARGET_AVX ? "vmovaps\t{%1, %0|%0, %1}" 
-	                    : "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2DF:
-	  return TARGET_AVX ? "vmovapd\t{%1, %0|%0, %1}" 
-	                    : "movapd\t{%1, %0|%0, %1}";
+	  return "%vmovapd\t{%1, %0|%0, %1}";
 	case MODE_TI:
-	  return TARGET_AVX ? "vmovdqa\t{%1, %0|%0, %1}"
-	                    : "movdqa\t{%1, %0|%0, %1}";
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_DI:
-	  return TARGET_AVX ? "vmovq\t{%1, %0|%0, %1}"
-	                    : "movq\t{%1, %0|%0, %1}";
+	  return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_DF:
 	  if (TARGET_AVX)
 	    {
@@ -3113,21 +3099,16 @@
 	  else
 	    return "movsd\t{%1, %0|%0, %1}";
 	case MODE_V1DF:
-	  return TARGET_AVX ? "vmovlpd\t{%1, %0|%0, %1}"
-	                    : "movlpd\t{%1, %0|%0, %1}";
+	  return "%vmovlpd\t{%1, %d0|%d0, %1}";
 	case MODE_V2SF:
-	  return TARGET_AVX ? "vmovlps\t{%1, %0|%0, %1}"
-	                    : "movlps\t{%1, %0|%0, %1}";
+	  return "%vmovlps\t{%1, %d0|%d0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
 
     case 9:
     case 10:
-      if (TARGET_AVX)
-	return "vmovq\t{%1, %0|%0, %1}";
-      else
-	return "movd\t{%1, %0|%0, %1}";
+    return "%vmovq\t{%1, %0|%0, %1}";
 
     default:
       gcc_unreachable();
@@ -4265,10 +4246,7 @@
       return output_387_reg_move (insn, operands);
 
     case 2:
-      if (TARGET_AVX)
-	return "vcvtss2sd\t{%1, %0, %0|%0, %0, %1}";
-      else
-	return "cvtss2sd\t{%1, %0|%0, %1}";
+      return "%vcvtss2sd\t{%1, %d0|%d0, %1}";
 
     default:
       gcc_unreachable ();
@@ -4282,8 +4260,7 @@
   [(set (match_operand:DF 0 "nonimmediate_operand" "=x")
         (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2 && TARGET_SSE_MATH"
-  "* return TARGET_AVX ? \"vcvtss2sd\t{%1, %0, %0|%0, %0, %1}\"
-                       : \"cvtss2sd\t{%1, %0|%0, %1}\";"
+  "%vcvtss2sd\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "DF")])
@@ -4414,10 +4391,7 @@
     case 0:
       return output_387_reg_move (insn, operands);
     case 1:
-      if (TARGET_AVX)
-	return "vcvtsd2ss\t{%1, %0, %0|%0, %0, %1}";
-      else
-	return "cvtsd2ss\t{%1, %0|%0, %1}";
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
     default:
       gcc_unreachable ();
     }
@@ -4433,8 +4407,7 @@
         (float_truncate:SF
           (match_operand:DF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2 && TARGET_SSE_MATH"
-  "* return TARGET_AVX ? \"vcvtsd2ss\t{%1, %0, %0|%0, %0, %1}\"
-                       : \"cvtsd2ss\t{%1, %0|%0, %1}\";"
+  "%vcvtsd2ss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
@@ -4463,10 +4436,7 @@
     case 1:
       return "#";
     case 2:
-      if (TARGET_AVX)
-	return "vcvtsd2ss\t{%1, %0, %0|%0, %0, %1}";
-      else
-	return "cvtsd2ss\t{%1, %0|%0, %1}";
+      return "%vcvtsd2ss\t{%1, %d0|%d0, %1}";
     default:
       gcc_unreachable ();
     }
@@ -4757,8 +4727,7 @@
 	(fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
   "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "* return TARGET_AVX ? \"vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}\"
-                       : \"cvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}\";"
+  "%vcvtts<ssemodefsuffix>2si{q}\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
@@ -4770,9 +4739,7 @@
 	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
   "SSE_FLOAT_MODE_P (<MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "* return TARGET_AVX 
-            ? \"vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\"
-            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
+  "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")
@@ -5444,11 +5411,7 @@
   "(<SSEMODEI24:MODE>mode != DImode || TARGET_64BIT)
    && SSE_FLOAT_MODE_P (<MODEF:MODE>mode) && TARGET_SSE_MATH
    && (TARGET_INTER_UNIT_CONVERSIONS || optimize_size)"
-  "* return TARGET_AVX
-       ? \"vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t\"
-         \"{%1, %0, %0|%0, %0, %1}\"
-       : \"cvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t\"
-         \"{%1, %0|%0, %1}\";"
+  "%vcvtsi2s<MODEF:ssemodefsuffix><SSEMODEI24:rex64suffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODEF:MODE>")
@@ -16196,8 +16159,7 @@
 	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
 		   UNSPEC_RCP))]
   "TARGET_SSE_MATH"
-  "* return TARGET_AVX ? \"vrcpss\t{%1, %0, %0|%0, %0, %1}\"
-                       : \"rcpss\t{%1, %0|%0, %1}\";"
+  "%vrcpss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
@@ -16774,8 +16736,7 @@
 	(unspec:SF [(match_operand:SF 1 "nonimmediate_operand" "xm")]
 		   UNSPEC_RSQRT))]
   "TARGET_SSE_MATH"
-  "* return TARGET_AVX ? \"vrsqrtss\t{%1, %0, %0|%0, %0, %1}\" 
-                       : \"rsqrtss\t{%1, %0|%0, %1}\";"
+  "%vrsqrtss\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
@@ -16795,8 +16756,7 @@
 	(sqrt:MODEF
 	  (match_operand:MODEF 1 "nonimmediate_operand" "xm")))]
   "SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH"
-  "* return TARGET_AVX ? \"vsqrts<ssemodefsuffix>\t{%1, %0, %0|%0, %0, %1}\"
-                       : \"sqrts<ssemodefsuffix>\t{%1, %0|%0, %1}\";"
+  "%vsqrts<ssemodefsuffix>\t{%1, %d0|%d0, %1}"
   [(set_attr "type" "sse")
    (set_attr "mode" "<MODE>")
    (set_attr "prefix" "maybe_vex")
@@ -17854,8 +17814,7 @@
 		       (match_operand:SI 2 "const_0_to_15_operand" "n")]
 		      UNSPEC_ROUND))]
   "TARGET_ROUND"
-  "* return TARGET_AVX ? \"vrounds<ssemodefsuffix>\t{%2, %1, %0, %0|%0, %0, %1, %2}\"
-                       : \"rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}\";"
+  "%vrounds<ssemodefsuffix>\t{%2, %1, %d0|%d0, %1, %2}"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1")
    (set_attr "prefix" "maybe_vex")

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-24 21:38   ` H.J. Lu
@ 2008-05-25  9:02     ` H.J. Lu
  2008-05-26 15:17       ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2008-05-25  9:02 UTC (permalink / raw)
  To: Uros Bizjak, Joey Ye, Xuepeng Guo; +Cc: GCC Patches

On Sat, May 24, 2008 at 10:19:06AM -0700, H.J. Lu wrote:
> On Fri, May 23, 2008 at 02:48:45PM -0700, H.J. Lu wrote:
> > Hi Uros,
> > 
> > How about this patch?
> > 
> 
> Here is the updated patch. I added %v as prefix and %d to
> print_reg. We can add AVX support to most SSE patterns directly.
> 
> 
> H.J.
> ---
> 2008-05-24  H.J. Lu  <hongjiu.lu@intel.com>
> 
> 	* config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
> 	define.  Use ASM_OUTPUT_AVX_PREFIX.
> 
> 	* config/i386/i386.c (print_reg): Handle 'd' to duplicate
> 	the operand.
> 	(print_operand): Handle 'd'.
> 
> 	* config/i386/i386.h (ASM_OUTPUT_AVX_PREFIX): New.
> 	(ASM_OUTPUT_OPCODE): Likewise.
> 
> 	* config/i386/i386.md (*movdi_2): Support AVX.
> 	(*movdf_nointeger): Likewise.
> 
> 	* config/i386/mmx.md (*mov<mode>_internal_rex64_avx): Removed.
> 	(*mov<mode>_internal_rex64): Support AVX.
> 
> 	* config/i386/sse.md (*avx_storehps): Removed.
> 	(sse_storehps): Support AVX.
> 	(*vec_dupv2df): Remove AVX support.
> 

We should print duplicated register operand only for AVX instruction.
Here is the updated patch.


H.J.
---
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 135851)
+++ config/i386/i386.h	(working copy)
@@ -2246,6 +2246,29 @@ do {									\
 #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
   ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
 
+/* When we see %v, we will print the 'v' prefix if TARGET_AVX is
+   true.  */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR)	\
+{						\
+  if ((PTR)[0] == '%' && (PTR)[1] == 'v')	\
+    {						\
+      if (TARGET_AVX)				\
+	(PTR) += 1;				\
+      else					\
+	(PTR) += 2;				\
+    }						\
+}
+
+/* A C statement or statements which output an assembler instruction
+   opcode to the stdio stream STREAM.  The macro-operand PTR is a
+   variable of type `char *' which points to the opcode name in
+   its "internal" form--the form that is written in the machine
+   description.  */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+  ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
 /* Under some conditions we need jump tables in the text section,
    because the assembler cannot handle label differences between
    sections.  This is the case for x86_64 on Mach-O for example.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 135851)
+++ config/i386/i386.md	(working copy)
@@ -2295,15 +2295,19 @@
    pxor\t%0, %0
    movq\t{%1, %0|%0, %1}
    movq\t{%1, %0|%0, %1}
-   pxor\t%0, %0
-   movq\t{%1, %0|%0, %1}
-   movdqa\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}
+   %vpxor\t%0, %d0
+   %vmovq\t{%1, %0|%0, %1}
+   %vmovdqa\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
    xorps\t%0, %0
    movlps\t{%1, %0|%0, %1}
    movaps\t{%1, %0|%0, %1}
    movlps\t{%1, %0|%0, %1}"
   [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
 
 (define_split
@@ -2928,11 +2932,11 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "xorps\t%0, %0";
+	  return "%vxorps\t%0, %d0";
 	case MODE_V2DF:
-	  return "xorpd\t%0, %0";
+	  return "%vxorpd\t%0, %d0";
 	case MODE_TI:
-	  return "pxor\t%0, %0";
+	  return "%vpxor\t%0, %d0";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2942,19 +2946,43 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2DF:
-	  return "movapd\t{%1, %0|%0, %1}";
+	  return "%vmovapd\t{%1, %0|%0, %1}";
 	case MODE_TI:
-	  return "movdqa\t{%1, %0|%0, %1}";
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_DI:
-	  return "movq\t{%1, %0|%0, %1}";
+	  return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_DF:
-	  return "movsd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
 	case MODE_V1DF:
-	  return "movlpd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlpd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlpd\t{%1, %0|%0, %1}";
 	case MODE_V2SF:
-	  return "movlps\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlps\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlps\t{%1, %0|%0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2964,6 +2992,10 @@
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
 		 (const_string "DF")
Index: config/i386/mmx.md
===================================================================
--- config/i386/mmx.md	(revision 135851)
+++ config/i386/mmx.md	(working copy)
@@ -63,12 +63,12 @@
   DONE;
 })
 
-(define_insn "*mov<mode>_internal_rex64_avx"
+(define_insn "*mov<mode>_internal_rex64"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
 	(match_operand:MMXMODEI8 1 "vector_move_operand"
 				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_AVX
+  "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     mov{q}\t{%1, %0|%0, %1}
@@ -78,43 +78,19 @@
     movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
-    vpxor\t%0, %0, %0
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}"
+    %vpxor\t%0, %d0
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
    (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
    (set (attr "prefix")
      (if_then_else (eq_attr "alternative" "7,8,9,10,11")
-       (const_string "vex")
+       (const_string "maybe_vex")
        (const_string "orig")))
    (set_attr "mode" "DI")])
 
-(define_insn "*mov<mode>_internal_rex64"
-  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
-				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
-	(match_operand:MMXMODEI8 1 "vector_move_operand"
-				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_MMX
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-    mov{q}\t{%1, %0|%0, %1}
-    mov{q}\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
-   (set_attr "mode" "DI")])
-
 (define_insn "*mov<mode>_internal_avx"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,r  ,m")
Index: config/i386/gas.h
===================================================================
--- config/i386/gas.h	(revision 135851)
+++ config/i386/gas.h	(working copy)
@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3.  
    GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
    So use `repe' instead.  */
 
+#undef ASM_OUTPUT_OPCODE
 #define ASM_OUTPUT_OPCODE(STREAM, PTR)	\
 {									\
   if ((PTR)[0] == 'r'							\
@@ -103,6 +104,8 @@ along with GCC; see the file COPYING3.  
 	  (PTR) += 5;							\
 	}								\
     }									\
+  else									\
+    ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR));				\
 }
 
 /* Define macro used to output shift-double opcodes when the shift
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 135851)
+++ config/i386/sse.md	(working copy)
@@ -3295,20 +3295,6 @@
   [(set_attr "type" "sselog")
    (set_attr "mode" "V4SF")])
 
-(define_insn "*avx_storehps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
-	(vec_select:V2SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
-	  (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_AVX"
-  "@
-   vmovhps\t{%1, %0|%0, %1}
-   vmovhlps\t{%1, %0, %0|%0, %0, %1}
-   vmovlps\t{%H1, %0, %0|%0, %0, %H1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V2SF,V4SF,V2SF")])
-
 (define_insn "sse_storehps"
   [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
 	(vec_select:V2SF
@@ -3316,10 +3302,11 @@
 	  (parallel [(const_int 2) (const_int 3)])))]
   "TARGET_SSE"
   "@
-   movhps\t{%1, %0|%0, %1}
-   movhlps\t{%1, %0|%0, %1}
-   movlps\t{%H1, %0|%0, %H1}"
+   %vmovhps\t{%1, %0|%0, %1}
+   %vmovhlps\t{%1, %d0|%d0, %1}
+   %vmovlps\t{%H1, %d0|%d0, %H1}"
   [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
 (define_expand "sse_loadhps_exp"
@@ -4425,10 +4412,8 @@
 	(vec_duplicate:V2DF
 	  (match_operand:DF 1 "register_operand" "0")))]
   "TARGET_SSE2"
-  "* return TARGET_AVX ? \"vunpcklpd\t%0, %0, %0\"
-                       : \"unpcklpd\t%0, %0\";"
+  "unpcklpd\t%0, %0"
   [(set_attr "type" "sselog1")
-   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
 (define_insn "*vec_concatv2df_sse3"
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 135851)
+++ config/i386/i386.c	(working copy)
@@ -8890,11 +8890,16 @@ put_condition_code (enum rtx_code code, 
    If CODE is 'x', pretend the mode is V4SFmode.
    If CODE is 't', pretend the mode is V8SFmode.
    If CODE is 'h', pretend the reg is the 'high' byte register.
-   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
+   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+   If CODE is 'd', duplicate the operand for AVX instruction.
+ */
 
 void
 print_reg (rtx x, int code, FILE *file)
 {
+  const char *reg;
+  bool duplicated = code == 'd' && TARGET_AVX;
+
   gcc_assert (x == pc_rtx
 	      || (REGNO (x) != ARG_POINTER_REGNUM
 		  && REGNO (x) != FRAME_POINTER_REGNUM
@@ -8959,12 +8964,14 @@ print_reg (rtx x, int code, FILE *file)
 	}
       return;
     }
+
+  reg = NULL;
   switch (code)
     {
     case 3:
       if (STACK_TOP_P (x))
 	{
-	  fputs ("st(0)", file);
+	  reg = "st(0)";
 	  break;
 	}
       /* FALLTHRU */
@@ -8977,28 +8984,39 @@ print_reg (rtx x, int code, FILE *file)
     case 16:
     case 2:
     normal:
-      fputs (hi_reg_name[REGNO (x)], file);
+      reg = hi_reg_name[REGNO (x)];
       break;
     case 1:
       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
 	goto normal;
-      fputs (qi_reg_name[REGNO (x)], file);
+      reg = qi_reg_name[REGNO (x)];
       break;
     case 0:
       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
 	goto normal;
-      fputs (qi_high_reg_name[REGNO (x)], file);
+      reg = qi_high_reg_name[REGNO (x)];
       break;
     case 32:
       if (SSE_REG_P (x))
 	{
+	  gcc_assert (!duplicated);
 	  putc ('y', file);
 	  fputs (hi_reg_name[REGNO (x)] + 1, file);
+	  return;
 	}
       break;
     default:
       gcc_unreachable ();
     }
+
+  fputs (reg, file);
+  if (duplicated)
+    {
+      if (ASSEMBLER_DIALECT == ASM_ATT)
+	fprintf (file, ", %%%s", reg);
+      else
+	fprintf (file, ", %s", reg);
+    }
 }
 
 /* Locate some local-dynamic symbol still in use by this function
@@ -9059,6 +9077,7 @@ get_some_local_dynamic_name (void)
    t --  likewise, print the V8SFmode name of the register.
    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
    y -- print "st(0)" instead of "st" as a register.
+   d -- print duplicated register operand for AVX instruction.
    D -- print condition for SSE cmp instruction.
    P -- if PIC, print an @PLT suffix.
    X -- don't print any sort of PIC '@' suffix for a symbol.
@@ -9204,6 +9223,7 @@ print_operand (FILE *file, rtx x, int co
 	      gcc_unreachable ();
 	    }
 
+	case 'd':
 	case 'b':
 	case 'w':
 	case 'k':

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-23 23:12 ` H.J. Lu
@ 2008-05-24 21:38   ` H.J. Lu
  2008-05-25  9:02     ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2008-05-24 21:38 UTC (permalink / raw)
  To: Uros Bizjak, Joey Ye, Xuepeng Guo; +Cc: GCC Patches

On Fri, May 23, 2008 at 02:48:45PM -0700, H.J. Lu wrote:
> Hi Uros,
> 
> How about this patch?
> 

Here is the updated patch. I added %v as a prefix and %d to
print_reg. We can add AVX support to most SSE patterns directly.


H.J.
---
2008-05-24  H.J. Lu  <hongjiu.lu@intel.com>

	* config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
	define.  Use ASM_OUTPUT_AVX_PREFIX.

	* config/i386/i386.c (print_reg): Handle 'd' to duplicate
	the operand.
	(print_operand): Handle 'd'.

	* config/i386/i386.h (ASM_OUTPUT_AVX_PREFIX): New.
	(ASM_OUTPUT_OPCODE): Likewise.

	* config/i386/i386.md (*movdi_2): Support AVX.
	(*movdf_nointeger): Likewise.

	* config/i386/mmx.md (*mov<mode>_internal_rex64_avx): Removed.
	(*mov<mode>_internal_rex64): Support AVX.

	* config/i386/sse.md (*avx_storehps): Removed.
	(sse_storehps): Support AVX.
	(*vec_dupv2df): Remove AVX support.

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 2756)
+++ config/i386/i386.h	(working copy)
@@ -2246,6 +2246,29 @@ do {									\
 #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
   ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
 
+/* When we see %v, we will print the 'v' prefix if TARGET_AVX is
+   true.  */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR)	\
+{						\
+  if ((PTR)[0] == '%' && (PTR)[1] == 'v')	\
+    {						\
+      if (TARGET_AVX)				\
+	(PTR) += 1;				\
+      else					\
+	(PTR) += 2;				\
+    }						\
+}
+
+/* A C statement or statements which output an assembler instruction
+   opcode to the stdio stream STREAM.  The macro-operand PTR is a
+   variable of type `char *' which points to the opcode name in
+   its "internal" form--the form that is written in the machine
+   description.  */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+  ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
 /* Under some conditions we need jump tables in the text section,
    because the assembler cannot handle label differences between
    sections.  This is the case for x86_64 on Mach-O for example.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 2756)
+++ config/i386/i386.md	(working copy)
@@ -2295,15 +2295,19 @@
    pxor\t%0, %0
    movq\t{%1, %0|%0, %1}
    movq\t{%1, %0|%0, %1}
-   pxor\t%0, %0
-   movq\t{%1, %0|%0, %1}
-   movdqa\t{%1, %0|%0, %1}
-   movq\t{%1, %0|%0, %1}
+   %vpxor\t%0, %d0
+   %vmovq\t{%1, %0|%0, %1}
+   %vmovdqa\t{%1, %0|%0, %1}
+   %vmovq\t{%1, %0|%0, %1}
    xorps\t%0, %0
    movlps\t{%1, %0|%0, %1}
    movaps\t{%1, %0|%0, %1}
    movlps\t{%1, %0|%0, %1}"
   [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
    (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI,V4SF,V2SF,V4SF,V2SF")])
 
 (define_split
@@ -2928,11 +2932,11 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "xorps\t%0, %0";
+	  return "%vxorps\t%0, %d0";
 	case MODE_V2DF:
-	  return "xorpd\t%0, %0";
+	  return "%vxorpd\t%0, %d0";
 	case MODE_TI:
-	  return "pxor\t%0, %0";
+	  return "%vpxor\t%0, %d0";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2942,19 +2946,43 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2DF:
-	  return "movapd\t{%1, %0|%0, %1}";
+	  return "%vmovapd\t{%1, %0|%0, %1}";
 	case MODE_TI:
-	  return "movdqa\t{%1, %0|%0, %1}";
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_DI:
-	  return "movq\t{%1, %0|%0, %1}";
+	  return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_DF:
-	  return "movsd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
 	case MODE_V1DF:
-	  return "movlpd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlpd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlpd\t{%1, %0|%0, %1}";
 	case MODE_V2SF:
-	  return "movlps\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlps\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlps\t{%1, %0|%0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2964,6 +2992,10 @@
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
 		 (const_string "DF")
Index: config/i386/mmx.md
===================================================================
--- config/i386/mmx.md	(revision 2756)
+++ config/i386/mmx.md	(working copy)
@@ -63,12 +63,12 @@
   DONE;
 })
 
-(define_insn "*mov<mode>_internal_rex64_avx"
+(define_insn "*mov<mode>_internal_rex64"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
 	(match_operand:MMXMODEI8 1 "vector_move_operand"
 				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_AVX
+  "TARGET_64BIT && TARGET_MMX
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
   "@
     mov{q}\t{%1, %0|%0, %1}
@@ -78,43 +78,19 @@
     movq\t{%1, %0|%0, %1}
     movdq2q\t{%1, %0|%0, %1}
     movq2dq\t{%1, %0|%0, %1}
-    vpxor\t%0, %0, %0
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}
-    vmovq\t{%1, %0|%0, %1}"
+    %vpxor\t%0, %d0
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}
+    %vmovq\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
    (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
    (set (attr "prefix")
      (if_then_else (eq_attr "alternative" "7,8,9,10,11")
-       (const_string "vex")
+       (const_string "maybe_vex")
        (const_string "orig")))
    (set_attr "mode" "DI")])
 
-(define_insn "*mov<mode>_internal_rex64"
-  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
-				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
-	(match_operand:MMXMODEI8 1 "vector_move_operand"
-				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
-  "TARGET_64BIT && TARGET_MMX
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
-  "@
-    mov{q}\t{%1, %0|%0, %1}
-    mov{q}\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movdq2q\t{%1, %0|%0, %1}
-    movq2dq\t{%1, %0|%0, %1}
-    pxor\t%0, %0
-    movq\t{%1, %0|%0, %1}
-    movq\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}
-    movd\t{%1, %0|%0, %1}"
-  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
-   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
-   (set_attr "mode" "DI")])
-
 (define_insn "*mov<mode>_internal_avx"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,r  ,m")
Index: config/i386/gas.h
===================================================================
--- config/i386/gas.h	(revision 2756)
+++ config/i386/gas.h	(working copy)
@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3.  
    GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
    So use `repe' instead.  */
 
+#undef ASM_OUTPUT_OPCODE
 #define ASM_OUTPUT_OPCODE(STREAM, PTR)	\
 {									\
   if ((PTR)[0] == 'r'							\
@@ -103,6 +104,8 @@ along with GCC; see the file COPYING3.  
 	  (PTR) += 5;							\
 	}								\
     }									\
+  else									\
+    ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR));				\
 }
 
 /* Define macro used to output shift-double opcodes when the shift
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 2756)
+++ config/i386/sse.md	(working copy)
@@ -3295,20 +3295,6 @@
   [(set_attr "type" "sselog")
    (set_attr "mode" "V4SF")])
 
-(define_insn "*avx_storehps"
-  [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
-	(vec_select:V2SF
-	  (match_operand:V4SF 1 "nonimmediate_operand" "x,x,o")
-	  (parallel [(const_int 2) (const_int 3)])))]
-  "TARGET_AVX"
-  "@
-   vmovhps\t{%1, %0|%0, %1}
-   vmovhlps\t{%1, %0, %0|%0, %0, %1}
-   vmovlps\t{%H1, %0, %0|%0, %0, %H1}"
-  [(set_attr "type" "ssemov")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "V2SF,V4SF,V2SF")])
-
 (define_insn "sse_storehps"
   [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
 	(vec_select:V2SF
@@ -3316,10 +3302,11 @@
 	  (parallel [(const_int 2) (const_int 3)])))]
   "TARGET_SSE"
   "@
-   movhps\t{%1, %0|%0, %1}
-   movhlps\t{%1, %0|%0, %1}
-   movlps\t{%H1, %0|%0, %H1}"
+   %vmovhps\t{%1, %0|%0, %1}
+   %vmovhlps\t{%1, %d0|%d0, %1}
+   %vmovlps\t{%H1, %d0|%d0, %H1}"
   [(set_attr "type" "ssemov")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2SF,V4SF,V2SF")])
 
 (define_expand "sse_loadhps_exp"
@@ -4425,10 +4412,8 @@
 	(vec_duplicate:V2DF
 	  (match_operand:DF 1 "register_operand" "0")))]
   "TARGET_SSE2"
-  "* return TARGET_AVX ? \"vunpcklpd\t%0, %0, %0\"
-                       : \"unpcklpd\t%0, %0\";"
+  "unpcklpd\t%0, %0"
   [(set_attr "type" "sselog1")
-   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
 (define_insn "*vec_concatv2df_sse3"
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 2756)
+++ config/i386/i386.c	(working copy)
@@ -8890,11 +8890,16 @@ put_condition_code (enum rtx_code code, 
    If CODE is 'x', pretend the mode is V4SFmode.
    If CODE is 't', pretend the mode is V8SFmode.
    If CODE is 'h', pretend the reg is the 'high' byte register.
-   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.  */
+   If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
+   If CODE is 'd', duplicate the operand.
+ */
 
 void
 print_reg (rtx x, int code, FILE *file)
 {
+  const char *reg;
+  bool duplicated = code == 'd';
+
   gcc_assert (x == pc_rtx
 	      || (REGNO (x) != ARG_POINTER_REGNUM
 		  && REGNO (x) != FRAME_POINTER_REGNUM
@@ -8959,12 +8964,14 @@ print_reg (rtx x, int code, FILE *file)
 	}
       return;
     }
+
+  reg = NULL;
   switch (code)
     {
     case 3:
       if (STACK_TOP_P (x))
 	{
-	  fputs ("st(0)", file);
+	  reg = "st(0)";
 	  break;
 	}
       /* FALLTHRU */
@@ -8977,28 +8984,39 @@ print_reg (rtx x, int code, FILE *file)
     case 16:
     case 2:
     normal:
-      fputs (hi_reg_name[REGNO (x)], file);
+      reg = hi_reg_name[REGNO (x)];
       break;
     case 1:
       if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
 	goto normal;
-      fputs (qi_reg_name[REGNO (x)], file);
+      reg = qi_reg_name[REGNO (x)];
       break;
     case 0:
       if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
 	goto normal;
-      fputs (qi_high_reg_name[REGNO (x)], file);
+      reg = qi_high_reg_name[REGNO (x)];
       break;
     case 32:
       if (SSE_REG_P (x))
 	{
+	  gcc_assert (!duplicated);
 	  putc ('y', file);
 	  fputs (hi_reg_name[REGNO (x)] + 1, file);
+	  return;
 	}
       break;
     default:
       gcc_unreachable ();
     }
+
+  fputs (reg, file);
+  if (duplicated)
+    {
+      if (ASSEMBLER_DIALECT == ASM_ATT)
+	fprintf (file, ", %%%s", reg);
+      else
+	fprintf (file, ", %s", reg);
+    }
 }
 
 /* Locate some local-dynamic symbol still in use by this function
@@ -9059,6 +9077,7 @@ get_some_local_dynamic_name (void)
    t --  likewise, print the V8SFmode name of the register.
    h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
    y -- print "st(0)" instead of "st" as a register.
+   d -- print duplicated register operand.
    D -- print condition for SSE cmp instruction.
    P -- if PIC, print an @PLT suffix.
    X -- don't print any sort of PIC '@' suffix for a symbol.
@@ -9204,6 +9223,7 @@ print_operand (FILE *file, rtx x, int co
 	      gcc_unreachable ();
 	    }
 
+	case 'd':
 	case 'b':
 	case 'w':
 	case 'k':

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-23 20:53 Uros Bizjak
  2008-05-23 21:27 ` H.J. Lu
@ 2008-05-23 23:12 ` H.J. Lu
  2008-05-24 21:38   ` H.J. Lu
  1 sibling, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2008-05-23 23:12 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1162 bytes --]

Hi Uros,

How about this patch?

Thanks.

H.J.
---

2008-05-23  H.J. Lu  <hongjiu.lu@intel.com>

        * config/i386/gas.h (ASM_OUTPUT_OPCODE): Undefine before
        define.  Use ASM_OUTPUT_AVX_PREFIX.

        * config/i386/i386.c (print_operand): Check 'v'.

        * config/i386/i386.h (ASM_OUTPUT_AVX_PREFIX): New.
        (ASM_OUTPUT_OPCODE): Likewise.

        * config/i386/i386.md (*movdi_2_avx): New.
        (*movdf_nointeger): Support AVX.

On Fri, May 23, 2008 at 1:27 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Hello!
>
>> I am checking this patch into AVX branch. Now we can generate
>> pure AVX codes for all intrinsics in 64bit.
>>  -  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>> +  "* return TARGET_AVX +            ? \"vcvtts<ssemodefsuffix>2si\t{%1,
>> %0|%0, %1}\"
>> +            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
>>
>
> What do you think about adding "%v" asm directive, i.e.:
>
> "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>
> and output 'v' in print_operand () function, depending on TARGET_AVX? This
> would simplify a lot of insn templates, probably even more when 'enabled'
> attribute is used.
>
> Uros.
>

[-- Attachment #2: avx.txt --]
[-- Type: text/plain, Size: 5973 bytes --]

Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 2758)
+++ config/i386/i386.h	(working copy)
@@ -2246,6 +2246,29 @@ do {									\
 #define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \
   ix86_output_addr_diff_elt ((FILE), (VALUE), (REL))
 
+/* When we see %v, we will print the 'v' prefix if TARGET_AVX is
+   true.  */
+
+#define ASM_OUTPUT_AVX_PREFIX(STREAM, PTR)	\
+{						\
+  if ((PTR)[0] == '%' && (PTR)[1] == 'v')	\
+    {						\
+      if (TARGET_AVX)				\
+	(PTR) += 1;				\
+      else					\
+	(PTR) += 2;				\
+    }						\
+}
+
+/* A C statement or statements which output an assembler instruction
+   opcode to the stdio stream STREAM.  The macro-operand PTR is a
+   variable of type `char *' which points to the opcode name in
+   its "internal" form--the form that is written in the machine
+   description.  */
+
+#define ASM_OUTPUT_OPCODE(STREAM, PTR) \
+  ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR))
+
 /* Under some conditions we need jump tables in the text section,
    because the assembler cannot handle label differences between
    sections.  This is the case for x86_64 on Mach-O for example.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 2758)
+++ config/i386/i386.md	(working copy)
@@ -2283,6 +2283,31 @@
    (set_attr "mode" "DI")
    (set_attr "length_immediate" "1")])
 
+(define_insn "*movdi_2_avx"
+  [(set (match_operand:DI 0 "nonimmediate_operand"
+			"=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2")
+	(match_operand:DI 1 "general_operand"
+			"riFo,riF,C ,*y ,m ,C  ,*Y2,*Y2,m  "))]
+  "TARGET_AVX
+   && !TARGET_64BIT
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   #
+   #
+   pxor\t%0, %0
+   movq\t{%1, %0|%0, %1}
+   movq\t{%1, %0|%0, %1}
+   vpxor\t%0, %0, %0
+   vmovq\t{%1, %0|%0, %1}
+   vmovdqa\t{%1, %0|%0, %1}
+   vmovq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "*,*,mmx,mmxmov,mmxmov,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,TI,DI")])
+
 (define_insn "*movdi_2"
   [(set (match_operand:DI 0 "nonimmediate_operand"
 			"=r  ,o  ,*y,m*y,*y,*Y2,m  ,*Y2,*Y2,*x,m ,*x,*x")
@@ -2928,11 +2953,11 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "xorps\t%0, %0";
+	  return TARGET_AVX ? "vxorps\t%0, %0, %0": "xorps\t%0, %0";
 	case MODE_V2DF:
-	  return "xorpd\t%0, %0";
+	  return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
 	case MODE_TI:
-	  return "pxor\t%0, %0";
+	  return TARGET_AVX ? "vpxor\t%0, %0, %0" :  "pxor\t%0, %0";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2942,19 +2967,43 @@
       switch (get_attr_mode (insn))
 	{
 	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
+	  return "%vmovaps\t{%1, %0|%0, %1}";
 	case MODE_V2DF:
-	  return "movapd\t{%1, %0|%0, %1}";
+	  return "%vmovapd\t{%1, %0|%0, %1}";
 	case MODE_TI:
-	  return "movdqa\t{%1, %0|%0, %1}";
+	  return "%vmovdqa\t{%1, %0|%0, %1}";
 	case MODE_DI:
-	  return "movq\t{%1, %0|%0, %1}";
+	  return "%vmovq\t{%1, %0|%0, %1}";
 	case MODE_DF:
-	  return "movsd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]) && REG_P (operands[1]))
+		return "vmovsd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovsd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movsd\t{%1, %0|%0, %1}";
 	case MODE_V1DF:
-	  return "movlpd\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlpd\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlpd\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlpd\t{%1, %0|%0, %1}";
 	case MODE_V2SF:
-	  return "movlps\t{%1, %0|%0, %1}";
+	  if (TARGET_AVX)
+	    {
+	      if (REG_P (operands[0]))
+		return "vmovlps\t{%1, %0, %0|%0, %0, %1}";
+	      else
+		return "vmovlps\t{%1, %0|%0, %1}";
+	    }
+	  else
+	    return "movlps\t{%1, %0|%0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
@@ -2964,6 +3013,10 @@
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,multi,multi,sselog1,ssemov,ssemov,ssemov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "0,1,2")
 		 (const_string "DF")
Index: config/i386/gas.h
===================================================================
--- config/i386/gas.h	(revision 2758)
+++ config/i386/gas.h	(working copy)
@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3.  
    GAS version 1.38.1 doesn't understand the `repz' opcode mnemonic.
    So use `repe' instead.  */
 
+#undef ASM_OUTPUT_OPCODE
 #define ASM_OUTPUT_OPCODE(STREAM, PTR)	\
 {									\
   if ((PTR)[0] == 'r'							\
@@ -103,6 +104,8 @@ along with GCC; see the file COPYING3.  
 	  (PTR) += 5;							\
 	}								\
     }									\
+  else									\
+    ASM_OUTPUT_AVX_PREFIX ((STREAM), (PTR));				\
 }
 
 /* Define macro used to output shift-double opcodes when the shift
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 2758)
+++ config/i386/i386.c	(working copy)
@@ -9065,6 +9065,7 @@ get_some_local_dynamic_name (void)
    & -- print some in-use local-dynamic symbol name.
    H -- print a memory address offset by 8; used for sse high-parts
    Y -- print condition for SSE5 com* instruction.
+   v -- print 'v' prefix for AVX instructions.
    + -- print a branch hint as 'cs' or 'ds' prefix
    ; -- print a semicolon (after prefixes due to bug in older gas).
  */
@@ -9458,6 +9459,10 @@ print_operand (FILE *file, rtx x, int co
 	    }
 	  return;
 
+	case 'v':
+	  /* It should have been handled in ASM_OUTPUT_OPCODE.  */
+	  gcc_unreachable ();
+
 	case ';':
 #if TARGET_MACHO
 	  fputs (" ; ", file);

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-23 21:46   ` H.J. Lu
@ 2008-05-23 21:49     ` H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2008-05-23 21:49 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

We do ASM_OUTPUT_OPCODE in i386/gas.h. I will see what I can do.


H.J.
---
On Fri, May 23, 2008 at 2:19 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> I have to use
>
> return "%v0movaps\t{%1, %0|%0, %1}";
>
> since it asks for a operand number even if isn't needed. I don't like
> this unused operand number. Also it doesn't work on cases where
> AVX needs one more operand.
>
> H.J.
> On Fri, May 23, 2008 at 1:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
>> Hi Uros,
>>
>> It sounds a good idea. I will give it a try.
>>
>> Thanks.
>>
>>
>> H.J.
>> On Fri, May 23, 2008 at 1:27 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>> Hello!
>>>
>>>> I am checking this patch into AVX branch. Now we can generate
>>>> pure AVX codes for all intrinsics in 64bit.
>>>>  -  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>>>> +  "* return TARGET_AVX +            ? \"vcvtts<ssemodefsuffix>2si\t{%1,
>>>> %0|%0, %1}\"
>>>> +            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
>>>>
>>>
>>> What do you think about adding "%v" asm directive, i.e.:
>>>
>>> "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>>>
>>> and output 'v' in print_operand () function, depending on TARGET_AVX? This
>>> would simplify a lot of insn templates, probably even more when 'enabled'
>>> attribute is used.
>>>
>>> Uros.
>>>
>>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-23 21:27 ` H.J. Lu
@ 2008-05-23 21:46   ` H.J. Lu
  2008-05-23 21:49     ` H.J. Lu
  0 siblings, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2008-05-23 21:46 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

I have to use

return "%v0movaps\t{%1, %0|%0, %1}";

since it asks for an operand number even if it isn't needed. I don't like
this unused operand number. Also it doesn't work on cases where
AVX needs one more operand.

H.J.
On Fri, May 23, 2008 at 1:45 PM, H.J. Lu <hjl.tools@gmail.com> wrote:
> Hi Uros,
>
> It sounds a good idea. I will give it a try.
>
> Thanks.
>
>
> H.J.
> On Fri, May 23, 2008 at 1:27 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> Hello!
>>
>>> I am checking this patch into AVX branch. Now we can generate
>>> pure AVX codes for all intrinsics in 64bit.
>>>  -  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>>> +  "* return TARGET_AVX +            ? \"vcvtts<ssemodefsuffix>2si\t{%1,
>>> %0|%0, %1}\"
>>> +            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
>>>
>>
>> What do you think about adding "%v" asm directive, i.e.:
>>
>> "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>>
>> and output 'v' in print_operand () function, depending on TARGET_AVX? This
>> would simplify a lot of insn templates, probably even more when 'enabled'
>> attribute is used.
>>
>> Uros.
>>
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
  2008-05-23 20:53 Uros Bizjak
@ 2008-05-23 21:27 ` H.J. Lu
  2008-05-23 21:46   ` H.J. Lu
  2008-05-23 23:12 ` H.J. Lu
  1 sibling, 1 reply; 11+ messages in thread
From: H.J. Lu @ 2008-05-23 21:27 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: GCC Patches

Hi Uros,

It sounds like a good idea. I will give it a try.

Thanks.


H.J.
On Fri, May 23, 2008 at 1:27 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> Hello!
>
>> I am checking this patch into AVX branch. Now we can generate
>> pure AVX codes for all intrinsics in 64bit.
>>  -  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>> +  "* return TARGET_AVX +            ? \"vcvtts<ssemodefsuffix>2si\t{%1,
>> %0|%0, %1}\"
>> +            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
>>
>
> What do you think about adding "%v" asm directive, i.e.:
>
> "%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
>
> and output 'v' in print_operand () function, depending on TARGET_AVX? This
> would simplify a lot of insn templates, probably even more when 'enabled'
> attribute is used.
>
> Uros.
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [AVX]: More AVX updates
@ 2008-05-23 20:53 Uros Bizjak
  2008-05-23 21:27 ` H.J. Lu
  2008-05-23 23:12 ` H.J. Lu
  0 siblings, 2 replies; 11+ messages in thread
From: Uros Bizjak @ 2008-05-23 20:53 UTC (permalink / raw)
  To: H.J. Lu; +Cc: GCC Patches

Hello!

> I am checking this patch into AVX branch. Now we can generate
> pure AVX codes for all intrinsics in 64bit.
>   
> -  "cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"
> +  "* return TARGET_AVX 
> +            ? \"vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\"
> +            :\"cvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}\";"
>   
What do you think about adding "%v" asm directive, i.e.:

"%vcvtts<ssemodefsuffix>2si\t{%1, %0|%0, %1}"

and output 'v' in print_operand () function, depending on TARGET_AVX? 
This would simplify a lot of insn templates, probably even more when 
'enabled' attribute is used.

Uros.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [AVX]: More AVX updates
@ 2008-05-22  6:07 H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2008-05-22  6:07 UTC (permalink / raw)
  To: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 1227 bytes --]

Hi,

I am checking this patch into AVX branch to add more AVX
support and fix prefix attribute for MMX instructions in AVX
patterns.


H.J.
---
2008-05-21  H.J. Lu  <hongjiu.lu@intel.com>

        * config/i386/i386.c (ix86_expand_vector_init_one_var): Support
        256bit AVX modes.

        * config/i386/mmx.md (*mov<mode>_internal_rex64_avx): New
        for AVX.
        (*mov<mode>_internal_avx): Likewise.

        * config/i386/sse.md (*vec_concatv2sf_avx): Set prefix to
        orig for MMX instructions.
        (*vec_concatv2di_avx): Likewise.
        (*vec_concatv2si_avx): Likewise.
        (*vec_concatv2di_rex64_avx): Likewise.

2008-05-21  H.J. Lu  <hongjiu.lu@intel.com>

        * config/i386/i386.c (ix86_expand_vector_init_general): Remove
        goto for vec_concat and vec_interleave.

2008-05-21  Joey Ye  <joey.ye@intel.com>
            Xuepeng Guo  <xuepeng.guo@intel.com>
            H.J. Lu  <hongjiu.lu@intel.com>

        * config/i386/i386.md (*movsi_1): Support AVX.
        (*movsf_1): Likewise.
        (*truncdfsf_fast_sse): Likewise.
        (*sse4_1_round<mode>2): Likewise.

        * config/i386/sse.md (*vec_extractv2di_1_rex64_avx): New.
        (*vec_extractv2di_1_avx): Likewise.

[-- Attachment #2: a.txt --]
[-- Type: text/plain, Size: 12571 bytes --]

diff -x config -x testsuite -x ChangeLog.avx \
		-x REVISION -x .svn \
	 	-upr ../../gcc-avx/gcc/config gcc/gcc/config
diff -x config -x testsuite -x ChangeLog.avx -x REVISION -x .svn -upr ../../gcc-avx/gcc/config/i386/i386.c gcc/gcc/config/i386/i386.c
--- ../../gcc-avx/gcc/config/i386/i386.c	2008-05-21 13:25:02.000000000 -0700
+++ gcc/gcc/config/i386/i386.c	2008-05-21 14:11:35.000000000 -0700
@@ -25032,6 +25032,12 @@ ix86_expand_vector_init_one_var (bool mm
 	 the general case.  */
       return false;
 
+    case V4DFmode:
+    case V4DImode:
+    case V8SFmode:
+    case V8SImode:
+    case V16HImode:
+    case V32QImode:
     case V4SFmode:
     case V4SImode:
     case V8HImode:
@@ -25340,28 +25346,17 @@ ix86_expand_vector_init_general (bool mm
     case V2SImode:
       if (!mmx_ok && !TARGET_SSE)
 	break;
+      /* FALLTHRU */
 
-      n = 2;
-      goto vec_concat;
-
+    case V8SFmode:
+    case V8SImode:
     case V4DFmode:
     case V4DImode:
     case V4SFmode:
     case V4SImode:
-      n = 4;
-      goto vec_concat;
-
     case V2DFmode:
     case V2DImode:
-      n = 2;
-      goto vec_concat;
-
-    case V8SFmode:
-    case V8SImode:
-      n = 8;
-      goto vec_concat;
-
-vec_concat:
+      n = GET_MODE_NUNITS (mode);
       for (i = 0; i < n; i++)
 	ops[i] = XVECEXP (vals, 0, i);
       ix86_expand_vector_init_concat (mode, target, ops, n);
@@ -25369,15 +25364,14 @@ vec_concat:
 
     case V32QImode:
       half_mode = V16QImode;
-      n = 32;
       goto half;
 
     case V16HImode:
       half_mode = V8HImode;
-      n = 16;
       goto half;
 
 half:
+      n = GET_MODE_NUNITS (mode);
       for (i = 0; i < n; i++)
 	ops[i] = XVECEXP (vals, 0, i);
       op0 = gen_reg_rtx (half_mode);
@@ -25393,18 +25387,13 @@ half:
     case V16QImode:
       if (!TARGET_SSE4_1)
 	break;
-
-      n = 16;
-      goto vec_interleave;
+      /* FALLTHRU */
 
     case V8HImode:
       if (!TARGET_SSE2)
 	break;
 
-      n = 8;
-      goto vec_interleave;
-
-vec_interleave:
+      n = GET_MODE_NUNITS (mode);
       for (i = 0; i < n; i++)
 	ops[i] = XVECEXP (vals, 0, i);
       ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
diff -x config -x testsuite -x ChangeLog.avx -x REVISION -x .svn -upr ../../gcc-avx/gcc/config/i386/i386.md gcc/gcc/config/i386/i386.md
--- ../../gcc-avx/gcc/config/i386/i386.md	2008-05-21 13:25:02.000000000 -0700
+++ gcc/gcc/config/i386/i386.md	2008-05-21 17:25:22.000000000 -0700
@@ -1505,20 +1505,26 @@
     {
     case TYPE_SSELOG1:
       if (get_attr_mode (insn) == MODE_TI)
-        return "pxor\t%0, %0";
-      return "xorps\t%0, %0";
+        return TARGET_AVX ? "vpxor\t%0, %0, %0"
+                          : "pxor\t%0, %0";
+      return TARGET_AVX ? "vxorps\t%0, %0, %0"
+                        : "xorps\t%0, %0";
 
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
 	{
 	case MODE_TI:
-	  return "movdqa\t{%1, %0|%0, %1}";
+	  return TARGET_AVX ? "vmovdqa\t{%1, %0|%0, %1}"
+	                    : "movdqa\t{%1, %0|%0, %1}";
 	case MODE_V4SF:
-	  return "movaps\t{%1, %0|%0, %1}";
+	  return TARGET_AVX ? "vmovaps\t{%1, %0|%0, %1}"
+	                    : "movaps\t{%1, %0|%0, %1}";
 	case MODE_SI:
-          return "movd\t{%1, %0|%0, %1}";
+          return TARGET_AVX ? "vmovd\t{%1, %0|%0, %1}"
+                            : "movd\t{%1, %0|%0, %1}";
 	case MODE_SF:
-          return "movss\t{%1, %0|%0, %1}";
+          return TARGET_AVX ? "vmovss\t{%1, %0|%0, %1}"
+                            : "movss\t{%1, %0|%0, %1}";
 	default:
 	  gcc_unreachable ();
 	}
@@ -1552,6 +1558,10 @@
 	      (const_string "lea")
 	   ]
 	   (const_string "imov")))
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "0,1,2,3,4,5")
+       (const_string "orig")
+       (const_string "maybe_vex")))
    (set (attr "mode")
      (cond [(eq_attr "alternative" "2,3")
 	      (const_string "DI")
@@ -2715,20 +2725,31 @@
       return "mov{l}\t{%1, %0|%0, %1}";
     case 5:
       if (get_attr_mode (insn) == MODE_TI)
-	return "pxor\t%0, %0";
+	return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
       else
-	return "xorps\t%0, %0";
+	return TARGET_AVX ? "xorps\t%0, %0, %0": "xorps\t%0, %0";
     case 6:
       if (get_attr_mode (insn) == MODE_V4SF)
-	return "movaps\t{%1, %0|%0, %1}";
+	return TARGET_AVX ? "vmovaps\t{%1, %0|%0, %1}"
+			  : "movaps\t{%1, %0|%0, %1}";
+      else
+	return TARGET_AVX ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+			  : "movss\t{%1, %0|%0, %1}";
+    case 7:
+      if (TARGET_AVX)
+	return REG_P (operands[1]) ? "vmovss\t{%1, %0, %0|%0, %0, %1}"
+				   : "vmovss\t{%1, %0|%0, %1}";
       else
 	return "movss\t{%1, %0|%0, %1}";
-    case 7: case 8:
-      return "movss\t{%1, %0|%0, %1}";
+    case 8:
+      return TARGET_AVX ? "vmovss\t{%1, %0|%0, %1}"
+			: "movss\t{%1, %0|%0, %1}";
 
-    case 9: case 10:
-    case 12: case 13: case 14: case 15:
+    case 9: case 10: case 14: case 15:
       return "movd\t{%1, %0|%0, %1}";
+    case 12: case 13:
+      return TARGET_AVX ? "vmovd\t{%1, %0|%0, %1}"
+			: "movd\t{%1, %0|%0, %1}";
 
     case 11:
       return "movq\t{%1, %0|%0, %1}";
@@ -2738,6 +2759,10 @@
     }
 }
   [(set_attr "type" "fmov,fmov,fmov,imov,imov,sselog1,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov,ssemov,ssemov,mmxmov,mmxmov")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7,8,12,13")
+       (const_string "maybe_vex")
+       (const_string "orig")))
    (set (attr "mode")
         (cond [(eq_attr "alternative" "3,4,9,10")
 		 (const_string "SI")
@@ -4337,8 +4362,10 @@
         (float_truncate:SF
           (match_operand:DF 1 "nonimmediate_operand" "xm")))]
   "TARGET_SSE2 && TARGET_SSE_MATH"
-  "cvtsd2ss\t{%1, %0|%0, %1}"
+  "* return TARGET_AVX ? \"vcvtsd2ss\t{%1, %0, %0|%0, %0, %1}\"
+                       : \"cvtsd2ss\t{%1, %0|%0, %1}\";"
   [(set_attr "type" "ssecvt")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "SF")])
 
 (define_insn "*truncdfsf_fast_i387"
@@ -17746,9 +17773,11 @@
 		       (match_operand:SI 2 "const_0_to_15_operand" "n")]
 		      UNSPEC_ROUND))]
   "TARGET_ROUND"
-  "rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
+  "* return TARGET_AVX ? \"vrounds<ssemodefsuffix>\t{%2, %1, %0, %0|%0, %0, %1, %2}\"
+                       : \"rounds<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}\";"
   [(set_attr "type" "ssecvt")
    (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<MODE>")])
 
 (define_insn "rintxf2"
diff -x config -x testsuite -x ChangeLog.avx -x REVISION -x .svn -upr ../../gcc-avx/gcc/config/i386/mmx.md gcc/gcc/config/i386/mmx.md
--- ../../gcc-avx/gcc/config/i386/mmx.md	2008-05-18 07:05:21.000000000 -0700
+++ gcc/gcc/config/i386/mmx.md	2008-05-21 17:25:22.000000000 -0700
@@ -63,6 +63,34 @@
   DONE;
 })
 
+(define_insn "*mov<mode>_internal_rex64_avx"
+  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
+	(match_operand:MMXMODEI8 1 "vector_move_operand"
+				"Cr ,m,C  ,!?ym,!?y,Y2,!y,C,xm,x,x,r"))]
+  "TARGET_64BIT && TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+    mov{q}\t{%1, %0|%0, %1}
+    mov{q}\t{%1, %0|%0, %1}
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
+    movdq2q\t{%1, %0|%0, %1}
+    movq2dq\t{%1, %0|%0, %1}
+    vpxor\t%0, %0, %0
+    vmovq\t{%1, %0|%0, %1}
+    vmovq\t{%1, %0|%0, %1}
+    vmovq\t{%1, %0|%0, %1}
+    vmovq\t{%1, %0|%0, %1}"
+  [(set_attr "type" "imov,imov,mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,ssemov,ssemov")
+   (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "7,8,9,10,11")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI")])
+
 (define_insn "*mov<mode>_internal_rex64"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 				"=rm,r,!?y,!?y ,m  ,!y,Y2,x,x ,m,r,x")
@@ -87,6 +115,32 @@
    (set_attr "unit" "*,*,*,*,*,mmx,mmx,*,*,*,*,*")
    (set_attr "mode" "DI")])
 
+(define_insn "*mov<mode>_internal_avx"
+  [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
+			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,r  ,m")
+	(match_operand:MMXMODEI8 1 "vector_move_operand"
+			"C   ,!ym,!?y,*Y2,!y ,C  ,*Y2m,*Y2,irm,r"))]
+  "TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+    pxor\t%0, %0
+    movq\t{%1, %0|%0, %1}
+    movq\t{%1, %0|%0, %1}
+    movdq2q\t{%1, %0|%0, %1}
+    movq2dq\t{%1, %0|%0, %1}
+    vpxor\t%0, %0, %0
+    vmovq\t{%1, %0|%0, %1}
+    vmovq\t{%1, %0|%0, %1}
+    #
+    #"
+  [(set_attr "type" "mmx,mmxmov,mmxmov,ssecvt,ssecvt,sselog1,ssemov,ssemov,*,*")
+   (set_attr "unit" "*,*,*,mmx,mmx,*,*,*,*,*")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "5,6,7")
+       (const_string "vex")
+       (const_string "orig")))
+   (set_attr "mode" "DI,DI,DI,DI,DI,TI,DI,DI,DI,DI")])
+
 (define_insn "*mov<mode>_internal"
   [(set (match_operand:MMXMODEI8 0 "nonimmediate_operand"
 			"=!?y,!?y,m  ,!y ,*Y2,*Y2,*Y2 ,m  ,*x,*x,*x,m ,r  ,m")
diff -x config -x testsuite -x ChangeLog.avx -x REVISION -x .svn -upr ../../gcc-avx/gcc/config/i386/sse.md gcc/gcc/config/i386/sse.md
--- ../../gcc-avx/gcc/config/i386/sse.md	2008-05-21 13:25:02.000000000 -0700
+++ gcc/gcc/config/i386/sse.md	2008-05-21 17:45:53.000000000 -0700
@@ -3431,7 +3431,10 @@
    punpckldq\t{%2, %0|%0, %2}
    movd\t{%1, %0|%0, %1}"
   [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
-   (set_attr "prefix" "vex")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "3,4")
+       (const_string "orig")
+       (const_string "vex")))
    (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
 
 ;; Although insertps takes register source, we prefer
@@ -6825,6 +6828,24 @@
   operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
 })
 
+(define_insn "*vec_extractv2di_1_rex64_avx"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
+	(vec_select:DI
+	  (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o,o")
+	  (parallel [(const_int 1)])))]
+  "TARGET_64BIT
+   && TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   vmovhps\t{%1, %0|%0, %1}
+   vpsrldq\t{$8, %1, %0|%0, %1, 8}
+   vmovq\t{%H1, %0|%0, %H1}
+   vmov{q}\t{%H1, %0|%0, %H1}"
+  [(set_attr "type" "ssemov,sseishft,ssemov,imov")
+   (set_attr "memory" "*,none,*,*")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V2SF,TI,TI,DI")])
+
 (define_insn "*vec_extractv2di_1_rex64"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x,r")
 	(vec_select:DI
@@ -6840,6 +6861,23 @@
    (set_attr "memory" "*,none,*,*")
    (set_attr "mode" "V2SF,TI,TI,DI")])
 
+(define_insn "*vec_extractv2di_1_avx"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
+	(vec_select:DI
+	  (match_operand:V2DI 1 "nonimmediate_operand" "x,x,o")
+	  (parallel [(const_int 1)])))]
+  "!TARGET_64BIT
+   && TARGET_AVX
+   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+  "@
+   vmovhps\t{%1, %0|%0, %1}
+   vpsrldq\t{$8, %1, %0|%0, %1, 8}
+   vmovq\t{%H1, %0|%0, %H1}"
+  [(set_attr "type" "ssemov,sseishft,ssemov")
+   (set_attr "memory" "*,none,*")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "V2SF,TI,TI")])
+
 (define_insn "*vec_extractv2di_1_sse2"
   [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
 	(vec_select:DI
@@ -6924,7 +6962,10 @@
    punpckldq\t{%2, %0|%0, %2}
    movd\t{%1, %0|%0, %1}"
   [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
-   (set_attr "prefix" "vex")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "3,4")
+       (const_string "orig")
+       (const_string "vex")))
    (set_attr "mode" "TI,TI,TI,DI,DI")])
 
 (define_insn "*vec_concatv2si_sse4_1"
@@ -7013,7 +7054,10 @@
    vmovhps\t{%2, %1, %0|%0, %1, %2}
    vmovlps\t{%1, %2, %0|%0, %2, %1}"
   [(set_attr "type" "ssemov,ssemov,sselog,ssemov,ssemov")
-   (set_attr "prefix" "vex")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "1")
+       (const_string "orig")
+       (const_string "vex")))
    (set_attr "mode" "TI,TI,TI,V2SF,V2SF")])
 
 (define_insn "vec_concatv2di"
@@ -7047,7 +7091,10 @@
    vmovhps\t{%2, %1, %0|%0, %1, %2}
    vmovlps\t{%1, %2, %0|%0, %2, %1}"
   [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
-   (set_attr "prefix" "vex")
+   (set (attr "prefix")
+     (if_then_else (eq_attr "alternative" "3")
+       (const_string "orig")
+       (const_string "vex")))
    (set_attr "mode" "TI,TI,TI,TI,TI,V2SF,V2SF")])
 
 (define_insn "*vec_concatv2di_rex64_sse4_1"

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [AVX]: More AVX updates
@ 2008-05-20 18:42 H.J. Lu
  0 siblings, 0 replies; 11+ messages in thread
From: H.J. Lu @ 2008-05-20 18:42 UTC (permalink / raw)
  To: Gcc Patch List

[-- Attachment #1: Type: text/plain, Size: 307 bytes --]

Hi,

I am checking this patch into the AVX branch to add more AVX support.


H.J.
---
2008-05-20  Xuepeng Guo  <xuepeng.guo@intel.com>

        * config/i386/sse.md (*avx_vmmul<mode>3): New.
        (*avx_pcmpestr_cconly): Likewise.
        (sse4_2_pcmpestri): Support AVX.
        (sse4_2_pcmpestrm): Likewise.

[-- Attachment #2: a.txt --]
[-- Type: text/plain, Size: 3273 bytes --]

Index: ChangeLog.internal
===================================================================
--- ChangeLog.internal	(revision 2650)
+++ ChangeLog.internal	(revision 2653)
@@ -1,3 +1,13 @@
+2008-05-20  Xuepeng Guo  <xuepeng.guo@intel.com>
+
+	* config/i386/sse.md (*avx_vmmul<mode>3): New.
+
+2008-05-20  Xuepeng Guo  <xuepeng.guo@intel.com>
+
+	* config/i386/sse.md (sse4_2_pcmpestri): Support AVX.
+	(sse4_2_pcmpestrm): Likewise.
+	(*avx_pcmpestr_cconly): New.
+
 2008-05-19  H.J. Lu  <hongjiu.lu@intel.com>
 
 	* tree-vectorizer.c (get_vectype_for_scalar_type): Mention
Index: config/i386/sse.md
===================================================================
--- config/i386/sse.md	(revision 2650)
+++ config/i386/sse.md	(revision 2653)
@@ -569,6 +569,20 @@
   [(set_attr "type" "ssemul")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*avx_vmmul<mode>3"
+  [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
+	(vec_merge:SSEMODEF2P
+	  (mult:SSEMODEF2P
+	    (match_operand:SSEMODEF2P 1 "register_operand" "x")
+	    (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
+	  (match_dup 1)
+	  (const_int 1)))]
+  "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
+  "vmuls<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ssemul")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "<ssescalarmode>")])
+
 (define_insn "<sse>_vmmul<mode>3"
   [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
 	(vec_merge:SSEMODEF2P
@@ -9508,10 +9522,12 @@
 	   (match_dup 5)]
 	  UNSPEC_PCMPESTR))]
   "TARGET_SSE4_2"
-  "pcmpestri\t{%5, %3, %1|%1, %3, %5}"
+  "* return TARGET_AVX ? \"vpcmpestri\t{%5, %3, %1|%1, %3, %5}\"
+                       : \"pcmpestri\t{%5, %3, %1|%1, %3, %5}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
 
@@ -9533,13 +9549,37 @@
 	   (match_dup 5)]
 	  UNSPEC_PCMPESTR))]
   "TARGET_SSE4_2"
-  "pcmpestrm\t{%5, %3, %1|%1, %3, %5}"
+  "* return TARGET_AVX ? \"vpcmpestrm\t{%5, %3, %1|%1, %3, %5}\"
+                       : \"pcmpestrm\t{%5, %3, %1|%1, %3, %5}\";"
   [(set_attr "type" "sselog")
    (set_attr "prefix_data16" "1")
    (set_attr "prefix_extra" "1")
+   (set_attr "prefix" "maybe_vex")
    (set_attr "memory" "none,load")
    (set_attr "mode" "TI")])
 
+(define_insn "*avx_pcmpestr_cconly"
+  [(set (reg:CC FLAGS_REG)
+	(unspec:CC
+	  [(match_operand:V16QI 2 "register_operand" "x,x,x,x")
+	   (match_operand:SI 3 "register_operand" "a,a,a,a")
+	   (match_operand:V16QI 4 "nonimmediate_operand" "x,m,x,m")
+	   (match_operand:SI 5 "register_operand" "d,d,d,d")
+	   (match_operand:SI 6 "const_0_to_255_operand" "n,n,n,n")]
+	  UNSPEC_PCMPESTR))
+   (clobber (match_scratch:V16QI 0 "=Yz,Yz,X,X"))
+   (clobber (match_scratch:SI    1 "= X, X,c,c"))]
+  "TARGET_AVX"
+  "@
+   vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+   vpcmpestrm\t{%6, %4, %2|%2, %4, %6}
+   vpcmpestri\t{%6, %4, %2|%2, %4, %6}
+   vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
+  [(set_attr "type" "sselog")
+   (set_attr "prefix" "vex")
+   (set_attr "memory" "none,load,none,load")
+   (set_attr "mode" "TI")])
+
 (define_insn "sse4_2_pcmpestr_cconly"
   [(set (reg:CC FLAGS_REG)
 	(unspec:CC

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-05-26 14:21 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-05-23 18:48 [AVX]: More AVX updates H.J. Lu
  -- strict thread matches above, loose matches on Subject: below --
2008-05-23 20:53 Uros Bizjak
2008-05-23 21:27 ` H.J. Lu
2008-05-23 21:46   ` H.J. Lu
2008-05-23 21:49     ` H.J. Lu
2008-05-23 23:12 ` H.J. Lu
2008-05-24 21:38   ` H.J. Lu
2008-05-25  9:02     ` H.J. Lu
2008-05-26 15:17       ` H.J. Lu
2008-05-22  6:07 H.J. Lu
2008-05-20 18:42 H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).