public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386]: Avoid partial memory stalls for FP moves and related FP immediate cleanups
@ 2011-05-31  5:36 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2011-05-31  5:36 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1458 bytes --]

Hello!

The attached patch prevents partial memory stalls for XFmode and DFmode
(32-bit) immediate->memory moves by penalizing the memory target in the
case of an immediate move. The patch also cleans up the move splitters
that handle immediate operands (most notably, there are no FP subregs
after the reload pass, so the ix86_split_long_move splitters can be
simplified and merged into one pattern).

2011-05-30  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (*movxf_internal): Penalize FYx*r->o alternative
	to prevent partial memory stalls.  Do not move CONST_DOUBLEs directly
	to memory for !TARGET_MEMORY_MISMATCH_STALL.
	(*movdf_internal_rex64): Do not penalize F->r alternative.
	(*movdf_internal):  Penalize FYd*r->o alternative to prevent partial
	memory stalls.  Generate SSE and x87 CONST_DOUBLE immediates only
	when optimizing function for size.  Do not move CONST_DOUBLEs
	directly to memory for !TARGET_MEMORY_MISMATCH_STALL.
	(FP move splitters): Merge {TF,XF,DF}mode move splitters.  Do not
	handle SUBREGs.  Do not check for MEM_P operands in the insn condition,
	check for ANY_FP_REGNO_P instead.
	* config/i386/constraints.md (Yd): Enable GENERAL_REGS for
	TARGET_64BIT and for TARGET_INTEGER_DFMODE_MOVES when optimizing
	function for speed.
	* config/i386/i386.c (ix86_option_override_internal): Do not
	set TARGET_INTEGER_DFMODE_MOVES here.

The patch was bootstrapped and regression tested on x86_64-pc-linux-gnu
{,-m32} and committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 11272 bytes --]

Index: i386.md
===================================================================
--- i386.md	(revision 174435)
+++ i386.md	(working copy)
@@ -2833,8 +2833,8 @@
   "ix86_expand_move (<MODE>mode, operands); DONE;")
 
 (define_insn "*movtf_internal"
-  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r,?o")
-	(match_operand:TF 1 "general_operand" "xm,x,C,roF,Fr"))]
+  [(set (match_operand:TF 0 "nonimmediate_operand" "=x,m,x,?r ,?o")
+	(match_operand:TF 1 "general_operand"	   "xm,x,C,roF,Fr"))]
   "TARGET_SSE2
    && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
@@ -2877,24 +2877,19 @@
 		   (const_string "TI"))]
 	       (const_string "DI")))])
 
-(define_split
-  [(set (match_operand:TF 0 "nonimmediate_operand" "")
-        (match_operand:TF 1 "general_operand" ""))]
-  "reload_completed
-   && !(SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
+;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movxf_internal"
-  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,o")
+  [(set (match_operand:XF 0 "nonimmediate_operand" "=f,m,f,Yx*r  ,!o")
 	(match_operand:XF 1 "general_operand"	   "fm,f,G,Yx*roF,FYx*r"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
        || (optimize_function_for_size_p (cfun)
-	   && standard_80387_constant_p (operands[1]) > 0)
-       || memory_operand (operands[0], XFmode))"
+	   && standard_80387_constant_p (operands[1]) > 0
+	   && !memory_operand (operands[0], XFmode))
+       || (!TARGET_MEMORY_MISMATCH_STALL
+	   && memory_operand (operands[0], XFmode)))"
 {
   switch (which_alternative)
     {
@@ -2905,8 +2900,10 @@
     case 2:
       return standard_80387_constant_opcode (operands[1]);
 
-    case 3: case 4:
+    case 3:
+    case 4:
       return "#";
+
     default:
       gcc_unreachable ();
     }
@@ -2914,25 +2911,11 @@
   [(set_attr "type" "fmov,fmov,fmov,multi,multi")
    (set_attr "mode" "XF,XF,XF,SI,SI")])
 
-(define_split
-  [(set (match_operand:XF 0 "nonimmediate_operand" "")
-	(match_operand:XF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movdf_internal_rex64"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,r ,m,!r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
+		"=f,m,f,r ,m,r,!m,Y2*x,Y2*x,Y2*x,m   ,Yi,r ")
 	(match_operand:DF 1 "general_operand"
-		"fm,f,G,rm,r,F ,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
+		"fm,f,G,rm,r,F,F ,C   ,Y2*x,m   ,Y2*x,r ,Yi"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
@@ -3080,21 +3063,20 @@
 ;; Possible store forwarding (partial memory) stall in alternative 4.
 (define_insn "*movdf_internal"
   [(set (match_operand:DF 0 "nonimmediate_operand"
-		"=f,m,f,Yd*r  ,o    ,Y2*x,Y2*x,Y2*x,m  ")
+		"=f,m,f,Yd*r  ,!o   ,Y2*x,Y2*x,Y2*x,m  ")
 	(match_operand:DF 1 "general_operand"
 		"fm,f,G,Yd*roF,FYd*r,C   ,Y2*x,m   ,Y2*x"))]
   "!TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))
    && (!can_create_pseudo_p ()
        || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE)
        || GET_CODE (operands[1]) != CONST_DOUBLE
-       || (!TARGET_INTEGER_DFMODE_MOVES
+       || (optimize_function_for_size_p (cfun)
 	   && ((!(TARGET_SSE2 && TARGET_SSE_MATH)
 		&& standard_80387_constant_p (operands[1]) > 0)
 	       || (TARGET_SSE2 && TARGET_SSE_MATH
 		   && standard_sse_constant_p (operands[1])))
 	   && !memory_operand (operands[0], DFmode))
-       || ((TARGET_INTEGER_DFMODE_MOVES
-	    || !TARGET_MEMORY_MISMATCH_STALL)
+       || (!TARGET_MEMORY_MISMATCH_STALL
 	   && memory_operand (operands[0], DFmode)))"
 {
   switch (which_alternative)
@@ -3215,20 +3197,6 @@
 	      ]
 	      (const_string "DF")))])
 
-(define_split
-  [(set (match_operand:DF 0 "nonimmediate_operand" "")
-	(match_operand:DF 1 "general_operand" ""))]
-  "reload_completed
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))
-   && ! (ANY_FP_REG_P (operands[0]) ||
-	 (GET_CODE (operands[0]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[0]))))
-   && ! (ANY_FP_REG_P (operands[1]) ||
-	 (GET_CODE (operands[1]) == SUBREG
-	  && ANY_FP_REG_P (SUBREG_REG (operands[1]))))"
-  [(const_int 0)]
-  "ix86_split_long_move (operands); DONE;")
-
 (define_insn "*movsf_internal"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	  "=f,m,f,r  ,m ,x,x,x ,m,!*y,!m,!*y,?Yi,?r,!*Ym,!r")
@@ -3331,31 +3299,19 @@
   [(set (match_operand 0 "register_operand" "")
 	(match_operand 1 "memory_operand" ""))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
        || GET_MODE (operands[0]) == DFmode
        || GET_MODE (operands[0]) == SFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-	FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-	FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3363,31 +3319,18 @@
   [(set (match_operand 0 "register_operand" "")
 	(float_extend (match_operand 1 "memory_operand" "")))]
   "reload_completed
-   && MEM_P (operands[1])
    && (GET_MODE (operands[0]) == TFmode
        || GET_MODE (operands[0]) == XFmode
-       || GET_MODE (operands[0]) == DFmode
-       || GET_MODE (operands[0]) == SFmode)
+       || GET_MODE (operands[0]) == DFmode)
+   && ANY_FP_REGNO_P (REGNO (operands[0]))
    && (operands[2] = find_constant_src (insn))"
   [(set (match_dup 0) (match_dup 2))]
 {
   rtx c = operands[2];
-  rtx r = operands[0];
-
-  if (GET_CODE (r) == SUBREG)
-    r = SUBREG_REG (r);
+  int r = REGNO (operands[0]);
 
-  if (SSE_REG_P (r))
-    {
-      if (!standard_sse_constant_p (c))
-	FAIL;
-    }
-  else if (FP_REG_P (r))
-    {
-      if (standard_80387_constant_p (c) < 1)
-	FAIL;
-    }
-  else if (MMX_REG_P (r))
+  if ((SSE_REGNO_P (r) && !standard_sse_constant_p (c))
+      || (FP_REGNO_P (r) && standard_80387_constant_p (c) < 1))
     FAIL;
 })
 
@@ -3411,6 +3354,17 @@
     operands[1] = CONST1_RTX (<MODE>mode);
 })
 
+(define_split
+  [(set (match_operand 0 "nonimmediate_operand" "")
+        (match_operand 1 "general_operand" ""))]
+  "reload_completed
+   && (GET_MODE (operands[0]) == TFmode
+       || GET_MODE (operands[0]) == XFmode
+       || GET_MODE (operands[0]) == DFmode)
+   && !(ANY_FP_REG_P (operands[0]) || ANY_FP_REG_P (operands[1]))"
+  [(const_int 0)]
+  "ix86_split_long_move (operands); DONE;")
+
 (define_insn "swapxf"
   [(set (match_operand:XF 0 "register_operand" "+f")
 	(match_operand:XF 1 "register_operand" "+f"))
@@ -16650,7 +16604,7 @@
   [(set (match_operand:SWI 0 "push_operand" "")
 	(match_operand:SWI 1 "memory_operand" ""))
    (match_scratch:SWI 2 "<r>")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16661,7 +16615,7 @@
   [(set (match_operand:SF 0 "push_operand" "")
 	(match_operand:SF 1 "memory_operand" ""))
    (match_scratch:SF 2 "r")]
-  "optimize_insn_for_speed_p () && !TARGET_PUSH_MEMORY
+  "!(TARGET_PUSH_MEMORY || optimize_insn_for_size_p ())
    && !RTX_FRAME_RELATED_P (peep2_next_insn (0))"
   [(set (match_dup 2) (match_dup 1))
    (set (match_dup 0) (match_dup 2))])
@@ -16813,7 +16767,7 @@
                      [(match_dup 0)
                       (match_operand:SI 1 "memory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 0) (match_dup 2)]))
@@ -16826,7 +16780,7 @@
                      [(match_operand:SI 1 "memory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY"
+  "!(TARGET_READ_MODIFY || optimize_insn_for_size_p ())"
   [(set (match_dup 2) (match_dup 1))
    (parallel [(set (match_dup 0)
                    (match_op_dup 3 [(match_dup 2) (match_dup 0)]))
@@ -16879,7 +16833,7 @@
                      [(match_dup 0)
                       (match_operand:SI 1 "nonmemory_operand" "")]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
@@ -16895,7 +16849,7 @@
                      [(match_operand:SI 1 "nonmemory_operand" "")
                       (match_dup 0)]))
               (clobber (reg:CC FLAGS_REG))])]
-  "optimize_insn_for_speed_p () && ! TARGET_READ_MODIFY_WRITE
+  "!(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
    /* Do not split stack checking probes.  */
    && GET_CODE (operands[3]) != IOR && operands[1] != const0_rtx"
   [(set (match_dup 2) (match_dup 0))
Index: constraints.md
===================================================================
--- constraints.md	(revision 174435)
+++ constraints.md	(working copy)
@@ -108,7 +108,9 @@
  "@internal Any MMX register, when inter-unit moves are enabled.")
 
 (define_register_constraint "Yd"
- "TARGET_INTEGER_DFMODE_MOVES ? GENERAL_REGS : NO_REGS"
+ "(TARGET_64BIT
+   || (TARGET_INTEGER_DFMODE_MOVES && optimize_function_for_speed_p (cfun)))
+  ? GENERAL_REGS : NO_REGS"
  "@internal Any integer register when integer DFmode moves are enabled.")
 
 (define_register_constraint "Yx"
Index: i386.c
===================================================================
--- i386.c	(revision 174435)
+++ i386.c	(working copy)
@@ -3947,13 +3947,6 @@ ix86_option_override_internal (bool main
   if (!TARGET_80387)
     target_flags |= MASK_NO_FANCY_MATH_387;
 
-  /* On 32bit targets, avoid moving DFmode values in
-     integer registers when optimizing for size.  */
-  if (TARGET_64BIT)
-    target_flags |= TARGET_INTEGER_DFMODE_MOVES;
-  else if (optimize_size)
-    target_flags &= ~TARGET_INTEGER_DFMODE_MOVES;
-
   /* Turn on MMX builtins for -msse.  */
   if (TARGET_SSE)
     {

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2011-05-30 20:55 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-05-31  5:36 [PATCH, i386]: Avoid partial memory stalls for FP moves and related FP immediate cleanups Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).