public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* RFA: patch to fix PR19398
@ 2012-11-19  2:27 Vladimir Makarov
  2012-11-19  7:55 ` Uros Bizjak
  0 siblings, 1 reply; 3+ messages in thread
From: Vladimir Makarov @ 2012-11-19  2:27 UTC (permalink / raw)
  To: GCC Patches; +Cc: Uros Bizjak

[-- Attachment #1: Type: text/plain, Size: 526 bytes --]

The following patch fixes

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19398

Uros, there is i386.md part for which I need an approval.  Without this 
change, GCC will still generate the same code even if LRA uses an 
alternative with 'm' constraint.

2012-11-18  Vladimir Makarov  <vmakarov@redhat.com>

         PR target/19398
         * lra-constraints.c (process_alt_operands): Discourage reloads
         through secodnary memory.
         * config/i386/i386.md (fix_trunc?f?i_sse): Remove peephole2
         patterns.


[-- Attachment #2: pr19398.patch --]
[-- Type: text/plain, Size: 2462 bytes --]

Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 193567)
+++ lra-constraints.c	(working copy)
@@ -1942,6 +1942,19 @@
 	      if (no_regs_p && REG_P (op))
 		reject++;
 
+#ifdef SECONDARY_MEMORY_NEEDED
+	      /* If reload requires moving value through secondary
+		 memory, it will need one more insn at least.  */
+	      if (this_alternative != NO_REGS 
+		  && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS
+		  && ((curr_static_id->operand[nop].type != OP_OUT
+		       && SECONDARY_MEMORY_NEEDED (cl, this_alternative,
+						   GET_MODE (op)))
+		      || (curr_static_id->operand[nop].type != OP_IN
+			  && SECONDARY_MEMORY_NEEDED (this_alternative, cl,
+						      GET_MODE (op)))))
+		losers++;
+#endif
 	      /* Input reloads can be inherited more often than output
 		 reloads can be removed, so penalize output
 		 reloads.  */
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 193557)
+++ config/i386/i386.md	(working copy)
@@ -1875,7 +1875,7 @@
 	      (const_string "OI")))])
 
 (define_insn "*movti_internal_rex64"
-  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,o  ,x,x ,m")
+  [(set (match_operand:TI 0 "nonimmediate_operand" "=!r ,!o  ,x,x ,m")
 	(match_operand:TI 1 "general_operand"      "riFo,riF,C,xm,x"))]
   "TARGET_64BIT && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
@@ -4503,23 +4503,6 @@
    && peep2_reg_dead_p (2, operands[0])"
   [(set (match_dup 2) (fix:SWI48x (match_dup 1)))])
 
-;; Avoid vector decoded forms of the instruction.
-(define_peephole2
-  [(match_scratch:DF 2 "x")
-   (set (match_operand:SWI48x 0 "register_operand")
-	(fix:SWI48x (match_operand:DF 1 "memory_operand")))]
-  "TARGET_SSE2 && TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
-
-(define_peephole2
-  [(match_scratch:SF 2 "x")
-   (set (match_operand:SWI48x 0 "register_operand")
-	(fix:SWI48x (match_operand:SF 1 "memory_operand")))]
-  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
-
 (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
   [(set (match_operand:SWI248x 0 "nonimmediate_operand")
 	(fix:SWI248x (match_operand 1 "register_operand")))]

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: RFA: patch to fix PR19398
  2012-11-19  2:27 RFA: patch to fix PR19398 Vladimir Makarov
@ 2012-11-19  7:55 ` Uros Bizjak
  2012-11-20 18:09   ` Uros Bizjak
  0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2012-11-19  7:55 UTC (permalink / raw)
  To: Vladimir Makarov; +Cc: GCC Patches

On Mon, Nov 19, 2012 at 2:45 AM, Vladimir Makarov <vmakarov@redhat.com> wrote:
> The following patch fixes
>
> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19398
>
> Uros, there is i386.md part for which I need an approval.  Without this
> change, GCC will still generate the same code even if LRA uses an
> alternative with 'm' constraint.
>
> 2012-11-18  Vladimir Makarov  <vmakarov@redhat.com>
>
>         PR target/19398
>         * lra-constraints.c (process_alt_operands): Discourage reloads
>         through secodnary memory.
>         * config/i386/i386.md (fix_trunc?f?i_sse): Remove peephole2
>         patterns.

Thanks!

Please note that i386.md change is not correct, it is peephole2 with
"Shorten x87->SSE reload sequences ..." comment that is not effective
anymore with your patch and should now be removed. The peephole2s that
your patch removes undo LRA transformation for targets that *do not*
benefit from MEM->REG operation for this particular FIX RTX. (Also,
please note that your patch includes movti_internal_rex64 change that
was already reverted due to better fix).

Please remove mentioned peephole2 instead. The test from the PR will
show effects of LRA change for all targets, other than core2i7_64, k8
and generic 64bit targets.

The patch that removes mentioned peephole2 from i386.md is pre-approved.

Thanks,
Uros.


>

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: RFA: patch to fix PR19398
  2012-11-19  7:55 ` Uros Bizjak
@ 2012-11-20 18:09   ` Uros Bizjak
  0 siblings, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2012-11-20 18:09 UTC (permalink / raw)
  To: Vladimir Makarov; +Cc: GCC Patches

[-- Attachment #1: Type: text/plain, Size: 2661 bytes --]

On Mon, Nov 19, 2012 at 8:55 AM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> The following patch fixes
>>
>> http://gcc.gnu.org/bugzilla/show_bug.cgi?id=19398
>>
>> Uros, there is i386.md part for which I need an approval.  Without this
>> change, GCC will still generate the same code even if LRA uses an
>> alternative with 'm' constraint.
>>
>> 2012-11-18  Vladimir Makarov  <vmakarov@redhat.com>
>>
>>         PR target/19398
>>         * lra-constraints.c (process_alt_operands): Discourage reloads
>>         through secodnary memory.
>>         * config/i386/i386.md (fix_trunc?f?i_sse): Remove peephole2
>>         patterns.
>
> Thanks!
>
> Please note that i386.md change is not correct, it is peephole2 with
> "Shorten x87->SSE reload sequences ..." comment that is not effective
> anymore with your patch and should now be removed. The peephole2s that
> your patch removes undo LRA transformation for targets that *do not*
> benefit from MEM->REG operation for this particular FIX RTX. (Also,
> please note that your patch includes movti_internal_rex64 change that
> was already reverted due to better fix).
>
> Please remove mentioned peephole2 instead. The test from the PR will
> show effects of LRA change for all targets, other than core2i7_64, k8
> and generic 64bit targets.
>
> The patch that removes mentioned peephole2 from i386.md is pre-approved.

I have merged my x86 target patch that removes correct peephole2 and
associated defines with Vlad's patch and committed everything
(including testcase) to mainline SVN.

Also, the patch includes macroization of a couple of RTXes in this area.

2012-11-20  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (fix_trunc<MODEF:mode><SWI48:mode>_sse): Macroize
	insn from fix_trunc<mode>{si,di}_sse using SWI48 mode iterator.
	(peephole2 to avoid vector decoded forms): Macroize peephole2
	using MODEF mode iterator.  Use SWI48 mode iterator instead of SWI48x.

2012-11-20  Uros Bizjak  <ubizjak@gmail.com>

	PR target/19398
	* config/i386/i386.md
	(peephole2 to shorten x87->SSE reload sequences): Remove peephole2.
	* config/i386/i386.h (enum ix86_tune_indices)
	<IX86_TUNE_SHORTEN_X87_SSE>: Remove.
	(TARGET_SHORTEN_X87_SSE): Remove.
	* config/i386/i386.c (initial_ix86_tune_features): Update.

2012-11-20  Vladimir Makarov  <vmakarov@redhat.com>

	PR target/19398
	* lra-constraints.c (process_alt_operands): Discourage reloads
	through secodnary memory.

testsuite/ChangeLog:

2012-11-20  Uros Bizjak  <ubizjak@gmail.com>

	PR target/19398
	* gcc.target/i386/pr19398.c: New test.

Bootstrapped and regression tested on x86_64-pc-linux-gnu {,-m32} and
committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 6271 bytes --]

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 193625)
+++ config/i386/i386.c	(working copy)
@@ -1855,9 +1855,6 @@ static unsigned int initial_ix86_tune_features[X86
   /* X86_TUNE_EXT_80387_CONSTANTS */
   m_PPRO | m_P4_NOCONA | m_CORE2I7 | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
 
-  /* X86_TUNE_SHORTEN_X87_SSE */
-  ~m_K8,
-
   /* X86_TUNE_AVOID_VECTOR_DECODE */
   m_CORE2I7_64 | m_K8 | m_GENERIC64,
 
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 193625)
+++ config/i386/i386.h	(working copy)
@@ -314,7 +314,6 @@ enum ix86_tune_indices {
   X86_TUNE_PAD_RETURNS,
   X86_TUNE_PAD_SHORT_FUNCTION,
   X86_TUNE_EXT_80387_CONSTANTS,
-  X86_TUNE_SHORTEN_X87_SSE,
   X86_TUNE_AVOID_VECTOR_DECODE,
   X86_TUNE_PROMOTE_HIMODE_IMUL,
   X86_TUNE_SLOW_IMUL_IMM32_MEM,
@@ -408,7 +407,6 @@ extern unsigned char ix86_tune_features[X86_TUNE_L
 	ix86_tune_features[X86_TUNE_PAD_SHORT_FUNCTION]
 #define TARGET_EXT_80387_CONSTANTS \
 	ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]
-#define TARGET_SHORTEN_X87_SSE	ix86_tune_features[X86_TUNE_SHORTEN_X87_SSE]
 #define TARGET_AVOID_VECTOR_DECODE \
 	ix86_tune_features[X86_TUNE_AVOID_VECTOR_DECODE]
 #define TARGET_TUNE_PROMOTE_HIMODE_IMUL \
Index: config/i386/i386.md
===================================================================
--- config/i386/i386.md	(revision 193625)
+++ config/i386/i386.md	(working copy)
@@ -4465,61 +4465,35 @@
   "operands[2] = gen_reg_rtx (SImode);")
 
 ;; When SSE is available, it is always faster to use it!
-(define_insn "fix_trunc<mode>di_sse"
-  [(set (match_operand:DI 0 "register_operand" "=r,r")
-	(fix:DI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
-  "TARGET_64BIT && SSE_FLOAT_MODE_P (<MODE>mode)
+(define_insn "fix_trunc<MODEF:mode><SWI48:mode>_sse"
+  [(set (match_operand:SWI48 0 "register_operand" "=r,r")
+	(fix:SWI48 (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
+  "SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
    && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "%vcvtt<ssemodesuffix>2si{q}\t{%1, %0|%0, %1}"
+  "%vcvtt<MODEF:ssemodesuffix>2si<SWI48:rex64suffix>\t{%1, %0|%0, %1}"
   [(set_attr "type" "sseicvt")
    (set_attr "prefix" "maybe_vex")
-   (set_attr "prefix_rex" "1")
-   (set_attr "mode" "<MODE>")
+   (set (attr "prefix_rex")
+	(if_then_else
+	  (match_test "<SWI48:MODE>mode == DImode")
+	  (const_string "1")
+	  (const_string "*")))
+   (set_attr "mode" "<MODEF:MODE>")
    (set_attr "athlon_decode" "double,vector")
    (set_attr "amdfam10_decode" "double,double")
    (set_attr "bdver1_decode" "double,double")])
 
-(define_insn "fix_trunc<mode>si_sse"
-  [(set (match_operand:SI 0 "register_operand" "=r,r")
-	(fix:SI (match_operand:MODEF 1 "nonimmediate_operand" "x,m")))]
-  "SSE_FLOAT_MODE_P (<MODE>mode)
-   && (!TARGET_FISTTP || TARGET_SSE_MATH)"
-  "%vcvtt<ssemodesuffix>2si\t{%1, %0|%0, %1}"
-  [(set_attr "type" "sseicvt")
-   (set_attr "prefix" "maybe_vex")
-   (set_attr "mode" "<MODE>")
-   (set_attr "athlon_decode" "double,vector")
-   (set_attr "amdfam10_decode" "double,double")
-   (set_attr "bdver1_decode" "double,double")])
-
-;; Shorten x87->SSE reload sequences of fix_trunc?f?i_sse patterns.
-(define_peephole2
-  [(set (match_operand:MODEF 0 "register_operand")
-	(match_operand:MODEF 1 "memory_operand"))
-   (set (match_operand:SWI48x 2 "register_operand")
-	(fix:SWI48x (match_dup 0)))]
-  "TARGET_SHORTEN_X87_SSE
-   && !(TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ())
-   && peep2_reg_dead_p (2, operands[0])"
-  [(set (match_dup 2) (fix:SWI48x (match_dup 1)))])
-
 ;; Avoid vector decoded forms of the instruction.
 (define_peephole2
-  [(match_scratch:DF 2 "x")
-   (set (match_operand:SWI48x 0 "register_operand")
-	(fix:SWI48x (match_operand:DF 1 "memory_operand")))]
-  "TARGET_SSE2 && TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
+  [(match_scratch:MODEF 2 "x")
+   (set (match_operand:SWI48 0 "register_operand")
+	(fix:SWI48 (match_operand:MODEF 1 "memory_operand")))]
+  "TARGET_AVOID_VECTOR_DECODE
+   && SSE_FLOAT_MODE_P (<MODEF:MODE>mode)
+   && optimize_insn_for_speed_p ()"
   [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
+   (set (match_dup 0) (fix:SWI48 (match_dup 2)))])
 
-(define_peephole2
-  [(match_scratch:SF 2 "x")
-   (set (match_operand:SWI48x 0 "register_operand")
-	(fix:SWI48x (match_operand:SF 1 "memory_operand")))]
-  "TARGET_AVOID_VECTOR_DECODE && optimize_insn_for_speed_p ()"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0) (fix:SWI48x (match_dup 2)))])
-
 (define_insn_and_split "fix_trunc<mode>_fisttp_i387_1"
   [(set (match_operand:SWI248x 0 "nonimmediate_operand")
 	(fix:SWI248x (match_operand 1 "register_operand")))]
Index: lra-constraints.c
===================================================================
--- lra-constraints.c	(revision 193625)
+++ lra-constraints.c	(working copy)
@@ -1942,6 +1942,19 @@ process_alt_operands (int only_alternative)
 	      if (no_regs_p && REG_P (op))
 		reject++;
 
+#ifdef SECONDARY_MEMORY_NEEDED
+	      /* If reload requires moving value through secondary
+		 memory, it will need one more insn at least.  */
+	      if (this_alternative != NO_REGS 
+		  && REG_P (op) && (cl = get_reg_class (REGNO (op))) != NO_REGS
+		  && ((curr_static_id->operand[nop].type != OP_OUT
+		       && SECONDARY_MEMORY_NEEDED (cl, this_alternative,
+						   GET_MODE (op)))
+		      || (curr_static_id->operand[nop].type != OP_IN
+			  && SECONDARY_MEMORY_NEEDED (this_alternative, cl,
+						      GET_MODE (op)))))
+		losers++;
+#endif
 	      /* Input reloads can be inherited more often than output
 		 reloads can be removed, so penalize output
 		 reloads.  */
Index: testsuite/gcc.target/i386/pr19398.c
===================================================================
--- testsuite/gcc.target/i386/pr19398.c	(revision 0)
+++ testsuite/gcc.target/i386/pr19398.c	(working copy)
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-options "-Os -msse -mno-sse3 -mfpmath=387" } */
+
+int test (float a)
+{
+  return (a * a);
+}
+
+/* { dg-final { scan-assembler-not "cvttss2si\[^\\n\]*%xmm" } } */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2012-11-20 18:09 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-11-19  2:27 RFA: patch to fix PR19398 Vladimir Makarov
2012-11-19  7:55 ` Uros Bizjak
2012-11-20 18:09   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).