public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386]: Macroize min/max patterns some more
@ 2011-09-19 18:30 Uros Bizjak
  2011-09-21  8:30 ` Jakub Jelinek
  0 siblings, 1 reply; 3+ messages in thread
From: Uros Bizjak @ 2011-09-19 18:30 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 618 bytes --]

Hello!

2011-09-19  Uros Bizjak  <ubizjak@gmail.com>

	* config/i386/i386.md (maxmin): New code iterator.
	* config/i386/sse.md (<maxmin:code><mode>3): Macroize expander
	from <umaxmin:code><mode>3 and <smaxmin:code><mode>3 using maxmin
	code iterator.
	(*avx2_<maxmin:code><mode>3): Macroize isn from
	*avx2_<umaxmin:code><mode>3 and *avx2_<smaxmin:code><mode>3 using
	maxmin code iterator.
	(<smaxmin:code><VI124_128:mode>3): Merge with <smaxmin:code>v8hi3.
	(<umaxmin:code><VI124_128:mode>3): Merge with umaxv4si3 and
	<umaxmin:code>v16qi3.

Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 9478 bytes --]

Index: i386.md
===================================================================
--- i386.md	(revision 178951)
+++ i386.md	(working copy)
@@ -751,6 +751,9 @@
 (define_code_attr comm [(plus "%") (ss_plus "%") (us_plus "%")
 			(minus "") (ss_minus "") (us_minus "")])
 
+;; Mapping of max and min
+(define_code_iterator maxmin [smax smin umax umin])
+
 ;; Mapping of signed max and min
 (define_code_iterator smaxmin [smax smin])
 
Index: sse.md
===================================================================
--- sse.md	(revision 178951)
+++ sse.md	(working copy)
@@ -5805,9 +5805,10 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
+
 (define_expand "<code><mode>3"
   [(set (match_operand:VI124_256 0 "register_operand" "")
-	(umaxmin:VI124_256
+	(maxmin:VI124_256
 	  (match_operand:VI124_256 1 "nonimmediate_operand" "")
 	  (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
   "TARGET_AVX2"
@@ -5815,7 +5816,7 @@
 
 (define_insn "*avx2_<code><mode>3"
   [(set (match_operand:VI124_256 0 "register_operand" "=x")
-	(umaxmin:VI124_256
+	(maxmin:VI124_256
 	  (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
 	  (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
   "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
@@ -5826,25 +5827,75 @@
    (set_attr "mode" "OI")])
 
 (define_expand "<code><mode>3"
-  [(set (match_operand:VI124_256 0 "register_operand" "")
-	(smaxmin:VI124_256
-	  (match_operand:VI124_256 1 "nonimmediate_operand" "")
-	  (match_operand:VI124_256 2 "nonimmediate_operand" "")))]
-  "TARGET_AVX2"
-  "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
+  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
+	(maxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
+			 (match_operand:VI8_AVX2 2 "register_operand" "")))]
+  "TARGET_SSE4_2"
+{
+  enum rtx_code code;
+  rtx xops[6];
+  bool ok;
 
-(define_insn "*avx2_<code><mode>3"
-  [(set (match_operand:VI124_256 0 "register_operand" "=x")
-	(smaxmin:VI124_256
-	  (match_operand:VI124_256 1 "nonimmediate_operand" "%x")
-	  (match_operand:VI124_256 2 "nonimmediate_operand" "xm")))]
-  "TARGET_AVX2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
-  "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "type" "sseiadd")
-   (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "vex")
-   (set_attr "mode" "OI")])
+  xops[0] = operands[0];
 
+  if (<CODE> == SMAX || <CODE> == UMAX)
+    {
+      xops[1] = operands[1];
+      xops[2] = operands[2];
+    }
+  else
+    {
+      xops[1] = operands[2];
+      xops[2] = operands[1];
+    }
+
+  code = (<CODE> == UMAX || <CODE> == UMIN) ? GTU : GT;
+
+  xops[3] = gen_rtx_fmt_ee (code, VOIDmode, operands[1], operands[2]);
+  xops[4] = operands[1];
+  xops[5] = operands[2];
+
+  ok = ix86_expand_int_vcond (xops);
+  gcc_assert (ok);
+  DONE;
+})
+
+(define_expand "<code><mode>3"
+  [(set (match_operand:VI124_128 0 "register_operand" "")
+	(smaxmin:VI124_128 (match_operand:VI124_128 1 "register_operand" "")
+			   (match_operand:VI124_128 2 "register_operand" "")))]
+  "TARGET_SSE2"
+{
+  if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
+    ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+  else
+    {
+      rtx xops[6];
+      bool ok;
+
+      xops[0] = operands[0];
+
+      if (<CODE> == SMAX)
+	{
+	  xops[1] = operands[1];
+	  xops[2] = operands[2];
+	}
+      else
+	{
+	  xops[1] = operands[2];
+	  xops[2] = operands[1];
+	}
+
+      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+      xops[4] = operands[1];
+      xops[5] = operands[2];
+
+      ok = ix86_expand_int_vcond (xops);
+      gcc_assert (ok);
+      DONE;
+    }
+})
+
 (define_insn "*sse4_1_<code><mode>3"
   [(set (match_operand:VI14_128 0 "register_operand" "=x,x")
 	(smaxmin:VI14_128
@@ -5877,58 +5928,50 @@
    (set_attr "mode" "TI")])
 
 (define_expand "<code><mode>3"
-  [(set (match_operand:VI14_128 0 "register_operand" "")
-	(smaxmin:VI14_128 (match_operand:VI14_128 1 "register_operand" "")
-			  (match_operand:VI14_128 2 "register_operand" "")))]
+  [(set (match_operand:VI124_128 0 "register_operand" "")
+	(umaxmin:VI124_128 (match_operand:VI124_128 1 "register_operand" "")
+			   (match_operand:VI124_128 2 "register_operand" "")))]
   "TARGET_SSE2"
 {
-  if (TARGET_SSE4_1)
+  if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
     ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);
+  else if (<CODE> == UMAX && <MODE>mode == V8HImode)
+    {
+      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
+      if (rtx_equal_p (op3, op2))
+	op3 = gen_reg_rtx (V8HImode);
+      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
+      emit_insn (gen_addv8hi3 (op0, op3, op2));
+      DONE;
+    }
   else
     {
       rtx xops[6];
       bool ok;
 
       xops[0] = operands[0];
-      xops[1] = operands[<CODE> == SMAX ? 1 : 2];
-      xops[2] = operands[<CODE> == SMAX ? 2 : 1];
-      xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
+
+      if (<CODE> == UMAX)
+	{
+	  xops[1] = operands[1];
+	  xops[2] = operands[2];
+	}
+      else
+	{
+	  xops[1] = operands[2];
+	  xops[2] = operands[1];
+	}
+
+      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
       xops[4] = operands[1];
       xops[5] = operands[2];
+
       ok = ix86_expand_int_vcond (xops);
       gcc_assert (ok);
       DONE;
     }
 })
 
-(define_expand "<code>v8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "")
-	(smaxmin:V8HI
-	  (match_operand:V8HI 1 "nonimmediate_operand" "")
-	  (match_operand:V8HI 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (<CODE>, V8HImode, operands);")
-
-(define_expand "<code><mode>3"
-  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
-	(smaxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
-			  (match_operand:VI8_AVX2 2 "register_operand" "")))]
-  "TARGET_SSE4_2"
-{
-  rtx xops[6];
-  bool ok;
-
-  xops[0] = operands[0];
-  xops[1] = operands[<CODE> == SMAX ? 1 : 2];
-  xops[2] = operands[<CODE> == SMAX ? 2 : 1];
-  xops[3] = gen_rtx_GT (VOIDmode, operands[1], operands[2]);
-  xops[4] = operands[1];
-  xops[5] = operands[2];
-  ok = ix86_expand_int_vcond (xops);
-  gcc_assert (ok);
-  DONE;
-})
-
 (define_insn "*sse4_1_<code><mode>3"
   [(set (match_operand:VI24_128 0 "register_operand" "=x,x")
 	(umaxmin:VI24_128
@@ -5960,103 +6003,6 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_expand "<code>v16qi3"
-  [(set (match_operand:V16QI 0 "register_operand" "")
-	(umaxmin:V16QI
-	  (match_operand:V16QI 1 "nonimmediate_operand" "")
-	  (match_operand:V16QI 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-  "ix86_fixup_binary_operands_no_copy (<CODE>, V16QImode, operands);")
-
-(define_expand "umaxv8hi3"
-  [(set (match_operand:V8HI 0 "register_operand" "")
-	(umax:V8HI (match_operand:V8HI 1 "register_operand" "")
-		   (match_operand:V8HI 2 "nonimmediate_operand" "")))]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_fixup_binary_operands_no_copy (UMAX, V8HImode, operands);
-  else
-    {
-      rtx op0 = operands[0], op2 = operands[2], op3 = op0;
-      if (rtx_equal_p (op3, op2))
-	op3 = gen_reg_rtx (V8HImode);
-      emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
-      emit_insn (gen_addv8hi3 (op0, op3, op2));
-      DONE;
-    }
-})
-
-(define_expand "umaxv4si3"
-  [(set (match_operand:V4SI 0 "register_operand" "")
-	(umax:V4SI (match_operand:V4SI 1 "register_operand" "")
-		   (match_operand:V4SI 2 "register_operand" "")))]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_fixup_binary_operands_no_copy (UMAX, V4SImode, operands);
-  else
-    {
-      rtx xops[6];
-      bool ok;
-
-      xops[0] = operands[0];
-      xops[1] = operands[1];
-      xops[2] = operands[2];
-      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
-      xops[4] = operands[1];
-      xops[5] = operands[2];
-      ok = ix86_expand_int_vcond (xops);
-      gcc_assert (ok);
-      DONE;
-    }
-})
-
-(define_expand "umin<mode>3"
-  [(set (match_operand:VI24_128 0 "register_operand" "")
-	(umin:VI24_128 (match_operand:VI24_128 1 "register_operand" "")
-		       (match_operand:VI24_128 2 "register_operand" "")))]
-  "TARGET_SSE2"
-{
-  if (TARGET_SSE4_1)
-    ix86_fixup_binary_operands_no_copy (UMIN, <MODE>mode, operands);
-  else
-    {
-      rtx xops[6];
-      bool ok;
-
-      xops[0] = operands[0];
-      xops[1] = operands[2];
-      xops[2] = operands[1];
-      xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
-      xops[4] = operands[1];
-      xops[5] = operands[2];
-      ok = ix86_expand_int_vcond (xops);
-      gcc_assert (ok);
-      DONE;
-    }
-})
-
-(define_expand "<code><mode>3"
-  [(set (match_operand:VI8_AVX2 0 "register_operand" "")
-	(umaxmin:VI8_AVX2 (match_operand:VI8_AVX2 1 "register_operand" "")
-			  (match_operand:VI8_AVX2 2 "register_operand" "")))]
-  "TARGET_SSE4_2"
-{
-  rtx xops[6];
-  bool ok;
-
-  xops[0] = operands[0];
-  xops[1] = operands[<CODE> == UMAX ? 1 : 2];
-  xops[2] = operands[<CODE> == UMAX ? 2 : 1];
-  xops[3] = gen_rtx_GTU (VOIDmode, operands[1], operands[2]);
-  xops[4] = operands[1];
-  xops[5] = operands[2];
-  ok = ix86_expand_int_vcond (xops);
-  gcc_assert (ok);
-  DONE;
-})
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Parallel integral comparisons

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, i386]: Macroize min/max patterns some more
  2011-09-19 18:30 [PATCH, i386]: Macroize min/max patterns some more Uros Bizjak
@ 2011-09-21  8:30 ` Jakub Jelinek
  2011-09-21  8:50   ` Uros Bizjak
  0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2011-09-21  8:30 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches

On Mon, Sep 19, 2011 at 07:10:11PM +0200, Uros Bizjak wrote:
> 2011-09-19  Uros Bizjak  <ubizjak@gmail.com>
> 
> 	* config/i386/i386.md (maxmin): New code iterator.
> 	* config/i386/sse.md (<maxmin:code><mode>3): Macroize expander
> 	from <umaxmin:code><mode>3 and <smaxmin:code><mode>3 using maxmin
> 	code iterator.
> 	(*avx2_<maxmin:code><mode>3): Macroize isn from
> 	*avx2_<umaxmin:code><mode>3 and *avx2_<smaxmin:code><mode>3 using
> 	maxmin code iterator.
> 	(<smaxmin:code><VI124_128:mode>3): Merge with <smaxmin:code>v8hi3.
> 	(<umaxmin:code><VI124_128:mode>3): Merge with umaxv4si3 and
> 	<umaxmin:code>v16qi3.
> 
> Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.

This regressed gcc.dg/vect/vect-reduc-10.c code quality with -msse2,
-	psubusw	(%rax), %xmm0
-	paddw	(%rax), %xmm0
+	movdqa	(%rax), %xmm1
 	addq	$16, %rax
 	cmpq	$aus+2048, %rax
+	psubusw	%xmm1, %xmm0
+	paddw	%xmm1, %xmm0

the problem is that the two expanders force arguments into registers
unconditionally, while previously in some cases they were
nonimmediate_operand instead of register_operand predicated.
This patch fixes that by always using nonimmediate_operand and adding
force_reg where needed.

2011-09-21  Jakub Jelinek  <jakub@redhat.com>

	* config/i386/sse.md (<code><mode>3 smaxmin:VI124_128 expander): Use
	nonimmediate_operand instead of register_operand predicate for operands
	1 and 2, force them into registers if expanding them as comparison.
	(<code><mode>3 umaxmin:VI124_128 expander): Similarly.  For UMAX
	V8HImode force into register just operand 1.

--- gcc/config/i386/sse.md.jj	2011-09-21 09:04:21.000000000 +0200
+++ gcc/config/i386/sse.md	2011-09-21 09:37:43.000000000 +0200
@@ -5997,8 +5997,8 @@ (define_expand "<code><mode>3"
 
 (define_expand "<code><mode>3"
   [(set (match_operand:VI124_128 0 "register_operand" "")
-	(smaxmin:VI124_128 (match_operand:VI124_128 1 "register_operand" "")
-			   (match_operand:VI124_128 2 "register_operand" "")))]
+	(smaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
+			   (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
 {
   if (TARGET_SSE4_1 || <MODE>mode == V8HImode)
@@ -6009,6 +6009,8 @@ (define_expand "<code><mode>3"
       bool ok;
 
       xops[0] = operands[0];
+      operands[1] = force_reg (<MODE>mode, operands[1]);
+      operands[2] = force_reg (<MODE>mode, operands[2]);
 
       if (<CODE> == SMAX)
 	{
@@ -6064,8 +6066,8 @@ (define_insn "*<code>v8hi3"
 
 (define_expand "<code><mode>3"
   [(set (match_operand:VI124_128 0 "register_operand" "")
-	(umaxmin:VI124_128 (match_operand:VI124_128 1 "register_operand" "")
-			   (match_operand:VI124_128 2 "register_operand" "")))]
+	(umaxmin:VI124_128 (match_operand:VI124_128 1 "nonimmediate_operand" "")
+			   (match_operand:VI124_128 2 "nonimmediate_operand" "")))]
   "TARGET_SSE2"
 {
   if (TARGET_SSE4_1 || <MODE>mode == V16QImode)
@@ -6073,6 +6075,7 @@ (define_expand "<code><mode>3"
   else if (<CODE> == UMAX && <MODE>mode == V8HImode)
     {
       rtx op0 = operands[0], op2 = operands[2], op3 = op0;
+      operands[1] = force_reg (<MODE>mode, operands[1]);
       if (rtx_equal_p (op3, op2))
 	op3 = gen_reg_rtx (V8HImode);
       emit_insn (gen_sse2_ussubv8hi3 (op3, operands[1], op2));
@@ -6084,6 +6087,9 @@ (define_expand "<code><mode>3"
       rtx xops[6];
       bool ok;
 
+      operands[1] = force_reg (<MODE>mode, operands[1]);
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+
       xops[0] = operands[0];
 
       if (<CODE> == UMAX)


	Jakub

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH, i386]: Macroize min/max patterns some more
  2011-09-21  8:30 ` Jakub Jelinek
@ 2011-09-21  8:50   ` Uros Bizjak
  0 siblings, 0 replies; 3+ messages in thread
From: Uros Bizjak @ 2011-09-21  8:50 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

On Wed, Sep 21, 2011 at 9:45 AM, Jakub Jelinek <jakub@redhat.com> wrote:

>>       * config/i386/i386.md (maxmin): New code iterator.
>>       * config/i386/sse.md (<maxmin:code><mode>3): Macroize expander
>>       from <umaxmin:code><mode>3 and <smaxmin:code><mode>3 using maxmin
>>       code iterator.
>>       (*avx2_<maxmin:code><mode>3): Macroize isn from
>>       *avx2_<umaxmin:code><mode>3 and *avx2_<smaxmin:code><mode>3 using
>>       maxmin code iterator.
>>       (<smaxmin:code><VI124_128:mode>3): Merge with <smaxmin:code>v8hi3.
>>       (<umaxmin:code><VI124_128:mode>3): Merge with umaxv4si3 and
>>       <umaxmin:code>v16qi3.
>>
>> Tested on x86_64-pc-linux-gnu {,-m32}, committed to mainline SVN.
>
> This regressed gcc.dg/vect/vect-reduc-10.c code quality with -msse2,
> -       psubusw (%rax), %xmm0
> -       paddw   (%rax), %xmm0
> +       movdqa  (%rax), %xmm1
>        addq    $16, %rax
>        cmpq    $aus+2048, %rax
> +       psubusw %xmm1, %xmm0
> +       paddw   %xmm1, %xmm0
>
> the problem is that the two expanders force arguments into registers
> unconditionally, while previously in some cases they were
> nonimmediate_operand instead of register_operand predicated.
> This patch fixes that by always using nonimmediate_operand and adding
> force_reg where needed.
>
> 2011-09-21  Jakub Jelinek  <jakub@redhat.com>
>
>        * config/i386/sse.md (<code><mode>3 smaxmin:VI124_128 expander): Use
>        nonimmediate_operand instead of register_operand predicate for operands
>        1 and 2, force them into registers if expanding them as comparison.
>        (<code><mode>3 umaxmin:VI124_128 expander): Similarly.  For UMAX
>        V8HImode force into register just operand 1.

OK.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-09-21  8:11 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-09-19 18:30 [PATCH, i386]: Macroize min/max patterns some more Uros Bizjak
2011-09-21  8:30 ` Jakub Jelinek
2011-09-21  8:50   ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).