[RFC PATCH, i386]: Enable post-reload compare elimination pass

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [RFC PATCH, i386]: Enable post-reload compare elimination pass
@ 2017-05-09 16:16 Uros Bizjak
  2017-05-10 14:31 ` Jakub Jelinek
  0 siblings, 1 reply; 25+ messages in thread
From: Uros Bizjak @ 2017-05-09 16:16 UTC (permalink / raw)
  To: gcc-patches; +Cc: Jeff Law, Jakub Jelinek

[-- Attachment #1: Type: text/plain, Size: 1198 bytes --]

Hello!

Attached patch enables post-reload compare elimination pass by
providing expected patterns (duplicates of existing patterns with
setters of reg and flags switched in the parallel) for flag setting
arithmetic instructions.

The merge triggers more than 3000 times during the gcc bootstrap,
mostly in cases where intervening memory load or store prevents
combine from merging the arithmetic insn and the following compare.

Also, some recent linux x86_64 defconfig build results in ~200 merges,
removing ~200 test/cmp insns. Not much, but I think the results still
warrant the pass to be enabled.

2017-05-09  Uros Bizjak  <ubizjak@gmail.com>

    * config/i386/i386-protos.h (ix86_match_ccmode_last): New prototype.
    * config/i386/i386.c (ix86_match_ccmode_1): Rename from
    ix86_match_ccmode.  Add "last" argument.  Make function static inline.
    (ix86_match_ccmode): New function.
    (ix86_match_ccmode_last): Ditto.
    (TARGET_FLAGS_REGNUM): Define.
    * config/i386/i386.md (*add<mode>_2b): New insn pattern.
    (*sub<mode>_2b): Ditto.
    (*and<mode>_2b): Ditto.
    (*<any_or:code><mode>_2b): Ditto.

Patch was bootstrapped and regression tested on x86_64-linux-gnu.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 6752 bytes --]

Index: i386-protos.h
===================================================================
--- i386-protos.h	(revision 247793)
+++ i386-protos.h	(working copy)
@@ -125,6 +125,7 @@ extern void ix86_split_copysign_const (rtx []);
 extern void ix86_split_copysign_var (rtx []);
 extern bool ix86_unary_operator_ok (enum rtx_code, machine_mode, rtx[]);
 extern bool ix86_match_ccmode (rtx, machine_mode);
+extern bool ix86_match_ccmode_last (rtx, machine_mode);
 extern void ix86_expand_branch (enum rtx_code, rtx, rtx, rtx);
 extern void ix86_expand_setcc (rtx, enum rtx_code, rtx, rtx);
 extern bool ix86_expand_int_movcc (rtx[]);
Index: i386.c
===================================================================
--- i386.c	(revision 247793)
+++ i386.c	(working copy)
@@ -22337,12 +22337,8 @@ ix86_split_copysign_var (rtx operands[])
   emit_insn (gen_rtx_SET (dest, x));
 }
 
-/* Return TRUE or FALSE depending on whether the first SET in INSN
-   has source and destination with matching CC modes, and that the
-   CC mode is at least as constrained as REQ_MODE.  */
-
-bool
-ix86_match_ccmode (rtx insn, machine_mode req_mode)
+static inline bool
+ix86_match_ccmode_1 (rtx insn, machine_mode req_mode, bool last)
 {
   rtx set;
   machine_mode set_mode;
@@ -22349,7 +22345,7 @@ ix86_split_copysign_var (rtx operands[])
 
   set = PATTERN (insn);
   if (GET_CODE (set) == PARALLEL)
-    set = XVECEXP (set, 0, 0);
+    set = XVECEXP (set, 0, last ? XVECLEN (set, 0) - 1 : 0);
   gcc_assert (GET_CODE (set) == SET);
   gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
 
@@ -22393,6 +22389,26 @@ ix86_split_copysign_var (rtx operands[])
   return GET_MODE (SET_SRC (set)) == set_mode;
 }
 
+/* Return TRUE or FALSE depending on whether the first SET in INSN
+   has source and destination with matching CC modes, and that the
+   CC mode is at least as constrained as REQ_MODE.  */
+
+bool
+ix86_match_ccmode (rtx insn, machine_mode req_mode)
+{
+  return ix86_match_ccmode_1 (insn, req_mode, false);
+}
+
+/* Return TRUE or FALSE depending on whether the last SET in INSN
+   has source and destination with matching CC modes, and that the
+   CC mode is at least as constrained as REQ_MODE.  */
+
+bool
+ix86_match_ccmode_last (rtx insn, machine_mode req_mode)
+{
+  return ix86_match_ccmode_1 (insn, req_mode, true);
+}
+
 /* Generate insn patterns to do an integer compare of OPERANDS.  */
 
 static rtx
@@ -52043,6 +52059,8 @@ ix86_run_selftests (void)
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST ix86_address_cost
 
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 #undef TARGET_FIXED_CONDITION_CODE_REGS
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
Index: i386.md
===================================================================
--- i386.md	(revision 247793)
+++ i386.md	(working copy)
@@ -5917,6 +5917,52 @@
 	(const_string "*")))
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*add<mode>_2b"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>,<r>m,<r>")
+	(plus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "%0,0,<r>")
+	  (match_operand:SWI 2 "<general_operand>" "<g>,<r><i>,0")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (plus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
+  "ix86_match_ccmode_last (insn, CCGOCmode)
+   && ix86_binary_operator_ok (PLUS, <MODE>mode, operands)
+   && reload_completed"
+{
+  switch (get_attr_type (insn))
+    {
+    case TYPE_INCDEC:
+      if (operands[2] == const1_rtx)
+	return "inc{<imodesuffix>}\t%0";
+      else
+	{
+	  gcc_assert (operands[2] == constm1_rtx);
+	  return "dec{<imodesuffix>}\t%0";
+	}
+
+    default:
+      if (which_alternative == 2)
+	std::swap (operands[1], operands[2]);
+        
+      gcc_assert (rtx_equal_p (operands[0], operands[1]));
+      if (x86_maybe_negate_const_int (&operands[2], <MODE>mode))
+	return "sub{<imodesuffix>}\t{%2, %0|%0, %2}";
+
+      return "add{<imodesuffix>}\t{%2, %0|%0, %2}";
+    }
+}
+  [(set (attr "type")
+     (if_then_else (match_operand:SWI 2 "incdec_operand")
+	(const_string "incdec")
+	(const_string "alu")))
+   (set (attr "length_immediate")
+      (if_then_else
+	(and (eq_attr "type" "alu") (match_operand 2 "const128_operand"))
+	(const_string "1")
+	(const_string "*")))
+   (set_attr "mode" "<MODE>")])
+
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 (define_insn "*addsi_2_zext"
   [(set (reg FLAGS_REG)
@@ -6568,6 +6614,22 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*sub<mode>_2b"
+  [(set (match_operand:SWI 0 "nonimmediate_operand" "=<r>m,<r>")
+	(minus:SWI
+	  (match_operand:SWI 1 "nonimmediate_operand" "0,0")
+	  (match_operand:SWI 2 "<general_operand>" "<r><i>,<r>m")))
+   (set (reg FLAGS_REG)
+	(compare
+	  (minus:SWI (match_dup 1) (match_dup 2))
+	  (const_int 0)))]
+  "ix86_match_ccmode_last (insn, CCGOCmode)
+   && ix86_binary_operator_ok (MINUS, <MODE>mode, operands)
+   && reload_completed"
+  "sub{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*subsi_2_zext"
   [(set (reg FLAGS_REG)
 	(compare
@@ -8476,6 +8538,21 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*and<mode>_2b"
+  [(set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m")
+	(and:SWI124
+	  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
+	  (match_operand:SWI124 2 "<general_operand>" "<g>,<r><i>")))
+   (set (reg FLAGS_REG)
+	(compare (and:SWI124 (match_dup 1) (match_dup 2))
+		 (const_int 0)))]
+  "ix86_match_ccmode_last (insn, CCNOmode)
+   && ix86_binary_operator_ok (AND, <MODE>mode, operands)
+   && reload_completed"
+  "and{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 (define_insn "*andqi_2_slp"
   [(set (reg FLAGS_REG)
 	(compare (and:QI
@@ -8822,6 +8899,21 @@
   [(set_attr "type" "alu")
    (set_attr "mode" "<MODE>")])
 
+(define_insn "*<code><mode>_2b"
+  [(set (match_operand:SWI124 0 "nonimmediate_operand" "=<r>,<r>m")
+	(any_or:SWI124
+	  (match_operand:SWI124 1 "nonimmediate_operand" "%0,0")
+	  (match_operand:SWI124 2 "<general_operand>" "<g>,<r><i>")))
+   (set (reg FLAGS_REG)
+	(compare (any_or:SWI124 (match_dup 1) (match_dup 2))
+		 (const_int 0)))]
+  "ix86_match_ccmode_last (insn, CCNOmode)
+   && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+   && reload_completed"
+  "<logic>{<imodesuffix>}\t{%2, %0|%0, %2}"
+  [(set_attr "type" "alu")
+   (set_attr "mode" "<MODE>")])
+
 ;; See comment for addsi_1_zext why we do use nonimmediate_operand
 ;; ??? Special case for immediate operand is missing - it is tricky.
 (define_insn "*<code>si_2_zext"

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-09 16:16 [RFC PATCH, i386]: Enable post-reload compare elimination pass Uros Bizjak
@ 2017-05-10 14:31 ` Jakub Jelinek
  2017-05-10 15:24   ` Uros Bizjak
  0 siblings, 1 reply; 25+ messages in thread
From: Jakub Jelinek @ 2017-05-10 14:31 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jeff Law

On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
> Attached patch enables post-reload compare elimination pass by
> providing expected patterns (duplicates of existing patterns with
> setters of reg and flags switched in the parallel) for flag setting
> arithmetic instructions.
> 
> The merge triggers more than 3000 times during the gcc bootstrap,
> mostly in cases where intervening memory load or store prevents
> combine from merging the arithmetic insn and the following compare.
> 
> Also, some recent linux x86_64 defconfig build results in ~200 merges,
> removing ~200 test/cmp insns. Not much, but I think the results still
> warrant the pass to be enabled.

Isn't the right fix instead to change the compare-elim.c pass to either
accept both reg vs. flags orderings in parallel, or both depending
on some target hook, or change it to the order i386.md and most other
major targets use and just fix up mn10300/rx (and aarch64?) to use the same
order?
I think this has been discussed before already several times.

> 
> 2017-05-09  Uros Bizjak  <ubizjak@gmail.com>
> 
>     * config/i386/i386-protos.h (ix86_match_ccmode_last): New prototype.
>     * config/i386/i386.c (ix86_match_ccmode_1): Rename from
>     ix86_match_ccmode.  Add "last" argument.  Make function static inline.
>     (ix86_match_ccmode): New function.
>     (ix86_match_ccmode_last): Ditto.
>     (TARGET_FLAGS_REGNUM): Define.
>     * config/i386/i386.md (*add<mode>_2b): New insn pattern.
>     (*sub<mode>_2b): Ditto.
>     (*and<mode>_2b): Ditto.
>     (*<any_or:code><mode>_2b): Ditto.

	Jakub

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 14:31 ` Jakub Jelinek
@ 2017-05-10 15:24   ` Uros Bizjak
  2017-05-10 19:21     ` Uros Bizjak
  0 siblings, 1 reply; 25+ messages in thread
From: Uros Bizjak @ 2017-05-10 15:24 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Jeff Law

On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>> Attached patch enables post-reload compare elimination pass by
>> providing expected patterns (duplicates of existing patterns with
>> setters of reg and flags switched in the parallel) for flag setting
>> arithmetic instructions.
>>
>> The merge triggers more than 3000 times during the gcc bootstrap,
>> mostly in cases where intervening memory load or store prevents
>> combine from merging the arithmetic insn and the following compare.
>>
>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>> removing ~200 test/cmp insns. Not much, but I think the results still
>> warrant the pass to be enabled.
>
> Isn't the right fix instead to change the compare-elim.c pass to either
> accept both reg vs. flags orderings in parallel, or both depending
> on some target hook, or change it to the order i386.md and most other
> major targets use and just fix up mn10300/rx (and aarch64?) to use the same
> order?

I was looking at compare-elim.c, where in line 675 function
try_eliminate_compare simply substitutes clobber of CC-reg with a new
compare RTX through a validate_change call. I'm not sure, what would
be the best way to handle both insn variants here. I was hoping
perhaps Jeff would help with the correct approach. Additional copy of
several patterns indeed seems heavily counter-productive.

> I think this has been discussed before already several times.

True, but there was no resolution. As an experiment, I was surprised,
how many cases patched compiler caught, even with the limited set of
additional patterns. So, the postreload cmpelim pass certainly brings
some benefits, in the same sense postreload ree pass does, to catch
additional opportunities that combine pass wasn't able to merge.

Uros.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 15:24   ` Uros Bizjak
@ 2017-05-10 19:21     ` Uros Bizjak
  2017-05-10 20:12       ` Uros Bizjak
                         ` (2 more replies)
  0 siblings, 3 replies; 25+ messages in thread
From: Uros Bizjak @ 2017-05-10 19:21 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Jeff Law, Richard Earnshaw, Nick Clifton

[-- Attachment #1: Type: text/plain, Size: 1250 bytes --]

On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>>> Attached patch enables post-reload compare elimination pass by
>>> providing expected patterns (duplicates of existing patterns with
>>> setters of reg and flags switched in the parallel) for flag setting
>>> arithmetic instructions.
>>>
>>> The merge triggers more than 3000 times during the gcc bootstrap,
>>> mostly in cases where intervening memory load or store prevents
>>> combine from merging the arithmetic insn and the following compare.
>>>
>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>>> removing ~200 test/cmp insns. Not much, but I think the results still
>>> warrant the pass to be enabled.
>>
>> Isn't the right fix instead to change the compare-elim.c pass to either
>> accept both reg vs. flags orderings in parallel, or both depending
>> on some target hook, or change it to the order i386.md and most other
>> major targets use and just fix up mn10300/rx (and aarch64?) to use the same
>> order?

Attached patch changes compare-elim.c order to what i386.md expects.

Thoughts?

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 2818 bytes --]

Index: compare-elim.c
===================================================================
--- compare-elim.c	(revision 247850)
+++ compare-elim.c	(working copy)
@@ -45,9 +45,9 @@
    (3) If an insn of form (2) can usefully set the flags, there is
        another pattern of the form
 
-	[(set (reg) (operation)
-	 (set (reg:CCM) (compare:CCM (operation) (immediate)))]
-
+	[(set (reg:CCM) (compare:CCM (operation) (immediate)))
+	 (set (reg) (operation)]
+	 
        The mode CCM will be chosen as if by SELECT_CC_MODE.
 
    Note that unlike NOTICE_UPDATE_CC, we do not handle memory operands.
@@ -582,7 +582,7 @@
 static bool
 try_eliminate_compare (struct comparison *cmp)
 {
-  rtx x, flags, in_a, in_b, cmp_src;
+  rtx flags, in_a, in_b, cmp_src;
 
   /* We must have found an interesting "clobber" preceding the compare.  */
   if (cmp->prev_clobber == NULL)
@@ -628,7 +628,8 @@
      Validate that PREV_CLOBBER itself does in fact refer to IN_A.  Do
      recall that we've already validated the shape of PREV_CLOBBER.  */
   rtx_insn *insn = cmp->prev_clobber;
-  x = XVECEXP (PATTERN (insn), 0, 0);
+
+  rtx x = XVECEXP (PATTERN (insn), 0, 0);
   if (rtx_equal_p (SET_DEST (x), in_a))
     cmp_src = SET_SRC (x);
 
@@ -666,16 +667,30 @@
     flags = gen_rtx_REG (cmp->orig_mode, targetm.flags_regnum);
 
   /* Generate a new comparison for installation in the setter.  */
-  x = copy_rtx (cmp_src);
-  x = gen_rtx_COMPARE (GET_MODE (flags), x, in_b);
-  x = gen_rtx_SET (flags, x);
+  rtx y = copy_rtx (cmp_src);
+  y = gen_rtx_COMPARE (GET_MODE (flags), y, in_b);
+  y = gen_rtx_SET (flags, y);
 
+  /* Canonicalize instruction to:
+	[(set (reg:CCM) (compare:CCM (operation) (immediate)))
+	 (set (reg) (operation)]
+  */
+
+  rtvec v = rtvec_alloc (2);
+  RTVEC_ELT (v, 0) = y;
+  RTVEC_ELT (v, 1) = x;
+  
+  rtx pat = gen_rtx_PARALLEL (VOIDmode, v);
+  
   /* Succeed if the new instruction is valid.  Note that we may have started
      a change group within maybe_select_cc_mode, therefore we must continue. */
-  validate_change (insn, &XVECEXP (PATTERN (insn), 0, 1), x, true);
+  validate_change (insn, &PATTERN (insn), pat, true);
   if (!apply_change_group ())
     return false;
 
+  printf ("TRIGGERED\n");
+  debug_rtx (pat);
+
   /* Success.  Delete the compare insn...  */
   delete_insn (cmp->insn);
 
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 247850)
+++ config/i386/i386.c	(working copy)
@@ -52043,6 +52043,8 @@
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST ix86_address_cost
 
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 #undef TARGET_FIXED_CONDITION_CODE_REGS
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 19:21     ` Uros Bizjak
@ 2017-05-10 20:12       ` Uros Bizjak
  2017-05-10 20:42         ` Jakub Jelinek
  2017-05-11 18:37       ` Jeff Law
  2017-05-12 18:38       ` Jeff Law
  2 siblings, 1 reply; 25+ messages in thread
From: Uros Bizjak @ 2017-05-10 20:12 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Jeff Law, Richard Earnshaw, Nick Clifton

On Wed, May 10, 2017 at 9:05 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>>>> Attached patch enables post-reload compare elimination pass by
>>>> providing expected patterns (duplicates of existing patterns with
>>>> setters of reg and flags switched in the parallel) for flag setting
>>>> arithmetic instructions.
>>>>
>>>> The merge triggers more than 3000 times during the gcc bootstrap,
>>>> mostly in cases where intervening memory load or store prevents
>>>> combine from merging the arithmetic insn and the following compare.
>>>>
>>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>>>> removing ~200 test/cmp insns. Not much, but I think the results still
>>>> warrant the pass to be enabled.
>>>
>>> Isn't the right fix instead to change the compare-elim.c pass to either
>>> accept both reg vs. flags orderings in parallel, or both depending
>>> on some target hook, or change it to the order i386.md and most other
>>> major targets use and just fix up mn10300/rx (and aarch64?) to use the same
>>> order?
>
> Attached patch changes compare-elim.c order to what i386.md expects.

BTW: This patch now catches 417 cases (instead of 200+) in linux
build, including e.g.:

(parallel [
        (set (reg:CCZ 17 flags)
            (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
                    (const_int 1 [0x1]))
                (const_int 0 [0])))
        (set (reg:DI 4 si)
            (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
                    (const_int 1 [0x1]))))
    ])

Uros.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 20:12       ` Uros Bizjak
@ 2017-05-10 20:42         ` Jakub Jelinek
  2017-05-11 17:17           ` Jeff Law
  2017-05-11 22:09           ` Hans-Peter Nilsson
  0 siblings, 2 replies; 25+ messages in thread
From: Jakub Jelinek @ 2017-05-10 20:42 UTC (permalink / raw)
  To: Uros Bizjak, Jeff Law, Alexandre Oliva, Nick Clifton, Eric Botcazou
  Cc: gcc-patches, Richard Earnshaw

On Wed, May 10, 2017 at 09:57:56PM +0200, Uros Bizjak wrote:
> BTW: This patch now catches 417 cases (instead of 200+) in linux
> build, including e.g.:
> 
> (parallel [
>         (set (reg:CCZ 17 flags)
>             (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
>                     (const_int 1 [0x1]))
>                 (const_int 0 [0])))
>         (set (reg:DI 4 si)
>             (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
>                     (const_int 1 [0x1]))))
>     ])

That looks nice.  So, I think we need analysis on what order which targets
use.  I have looked at mn10300.md, I see {add,sub}si3_flags patterns that
would need PARALLEL reordering for this compare-elim.c change and then
cmp_liw vs. liw_cmp patterns I have no clue what they do and whether
compare-elim.c would care about those or not (they have UNSPECs).  Jeff/Alex?

In rx.md I see {add,sub}si3_flags too, then ssaddsi3 and 2 peepholes that
would need changing.

In visium.md I see flags_subst_{logic,arith} define_substs,
*{add,sub}<mode>3_insn_set_{carry,overflow}
{add,sub}si3_insn_set_{carry,overflow}, negsi2_insn_set_carry
and *neg<mode>2_insn_set_overflow that would need changing.

aarch64 is the only remaining compare-elim.c enabled target
(one that defines TARGET_FLAGS_REGNUM), and that one seems to use
the same parallel order as i386.md, so compare-elim.c most likely just
doesn't work there at all.

So all in all, sounds like we need to change at least 17 patterns on 3
not very widely used targets.

	Jakub

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 20:42         ` Jakub Jelinek
@ 2017-05-11 17:17           ` Jeff Law
  2017-05-11 22:09           ` Hans-Peter Nilsson
  1 sibling, 0 replies; 25+ messages in thread
From: Jeff Law @ 2017-05-11 17:17 UTC (permalink / raw)
  To: Jakub Jelinek, Uros Bizjak, Alexandre Oliva, Nick Clifton, Eric Botcazou
  Cc: gcc-patches, Richard Earnshaw

On 05/10/2017 02:27 PM, Jakub Jelinek wrote:
> On Wed, May 10, 2017 at 09:57:56PM +0200, Uros Bizjak wrote:
>> BTW: This patch now catches 417 cases (instead of 200+) in linux
>> build, including e.g.:
>>
>> (parallel [
>>          (set (reg:CCZ 17 flags)
>>              (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
>>                      (const_int 1 [0x1]))
>>                  (const_int 0 [0])))
>>          (set (reg:DI 4 si)
>>              (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
>>                      (const_int 1 [0x1]))))
>>      ])
> 
> That looks nice.  So, I think we need analysis on what order which targets
> use.  I have looked at mn10300.md, I see {add,sub}si3_flags patterns that
> would need PARALLEL reordering for this compare-elim.c change and then
> cmp_liw vs. liw_cmp patterns I have no clue what they do and whether
> compare-elim.c would care about those or not (they have UNSPECs).  Jeff/Alex?
No idea what the cmp_liw and liw_cmp patterns are -- they were added 
after my involvement with the mn103 ended.

I know it's been discussed before, but all that's needed here is to put 
the assignment to CC_REG first in patterns were CC_REG is assigned a 
useful value, right?  For patterns were CC_REG is clobbered, we leave 
them as-is, right?

Does this impact logicals or just arithmetic?  If it hits logicals, 
there's a few more patterns in the mn103 port and likely the rx port as 
well.  But the changes look highly mechanical.

Furthermore, it's trivial for me to build this port through newlib.  So 
I could build before the change, stash away the resulting libc.a, then 
build after the change and compare the contents of the resulting 
objects.  In theory they should be identical unless I'm missing something.

I would say don't let the mn103 stop this work.  If the work goes 
forward, I'll own cleaning up the mn103.  I don't mind assigning the 
cleanup of rx to Nick to be done after the generic changes are done.

That just leaves visium which Eric B. owns.  Note that I can test visium 
in the same way I can test mn103 and rx, so if someone does the visium 
work I can test it.

Jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 19:21     ` Uros Bizjak
  2017-05-10 20:12       ` Uros Bizjak
@ 2017-05-11 18:37       ` Jeff Law
  2017-05-11 18:45         ` Jakub Jelinek
  2017-05-11 18:50         ` Uros Bizjak
  2017-05-12 18:38       ` Jeff Law
  2 siblings, 2 replies; 25+ messages in thread
From: Jeff Law @ 2017-05-11 18:37 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek; +Cc: gcc-patches, Richard Earnshaw, Nick Clifton

On 05/10/2017 01:05 PM, Uros Bizjak wrote:
> On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>>>> Attached patch enables post-reload compare elimination pass by
>>>> providing expected patterns (duplicates of existing patterns with
>>>> setters of reg and flags switched in the parallel) for flag setting
>>>> arithmetic instructions.
>>>>
>>>> The merge triggers more than 3000 times during the gcc bootstrap,
>>>> mostly in cases where intervening memory load or store prevents
>>>> combine from merging the arithmetic insn and the following compare.
>>>>
>>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>>>> removing ~200 test/cmp insns. Not much, but I think the results still
>>>> warrant the pass to be enabled.
>>>
>>> Isn't the right fix instead to change the compare-elim.c pass to either
>>> accept both reg vs. flags orderings in parallel, or both depending
>>> on some target hook, or change it to the order i386.md and most other
>>> major targets use and just fix up mn10300/rx (and aarch64?) to use the same
>>> order?
> 
> Attached patch changes compare-elim.c order to what i386.md expects.
> 
> Thoughts?
Haven't looked at the patch itself.  But I do have the necessary bits to 
convert the mn103 port.  It was slightly more than just fixing the md 
file, but nothing significant or time consuming.  The net result is 100% 
identical code for newlib before your patch vs after your patch w/mn103 
converted.

Hell, it was easy enough, I'll take a cut at the rx port.

jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-11 18:37       ` Jeff Law
@ 2017-05-11 18:45         ` Jakub Jelinek
  2017-05-11 18:50         ` Uros Bizjak
  1 sibling, 0 replies; 25+ messages in thread
From: Jakub Jelinek @ 2017-05-11 18:45 UTC (permalink / raw)
  To: Jeff Law; +Cc: Uros Bizjak, gcc-patches, Richard Earnshaw, Nick Clifton

On Thu, May 11, 2017 at 12:28:56PM -0600, Jeff Law wrote:
> On 05/10/2017 01:05 PM, Uros Bizjak wrote:
> > On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> > > On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> > > > On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
> > > > > Attached patch enables post-reload compare elimination pass by
> > > > > providing expected patterns (duplicates of existing patterns with
> > > > > setters of reg and flags switched in the parallel) for flag setting
> > > > > arithmetic instructions.
> > > > > 
> > > > > The merge triggers more than 3000 times during the gcc bootstrap,
> > > > > mostly in cases where intervening memory load or store prevents
> > > > > combine from merging the arithmetic insn and the following compare.
> > > > > 
> > > > > Also, some recent linux x86_64 defconfig build results in ~200 merges,
> > > > > removing ~200 test/cmp insns. Not much, but I think the results still
> > > > > warrant the pass to be enabled.
> > > > 
> > > > Isn't the right fix instead to change the compare-elim.c pass to either
> > > > accept both reg vs. flags orderings in parallel, or both depending
> > > > on some target hook, or change it to the order i386.md and most other
> > > > major targets use and just fix up mn10300/rx (and aarch64?) to use the same
> > > > order?
> > 
> > Attached patch changes compare-elim.c order to what i386.md expects.
> > 
> > Thoughts?
> Haven't looked at the patch itself.  But I do have the necessary bits to
> convert the mn103 port.  It was slightly more than just fixing the md file,
> but nothing significant or time consuming.  The net result is 100% identical
> code for newlib before your patch vs after your patch w/mn103 converted.
> 
> Hell, it was easy enough, I'll take a cut at the rx port.

I have completely untested patch for both, here is what I came up with:

--- gcc/config/mn10300/mn10300.md.jj	2017-01-01 12:45:40.000000000 +0100
+++ gcc/config/mn10300/mn10300.md	2017-05-11 20:12:52.685388632 +0200
@@ -592,12 +592,12 @@ (define_insn "addsi3"
 
 ;; Note that ADD IMM,SP does not set the flags, so omit that here.
 (define_insn "*addsi3_flags"
-  [(set (match_operand:SI          0 "register_operand"  "=r,!r")
-  	(plus:SI (match_operand:SI 1 "register_operand"  "%0, r")
-		 (match_operand:SI 2 "nonmemory_operand" "ri, r")))
-   (set (reg CC_REG)
-   	(compare (plus:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (plus:SI (match_operand:SI 1 "register_operand"  "%0, r")
+			  (match_operand:SI 2 "nonmemory_operand" "ri, r"))
+		 (const_int 0)))
+   (set (match_operand:SI		    0 "register_operand"  "=r,!r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && mn10300_match_ccmode (insn, CCZNCmode)"
   { return mn10300_output_add (operands, true); }
   [(set_attr "timings" "11,22")]
@@ -605,12 +605,13 @@ (define_insn "*addsi3_flags"
 
 ;; A helper to expand the above, with the CC_MODE filled in.
 (define_expand "addsi3_flags"
-  [(parallel [(set (match_operand:SI 0 "register_operand")
-		   (plus:SI (match_operand:SI 1 "register_operand")
-			    (match_operand:SI 2 "nonmemory_operand")))
-	      (set (reg:CCZNC CC_REG)
-		   (compare:CCZNC (plus:SI (match_dup 1) (match_dup 2))
-				  (const_int 0)))])]
+  [(parallel [(set (reg:CCZNC CC_REG)
+		   (compare:CCZNC
+		     (plus:SI (match_operand:SI 1 "register_operand")
+			      (match_operand:SI 2 "nonmemory_operand"))
+		     (const_int 0)))
+	      (set (match_operand:SI 0 "register_operand")
+		   (plus:SI (match_dup 1) (match_dup 2)))])]
   ""
 )
 
@@ -791,12 +792,12 @@ (define_insn "subsi3"
 )
 
 (define_insn "*subsi3_flags"
-  [(set (match_operand:SI           0 "register_operand"  "=r, r")
-	(minus:SI (match_operand:SI 1 "register_operand"   "0, r")
-		  (match_operand:SI 2 "nonmemory_operand"  "ri,r")))
-   (set (reg CC_REG)
-   	(compare (minus:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (minus:SI (match_operand:SI 1 "register_operand"   "0, r")
+			   (match_operand:SI 2 "nonmemory_operand"  "ri,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		     0 "register_operand"  "=r, r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && mn10300_match_ccmode (insn, CCZNCmode)"
   "@
    sub %2,%0
@@ -807,12 +808,13 @@ (define_insn "*subsi3_flags"
 
 ;; A helper to expand the above, with the CC_MODE filled in.
 (define_expand "subsi3_flags"
-  [(parallel [(set (match_operand:SI 0 "register_operand")
-		   (minus:SI (match_operand:SI 1 "register_operand")
-			     (match_operand:SI 2 "nonmemory_operand")))
-	      (set (reg:CCZNC CC_REG)
-		   (compare:CCZNC (minus:SI (match_dup 1) (match_dup 2))
-				  (const_int 0)))])]
+  [(parallel [(set (reg:CCZNC CC_REG)
+		   (compare:CCZNC
+		     (minus:SI (match_operand:SI 1 "register_operand")
+			       (match_operand:SI 2 "nonmemory_operand"))
+		     (const_int 0)))
+	      (set (match_operand:SI 0 "register_operand")
+		   (minus:SI (match_dup 1) (match_dup 2)))])]
   ""
 )
 
@@ -1195,12 +1197,12 @@ (define_insn "andsi3"
 )
 
 (define_insn "*andsi3_flags"
-  [(set (match_operand:SI         0 "register_operand"  "=D,D,r")
-	(and:SI (match_operand:SI 1 "register_operand"  "%0,0,r")
-		(match_operand:SI 2 "nonmemory_operand" " i,D,r")))
-   (set (reg CC_REG)
-   	(compare (and:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (and:SI (match_operand:SI 1 "register_operand"  "%0,0,r")
+			 (match_operand:SI 2 "nonmemory_operand" " i,D,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand"  "=D,D,r")
+	(and:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
   "@
    and %2,%0
@@ -1282,12 +1284,12 @@ (define_insn "iorsi3"
 )
 
 (define_insn "*iorsi3_flags"
-  [(set (match_operand:SI         0 "register_operand"  "=D,D,r")
-	(ior:SI (match_operand:SI 1 "register_operand"  "%0,0,r")
-		(match_operand:SI 2 "nonmemory_operand" " i,D,r")))
-   (set (reg CC_REG)
-   	(compare (ior:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (ior:SI (match_operand:SI 1 "register_operand"  "%0,0,r")
+			 (match_operand:SI 2 "nonmemory_operand" " i,D,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand"  "=D,D,r")
+	(ior:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
   "@
    or %2,%0
@@ -1318,12 +1320,12 @@ (define_insn "xorsi3"
 )
 
 (define_insn "*xorsi3_flags"
-  [(set (match_operand:SI         0 "register_operand"  "=D,D,r")
-	(xor:SI (match_operand:SI 1 "register_operand"  "%0,0,r")
-		(match_operand:SI 2 "nonmemory_operand" " i,D,r")))
-   (set (reg CC_REG)
-   	(compare (xor:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (xor:SI (match_operand:SI 1 "register_operand"  "%0,0,r")
+			 (match_operand:SI 2 "nonmemory_operand" " i,D,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand"  "=D,D,r")
+	(xor:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
   "@
    xor %2,%0
@@ -1346,11 +1348,11 @@ (define_insn "one_cmplsi2"
 )
 
 (define_insn "*one_cmplsi2_flags"
-  [(set (match_operand:SI         0 "register_operand" "=D")
-	(not:SI (match_operand:SI 1 "register_operand" " 0")))
-   (set (reg CC_REG)
-   	(compare (not:SI (match_dup 1))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (not:SI (match_operand:SI 1 "register_operand" " 0"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand" "=D")
+	(not:SI (match_dup 1)))]
   "reload_completed && mn10300_match_ccmode (insn, CCZNmode)"
   "not %0"
 )
--- gcc/config/rx/rx.md.jj	2017-01-01 12:45:42.000000000 +0100
+++ gcc/config/rx/rx.md	2017-05-11 20:37:59.815063239 +0200
@@ -843,11 +843,11 @@ (define_insn "abssi2"
 )
 
 (define_insn "*abssi2_flags"
-  [(set (match_operand:SI         0 "register_operand" "=r,r")
-        (abs:SI (match_operand:SI 1 "register_operand"  "0,r")))
-   (set (reg CC_REG)
-	(compare (abs:SI (match_dup 1))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+        (compare (abs:SI (match_operand:SI 1 "register_operand"  "0,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand" "=r,r")
+	(abs:SI (match_dup 1)))]
   ;; Note - although the ABS instruction does set the O bit in the processor
   ;; status word, it does not do so in a way that is comparable with the CMP
   ;; instruction.  Hence we use CC_ZSmode rather than CC_ZSOmode.
@@ -897,12 +897,12 @@ (define_insn "addsi3_internal"
 )
 
 (define_insn "*addsi3_flags"
-  [(set (match_operand:SI          0 "register_operand"  "=r,r,r,r,r,r,r,r,r,r,r,r,r,r")
-	(plus:SI (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0,0,r,r,r,r,r,r,0")
-		 (match_operand:SI 2 "rx_source_operand" "r,Uint04,NEGint4,Sint08,Sint16,Sint24,i,0,r,Sint08,Sint16,Sint24,i,Q")))
-   (set (reg CC_REG)
-	(compare (plus:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (plus:SI (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0,0,r,r,r,r,r,r,0")
+			  (match_operand:SI 2 "rx_source_operand" "r,Uint04,NEGint4,Sint08,Sint16,Sint24,i,0,r,Sint08,Sint16,Sint24,i,Q"))
+		 (const_int 0)))
+   (set (match_operand:SI		    0 "register_operand"  "=r,r,r,r,r,r,r,r,r,r,r,r,r,r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSCmode)"
   "@
   add\t%2, %0
@@ -925,12 +925,13 @@ (define_insn "*addsi3_flags"
 
 ;; A helper to expand the above with the CC_MODE filled in.
 (define_expand "addsi3_flags"
-  [(parallel [(set (match_operand:SI 0 "register_operand")
-		   (plus:SI (match_operand:SI 1 "register_operand")
-			    (match_operand:SI 2 "rx_source_operand")))
-	      (set (reg:CC_ZSC CC_REG)
-		   (compare:CC_ZSC (plus:SI (match_dup 1) (match_dup 2))
-				   (const_int 0)))])]
+  [(parallel [(set (reg:CC_ZSC CC_REG)
+		   (compare:CC_ZSC
+		     (plus:SI (match_operand:SI 1 "register_operand")
+			      (match_operand:SI 2 "rx_source_operand"))
+		     (const_int 0)))
+	      (set (match_operand:SI 0 "register_operand")
+		   (plus:SI (match_dup 1) (match_dup 2)))])]
 )
 
 (define_insn "adc_internal"
@@ -948,20 +949,20 @@ (define_insn "adc_internal"
 )
 
 (define_insn "*adc_flags"
-  [(set (match_operand:SI     0 "register_operand"  "=r,r,r,r,r,r")
-	(plus:SI
-	  (plus:SI
-	    (ltu:SI (reg:CC CC_REG) (const_int 0))
-	    (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0"))
-	  (match_operand:SI   2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q")))
-   (set (reg CC_REG)
-	(compare 
+  [(set (reg CC_REG)
+	(compare
 	  (plus:SI
 	    (plus:SI
 	      (ltu:SI (reg:CC CC_REG) (const_int 0))
-	      (match_dup 1))
-	    (match_dup 2))
-	  (const_int 0)))]
+	      (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0"))
+	    (match_operand:SI   2 "rx_source_operand" "r,Sint08,Sint16,Sint24,i,Q"))
+	  (const_int 0)))
+   (set (match_operand:SI	0 "register_operand"  "=r,r,r,r,r,r")
+	(plus:SI
+	  (plus:SI
+	    (ltu:SI (reg:CC CC_REG) (const_int 0))
+	    (match_dup 1))
+	  (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSCmode)"
   "adc\t%2, %0"
   [(set_attr "timings" "11,11,11,11,11,33")
@@ -969,36 +970,36 @@ (define_insn "*adc_flags"
 )
 
 ;; Peepholes to match:
-;;   (set (reg A) (reg B))
 ;;   (set (CC) (compare:CC (reg A/reg B) (const_int 0)))
+;;   (set (reg A) (reg B))
 ;; and replace them with the addsi3_flags pattern, using an add
 ;; of zero to copy the register and set the condition code bits.
 (define_peephole2
-  [(set (match_operand:SI 0 "register_operand")
-        (match_operand:SI 1 "register_operand"))
-   (set (reg:CC CC_REG)
-        (compare:CC (match_dup 0)
-                    (const_int 0)))]
-  ""
-  [(parallel [(set (match_dup 0)
-		   (plus:SI (match_dup 1) (const_int 0)))
-	      (set (reg:CC_ZSC CC_REG)
+  [(set (reg:CC CC_REG)
+        (compare:CC (match_operand:SI 0 "register_operand")
+		    (const_int 0)))
+   (set (match_dup 0)
+	(match_operand:SI 1 "register_operand"))]
+  ""
+  [(parallel [(set (reg:CC_ZSC CC_REG)
 		   (compare:CC_ZSC (plus:SI (match_dup 1) (const_int 0))
-				   (const_int 0)))])]
+				   (const_int 0)))
+	      (set (match_dup 0)
+		   (plus:SI (match_dup 1) (const_int 0)))])]
 )
 
 (define_peephole2
-  [(set (match_operand:SI 0 "register_operand")
-        (match_operand:SI 1 "register_operand"))
-   (set (reg:CC CC_REG)
-        (compare:CC (match_dup 1)
-                    (const_int 0)))]
-  ""
-  [(parallel [(set (match_dup 0)
-		   (plus:SI (match_dup 1) (const_int 0)))
-	      (set (reg:CC_ZSC CC_REG)
+  [(set (reg:CC CC_REG)
+        (compare:CC (match_operand:SI 1 "register_operand")
+		    (const_int 0)))
+   (set (match_operand:SI 0 "register_operand")
+        (match_dup 1))]
+  ""
+  [(parallel [(set (reg:CC_ZSC CC_REG)
 		   (compare:CC_ZSC (plus:SI (match_dup 1) (const_int 0))
-				   (const_int 0)))])]
+				   (const_int 0)))
+	      (set (match_dup 0)
+		   (plus:SI (match_dup 1) (const_int 0)))])]
 )
 
 (define_expand "adddi3"
@@ -1109,12 +1110,12 @@ (define_insn "andsi3"
 )
 
 (define_insn "*andsi3_flags"
-  [(set (match_operand:SI         0 "register_operand"  "=r,r,r,r,r,r,r,r,r")
-	(and:SI (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0,r,r,0")
-		(match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
-   (set (reg CC_REG)
-	(compare (and:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (and:SI (match_operand:SI 1 "register_operand"  "%0,0,0,0,0,0,r,r,0")
+			 (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand"  "=r,r,r,r,r,r,r,r,r")
+	(and:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   and\t%2, %0
@@ -1341,11 +1342,11 @@ (define_insn "negsi2"
 ;; Note that the O and C flags are not set as per a normal compare,
 ;; and thus are unusable in that context.
 (define_insn "*negsi2_flags"
-  [(set (match_operand:SI         0 "register_operand" "=r,r")
-        (neg:SI (match_operand:SI 1 "register_operand"  "0,r")))
-   (set (reg CC_REG)
-	(compare (neg:SI (match_dup 1))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+        (compare (neg:SI (match_operand:SI 1 "register_operand"  "0,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand" "=r,r")
+	(neg:SI (match_dup 1)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   neg\t%0
@@ -1365,11 +1366,11 @@ (define_insn "one_cmplsi2"
 )
 
 (define_insn "*one_cmplsi2_flags"
-  [(set (match_operand:SI         0 "register_operand" "=r,r")
-	(not:SI (match_operand:SI 1 "register_operand"  "0,r")))
-   (set (reg CC_REG)
-	(compare (not:SI (match_dup 1))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (not:SI (match_operand:SI 1 "register_operand"  "0,r"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand" "=r,r")
+	(not:SI (match_dup 1)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   not\t%0
@@ -1398,12 +1399,12 @@ (define_insn "iorsi3"
 )
 
 (define_insn "*iorsi3_flags"
-  [(set (match_operand:SI         0 "register_operand" "=r,r,r,r,r,r,r,r,r")
-	(ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
-	        (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q")))
-   (set (reg CC_REG)
-	(compare (ior:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (ior:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0,r,r,0")
+			 (match_operand:SI 2 "rx_source_operand" "r,Uint04,Sint08,Sint16,Sint24,i,0,r,Q"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand" "=r,r,r,r,r,r,r,r,r")
+	(ior:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   or\t%2, %0
@@ -1430,12 +1431,12 @@ (define_insn "rotlsi3"
 )
 
 (define_insn "*rotlsi3_flags"
-  [(set (match_operand:SI            0 "register_operand" "=r")
-	(rotate:SI (match_operand:SI 1 "register_operand"  "0")
-		   (match_operand:SI 2 "rx_shift_operand" "rn")))
-   (set (reg CC_REG)
-	(compare (rotate:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (rotate:SI (match_operand:SI 1 "register_operand"  "0")
+			    (match_operand:SI 2 "rx_shift_operand" "rn"))
+		 (const_int 0)))
+   (set (match_operand:SI		      0 "register_operand" "=r")
+	(rotate:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "rotl\t%2, %0"
   [(set_attr "length" "3")]
@@ -1452,12 +1453,12 @@ (define_insn "rotrsi3"
 )
 
 (define_insn "*rotrsi3_flags"
-  [(set (match_operand:SI              0 "register_operand" "=r")
-	(rotatert:SI (match_operand:SI 1 "register_operand"  "0")
-		     (match_operand:SI 2 "rx_shift_operand" "rn")))
-   (set (reg CC_REG)
-	(compare (rotatert:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (rotatert:SI (match_operand:SI 1 "register_operand"  "0")
+			      (match_operand:SI 2 "rx_shift_operand" "rn"))
+		 (const_int 0)))
+   (set (match_operand:SI			0 "register_operand" "=r")
+	(rotatert:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "rotr\t%2, %0"
   [(set_attr "length" "3")]
@@ -1477,12 +1478,12 @@ (define_insn "ashrsi3"
 )
 
 (define_insn "*ashrsi3_flags"
-  [(set (match_operand:SI              0 "register_operand" "=r,r,r")
-	(ashiftrt:SI (match_operand:SI 1 "register_operand"  "0,0,r")
-		     (match_operand:SI 2 "rx_shift_operand"  "r,n,n")))
-   (set (reg CC_REG)
-	(compare (ashiftrt:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (ashiftrt:SI (match_operand:SI 1 "register_operand"  "0,0,r")
+			      (match_operand:SI 2 "rx_shift_operand"  "r,n,n"))
+		 (const_int 0)))
+   (set (match_operand:SI              0 "register_operand" "=r,r,r")
+	(ashiftrt:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   shar\t%2, %0
@@ -1505,12 +1506,12 @@ (define_insn "lshrsi3"
 )
 
 (define_insn "*lshrsi3_flags"
-  [(set (match_operand:SI              0 "register_operand" "=r,r,r")
-	(lshiftrt:SI (match_operand:SI 1 "register_operand"  "0,0,r")
-		     (match_operand:SI 2 "rx_shift_operand"  "r,n,n")))
-   (set (reg CC_REG)
-	(compare (lshiftrt:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (lshiftrt:SI (match_operand:SI 1 "register_operand"  "0,0,r")
+			      (match_operand:SI 2 "rx_shift_operand"  "r,n,n"))
+		 (const_int 0)))
+   (set (match_operand:SI			0 "register_operand" "=r,r,r")
+	(lshiftrt:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   shlr\t%2, %0
@@ -1533,12 +1534,12 @@ (define_insn "ashlsi3"
 )
 
 (define_insn "*ashlsi3_flags"
-  [(set (match_operand:SI            0 "register_operand" "=r,r,r")
-	(ashift:SI (match_operand:SI 1 "register_operand"  "0,0,r")
-	           (match_operand:SI 2 "rx_shift_operand"  "r,n,n")))
-   (set (reg CC_REG)
-	(compare (ashift:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (ashift:SI (match_operand:SI 1 "register_operand"  "0,0,r")
+			    (match_operand:SI 2 "rx_shift_operand"  "r,n,n"))
+		 (const_int 0)))
+   (set (match_operand:SI		      0 "register_operand" "=r,r,r")
+	(ashift:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "@
   shll\t%2, %0
@@ -1556,12 +1557,12 @@ (define_insn_and_split "ssaddsi3"
   ""
   "#"
   "reload_completed"
-  [(parallel [(set (match_dup 0)
-		   (plus:SI (match_dup 1) (match_dup 2)))
-	      (set (reg:CC_ZSC CC_REG)
+  [(parallel [(set (reg:CC_ZSC CC_REG)
 		   (compare:CC_ZSC
 		     (plus:SI (match_dup 1) (match_dup 2))
-		     (const_int 0)))])
+		     (const_int 0)))
+	      (set (match_dup 0)
+		   (plus:SI (match_dup 1) (match_dup 2)))])
    (set (match_dup 0)
 	(unspec:SI [(match_dup 0) (reg:CC CC_REG)] 
 		   UNSPEC_BUILTIN_SAT))]
@@ -1597,12 +1598,12 @@ (define_insn "subsi3"
 ;; Note that the O flag is set as if (compare op1 op2) not for
 ;; what is described here, (compare op0 0).
 (define_insn "*subsi3_flags"
-  [(set (match_operand:SI           0 "register_operand" "=r,r,r,r,r")
-	(minus:SI (match_operand:SI 1 "register_operand"  "0,0,0,r,0")
-		  (match_operand:SI 2 "rx_source_operand" "r,Uint04,n,r,Q")))
-   (set (reg CC_REG)
-	(compare (minus:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (minus:SI (match_operand:SI 1 "register_operand"  "0,0,0,r,0")
+			   (match_operand:SI 2 "rx_source_operand" "r,Uint04,n,r,Q"))
+		 (const_int 0)))
+   (set (match_operand:SI		     0 "register_operand" "=r,r,r,r,r")
+	(minus:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSCmode)"
   "@
   sub\t%2, %0
@@ -1616,12 +1617,13 @@ (define_insn "*subsi3_flags"
 
 ;; A helper to expand the above with the CC_MODE filled in.
 (define_expand "subsi3_flags"
-  [(parallel [(set (match_operand:SI 0 "register_operand")
-		   (minus:SI (match_operand:SI 1 "register_operand")
-			     (match_operand:SI 2 "rx_source_operand")))
-	      (set (reg:CC_ZSC CC_REG)
-		   (compare:CC_ZSC (minus:SI (match_dup 1) (match_dup 2))
-				   (const_int 0)))])]
+  [(parallel [(set (reg:CC_ZSC CC_REG)
+		   (compare:CC_ZSC
+		     (minus:SI (match_operand:SI 1 "register_operand")
+			       (match_operand:SI 2 "rx_source_operand"))
+		     (const_int 0)))
+	      (set (match_operand:SI 0 "register_operand")
+		   (minus:SI (match_dup 1) (match_dup 2)))])]
 )
 
 (define_insn "sbb_internal"
@@ -1639,18 +1641,18 @@ (define_insn "sbb_internal"
 )
 
 (define_insn "*sbb_flags"
-  [(set (match_operand:SI     0 "register_operand"   "=r,r")
-	(minus:SI
-	  (minus:SI
-	    (match_operand:SI 1 "register_operand"   " 0,0")
-	    (match_operand:SI 2 "rx_compare_operand" " r,Q"))
-	  (geu:SI (reg:CC CC_REG) (const_int 0))))
-   (set (reg CC_REG)
+  [(set (reg CC_REG)
 	(compare
 	  (minus:SI
-	    (minus:SI (match_dup 1) (match_dup 2))
+	    (minus:SI
+	      (match_operand:SI 1 "register_operand"   " 0,0")
+	      (match_operand:SI 2 "rx_compare_operand" " r,Q"))
 	    (geu:SI (reg:CC CC_REG) (const_int 0)))
-	  (const_int 0)))]
+	  (const_int 0)))
+   (set (match_operand:SI	0 "register_operand"   "=r,r")
+	(minus:SI
+	  (minus:SI (match_dup 1) (match_dup 2))
+	  (geu:SI (reg:CC CC_REG) (const_int 0))))]
   "reload_completed"
   "sbb\t%2, %0"
   [(set_attr "timings" "11,33")
@@ -1710,13 +1712,13 @@ (define_insn "xorsi3"
 )
 
 (define_insn "*xorsi3_flags"
-  [(set (match_operand:SI         0 "register_operand" "=r,r,r,r,r,r")
-	(xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
-	        (match_operand:SI 2 "rx_source_operand"
-				  "r,Sint08,Sint16,Sint24,i,Q")))
-   (set (reg CC_REG)
-	(compare (xor:SI (match_dup 1) (match_dup 2))
-		 (const_int 0)))]
+  [(set (reg CC_REG)
+	(compare (xor:SI (match_operand:SI 1 "register_operand" "%0,0,0,0,0,0")
+			 (match_operand:SI 2 "rx_source_operand"
+						"r,Sint08,Sint16,Sint24,i,Q"))
+		 (const_int 0)))
+   (set (match_operand:SI		   0 "register_operand" "=r,r,r,r,r,r")
+	(xor:SI (match_dup 1) (match_dup 2)))]
   "reload_completed && rx_match_ccmode (insn, CC_ZSmode)"
   "xor\t%Q2, %0"
   [(set_attr "timings" "11,11,11,11,11,33")


	Jakub

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-11 18:37       ` Jeff Law
  2017-05-11 18:45         ` Jakub Jelinek
@ 2017-05-11 18:50         ` Uros Bizjak
  2017-05-12  7:03           ` Jeff Law
  1 sibling, 1 reply; 25+ messages in thread
From: Uros Bizjak @ 2017-05-11 18:50 UTC (permalink / raw)
  To: Jeff Law; +Cc: Jakub Jelinek, gcc-patches, Richard Earnshaw, Nick Clifton

[-- Attachment #1: Type: text/plain, Size: 1243 bytes --]

On Thu, May 11, 2017 at 8:28 PM, Jeff Law <law@redhat.com> wrote:

>> Attached patch changes compare-elim.c order to what i386.md expects.
>>
>> Thoughts?
>
> Haven't looked at the patch itself.  But I do have the necessary bits to
> convert the mn103 port.  It was slightly more than just fixing the md file,
> but nothing significant or time consuming.  The net result is 100% identical
> code for newlib before your patch vs after your patch w/mn103 converted.
>
> Hell, it was easy enough, I'll take a cut at the rx port.

Attached to this message, please find a version of the patch that
should ease transition of other targets by declaring  and handling a
temporary macro. This way, aarch64 can also test what happens when
postreload cmp elimination goes live for real.

2017-05-11  Uros Bizjak  <ubizjak@gmail.com>

    * compare-elim.c (try_eliminate_compare)
    [TARGET_HAS_ALTERNATIVE_POSTRELOAD_EMBEDDED_COMPARE_PATTERNS]:
    Canonicalize operation with embedded compare to
    [(set (reg:CCM) (compare:CCM (operation) (immediate)))
     (set (reg) (operation)].

    * config/i386/i386.c (TARGET_FLAGS_REGNUM): New define.
    * config/i386/i386.h
    (TARGET_HAS_ALTERNATIVE_POSTRELOAD_EMBEDDED_COMPARE_PATTERNS): Ditto.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 3532 bytes --]

Index: compare-elim.c
===================================================================
--- compare-elim.c	(revision 247914)
+++ compare-elim.c	(working copy)
@@ -45,9 +45,9 @@ along with GCC; see the file COPYING3.  If not see
    (3) If an insn of form (2) can usefully set the flags, there is
        another pattern of the form
 
-	[(set (reg) (operation)
-	 (set (reg:CCM) (compare:CCM (operation) (immediate)))]
-
+	[(set (reg:CCM) (compare:CCM (operation) (immediate)))
+	 (set (reg) (operation)]
+	 
        The mode CCM will be chosen as if by SELECT_CC_MODE.
 
    Note that unlike NOTICE_UPDATE_CC, we do not handle memory operands.
@@ -582,7 +582,7 @@ equivalent_reg_at_start (rtx reg, rtx_insn *end, r
 static bool
 try_eliminate_compare (struct comparison *cmp)
 {
-  rtx x, flags, in_a, in_b, cmp_src;
+  rtx flags, in_a, in_b, cmp_src;
 
   /* We must have found an interesting "clobber" preceding the compare.  */
   if (cmp->prev_clobber == NULL)
@@ -628,7 +628,8 @@ try_eliminate_compare (struct comparison *cmp)
      Validate that PREV_CLOBBER itself does in fact refer to IN_A.  Do
      recall that we've already validated the shape of PREV_CLOBBER.  */
   rtx_insn *insn = cmp->prev_clobber;
-  x = XVECEXP (PATTERN (insn), 0, 0);
+
+  rtx x = XVECEXP (PATTERN (insn), 0, 0);
   if (rtx_equal_p (SET_DEST (x), in_a))
     cmp_src = SET_SRC (x);
 
@@ -666,13 +667,28 @@ try_eliminate_compare (struct comparison *cmp)
     flags = gen_rtx_REG (cmp->orig_mode, targetm.flags_regnum);
 
   /* Generate a new comparison for installation in the setter.  */
-  x = copy_rtx (cmp_src);
-  x = gen_rtx_COMPARE (GET_MODE (flags), x, in_b);
-  x = gen_rtx_SET (flags, x);
+  rtx y = copy_rtx (cmp_src);
+  y = gen_rtx_COMPARE (GET_MODE (flags), y, in_b);
+  y = gen_rtx_SET (flags, y);
 
+#if TARGET_HAS_ALTERNATIVE_POSTRELOAD_EMBEDDED_COMPARE_PATTERNS
+  /* Canonicalize instruction to:
+     [(set (reg:CCM) (compare:CCM (operation) (immediate)))
+      (set (reg) (operation)]  */
+
+  rtvec v = rtvec_alloc (2);
+  RTVEC_ELT (v, 0) = y;
+  RTVEC_ELT (v, 1) = x;
+  
+  rtx pat = gen_rtx_PARALLEL (VOIDmode, v);
+  
   /* Succeed if the new instruction is valid.  Note that we may have started
      a change group within maybe_select_cc_mode, therefore we must continue. */
-  validate_change (insn, &XVECEXP (PATTERN (insn), 0, 1), x, true);
+  validate_change (insn, &PATTERN (insn), pat, true);
+#else
+  validate_change (insn, &XVECEXP (PATTERN (insn), 0, 1), y, true);
+#endif
+  
   if (!apply_change_group ())
     return false;
 
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 247914)
+++ config/i386/i386.c	(working copy)
@@ -52043,6 +52043,8 @@ ix86_run_selftests (void)
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST ix86_address_cost
 
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 #undef TARGET_FIXED_CONDITION_CODE_REGS
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
Index: config/i386/i386.h
===================================================================
--- config/i386/i386.h	(revision 247914)
+++ config/i386/i386.h	(working copy)
@@ -2658,6 +2658,9 @@ extern void debug_dispatch_window (int);
 
 #define TARGET_SUPPORTS_WIDE_INT 1
 
+/* Temporary macro to ease transition, should be removed ASAP.  */
+#define TARGET_HAS_ALTERNATIVE_POSTRELOAD_EMBEDDED_COMPARE_PATTERNS 1
+
 /*
 Local variables:
 version-control: t

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 20:42         ` Jakub Jelinek
  2017-05-11 17:17           ` Jeff Law
@ 2017-05-11 22:09           ` Hans-Peter Nilsson
  2017-05-12  6:13             ` Jeff Law
  2017-05-12  9:43             ` Hans-Peter Nilsson
  1 sibling, 2 replies; 25+ messages in thread
From: Hans-Peter Nilsson @ 2017-05-11 22:09 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Uros Bizjak, Jeff Law, Alexandre Oliva, Nick Clifton,
	Eric Botcazou, gcc-patches, Richard Earnshaw

On Wed, 10 May 2017, Jakub Jelinek wrote:

> On Wed, May 10, 2017 at 09:57:56PM +0200, Uros Bizjak wrote:
> > BTW: This patch now catches 417 cases (instead of 200+) in linux
> > build, including e.g.:
> >
> > (parallel [
> >         (set (reg:CCZ 17 flags)
> >             (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
> >                     (const_int 1 [0x1]))
> >                 (const_int 0 [0])))
> >         (set (reg:DI 4 si)
> >             (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
> >                     (const_int 1 [0x1]))))
> >     ])
>
> That looks nice.  So, I think we need analysis on what order which targets
> use.  I have looked at mn10300.md, I see {add,sub}si3_flags patterns that
> would need PARALLEL reordering for this compare-elim.c change and then
> cmp_liw vs. liw_cmp patterns I have no clue what they do and whether
> compare-elim.c would care about those or not (they have UNSPECs).  Jeff/Alex?
>
> In rx.md I see {add,sub}si3_flags too, then ssaddsi3 and 2 peepholes that
> would need changing.

The canonical order of insns affecting condition-codes has been
discussed in the past too.

I was then arguing that the compare should go last, i.e.
 (parallel [(set (reg) (op...))
            (set (ccreg) (compare (op...) (const_int 0)))])
for simplicity of processing together with an alternative that
clobbers (i.e. same location in the parallel), as the canonical
order of clobbers in a parallel is that they always go last.
(IIRC from that distant past, transforming a cc0 target to
"ccreg" required at least three variants: the "naked" insn
(pre-reload?), the parallel with a clobber of ccreg, and the
actual use of ccreg; as above.)

Anyway, people seem to drift towards the ccreg-last variant for
some reason or another every time this is brought up; this time
it seems the single reason is that some existing pass expects or
generates this, using the order that causes the minimal fallout.
(Maybe it's the same pass...)  Not sure that's a good base for
decisions.

Whatever you do in the end, *please document the canonical
order* in the RTL documentation.

brgds, H-P

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-11 22:09           ` Hans-Peter Nilsson
@ 2017-05-12  6:13             ` Jeff Law
  2017-05-17  3:40               ` Hans-Peter Nilsson
  2017-05-12  9:43             ` Hans-Peter Nilsson
  1 sibling, 1 reply; 25+ messages in thread
From: Jeff Law @ 2017-05-12  6:13 UTC (permalink / raw)
  To: Hans-Peter Nilsson, Jakub Jelinek
  Cc: Uros Bizjak, Alexandre Oliva, Nick Clifton, Eric Botcazou,
	gcc-patches, Richard Earnshaw

On 05/11/2017 03:29 PM, Hans-Peter Nilsson wrote:
> On Wed, 10 May 2017, Jakub Jelinek wrote:
> 
>> On Wed, May 10, 2017 at 09:57:56PM +0200, Uros Bizjak wrote:
>>> BTW: This patch now catches 417 cases (instead of 200+) in linux
>>> build, including e.g.:
>>>
>>> (parallel [
>>>          (set (reg:CCZ 17 flags)
>>>              (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
>>>                      (const_int 1 [0x1]))
>>>                  (const_int 0 [0])))
>>>          (set (reg:DI 4 si)
>>>              (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
>>>                      (const_int 1 [0x1]))))
>>>      ])
>>
>> That looks nice.  So, I think we need analysis on what order which targets
>> use.  I have looked at mn10300.md, I see {add,sub}si3_flags patterns that
>> would need PARALLEL reordering for this compare-elim.c change and then
>> cmp_liw vs. liw_cmp patterns I have no clue what they do and whether
>> compare-elim.c would care about those or not (they have UNSPECs).  Jeff/Alex?
>>
>> In rx.md I see {add,sub}si3_flags too, then ssaddsi3 and 2 peepholes that
>> would need changing.
> 
> The canonical order of insns affecting condition-codes has been
> discussed in the past too.
> 
> I was then arguing that the compare should go last, i.e.
>   (parallel [(set (reg) (op...))
>              (set (ccreg) (compare (op...) (const_int 0)))])
> for simplicity of processing together with an alternative that
> clobbers (i.e. same location in the parallel), as the canonical
> order of clobbers in a parallel is that they always go last.
> (IIRC from that distant past, transforming a cc0 target to
> "ccreg" required at least three variants: the "naked" insn
> (pre-reload?), the parallel with a clobber of ccreg, and the
> actual use of ccreg; as above.)
> 
> Anyway, people seem to drift towards the ccreg-last variant for
> some reason or another every time this is brought up; this time
> it seems the single reason is that some existing pass expects or
> generates this, using the order that causes the minimal fallout.
> (Maybe it's the same pass...)  Not sure that's a good base for
> decisions.
> 
> Whatever you do in the end, *please document the canonical
> order* in the RTL documentation.
I think what's driving the decision is more a desire not to muck with 
x86 and aarch64 and instead place the burden to change on the less 
popular ports.

Though I must admit i prefer the cc setting last since that mirrors how 
we typically do things with clobbers of the cc register.

But yes, we definitely should document the final canonical ordering.

jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-11 18:50         ` Uros Bizjak
@ 2017-05-12  7:03           ` Jeff Law
  0 siblings, 0 replies; 25+ messages in thread
From: Jeff Law @ 2017-05-12  7:03 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, gcc-patches, Richard Earnshaw, Nick Clifton

On 05/11/2017 12:46 PM, Uros Bizjak wrote:
> On Thu, May 11, 2017 at 8:28 PM, Jeff Law <law@redhat.com> wrote:
> 
>>> Attached patch changes compare-elim.c order to what i386.md expects.
>>>
>>> Thoughts?
>>
>> Haven't looked at the patch itself.  But I do have the necessary bits to
>> convert the mn103 port.  It was slightly more than just fixing the md file,
>> but nothing significant or time consuming.  The net result is 100% identical
>> code for newlib before your patch vs after your patch w/mn103 converted.
>>
>> Hell, it was easy enough, I'll take a cut at the rx port.
> 
> Attached to this message, please find a version of the patch that
> should ease transition of other targets by declaring  and handling a
> temporary macro. This way, aarch64 can also test what happens when
> postreload cmp elimination goes live for real.
> 
> 2017-05-11  Uros Bizjak  <ubizjak@gmail.com>
> 
>      * compare-elim.c (try_eliminate_compare)
>      [TARGET_HAS_ALTERNATIVE_POSTRELOAD_EMBEDDED_COMPARE_PATTERNS]:
>      Canonicalize operation with embedded compare to
>      [(set (reg:CCM) (compare:CCM (operation) (immediate)))
>       (set (reg) (operation)].
Bah.  No need for handling both -- it's easy enough to just test the 
target and see what's caught or missing.

So far the mn103, rx and visium ports, generate the same code 
before/after conversion with fairly simple patches.

So I really think we should test aarch64, then do final review.

jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-11 22:09           ` Hans-Peter Nilsson
  2017-05-12  6:13             ` Jeff Law
@ 2017-05-12  9:43             ` Hans-Peter Nilsson
  2017-05-12 10:33               ` Jakub Jelinek
  1 sibling, 1 reply; 25+ messages in thread
From: Hans-Peter Nilsson @ 2017-05-12  9:43 UTC (permalink / raw)
  To: gcc-patches

(To-list pruned, my correction doesn't need attention.)

On Thu, 11 May 2017, Hans-Peter Nilsson wrote:
> On Wed, 10 May 2017, Jakub Jelinek wrote:
>
> > On Wed, May 10, 2017 at 09:57:56PM +0200, Uros Bizjak wrote:
> > > BTW: This patch now catches 417 cases (instead of 200+) in linux
> > > build, including e.g.:
> > >
> > > (parallel [
> > >         (set (reg:CCZ 17 flags)
> > >             (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
> > >                     (const_int 1 [0x1]))
> > >                 (const_int 0 [0])))
> > >         (set (reg:DI 4 si)
> > >             (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
> > >                     (const_int 1 [0x1]))))
> > >     ])

> Anyway, people seem to drift towards the ccreg-last variant

JFTR, I miswrote that; I meant "towards the variant with
ccreg-first" as in Uros' example kept above and as opposed to my
example.

brgds, H-P

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-12  9:43             ` Hans-Peter Nilsson
@ 2017-05-12 10:33               ` Jakub Jelinek
  0 siblings, 0 replies; 25+ messages in thread
From: Jakub Jelinek @ 2017-05-12 10:33 UTC (permalink / raw)
  To: Hans-Peter Nilsson; +Cc: gcc-patches

On Fri, May 12, 2017 at 05:42:59AM -0400, Hans-Peter Nilsson wrote:
> (To-list pruned, my correction doesn't need attention.)
> 
> On Thu, 11 May 2017, Hans-Peter Nilsson wrote:
> > On Wed, 10 May 2017, Jakub Jelinek wrote:
> >
> > > On Wed, May 10, 2017 at 09:57:56PM +0200, Uros Bizjak wrote:
> > > > BTW: This patch now catches 417 cases (instead of 200+) in linux
> > > > build, including e.g.:
> > > >
> > > > (parallel [
> > > >         (set (reg:CCZ 17 flags)
> > > >             (compare:CCZ (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
> > > >                     (const_int 1 [0x1]))
> > > >                 (const_int 0 [0])))
> > > >         (set (reg:DI 4 si)
> > > >             (zero_extend:DI (lshiftrt:SI (reg:SI 4 si [orig:93 _10 ] [93])
> > > >                     (const_int 1 [0x1]))))
> > > >     ])
> 
> > Anyway, people seem to drift towards the ccreg-last variant
> 
> JFTR, I miswrote that; I meant "towards the variant with
> ccreg-first" as in Uros' example kept above and as opposed to my
> example.

If I skim the current primary/secondary targets, then i386/x86_64, rs6000,
aarch64, arm, sparc, s390* all use the ccreg-first order, in mips I couldn't
find either of the orders.
Looking at other targets, in alpha, avr, bfin, c6x, cr16, cris, fr30, ft32,
h8300, ia64, iq2000, lm32, m32c, m32r, m68k, mcore, microblaze, mmix, moxie,
nds32, nios2, nvptx, pa, pdp11, riscv, rl78, sh, spu, stormy16, tilegx,
tilepro, v850, vax, xtensa I can't find any order,
arc, epiphany are ccreg-first,
frv is some weird mixture of ccreg-first (e.g. *combo_intop_compare2) and
ccreg-last (e.g. adddi3_lower),
mn10300, rx and visium are ccreg-last that we want to switch over now to
ccreg-first.
It was all from quickly skimming (mostly) the main config/<cpu>/<cpu>.md
looking for \(compare and/or \(reg.*CC and looking if there are patterns
that have the same arithmetics inside a compare or something similar vs.
the operation repeated on a set, so it is possible I've missed something.

But if the above is roughly true, then it is of course preferable to change
3 less used targets than 6 primary/secondary ones + 2 further ones.

	Jakub

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-10 19:21     ` Uros Bizjak
  2017-05-10 20:12       ` Uros Bizjak
  2017-05-11 18:37       ` Jeff Law
@ 2017-05-12 18:38       ` Jeff Law
  2017-05-12 19:14         ` Uros Bizjak
  2017-05-17  9:33         ` Eric Botcazou
  2 siblings, 2 replies; 25+ messages in thread
From: Jeff Law @ 2017-05-12 18:38 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek; +Cc: gcc-patches, Richard Earnshaw, Nick Clifton

On 05/10/2017 01:05 PM, Uros Bizjak wrote:
> On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>>>> Attached patch enables post-reload compare elimination pass by
>>>> providing expected patterns (duplicates of existing patterns with
>>>> setters of reg and flags switched in the parallel) for flag setting
>>>> arithmetic instructions.
>>>>
>>>> The merge triggers more than 3000 times during the gcc bootstrap,
>>>> mostly in cases where intervening memory load or store prevents
>>>> combine from merging the arithmetic insn and the following compare.
>>>>
>>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>>>> removing ~200 test/cmp insns. Not much, but I think the results still
>>>> warrant the pass to be enabled.
>>>
>>> Isn't the right fix instead to change the compare-elim.c pass to either
>>> accept both reg vs. flags orderings in parallel, or both depending
>>> on some target hook, or change it to the order i386.md and most other
>>> major targets use and just fix up mn10300/rx (and aarch64?) to use the same
>>> order?
> 
> Attached patch changes compare-elim.c order to what i386.md expects.
> 
> Thoughts?
I think with an appropriate change to the canonicalization rules in the 
manual this is fine.

I've got the visium, rx and mn103 patches handy to ensure they don't 
regress.  aarch64 doesn't seem to be affected either way but I didn't 
investigate why -- I expected it to improve with your change.

I'll write up a ChangeLog and commit the mn103, rx and visium changes 
after you commit the compare-elim & documentation bits.

jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-12 18:38       ` Jeff Law
@ 2017-05-12 19:14         ` Uros Bizjak
  2017-05-12 19:52           ` Uros Bizjak
  2017-05-13 14:43           ` Markus Trippelsdorf
  2017-05-17  9:33         ` Eric Botcazou
  1 sibling, 2 replies; 25+ messages in thread
From: Uros Bizjak @ 2017-05-12 19:14 UTC (permalink / raw)
  To: Jeff Law; +Cc: Jakub Jelinek, gcc-patches, Richard Earnshaw, Nick Clifton

[-- Attachment #1: Type: text/plain, Size: 2422 bytes --]

On Fri, May 12, 2017 at 8:34 PM, Jeff Law <law@redhat.com> wrote:
> On 05/10/2017 01:05 PM, Uros Bizjak wrote:
>>
>> On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>
>>> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>>>
>>>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>>>>>
>>>>> Attached patch enables post-reload compare elimination pass by
>>>>> providing expected patterns (duplicates of existing patterns with
>>>>> setters of reg and flags switched in the parallel) for flag setting
>>>>> arithmetic instructions.
>>>>>
>>>>> The merge triggers more than 3000 times during the gcc bootstrap,
>>>>> mostly in cases where intervening memory load or store prevents
>>>>> combine from merging the arithmetic insn and the following compare.
>>>>>
>>>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>>>>> removing ~200 test/cmp insns. Not much, but I think the results still
>>>>> warrant the pass to be enabled.
>>>>
>>>>
>>>> Isn't the right fix instead to change the compare-elim.c pass to either
>>>> accept both reg vs. flags orderings in parallel, or both depending
>>>> on some target hook, or change it to the order i386.md and most other
>>>> major targets use and just fix up mn10300/rx (and aarch64?) to use the
>>>> same
>>>> order?
>>
>>
>> Attached patch changes compare-elim.c order to what i386.md expects.
>>
>> Thoughts?
>
> I think with an appropriate change to the canonicalization rules in the
> manual this is fine.
>
> I've got the visium, rx and mn103 patches handy to ensure they don't
> regress.  aarch64 doesn't seem to be affected either way but I didn't
> investigate why -- I expected it to improve with your change.
>
> I'll write up a ChangeLog and commit the mn103, rx and visium changes after
> you commit the compare-elim & documentation bits.

Thanks!

I went ahead and commit the patch without documentation change (which
I plan to submit in a separate patch ASAP), with the following
ChangeLog:

2017-05-12  Uros Bizjak  <ubizjak@gmail.com>

    * compare-elim.c (try_eliminate_compare): Canonicalize
    operation with embedded compare to
    [(set (reg:CCM) (compare:CCM (operation) (immediate)))
     (set (reg) (operation)].

    * config/i386/i386.c (TARGET_FLAGS_REGNUM): New define.

Re-bootstrapped and re-tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.

[-- Attachment #2: p.diff.txt --]
[-- Type: text/plain, Size: 2924 bytes --]

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 247978)
+++ config/i386/i386.c	(working copy)
@@ -51805,6 +51826,8 @@ ix86_run_selftests (void)
 #undef TARGET_ADDRESS_COST
 #define TARGET_ADDRESS_COST ix86_address_cost
 
+#undef TARGET_FLAGS_REGNUM
+#define TARGET_FLAGS_REGNUM FLAGS_REG
 #undef TARGET_FIXED_CONDITION_CODE_REGS
 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
 #undef TARGET_CC_MODES_COMPATIBLE
Index: compare-elim.c
===================================================================
--- compare-elim.c	(revision 247978)
+++ compare-elim.c	(working copy)
@@ -45,9 +45,9 @@ along with GCC; see the file COPYING3.  If not see
    (3) If an insn of form (2) can usefully set the flags, there is
        another pattern of the form
 
-	[(set (reg) (operation)
-	 (set (reg:CCM) (compare:CCM (operation) (immediate)))]
-
+	[(set (reg:CCM) (compare:CCM (operation) (immediate)))
+	 (set (reg) (operation)]
+	 
        The mode CCM will be chosen as if by SELECT_CC_MODE.
 
    Note that unlike NOTICE_UPDATE_CC, we do not handle memory operands.
@@ -582,7 +582,7 @@ equivalent_reg_at_start (rtx reg, rtx_insn *end, r
 static bool
 try_eliminate_compare (struct comparison *cmp)
 {
-  rtx x, flags, in_a, in_b, cmp_src;
+  rtx flags, in_a, in_b, cmp_src;
 
   /* We must have found an interesting "clobber" preceding the compare.  */
   if (cmp->prev_clobber == NULL)
@@ -628,7 +628,8 @@ try_eliminate_compare (struct comparison *cmp)
      Validate that PREV_CLOBBER itself does in fact refer to IN_A.  Do
      recall that we've already validated the shape of PREV_CLOBBER.  */
   rtx_insn *insn = cmp->prev_clobber;
-  x = XVECEXP (PATTERN (insn), 0, 0);
+
+  rtx x = XVECEXP (PATTERN (insn), 0, 0);
   if (rtx_equal_p (SET_DEST (x), in_a))
     cmp_src = SET_SRC (x);
 
@@ -666,13 +667,24 @@ try_eliminate_compare (struct comparison *cmp)
     flags = gen_rtx_REG (cmp->orig_mode, targetm.flags_regnum);
 
   /* Generate a new comparison for installation in the setter.  */
-  x = copy_rtx (cmp_src);
-  x = gen_rtx_COMPARE (GET_MODE (flags), x, in_b);
-  x = gen_rtx_SET (flags, x);
+  rtx y = copy_rtx (cmp_src);
+  y = gen_rtx_COMPARE (GET_MODE (flags), y, in_b);
+  y = gen_rtx_SET (flags, y);
 
+  /* Canonicalize instruction to:
+     [(set (reg:CCM) (compare:CCM (operation) (immediate)))
+      (set (reg) (operation)]  */
+
+  rtvec v = rtvec_alloc (2);
+  RTVEC_ELT (v, 0) = y;
+  RTVEC_ELT (v, 1) = x;
+  
+  rtx pat = gen_rtx_PARALLEL (VOIDmode, v);
+  
   /* Succeed if the new instruction is valid.  Note that we may have started
      a change group within maybe_select_cc_mode, therefore we must continue. */
-  validate_change (insn, &XVECEXP (PATTERN (insn), 0, 1), x, true);
+  validate_change (insn, &PATTERN (insn), pat, true);
+  
   if (!apply_change_group ())
     return false;
 

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-12 19:14         ` Uros Bizjak
@ 2017-05-12 19:52           ` Uros Bizjak
  2017-05-13 14:43           ` Markus Trippelsdorf
  1 sibling, 0 replies; 25+ messages in thread
From: Uros Bizjak @ 2017-05-12 19:52 UTC (permalink / raw)
  To: Jeff Law; +Cc: Jakub Jelinek, gcc-patches, Richard Earnshaw, Nick Clifton

[-- Attachment #1: Type: text/plain, Size: 2239 bytes --]

On Fri, May 12, 2017 at 9:09 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> On Fri, May 12, 2017 at 8:34 PM, Jeff Law <law@redhat.com> wrote:
>> On 05/10/2017 01:05 PM, Uros Bizjak wrote:
>>>
>>> On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
>>>>
>>>> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
>>>>>
>>>>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
>>>>>>
>>>>>> Attached patch enables post-reload compare elimination pass by
>>>>>> providing expected patterns (duplicates of existing patterns with
>>>>>> setters of reg and flags switched in the parallel) for flag setting
>>>>>> arithmetic instructions.
>>>>>>
>>>>>> The merge triggers more than 3000 times during the gcc bootstrap,
>>>>>> mostly in cases where intervening memory load or store prevents
>>>>>> combine from merging the arithmetic insn and the following compare.
>>>>>>
>>>>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
>>>>>> removing ~200 test/cmp insns. Not much, but I think the results still
>>>>>> warrant the pass to be enabled.
>>>>>
>>>>>
>>>>> Isn't the right fix instead to change the compare-elim.c pass to either
>>>>> accept both reg vs. flags orderings in parallel, or both depending
>>>>> on some target hook, or change it to the order i386.md and most other
>>>>> major targets use and just fix up mn10300/rx (and aarch64?) to use the
>>>>> same
>>>>> order?
>>>
>>>
>>> Attached patch changes compare-elim.c order to what i386.md expects.
>>>
>>> Thoughts?
>>
>> I think with an appropriate change to the canonicalization rules in the
>> manual this is fine.
>>
>> I've got the visium, rx and mn103 patches handy to ensure they don't
>> regress.  aarch64 doesn't seem to be affected either way but I didn't
>> investigate why -- I expected it to improve with your change.
>>
>> I'll write up a ChangeLog and commit the mn103, rx and visium changes after
>> you commit the compare-elim & documentation bits.

Something like the attached patch?

2017-05-12  Uros Bizjak  <ubizjak@gmail.com>

    * doc/md.texi ( Canonicalization of Instructions): Describe the
    canonical form of instructions that inherently set a condition
    code register.

Uros.

[-- Attachment #2: d.diff.txt --]
[-- Type: text/plain, Size: 962 bytes --]

Index: doc/md.texi
===================================================================
--- doc/md.texi	(revision 247980)
+++ doc/md.texi	(working copy)
@@ -7258,6 +7258,25 @@
 if the first argument is a condition code register or @code{(cc0)}.
 
 @item
+For instructions that inherently set a condition code register, the
+@code{compare} operator is always written as the first RTL expression of
+the @code{parallel} instruction pattern.  For example,
+
+@smallexample
+(define_insn ""
+  [(set (reg:CCZ FLAGS_REG)
+	(compare:CCZ
+	  (plus:SI
+	    (match_operand:SI 1 "register_operand" "%r")
+	    (match_operand:SI 2 "register_operand" "r"))
+	  (const_int 0)))
+   (set (match_operand:SI 0 "register_operand" "=r")
+	(plus:SI (match_dup 1) (match_dup 2)))]
+  ""
+  "addl %0, %1, %2")
+@end smallexample
+
+@item
 An operand of @code{neg}, @code{not}, @code{mult}, @code{plus}, or
 @code{minus} is made the first operand under the same conditions as
 above.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-12 19:14         ` Uros Bizjak
  2017-05-12 19:52           ` Uros Bizjak
@ 2017-05-13 14:43           ` Markus Trippelsdorf
  2017-05-13 19:58             ` Uros Bizjak
  1 sibling, 1 reply; 25+ messages in thread
From: Markus Trippelsdorf @ 2017-05-13 14:43 UTC (permalink / raw)
  To: Uros Bizjak
  Cc: Jeff Law, Jakub Jelinek, gcc-patches, Richard Earnshaw, Nick Clifton

On 2017.05.12 at 21:09 +0200, Uros Bizjak wrote:
> On Fri, May 12, 2017 at 8:34 PM, Jeff Law <law@redhat.com> wrote:
> > On 05/10/2017 01:05 PM, Uros Bizjak wrote:
> >>
> >> On Wed, May 10, 2017 at 5:18 PM, Uros Bizjak <ubizjak@gmail.com> wrote:
> >>>
> >>> On Wed, May 10, 2017 at 4:27 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> >>>>
> >>>> On Tue, May 09, 2017 at 06:06:47PM +0200, Uros Bizjak wrote:
> >>>>>
> >>>>> Attached patch enables post-reload compare elimination pass by
> >>>>> providing expected patterns (duplicates of existing patterns with
> >>>>> setters of reg and flags switched in the parallel) for flag setting
> >>>>> arithmetic instructions.
> >>>>>
> >>>>> The merge triggers more than 3000 times during the gcc bootstrap,
> >>>>> mostly in cases where intervening memory load or store prevents
> >>>>> combine from merging the arithmetic insn and the following compare.
> >>>>>
> >>>>> Also, some recent linux x86_64 defconfig build results in ~200 merges,
> >>>>> removing ~200 test/cmp insns. Not much, but I think the results still
> >>>>> warrant the pass to be enabled.
> >>>>
> >>>>
> >>>> Isn't the right fix instead to change the compare-elim.c pass to either
> >>>> accept both reg vs. flags orderings in parallel, or both depending
> >>>> on some target hook, or change it to the order i386.md and most other
> >>>> major targets use and just fix up mn10300/rx (and aarch64?) to use the
> >>>> same
> >>>> order?
> >>
> >>
> >> Attached patch changes compare-elim.c order to what i386.md expects.
> >>
> >> Thoughts?
> >
> > I think with an appropriate change to the canonicalization rules in the
> > manual this is fine.
> >
> > I've got the visium, rx and mn103 patches handy to ensure they don't
> > regress.  aarch64 doesn't seem to be affected either way but I didn't
> > investigate why -- I expected it to improve with your change.
> >
> > I'll write up a ChangeLog and commit the mn103, rx and visium changes after
> > you commit the compare-elim & documentation bits.
>
> Thanks!
>
> I went ahead and commit the patch without documentation change (which
> I plan to submit in a separate patch ASAP), with the following
> ChangeLog:
>
> 2017-05-12  Uros Bizjak  <ubizjak@gmail.com>
>
>     * compare-elim.c (try_eliminate_compare): Canonicalize
>     operation with embedded compare to
>     [(set (reg:CCM) (compare:CCM (operation) (immediate)))
>      (set (reg) (operation)].
>
>     * config/i386/i386.c (TARGET_FLAGS_REGNUM): New define.
>
> Re-bootstrapped and re-tested on x86_64-linux-gnu {,-m32}.
>
> Committed to mainline SVN.

This causes gcc.dg/atomic/c11-atomic-exec-2.c to ICE with e.g.
-march=nehalem:

markus@x4 gcc % gcc -std=c11 -pedantic-errors -march=nehalem -O2 ./gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-2.c
./gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-2.c: In function â€˜test_minusâ€™:
./gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-2.c:120:1: internal compiler error: in ix86_cc_mode, at config/i386/i386.c:22485
 }
 ^
0xea7b37 ix86_cc_mode(rtx_code, rtx_def*, rtx_def*)
        /home/markus/gcc/gcc/config/i386/i386.c:22485
0x1246e11 maybe_select_cc_mode
        /home/markus/gcc/gcc/compare-elim.c:500
0x1246e11 try_eliminate_compare
        /home/markus/gcc/gcc/compare-elim.c:665
0x1246e11 execute_compare_elim_after_reload
        /home/markus/gcc/gcc/compare-elim.c:727
0x1246e11 execute
        /home/markus/gcc/gcc/compare-elim.c:770


--
Markus

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-13 14:43           ` Markus Trippelsdorf
@ 2017-05-13 19:58             ` Uros Bizjak
  0 siblings, 0 replies; 25+ messages in thread
From: Uros Bizjak @ 2017-05-13 19:58 UTC (permalink / raw)
  To: Markus Trippelsdorf
  Cc: Jeff Law, Jakub Jelinek, gcc-patches, Richard Earnshaw, Nick Clifton

On Sat, May 13, 2017 at 2:47 PM, Markus Trippelsdorf
<markus@trippelsdorf.de> wrote:
>> 2017-05-12  Uros Bizjak  <ubizjak@gmail.com>
>>
>>     * compare-elim.c (try_eliminate_compare): Canonicalize
>>     operation with embedded compare to
>>     [(set (reg:CCM) (compare:CCM (operation) (immediate)))
>>      (set (reg) (operation)].
>>
>>     * config/i386/i386.c (TARGET_FLAGS_REGNUM): New define.
>>
>> Re-bootstrapped and re-tested on x86_64-linux-gnu {,-m32}.
>>
>> Committed to mainline SVN.
>
> This causes gcc.dg/atomic/c11-atomic-exec-2.c to ICE with e.g.
> -march=nehalem:
>
> markus@x4 gcc % gcc -std=c11 -pedantic-errors -march=nehalem -O2 ./gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-2.c
> ./gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-2.c: In function ‘test_minus’:
> ./gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-2.c:120:1: internal compiler error: in ix86_cc_mode, at config/i386/i386.c:22485
>  }
>  ^
> 0xea7b37 ix86_cc_mode(rtx_code, rtx_def*, rtx_def*)
>         /home/markus/gcc/gcc/config/i386/i386.c:22485
> 0x1246e11 maybe_select_cc_mode
>         /home/markus/gcc/gcc/compare-elim.c:500
> 0x1246e11 try_eliminate_compare
>         /home/markus/gcc/gcc/compare-elim.c:665
> 0x1246e11 execute_compare_elim_after_reload
>         /home/markus/gcc/gcc/compare-elim.c:727
> 0x1246e11 execute
>         /home/markus/gcc/gcc/compare-elim.c:770

-mtune=intel is able to copy SFmode value through general registers,
resulting in the following sequence:

(insn 288 1008 1009 29 (parallel [
            (set (reg:DI 4 si [687])
                (lshiftrt:DI (reg:DI 4 si [687])
                    (const_int 32 [0x20])))
            (clobber (reg:CC 17 flags))
        ]) "ttt.c":41 546 {*lshrdi3_1}
     (nil))
(insn 1009 288 289 29 (set (reg:DI 2 cx [419])
        (reg:DI 4 si [687])) "ttt.c":41 81 {*movdi_internal}
     (nil))
(insn 289 1009 291 29 (set (reg:SF 21 xmm0 [425])
        (reg:SF 2 cx [419])) "ttt.c":41 127 {*movsf_internal}
     (nil))
(insn 291 289 292 29 (set (reg:CCFPU 17 flags)
        (compare:CCFPU (reg:SF 21 xmm0 [425])
            (reg:SF 27 xmm6 [688]))) "ttt.c":41 51 {*cmpiusf}
     (expr_list:REG_EQUAL (compare:CCFPU (reg:SF 21 xmm0 [425])
            (const_double:SF 0.0 [0x0.0p+0]))
        (nil)))

Looking at compare-elim.c, equivalent_reg_at_start, the function
traces the value in %xmm0 to %rsi. So, try_eliminate_compare tries to
check if the target supports the compare of

                 (lshiftrt:DI (reg:DI 4 si [687])
                    (const_int 32 [0x20])))

with

(reg:SF 27 xmm6 [688])

which won't fly, since compare operands must have the same modes.

I'm testing the following patch that ensures that the mode of register
copy, found by equivalent_reg_at_start equals original mode.

--cut here--
Index: compare-elim.c
===================================================================
--- compare-elim.c      (revision 247995)
+++ compare-elim.c      (working copy)
@@ -526,6 +526,7 @@ maybe_select_cc_mode (struct comparison *cmp, rtx
 static rtx
 equivalent_reg_at_start (rtx reg, rtx_insn *end, rtx_insn *start)
 {
+  machine_mode orig_mode = GET_MODE (reg);
   rtx_insn *bb_head = BB_HEAD (BLOCK_FOR_INSN (end));

   for (rtx_insn *insn = PREV_INSN (end);
@@ -572,6 +573,9 @@ equivalent_reg_at_start (rtx reg, rtx_insn *end, r
        return NULL_RTX;
     }

+  if (GET_MODE (reg) != orig_mode)
+    return NULL_RTX;
+
   return reg;
 }

--cut here--

Uros.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-12  6:13             ` Jeff Law
@ 2017-05-17  3:40               ` Hans-Peter Nilsson
  2017-05-17  6:52                 ` Uros Bizjak
  0 siblings, 1 reply; 25+ messages in thread
From: Hans-Peter Nilsson @ 2017-05-17  3:40 UTC (permalink / raw)
  To: Jeff Law
  Cc: Jakub Jelinek, Uros Bizjak, Alexandre Oliva, Nick Clifton,
	Eric Botcazou, gcc-patches, Richard Earnshaw

On Fri, 12 May 2017, Jeff Law wrote:
> On 05/11/2017 03:29 PM, Hans-Peter Nilsson wrote:
> > The canonical order of insns affecting condition-codes has been
> > discussed in the past too.
> >
> > I was then arguing that the compare should go last, i.e.
> >   (parallel [(set (reg) (op...))
> >              (set (ccreg) (compare (op...) (const_int 0)))])
> > for simplicity of processing together with an alternative that
> > clobbers (i.e. same location in the parallel), as the canonical
> > order of clobbers in a parallel is that they always go last.
> > (IIRC from that distant past, transforming a cc0 target to
> > "ccreg" required at least three variants: the "naked" insn
> > (pre-reload?), the parallel with a clobber of ccreg, and the
> > actual use of ccreg; as above.)
> >
> > Anyway, people seem to drift towards the ccreg-last variant for

(miswrite: ccreg-*first*, as opposed to the above)

> > some reason or another every time this is brought up; this time
> > it seems the single reason is that some existing pass expects or
> > generates this, using the order that causes the minimal fallout.
> > (Maybe it's the same pass...)  Not sure that's a good base for
> > decisions.
> >
> > Whatever you do in the end, *please document the canonical
> > order* in the RTL documentation.
> I think what's driving the decision is more a desire not to muck with x86 and
> aarch64 and instead place the burden to change on the less popular ports.
>
> Though I must admit i prefer the cc setting last since that mirrors how we
> typically do things with clobbers of the cc register.

IOW.

> But yes, we definitely should document the final canonical ordering.

Is that about to also happen?

I foresee in another half-a-dozen years, and *this* iteration is
forgotten, someone bothered enough to argue eloquently coming
around, doing rtl-level-maintenance, maybe a new pass (ok maybe
not a *new RTL-pass* :) sees that order as wrong for the reason
listed above, and does the legwork to switch the order around.
It will be ok to change it again then, because the order just
happened this time because of minimal-edit-reasons, right?
Noone can argue that it was a thoughtful deliberate change that
we bothered to document, to stay consistent?  ;)

brgds, H-P

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-17  3:40               ` Hans-Peter Nilsson
@ 2017-05-17  6:52                 ` Uros Bizjak
  2017-05-17 13:11                   ` Hans-Peter Nilsson
  2017-06-23  4:47                   ` Jeff Law
  0 siblings, 2 replies; 25+ messages in thread
From: Uros Bizjak @ 2017-05-17  6:52 UTC (permalink / raw)
  To: Hans-Peter Nilsson
  Cc: Jeff Law, Jakub Jelinek, Alexandre Oliva, Nick Clifton,
	Eric Botcazou, gcc-patches, Richard Earnshaw

On Wed, May 17, 2017 at 4:45 AM, Hans-Peter Nilsson <hp@bitrange.com> wrote:

>> But yes, we definitely should document the final canonical ordering.
>
> Is that about to also happen?
>
> I foresee in another half-a-dozen years, and *this* iteration is
> forgotten, someone bothered enough to argue eloquently coming
> around, doing rtl-level-maintenance, maybe a new pass (ok maybe
> not a *new RTL-pass* :) sees that order as wrong for the reason
> listed above, and does the legwork to switch the order around.
> It will be ok to change it again then, because the order just
> happened this time because of minimal-edit-reasons, right?
> Noone can argue that it was a thoughtful deliberate change that
> we bothered to document, to stay consistent?  ;)

The proposed doc patch is wiating for review at [1].

[1] https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01073.html

Uros.

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-12 18:38       ` Jeff Law
  2017-05-12 19:14         ` Uros Bizjak
@ 2017-05-17  9:33         ` Eric Botcazou
  1 sibling, 0 replies; 25+ messages in thread
From: Eric Botcazou @ 2017-05-17  9:33 UTC (permalink / raw)
  To: Jeff Law
  Cc: gcc-patches, Uros Bizjak, Jakub Jelinek, Richard Earnshaw, Nick Clifton

> I've got the visium, rx and mn103 patches handy to ensure they don't
> regress.  aarch64 doesn't seem to be affected either way but I didn't
> investigate why -- I expected it to improve with your change.

Thanks for taking care of this, but a couple of comments in config/visium are 
now backwards and must be adjusted too:

Index: config/visium/visium.c
===================================================================
--- config/visium/visium.c	(revision 248139)
+++ config/visium/visium.c	(working copy)
@@ -922,8 +922,8 @@ empty_delay_slot (rtx_insn *insn)
   return 1;
 }
 
-/* Wrapper around single_set which returns the first SET of a pair if the
-   second SET is to the flags register.  */
+/* Wrapper around single_set which returns the second SET of a pair if the
+   first SET is to the flags register.  */
 
 static rtx
 single_set_and_flags (rtx_insn *insn)
Index: config/visium/visium.md
===================================================================
--- config/visium/visium.md	(revision 248139)
+++ config/visium/visium.md	(working copy)
@@ -240,7 +240,7 @@ (define_code_attr add_str [(plus "plus")
 ;;
 ;; Substitutions.
 ;;
-;; They are used to define the second instruction of the pairs required by
+;; They are used to define the first instruction of the pairs required by
 ;; the postreload compare elimination pass, with a first variant for the
 ;; logical insns and a second variant for the arithmetic insns.
 ;;

I'm going to install the fixlet.

-- 
Eric Botcazou

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-17  6:52                 ` Uros Bizjak
@ 2017-05-17 13:11                   ` Hans-Peter Nilsson
  2017-06-23  4:47                   ` Jeff Law
  1 sibling, 0 replies; 25+ messages in thread
From: Hans-Peter Nilsson @ 2017-05-17 13:11 UTC (permalink / raw)
  To: Uros Bizjak
  Cc: Jeff Law, Jakub Jelinek, Alexandre Oliva, Nick Clifton,
	Eric Botcazou, gcc-patches, Richard Earnshaw

On Wed, 17 May 2017, Uros Bizjak wrote:
> On Wed, May 17, 2017 at 4:45 AM, Hans-Peter Nilsson <hp@bitrange.com> wrote:
> >> But yes, we definitely should document the final canonical ordering.
> >
> > Is that about to also happen?
> The proposed doc patch is wiating for review at [1].
>
> [1] https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01073.html

Thanks.  LGTM.

brgds, H-P

^ permalink raw reply	[flat|nested] 25+ messages in thread

* Re: [RFC PATCH, i386]: Enable post-reload compare elimination pass
  2017-05-17  6:52                 ` Uros Bizjak
  2017-05-17 13:11                   ` Hans-Peter Nilsson
@ 2017-06-23  4:47                   ` Jeff Law
  1 sibling, 0 replies; 25+ messages in thread
From: Jeff Law @ 2017-06-23  4:47 UTC (permalink / raw)
  To: Uros Bizjak, Hans-Peter Nilsson
  Cc: Jakub Jelinek, Alexandre Oliva, Nick Clifton, Eric Botcazou,
	gcc-patches, Richard Earnshaw

On 05/17/2017 12:33 AM, Uros Bizjak wrote:
> On Wed, May 17, 2017 at 4:45 AM, Hans-Peter Nilsson <hp@bitrange.com> wrote:
> 
>>> But yes, we definitely should document the final canonical ordering.
>>
>> Is that about to also happen?
>>
>> I foresee in another half-a-dozen years, and *this* iteration is
>> forgotten, someone bothered enough to argue eloquently coming
>> around, doing rtl-level-maintenance, maybe a new pass (ok maybe
>> not a *new RTL-pass* :) sees that order as wrong for the reason
>> listed above, and does the legwork to switch the order around.
>> It will be ok to change it again then, because the order just
>> happened this time because of minimal-edit-reasons, right?
>> Noone can argue that it was a thoughtful deliberate change that
>> we bothered to document, to stay consistent?  ;)
> 
> The proposed doc patch is wiating for review at [1].
> 
> [1] https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01073.html
OK for the trunk.

Sorry for the delay,
jeff

^ permalink raw reply	[flat|nested] 25+ messages in thread

end of thread, other threads:[~2017-06-23  4:47 UTC | newest]

Thread overview: 25+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-05-09 16:16 [RFC PATCH, i386]: Enable post-reload compare elimination pass Uros Bizjak
2017-05-10 14:31 ` Jakub Jelinek
2017-05-10 15:24   ` Uros Bizjak
2017-05-10 19:21     ` Uros Bizjak
2017-05-10 20:12       ` Uros Bizjak
2017-05-10 20:42         ` Jakub Jelinek
2017-05-11 17:17           ` Jeff Law
2017-05-11 22:09           ` Hans-Peter Nilsson
2017-05-12  6:13             ` Jeff Law
2017-05-17  3:40               ` Hans-Peter Nilsson
2017-05-17  6:52                 ` Uros Bizjak
2017-05-17 13:11                   ` Hans-Peter Nilsson
2017-06-23  4:47                   ` Jeff Law
2017-05-12  9:43             ` Hans-Peter Nilsson
2017-05-12 10:33               ` Jakub Jelinek
2017-05-11 18:37       ` Jeff Law
2017-05-11 18:45         ` Jakub Jelinek
2017-05-11 18:50         ` Uros Bizjak
2017-05-12  7:03           ` Jeff Law
2017-05-12 18:38       ` Jeff Law
2017-05-12 19:14         ` Uros Bizjak
2017-05-12 19:52           ` Uros Bizjak
2017-05-13 14:43           ` Markus Trippelsdorf
2017-05-13 19:58             ` Uros Bizjak
2017-05-17  9:33         ` Eric Botcazou

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).