public inbox for gcc-patches@gcc.gnu.org
* [PATCH 0/6] aarch64: Implement TImode comparisons
@ 2020-03-19  6:47 Richard Henderson
  2020-03-19  6:48 ` [PATCH 1/6] aarch64: Add ucmp_*_carryinC patterns for all usub_*_carryinC Richard Henderson
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:47 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

This is attacking case 3 of PR 94174.

The existing ccmp optimization happens at the gimple level,
which means that the rtl expansion of TImode operations cannot
take advantage of it.  But we can do even better than the
existing ccmp optimization.

This expansion is of similar size to our current branchful
expansion, but is all straight-line code.  I assume that, in
general, the branch predictor will work better with fewer
branches.

E.g.

-  10:  b7f800a3        tbnz    x3, #63, 24 <__subvti3+0x24>
-  14:  eb02003f        cmp     x1, x2
-  18:  5400010c        b.gt    38 <__subvti3+0x38>
-  1c:  54000140        b.eq    44 <__subvti3+0x44>  // b.none
-  20:  d65f03c0        ret
-  24:  eb01005f        cmp     x2, x1
-  28:  5400008c        b.gt    38 <__subvti3+0x38>
-  2c:  54ffffa1        b.ne    20 <__subvti3+0x20>  // b.any
-  30:  eb00009f        cmp     x4, x0
-  34:  54ffff69        b.ls    20 <__subvti3+0x20>  // b.plast
-  38:  a9bf7bfd        stp     x29, x30, [sp, #-16]!
-  3c:  910003fd        mov     x29, sp
-  40:  94000000        bl      0 <abort>
-  44:  eb04001f        cmp     x0, x4
-  48:  54ffff88        b.hi    38 <__subvti3+0x38>  // b.pmore
-  4c:  d65f03c0        ret

+  10:  b7f800e3        tbnz    x3, #63, 2c <__subvti3+0x2c>
+  14:  eb01005f        cmp     x2, x1
+  18:  1a9fb7e2        cset    w2, ge  // ge = tcont
+  1c:  fa400080        ccmp    x4, x0, #0x0, eq  // eq = none
+  20:  7a40a844        ccmp    w2, #0x0, #0x4, ge  // ge = tcont
+  24:  540000e0        b.eq    40 <__subvti3+0x40>  // b.none
+  28:  d65f03c0        ret
+  2c:  eb01005f        cmp     x2, x1
+  30:  1a9fc7e2        cset    w2, le
+  34:  fa400081        ccmp    x4, x0, #0x1, eq  // eq = none
+  38:  7a40d844        ccmp    w2, #0x0, #0x4, le
+  3c:  54ffff60        b.eq    28 <__subvti3+0x28>  // b.none
+  40:  a9bf7bfd        stp     x29, x30, [sp, #-16]!
+  44:  910003fd        mov     x29, sp
+  48:  94000000        bl      0 <abort>

So, one fewer insn, but 2 branches instead of 6.

As for the specific case of the PR,

#include <stdint.h>

extern void doit(void);

void test_int128(__int128 a, uint64_t l)
{
	if ((__int128_t)a - l <= 1)
		doit();
}

    0:  eb020000        subs    x0, x0, x2
    4:  da1f0021        sbc     x1, x1, xzr
    8:  f100003f        cmp     x1, #0x0
-   c:  5400004d        b.le    14 <test_int128+0x14>
-  10:  d65f03c0        ret
-  14:  54000061        b.ne    20 <test_int128+0x20>  // b.any
-  18:  f100041f        cmp     x0, #0x1
-  1c:  54ffffa8        b.hi    10 <test_int128+0x10>  // b.pmore
+   c:  1a9fc7e1        cset    w1, le
+  10:  fa410801        ccmp    x0, #0x1, #0x1, eq  // eq = none
+  14:  7a40d824        ccmp    w1, #0x0, #0x4, le
+  18:  54000041        b.ne    20 <test_int128+0x20>  // b.any
+  1c:  d65f03c0        ret
   20:  14000000        b       0 <doit>


r~


Richard Henderson (6):
  aarch64: Add ucmp_*_carryinC patterns for all usub_*_carryinC
  aarch64: Adjust result of aarch64_gen_compare_reg
  aarch64: Accept 0 as first argument to compares
  aarch64: Simplify @ccmp<cc_mode><mode> operands
  aarch64: Improve nzcv argument to ccmp
  aarch64: Implement TImode comparisons

 gcc/config/aarch64/aarch64.c              | 304 ++++++++++++++++------
 gcc/config/aarch64/aarch64-simd.md        |  18 +-
 gcc/config/aarch64/aarch64-speculation.cc |   5 +-
 gcc/config/aarch64/aarch64.md             | 280 ++++++++++++++------
 4 files changed, 429 insertions(+), 178 deletions(-)

-- 
2.20.1


* [PATCH 1/6] aarch64: Add ucmp_*_carryinC patterns for all usub_*_carryinC
  2020-03-19  6:47 [PATCH 0/6] aarch64: Implement TImode comparisons Richard Henderson
@ 2020-03-19  6:48 ` Richard Henderson
  2020-03-19  6:48 ` [PATCH 2/6] aarch64: Adjust result of aarch64_gen_compare_reg Richard Henderson
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:48 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

Use xzr for the output when we only require the flags output.
This will be used shortly for TImode comparisons.
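
As an illustration only (register allocation arbitrary, not part of
the patch), this is the kind of flag-setting sequence the new
patterns allow for an unsigned double-word comparison x < y:

	cmp	x0, x2		// low halves; C = no borrow
	sbcs	xzr, x1, x3	// high halves minus borrow, result discarded

after which x < y holds iff C is clear.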

	* config/aarch64/aarch64.md (ucmp<GPI>3_carryinC): New.
	(*ucmp<GPI>3_carryinC_z1): New.
	(*ucmp<GPI>3_carryinC_z2): New.
	(*ucmp<GPI>3_carryinC): New.
---
 gcc/config/aarch64/aarch64.md | 50 +++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c7c4d1dd519..fcc1ddafaec 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3439,6 +3439,18 @@
    ""
 )
 
+(define_expand "ucmp<GPI:mode>3_carryinC"
+   [(set (reg:CC CC_REGNUM)
+	 (compare:CC
+	   (zero_extend:<DWI>
+	     (match_operand:GPI 0 "register_operand"))
+	   (plus:<DWI>
+	     (zero_extend:<DWI>
+	       (match_operand:GPI 1 "register_operand"))
+	     (ltu:<DWI> (reg:CC CC_REGNUM) (const_int 0)))))]
+   ""
+)
+
 (define_insn "*usub<GPI:mode>3_carryinC_z1"
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
@@ -3456,6 +3468,19 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_insn "*ucmp<GPI:mode>3_carryinC_z1"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (const_int 0)
+	  (plus:<DWI>
+	    (zero_extend:<DWI>
+	      (match_operand:GPI 0 "register_operand" "r"))
+	    (match_operand:<DWI> 1 "aarch64_borrow_operation" ""))))]
+   ""
+   "sbcs\\t<w>zr, <w>zr, %<w>0"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_insn "*usub<GPI:mode>3_carryinC_z2"
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
@@ -3471,6 +3496,17 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_insn "*ucmp<GPI:mode>3_carryinC_z2"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (zero_extend:<DWI>
+	    (match_operand:GPI 0 "register_operand" "r"))
+	  (match_operand:<DWI> 1 "aarch64_borrow_operation" "")))]
+   ""
+   "sbcs\\t<w>zr, %<w>0, <w>zr"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_insn "*usub<GPI:mode>3_carryinC"
   [(set (reg:CC CC_REGNUM)
 	(compare:CC
@@ -3489,6 +3525,20 @@
   [(set_attr "type" "adc_reg")]
 )
 
+(define_insn "*ucmp<GPI:mode>3_carryinC"
+  [(set (reg:CC CC_REGNUM)
+	(compare:CC
+	  (zero_extend:<DWI>
+	    (match_operand:GPI 0 "register_operand" "r"))
+	  (plus:<DWI>
+	    (zero_extend:<DWI>
+	      (match_operand:GPI 1 "register_operand" "r"))
+	    (match_operand:<DWI> 2 "aarch64_borrow_operation" ""))))]
+   ""
+   "sbcs\\t<w>zr, %<w>0, %<w>1"
+  [(set_attr "type" "adc_reg")]
+)
+
 (define_expand "sub<GPI:mode>3_carryinV"
   [(parallel
      [(set (reg:CC_V CC_REGNUM)
-- 
2.20.1


* [PATCH 2/6] aarch64: Adjust result of aarch64_gen_compare_reg
  2020-03-19  6:47 [PATCH 0/6] aarch64: Implement TImode comparisons Richard Henderson
  2020-03-19  6:48 ` [PATCH 1/6] aarch64: Add ucmp_*_carryinC patterns for all usub_*_carryinC Richard Henderson
@ 2020-03-19  6:48 ` Richard Henderson
  2020-03-19  6:48 ` [PATCH 3/6] aarch64: Accept 0 as first argument to compares Richard Henderson
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:48 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

Return the entire comparison expression, not just the cc_reg.
This will allow the routine to adjust the comparison code as
needed for TImode comparisons.

Note that some users were passing e.g. EQ to aarch64_gen_compare_reg
and then using gen_rtx_NE.  Pass the proper code in the first place.
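
As an illustration of the new interface (variable names arbitrary),
a caller that previously did

	rtx cc_reg = aarch64_gen_compare_reg (NE, x, y);
	rtx t = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);

now does just

	rtx t = aarch64_gen_compare_reg (NE, x, y);

and can still reach the cc register as XEXP (t, 0) where needed.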

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Return
	the final comparison for code & cc_reg.
	(aarch64_gen_compare_reg_maybe_ze): Likewise.
	(aarch64_expand_compare_and_swap): Update to match -- do not
	build the final comparison here, but PUT_MODE as necessary.
	(aarch64_split_compare_and_swap): Use prebuilt comparison.
	* config/aarch64/aarch64-simd.md (aarch64_cm<COMPARISONS>di): Likewise.
	(aarch64_cm<UCOMPARISONS>di): Likewise.
	(aarch64_cmtstdi): Likewise.
	* config/aarch64/aarch64-speculation.cc
	(aarch64_speculation_establish_tracker): Likewise.
	* config/aarch64/aarch64.md (cbranch<GPI>4, cbranch<GPF>4): Likewise.
	(mod<GPI>3, abs<GPI>2): Likewise.
	(cstore<GPI>4, cstore<GPF>4): Likewise.
	(cmov<GPI>6, cmov<GPF>6): Likewise.
	(mov<ALLI>cc, mov<GPF><GPI>cc, mov<GPF>cc): Likewise.
	(<NEG_NOT><GPI>cc): Likewise.
	(ffs<GPI>2): Likewise.
	(cstorecc4): Remove redundant "".
---
 gcc/config/aarch64/aarch64.c              | 26 +++---
 gcc/config/aarch64/aarch64-simd.md        | 18 ++---
 gcc/config/aarch64/aarch64-speculation.cc |  5 +-
 gcc/config/aarch64/aarch64.md             | 96 ++++++++++-------------
 4 files changed, 63 insertions(+), 82 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c90de65de12..619357fa210 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2328,7 +2328,7 @@ emit_set_insn (rtx x, rtx y)
 }
 
 /* X and Y are two things to compare using CODE.  Emit the compare insn and
-   return the rtx for register 0 in the proper mode.  */
+   return the rtx for the CCmode comparison.  */
 rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
@@ -2359,7 +2359,7 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
-  return cc_reg;
+  return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
 
 /* Similarly, but maybe zero-extend Y if Y_MODE < SImode.  */
@@ -2382,7 +2382,7 @@ aarch64_gen_compare_reg_maybe_ze (RTX_CODE code, rtx x, rtx y,
 	  cc_mode = CC_SWPmode;
 	  cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
 	  emit_set_insn (cc_reg, t);
-	  return cc_reg;
+	  return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 	}
     }
 
@@ -18506,7 +18506,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
 
       emit_insn (gen_aarch64_compare_and_swap_lse (mode, rval, mem,
 						   newval, mod_s));
-      cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+      x = aarch64_gen_compare_reg_maybe_ze (EQ, rval, oldval, mode);
+      PUT_MODE (x, SImode);
     }
   else if (TARGET_OUTLINE_ATOMICS)
     {
@@ -18517,7 +18518,8 @@ aarch64_expand_compare_and_swap (rtx operands[])
       rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode,
 				      oldval, mode, newval, mode,
 				      XEXP (mem, 0), Pmode);
-      cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+      x = aarch64_gen_compare_reg_maybe_ze (EQ, rval, oldval, mode);
+      PUT_MODE (x, SImode);
     }
   else
     {
@@ -18529,13 +18531,13 @@ aarch64_expand_compare_and_swap (rtx operands[])
       emit_insn (GEN_FCN (code) (rval, mem, oldval, newval,
 				 is_weak, mod_s, mod_f));
       cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+      x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
     }
 
   if (r_mode != mode)
     rval = gen_lowpart (mode, rval);
   emit_move_insn (operands[1], rval);
 
-  x = gen_rtx_EQ (SImode, cc_reg, const0_rtx);
   emit_insn (gen_rtx_SET (bval, x));
 }
 
@@ -18610,10 +18612,8 @@ aarch64_split_compare_and_swap (rtx operands[])
   if (strong_zero_p)
     x = gen_rtx_NE (VOIDmode, rval, const0_rtx);
   else
-    {
-      rtx cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
-      x = gen_rtx_NE (VOIDmode, cc_reg, const0_rtx);
-    }
+    x = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode);
+
   x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
 			    gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
   aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
@@ -18626,8 +18626,7 @@ aarch64_split_compare_and_swap (rtx operands[])
 	{
 	  /* Emit an explicit compare instruction, so that we can correctly
 	     track the condition codes.  */
-	  rtx cc_reg = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
-	  x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
+	  x = aarch64_gen_compare_reg (NE, scratch, const0_rtx);
 	}
       else
 	x = gen_rtx_NE (VOIDmode, scratch, const0_rtx);
@@ -18722,8 +18721,7 @@ aarch64_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
     {
       /* Emit an explicit compare instruction, so that we can correctly
 	 track the condition codes.  */
-      rtx cc_reg = aarch64_gen_compare_reg (NE, cond, const0_rtx);
-      x = gen_rtx_NE (GET_MODE (cc_reg), cc_reg, const0_rtx);
+      x = aarch64_gen_compare_reg (NE, cond, const0_rtx);
     }
   else
     x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 24a11fb5040..69e099a2c23 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4800,10 +4800,8 @@
     if (GP_REGNUM_P (REGNO (operands[0]))
 	&& GP_REGNUM_P (REGNO (operands[1])))
       {
-	machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
-	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+	rtx cmp = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+	emit_insn (gen_cstoredi_neg (operands[0], cmp, XEXP (cmp, 0)));
 	DONE;
       }
     /* Otherwise, we expand to a similar pattern which does not
@@ -4863,10 +4861,8 @@
     if (GP_REGNUM_P (REGNO (operands[0]))
 	&& GP_REGNUM_P (REGNO (operands[1])))
       {
-	machine_mode mode = CCmode;
-	rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
-	rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
-	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+	rtx cmp = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
+	emit_insn (gen_cstoredi_neg (operands[0], cmp, XEXP (cmp, 0)));
 	DONE;
       }
     /* Otherwise, we expand to a similar pattern which does not
@@ -4936,10 +4932,8 @@
 	&& GP_REGNUM_P (REGNO (operands[1])))
       {
 	rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
-	machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
-	rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
-	rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
-	emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
+	rtx cmp = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
+	emit_insn (gen_cstoredi_neg (operands[0], cmp, XEXP (cmp, 0)));
 	DONE;
       }
     /* Otherwise, we expand to a similar pattern which does not
diff --git a/gcc/config/aarch64/aarch64-speculation.cc b/gcc/config/aarch64/aarch64-speculation.cc
index f490b64ae61..87d5964871b 100644
--- a/gcc/config/aarch64/aarch64-speculation.cc
+++ b/gcc/config/aarch64/aarch64-speculation.cc
@@ -162,9 +162,8 @@ aarch64_speculation_establish_tracker ()
   rtx sp = gen_rtx_REG (DImode, SP_REGNUM);
   rtx tracker = gen_rtx_REG (DImode, SPECULATION_TRACKER_REGNUM);
   start_sequence ();
-  rtx cc = aarch64_gen_compare_reg (EQ, sp, const0_rtx);
-  emit_insn (gen_cstoredi_neg (tracker,
-			       gen_rtx_NE (CCmode, cc, const0_rtx), cc));
+  rtx x = aarch64_gen_compare_reg (NE, sp, const0_rtx);
+  emit_insn (gen_cstoredi_neg (tracker, x, XEXP (x, 0)));
   rtx_insn *seq = get_insns ();
   end_sequence ();
   return seq;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index fcc1ddafaec..29dfd6df30c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -464,12 +464,12 @@
 			   (label_ref (match_operand 3 "" ""))
 			   (pc)))]
   ""
-  "
-  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
 					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
   operands[2] = const0_rtx;
-  "
-)
+})
 
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
@@ -478,12 +478,12 @@
 			   (label_ref (match_operand 3 "" ""))
 			   (pc)))]
   ""
-  "
-  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
 					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
   operands[2] = const0_rtx;
-  "
-)
+})
 
 (define_expand "cbranchcc4"
   [(set (pc) (if_then_else
@@ -598,9 +598,8 @@
     if (val == 2)
       {
 	rtx masked = gen_reg_rtx (<MODE>mode);
-	rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
+	rtx x = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
 	emit_insn (gen_and<mode>3 (masked, operands[1], mask));
-	rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
 	emit_insn (gen_csneg3<mode>_insn (operands[0], x, masked, masked));
 	DONE;
       }
@@ -3671,8 +3670,7 @@
    (match_operand:GPI 1 "register_operand")]
   ""
   {
-    rtx ccreg = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
-    rtx x = gen_rtx_LT (VOIDmode, ccreg, const0_rtx);
+    rtx x = aarch64_gen_compare_reg (LT, operands[1], const0_rtx);
     emit_insn (gen_csneg3<mode>_insn (operands[0], x, operands[1], operands[1]));
     DONE;
   }
@@ -4086,12 +4084,13 @@
 	 [(match_operand:GPI 2 "register_operand")
 	  (match_operand:GPI 3 "aarch64_plus_operand")]))]
   ""
-  "
-  operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
-				      operands[3]);
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
   operands[3] = const0_rtx;
-  "
-)
+})
 
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
@@ -4099,11 +4098,10 @@
 	[(match_operand 2 "cc_register")
          (match_operand 3 "const0_operand")]))]
   ""
-"{
+{
   emit_insn (gen_rtx_SET (operands[0], operands[1]));
   DONE;
-}")
-
+})
 
 (define_expand "cstore<mode>4"
   [(set (match_operand:SI 0 "register_operand")
@@ -4111,12 +4109,13 @@
 	 [(match_operand:GPF 2 "register_operand")
 	  (match_operand:GPF 3 "aarch64_fp_compare_operand")]))]
   ""
-  "
-  operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
-				      operands[3]);
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
   operands[3] = const0_rtx;
-  "
-)
+})
 
 (define_insn "aarch64_cstore<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=r")
@@ -4202,12 +4201,12 @@
 	 (match_operand:GPI 4 "register_operand")
 	 (match_operand:GPI 5 "register_operand")))]
   ""
-  "
-  operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
-				      operands[3]);
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  operands[2] = XEXP (operands[1], 0);
   operands[3] = const0_rtx;
-  "
-)
+})
 
 (define_expand "cmov<mode>6"
   [(set (match_operand:GPF 0 "register_operand")
@@ -4218,12 +4217,12 @@
 	 (match_operand:GPF 4 "register_operand")
 	 (match_operand:GPF 5 "register_operand")))]
   ""
-  "
-  operands[2] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
-				      operands[3]);
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  operands[2] = XEXP (operands[1], 0);
   operands[3] = const0_rtx;
-  "
-)
+})
 
 (define_insn "*cmov<mode>_insn"
   [(set (match_operand:ALLI 0 "register_operand" "=r,r,r,r,r,r,r")
@@ -4300,15 +4299,13 @@
 			   (match_operand:ALLI 3 "register_operand")))]
   ""
   {
-    rtx ccreg;
     enum rtx_code code = GET_CODE (operands[1]);
 
     if (code == UNEQ || code == LTGT)
       FAIL;
 
-    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
-				     XEXP (operands[1], 1));
-    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+    operands[1] = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+					   XEXP (operands[1], 1));
   }
 )
 
@@ -4319,15 +4316,13 @@
 			  (match_operand:GPF 3 "register_operand")))]
   ""
   {
-    rtx ccreg;
     enum rtx_code code = GET_CODE (operands[1]);
 
     if (code == UNEQ || code == LTGT)
       FAIL;
 
-    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
-				  XEXP (operands[1], 1));
-    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+    operands[1] = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+					   XEXP (operands[1], 1));
   }
 )
 
@@ -4338,15 +4333,13 @@
 			  (match_operand:GPF 3 "register_operand")))]
   ""
   {
-    rtx ccreg;
     enum rtx_code code = GET_CODE (operands[1]);
 
     if (code == UNEQ || code == LTGT)
       FAIL;
 
-    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
-				  XEXP (operands[1], 1));
-    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+    operands[1] = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+					   XEXP (operands[1], 1));
   }
 )
 
@@ -4357,15 +4350,13 @@
 			  (match_operand:GPI 3 "register_operand")))]
   ""
   {
-    rtx ccreg;
     enum rtx_code code = GET_CODE (operands[1]);
 
     if (code == UNEQ || code == LTGT)
       FAIL;
 
-    ccreg = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
-				      XEXP (operands[1], 1));
-    operands[1] = gen_rtx_fmt_ee (code, VOIDmode, ccreg, const0_rtx);
+    operands[1] = aarch64_gen_compare_reg (code, XEXP (operands[1], 0),
+					   XEXP (operands[1], 1));
   }
 )
 
@@ -4874,8 +4865,7 @@
    (match_operand:GPI 1 "register_operand")]
   ""
   {
-    rtx ccreg = aarch64_gen_compare_reg (EQ, operands[1], const0_rtx);
-    rtx x = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
+    rtx x = aarch64_gen_compare_reg (NE, operands[1], const0_rtx);
 
     emit_insn (gen_rbit<mode>2 (operands[0], operands[1]));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
-- 
2.20.1


* [PATCH 3/6] aarch64: Accept 0 as first argument to compares
  2020-03-19  6:47 [PATCH 0/6] aarch64: Implement TImode comparisons Richard Henderson
  2020-03-19  6:48 ` [PATCH 1/6] aarch64: Add ucmp_*_carryinC patterns for all usub_*_carryinC Richard Henderson
  2020-03-19  6:48 ` [PATCH 2/6] aarch64: Adjust result of aarch64_gen_compare_reg Richard Henderson
@ 2020-03-19  6:48 ` Richard Henderson
  2020-03-19  6:48 ` [PATCH 4/6] aarch64: Simplify @ccmp<cc_mode><mode> operands Richard Henderson
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:48 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

While cmp (extended register) and cmp (immediate) use <Wn|WSP>,
cmp (shifted register) uses <Wn>, where register number 31 encodes
xzr rather than sp.  So we can perform cmp xzr, x0.

For ccmp, we only have <Wn> as an input.
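
For illustration, with the new Z alternative a comparison of zero
against a register can be emitted directly as

	cmp	xzr, x0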

	* config/aarch64/aarch64.md (cmp<GPI>): For operand 0, use
	aarch64_reg_or_zero.  Shuffle reg/reg to last alternative
	and accept Z.
	(@ccmpcc<GPI>): For operand 0, use aarch64_reg_or_zero and Z.
	(@ccmpcc<GPI>_rev): Likewise.
---
 gcc/config/aarch64/aarch64.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 29dfd6df30c..0fe41117640 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -502,7 +502,7 @@
 	   [(match_operand 0 "cc_register" "")
 	    (const_int 0)])
 	  (compare:CC_ONLY
-	    (match_operand:GPI 2 "register_operand" "r,r,r")
+	    (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ,rZ,rZ")
 	    (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))
 	  (unspec:CC_ONLY
 	    [(match_operand 5 "immediate_operand")]
@@ -542,7 +542,7 @@
 	    [(match_operand 5 "immediate_operand")]
 	    UNSPEC_NZCV)
 	  (compare:CC_ONLY
-	    (match_operand:GPI 2 "register_operand" "r,r,r")
+	    (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ,rZ,rZ")
 	    (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))))]
   ""
   "@
@@ -4009,14 +4009,14 @@
 
 (define_insn "cmp<mode>"
   [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_operand:GPI 0 "register_operand" "rk,rk,rk")
-		    (match_operand:GPI 1 "aarch64_plus_operand" "r,I,J")))]
+	(compare:CC (match_operand:GPI 0 "aarch64_reg_or_zero" "rk,rk,rkZ")
+		    (match_operand:GPI 1 "aarch64_plus_operand" "I,J,rZ")))]
   ""
   "@
-   cmp\\t%<w>0, %<w>1
    cmp\\t%<w>0, %1
-   cmn\\t%<w>0, #%n1"
-  [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
+   cmn\\t%<w>0, #%n1
+   cmp\\t%<w>0, %<w>1"
+  [(set_attr "type" "alus_imm,alus_imm,alus_sreg")]
 )
 
 (define_insn "fcmp<mode>"
-- 
2.20.1


* [PATCH 4/6] aarch64: Simplify @ccmp<cc_mode><mode> operands
  2020-03-19  6:47 [PATCH 0/6] aarch64: Implement TImode comparisons Richard Henderson
                   ` (2 preceding siblings ...)
  2020-03-19  6:48 ` [PATCH 3/6] aarch64: Accept 0 as first argument to compares Richard Henderson
@ 2020-03-19  6:48 ` Richard Henderson
  2020-03-19  6:48 ` [PATCH 5/6] aarch64: Improve nzcv argument to ccmp Richard Henderson
  2020-03-19  6:48 ` [PATCH 6/6] aarch64: Implement TImode comparisons Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:48 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

The first two arguments were "reversed", in that operand 0 was not
the output, but the input cc_reg.  Remove operand 0 entirely, since
we can get the input cc_reg from within the operand 3 comparison
expression.  This moves the output operand to index 0.
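
As an illustration (comparison and nzcv operands elided), the
gen_ccmpccdi call in aarch64_gen_compare_reg changes from

	gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi, ...);

to

	gen_ccmpccdi (cc_reg, x_hi, y_hi, ...);

with the input cc_reg now taken from the comparison operand.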

	* config/aarch64/aarch64.md (@ccmpcc<GPI>): New expander; remove
	operand 0; change operand 3 from match_operator to match_operand.
	(*ccmpcc<GPI>): Rename from @ccmp; swap numbers of operand 0 & 1.
	(@ccmp<CCFP_CCFPE><GPF>, *ccmp<CCFP_CCFPE><GPF>): Likewise.
	(@ccmpcc<GPI>_rev, *ccmpcc<GPI>_rev): Likewise.
	(@ccmp<CCFP_CCFPE><GPF>_rev, *ccmp<CCFP_CCFPE><GPF>_rev): Likewise.
	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Update to match.
	(aarch64_gen_ccmp_next): Likewise.
---
 gcc/config/aarch64/aarch64.c  | 21 +++++-----
 gcc/config/aarch64/aarch64.md | 76 +++++++++++++++++++++++++++++------
 2 files changed, 74 insertions(+), 23 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 619357fa210..16ff40fc267 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2349,7 +2349,7 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 
       rtx x_hi = operand_subword (x, 1, 0, TImode);
       rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, cc_reg, x_hi, y_hi,
+      emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
 			       gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
 			       GEN_INT (AARCH64_EQ)));
     }
@@ -20445,7 +20445,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
   machine_mode op_mode, cmp_mode, cc_mode = CCmode;
   int unsignedp = TYPE_UNSIGNED (TREE_TYPE (treeop0));
   insn_code icode;
-  struct expand_operand ops[6];
+  struct expand_operand ops[5];
   int aarch64_cond;
 
   push_to_sequence (*prep_seq);
@@ -20484,8 +20484,8 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
 
   icode = code_for_ccmp (cc_mode, cmp_mode);
 
-  op0 = prepare_operand (icode, op0, 2, op_mode, cmp_mode, unsignedp);
-  op1 = prepare_operand (icode, op1, 3, op_mode, cmp_mode, unsignedp);
+  op0 = prepare_operand (icode, op0, 1, op_mode, cmp_mode, unsignedp);
+  op1 = prepare_operand (icode, op1, 2, op_mode, cmp_mode, unsignedp);
   if (!op0 || !op1)
     {
       end_sequence ();
@@ -20517,15 +20517,14 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
       aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
     }
 
-  create_fixed_operand (&ops[0], XEXP (prev, 0));
-  create_fixed_operand (&ops[1], target);
-  create_fixed_operand (&ops[2], op0);
-  create_fixed_operand (&ops[3], op1);
-  create_fixed_operand (&ops[4], prev);
-  create_fixed_operand (&ops[5], GEN_INT (aarch64_cond));
+  create_fixed_operand (&ops[0], target);
+  create_fixed_operand (&ops[1], op0);
+  create_fixed_operand (&ops[2], op1);
+  create_fixed_operand (&ops[3], prev);
+  create_fixed_operand (&ops[4], GEN_INT (aarch64_cond));
 
   push_to_sequence (*gen_seq);
-  if (!maybe_expand_insn (icode, 6, ops))
+  if (!maybe_expand_insn (icode, 5, ops))
     {
       end_sequence ();
       return NULL_RTX;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 0fe41117640..12213176103 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -495,11 +495,24 @@
   ""
   "")
 
-(define_insn "@ccmp<CC_ONLY:mode><GPI:mode>"
-  [(set (match_operand:CC_ONLY 1 "cc_register" "")
+(define_expand "@ccmp<CC_ONLY:mode><GPI:mode>"
+  [(set (match_operand:CC_ONLY 0 "cc_register")
+	(if_then_else:CC_ONLY
+	  (match_operand 3 "aarch64_comparison_operator")
+	  (compare:CC_ONLY
+	    (match_operand:GPI 1 "aarch64_reg_or_zero")
+	    (match_operand:GPI 2 "aarch64_ccmp_operand"))
+	  (unspec:CC_ONLY
+	    [(match_operand 4 "immediate_operand")]
+	    UNSPEC_NZCV)))]
+  ""
+)
+
+(define_insn "*ccmp<CC_ONLY:mode><GPI:mode>"
+  [(set (match_operand:CC_ONLY 0 "cc_register" "")
 	(if_then_else:CC_ONLY
 	  (match_operator 4 "aarch64_comparison_operator"
-	   [(match_operand 0 "cc_register" "")
+	   [(match_operand 1 "cc_register" "")
 	    (const_int 0)])
 	  (compare:CC_ONLY
 	    (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ,rZ,rZ")
@@ -515,11 +528,24 @@
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
-(define_insn "@ccmp<CCFP_CCFPE:mode><GPF:mode>"
-  [(set (match_operand:CCFP_CCFPE 1 "cc_register" "")
+(define_expand "@ccmp<CCFP_CCFPE:mode><GPF:mode>"
+  [(set (match_operand:CCFP_CCFPE 0 "cc_register")
+	(if_then_else:CCFP_CCFPE
+	  (match_operand 3 "aarch64_comparison_operator")
+	  (compare:CCFP_CCFPE
+	    (match_operand:GPF 1 "register_operand")
+	    (match_operand:GPF 2 "register_operand"))
+	  (unspec:CCFP_CCFPE
+	    [(match_operand 4 "immediate_operand")]
+	    UNSPEC_NZCV)))]
+  ""
+)
+
+(define_insn "*ccmp<CCFP_CCFPE:mode><GPF:mode>"
+  [(set (match_operand:CCFP_CCFPE 0 "cc_register" "")
 	(if_then_else:CCFP_CCFPE
 	  (match_operator 4 "aarch64_comparison_operator"
-	   [(match_operand 0 "cc_register" "")
+	   [(match_operand 1 "cc_register" "")
 	    (const_int 0)])
 	  (compare:CCFP_CCFPE
 	    (match_operand:GPF 2 "register_operand" "w")
@@ -532,11 +558,24 @@
   [(set_attr "type" "fccmp<s>")]
 )
 
-(define_insn "@ccmp<CC_ONLY:mode><GPI:mode>_rev"
-  [(set (match_operand:CC_ONLY 1 "cc_register" "")
+(define_expand "@ccmp<CC_ONLY:mode><GPI:mode>_rev"
+  [(set (match_operand:CC_ONLY 0 "cc_register")
+	(if_then_else:CC_ONLY
+	  (match_operand 3 "aarch64_comparison_operator")
+	  (unspec:CC_ONLY
+	    [(match_operand 4 "immediate_operand")]
+	    UNSPEC_NZCV)
+	  (compare:CC_ONLY
+	    (match_operand:GPI 1 "aarch64_reg_or_zero")
+	    (match_operand:GPI 2 "aarch64_ccmp_operand"))))]
+  ""
+)
+
+(define_insn "*ccmp<CC_ONLY:mode><GPI:mode>_rev"
+  [(set (match_operand:CC_ONLY 0 "cc_register" "")
 	(if_then_else:CC_ONLY
 	  (match_operator 4 "aarch64_comparison_operator"
-	   [(match_operand 0 "cc_register" "")
+	   [(match_operand 1 "cc_register" "")
 	    (const_int 0)])
 	  (unspec:CC_ONLY
 	    [(match_operand 5 "immediate_operand")]
@@ -552,11 +591,24 @@
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
-(define_insn "@ccmp<CCFP_CCFPE:mode><GPF:mode>_rev"
-  [(set (match_operand:CCFP_CCFPE 1 "cc_register" "")
+(define_expand "@ccmp<CCFP_CCFPE:mode><GPF:mode>_rev"
+  [(set (match_operand:CCFP_CCFPE 0 "cc_register")
+	(if_then_else:CCFP_CCFPE
+	  (match_operand 3 "aarch64_comparison_operator")
+	  (unspec:CCFP_CCFPE
+	    [(match_operand 4 "immediate_operand")]
+	    UNSPEC_NZCV)
+	  (compare:CCFP_CCFPE
+	    (match_operand:GPF 1 "register_operand")
+	    (match_operand:GPF 2 "register_operand"))))]
+  ""
+)
+
+(define_insn "*ccmp<CCFP_CCFPE:mode><GPF:mode>_rev"
+  [(set (match_operand:CCFP_CCFPE 0 "cc_register" "")
 	(if_then_else:CCFP_CCFPE
 	  (match_operator 4 "aarch64_comparison_operator"
-	   [(match_operand 0 "cc_register" "")
+	   [(match_operand 1 "cc_register" "")
 	    (const_int 0)])
 	  (unspec:CCFP_CCFPE
 	    [(match_operand 5 "immediate_operand")]
-- 
2.20.1


* [PATCH 5/6] aarch64: Improve nzcv argument to ccmp
  2020-03-19  6:47 [PATCH 0/6] aarch64: Implement TImode comparisons Richard Henderson
                   ` (3 preceding siblings ...)
  2020-03-19  6:48 ` [PATCH 4/6] aarch64: Simplify @ccmp<cc_mode><mode> operands Richard Henderson
@ 2020-03-19  6:48 ` Richard Henderson
  2020-03-19  6:48 ` [PATCH 6/6] aarch64: Implement TImode comparisons Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:48 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

Currently we use %k to interpret an aarch64_cond_code value.
This interpretation is done via an array, aarch64_nzcv_codes.
The rtl is neither hindered nor harmed by using the proper
nzcv value itself, so index the array earlier rather than later.
This makes it easier to compare the rtl to the assembly.

It is slightly confusing that the entries in aarch64_nzcv_codes
currently hold the nzcv value which makes the *inverse* of the
indexed code true.  Invert those values, so that each entry makes
its own code true.
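
For example, the entry indexed by AARCH64_EQ changes from 0 (flags
for which EQ is false) to AARCH64_CC_Z (flags for which EQ is true),
so the nzcv constant carried in the rtl is the same number that is
printed in the ccmp instruction.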

	* config/aarch64/aarch64.c (AARCH64_CC_{NZCV}): Move up.
	(aarch64_nzcv_codes): Move up; reverse values of even/odd entries.
	(aarch64_gen_compare_reg): Use aarch64_nzcv_codes in
	gen_ccmpccdi generation.
	(aarch64_print_operand): Remove case 'k'.
	(aarch64_gen_ccmp_next): Invert condition for !AND, remove
	inversion for AND; use aarch64_nzcv_codes.
	* config/aarch64/aarch64.md (*ccmpcc<GPI>): Remove %k from
	all alternatives.
	(*ccmpcc<GPI>_rev, *ccmp<CCFP><GPF>, *ccmp<CCFP><GPF>_rev): Likewise.
---
 gcc/config/aarch64/aarch64.c  | 81 +++++++++++++++--------------------
 gcc/config/aarch64/aarch64.md | 16 +++----
 2 files changed, 42 insertions(+), 55 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 16ff40fc267..d7899dad759 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1270,6 +1270,36 @@ aarch64_cc;
 
 #define AARCH64_INVERSE_CONDITION_CODE(X) ((aarch64_cc) (((int) X) ^ 1))
 
+/* N Z C V.  */
+#define AARCH64_CC_V 1
+#define AARCH64_CC_C (1 << 1)
+#define AARCH64_CC_Z (1 << 2)
+#define AARCH64_CC_N (1 << 3)
+
+/*
+ * N Z C V flags for ccmp.  Indexed by aarch64_cond_code.
+ * These are the flags to make the given code be *true*.
+ */
+static const int aarch64_nzcv_codes[] =
+{
+  AARCH64_CC_Z,	/* EQ, Z == 1.  */
+  0,		/* NE, Z == 0.  */
+  AARCH64_CC_C,	/* CS, C == 1.  */
+  0,		/* CC, C == 0.  */
+  AARCH64_CC_N,	/* MI, N == 1.  */
+  0,		/* PL, N == 0.  */
+  AARCH64_CC_V,	/* VS, V == 1.  */
+  0,		/* VC, V == 0.  */
+  AARCH64_CC_C,	/* HI, C == 1 && Z == 0.  */
+  0,		/* LS, !(C == 1 && Z == 0).  */
+  0,		/* GE, N == V.  */
+  AARCH64_CC_V,	/* LT, N != V.  */
+  0,		/* GT, Z == 0 && N == V.  */
+  AARCH64_CC_V,	/* LE, !(Z == 0 && N == V).  */
+  0,		/* AL, Any.  */
+  0		/* NV, Any.  */
+};
+
 struct aarch64_branch_protect_type
 {
   /* The type's name that the user passes to the branch-protection option
@@ -2351,7 +2381,7 @@ aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
       rtx y_hi = operand_subword (y, 1, 0, TImode);
       emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
 			       gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-			       GEN_INT (AARCH64_EQ)));
+			       GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
     }
   else
     {
@@ -9302,33 +9332,6 @@ aarch64_const_vec_all_in_range_p (rtx vec,
   return true;
 }
 
-/* N Z C V.  */
-#define AARCH64_CC_V 1
-#define AARCH64_CC_C (1 << 1)
-#define AARCH64_CC_Z (1 << 2)
-#define AARCH64_CC_N (1 << 3)
-
-/* N Z C V flags for ccmp.  Indexed by AARCH64_COND_CODE.  */
-static const int aarch64_nzcv_codes[] =
-{
-  0,		/* EQ, Z == 1.  */
-  AARCH64_CC_Z,	/* NE, Z == 0.  */
-  0,		/* CS, C == 1.  */
-  AARCH64_CC_C,	/* CC, C == 0.  */
-  0,		/* MI, N == 1.  */
-  AARCH64_CC_N, /* PL, N == 0.  */
-  0,		/* VS, V == 1.  */
-  AARCH64_CC_V, /* VC, V == 0.  */
-  0,		/* HI, C ==1 && Z == 0.  */
-  AARCH64_CC_C,	/* LS, !(C == 1 && Z == 0).  */
-  AARCH64_CC_V,	/* GE, N == V.  */
-  0,		/* LT, N != V.  */
-  AARCH64_CC_Z, /* GT, Z == 0 && N == V.  */
-  0,		/* LE, !(Z == 0 && N == V).  */
-  0,		/* AL, Any.  */
-  0		/* NV, Any.  */
-};
-
 /* Print floating-point vector immediate operand X to F, negating it
    first if NEGATE is true.  Return true on success, false if it isn't
    a constant we can handle.  */
@@ -9416,7 +9419,6 @@ sizetochar (int size)
 			(32-bit or 64-bit).
      '0':		Print a normal operand, if it's a general register,
 			then we assume DImode.
-     'k':		Print NZCV for conditional compare instructions.
      'A':		Output address constant representing the first
 			argument of X, specifying a relocation offset
 			if appropriate.
@@ -9866,22 +9868,6 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       output_addr_const (asm_out_file, x);
       break;
 
-    case 'k':
-      {
-	HOST_WIDE_INT cond_code;
-
-	if (!CONST_INT_P (x))
-	  {
-	    output_operand_lossage ("invalid operand for '%%%c'", code);
-	    return;
-	  }
-
-	cond_code = INTVAL (x);
-	gcc_assert (cond_code >= 0 && cond_code <= AARCH64_NV);
-	asm_fprintf (f, "%d", aarch64_nzcv_codes[cond_code]);
-      }
-      break;
-
     case 'y':
     case 'z':
       {
@@ -20514,14 +20500,15 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
 	  rtx_code code = reverse_condition (prev_code);
 	  prev = gen_rtx_fmt_ee (code, VOIDmode, XEXP (prev, 0), const0_rtx);
 	}
-      aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
     }
+  else
+    aarch64_cond = AARCH64_INVERSE_CONDITION_CODE (aarch64_cond);
 
   create_fixed_operand (&ops[0], target);
   create_fixed_operand (&ops[1], op0);
   create_fixed_operand (&ops[2], op1);
   create_fixed_operand (&ops[3], prev);
-  create_fixed_operand (&ops[4], GEN_INT (aarch64_cond));
+  create_fixed_operand (&ops[4], GEN_INT (aarch64_nzcv_codes[aarch64_cond]));
 
   push_to_sequence (*gen_seq);
   if (!maybe_expand_insn (icode, 5, ops))
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 12213176103..c789b641e7c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -522,9 +522,9 @@
 	    UNSPEC_NZCV)))]
   ""
   "@
-   ccmp\\t%<w>2, %<w>3, %k5, %m4
-   ccmp\\t%<w>2, %3, %k5, %m4
-   ccmn\\t%<w>2, #%n3, %k5, %m4"
+   ccmp\\t%<w>2, %<w>3, %5, %m4
+   ccmp\\t%<w>2, %3, %5, %m4
+   ccmn\\t%<w>2, #%n3, %5, %m4"
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
@@ -554,7 +554,7 @@
 	    [(match_operand 5 "immediate_operand")]
 	    UNSPEC_NZCV)))]
   "TARGET_FLOAT"
-  "fccmp<e>\\t%<s>2, %<s>3, %k5, %m4"
+  "fccmp<e>\\t%<s>2, %<s>3, %5, %m4"
   [(set_attr "type" "fccmp<s>")]
 )
 
@@ -585,9 +585,9 @@
 	    (match_operand:GPI 3 "aarch64_ccmp_operand" "r,Uss,Usn"))))]
   ""
   "@
-   ccmp\\t%<w>2, %<w>3, %k5, %M4
-   ccmp\\t%<w>2, %3, %k5, %M4
-   ccmn\\t%<w>2, #%n3, %k5, %M4"
+   ccmp\\t%<w>2, %<w>3, %5, %M4
+   ccmp\\t%<w>2, %3, %5, %M4
+   ccmn\\t%<w>2, #%n3, %5, %M4"
   [(set_attr "type" "alus_sreg,alus_imm,alus_imm")]
 )
 
@@ -617,7 +617,7 @@
 	    (match_operand:GPF 2 "register_operand" "w")
 	    (match_operand:GPF 3 "register_operand" "w"))))]
   "TARGET_FLOAT"
-  "fccmp<e>\\t%<s>2, %<s>3, %k5, %M4"
+  "fccmp<e>\\t%<s>2, %<s>3, %5, %M4"
   [(set_attr "type" "fccmp<s>")]
 )
 
-- 
2.20.1


* [PATCH 6/6] aarch64: Implement TImode comparisons
  2020-03-19  6:47 [PATCH 0/6] aarch64: Implement TImode comparisons Richard Henderson
                   ` (4 preceding siblings ...)
  2020-03-19  6:48 ` [PATCH 5/6] aarch64: Improve nzcv argument to ccmp Richard Henderson
@ 2020-03-19  6:48 ` Richard Henderson
  5 siblings, 0 replies; 7+ messages in thread
From: Richard Henderson @ 2020-03-19  6:48 UTC (permalink / raw)
  To: gcc-patches
  Cc: richard.earnshaw, richard.sandiford, marcus.shawcroft, kyrylo.tkachov

Use ccmp to perform all TImode comparisons without branches.
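
For illustration only (register allocation arbitrary), the EQ case
can now expand to a sequence like

	cmp	x0, x2			// compare low halves
	ccmp	x1, x3, #0, eq		// if equal, compare high halves;
					// otherwise force flags to NE
	cset	w0, eq			// 1 iff both halves are equal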

	* config/aarch64/aarch64.c (aarch64_gen_compare_reg): Expand all of
	the comparisons for TImode, not just NE.
	* config/aarch64/aarch64.md (cbranchti4, cstoreti4): New.
---
 gcc/config/aarch64/aarch64.c  | 182 +++++++++++++++++++++++++++++++---
 gcc/config/aarch64/aarch64.md |  28 ++++++
 2 files changed, 196 insertions(+), 14 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d7899dad759..911dc1c91cd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -2363,32 +2363,186 @@ rtx
 aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y)
 {
   machine_mode cmp_mode = GET_MODE (x);
-  machine_mode cc_mode;
   rtx cc_reg;
 
   if (cmp_mode == TImode)
     {
-      gcc_assert (code == NE);
-
-      cc_mode = CCmode;
-      cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
-
       rtx x_lo = operand_subword (x, 0, 0, TImode);
-      rtx y_lo = operand_subword (y, 0, 0, TImode);
-      emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo));
-
       rtx x_hi = operand_subword (x, 1, 0, TImode);
-      rtx y_hi = operand_subword (y, 1, 0, TImode);
-      emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
-			       gen_rtx_EQ (cc_mode, cc_reg, const0_rtx),
-			       GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
+      rtx y_lo, y_hi, tmp;
+
+      if (y == const0_rtx)
+	{
+	  y_lo = y_hi = y;
+	  switch (code)
+	    {
+	    case EQ:
+	    case NE:
+	      /* For equality, IOR the two halves together.  If this gets
+		 used for a branch, we expect this to fold to cbz/cbnz;
+		 otherwise it's no larger than cmp+ccmp below.  Beware of
+		 the compare-and-swap post-reload split and use cmp+ccmp.  */
+	      if (!can_create_pseudo_p ())
+		break;
+	      tmp = gen_reg_rtx (DImode);
+	      emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+	      emit_insn (gen_cmpdi (tmp, const0_rtx));
+	      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+	      goto done;
+
+	    case LT:
+	    case GE:
+	      /* Check only the sign bit.  Choose to expose this detail,
+		 lest something later tries to use a COMPARE in a way
+		 that doesn't correspond.  This is "tst".  */
+	      cc_reg = gen_rtx_REG (CC_NZmode, CC_REGNUM);
+	      tmp = gen_rtx_AND (DImode, x_hi, GEN_INT (INT64_MIN));
+	      tmp = gen_rtx_COMPARE (CC_NZmode, tmp, const0_rtx);
+	      emit_set_insn (cc_reg, tmp);
+	      code = (code == LT ? NE : EQ);
+	      goto done;
+
+	    case LE:
+	    case GT:
+	      /* For GT, (x_hi >= 0) && ((x_hi | x_lo) != 0),
+		 and of course the inverse for LE.  */
+	      emit_insn (gen_cmpdi (x_hi, const0_rtx));
+
+	      tmp = gen_reg_rtx (DImode);
+	      emit_insn (gen_iordi3 (tmp, x_hi, x_lo));
+
+	      /* Combine the two terms:
+		 (GE ? (compare tmp 0) : EQ),
+		 so that the whole term is true for NE, false for EQ.  */
+	      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+	      emit_insn (gen_ccmpccdi
+			 (cc_reg, tmp, const0_rtx,
+			  gen_rtx_GE (VOIDmode, cc_reg, const0_rtx),
+			  GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	      /* The result is entirely within the Z bit. */
+	      code = (code == GT ? NE : EQ);
+	      goto done;
+
+	    default:
+	      break;
+	    }
+	}
+      else
+	{
+	  y_lo = operand_subword (y, 0, 0, TImode);
+	  y_hi = operand_subword (y, 1, 0, TImode);
+	}
+
+      cc_reg = gen_rtx_REG (CCmode, CC_REGNUM);
+      switch (code)
+	{
+	case EQ:
+	case NE:
+	  /* For EQ, (x_lo == y_lo) && (x_hi == y_hi).  */
+	  emit_insn (gen_cmpdi (x_lo, y_lo));
+	  emit_insn (gen_ccmpccdi (cc_reg, x_hi, y_hi,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_NE])));
+	  break;
+
+	case LEU:
+	case GTU:
+	  std::swap (x_lo, y_lo);
+	  std::swap (x_hi, y_hi);
+	  code = swap_condition (code);
+	  /* fall through */
+
+	case LTU:
+	case GEU:
+	  /* For LTU, (x - y), as double-word arithmetic.  */
+	  emit_insn (gen_cmpdi (x_lo, y_lo));
+	  /* The ucmp*_carryinC pattern uses zero_extend, and so cannot
+	     take the constant 0 we allow elsewhere.  Force to reg now
+	     and allow combine to eliminate via simplification.  */
+	  x_hi = force_reg (DImode, x_hi);
+	  y_hi = force_reg (DImode, y_hi);
+	  emit_insn (gen_ucmpdi3_carryinC(x_hi, y_hi));
+	  /* The result is entirely within the C bit. */
+	  break;
+
+	case LE:
+	case GT:
+	  /*
+	   * For LE,
+	   *    !((x_hi > y_hi) || (x_hi == y_hi && x_lo > y_lo))
+	   * -> !(x_hi > y_hi) && !(x_hi == y_hi && x_lo > y_lo)
+	   * -> (x_hi <= y_hi) && !(x_hi == y_hi && x_lo > y_lo)
+	   */
+
+	  /* Compute the first term (x_hi <= y_hi) and save it in tmp. */
+	  tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_cmpdi (x_hi, y_hi));
+	  emit_set_insn (tmp, gen_rtx_LE (SImode, cc_reg, const0_rtx));
+
+	  /* Compute the second term (x_hi == y_hi && x_lo > y_lo):
+	     (EQ ? (compare x_lo y_lo) : LE),
+	     so that the whole term is true for GT, false for LE.  */
+	  emit_insn (gen_ccmpccdi (cc_reg, x_lo, y_lo,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_LE])));
+
+	  /* Combine the two terms.  Since we want !(second_term):
+	     (LE ? (compare tmp 0) : EQ),
+	     so that the whole term is true for NE, false for EQ.  */
+	  emit_insn (gen_ccmpccsi (cc_reg, tmp, const0_rtx,
+				   gen_rtx_LE (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	  /* The result is entirely within the Z bit. */
+	  code = (code == LE ? NE : EQ);
+	  break;
+
+	case LT:
+	case GE:
+	  /*
+	   * For GE,
+	   *    !((x_hi < y_hi) || (x_hi == y_hi && x_lo < y_lo))
+	   * -> !(x_hi < y_hi) && !(x_hi == y_hi && x_lo < y_lo)
+	   * -> (x_hi >= y_hi) && !(x_hi == y_hi && x_lo < y_lo)
+	   * and of course the inverse for LT.
+	   */
+
+	  /* Compute the first term (x_hi >= y_hi) and save it in tmp. */
+	  tmp = gen_reg_rtx (SImode);
+	  emit_insn (gen_cmpdi (x_hi, y_hi));
+	  emit_set_insn (tmp, gen_rtx_GE (SImode, cc_reg, const0_rtx));
+
+	  /* Compute the second term (x_hi == y_hi && x_lo < y_lo):
+	     (EQ ? (compare x_lo y_lo) : GE),
+	     so that the whole term is true for LT, false for GE.  */
+	  emit_insn (gen_ccmpccdi (cc_reg, x_lo, y_lo,
+				   gen_rtx_EQ (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_GE])));
+
+	  /* Combine the two terms.  Since we want !(second_term):
+	     (GE ? (compare tmp 0) : EQ),
+	     so that the whole term is true for NE, false for EQ.  */
+	  emit_insn (gen_ccmpccsi (cc_reg, tmp, const0_rtx,
+				   gen_rtx_GE (VOIDmode, cc_reg, const0_rtx),
+				   GEN_INT (aarch64_nzcv_codes[AARCH64_EQ])));
+
+	  /* The result is entirely within the Z bit. */
+	  code = (code == GE ? NE : EQ);
+	  break;
+
+	default:
+	  gcc_unreachable ();
+	}
     }
   else
     {
-      cc_mode = SELECT_CC_MODE (code, x, y);
+      machine_mode cc_mode = SELECT_CC_MODE (code, x, y);
       cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM);
       emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y));
     }
+
+ done:
   return gen_rtx_fmt_ee (code, VOIDmode, cc_reg, const0_rtx);
 }
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c789b641e7c..fb076b60e3c 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -471,6 +471,20 @@
   operands[2] = const0_rtx;
 })
 
+(define_expand "cbranchti4"
+  [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
+			    [(match_operand:TI 1 "register_operand")
+			     (match_operand:TI 2 "aarch64_reg_or_zero")])
+			   (label_ref (match_operand 3 "" ""))
+			   (pc)))]
+  ""
+{
+  operands[0] = aarch64_gen_compare_reg (GET_CODE (operands[0]), operands[1],
+					 operands[2]);
+  operands[1] = XEXP (operands[0], 0);
+  operands[2] = const0_rtx;
+})
+
 (define_expand "cbranch<mode>4"
   [(set (pc) (if_then_else (match_operator 0 "aarch64_comparison_operator"
 			    [(match_operand:GPF 1 "register_operand")
@@ -4144,6 +4158,20 @@
   operands[3] = const0_rtx;
 })
 
+(define_expand "cstoreti4"
+  [(set (match_operand:SI 0 "register_operand")
+	(match_operator:SI 1 "aarch64_comparison_operator"
+	 [(match_operand:TI 2 "register_operand")
+	  (match_operand:TI 3 "aarch64_reg_or_zero")]))]
+  ""
+{
+  operands[1] = aarch64_gen_compare_reg (GET_CODE (operands[1]), operands[2],
+				         operands[3]);
+  PUT_MODE (operands[1], SImode);
+  operands[2] = XEXP (operands[1], 0);
+  operands[3] = const0_rtx;
+})
+
 (define_expand "cstorecc4"
   [(set (match_operand:SI 0 "register_operand")
        (match_operator 1 "aarch64_comparison_operator_mode"
-- 
2.20.1

