[PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns
  2018-07-02  3:41 [PATCH, aarch64 0/4] Add movprfx patterns and alternatives Richard Henderson
  2018-07-02  3:41 ` [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY Richard Henderson
  2018-07-02  3:41 ` [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select Richard Henderson
@ 2018-07-02  3:41 ` Richard Henderson
  2018-07-02 11:55   ` Richard Sandiford
  2018-07-02  3:41 ` [PATCH, aarch64 1/4] aarch64: Add movprfx alternatives for unpredicated patterns Richard Henderson
  3 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2018-07-02  3:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, marcus.shawcroft, james.greenhalgh

	* config/aarch64/iterators.md (SVE_INT_BINARY_REV): Remove.
	(SVE_COND_FP_BINARY_REV): Remove.
	(sve_int_op_rev, sve_fp_op_rev): New.
	* config/aarch64/aarch64-sve.md (*cond_<SVE_INT_BINARY><SVE_I>_0): New.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_0): New.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_0): New.
	(*cond_<SVE_INT_BINARY><SVE_I>_2): Rename, add movprfx alternative.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_2): Similarly.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_2): Similarly.
	(*cond_<SVE_INT_BINARY><SVE_I>_3): Similarly; use sve_int_op_rev.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_3): Similarly.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_3): Similarly; use sve_fp_op_rev.
---
 gcc/config/aarch64/aarch64.c      |   8 +-
 gcc/config/aarch64/aarch64-sve.md | 163 ++++++++++++++++++++++--------
 gcc/config/aarch64/iterators.md   |  26 ++++-
 3 files changed, 149 insertions(+), 48 deletions(-)

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b88e7cac27a..3af7e98e166 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16098,7 +16098,13 @@ aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
      whereas selecting the input avoids the MOVPRFX:
 
 	SEL Z0.S, P1, Z2.S, Z4.S
-	ADD Z0.S, P1/M, Z0.S, Z3.S.  */
+	ADD Z0.S, P1/M, Z0.S, Z3.S.
+
+     ??? Matching the other input can produce
+
+	MOVPRFX Z4.S, P1/M, Z2.S
+	ADD Z4.S, P1/M, Z4.S, Z3.S
+   */
   machine_mode mode = GET_MODE (operands[0]);
   rtx temp = gen_reg_rtx (mode);
   rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 2aceef65c80..db16affc093 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1841,57 +1841,108 @@
 })
 
 ;; Predicated integer operations.
-(define_insn "*cond_<optab><mode>"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+;; All other things being equal, prefer the patterns for which the
+;; destination matches the select input, as that gives us the most
+;; freedom to swap the other operands.
+
+(define_insn "*cond_<optab><mode>_0"
+  [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
 	(unspec:SVE_I
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (SVE_INT_BINARY:SVE_I
-	     (match_operand:SVE_I 2 "register_operand" "0")
-	     (match_operand:SVE_I 3 "register_operand" "w"))
-	   (match_dup 2)]
+	     (match_operand:SVE_I 2 "register_operand" "0, w, w")
+	     (match_operand:SVE_I 3 "register_operand" "w, 0, w"))
+	   (match_dup 0)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
-(define_insn "*cond_<optab><mode>"
-  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
+(define_insn "*cond_<optab><mode>_0"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "+w, w, ?&w")
 	(unspec:SVE_SDI
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (SVE_INT_BINARY_SD:SVE_SDI
-	     (match_operand:SVE_SDI 2 "register_operand" "0")
-	     (match_operand:SVE_SDI 3 "register_operand" "w"))
-	   (match_dup 2)]
+	     (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
+	     (match_operand:SVE_SDI 3 "register_operand" "w, 0, w"))
+	   (match_dup 0)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %1/m, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
-;; Predicated integer operations with the operands reversed.
-(define_insn "*cond_<optab><mode>"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+;; Predicated integer operations with select matching the first operand.
+(define_insn "*cond_<optab><mode>_2"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_I
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
-	   (SVE_INT_BINARY_REV:SVE_I
-	     (match_operand:SVE_I 2 "register_operand" "w")
-	     (match_operand:SVE_I 3 "register_operand" "0"))
-	   (match_dup 3)]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (SVE_INT_BINARY:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "0, w")
+	     (match_operand:SVE_I 3 "register_operand" "w, w"))
+	   (match_dup 2)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
-(define_insn "*cond_<optab><mode>"
-  [(set (match_operand:SVE_SDI 0 "register_operand" "=w")
+(define_insn "*cond_<optab><mode>_2"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_SDI
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (SVE_INT_BINARY_SD:SVE_SDI
-	     (match_operand:SVE_SDI 2 "register_operand" "w")
-	     (match_operand:SVE_SDI 3 "register_operand" "0"))
+	     (match_operand:SVE_SDI 2 "register_operand" "0, w")
+	     (match_operand:SVE_SDI 3 "register_operand" "w, w"))
+	   (match_dup 2)]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "@
+   <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+;; Predicated integer operations with select matching the second operand.
+(define_insn "*cond_<optab><mode>_3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (SVE_INT_BINARY:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "w, w")
+	     (match_operand:SVE_I 3 "register_operand" "0, w"))
 	   (match_dup 3)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  "@
+   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
+)
+
+(define_insn "*cond_<optab><mode>_3"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_SDI
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 2 "register_operand" "w, w")
+	     (match_operand:SVE_SDI 3 "register_operand" "0, w"))
+	   (match_dup 3)]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "@
+   <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Set operand 0 to the last active element in operand 3, or to tied
@@ -2687,34 +2738,60 @@
   aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
 })
 
-;; Predicated floating-point operations.
-(define_insn "*cond_<optab><mode>"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+;; Predicated floating-point operations with select matching output.
+(define_insn "*cond_<optab><mode>_0"
+  [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_operand:SVE_F 2 "register_operand" "0")
-	      (match_operand:SVE_F 3 "register_operand" "w")]
+	     [(match_dup 1)
+	      (match_operand:SVE_F 2 "register_operand" "0, w, w")
+	      (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
+	     SVE_COND_FP_BINARY)
+	   (match_dup 0)]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "@
+   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %1/m, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
+)
+
+;; Predicated floating-point operations with select matching first operand.
+(define_insn "*cond_<optab><mode>_2"
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (unspec:SVE_F
+	     [(match_operand:SVE_F 2 "register_operand" "0, w")
+	      (match_operand:SVE_F 3 "register_operand" "w, w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 2)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
-;; Predicated floating-point operations with the operands reversed.
-(define_insn "*cond_<optab><mode>"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+;; Predicated floating-point operations with select matching second operand.
+(define_insn "*cond_<optab><mode>_3"
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_operand:SVE_F 2 "register_operand" "w")
-	      (match_operand:SVE_F 3 "register_operand" "0")]
+	     [(match_operand:SVE_F 2 "register_operand" "w, w")
+	      (match_operand:SVE_F 3 "register_operand" "0, w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 3)]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "<sve_fp_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  "@
+   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Shift an SVE vector left and insert a scalar into element 0.
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index c5ef2eecf20..965dc6bf4f3 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1207,11 +1207,11 @@
 ;; SVE floating-point unary operations.
 (define_code_iterator SVE_FP_UNARY [neg abs sqrt])
 
+;; SVE integer binary operations.
 (define_code_iterator SVE_INT_BINARY [plus minus mult smax umax smin umin
 				      and ior xor])
 
-(define_code_iterator SVE_INT_BINARY_REV [minus])
-
+;; SVE integer binary division operations.
 (define_code_iterator SVE_INT_BINARY_SD [div udiv])
 
 ;; SVE integer comparisons.
@@ -1402,6 +1402,19 @@
 			      (not "not")
 			      (popcount "cnt")])
 
+(define_code_attr sve_int_op_rev [(plus "add")
+			          (minus "subr")
+			          (mult "mul")
+			          (div "sdivr")
+			          (udiv "udivr")
+			          (smin "smin")
+			          (smax "smax")
+			          (umin "umin")
+			          (umax "umax")
+			          (and "and")
+			          (ior "orr")
+			          (xor "eor")])
+
 ;; The floating-point SVE instruction that implements an rtx code.
 (define_code_attr sve_fp_op [(plus "fadd")
 			     (neg "fneg")
@@ -1550,8 +1563,6 @@
 					 UNSPEC_COND_MUL UNSPEC_COND_DIV
 					 UNSPEC_COND_MAX UNSPEC_COND_MIN])
 
-(define_int_iterator SVE_COND_FP_BINARY_REV [UNSPEC_COND_SUB UNSPEC_COND_DIV])
-
 (define_int_iterator SVE_COND_FP_CMP [UNSPEC_COND_LT UNSPEC_COND_LE
 				      UNSPEC_COND_EQ UNSPEC_COND_NE
 				      UNSPEC_COND_GE UNSPEC_COND_GT])
@@ -1802,6 +1813,13 @@
 			    (UNSPEC_COND_MAX "fmaxnm")
 			    (UNSPEC_COND_MIN "fminnm")])
 
+(define_int_attr sve_fp_op_rev [(UNSPEC_COND_ADD "fadd")
+			        (UNSPEC_COND_SUB "fsubr")
+			        (UNSPEC_COND_MUL "fmul")
+			        (UNSPEC_COND_DIV "fdivr")
+			        (UNSPEC_COND_MAX "fmaxnm")
+			        (UNSPEC_COND_MIN "fminnm")])
+
 (define_int_attr commutative [(UNSPEC_COND_ADD "true")
 			      (UNSPEC_COND_SUB "false")
 			      (UNSPEC_COND_MUL "true")
-- 
2.17.1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY
  2018-07-02  3:41 [PATCH, aarch64 0/4] Add movprfx patterns and alternatives Richard Henderson
@ 2018-07-02  3:41 ` Richard Henderson
  2018-07-02 11:57   ` Richard Sandiford
  2018-07-02  3:41 ` [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select Richard Henderson
                   ` (2 subsequent siblings)
  3 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2018-07-02  3:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, marcus.shawcroft, james.greenhalgh

The predicate is present within the containing UNSPEC_SEL;
there is no need to duplicate it.

	* config/aarch64/aarch64-sve.md (cond_<SVE_COND_FP_BINARY><SVE_F>):
	Remove match_dup 1 from the inner unspec.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 3dee6a4376d..2aceef65c80 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -2677,8 +2677,7 @@
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand")
 	   (unspec:SVE_F
-	     [(match_dup 1)
-	      (match_operand:SVE_F 2 "register_operand")
+	     [(match_operand:SVE_F 2 "register_operand")
 	      (match_operand:SVE_F 3 "register_operand")]
 	     SVE_COND_FP_BINARY)
 	   (match_operand:SVE_F 4 "register_operand")]
@@ -2694,8 +2693,7 @@
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
-	      (match_operand:SVE_F 2 "register_operand" "0")
+	     [(match_operand:SVE_F 2 "register_operand" "0")
 	      (match_operand:SVE_F 3 "register_operand" "w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 2)]
@@ -2710,8 +2708,7 @@
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
-	      (match_operand:SVE_F 2 "register_operand" "w")
+	     [(match_operand:SVE_F 2 "register_operand" "w")
 	      (match_operand:SVE_F 3 "register_operand" "0")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 3)]
-- 
2.17.1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, aarch64 1/4] aarch64: Add movprfx alternatives for unpredicated patterns
  2018-07-02  3:41 [PATCH, aarch64 0/4] Add movprfx patterns and alternatives Richard Henderson
                   ` (2 preceding siblings ...)
  2018-07-02  3:41 ` [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns Richard Henderson
@ 2018-07-02  3:41 ` Richard Henderson
  2018-07-02 11:56   ` Richard Sandiford
  3 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2018-07-02  3:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, marcus.shawcroft, james.greenhalgh

	* config/aarch64/aarch64.md (movprfx): New attr.
	(length): Default movprfx to 8.
	* config/aarch64/aarch64-sve.md (*mul<SVE_I>3): Add movprfx alt.
	(*madd<SVE_I>, *msub<SVE_I>3): Likewise.
	(*<su>mul<SVE_I>3_highpart): Likewise.
	(*<SVE_INT_BINARY_SD><SVE_SDI>3): Likewise.
	(*v<ASHIFT><SVE_I>3): Likewise.
	(*<su><MAXMIN><SVE_I>3): Likewise.
	(*<su><MAXMIN><SVE_F>3): Likewise.
	(*fma<SVE_F>4, *fnma<SVE_F>4): Likewise.
	(*fms<SVE_F>4, *fnms<SVE_F>4): Likewise.
	(*div<SVE_F>3): Likewise.
---
 gcc/config/aarch64/aarch64-sve.md | 184 ++++++++++++++++++------------
 gcc/config/aarch64/aarch64.md     |  11 +-
 2 files changed, 116 insertions(+), 79 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 8e2433385a8..3dee6a4376d 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -937,47 +937,53 @@
 ;; to gain much and would make the instruction seem less uniform to the
 ;; register allocator.
 (define_insn "*mul<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_I
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (mult:SVE_I
-	     (match_operand:SVE_I 2 "register_operand" "%0, 0")
-	     (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
+	     (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
+	     (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    mul\t%0.<Vetype>, %0.<Vetype>, #%3
-   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 (define_insn "*madd<mode>"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(plus:SVE_I
 	  (unspec:SVE_I
-	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
-			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
+	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+			 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
 	    UNSPEC_MERGE_PTRUE)
-	  (match_operand:SVE_I 4 "register_operand" "w, 0")))]
+	  (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
   "TARGET_SVE"
   "@
    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
-   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 (define_insn "*msub<mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(minus:SVE_I
-	  (match_operand:SVE_I 4 "register_operand" "w, 0")
+	  (match_operand:SVE_I 4 "register_operand" "w, 0, w")
 	  (unspec:SVE_I
-	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
-			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
+	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
+			 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
 	    UNSPEC_MERGE_PTRUE)))]
   "TARGET_SVE"
   "@
    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
-   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated highpart multiplication.
@@ -997,15 +1003,18 @@
 
 ;; Predicated highpart multiplication.
 (define_insn "*<su>mul<mode>3_highpart"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_I
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
-	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
-			  (match_operand:SVE_I 3 "register_operand" "w")]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
+			  (match_operand:SVE_I 3 "register_operand" "w, w")]
 			 MUL_HIGHPART)]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
-  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Unpredicated division.
@@ -1025,17 +1034,19 @@
 
 ;; Division predicated with a PTRUE.
 (define_insn "*<optab><mode>3"
-  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_SDI
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (SVE_INT_BINARY_SD:SVE_SDI
-	     (match_operand:SVE_SDI 2 "register_operand" "0, w")
-	     (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0"))]
+	     (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
+	     (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
-   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated NEG, NOT and POPCOUNT.
@@ -1222,17 +1233,19 @@
 ;; or X isn't likely to gain much and would make the instruction seem
 ;; less uniform to the register allocator.
 (define_insn "*v<optab><mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_I
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (ASHIFT:SVE_I
-	     (match_operand:SVE_I 2 "register_operand" "w, 0")
-	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
+	     (match_operand:SVE_I 2 "register_operand" "w, 0, w")
+	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
-   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
@@ -1723,14 +1736,17 @@
 
 ;; Integer MIN/MAX predicated with a PTRUE.
 (define_insn "*<su><maxmin><mode>3"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_I
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
-	   (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
-			 (match_operand:SVE_I 3 "register_operand" "w"))]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
+			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
-  "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Unpredicated floating-point MIN/MAX.
@@ -1749,14 +1765,17 @@
 
 ;; Floating-point MIN/MAX predicated with a PTRUE.
 (define_insn "*<su><maxmin><mode>3"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
-	   (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
-			  (match_operand:SVE_F 3 "register_operand" "w"))]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
+			  (match_operand:SVE_F 3 "register_operand" "w, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
-  "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Unpredicated fmin/fmax.
@@ -1776,15 +1795,18 @@
 
 ;; fmin/fmax predicated with a PTRUE.
 (define_insn "*<maxmin_uns><mode>3"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
-	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
-			  (match_operand:SVE_F 3 "register_operand" "w")]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
+			  (match_operand:SVE_F 3 "register_operand" "w, w")]
 			 FMAXMIN_UNS)]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
-  "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "@
+   <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+   movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated integer operations with select.
@@ -2146,17 +2168,19 @@
 
 ;; fma predicated with a PTRUE.
 (define_insn "*fma<mode>4"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
-		      (match_operand:SVE_F 4 "register_operand" "w, w")
-		      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
+		      (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
-   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+   movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
@@ -2177,18 +2201,20 @@
 
 ;; fnma predicated with a PTRUE.
 (define_insn "*fnma<mode>4"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (fma:SVE_F (neg:SVE_F
-			(match_operand:SVE_F 3 "register_operand" "%0, w"))
-		      (match_operand:SVE_F 4 "register_operand" "w, w")
-		      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
+			(match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
+		      (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
-   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+   movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated fms (%0 = (%1 * %2) - %3).
@@ -2209,18 +2235,20 @@
 
 ;; fms predicated with a PTRUE.
 (define_insn "*fms<mode>4"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
-		      (match_operand:SVE_F 4 "register_operand" "w, w")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
+		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
 		      (neg:SVE_F
-			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
+			(match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
-   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+   movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
@@ -2242,19 +2270,21 @@
 
 ;; fnms predicated with a PTRUE.
 (define_insn "*fnms<mode>4"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (fma:SVE_F (neg:SVE_F
-			(match_operand:SVE_F 3 "register_operand" "%0, w"))
-		      (match_operand:SVE_F 4 "register_operand" "w, w")
+			(match_operand:SVE_F 3 "register_operand" "%0, w, w"))
+		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
 		      (neg:SVE_F
-			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
+			(match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
-   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
+   movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated floating-point division.
@@ -2273,16 +2303,18 @@
 
 ;; Floating-point division predicated with a PTRUE.
 (define_insn "*div<mode>3"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
+  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
-	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
-	   (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
-		      (match_operand:SVE_F 3 "register_operand" "w, 0"))]
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	   (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
+		      (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
 	  UNSPEC_MERGE_PTRUE))]
   "TARGET_SVE"
   "@
    fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
-   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
+   movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "*,*,yes")]
 )
 
 ;; Unpredicated FNEG, FABS and FSQRT.
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4ac6332a200..a014a012cc1 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -251,9 +251,6 @@
 ;; will be disabled when !TARGET_SVE.
 (define_attr "sve" "no,yes" (const_string "no"))
 
-(define_attr "length" ""
-  (const_int 4))
-
 ;; Attribute that controls whether an alternative is enabled or not.
 ;; Currently it is only used to disable alternatives which touch fp or simd
 ;; registers when -mgeneral-regs-only is specified.
@@ -277,6 +274,14 @@
 ;; 1 :=: yes
 (define_attr "far_branch" "" (const_int 0))
 
+;; Attribute that specifies whether the alternative uses MOVPRFX.
+(define_attr "movprfx" "no,yes" (const_string "no"))
+
+(define_attr "length" ""
+  (cond [(eq_attr "movprfx" "yes")
+           (const_int 8)
+        ] (const_int 4)))
+
 ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has
 ;; no predicated insns.
 (define_attr "predicated" "yes,no" (const_string "no"))
-- 
2.17.1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select
  2018-07-02  3:41 [PATCH, aarch64 0/4] Add movprfx patterns and alternatives Richard Henderson
  2018-07-02  3:41 ` [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY Richard Henderson
@ 2018-07-02  3:41 ` Richard Henderson
  2018-07-02 11:57   ` Richard Sandiford
  2018-07-02  3:41 ` [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns Richard Henderson
  2018-07-02  3:41 ` [PATCH, aarch64 1/4] aarch64: Add movprfx alternatives for unpredicated patterns Richard Henderson
  3 siblings, 1 reply; 10+ messages in thread
From: Richard Henderson @ 2018-07-02  3:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, marcus.shawcroft, james.greenhalgh

	* config/aarch64/aarch64-protos.h, config/aarch64/aarch64.c
	(aarch64_sve_prepare_conditional_op): Remove.
	* config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY><SVE_I>):
	Allow aarch64_simd_reg_or_zero as select operand; remove
	the aarch64_sve_prepare_conditional_op call.
	(cond_<SVE_INT_BINARY_SD><SVE_SDI>): Likewise.
	(cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
	(*cond_<SVE_INT_BINARY><SVE_I>_z): New pattern.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_z): New pattern.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_z): New pattern.
	(*cond_<SVE_INT_BINARY><SVE_I>_any): New pattern.
	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_any): New pattern.
	(*cond_<SVE_COND_FP_BINARY><SVE_F>_any): New pattern
	and splitters to match all of the *_any patterns.
	* config/aarch64/predicates.md (aarch64_sve_any_binary_operator): New.
---
 gcc/config/aarch64/aarch64-protos.h |   1 -
 gcc/config/aarch64/aarch64.c        |  54 ----------
 gcc/config/aarch64/aarch64-sve.md   | 154 ++++++++++++++++++++++++----
 gcc/config/aarch64/predicates.md    |   3 +
 4 files changed, 136 insertions(+), 76 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 87c6ae20278..514ddc457ca 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -513,7 +513,6 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
 void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
 bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
-void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
 #endif /* RTX_CODE */
 
 void aarch64_init_builtins (void);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3af7e98e166..d75d45f4b8b 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -16058,60 +16058,6 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
   emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
 }
 
-/* Prepare a cond_<optab><mode> operation that has the operands
-   given by OPERANDS, where:
-
-   - operand 0 is the destination
-   - operand 1 is a predicate
-   - operands 2 to NOPS - 2 are the operands to an operation that is
-     performed for active lanes
-   - operand NOPS - 1 specifies the values to use for inactive lanes.
-
-   COMMUTATIVE_P is true if operands 2 and 3 are commutative.  In that case,
-   no pattern is provided for a tie between operands 3 and NOPS - 1.  */
-
-void
-aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
-				    bool commutative_p)
-{
-  /* We can do the operation directly if the "else" value matches one
-     of the other inputs.  */
-  for (unsigned int i = 2; i < nops - 1; ++i)
-    if (rtx_equal_p (operands[i], operands[nops - 1]))
-      {
-	if (i == 3 && commutative_p)
-	  std::swap (operands[2], operands[3]);
-	return;
-      }
-
-  /* If the "else" value is different from the other operands, we have
-     the choice of doing a SEL on the output or a SEL on an input.
-     Neither choice is better in all cases, but one advantage of
-     selecting the input is that it can avoid a move when the output
-     needs to be distinct from the inputs.  E.g. if operand N maps to
-     register N, selecting the output would give:
-
-	MOVPRFX Z0.S, Z2.S
-	ADD Z0.S, P1/M, Z0.S, Z3.S
-	SEL Z0.S, P1, Z0.S, Z4.S
-
-     whereas selecting the input avoids the MOVPRFX:
-
-	SEL Z0.S, P1, Z2.S, Z4.S
-	ADD Z0.S, P1/M, Z0.S, Z3.S.
-
-     ??? Matching the other input can produce
-
-	MOVPRFX Z4.S, P1/M, Z2.S
-	ADD Z4.S, P1/M, Z4.S, Z3.S
-   */
-  machine_mode mode = GET_MODE (operands[0]);
-  rtx temp = gen_reg_rtx (mode);
-  rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
-  emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
-  operands[2] = operands[nops - 1] = temp;
-}
-
 /* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
    true.  However due to issues with register allocation it is preferable
    to avoid tieing integer scalar and FP scalar modes.  Executing integer
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index db16affc093..b16d0455159 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1817,13 +1817,10 @@
 	   (SVE_INT_BINARY:SVE_I
 	     (match_operand:SVE_I 2 "register_operand")
 	     (match_operand:SVE_I 3 "register_operand"))
-	   (match_operand:SVE_I 4 "register_operand")]
+	   (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-{
-  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
-  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
-})
+)
 
 (define_expand "cond_<optab><mode>"
   [(set (match_operand:SVE_SDI 0 "register_operand")
@@ -1832,19 +1829,12 @@
 	   (SVE_INT_BINARY_SD:SVE_SDI
 	     (match_operand:SVE_SDI 2 "register_operand")
 	     (match_operand:SVE_SDI 3 "register_operand"))
-	   (match_operand:SVE_SDI 4 "register_operand")]
+	   (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-{
-  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
-  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
-})
-
-;; Predicated integer operations.
-;; All other things being equal, prefer the patterns for which the
-;; destination matches the select input, as that gives us the most
-;; freedom to swap the other operands.
+)
 
+;; Predicated integer operations with select matching the output operand.
 (define_insn "*cond_<optab><mode>_0"
   [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
 	(unspec:SVE_I
@@ -1945,6 +1935,87 @@
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; Predicated integer operations with select matching zero.
+(define_insn "*cond_<optab><mode>_z"
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "w")
+	     (match_operand:SVE_I 3 "register_operand" "w"))
+	   (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "yes")]
+)
+
+(define_insn "*cond_<optab><mode>_z"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
+	(unspec:SVE_SDI
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 2 "register_operand" "w")
+	     (match_operand:SVE_SDI 3 "register_operand" "w"))
+	   (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "yes")]
+)
+
+;; Synthetic predication of integer operations with select unmatched.
+(define_insn "*cond_<optab><mode>_any"
+  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY:SVE_I
+	     (match_operand:SVE_I 2 "register_operand" "w")
+	     (match_operand:SVE_I 3 "register_operand" "w"))
+	   (match_operand:SVE_I 4 "register_operand"   "w")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "#"
+)
+
+(define_insn "*cond_<optab><mode>_any"
+  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
+	(unspec:SVE_SDI
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (SVE_INT_BINARY_SD:SVE_SDI
+	     (match_operand:SVE_SDI 2 "register_operand" "w")
+	     (match_operand:SVE_SDI 3 "register_operand" "w"))
+	   (match_operand:SVE_SDI 4 "register_operand"   "w")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "#"
+)
+
+(define_split
+  [(set (match_operand:SVE_I 0 "register_operand")
+	(unspec:SVE_I
+	  [(match_operand:<VPRED> 1 "register_operand")
+	   (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"
+	     [(match_operand:SVE_I 2 "register_operand")
+	      (match_operand:SVE_I 3 "register_operand")])
+	   (match_operand:SVE_I 4 "register_operand")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE && reload_completed
+   && !(rtx_equal_p (operands[0], operands[4])
+        || rtx_equal_p (operands[2], operands[4])
+        || rtx_equal_p (operands[3], operands[4]))"
+  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
+  [(set (match_dup 0)
+	(unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
+                      UNSPEC_SEL))
+   (set (match_dup 0)
+	(unspec:SVE_I
+	  [(match_dup 1)
+	   (match_op_dup 5 [(match_dup 0) (match_dup 3)])
+           (match_dup 0)]
+	  UNSPEC_SEL))]
+)
+
 ;; Set operand 0 to the last active element in operand 3, or to tied
 ;; operand 1 if no elements are active.
 (define_insn "fold_extract_last_<mode>"
@@ -2731,12 +2802,10 @@
 	     [(match_operand:SVE_F 2 "register_operand")
 	      (match_operand:SVE_F 3 "register_operand")]
 	     SVE_COND_FP_BINARY)
-	   (match_operand:SVE_F 4 "register_operand")]
+	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-{
-  aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
-})
+)
 
 ;; Predicated floating-point operations with select matching output.
 (define_insn "*cond_<optab><mode>_0"
@@ -2744,8 +2813,7 @@
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
-	      (match_operand:SVE_F 2 "register_operand" "0, w, w")
+	     [(match_operand:SVE_F 2 "register_operand" "0, w, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 0)]
@@ -2794,6 +2862,50 @@
   [(set_attr "movprfx" "*,yes")]
 )
 
+;; Predicated floating-point operations with select matching zero.
+(define_insn "*cond_<optab><mode>_z"
+  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_F
+	     [(match_operand:SVE_F 2 "register_operand" "w")
+	      (match_operand:SVE_F 3 "register_operand" "w")]
+	     SVE_COND_FP_BINARY)
+	   (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  [(set_attr "movprfx" "yes")]
+)
+
+;; Synthetic predication of floating-point operations with select unmatched.
+(define_insn_and_split "*cond_<optab><mode>_any"
+  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
+	(unspec:SVE_F
+	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (unspec:SVE_F
+	     [(match_operand:SVE_F 2 "register_operand" "w")
+	      (match_operand:SVE_F 3 "register_operand" "w")]
+	     SVE_COND_FP_BINARY)
+	   (match_operand:SVE_F 4 "register_operand" "w")]
+	  UNSPEC_SEL))]
+  "TARGET_SVE"
+  "#"
+  "&& reload_completed
+   && !(rtx_equal_p (operands[0], operands[4])
+        || rtx_equal_p (operands[2], operands[4])
+        || rtx_equal_p (operands[3], operands[4]))"
+  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
+  [(set (match_dup 0)
+	(unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
+   (set (match_dup 0)
+	(unspec:SVE_F
+	  [(match_dup 1)
+	   (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
+           (match_dup 0)]
+	  UNSPEC_SEL))]
+)
+
 ;; Shift an SVE vector left and insert a scalar into element 0.
 (define_insn "vec_shl_insert_<mode>"
   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 7aec76d681f..4acbc218a8d 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -625,3 +625,6 @@
 ;; A special predicate that doesn't match a particular mode.
 (define_special_predicate "aarch64_any_register_operand"
   (match_code "reg"))
+
+(define_predicate "aarch64_sve_any_binary_operator"
+  (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))
-- 
2.17.1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH, aarch64 0/4] Add movprfx patterns and alternatives
@ 2018-07-02  3:41 Richard Henderson
  2018-07-02  3:41 ` [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY Richard Henderson
                   ` (3 more replies)
  0 siblings, 4 replies; 10+ messages in thread
From: Richard Henderson @ 2018-07-02  3:41 UTC (permalink / raw)
  To: gcc-patches; +Cc: richard.sandiford, marcus.shawcroft, james.greenhalgh

These don't fire very often, but at least a few times within the
testsuite.  Enough to test my qemu implementation of the insns.


r~


Richard Henderson (4):
  aarch64: Add movprfx alternatives for unpredicated patterns
  aarch64: Remove predicate from inside SVE_COND_FP_BINARY
  aarch64: Add movprfx alternatives for predicate patterns
  aarch64: Add movprfx patterns for zero and unmatched select

 gcc/config/aarch64/aarch64-protos.h |   1 -
 gcc/config/aarch64/aarch64.c        |  48 ---
 gcc/config/aarch64/aarch64-sve.md   | 488 ++++++++++++++++++++--------
 gcc/config/aarch64/aarch64.md       |  11 +-
 gcc/config/aarch64/iterators.md     |  26 +-
 gcc/config/aarch64/predicates.md    |   3 +
 6 files changed, 386 insertions(+), 191 deletions(-)

-- 
2.17.1

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns
  2018-07-02  3:41 ` [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns Richard Henderson
@ 2018-07-02 11:55   ` Richard Sandiford
  2018-07-02 15:32     ` Richard Henderson
  0 siblings, 1 reply; 10+ messages in thread
From: Richard Sandiford @ 2018-07-02 11:55 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, marcus.shawcroft, james.greenhalgh

Richard Henderson <rth@twiddle.net> writes:
> @@ -2687,34 +2738,60 @@
>    aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
>  })
>  
> -;; Predicated floating-point operations.
> -(define_insn "*cond_<optab><mode>"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w")
> +;; Predicated floating-point operations with select matching output.
> +(define_insn "*cond_<optab><mode>_0"
> +  [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (unspec:SVE_F
> -	     [(match_operand:SVE_F 2 "register_operand" "0")
> -	      (match_operand:SVE_F 3 "register_operand" "w")]
> +	     [(match_dup 1)
> +	      (match_operand:SVE_F 2 "register_operand" "0, w, w")
> +	      (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
> +	     SVE_COND_FP_BINARY)
> +	   (match_dup 0)]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "@
> +   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
> +   movprfx\t%0, %1/m, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
> +)

Reintroduces a (match_dup 1) into the SVE_COND_FP_BINARY.

OK otherwise, thanks.

The original reason for using SVE_COND_FP_BINARY rather than rtx codes
was to emphasise that nothing happens for inactive lanes: this is really
a predicated operation that returns "don't care" values for inactive lanes
fused with a select that "happens" to use (but in fact always uses) the same
predicate.  So from that point of view it seemed natural for both unspecs
to have the predicate.

OTOH, since SVE_COND_FP_BINARY is never used independently, and since it's
an unspec, I guess it doesn't matter much either way.

Richard

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, aarch64 1/4] aarch64: Add movprfx alternatives for unpredicated patterns
  2018-07-02  3:41 ` [PATCH, aarch64 1/4] aarch64: Add movprfx alternatives for unpredicated patterns Richard Henderson
@ 2018-07-02 11:56   ` Richard Sandiford
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Sandiford @ 2018-07-02 11:56 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, marcus.shawcroft, james.greenhalgh

Richard Henderson <rth@twiddle.net> writes:
> 	* config/aarch64/aarch64.md (movprfx): New attr.
> 	(length): Default movprfx to 8.
> 	* config/aarch64/aarch64-sve.md (*mul<SVE_I>3): Add movprfx alt.
> 	(*madd<SVE_I>, *msub<SVE_I>3): Likewise.
> 	(*<su>mul<SVE_I>3_highpart): Likewise.
> 	(*<SVE_INT_BINARY_SD><SVE_SDI>3): Likewise.
> 	(*v<ASHIFT><SVE_I>3): Likewise.
> 	(*<su><MAXMIN><SVE_I>3): Likewise.
> 	(*<su><MAXMIN><SVE_F>3): Likewise.
> 	(*fma<SVE_F>4, *fnma<SVE_F>4): Likewise.
> 	(*fms<SVE_F>4, *fnms<SVE_F>4): Likewise.
> 	(*div<SVE_F>3): Likewise.

OK, thanks.

Richard

> ---
>  gcc/config/aarch64/aarch64-sve.md | 184 ++++++++++++++++++------------
>  gcc/config/aarch64/aarch64.md     |  11 +-
>  2 files changed, 116 insertions(+), 79 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 8e2433385a8..3dee6a4376d 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -937,47 +937,53 @@
>  ;; to gain much and would make the instruction seem less uniform to the
>  ;; register allocator.
>  (define_insn "*mul<mode>3"
> -  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_I
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (mult:SVE_I
> -	     (match_operand:SVE_I 2 "register_operand" "%0, 0")
> -	     (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w"))]
> +	     (match_operand:SVE_I 2 "register_operand" "%0, 0, w")
> +	     (match_operand:SVE_I 3 "aarch64_sve_mul_operand" "vsm, w, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     mul\t%0.<Vetype>, %0.<Vetype>, #%3
> -   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +   mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  (define_insn "*madd<mode>"
> -  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
>  	(plus:SVE_I
>  	  (unspec:SVE_I
> -	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> -	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
> -			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
> +	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
> +	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
> +			 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
>  	    UNSPEC_MERGE_PTRUE)
> -	  (match_operand:SVE_I 4 "register_operand" "w, 0")))]
> +	  (match_operand:SVE_I 4 "register_operand" "w, 0, w")))]
>    "TARGET_SVE"
>    "@
>     mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
> -   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
> +   mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  (define_insn "*msub<mode>3"
> -  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
>  	(minus:SVE_I
> -	  (match_operand:SVE_I 4 "register_operand" "w, 0")
> +	  (match_operand:SVE_I 4 "register_operand" "w, 0, w")
>  	  (unspec:SVE_I
> -	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> -	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
> -			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
> +	    [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
> +	     (mult:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w, w")
> +			 (match_operand:SVE_I 3 "register_operand" "w, w, w"))]
>  	    UNSPEC_MERGE_PTRUE)))]
>    "TARGET_SVE"
>    "@
>     msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
> -   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
> +   mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated highpart multiplication.
> @@ -997,15 +1003,18 @@
>  
>  ;; Predicated highpart multiplication.
>  (define_insn "*<su>mul<mode>3_highpart"
> -  [(set (match_operand:SVE_I 0 "register_operand" "=w")
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
>  	(unspec:SVE_I
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> -	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0")
> -			  (match_operand:SVE_I 3 "register_operand" "w")]
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	   (unspec:SVE_I [(match_operand:SVE_I 2 "register_operand" "%0, w")
> +			  (match_operand:SVE_I 3 "register_operand" "w, w")]
>  			 MUL_HIGHPART)]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
> -  "<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  "@
> +   <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,yes")]
>  )
>  
>  ;; Unpredicated division.
> @@ -1025,17 +1034,19 @@
>  
>  ;; Division predicated with a PTRUE.
>  (define_insn "*<optab><mode>3"
> -  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_SDI 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_SDI
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (SVE_INT_BINARY_SD:SVE_SDI
> -	     (match_operand:SVE_SDI 2 "register_operand" "0, w")
> -	     (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0"))]
> +	     (match_operand:SVE_SDI 2 "register_operand" "0, w, w")
> +	     (match_operand:SVE_SDI 3 "aarch64_sve_mul_operand" "w, 0, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> -   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
> +   <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
> +   movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated NEG, NOT and POPCOUNT.
> @@ -1222,17 +1233,19 @@
>  ;; or X isn't likely to gain much and would make the instruction seem
>  ;; less uniform to the register allocator.
>  (define_insn "*v<optab><mode>3"
> -  [(set (match_operand:SVE_I 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_I
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (ASHIFT:SVE_I
> -	     (match_operand:SVE_I 2 "register_operand" "w, 0")
> -	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w"))]
> +	     (match_operand:SVE_I 2 "register_operand" "w, 0, w")
> +	     (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
> -   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +   <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; LSL, LSR and ASR by a scalar, which expands into one of the vector
> @@ -1723,14 +1736,17 @@
>  
>  ;; Integer MIN/MAX predicated with a PTRUE.
>  (define_insn "*<su><maxmin><mode>3"
> -  [(set (match_operand:SVE_I 0 "register_operand" "=w")
> +  [(set (match_operand:SVE_I 0 "register_operand" "=w, ?&w")
>  	(unspec:SVE_I
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> -	   (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0")
> -			 (match_operand:SVE_I 3 "register_operand" "w"))]
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	   (MAXMIN:SVE_I (match_operand:SVE_I 2 "register_operand" "%0, w")
> +			 (match_operand:SVE_I 3 "register_operand" "w, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
> -  "<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  "@
> +   <su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %2\;<su><maxmin>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,yes")]
>  )
>  
>  ;; Unpredicated floating-point MIN/MAX.
> @@ -1749,14 +1765,17 @@
>  
>  ;; Floating-point MIN/MAX predicated with a PTRUE.
>  (define_insn "*<su><maxmin><mode>3"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> -	   (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0")
> -			  (match_operand:SVE_F 3 "register_operand" "w"))]
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	   (FMAXMIN:SVE_F (match_operand:SVE_F 2 "register_operand" "%0, w")
> +			  (match_operand:SVE_F 3 "register_operand" "w, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
> -  "f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  "@
> +   f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %2\;f<maxmin>nm\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,yes")]
>  )
>  
>  ;; Unpredicated fmin/fmax.
> @@ -1776,15 +1795,18 @@
>  
>  ;; fmin/fmax predicated with a PTRUE.
>  (define_insn "*<maxmin_uns><mode>3"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> -	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0")
> -			  (match_operand:SVE_F 3 "register_operand" "w")]
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	   (unspec:SVE_F [(match_operand:SVE_F 2 "register_operand" "%0, w")
> +			  (match_operand:SVE_F 3 "register_operand" "w, w")]
>  			 FMAXMIN_UNS)]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
> -  "<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  "@
> +   <maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> +   movprfx\t%0, %2\;<maxmin_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,yes")]
>  )
>  
>  ;; Predicated integer operations with select.
> @@ -2146,17 +2168,19 @@
>  
>  ;; fma predicated with a PTRUE.
>  (define_insn "*fma<mode>4"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> -	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
> -		      (match_operand:SVE_F 4 "register_operand" "w, w")
> -		      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
> +	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
> +		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
> +		      (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     fmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
> -   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +   fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
> +   movprfx\t%0, %2\;fmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated fnma (%0 = (-%1 * %2) + %3).
> @@ -2177,18 +2201,20 @@
>  
>  ;; fnma predicated with a PTRUE.
>  (define_insn "*fnma<mode>4"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (fma:SVE_F (neg:SVE_F
> -			(match_operand:SVE_F 3 "register_operand" "%0, w"))
> -		      (match_operand:SVE_F 4 "register_operand" "w, w")
> -		      (match_operand:SVE_F 2 "register_operand" "w, 0"))]
> +			(match_operand:SVE_F 3 "register_operand" "%0, w, w"))
> +		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
> +		      (match_operand:SVE_F 2 "register_operand" "w, 0, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     fmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
> -   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +   fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
> +   movprfx\t%0, %2\;fmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated fms (%0 = (%1 * %2) - %3).
> @@ -2209,18 +2235,20 @@
>  
>  ;; fms predicated with a PTRUE.
>  (define_insn "*fms<mode>4"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> -	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w")
> -		      (match_operand:SVE_F 4 "register_operand" "w, w")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
> +	   (fma:SVE_F (match_operand:SVE_F 3 "register_operand" "%0, w, w")
> +		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
>  		      (neg:SVE_F
> -			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
> +			(match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     fnmsb\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
> -   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +   fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
> +   movprfx\t%0, %2\;fnmls\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated fnms (%0 = (-%1 * %2) - %3).
> @@ -2242,19 +2270,21 @@
>  
>  ;; fnms predicated with a PTRUE.
>  (define_insn "*fnms<mode>4"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (fma:SVE_F (neg:SVE_F
> -			(match_operand:SVE_F 3 "register_operand" "%0, w"))
> -		      (match_operand:SVE_F 4 "register_operand" "w, w")
> +			(match_operand:SVE_F 3 "register_operand" "%0, w, w"))
> +		      (match_operand:SVE_F 4 "register_operand" "w, w, w")
>  		      (neg:SVE_F
> -			(match_operand:SVE_F 2 "register_operand" "w, 0")))]
> +			(match_operand:SVE_F 2 "register_operand" "w, 0, w")))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     fnmad\t%0.<Vetype>, %1/m, %4.<Vetype>, %2.<Vetype>
> -   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +   fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
> +   movprfx\t%0, %2\;fnmla\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated floating-point division.
> @@ -2273,16 +2303,18 @@
>  
>  ;; Floating-point division predicated with a PTRUE.
>  (define_insn "*div<mode>3"
> -  [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
> +  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
>  	(unspec:SVE_F
> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
> -	   (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w")
> -		      (match_operand:SVE_F 3 "register_operand" "w, 0"))]
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
> +	   (div:SVE_F (match_operand:SVE_F 2 "register_operand" "0, w, w")
> +		      (match_operand:SVE_F 3 "register_operand" "w, 0, w"))]
>  	  UNSPEC_MERGE_PTRUE))]
>    "TARGET_SVE"
>    "@
>     fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
> -   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
> +   fdivr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
> +   movprfx\t%0, %2\;fdiv\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "*,*,yes")]
>  )
>  
>  ;; Unpredicated FNEG, FABS and FSQRT.
> diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
> index 4ac6332a200..a014a012cc1 100644
> --- a/gcc/config/aarch64/aarch64.md
> +++ b/gcc/config/aarch64/aarch64.md
> @@ -251,9 +251,6 @@
>  ;; will be disabled when !TARGET_SVE.
>  (define_attr "sve" "no,yes" (const_string "no"))
>  
> -(define_attr "length" ""
> -  (const_int 4))
> -
>  ;; Attribute that controls whether an alternative is enabled or not.
>  ;; Currently it is only used to disable alternatives which touch fp or simd
>  ;; registers when -mgeneral-regs-only is specified.
> @@ -277,6 +274,14 @@
>  ;; 1 :=: yes
>  (define_attr "far_branch" "" (const_int 0))
>  
> +;; Attribute that specifies whether the alternative uses MOVPRFX.
> +(define_attr "movprfx" "no,yes" (const_string "no"))
> +
> +(define_attr "length" ""
> +  (cond [(eq_attr "movprfx" "yes")
> +           (const_int 8)
> +        ] (const_int 4)))
> +
>  ;; Strictly for compatibility with AArch32 in pipeline models, since AArch64 has
>  ;; no predicated insns.
>  (define_attr "predicated" "yes,no" (const_string "no"))

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select
  2018-07-02  3:41 ` [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select Richard Henderson
@ 2018-07-02 11:57   ` Richard Sandiford
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Sandiford @ 2018-07-02 11:57 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, marcus.shawcroft, james.greenhalgh

Richard Henderson <rth@twiddle.net> writes:
> 	* config/aarch64/aarch64-protos.h, config/aarch64/aarch64.c
> 	(aarch64_sve_prepare_conditional_op): Remove.
> 	* config/aarch64/aarch64-sve.md (cond_<SVE_INT_BINARY><SVE_I>):
> 	Allow aarch64_simd_reg_or_zero as select operand; remove
> 	the aarch64_sve_prepare_conditional_op call.
> 	(cond_<SVE_INT_BINARY_SD><SVE_SDI>): Likewise.
> 	(cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.
> 	(*cond_<SVE_INT_BINARY><SVE_I>_z): New pattern.
> 	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_z): New pattern.
> 	(*cond_<SVE_COND_FP_BINARY><SVE_F>_z): New pattern.
> 	(*cond_<SVE_INT_BINARY><SVE_I>_any): New pattern.
> 	(*cond_<SVE_INT_BINARY_SD><SVE_SDI>_any): New pattern.
> 	(*cond_<SVE_COND_FP_BINARY><SVE_F>_any): New pattern
> 	and splitters to match all of the *_any patterns.
> 	* config/aarch64/predicates.md (aarch64_sve_any_binary_operator): New.
> ---
>  gcc/config/aarch64/aarch64-protos.h |   1 -
>  gcc/config/aarch64/aarch64.c        |  54 ----------
>  gcc/config/aarch64/aarch64-sve.md   | 154 ++++++++++++++++++++++++----
>  gcc/config/aarch64/predicates.md    |   3 +
>  4 files changed, 136 insertions(+), 76 deletions(-)

OK, thanks.

Richard

>
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 87c6ae20278..514ddc457ca 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -513,7 +513,6 @@ bool aarch64_gen_adjusted_ldpstp (rtx *, bool, scalar_mode, RTX_CODE);
>  void aarch64_expand_sve_vec_cmp_int (rtx, rtx_code, rtx, rtx);
>  bool aarch64_expand_sve_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
>  void aarch64_expand_sve_vcond (machine_mode, machine_mode, rtx *);
> -void aarch64_sve_prepare_conditional_op (rtx *, unsigned int, bool);
>  #endif /* RTX_CODE */
>  
>  void aarch64_init_builtins (void);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3af7e98e166..d75d45f4b8b 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -16058,60 +16058,6 @@ aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode,
>    emit_set_insn (ops[0], gen_rtx_UNSPEC (data_mode, vec, UNSPEC_SEL));
>  }
>  
> -/* Prepare a cond_<optab><mode> operation that has the operands
> -   given by OPERANDS, where:
> -
> -   - operand 0 is the destination
> -   - operand 1 is a predicate
> -   - operands 2 to NOPS - 2 are the operands to an operation that is
> -     performed for active lanes
> -   - operand NOPS - 1 specifies the values to use for inactive lanes.
> -
> -   COMMUTATIVE_P is true if operands 2 and 3 are commutative.  In that case,
> -   no pattern is provided for a tie between operands 3 and NOPS - 1.  */
> -
> -void
> -aarch64_sve_prepare_conditional_op (rtx *operands, unsigned int nops,
> -				    bool commutative_p)
> -{
> -  /* We can do the operation directly if the "else" value matches one
> -     of the other inputs.  */
> -  for (unsigned int i = 2; i < nops - 1; ++i)
> -    if (rtx_equal_p (operands[i], operands[nops - 1]))
> -      {
> -	if (i == 3 && commutative_p)
> -	  std::swap (operands[2], operands[3]);
> -	return;
> -      }
> -
> -  /* If the "else" value is different from the other operands, we have
> -     the choice of doing a SEL on the output or a SEL on an input.
> -     Neither choice is better in all cases, but one advantage of
> -     selecting the input is that it can avoid a move when the output
> -     needs to be distinct from the inputs.  E.g. if operand N maps to
> -     register N, selecting the output would give:
> -
> -	MOVPRFX Z0.S, Z2.S
> -	ADD Z0.S, P1/M, Z0.S, Z3.S
> -	SEL Z0.S, P1, Z0.S, Z4.S
> -
> -     whereas selecting the input avoids the MOVPRFX:
> -
> -	SEL Z0.S, P1, Z2.S, Z4.S
> -	ADD Z0.S, P1/M, Z0.S, Z3.S.
> -
> -     ??? Matching the other input can produce
> -
> -	MOVPRFX Z4.S, P1/M, Z2.S
> -	ADD Z4.S, P1/M, Z4.S, Z3.S
> -   */
> -  machine_mode mode = GET_MODE (operands[0]);
> -  rtx temp = gen_reg_rtx (mode);
> -  rtvec vec = gen_rtvec (3, operands[1], operands[2], operands[nops - 1]);
> -  emit_set_insn (temp, gen_rtx_UNSPEC (mode, vec, UNSPEC_SEL));
> -  operands[2] = operands[nops - 1] = temp;
> -}
> -
>  /* Implement TARGET_MODES_TIEABLE_P.  In principle we should always return
>     true.  However due to issues with register allocation it is preferable
>     to avoid tieing integer scalar and FP scalar modes.  Executing integer
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index db16affc093..b16d0455159 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -1817,13 +1817,10 @@
>  	   (SVE_INT_BINARY:SVE_I
>  	     (match_operand:SVE_I 2 "register_operand")
>  	     (match_operand:SVE_I 3 "register_operand"))
> -	   (match_operand:SVE_I 4 "register_operand")]
> +	   (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
>  	  UNSPEC_SEL))]
>    "TARGET_SVE"
> -{
> -  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
> -  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
> -})
> +)
>  
>  (define_expand "cond_<optab><mode>"
>    [(set (match_operand:SVE_SDI 0 "register_operand")
> @@ -1832,19 +1829,12 @@
>  	   (SVE_INT_BINARY_SD:SVE_SDI
>  	     (match_operand:SVE_SDI 2 "register_operand")
>  	     (match_operand:SVE_SDI 3 "register_operand"))
> -	   (match_operand:SVE_SDI 4 "register_operand")]
> +	   (match_operand:SVE_SDI 4 "aarch64_simd_reg_or_zero")]
>  	  UNSPEC_SEL))]
>    "TARGET_SVE"
> -{
> -  bool commutative_p = (GET_RTX_CLASS (<CODE>) == RTX_COMM_ARITH);
> -  aarch64_sve_prepare_conditional_op (operands, 5, commutative_p);
> -})
> -
> -;; Predicated integer operations.
> -;; All other things being equal, prefer the patterns for which the
> -;; destination matches the select input, as that gives us the most
> -;; freedom to swap the other operands.
> +)
>  
> +;; Predicated integer operations with select matching the output operand.
>  (define_insn "*cond_<optab><mode>_0"
>    [(set (match_operand:SVE_I 0 "register_operand" "+w, w, ?&w")
>  	(unspec:SVE_I
> @@ -1945,6 +1935,87 @@
>    [(set_attr "movprfx" "*,yes")]
>  )
>  
> +;; Predicated integer operations with select matching zero.
> +(define_insn "*cond_<optab><mode>_z"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
> +	(unspec:SVE_I
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	   (SVE_INT_BINARY:SVE_I
> +	     (match_operand:SVE_I 2 "register_operand" "w")
> +	     (match_operand:SVE_I 3 "register_operand" "w"))
> +	   (match_operand:SVE_I 4 "aarch64_simd_imm_zero")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "yes")]
> +)
> +
> +(define_insn "*cond_<optab><mode>_z"
> +  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
> +	(unspec:SVE_SDI
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	   (SVE_INT_BINARY_SD:SVE_SDI
> +	     (match_operand:SVE_SDI 2 "register_operand" "w")
> +	     (match_operand:SVE_SDI 3 "register_operand" "w"))
> +	   (match_operand:SVE_SDI 4 "aarch64_simd_imm_zero")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "yes")]
> +)
> +
> +;; Synthetic predications with select unmatched.
> +(define_insn "*cond_<optab><mode>_any"
> +  [(set (match_operand:SVE_I 0 "register_operand" "=&w")
> +	(unspec:SVE_I
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	   (SVE_INT_BINARY:SVE_I
> +	     (match_operand:SVE_I 2 "register_operand" "w")
> +	     (match_operand:SVE_I 3 "register_operand" "w"))
> +	   (match_operand:SVE_I 4 "register_operand"   "w")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "#"
> +)
> +
> +(define_insn "*cond_<optab><mode>_any"
> +  [(set (match_operand:SVE_SDI 0 "register_operand" "=&w")
> +	(unspec:SVE_SDI
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	   (SVE_INT_BINARY_SD:SVE_SDI  ; operator mode must match the SVE_SDI operands
> +	     (match_operand:SVE_SDI 2 "register_operand" "w")
> +	     (match_operand:SVE_SDI 3 "register_operand" "w"))
> +	   (match_operand:SVE_SDI 4 "register_operand"   "w")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "#"  ; no direct assembly; relies on the define_split below after reload
> +)
> +
> +(define_split
> +  [(set (match_operand:SVE_I 0 "register_operand")
> +	(unspec:SVE_I
> +	  [(match_operand:<VPRED> 1 "register_operand")
> +	   (match_operator:SVE_I 5 "aarch64_sve_any_binary_operator"
> +	     [(match_operand:SVE_I 2 "register_operand")
> +	      (match_operand:SVE_I 3 "register_operand")])
> +	   (match_operand:SVE_I 4 "register_operand")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE && reload_completed
> +   && !(rtx_equal_p (operands[0], operands[4])
> +        || rtx_equal_p (operands[2], operands[4])
> +        || rtx_equal_p (operands[3], operands[4]))"
> +  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
> +  [(set (match_dup 0)
> +	(unspec:SVE_I [(match_dup 1) (match_dup 2) (match_dup 4)]
> +                      UNSPEC_SEL))
> +   (set (match_dup 0)
> +	(unspec:SVE_I
> +	  [(match_dup 1)
> +	   (match_op_dup 5 [(match_dup 0) (match_dup 3)])
> +           (match_dup 0)]
> +	  UNSPEC_SEL))]
> +)
> +
>  ;; Set operand 0 to the last active element in operand 3, or to tied
>  ;; operand 1 if no elements are active.
>  (define_insn "fold_extract_last_<mode>"
> @@ -2731,12 +2802,10 @@
>  	     [(match_operand:SVE_F 2 "register_operand")
>  	      (match_operand:SVE_F 3 "register_operand")]
>  	     SVE_COND_FP_BINARY)
> -	   (match_operand:SVE_F 4 "register_operand")]
> +	   (match_operand:SVE_F 4 "aarch64_simd_reg_or_zero")]
>  	  UNSPEC_SEL))]
>    "TARGET_SVE"
> -{
> -  aarch64_sve_prepare_conditional_op (operands, 5, <commutative>);
> -})
> +)
>  
>  ;; Predicated floating-point operations with select matching output.
>  (define_insn "*cond_<optab><mode>_0"
> @@ -2744,8 +2813,7 @@
>  	(unspec:SVE_F
>  	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>  	   (unspec:SVE_F
> -	     [(match_dup 1)
> -	      (match_operand:SVE_F 2 "register_operand" "0, w, w")
> +	     [(match_operand:SVE_F 2 "register_operand" "0, w, w")
>  	      (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
>  	     SVE_COND_FP_BINARY)
>  	   (match_dup 0)]
> @@ -2794,6 +2862,50 @@
>    [(set_attr "movprfx" "*,yes")]
>  )
>  
> +;; Predicated floating-point operations with select matching zero.
> +(define_insn "*cond_<optab><mode>_z"
> +  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
> +	(unspec:SVE_F
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	   (unspec:SVE_F
> +	     [(match_operand:SVE_F 2 "register_operand" "w")
> +	      (match_operand:SVE_F 3 "register_operand" "w")]
> +	     SVE_COND_FP_BINARY)
> +	   (match_operand:SVE_F 4 "aarch64_simd_imm_zero")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
> +  [(set_attr "movprfx" "yes")]
> +)
> +
> +;; Synthetic predication of floating-point operations with select unmatched.
> +(define_insn_and_split "*cond_<optab><mode>_any"
> +  [(set (match_operand:SVE_F 0 "register_operand" "=&w")
> +	(unspec:SVE_F
> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
> +	   (unspec:SVE_F
> +	     [(match_operand:SVE_F 2 "register_operand" "w")
> +	      (match_operand:SVE_F 3 "register_operand" "w")]
> +	     SVE_COND_FP_BINARY)
> +	   (match_operand:SVE_F 4 "register_operand" "w")]
> +	  UNSPEC_SEL))]
> +  "TARGET_SVE"
> +  "#"
> +  "&& reload_completed
> +   && !(rtx_equal_p (operands[0], operands[4])
> +        || rtx_equal_p (operands[2], operands[4])
> +        || rtx_equal_p (operands[3], operands[4]))"
> +  ; Not matchable by any one insn or movprfx insn.  We need a separate select.
> +  [(set (match_dup 0)
> +	(unspec:SVE_F [(match_dup 1) (match_dup 2) (match_dup 4)] UNSPEC_SEL))
> +   (set (match_dup 0)
> +	(unspec:SVE_F
> +	  [(match_dup 1)
> +	   (unspec:SVE_F [(match_dup 0) (match_dup 3)] SVE_COND_FP_BINARY)
> +           (match_dup 0)]
> +	  UNSPEC_SEL))]
> +)
> +
>  ;; Shift an SVE vector left and insert a scalar into element 0.
>  (define_insn "vec_shl_insert_<mode>"
>    [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index 7aec76d681f..4acbc218a8d 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -625,3 +625,6 @@
>  ;; A special predicate that doesn't match a particular mode.
>  (define_special_predicate "aarch64_any_register_operand"
>    (match_code "reg"))
> +
> +(define_predicate "aarch64_sve_any_binary_operator"
> +  (match_code "plus,minus,mult,div,udiv,smax,umax,smin,umin,and,ior,xor"))

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY
  2018-07-02  3:41 ` [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY Richard Henderson
@ 2018-07-02 11:57   ` Richard Sandiford
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Sandiford @ 2018-07-02 11:57 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches, marcus.shawcroft, james.greenhalgh

Richard Henderson <rth@twiddle.net> writes:
> The predicate is present within the containing UNSPEC_SEL;
> there is no need to duplicate it.
>
> 	* config/aarch64/aarch64-sve.md (cond_<SVE_COND_FP_BINARY><SVE_F>):
> 	Remove match_dup 1 from the inner unspec.
> 	(*cond_<SVE_COND_FP_BINARY><SVE_F>): Likewise.

OK, thanks.

Richard

> ---
>  gcc/config/aarch64/aarch64-sve.md | 9 +++------
>  1 file changed, 3 insertions(+), 6 deletions(-)
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 3dee6a4376d..2aceef65c80 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -2677,8 +2677,7 @@
>  	(unspec:SVE_F
>  	  [(match_operand:<VPRED> 1 "register_operand")
>  	   (unspec:SVE_F
> -	     [(match_dup 1)
> -	      (match_operand:SVE_F 2 "register_operand")
> +	     [(match_operand:SVE_F 2 "register_operand")
>  	      (match_operand:SVE_F 3 "register_operand")]
>  	     SVE_COND_FP_BINARY)
>  	   (match_operand:SVE_F 4 "register_operand")]
> @@ -2694,8 +2693,7 @@
>  	(unspec:SVE_F
>  	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
>  	   (unspec:SVE_F
> -	     [(match_dup 1)
> -	      (match_operand:SVE_F 2 "register_operand" "0")
> +	     [(match_operand:SVE_F 2 "register_operand" "0")
>  	      (match_operand:SVE_F 3 "register_operand" "w")]
>  	     SVE_COND_FP_BINARY)
>  	   (match_dup 2)]
> @@ -2710,8 +2708,7 @@
>  	(unspec:SVE_F
>  	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
>  	   (unspec:SVE_F
> -	     [(match_dup 1)
> -	      (match_operand:SVE_F 2 "register_operand" "w")
> +	     [(match_operand:SVE_F 2 "register_operand" "w")
>  	      (match_operand:SVE_F 3 "register_operand" "0")]
>  	     SVE_COND_FP_BINARY)
>  	   (match_dup 3)]

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns
  2018-07-02 11:55   ` Richard Sandiford
@ 2018-07-02 15:32     ` Richard Henderson
  0 siblings, 0 replies; 10+ messages in thread
From: Richard Henderson @ 2018-07-02 15:32 UTC (permalink / raw)
  To: gcc-patches, marcus.shawcroft, james.greenhalgh, richard.sandiford

On 07/02/2018 04:55 AM, Richard Sandiford wrote:
>> +;; Predicated floating-point operations with select matching output.
>> +(define_insn "*cond_<optab><mode>_0"
>> +  [(set (match_operand:SVE_F 0 "register_operand" "+w, w, ?&w")
>>  	(unspec:SVE_F
>> -	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
>> +	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
>>  	   (unspec:SVE_F
>> -	     [(match_operand:SVE_F 2 "register_operand" "0")
>> -	      (match_operand:SVE_F 3 "register_operand" "w")]
>> +	     [(match_dup 1)
>> +	      (match_operand:SVE_F 2 "register_operand" "0, w, w")
>> +	      (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
>> +	     SVE_COND_FP_BINARY)
>> +	   (match_dup 0)]
>> +	  UNSPEC_SEL))]
>> +  "TARGET_SVE"
>> +  "@
>> +   <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
>> +   <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
>> +   movprfx\t%0, %1/m, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
>> +  [(set_attr "movprfx" "*,*,yes")]
>> +)
> 
> Reintroduces a (match_dup 1) into the SVE_COND_FP_BINARY.
> 
> OK otherwise, thanks.

Feh, and fixed again in patch 4.
I've squashed all 4 patches for final commit,
so the intermediate breakage is gone.

Thanks for the review.


r~

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2018-07-02 15:32 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-07-02  3:41 [PATCH, aarch64 0/4] Add movprfx patterns and alternatives Richard Henderson
2018-07-02  3:41 ` [PATCH, aarch64 2/4] aarch64: Remove predicate from inside SVE_COND_FP_BINARY Richard Henderson
2018-07-02 11:57   ` Richard Sandiford
2018-07-02  3:41 ` [PATCH, aarch64 4/4] aarch64: Add movprfx patterns for zero and unmatched select Richard Henderson
2018-07-02 11:57   ` Richard Sandiford
2018-07-02  3:41 ` [PATCH, aarch64 3/4] aarch64: Add movprfx alternatives for predicate patterns Richard Henderson
2018-07-02 11:55   ` Richard Sandiford
2018-07-02 15:32     ` Richard Henderson
2018-07-02  3:41 ` [PATCH, aarch64 1/4] aarch64: Add movprfx alternatives for unpredicated patterns Richard Henderson
2018-07-02 11:56   ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).