public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, AArch64] Make zero_extends explicit for common SImode patterns
@ 2012-12-13 17:26 Ian Bolton
  2012-12-13 20:40 ` Richard Henderson
  2012-12-13 20:42 ` Richard Henderson
  0 siblings, 2 replies; 6+ messages in thread
From: Ian Bolton @ 2012-12-13 17:26 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1842 bytes --]

Season's greetings to you! :)

I've added zero_extend versions of the SImode patterns that
write to W registers. This makes the zero-extension they already
perform implicitly explicit in the RTL, so GCC can be smarter
about when it actually needs to emit a zero_extend (uxtw).

This patch significantly reduces the number of redundant
uxtw instructions seen in a variety of programs.

(There are further patterns that can be done, but I have them
in a separate patch that's still in development.)

OK for trunk and backport to ARM/aarch64-4.7-branch?

Cheers,
Ian


2012-12-13  Ian Bolton  <ian.bolton@arm.com>

	* gcc/config/aarch64/aarch64.md
	(*addsi3_aarch64_uxtw): New pattern.
	(*addsi3_compare0_uxtw): New pattern.
	(*add_<shift>_si_uxtw): New pattern.
	(*add_<optab><SHORT:mode>_si_uxtw): New pattern.
	(*add_<optab><SHORT:mode>_shft_si_uxtw): New pattern.
	(*add_<optab><SHORT:mode>_mult_si_uxtw): New pattern.
	(*add_<optab>si_multp2_uxtw): New pattern.
	(*addsi3_carryin_uxtw): New pattern.
	(*addsi3_carryin_alt1_uxtw): New pattern.
	(*addsi3_carryin_alt2_uxtw): New pattern.
	(*addsi3_carryin_alt3_uxtw): New pattern.
	(*add_uxtsi_multp2_uxtw): New pattern.
	(*subsi3_uxtw): New pattern.
	(*subsi3_compare0_uxtw): New pattern.
	(*sub_<shift>_si_uxtw): New pattern.
	(*sub_mul_imm_si_uxtw): New pattern.
	(*sub_<optab><SHORT:mode>_si_uxtw): New pattern.
	(*sub_<optab><SHORT:mode>_shft_si_uxtw): New pattern.
	(*sub_<optab>si_multp2_uxtw): New pattern.
	(*sub_uxtsi_multp2_uxtw): New pattern.
	(*negsi2_uxtw): New pattern.
	(*negsi2_compare0_uxtw): New pattern.
	(*neg_<shift>_si2_uxtw): New pattern.
	(*neg_mul_imm_si2_uxtw): New pattern.
	(*mulsi3_uxtw): New pattern.
	(*maddsi_uxtw): New pattern.
	(*msubsi_uxtw): New pattern.
	(*mulsi_neg_uxtw): New pattern.
	(*<su_optab>divsi3_uxtw): New pattern.

[-- Attachment #2: aarch64-rzx-patch1-v2.txt --]
[-- Type: text/plain, Size: 19246 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a9a8b5f..d5c0206 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1271,6 +1273,22 @@
    (set_attr "mode" "SI")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_aarch64_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=rk,rk,rk")
+    (zero_extend:DI (plus:SI
+     (match_operand:SI 1 "register_operand" "%rk,rk,rk")
+     (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))]
+  ""
+  "@
+  add\\t%w0, %w1, %2
+  add\\t%w0, %w1, %w2
+  sub\\t%w0, %w1, #%n2"
+  [(set_attr "v8type" "alu")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*adddi3_aarch64"
   [(set
     (match_operand:DI 0 "register_operand" "=rk,rk,rk,!w")
@@ -1304,6 +1322,23 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	 (plus:SI (match_operand:SI 1 "register_operand" "%r,r")
+		   (match_operand:SI 2 "aarch64_plus_operand" "rI,J"))
+	 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+  ""
+  "@
+  adds\\t%w0, %w1, %w2
+  subs\\t%w0, %w1, #%n2"
+  [(set_attr "v8type" "alus")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3nr_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -1340,6 +1375,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<shift>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI
+           (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
+		       (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+	   (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w3, %w1, <shift> %2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_mul_imm_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1361,6 +1409,17 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r"))
+		  (match_operand:GPI 2 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w2, %w, <su>xt<SHORT:size>"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_<optab><ALLX:mode>_shft_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (ashift:GPI (ANY_EXTEND:GPI
@@ -1373,6 +1432,19 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_shft_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (ashift:SI (ANY_EXTEND:SI
+			       (match_operand:SHORT 1 "register_operand" "r"))
+			      (match_operand 2 "aarch64_imm3" "Ui3"))
+		  (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_<optab><ALLX:mode>_mult_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (mult:GPI (ANY_EXTEND:GPI
@@ -1385,6 +1457,19 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_mult_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI
+			     (match_operand:SHORT 1 "register_operand" "r"))
+			    (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		  (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %p2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_<optab><mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (ANY_EXTRACT:GPI
@@ -1399,6 +1484,21 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab>si_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (ANY_EXTRACT:SI
+		   (mult:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		   (match_operand 3 "const_int_operand" "n")
+		   (const_int 0))
+		  (match_operand:SI 4 "register_operand" "r"))))]
+  "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
+  "add\\t%w0, %w4, %w1, <su>xt%e3 %p2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1412,6 +1512,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+	      (plus:SI
+		(match_operand:SI 1 "register_operand" "r")
+		(match_operand:SI 2 "register_operand" "r")))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin_alt1"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1425,6 +1539,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt1_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (plus:SI
+		(match_operand:SI 1 "register_operand" "r")
+		(match_operand:SI 2 "register_operand" "r"))
+              (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin_alt2"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1438,6 +1566,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt2_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (plus:SI
+                (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+		(match_operand:SI 1 "register_operand" "r"))
+	      (match_operand:SI 2 "register_operand" "r"))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin_alt3"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1451,6 +1593,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt3_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (plus:SI
+                (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+		(match_operand:SI 2 "register_operand" "r"))
+	      (match_operand:SI 1 "register_operand" "r"))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_uxt<mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (and:GPI
@@ -1467,6 +1623,23 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_uxtsi_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (and:SI
+		   (mult:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		   (match_operand 3 "const_int_operand" "n"))
+		  (match_operand:SI 4 "register_operand" "r"))))]
+  "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0"
+  "*
+  operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
+					   INTVAL (operands[3])));
+  return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "subsi3"
   [(set (match_operand:SI 0 "register_operand" "=rk")
 	(minus:SI (match_operand:SI 1 "register_operand" "r")
@@ -1477,6 +1650,17 @@
    (set_attr "mode" "SI")]
 )
 
+;; zero_extend version of above
+(define_insn "*subsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "register_operand" "r"))))]
+  ""
+  "sub\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "alu")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "subdi3"
   [(set (match_operand:DI 0 "register_operand" "=rk,!w")
 	(minus:DI (match_operand:DI 1 "register_operand" "r,!w")
@@ -1504,6 +1687,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*subsi3_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r")
+				  (match_operand:SI 2 "register_operand" "r"))
+		       (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))]
+  ""
+  "subs\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "alus")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<shift>_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1516,6 +1713,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*sub_<shift>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 3 "register_operand" "r")
+		   (ASHIFT:SI
+		    (match_operand:SI 1 "register_operand" "r")
+		    (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
+  ""
+  "sub\\t%w0, %w3, %w1, <shift> %2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_mul_imm_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1528,6 +1738,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*sub_mul_imm_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 3 "register_operand" "r")
+		   (mult:SI
+		    (match_operand:SI 1 "register_operand" "r")
+		    (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
+  ""
+  "sub\\t%w0, %w3, %w1, lsl %p2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<optab><ALLX:mode>_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1539,6 +1762,18 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*sub_<optab><SHORT:mode>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "r")
+		   (ANY_EXTEND:SI
+		    (match_operand:SHORT 2 "register_operand" "r")))))]
+  ""
+  "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size>"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<optab><ALLX:mode>_shft_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1551,6 +1786,19 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*sub_<optab><SHORT:mode>_shft_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "r")
+		   (ashift:SI (ANY_EXTEND:SI
+				(match_operand:SHORT 2 "register_operand" "r"))
+			       (match_operand 3 "aarch64_imm3" "Ui3")))))]
+  ""
+  "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size> %3"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<optab><mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 4 "register_operand" "r")
@@ -1565,6 +1813,21 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*sub_<optab>si_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 4 "register_operand" "r")
+		   (ANY_EXTRACT:SI
+		    (mult:SI (match_operand:SI 1 "register_operand" "r")
+			      (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		    (match_operand 3 "const_int_operand" "n")
+		    (const_int 0)))))]
+  "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
+  "sub\\t%w0, %w4, %w1, <su>xt%e3 %p2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_uxt<mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 4 "register_operand" "r")
@@ -1581,6 +1844,23 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*sub_uxtsi_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 4 "register_operand" "r")
+		   (and:SI
+		    (mult:SI (match_operand:SI 1 "register_operand" "r")
+			      (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		    (match_operand 3 "const_int_operand" "n")))))]
+  "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0"
+  "*
+  operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
+					   INTVAL (operands[3])));
+  return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "neg<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(neg:GPI (match_operand:GPI 1 "register_operand" "r")))]
@@ -1590,6 +1870,16 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*negsi2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))]
+  ""
+  "neg\\t%w0, %w1"
+  [(set_attr "v8type" "alu")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*neg<mode>2_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
@@ -1602,6 +1892,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*negsi2_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "r"))
+		       (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (match_dup 1))))]
+  ""
+  "negs\\t%w0, %w1"
+  [(set_attr "v8type" "alus")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*neg_<shift>_<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(neg:GPI (ASHIFT:GPI
@@ -1613,6 +1916,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*neg_<shift>_si2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (ASHIFT:SI
+		  (match_operand:SI 1 "register_operand" "r")
+		  (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
+  ""
+  "neg\\t%w0, %w1, <shift> %2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*neg_mul_imm_<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(neg:GPI (mult:GPI
@@ -1624,6 +1939,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*neg_mul_imm_si2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (mult:SI
+		  (match_operand:SI 1 "register_operand" "r")
+		  (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
+  ""
+  "neg\\t%w0, %w1, lsl %p2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "mul<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1634,6 +1961,17 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*mulsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (mult:SI (match_operand:SI 1 "register_operand" "r")
+		  (match_operand:SI 2 "register_operand" "r"))))]
+  ""
+  "mul\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "mult")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*madd<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1645,6 +1983,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*maddsi_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+			    (match_operand:SI 2 "register_operand" "r"))
+		  (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "madd\\t%w0, %w1, %w2, %w3"
+  [(set_attr "v8type" "madd")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*msub<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1657,6 +2007,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*msubsi_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (minus:SI (match_operand:SI 3 "register_operand" "r")
+		   (mult:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand:SI 2 "register_operand" "r")))))]
+
+  ""
+  "msub\\t%w0, %w1, %w2, %w3"
+  [(set_attr "v8type" "madd")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*mul<mode>_neg"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(mult:GPI (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
@@ -1668,6 +2031,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*mulsi_neg_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (mult:SI (neg:SI (match_operand:SI 1 "register_operand" "r"))
+		  (match_operand:SI 2 "register_operand" "r"))))]
+
+  ""
+  "mneg\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "mult")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "<su_optab>mulsidi3"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r"))
@@ -1738,6 +2113,17 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*<su_optab>divsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ANY_DIV:SI (match_operand:SI 1 "register_operand" "r")
+		     (match_operand:SI 2 "register_operand" "r"))))]
+  ""
+  "<su>div\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "<su>div")
+   (set_attr "mode" "SI")]
+)
+
 ;; -------------------------------------------------------------------
 ;; Comparison insns
 ;; -------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, AArch64] Make zero_extends explicit for common SImode patterns
  2012-12-13 17:26 [PATCH, AArch64] Make zero_extends explicit for common SImode patterns Ian Bolton
@ 2012-12-13 20:40 ` Richard Henderson
  2012-12-13 20:42 ` Richard Henderson
  1 sibling, 0 replies; 6+ messages in thread
From: Richard Henderson @ 2012-12-13 20:40 UTC (permalink / raw)
  To: Ian Bolton; +Cc: gcc-patches

On 12/13/2012 09:25 AM, Ian Bolton wrote:
> +  "add\\t%w0, %w2, %w, <su>xt<SHORT:size>"

                      ^^^ %w1


r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, AArch64] Make zero_extends explicit for common SImode patterns
  2012-12-13 17:26 [PATCH, AArch64] Make zero_extends explicit for common SImode patterns Ian Bolton
  2012-12-13 20:40 ` Richard Henderson
@ 2012-12-13 20:42 ` Richard Henderson
  2012-12-14 11:44   ` Ian Bolton
  1 sibling, 1 reply; 6+ messages in thread
From: Richard Henderson @ 2012-12-13 20:42 UTC (permalink / raw)
  To: Ian Bolton; +Cc: gcc-patches

On 12/13/2012 09:25 AM, Ian Bolton wrote:
> This patch significantly reduces the number of redundant
> uxtw instructions seen in a variety of programs.
> 
> (There are further patterns that can be done, but I have them
> in a separate patch that's still in development.)

What do you get if you enable flag_ree, as we do for x86_64?
In theory this should avoid even more extensions...


C.f. common/config/i386/i386-common.c:

static const struct default_options ix86_option_optimization_table[] =
  {
    /* Enable redundant extension instructions removal at -O2 and higher.  */
    { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },



r~

^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH, AArch64] Make zero_extends explicit for common SImode patterns
  2012-12-13 20:42 ` Richard Henderson
@ 2012-12-14 11:44   ` Ian Bolton
  2012-12-14 12:00     ` Ian Bolton
  0 siblings, 1 reply; 6+ messages in thread
From: Ian Bolton @ 2012-12-14 11:44 UTC (permalink / raw)
  To: 'Richard Henderson'; +Cc: gcc-patches

Hi Richard,

> +  "add\\t%w0, %w2, %w, <su>xt<SHORT:size>"
>
>                      ^^^ %w1

Good spot. I guess that pattern hasn't fired yet then!  I'll fix it.


> > This patch significantly reduces the number of redundant
> > uxtw instructions seen in a variety of programs.
> >
> > (There are further patterns that can be done, but I have them
> > in a separate patch that's still in development.)
> 
> What do you get if you enable flag_ree, as we do for x86_64?
> In theory this should avoid even more extensions...
> 
> 
> C.f. common/config/i386/i386-common.c:
> 
> static const struct default_options ix86_option_optimization_table[] =
>   {
>     /* Enable redundant extension instructions removal at -O2 and
> higher.  */
>     { OPT_LEVELS_2_PLUS, OPT_free, NULL, 1 },
> 

I should have said that I am indeed running with REE enabled.  It has
some impact (about 70 further UXTW removed from the set of binaries
I've been building) and seems to mostly be good across basic blocks
within the same function.  As far as I can tell, there is no downside
to REE, so I think it should be enabled by default for O2 or higher
on AArch64 too.

I'll prepare a new patch ...




^ permalink raw reply	[flat|nested] 6+ messages in thread

* RE: [PATCH, AArch64] Make zero_extends explicit for common SImode patterns
  2012-12-14 11:44   ` Ian Bolton
@ 2012-12-14 12:00     ` Ian Bolton
  2012-12-17 14:44       ` Richard Earnshaw
  0 siblings, 1 reply; 6+ messages in thread
From: Ian Bolton @ 2012-12-14 12:00 UTC (permalink / raw)
  To: Ian Bolton, 'Richard Henderson'; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 723 bytes --]

> Hi Richard,
> 
> > +  "add\\t%w0, %w2, %w, <su>xt<SHORT:size>"
> >
> >                      ^^^ %w1
> 
> Good spot. I guess that pattern hasn't fired yet then!  I'll fix it.

Now fixed in v3.

> I should have said that I am indeed running with REE enabled.  It has
> some impact (about 70 further UXTW removed from the set of binaries
> I've been building) and seems to mostly be good across basic blocks
> within the same function.  As far as I can tell, there is no downside
> to REE, so I think it should be enabled by default for O2 or higher
> on AArch64 too.
> 

I'm going to enable REE in a separate patch.

Is this one OK to commit here and backport to ARM/aarch64-4.7-branch?

Thanks,
Ian

[-- Attachment #2: aarch64-rzx-patch1-v3.txt --]
[-- Type: text/plain, Size: 19247 bytes --]

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index a9a8b5f..d5c0206 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1271,6 +1273,22 @@
    (set_attr "mode" "SI")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_aarch64_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=rk,rk,rk")
+    (zero_extend:DI (plus:SI
+     (match_operand:SI 1 "register_operand" "%rk,rk,rk")
+     (match_operand:SI 2 "aarch64_plus_operand" "I,r,J"))))]
+  ""
+  "@
+  add\\t%w0, %w1, %2
+  add\\t%w0, %w1, %w2
+  sub\\t%w0, %w1, #%n2"
+  [(set_attr "v8type" "alu")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*adddi3_aarch64"
   [(set
     (match_operand:DI 0 "register_operand" "=rk,rk,rk,!w")
@@ -1304,6 +1322,23 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	 (plus:SI (match_operand:SI 1 "register_operand" "%r,r")
+		   (match_operand:SI 2 "aarch64_plus_operand" "rI,J"))
+	 (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r,r")
+	(zero_extend:DI (plus:SI (match_dup 1) (match_dup 2))))]
+  ""
+  "@
+  adds\\t%w0, %w1, %w2
+  subs\\t%w0, %w1, #%n2"
+  [(set_attr "v8type" "alus")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3nr_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
@@ -1340,6 +1375,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<shift>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI
+           (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
+		       (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
+	   (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w3, %w1, <shift> %2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_mul_imm_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1361,6 +1409,17 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (ANY_EXTEND:SI (match_operand:SHORT 1 "register_operand" "r"))
+		  (match_operand:GPI 2 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w2, %w1, <su>xt<SHORT:size>"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_<optab><ALLX:mode>_shft_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (ashift:GPI (ANY_EXTEND:GPI
@@ -1373,6 +1432,19 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_shft_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (ashift:SI (ANY_EXTEND:SI
+			       (match_operand:SHORT 1 "register_operand" "r"))
+			      (match_operand 2 "aarch64_imm3" "Ui3"))
+		  (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_<optab><ALLX:mode>_mult_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (mult:GPI (ANY_EXTEND:GPI
@@ -1385,6 +1457,19 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab><SHORT:mode>_mult_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (mult:SI (ANY_EXTEND:SI
+			     (match_operand:SHORT 1 "register_operand" "r"))
+			    (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		  (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "add\\t%w0, %w3, %w1, <su>xt<SHORT:size> %p2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_<optab><mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (ANY_EXTRACT:GPI
@@ -1399,6 +1484,21 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*add_<optab>si_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (ANY_EXTRACT:SI
+		   (mult:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		   (match_operand 3 "const_int_operand" "n")
+		   (const_int 0))
+		  (match_operand:SI 4 "register_operand" "r"))))]
+  "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
+  "add\\t%w0, %w4, %w1, <su>xt%e3 %p2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1412,6 +1512,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+	      (plus:SI
+		(match_operand:SI 1 "register_operand" "r")
+		(match_operand:SI 2 "register_operand" "r")))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin_alt1"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1425,6 +1539,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt1_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (plus:SI
+		(match_operand:SI 1 "register_operand" "r")
+		(match_operand:SI 2 "register_operand" "r"))
+              (geu:SI (reg:CC CC_REGNUM) (const_int 0)))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin_alt2"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1438,6 +1566,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above
+(define_insn "*addsi3_carryin_alt2_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (plus:SI
+                (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+		(match_operand:SI 1 "register_operand" "r"))
+	      (match_operand:SI 2 "register_operand" "r"))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add<mode>3_carryin_alt3"
   [(set
     (match_operand:GPI 0 "register_operand" "=r")
@@ -1451,6 +1593,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: the carry term is nested with operand 2.
+(define_insn "*addsi3_carryin_alt3_uxtw"
+  [(set
+    (match_operand:DI 0 "register_operand" "=r")
+    (zero_extend:DI (plus:SI (plus:SI
+                (geu:SI (reg:CC CC_REGNUM) (const_int 0))
+		(match_operand:SI 2 "register_operand" "r"))
+	      (match_operand:SI 1 "register_operand" "r"))))]
+   ""
+   "adc\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "adc")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*add_uxt<mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(plus:GPI (and:GPI
@@ -1467,6 +1623,23 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode ADD of an AND-masked MULT (uxt form).
+(define_insn "*add_uxtsi_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (plus:SI (and:SI
+		   (mult:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		   (match_operand 3 "const_int_operand" "n"))
+		  (match_operand:SI 4 "register_operand" "r"))))]
+  "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])), INTVAL (operands[3])) != 0"
+  "*
+  operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
+					   INTVAL (operands[3])));
+  return \"add\t%w0, %w4, %w1, uxt%e3 %p2\";"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "subsi3"
   [(set (match_operand:SI 0 "register_operand" "=rk")
 	(minus:SI (match_operand:SI 1 "register_operand" "r")
@@ -1477,6 +1650,17 @@
    (set_attr "mode" "SI")]
 )
 
+;; zero_extend version of above: SImode SUB, result zero-extended to DImode.
+(define_insn "*subsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "r")
+		   (match_operand:SI 2 "register_operand" "r"))))]
+  ""
+  "sub\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "alu")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "subdi3"
   [(set (match_operand:DI 0 "register_operand" "=rk,!w")
 	(minus:DI (match_operand:DI 1 "register_operand" "r,!w")
@@ -1504,6 +1687,20 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SUBS sets CC_NZ and zero-extends the result.
+(define_insn "*subsi3_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ (minus:SI (match_operand:SI 1 "register_operand" "r")
+				  (match_operand:SI 2 "register_operand" "r"))
+		       (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (minus:SI (match_dup 1) (match_dup 2))))]
+  ""
+  "subs\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "alus")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<shift>_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1516,6 +1713,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode SUB of a shifted register operand.
+(define_insn "*sub_<shift>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 3 "register_operand" "r")
+		   (ASHIFT:SI
+		    (match_operand:SI 1 "register_operand" "r")
+		    (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
+  ""
+  "sub\\t%w0, %w3, %w1, <shift> %2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_mul_imm_<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1528,6 +1738,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: the SImode MULT operand is emitted as an LSL.
+(define_insn "*sub_mul_imm_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 3 "register_operand" "r")
+		   (mult:SI
+		    (match_operand:SI 1 "register_operand" "r")
+		    (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
+  ""
+  "sub\\t%w0, %w3, %w1, lsl %p2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<optab><ALLX:mode>_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1539,6 +1762,18 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above: SImode SUB of an ANY_EXTEND-ed SHORT register.
+(define_insn "*sub_<optab><SHORT:mode>_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "r")
+		   (ANY_EXTEND:SI
+		    (match_operand:SHORT 2 "register_operand" "r")))))]
+  ""
+  "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size>"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<optab><ALLX:mode>_shft_<GPI:mode>"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1551,6 +1786,19 @@
    (set_attr "mode" "<GPI:MODE>")]
 )
 
+;; zero_extend version of above: SImode SUB of an extended, left-shifted SHORT.
+(define_insn "*sub_<optab><SHORT:mode>_shft_si_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 1 "register_operand" "r")
+		   (ashift:SI (ANY_EXTEND:SI
+				(match_operand:SHORT 2 "register_operand" "r"))
+			       (match_operand 3 "aarch64_imm3" "Ui3")))))]
+  ""
+  "sub\\t%w0, %w1, %w2, <su>xt<SHORT:size> %3"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_<optab><mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 4 "register_operand" "r")
@@ -1565,6 +1813,21 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode SUB of an ANY_EXTRACT of a MULT.
+(define_insn "*sub_<optab>si_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 4 "register_operand" "r")
+		   (ANY_EXTRACT:SI
+		    (mult:SI (match_operand:SI 1 "register_operand" "r")
+			      (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		    (match_operand 3 "const_int_operand" "n")
+		    (const_int 0)))))]
+  "aarch64_is_extend_from_extract (SImode, operands[2], operands[3])"
+  "sub\\t%w0, %w4, %w1, <su>xt%e3 %p2"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*sub_uxt<mode>_multp2"
   [(set (match_operand:GPI 0 "register_operand" "=rk")
 	(minus:GPI (match_operand:GPI 4 "register_operand" "r")
@@ -1581,6 +1844,23 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode SUB of an AND-masked MULT (uxt form).
+(define_insn "*sub_uxtsi_multp2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=rk")
+	(zero_extend:DI (minus:SI (match_operand:SI 4 "register_operand" "r")
+		   (and:SI
+		    (mult:SI (match_operand:SI 1 "register_operand" "r")
+			      (match_operand 2 "aarch64_pwr_imm3" "Up3"))
+		    (match_operand 3 "const_int_operand" "n")))))]
+  "aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),INTVAL (operands[3])) != 0"
+  "*
+  operands[3] = GEN_INT (aarch64_uxt_size (exact_log2 (INTVAL (operands[2])),
+					   INTVAL (operands[3])));
+  return \"sub\t%w0, %w4, %w1, uxt%e3 %p2\";"
+  [(set_attr "v8type" "alu_ext")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "neg<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(neg:GPI (match_operand:GPI 1 "register_operand" "r")))]
@@ -1590,6 +1870,16 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode NEG, result zero-extended to DImode.
+(define_insn "*negsi2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (match_operand:SI 1 "register_operand" "r"))))]
+  ""
+  "neg\\t%w0, %w1"
+  [(set_attr "v8type" "alu")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*neg<mode>2_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
@@ -1602,6 +1892,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: NEGS sets CC_NZ and zero-extends the result.
+(define_insn "*negsi2_compare0_uxtw"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ (neg:SI (match_operand:SI 1 "register_operand" "r"))
+		       (const_int 0)))
+   (set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (match_dup 1))))]
+  ""
+  "negs\\t%w0, %w1"
+  [(set_attr "v8type" "alus")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*neg_<shift>_<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(neg:GPI (ASHIFT:GPI
@@ -1613,6 +1916,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode NEG of a shifted register operand.
+(define_insn "*neg_<shift>_si2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (ASHIFT:SI
+		  (match_operand:SI 1 "register_operand" "r")
+		  (match_operand:QI 2 "aarch64_shift_imm_si" "n")))))]
+  ""
+  "neg\\t%w0, %w1, <shift> %2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*neg_mul_imm_<mode>2"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(neg:GPI (mult:GPI
@@ -1624,6 +1939,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: the SImode MULT operand is emitted as an LSL.
+(define_insn "*neg_mul_imm_si2_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (neg:SI (mult:SI
+		  (match_operand:SI 1 "register_operand" "r")
+		  (match_operand:QI 2 "aarch64_pwr_2_si" "n")))))]
+  ""
+  "neg\\t%w0, %w1, lsl %p2"
+  [(set_attr "v8type" "alu_shift")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "mul<mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1634,6 +1961,17 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode MUL, result zero-extended to DImode.
+(define_insn "*mulsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (mult:SI (match_operand:SI 1 "register_operand" "r")
+		  (match_operand:SI 2 "register_operand" "r"))))]
+  ""
+  "mul\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "mult")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*madd<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(plus:GPI (mult:GPI (match_operand:GPI 1 "register_operand" "r")
@@ -1645,6 +1983,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode MADD, result zero-extended to DImode.
+(define_insn "*maddsi_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (plus:SI (mult:SI (match_operand:SI 1 "register_operand" "r")
+			    (match_operand:SI 2 "register_operand" "r"))
+		  (match_operand:SI 3 "register_operand" "r"))))]
+  ""
+  "madd\\t%w0, %w1, %w2, %w3"
+  [(set_attr "v8type" "madd")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*msub<mode>"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(minus:GPI (match_operand:GPI 3 "register_operand" "r")
@@ -1657,6 +2007,19 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode MSUB, result zero-extended to DImode.
+(define_insn "*msubsi_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (minus:SI (match_operand:SI 3 "register_operand" "r")
+		   (mult:SI (match_operand:SI 1 "register_operand" "r")
+			     (match_operand:SI 2 "register_operand" "r")))))]
+
+  ""
+  "msub\\t%w0, %w1, %w2, %w3"
+  [(set_attr "v8type" "madd")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "*mul<mode>_neg"
   [(set (match_operand:GPI 0 "register_operand" "=r")
 	(mult:GPI (neg:GPI (match_operand:GPI 1 "register_operand" "r"))
@@ -1668,6 +2031,18 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode MNEG, result zero-extended to DImode.
+(define_insn "*mulsi_neg_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (mult:SI (neg:SI (match_operand:SI 1 "register_operand" "r"))
+		  (match_operand:SI 2 "register_operand" "r"))))]
+
+  ""
+  "mneg\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "mult")
+   (set_attr "mode" "SI")]
+)
+
 (define_insn "<su_optab>mulsidi3"
   [(set (match_operand:DI 0 "register_operand" "=r")
 	(mult:DI (ANY_EXTEND:DI (match_operand:SI 1 "register_operand" "r"))
@@ -1738,6 +2113,17 @@
    (set_attr "mode" "<MODE>")]
 )
 
+;; zero_extend version of above: SImode <su>div, zero-extended to DImode.
+(define_insn "*<su_optab>divsi3_uxtw"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI (ANY_DIV:SI (match_operand:SI 1 "register_operand" "r")
+		     (match_operand:SI 2 "register_operand" "r"))))]
+  ""
+  "<su>div\\t%w0, %w1, %w2"
+  [(set_attr "v8type" "<su>div")
+   (set_attr "mode" "SI")]
+)
+
 ;; -------------------------------------------------------------------
 ;; Comparison insns
 ;; -------------------------------------------------------------------

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH, AArch64] Make zero_extends explicit for common SImode patterns
  2012-12-14 12:00     ` Ian Bolton
@ 2012-12-17 14:44       ` Richard Earnshaw
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Earnshaw @ 2012-12-17 14:44 UTC (permalink / raw)
  To: Ian Bolton; +Cc: 'Richard Henderson', gcc-patches

On 14/12/12 11:59, Ian Bolton wrote:
>> Hi Richard,
>>
>>> +  "add\\t%w0, %w2, %w, <su>xt<SHORT:size>"
>>>
>>>                       ^^^ %w1
>>
>> Good spot. I guess that pattern hasn't fired yet then!  I'll fix it.
>
> Now fixed in v3.
>
>> I should have said that I am indeed running with REE enabled.  It has
>> some impact (about 70 further UXTW removed from the set of binaries
>> I've been building) and seems to mostly be good across basic blocks
>> within the same function.  As far as I can tell, there is no downside
>> to REE, so I think it should be enabled by default for O2 or higher
>> on AArch64 too.
>>
>
> I'm going to enable REE in a separate patch.
>
> Is this one OK to commit here and backport to ARM/aarch64-4.7-branch?
>
> Thanks,
> Ian
>

Please can you fix up the indentation.  You've copied a lot of patterns 
and made the code changes, but then left the indentation inconsistent.

For example:

;; zero_extend version of above
(define_insn "*add_<shift>_si_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=rk")
	(zero_extend:DI (plus:SI
            (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
		       (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
	   (match_operand:SI 3 "register_operand" "r"))))]
   ""
   "add\\t%w0, %w3, %w1, <shift> %2"
   [(set_attr "v8type" "alu_shift")
    (set_attr "mode" "SI")]
)

Should be reformatted to:

;; zero_extend version of above
(define_insn "*add_<shift>_si_uxtw"
   [(set (match_operand:DI 0 "register_operand" "=rk")
	(zero_extend:DI
          (plus:SI
           (ASHIFT:SI (match_operand:SI 1 "register_operand" "r")
		     (match_operand:QI 2 "aarch64_shift_imm_si" "n"))
	  (match_operand:SI 3 "register_operand" "r"))))]
   ""
   "add\\t%w0, %w3, %w1, <shift> %2"
   [(set_attr "v8type" "alu_shift")
    (set_attr "mode" "SI")]
)

Which makes the association of operands much clearer.

OK with that change.

R.


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-12-17 14:44 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-12-13 17:26 [PATCH, AArch64] Make zero_extends explicit for common SImode patterns Ian Bolton
2012-12-13 20:40 ` Richard Henderson
2012-12-13 20:42 ` Richard Henderson
2012-12-14 11:44   ` Ian Bolton
2012-12-14 12:00     ` Ian Bolton
2012-12-17 14:44       ` Richard Earnshaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).