public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
@ 2012-05-26 13:43 Carrot Wei
  2012-06-04  9:55 ` Carrot Wei
  0 siblings, 1 reply; 13+ messages in thread
From: Carrot Wei @ 2012-05-26 13:43 UTC (permalink / raw)
  To: gcc-patches

Hi,

As described in PR53447, many 64bit ALU operations with constant can be
optimized to use corresponding 32bit instructions with immediate operands.

This is the first part of the patches that deals with 64bit add. It directly
extends the patterns adddi3, arm_adddi3 and adddi3_neon to handle constant
operands.

Tested on arm qemu without regression.

OK for trunk?

thanks
Carrot

2012-05-26  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.


2012-05-26  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/arm-protos.h (const_ok_for_adddi): New prototype.
	* config/arm/arm.c (const_ok_for_adddi): New function.
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_adddi3): Likewise.
	* config/arm/neon.md (adddi3_neon): Likewise.


Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 187751)
+++ config/arm/arm.c	(working copy)
@@ -2497,6 +2497,17 @@
     }
 }

+/* Return TRUE if int I is a valid immediate constant used by pattern
+   arm_adddi3.  */
+int
+const_ok_for_adddi (HOST_WIDE_INT i)
+{
+  HOST_WIDE_INT high = (i >> 32) & 0xFFFFFFFF;
+  HOST_WIDE_INT low = i & 0xFFFFFFFF;
+  return (const_ok_for_arm (high)
+	  && (const_ok_for_arm (low) || const_ok_for_arm (-low)));
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h	(revision 187751)
+++ config/arm/arm-protos.h	(working copy)
@@ -47,6 +47,7 @@
 extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
 extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern int const_ok_for_adddi (HOST_WIDE_INT);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 187751)
+++ config/arm/neon.md	(working copy)
@@ -588,9 +588,9 @@
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -600,13 +600,16 @@
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,*,*,*")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 187751)
+++ config/arm/constraints.md	(working copy)
@@ -29,7 +29,7 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -251,6 +251,12 @@
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+ In ARM/Thumb-2 state a const_int that can be used by insn adddi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_adddi (ival)")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 187751)
+++ config/arm/arm.md	(working copy)
@@ -574,10 +574,21 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "reg_or_int_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      if (TARGET_32BIT && const_ok_for_adddi (INTVAL (operands[2])))
+	{
+	  emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+      else
+	operands[2] = force_reg (DImode, operands[2]);
+    }
+
   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], DImode))
@@ -609,10 +620,10 @@
   [(set_attr "length" "4")]
 )

-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+(define_insn_and_split "arm_adddi3"
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
   "#"
@@ -630,8 +641,17 @@
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
+    if (GET_CODE (operands[2]) == CONST_INT)
+      {
+	HOST_WIDE_INT v = INTVAL (operands[2]);
+	operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+	operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+      }
+    else
+      {
+	operands[5] = gen_highpart (SImode, operands[2]);
+	operands[2] = gen_lowpart (SImode, operands[2]);
+      }
   }"
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-05-26 13:43 [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant Carrot Wei
@ 2012-06-04  9:55 ` Carrot Wei
  2012-06-06 11:26   ` Carrot Wei
  0 siblings, 1 reply; 13+ messages in thread
From: Carrot Wei @ 2012-06-04  9:55 UTC (permalink / raw)
  To: gcc-patches

Hi

I updated the patch to correct the length of insn adddi3_neon.

thanks
Carrot


2012-06-04  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.


2012-06-04  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/arm-protos.h (const_ok_for_adddi): New prototype.
	* config/arm/arm.c (const_ok_for_adddi): New function.
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_adddi3): Likewise.
	* config/arm/neon.md (adddi3_neon): Likewise.


Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 187751)
+++ config/arm/arm.c	(working copy)
@@ -2497,6 +2497,17 @@
     }
 }

+/* Return TRUE if int I is a valid immediate constant used by pattern
+   arm_adddi3.  */
+int
+const_ok_for_adddi (HOST_WIDE_INT i)
+{
+  HOST_WIDE_INT high = (i >> 32) & 0xFFFFFFFF;
+  HOST_WIDE_INT low = i & 0xFFFFFFFF;
+  return (const_ok_for_arm (high)
+	  && (const_ok_for_arm (low) || const_ok_for_arm (-low)));
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h	(revision 187751)
+++ config/arm/arm-protos.h	(working copy)
@@ -47,6 +47,7 @@
 extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
 extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
 extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
+extern int const_ok_for_adddi (HOST_WIDE_INT);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 187751)
+++ config/arm/neon.md	(working copy)
@@ -588,9 +588,9 @@
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -600,13 +600,16 @@
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 187751)
+++ config/arm/constraints.md	(working copy)
@@ -29,7 +29,7 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -251,6 +251,12 @@
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+ In ARM/Thumb-2 state a const_int that can be used by insn adddi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_adddi (ival)")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 187751)
+++ config/arm/arm.md	(working copy)
@@ -574,10 +574,21 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "reg_or_int_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      if (TARGET_32BIT && const_ok_for_adddi (INTVAL (operands[2])))
+	{
+	  emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+      else
+	operands[2] = force_reg (DImode, operands[2]);
+    }
+
   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], DImode))
@@ -609,10 +620,10 @@
   [(set_attr "length" "4")]
 )

-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+(define_insn_and_split "arm_adddi3"
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
   "#"
@@ -630,8 +641,17 @@
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
+    if (GET_CODE (operands[2]) == CONST_INT)
+      {
+	HOST_WIDE_INT v = INTVAL (operands[2]);
+	operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+	operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+      }
+    else
+      {
+	operands[5] = gen_highpart (SImode, operands[2]);
+	operands[2] = gen_lowpart (SImode, operands[2]);
+      }
   }"
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]



On Sat, May 26, 2012 at 9:42 PM, Carrot Wei <carrot@google.com> wrote:
> Hi,
>
> As described in PR53447, many 64bit ALU operations with constant can be
> optimized to use corresponding 32bit instructions with immediate operands.
>
> This is the first part of the patches that deals with 64bit add. It directly
> extends the patterns adddi3, arm_adddi3 and adddi3_neon to handle constant
> operands.
>
> Tested on arm qemu without regression.
>
> OK for trunk?
>
> thanks
> Carrot
>
> 2012-05-26  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>
>
> 2012-05-26  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/arm-protos.h (const_ok_for_adddi): New prototype.
>        * config/arm/arm.c (const_ok_for_adddi): New function.
>        * config/arm/constraints.md (Dd): New constraint.
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_adddi3): Likewise.
>        * config/arm/neon.md (adddi3_neon): Likewise.
>
>
> Index: testsuite/gcc.target/arm/pr53447-1.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p += 0x100000001;
> +}
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 187751)
> +++ config/arm/arm.c    (working copy)
> @@ -2497,6 +2497,17 @@
>     }
>  }
>
> +/* Return TRUE if int I is a valid immediate constant used by pattern
> +   arm_adddi3.  */
> +int
> +const_ok_for_adddi (HOST_WIDE_INT i)
> +{
> +  HOST_WIDE_INT high = (i >> 32) & 0xFFFFFFFF;
> +  HOST_WIDE_INT low = i & 0xFFFFFFFF;
> +  return (const_ok_for_arm (high)
> +         && (const_ok_for_arm (low) || const_ok_for_arm (-low)));
> +}
> +
>  /* Emit a sequence of insns to handle a large constant.
>    CODE is the code of the operation required, it can be any of SET, PLUS,
>    IOR, AND, XOR, MINUS;
> Index: config/arm/arm-protos.h
> ===================================================================
> --- config/arm/arm-protos.h     (revision 187751)
> +++ config/arm/arm-protos.h     (working copy)
> @@ -47,6 +47,7 @@
>  extern bool arm_small_register_classes_for_mode_p (enum machine_mode);
>  extern int arm_hard_regno_mode_ok (unsigned int, enum machine_mode);
>  extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
> +extern int const_ok_for_adddi (HOST_WIDE_INT);
>  extern int const_ok_for_arm (HOST_WIDE_INT);
>  extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
>  extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 187751)
> +++ config/arm/neon.md  (working copy)
> @@ -588,9 +588,9 @@
>  )
>
>  (define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
> +                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Dd,Dd")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_NEON"
>  {
> @@ -600,13 +600,16 @@
>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>     case 1: return "#";
>     case 2: return "#";
> +    case 4: return "#";
> +    case 5: return "#";
> +    case 6: return "#";
>     default: gcc_unreachable ();
>     }
>  }
> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
> -   (set_attr "conds" "*,clob,clob,*")
> -   (set_attr "length" "*,8,8,*")
> -   (set_attr "arch" "nota8,*,*,onlya8")]
> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> +   (set_attr "length" "*,8,8,*,*,*,*")
> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>  )
>
>  (define_insn "*sub<mode>3_neon"
> Index: config/arm/constraints.md
> ===================================================================
> --- config/arm/constraints.md   (revision 187751)
> +++ config/arm/constraints.md   (working copy)
> @@ -29,7 +29,7 @@
>  ;; in Thumb-1 state: I, J, K, L, M, N, O
>
>  ;; The following multi-letter normal constraints have been used:
> -;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
> +;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
>  ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
>  ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
>
> @@ -251,6 +251,12 @@
>       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
>                   && !(optimize_size || arm_ld_sched)")))
>
> +(define_constraint "Dd"
> + "@internal
> + In ARM/Thumb-2 state a const_int that can be used by insn adddi."
> + (and (match_code "const_int")
> +      (match_test "TARGET_32BIT && const_ok_for_adddi (ival)")))
> +
>  (define_constraint "Di"
>  "@internal
>   In ARM/Thumb-2 state a const_int or const_double where both the high
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md   (revision 187751)
> +++ config/arm/arm.md   (working copy)
> @@ -574,10 +574,21 @@
>  [(parallel
>    [(set (match_operand:DI           0 "s_register_operand" "")
>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> -                  (match_operand:DI 2 "s_register_operand" "")))
> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>     (clobber (reg:CC CC_REGNUM))])]
>   "TARGET_EITHER"
>   "
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      if (TARGET_32BIT && const_ok_for_adddi (INTVAL (operands[2])))
> +       {
> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
> +         DONE;
> +       }
> +      else
> +       operands[2] = force_reg (DImode, operands[2]);
> +    }
> +
>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>     {
>       if (!cirrus_fp_register (operands[0], DImode))
> @@ -609,10 +620,10 @@
>   [(set_attr "length" "4")]
>  )
>
> -(define_insn_and_split "*arm_adddi3"
> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> +(define_insn_and_split "arm_adddi3"
> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> +                (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Dd,Dd")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>   "#"
> @@ -630,8 +641,17 @@
>     operands[0] = gen_lowpart (SImode, operands[0]);
>     operands[4] = gen_highpart (SImode, operands[1]);
>     operands[1] = gen_lowpart (SImode, operands[1]);
> -    operands[5] = gen_highpart (SImode, operands[2]);
> -    operands[2] = gen_lowpart (SImode, operands[2]);
> +    if (GET_CODE (operands[2]) == CONST_INT)
> +      {
> +       HOST_WIDE_INT v = INTVAL (operands[2]);
> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +      }
> +    else
> +      {
> +       operands[5] = gen_highpart (SImode, operands[2]);
> +       operands[2] = gen_lowpart (SImode, operands[2]);
> +      }
>   }"
>   [(set_attr "conds" "clob")
>    (set_attr "length" "8")]

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-04  9:55 ` Carrot Wei
@ 2012-06-06 11:26   ` Carrot Wei
  2012-06-08 10:17     ` Carrot Wei
  0 siblings, 1 reply; 13+ messages in thread
From: Carrot Wei @ 2012-06-06 11:26 UTC (permalink / raw)
  To: gcc-patches

In the original patch, if "add r0, c" is not possible, but "sub r0,
-c" is possible, it will use the sub instruction. Although they
generate same result, but they may generate different CF flag, and
cause subsequent adc to compute out wrong result. So I updated the
patch to avoid using sub instruction.

Tested on arm qemu with both arm/thumb mode.

thanks
Carrot


2012-06-06  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.


2012-06-06  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_adddi3): Likewise.
	* config/arm/neon.md (adddi3_neon): Likewise.


Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 187751)
+++ config/arm/neon.md	(working copy)
@@ -588,9 +588,9 @@
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Di,Di")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -600,13 +600,16 @@
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 187751)
+++ config/arm/arm.md	(working copy)
@@ -574,10 +574,21 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "reg_or_int_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
+	{
+	  emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+      else
+	operands[2] = force_reg (DImode, operands[2]);
+    }
+
   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], DImode))
@@ -609,10 +620,10 @@
   [(set_attr "length" "4")]
 )

-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+(define_insn_and_split "arm_adddi3"
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Di,Di")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
   "#"
@@ -630,8 +641,17 @@
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
+    if (GET_CODE (operands[2]) == CONST_INT)
+      {
+	HOST_WIDE_INT v = INTVAL (operands[2]);
+	operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+	operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+      }
+    else
+      {
+	operands[5] = gen_highpart (SImode, operands[2]);
+	operands[2] = gen_lowpart (SImode, operands[2]);
+      }
   }"
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]

On Mon, Jun 4, 2012 at 5:55 PM, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> I updated the patch to correct the length of insn adddi3_neon.
>
> thanks
> Carrot
>
>
> 2012-06-04  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>
>
> 2012-06-04  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/arm-protos.h (const_ok_for_adddi): New prototype.
>        * config/arm/arm.c (const_ok_for_adddi): New function.
>        * config/arm/constraints.md (Dd): New constraint.
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_adddi3): Likewise.
>        * config/arm/neon.md (adddi3_neon): Likewise.
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-06 11:26   ` Carrot Wei
@ 2012-06-08 10:17     ` Carrot Wei
  2012-06-27 16:14       ` Ramana Radhakrishnan
  0 siblings, 1 reply; 13+ messages in thread
From: Carrot Wei @ 2012-06-08 10:17 UTC (permalink / raw)
  To: gcc-patches

Hi

In rtl expression, substract a constant c is expressed as add a value -c, so it
is alse processed by adddi3, and I extend it more to handle a subtraction of
64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
represent substraction with 64bit constant while continue keeping the add rtl
expression.

Tested on arm qemu with both arm/thumb modes. OK for trunk?

thanks
Carrot


2012-06-08  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.
	* gcc.target/arm/pr53447-5.c: New testcase.


2012-06-08  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/predicates.md (arm_neg_immediate_di_operand): New
	predicate.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_subdi3_immediate): New insn pattern.
	(arm_adddi3): Extend it to handle constants.
	* config/arm/neon.md (adddi3_neon): Likewise.


Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: testsuite/gcc.target/arm/pr53447-5.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-5.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-5.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p -= 0x100000008;
+}
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 187751)
+++ config/arm/neon.md	(working copy)
@@ -588,9 +588,9 @@
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "arm_di_operand"     "w,r,0,w,r,Di,Di")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -600,13 +600,16 @@
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 187751)
+++ config/arm/constraints.md	(working copy)
@@ -29,7 +29,7 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -251,6 +251,13 @@
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+  In ARM/Thumb-2 state a const_int whose negative value satisfy constraint
+  Di."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && arm_const_double_by_immediates
(GEN_INT (-ival))")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/predicates.md
===================================================================
--- config/arm/predicates.md	(revision 187751)
+++ config/arm/predicates.md	(working copy)
@@ -117,6 +117,10 @@
   (and (match_code "const_int,const_double")
        (match_test "arm_const_double_by_immediates (op)")))

+(define_predicate "arm_neg_immediate_di_operand"
+  (and (match_code "const_int")
+       (match_test "arm_const_double_by_immediates (GEN_INT (-INTVAL (op)))")))
+
 (define_predicate "arm_neg_immediate_operand"
   (and (match_code "const_int")
        (match_test "const_ok_for_arm (-INTVAL (op))")))
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 187751)
+++ config/arm/arm.md	(working copy)
@@ -574,10 +574,29 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "reg_or_int_operand" "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
+  if (GET_CODE (operands[2]) == CONST_INT)
+    {
+      rtx neg_val = GEN_INT (-INTVAL (operands[2]));
+      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
+	{
+	  emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
+	  DONE;
+	}
+      else if (TARGET_32BIT && arm_const_double_by_immediates (neg_val))
+	{
+	  emit_insn (gen_arm_subdi3_immediate (operands[0],
+					       operands[1],
+					       operands[2]));
+	  DONE;
+	}
+      else
+	operands[2] = force_reg (DImode, operands[2]);
+    }
+
   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
     {
       if (!cirrus_fp_register (operands[0], DImode))
@@ -609,10 +628,10 @@
   [(set_attr "length" "4")]
 )

-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+(define_insn_and_split "arm_adddi3"
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "arm_di_operand"     "r,  0, r, Di,Di")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
   "#"
@@ -630,8 +649,17 @@
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
+    if (GET_CODE (operands[2]) == CONST_INT)
+      {
+	HOST_WIDE_INT v = INTVAL (operands[2]);
+	operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+	operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+      }
+    else
+      {
+	operands[5] = gen_highpart (SImode, operands[2]);
+	operands[2] = gen_lowpart (SImode, operands[2]);
+      }
   }"
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]
@@ -1122,6 +1150,25 @@
    (set_attr "length" "8")]
 )

+(define_insn "arm_subdi3_immediate"
+  [(set (match_operand:DI          0 "s_register_operand"           "=&r,&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand"           "0, r")
+                 (match_operand:DI 2 "arm_neg_immediate_di_operand" "Dd,Dd")))
+   (clobber (reg:CC CC_REGNUM))]
+  "TARGET_32BIT"
+  "*
+  {
+    HOST_WIDE_INT v = -INTVAL (operands[2]);
+    operands[3] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+    operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+    output_asm_insn (\"subs\\t%Q0, %Q1, %2\", operands);
+    output_asm_insn (\"sbc\\t%R0, %R1, %3\", operands);
+    return \"\";
+  }"
+  [(set_attr "conds" "clob")
+   (set_attr "length" "8")]
+)
+
 (define_insn "*thumb_subdi3"
   [(set (match_operand:DI           0 "register_operand" "=l")
 	(minus:DI (match_operand:DI 1 "register_operand"  "0")



On Wed, Jun 6, 2012 at 7:16 PM, Carrot Wei <carrot@google.com> wrote:
> In the original patch, if "add r0, c" is not possible, but "sub r0,
> -c" is possible, it will use the sub instruction. Although they
> generate same result, but they may generate different CF flag, and
> cause subsequent adc to compute out wrong result. So I updated the
> patch to avoid using sub instruction.
>
> Tested on arm qemu with both arm/thumb mode.
>
> thanks
> Carrot
>
>
> 2012-06-06  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>
>
> 2012-06-06  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_adddi3): Likewise.
>        * config/arm/neon.md (adddi3_neon): Likewise.
>
>
> Index: testsuite/gcc.target/arm/pr53447-1.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p += 0x100000001;
> +}
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 187751)
> +++ config/arm/neon.md  (working copy)
> @@ -588,9 +588,9 @@
>  )
>
>  (define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
> +                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_NEON"
>  {
> @@ -600,13 +600,16 @@
>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>     case 1: return "#";
>     case 2: return "#";
> +    case 4: return "#";
> +    case 5: return "#";
> +    case 6: return "#";
>     default: gcc_unreachable ();
>     }
>  }
> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
> -   (set_attr "conds" "*,clob,clob,*")
> -   (set_attr "length" "*,8,8,*")
> -   (set_attr "arch" "nota8,*,*,onlya8")]
> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> +   (set_attr "length" "*,8,8,*,8,8,8")
> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>  )
>
>  (define_insn "*sub<mode>3_neon"
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md   (revision 187751)
> +++ config/arm/arm.md   (working copy)
> @@ -574,10 +574,21 @@
>  [(parallel
>    [(set (match_operand:DI           0 "s_register_operand" "")
>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> -                  (match_operand:DI 2 "s_register_operand" "")))
> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>     (clobber (reg:CC CC_REGNUM))])]
>   "TARGET_EITHER"
>   "
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
> +       {
> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
> +         DONE;
> +       }
> +      else
> +       operands[2] = force_reg (DImode, operands[2]);
> +    }
> +
>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>     {
>       if (!cirrus_fp_register (operands[0], DImode))
> @@ -609,10 +620,10 @@
>   [(set_attr "length" "4")]
>  )
>
> -(define_insn_and_split "*arm_adddi3"
> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> +(define_insn_and_split "arm_adddi3"
> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> +                (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>   "#"
> @@ -630,8 +641,17 @@
>     operands[0] = gen_lowpart (SImode, operands[0]);
>     operands[4] = gen_highpart (SImode, operands[1]);
>     operands[1] = gen_lowpart (SImode, operands[1]);
> -    operands[5] = gen_highpart (SImode, operands[2]);
> -    operands[2] = gen_lowpart (SImode, operands[2]);
> +    if (GET_CODE (operands[2]) == CONST_INT)
> +      {
> +       HOST_WIDE_INT v = INTVAL (operands[2]);
> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +      }
> +    else
> +      {
> +       operands[5] = gen_highpart (SImode, operands[2]);
> +       operands[2] = gen_lowpart (SImode, operands[2]);
> +      }
>   }"
>   [(set_attr "conds" "clob")
>    (set_attr "length" "8")]
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-08 10:17     ` Carrot Wei
@ 2012-06-27 16:14       ` Ramana Radhakrishnan
  2012-06-28  9:37         ` Carrot Wei
  0 siblings, 1 reply; 13+ messages in thread
From: Ramana Radhakrishnan @ 2012-06-27 16:14 UTC (permalink / raw)
  To: Carrot Wei; +Cc: gcc-patches, Richard Earnshaw

On 8 June 2012 10:12, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> In rtl expression, substract a constant c is expressed as add a value -c, so it
> is alse processed by adddi3, and I extend it more to handle a subtraction of
> 64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
> represent substraction with 64bit constant while continue keeping the add rtl
> expression.
>

Sorry about the time it has taken to review this patch -Thanks for
tackling this but I'm not convinced that this patch is correct and
definitely can be more efficient.

The range of valid 64 bit constants allowed would be in my opinion are
the following- obtained by dividing the 64 bit constant into 2 32 bit
halves (upper32 and lower32 referred to as upper and lower below)

 arm_not_operand (upper) && arm_add_operand (lower) which boils down
to the valid combination of

  adds lo : adc hi - both positive constants.
  adds lo ; sbc hi  - lower positive, upper negative
  subs lo ; sbc hi - lower negative, upper negative
  subs lo ; adc hi  - lower negative, upper positive


Therefore I'd do the following -

* Don't make *arm_adddi3 a named pattern - we don't need that.
* Change the *addsi3_carryin_<optab> pattern to be something like this :

--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -1001,12 +1001,14 @@
 )

 (define_insn "*addsi3_carryin_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
-                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r"
+                         (match_operand:SI 2 "arm_not_operand" "rI,K"
                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
   "TARGET_32BIT"
-  "adc%?\\t%0, %1, %2"
+  "@
+  adc%?\\t%0, %1, %2
+  sbc%?\\t%0, %1, %#n2"
   [(set_attr "conds" "use")]
 )

* I'd like a new const_ok_for_dimode_op function that dealt with each
of these operations, thus your plus operation with a DImode constant
would just be a check similar to what I've said above.
* You then don't need the new subdi3_immediate pattern and the split
can happen after reload. Adjust predicates and constraints
accordingly, delete it. Also please use CONST_INT_P instead of
GET_CODE (x) == CONST_INT in your patch.

regards,
Ramana











> Tested on arm qemu with both arm/thumb modes. OK for trunk?
>
> thanks
> Carrot
>
>
> 2012-06-08  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>        * gcc.target/arm/pr53447-5.c: New testcase.
>
>
> 2012-06-08  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/constraints.md (Dd): New constraint.
>        * config/arm/predicates.md (arm_neg_immediate_di_operand): New
>        predicate.
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_subdi3_immediate): New insn pattern.
>        (arm_adddi3): Extend it to handle constants.
>        * config/arm/neon.md (adddi3_neon): Likewise.
>
>
> Index: testsuite/gcc.target/arm/pr53447-1.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p += 0x100000001;
> +}
> Index: testsuite/gcc.target/arm/pr53447-5.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-5.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-5.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p -= 0x100000008;
> +}
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 187751)
> +++ config/arm/neon.md  (working copy)
> @@ -588,9 +588,9 @@
>  )
>
>  (define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
> +                 (match_operand:DI 2 "arm_di_operand"     "w,r,0,w,r,Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_NEON"
>  {
> @@ -600,13 +600,16 @@
>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>     case 1: return "#";
>     case 2: return "#";
> +    case 4: return "#";
> +    case 5: return "#";
> +    case 6: return "#";
>     default: gcc_unreachable ();
>     }
>  }
> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
> -   (set_attr "conds" "*,clob,clob,*")
> -   (set_attr "length" "*,8,8,*")
> -   (set_attr "arch" "nota8,*,*,onlya8")]
> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> +   (set_attr "length" "*,8,8,*,8,8,8")
> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>  )
>
>  (define_insn "*sub<mode>3_neon"
> Index: config/arm/constraints.md
> ===================================================================
> --- config/arm/constraints.md   (revision 187751)
> +++ config/arm/constraints.md   (working copy)
> @@ -29,7 +29,7 @@
>  ;; in Thumb-1 state: I, J, K, L, M, N, O
>
>  ;; The following multi-letter normal constraints have been used:
> -;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
> +;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
>  ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
>  ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
>
> @@ -251,6 +251,13 @@
>       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
>                   && !(optimize_size || arm_ld_sched)")))
>
> +(define_constraint "Dd"
> + "@internal
> +  In ARM/Thumb-2 state a const_int whose negative value satisfy constraint
> +  Di."
> + (and (match_code "const_int")
> +      (match_test "TARGET_32BIT && arm_const_double_by_immediates
> (GEN_INT (-ival))")))
> +
>  (define_constraint "Di"
>  "@internal
>   In ARM/Thumb-2 state a const_int or const_double where both the high
> Index: config/arm/predicates.md
> ===================================================================
> --- config/arm/predicates.md    (revision 187751)
> +++ config/arm/predicates.md    (working copy)
> @@ -117,6 +117,10 @@
>   (and (match_code "const_int,const_double")
>        (match_test "arm_const_double_by_immediates (op)")))
>
> +(define_predicate "arm_neg_immediate_di_operand"
> +  (and (match_code "const_int")
> +       (match_test "arm_const_double_by_immediates (GEN_INT (-INTVAL (op)))")))
> +
>  (define_predicate "arm_neg_immediate_operand"
>   (and (match_code "const_int")
>        (match_test "const_ok_for_arm (-INTVAL (op))")))
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md   (revision 187751)
> +++ config/arm/arm.md   (working copy)
> @@ -574,10 +574,29 @@
>  [(parallel
>    [(set (match_operand:DI           0 "s_register_operand" "")
>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> -                  (match_operand:DI 2 "s_register_operand" "")))
> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>     (clobber (reg:CC CC_REGNUM))])]
>   "TARGET_EITHER"
>   "
> +  if (GET_CODE (operands[2]) == CONST_INT)
> +    {
> +      rtx neg_val = GEN_INT (-INTVAL (operands[2]));
> +      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
> +       {
> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
> +         DONE;
> +       }
> +      else if (TARGET_32BIT && arm_const_double_by_immediates (neg_val))
> +       {
> +         emit_insn (gen_arm_subdi3_immediate (operands[0],
> +                                              operands[1],
> +                                              operands[2]));
> +         DONE;
> +       }
> +      else
> +       operands[2] = force_reg (DImode, operands[2]);
> +    }
> +
>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>     {
>       if (!cirrus_fp_register (operands[0], DImode))
> @@ -609,10 +628,10 @@
>   [(set_attr "length" "4")]
>  )
>
> -(define_insn_and_split "*arm_adddi3"
> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> +(define_insn_and_split "arm_adddi3"
> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> +                (match_operand:DI 2 "arm_di_operand"     "r,  0, r, Di,Di")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>   "#"
> @@ -630,8 +649,17 @@
>     operands[0] = gen_lowpart (SImode, operands[0]);
>     operands[4] = gen_highpart (SImode, operands[1]);
>     operands[1] = gen_lowpart (SImode, operands[1]);
> -    operands[5] = gen_highpart (SImode, operands[2]);
> -    operands[2] = gen_lowpart (SImode, operands[2]);
> +    if (GET_CODE (operands[2]) == CONST_INT)
> +      {
> +       HOST_WIDE_INT v = INTVAL (operands[2]);
> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +      }
> +    else
> +      {
> +       operands[5] = gen_highpart (SImode, operands[2]);
> +       operands[2] = gen_lowpart (SImode, operands[2]);
> +      }
>   }"
>   [(set_attr "conds" "clob")
>    (set_attr "length" "8")]
> @@ -1122,6 +1150,25 @@
>    (set_attr "length" "8")]
>  )
>
> +(define_insn "arm_subdi3_immediate"
> +  [(set (match_operand:DI          0 "s_register_operand"           "=&r,&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand"           "0, r")
> +                 (match_operand:DI 2 "arm_neg_immediate_di_operand" "Dd,Dd")))
> +   (clobber (reg:CC CC_REGNUM))]
> +  "TARGET_32BIT"
> +  "*
> +  {
> +    HOST_WIDE_INT v = -INTVAL (operands[2]);
> +    operands[3] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +    operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +    output_asm_insn (\"subs\\t%Q0, %Q1, %2\", operands);
> +    output_asm_insn (\"sbc\\t%R0, %R1, %3\", operands);
> +    return \"\";
> +  }"
> +  [(set_attr "conds" "clob")
> +   (set_attr "length" "8")]
> +)
> +
>  (define_insn "*thumb_subdi3"
>   [(set (match_operand:DI           0 "register_operand" "=l")
>        (minus:DI (match_operand:DI 1 "register_operand"  "0")
>
>
>
> On Wed, Jun 6, 2012 at 7:16 PM, Carrot Wei <carrot@google.com> wrote:
>> In the original patch, if "add r0, c" is not possible, but "sub r0,
>> -c" is possible, it will use the sub instruction. Although they
>> generate same result, but they may generate different CF flag, and
>> cause subsequent adc to compute out wrong result. So I updated the
>> patch to avoid using sub instruction.
>>
>> Tested on arm qemu with both arm/thumb mode.
>>
>> thanks
>> Carrot
>>
>>
>> 2012-06-06  Wei Guozhi  <carrot@google.com>
>>
>>        PR target/53447
>>        * gcc.target/arm/pr53447-1.c: New testcase.
>>
>>
>> 2012-06-06  Wei Guozhi  <carrot@google.com>
>>
>>        PR target/53447
>>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>>        (arm_adddi3): Likewise.
>>        * config/arm/neon.md (adddi3_neon): Likewise.
>>
>>
>> Index: testsuite/gcc.target/arm/pr53447-1.c
>> ===================================================================
>> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
>> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
>> @@ -0,0 +1,8 @@
>> +/* { dg-options "-O2" }  */
>> +/* { dg-require-effective-target arm32 } */
>> +/* { dg-final { scan-assembler-not "mov" } } */
>> +
>> +void t0p(long long * p)
>> +{
>> +  *p += 0x100000001;
>> +}
>> Index: config/arm/neon.md
>> ===================================================================
>> --- config/arm/neon.md  (revision 187751)
>> +++ config/arm/neon.md  (working copy)
>> @@ -588,9 +588,9 @@
>>  )
>>
>>  (define_insn "adddi3_neon"
>> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
>> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
>> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
>> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
>> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
>> +                 (match_operand:DI 2 "reg_or_int_operand" "w,r,0,w,r,Di,Di")))
>>    (clobber (reg:CC CC_REGNUM))]
>>   "TARGET_NEON"
>>  {
>> @@ -600,13 +600,16 @@
>>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>>     case 1: return "#";
>>     case 2: return "#";
>> +    case 4: return "#";
>> +    case 5: return "#";
>> +    case 6: return "#";
>>     default: gcc_unreachable ();
>>     }
>>  }
>> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
>> -   (set_attr "conds" "*,clob,clob,*")
>> -   (set_attr "length" "*,8,8,*")
>> -   (set_attr "arch" "nota8,*,*,onlya8")]
>> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
>> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
>> +   (set_attr "length" "*,8,8,*,8,8,8")
>> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>>  )
>>
>>  (define_insn "*sub<mode>3_neon"
>> Index: config/arm/arm.md
>> ===================================================================
>> --- config/arm/arm.md   (revision 187751)
>> +++ config/arm/arm.md   (working copy)
>> @@ -574,10 +574,21 @@
>>  [(parallel
>>    [(set (match_operand:DI           0 "s_register_operand" "")
>>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
>> -                  (match_operand:DI 2 "s_register_operand" "")))
>> +                  (match_operand:DI 2 "reg_or_int_operand" "")))
>>     (clobber (reg:CC CC_REGNUM))])]
>>   "TARGET_EITHER"
>>   "
>> +  if (GET_CODE (operands[2]) == CONST_INT)
>> +    {
>> +      if (TARGET_32BIT && arm_const_double_by_immediates (operands[2]))
>> +       {
>> +         emit_insn (gen_arm_adddi3 (operands[0], operands[1], operands[2]));
>> +         DONE;
>> +       }
>> +      else
>> +       operands[2] = force_reg (DImode, operands[2]);
>> +    }
>> +
>>   if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
>>     {
>>       if (!cirrus_fp_register (operands[0], DImode))
>> @@ -609,10 +620,10 @@
>>   [(set_attr "length" "4")]
>>  )
>>
>> -(define_insn_and_split "*arm_adddi3"
>> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
>> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
>> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
>> +(define_insn_and_split "arm_adddi3"
>> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
>> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
>> +                (match_operand:DI 2 "reg_or_int_operand" "r,  0, r, Di,Di")))
>>    (clobber (reg:CC CC_REGNUM))]
>>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>>   "#"
>> @@ -630,8 +641,17 @@
>>     operands[0] = gen_lowpart (SImode, operands[0]);
>>     operands[4] = gen_highpart (SImode, operands[1]);
>>     operands[1] = gen_lowpart (SImode, operands[1]);
>> -    operands[5] = gen_highpart (SImode, operands[2]);
>> -    operands[2] = gen_lowpart (SImode, operands[2]);
>> +    if (GET_CODE (operands[2]) == CONST_INT)
>> +      {
>> +       HOST_WIDE_INT v = INTVAL (operands[2]);
>> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
>> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
>> +      }
>> +    else
>> +      {
>> +       operands[5] = gen_highpart (SImode, operands[2]);
>> +       operands[2] = gen_lowpart (SImode, operands[2]);
>> +      }
>>   }"
>>   [(set_attr "conds" "clob")
>>    (set_attr "length" "8")]
>>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-27 16:14       ` Ramana Radhakrishnan
@ 2012-06-28  9:37         ` Carrot Wei
  2012-06-28 10:02           ` Ramana Radhakrishnan
  2012-06-28 10:28           ` Richard Earnshaw
  0 siblings, 2 replies; 13+ messages in thread
From: Carrot Wei @ 2012-06-28  9:37 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, Richard Earnshaw

Hi Ramana

Thanks for the review, please see my inlined comments.

On Thu, Jun 28, 2012 at 12:02 AM, Ramana Radhakrishnan
<ramana.radhakrishnan@linaro.org> wrote:
>
> On 8 June 2012 10:12, Carrot Wei <carrot@google.com> wrote:
> > Hi
> >
> > In rtl expression, substract a constant c is expressed as add a value -c, so it
> > is alse processed by adddi3, and I extend it more to handle a subtraction of
> > 64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
> > represent substraction with 64bit constant while continue keeping the add rtl
> > expression.
> >
>
> Sorry about the time it has taken to review this patch -Thanks for
> tackling this but I'm not convinced that this patch is correct and
> definitely can be more efficient.
>
> The range of valid 64 bit constants allowed would be in my opinion are
> the following- obtained by dividing the 64 bit constant into 2 32 bit
> halves (upper32 and lower32 referred to as upper and lower below)
>
>  arm_not_operand (upper) && arm_add_operand (lower) which boils down
> to the valid combination of
>
>  adds lo : adc hi - both positive constants.
>  adds lo ; sbc hi  - lower positive, upper negative
I assume you mean "sbc -hi" or "sbc abs(hi)", similar for following instructions

>
>  subs lo ; sbc hi - lower negative, upper negative
>  subs lo ; adc hi  - lower negative, upper positive
>
My first version did the similar thing, but in some cases subs and
adds may generate different carry flag. Assume the low word is 0 and
high word is negative, your method will generate

adds r0, r0, 0
sbc   r1, r1, abs(hi)

My method generates

subs r0, r0, 0
sbc   r1, r1, abs(hi)

ARM's definition of subs is

(result, carry, overflow) = AddWithCarry(R[n], NOT(imm32), ‘1’);

So the subs instruction will set carry flag, but adds clear carry
flag, and finally generate different result in r1.

>
> Therefore I'd do the following -
>
> * Don't make *arm_adddi3 a named pattern - we don't need that.
> * Change the *addsi3_carryin_<optab> pattern to be something like this :
>
> --- a/gcc/config/arm/arm.md
> +++ b/gcc/config/arm/arm.md
> @@ -1001,12 +1001,14 @@
>  )
>
>  (define_insn "*addsi3_carryin_<optab>"
> -  [(set (match_operand:SI 0 "s_register_operand" "=r")
> -       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
> -                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
> +  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
> +       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r"
> +                         (match_operand:SI 2 "arm_not_operand" "rI,K"

Do you mean arm_add_operand?

>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
>   "TARGET_32BIT"
> -  "adc%?\\t%0, %1, %2"
> +  "@
> +  adc%?\\t%0, %1, %2
> +  sbc%?\\t%0, %1, %#n2"
>   [(set_attr "conds" "use")]
>  )
>
> * I'd like a new const_ok_for_dimode_op function that dealt with each
> of these operations, thus your plus operation with a DImode constant
> would just be a check similar to what I've said above.

Good idea, it will make the interface cleaner. I will do it later.

> * You then don't need the new subdi3_immediate pattern and the split
> can happen after reload. Adjust predicates and constraints
> accordingly, delete it. Also please use CONST_INT_P instead of

Even if I delete subdi3_immediate pattern, we still need the
predicates and constraints to represent the negative di numbers in
other patterns.

thanks
Carrot

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-28  9:37         ` Carrot Wei
@ 2012-06-28 10:02           ` Ramana Radhakrishnan
  2012-06-28 11:43             ` Carrot Wei
  2012-06-28 10:28           ` Richard Earnshaw
  1 sibling, 1 reply; 13+ messages in thread
From: Ramana Radhakrishnan @ 2012-06-28 10:02 UTC (permalink / raw)
  To: Carrot Wei; +Cc: gcc-patches, Richard Earnshaw

On 28 June 2012 10:03, Carrot Wei <carrot@google.com> wrote:
> Hi Ramana
>
> Thanks for the review, please see my inlined comments.
>
> On Thu, Jun 28, 2012 at 12:02 AM, Ramana Radhakrishnan
> <ramana.radhakrishnan@linaro.org> wrote:
>>
>> On 8 June 2012 10:12, Carrot Wei <carrot@google.com> wrote:
>> > Hi
>> >
>> > In rtl expression, substract a constant c is expressed as add a value -c, so it
>> > is alse processed by adddi3, and I extend it more to handle a subtraction of
>> > 64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
>> > represent substraction with 64bit constant while continue keeping the add rtl
>> > expression.
>> >
>>
>> Sorry about the time it has taken to review this patch -Thanks for
>> tackling this but I'm not convinced that this patch is correct and
>> definitely can be more efficient.
>>
>> The range of valid 64 bit constants allowed would be in my opinion are
>> the following- obtained by dividing the 64 bit constant into 2 32 bit
>> halves (upper32 and lower32 referred to as upper and lower below)
>>
>>  arm_not_operand (upper) && arm_add_operand (lower) which boils down
>> to the valid combination of
>>
>>  adds lo : adc hi - both positive constants.
>>  adds lo ; sbc hi  - lower positive, upper negative

> I assume you mean "sbc -hi" or "sbc abs(hi)", similar for following instructions

hi = ~upper32

lower = lower 32 bits of the constant
hi =  ~ (upper32 bits) of the constant ( bitwise twiddle not a negate :) )

For e.g.

unsigned long long foo4 (unsigned long long x)
{
 return x - 0x25ULL;
}

should be
subs r0, r0, #37
sbc   r1, r1, #0

Notice that it's #0 and not 1 ..... :)



>
>>
>>  subs lo ; sbc hi - lower negative, upper negative
>>  subs lo ; adc hi  - lower negative, upper positive
>>
> My first version did the similar thing, but in some cases subs and
> adds may generate different carry flag. Assume the low word is 0 and
> high word is negative, your method will generate
>
> adds r0, r0, 0
> sbc   r1, r1, abs(hi)

No it will generate

adds r0, r0, #0
sbc    r1, r1, ~hi

and not abs (hi)



>
> My method generates
>
> subs r0, r0, 0
> sbc   r1, r1, abs(hi)
>
> ARM's definition of subs is
>
> (result, carry, overflow) = AddWithCarry(R[n], NOT(imm32), ‘1’);
>
> So the subs instruction will set carry flag, but adds clear carry
> flag, and finally generate different result in r1.
>
>>
>> Therefore I'd do the following -
>>
>> * Don't make *arm_adddi3 a named pattern - we don't need that.
>> * Change the *addsi3_carryin_<optab> pattern to be something like this :
>>
>> --- a/gcc/config/arm/arm.md
>> +++ b/gcc/config/arm/arm.md
>> @@ -1001,12 +1001,14 @@
>>  )
>>
>>  (define_insn "*addsi3_carryin_<optab>"
>> -  [(set (match_operand:SI 0 "s_register_operand" "=r")
>> -       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
>> -                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
>> +  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
>> +       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r"
>> +                         (match_operand:SI 2 "arm_not_operand" "rI,K"
>
> Do you mean arm_add_operand?

No I mean arm_not_operand and it was a deliberate choice as explained above.

>
>>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
>>   "TARGET_32BIT"
>> -  "adc%?\\t%0, %1, %2"
>> +  "@
>> +  adc%?\\t%0, %1, %2
>> +  sbc%?\\t%0, %1, %#n2"
>>   [(set_attr "conds" "use")]
>>  )
>>
>> * I'd like a new const_ok_for_dimode_op function that dealt with each
>> of these operations, thus your plus operation with a DImode constant
>> would just be a check similar to what I've said above.
>
> Good idea, it will make the interface cleaner. I will do it later.

I think it should help with a clean interface for all the operations
you plan to add.

>
>> * You then don't need the new subdi3_immediate pattern and the split
>> can happen after reload. Adjust predicates and constraints
>> accordingly, delete it. Also please use CONST_INT_P instead of
>
> Even if I delete subdi3_immediate pattern, we still need the
> predicates and constraints to represent the negative di numbers in
> other patterns.

I agree you need the predicate - I suspect you can get away with a
single constraint for all valid add immediate DImode operands
especially if you are splitting it later to the constituent forms.



regards,
Ramana


>
> thanks
> Carrot

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-28  9:37         ` Carrot Wei
  2012-06-28 10:02           ` Ramana Radhakrishnan
@ 2012-06-28 10:28           ` Richard Earnshaw
  1 sibling, 0 replies; 13+ messages in thread
From: Richard Earnshaw @ 2012-06-28 10:28 UTC (permalink / raw)
  To: Carrot Wei; +Cc: Ramana Radhakrishnan, gcc-patches

On 28/06/12 10:03, Carrot Wei wrote:
> Hi Ramana
> 
> Thanks for the review, please see my inlined comments.
> 
> On Thu, Jun 28, 2012 at 12:02 AM, Ramana Radhakrishnan
> <ramana.radhakrishnan@linaro.org> wrote:
>>
>> On 8 June 2012 10:12, Carrot Wei <carrot@google.com> wrote:
>>> Hi
>>>
>>> In rtl expression, substract a constant c is expressed as add a value -c, so it
>>> is alse processed by adddi3, and I extend it more to handle a subtraction of
>>> 64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
>>> represent substraction with 64bit constant while continue keeping the add rtl
>>> expression.
>>>
>>
>> Sorry about the time it has taken to review this patch -Thanks for
>> tackling this but I'm not convinced that this patch is correct and
>> definitely can be more efficient.
>>
>> The range of valid 64 bit constants allowed would be in my opinion are
>> the following- obtained by dividing the 64 bit constant into 2 32 bit
>> halves (upper32 and lower32 referred to as upper and lower below)
>>
>>  arm_not_operand (upper) && arm_add_operand (lower) which boils down
>> to the valid combination of
>>
>>  adds lo : adc hi - both positive constants.
>>  adds lo ; sbc hi  - lower positive, upper negative
> I assume you mean "sbc -hi" or "sbc abs(hi)", similar for following instructions
> 

No, it's sbc ~hi -- bitwise inversion

It all falls out from the specification, where

	adc == X + Y + C
and
	sbc == X + ~Y + C.

Hence the need to use arm_not_operand.

R.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-28 10:02           ` Ramana Radhakrishnan
@ 2012-06-28 11:43             ` Carrot Wei
  2012-06-28 12:30               ` Ramana Radhakrishnan
  0 siblings, 1 reply; 13+ messages in thread
From: Carrot Wei @ 2012-06-28 11:43 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, Richard Earnshaw

On Thu, Jun 28, 2012 at 5:37 PM, Ramana Radhakrishnan
<ramana.radhakrishnan@linaro.org> wrote:
> On 28 June 2012 10:03, Carrot Wei <carrot@google.com> wrote:
>> Hi Ramana
>>
>> Thanks for the review, please see my inlined comments.
>>
>> On Thu, Jun 28, 2012 at 12:02 AM, Ramana Radhakrishnan
>> <ramana.radhakrishnan@linaro.org> wrote:
>>>
>>> On 8 June 2012 10:12, Carrot Wei <carrot@google.com> wrote:
>>> > Hi
>>> >
>>> > In rtl expression, substract a constant c is expressed as add a value -c, so it
>>> > is alse processed by adddi3, and I extend it more to handle a subtraction of
>>> > 64bit constant. I created an insn pattern arm_subdi3_immediate to specifically
>>> > represent substraction with 64bit constant while continue keeping the add rtl
>>> > expression.
>>> >
>>>
>>> Sorry about the time it has taken to review this patch -Thanks for
>>> tackling this but I'm not convinced that this patch is correct and
>>> definitely can be more efficient.
>>>
>>> The range of valid 64 bit constants allowed would be in my opinion are
>>> the following- obtained by dividing the 64 bit constant into 2 32 bit
>>> halves (upper32 and lower32 referred to as upper and lower below)
>>>
>>>  arm_not_operand (upper) && arm_add_operand (lower) which boils down
>>> to the valid combination of
>>>
>>>  adds lo : adc hi - both positive constants.
>>>  adds lo ; sbc hi  - lower positive, upper negative
>
>> I assume you mean "sbc -hi" or "sbc abs(hi)", similar for following instructions
>
> hi = ~upper32
>
> lower = lower 32 bits of the constant
> hi =  ~ (upper32 bits) of the constant ( bitwise twiddle not a negate :) )
>
> For e.g.
>
> unsigned long long foo4 (unsigned long long x)
> {
>  return x - 0x25ULL;
> }
>
> should be
> subs r0, r0, #37
> sbc   r1, r1, #0
>
> Notice that it's #0 and not 1 ..... :)
>
>
>
>>
>>>
>>>  subs lo ; sbc hi - lower negative, upper negative
>>>  subs lo ; adc hi  - lower negative, upper positive
>>>

Thank you for the detailed explanation. So the four cases should be

 adds lo : adc hi - both positive constants.
 adds lo ; sbc ~hi  - lower positive, upper negative
 subs -lo ; sbc ~hi - lower negative, upper negative
 subs -lo ; adc hi  - lower negative, upper positive


>> My first version did the similar thing, but in some cases subs and
>> adds may generate different carry flag. Assume the low word is 0 and
>> high word is negative, your method will generate
>>
>> adds r0, r0, 0
>> sbc   r1, r1, abs(hi)
>
> No it will generate
>
> adds r0, r0, #0
> sbc    r1, r1, ~hi
>
> and not abs (hi)
>
>
>
>>
>> My method generates
>>
>> subs r0, r0, 0
>> sbc   r1, r1, abs(hi)
>>
>> ARM's definition of subs is
>>
>> (result, carry, overflow) = AddWithCarry(R[n], NOT(imm32), ‘1’);
>>
>> So the subs instruction will set carry flag, but adds clear carry
>> flag, and finally generate different result in r1.
>>
>>>
>>> Therefore I'd do the following -
>>>
>>> * Don't make *arm_adddi3 a named pattern - we don't need that.
>>> * Change the *addsi3_carryin_<optab> pattern to be something like this :
>>>
>>> --- a/gcc/config/arm/arm.md
>>> +++ b/gcc/config/arm/arm.md
>>> @@ -1001,12 +1001,14 @@
>>>  )
>>>
>>>  (define_insn "*addsi3_carryin_<optab>"
>>> -  [(set (match_operand:SI 0 "s_register_operand" "=r")
>>> -       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
>>> -                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
>>> +  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
>>> +       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r"
>>> +                         (match_operand:SI 2 "arm_not_operand" "rI,K"
>>
>> Do you mean arm_add_operand?
>
> No I mean arm_not_operand and it was a deliberate choice as explained above.
>
>>
>>>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
>>>   "TARGET_32BIT"
>>> -  "adc%?\\t%0, %1, %2"
>>> +  "@
>>> +  adc%?\\t%0, %1, %2
>>> +  sbc%?\\t%0, %1, %#n2"

Since constraint "K" is logical not, not negative, should the last
line be following?

+  sbc%?\\t%0, %1, #%B2"

thanks
Carrot

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-28 11:43             ` Carrot Wei
@ 2012-06-28 12:30               ` Ramana Radhakrishnan
  2012-06-29 13:15                 ` Carrot Wei
  0 siblings, 1 reply; 13+ messages in thread
From: Ramana Radhakrishnan @ 2012-06-28 12:30 UTC (permalink / raw)
  To: Carrot Wei; +Cc: gcc-patches, Richard Earnshaw

>  subs -lo ; sbc ~hi - lower negative, upper negative
>  subs -lo ; adc hi  - lower negative, upper positive

Yes.

<snip>

>>
>>>
>>>>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
>>>>   "TARGET_32BIT"
>>>> -  "adc%?\\t%0, %1, %2"
>>>> +  "@
>>>> +  adc%?\\t%0, %1, %2
>>>> +  sbc%?\\t%0, %1, %#n2"
>
> Since constraint "K" is logical not, not negative, should the last
> line be following?
>
> +  sbc%?\\t%0, %1, #%B2"

Indeed that was a typo on my part. Sorry about that.

Ramana

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-28 12:30               ` Ramana Radhakrishnan
@ 2012-06-29 13:15                 ` Carrot Wei
  2012-06-29 14:27                   ` Ramana Radhakrishnan
  0 siblings, 1 reply; 13+ messages in thread
From: Carrot Wei @ 2012-06-29 13:15 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, Richard Earnshaw

Hi

So the following is updated patch. Tested on qemu with arm/thumb modes
without regression.

thanks
Carrot


2012-06-29  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.
	* gcc.target/arm/pr53447-2.c: New testcase.


2012-06-29  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/arm-protos.h (const_ok_for_dimode_op): New prototype.
	* config/arm/arm.c (const_ok_for_dimode_op): New function.
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/predicates.md (arm_adddi_operand): New predicate.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_adddi3): Likewise.
	(addsi3_carryin_<optab>): Extend it to handle sbc case.
	* config/arm/neon.md (adddi3_neon): Extend it to handle constants.


Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: testsuite/gcc.target/arm/pr53447-2.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-2.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-2.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p -= 0x100000008;
+}
Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 187751)
+++ config/arm/arm.c	(working copy)
@@ -2497,6 +2497,28 @@
     }
 }

+/* Return true if I is a valid di mode constant for the operation CODE.  */
+int
+const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
+{
+  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
+  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
+  rtx hi = GEN_INT (hi_val);
+  rtx lo = GEN_INT (lo_val);
+
+  if (TARGET_THUMB1)
+    return 0;
+
+  switch (code)
+    {
+    case PLUS:
+      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
+
+    default:
+      return 0;
+    }
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h	(revision 187751)
+++ config/arm/arm-protos.h	(working copy)
@@ -49,6 +49,7 @@
 extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
+extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
 			       HOST_WIDE_INT, rtx, rtx, int);
 extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 187751)
+++ config/arm/neon.md	(working copy)
@@ -588,9 +588,9 @@
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "arm_adddi_operand"
"w,r,0,w,r,Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -600,13 +600,16 @@
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 187751)
+++ config/arm/constraints.md	(working copy)
@@ -29,7 +29,7 @@
 ;; in Thumb-1 state: I, J, K, L, M, N, O

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -251,6 +251,12 @@
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+  In ARM/Thumb-2 state a const_int that can be used by insn adddi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/predicates.md
===================================================================
--- config/arm/predicates.md	(revision 187751)
+++ config/arm/predicates.md	(working copy)
@@ -154,6 +154,11 @@
   (ior (match_operand 0 "arm_rhs_operand")
        (match_operand 0 "arm_neg_immediate_operand")))

+(define_predicate "arm_adddi_operand"
+  (ior (match_operand 0 "s_register_operand")
+       (and (match_code "const_int")
+	    (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))
+
 (define_predicate "arm_addimm_operand"
   (ior (match_operand 0 "arm_immediate_operand")
        (match_operand 0 "arm_neg_immediate_operand")))
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 187751)
+++ config/arm/arm.md	(working copy)
@@ -574,7 +574,7 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "arm_adddi_operand"  "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
@@ -609,10 +609,10 @@
   [(set_attr "length" "4")]
 )

-(define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+(define_insn_and_split "arm_adddi3"
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
   "#"
@@ -630,8 +630,17 @@
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
-    operands[2] = gen_lowpart (SImode, operands[2]);
+    if (GET_CODE (operands[2]) == CONST_INT)
+      {
+	HOST_WIDE_INT v = INTVAL (operands[2]);
+	operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
+	operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
+      }
+    else
+      {
+	operands[5] = gen_highpart (SImode, operands[2]);
+	operands[2] = gen_lowpart (SImode, operands[2]);
+      }
   }"
   [(set_attr "conds" "clob")
    (set_attr "length" "8")]
@@ -980,12 +989,14 @@
 )

 (define_insn "*addsi3_carryin_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
-			  (match_operand:SI 2 "arm_rhs_operand" "rI"))
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r")
+			  (match_operand:SI 2 "arm_not_operand" "rI,K"))
 		 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
   "TARGET_32BIT"
-  "adc%?\\t%0, %1, %2"
+  "@
+   adc%?\\t%0, %1, %2
+   sbc%?\\t%0, %1, #%B2"
   [(set_attr "conds" "use")]
 )




On Thu, Jun 28, 2012 at 7:48 PM, Ramana Radhakrishnan
<ramana.radhakrishnan@linaro.org> wrote:
>
> >  subs -lo ; sbc ~hi - lower negative, upper negative
> >  subs -lo ; adc hi  - lower negative, upper positive
>
> Yes.
>
> <snip>
>
> >>
> >>>
> >>>>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
> >>>>   "TARGET_32BIT"
> >>>> -  "adc%?\\t%0, %1, %2"
> >>>> +  "@
> >>>> +  adc%?\\t%0, %1, %2
> >>>> +  sbc%?\\t%0, %1, %#n2"
> >
> > Since constraint "K" is logical not, not negative, should the last
> > line be following?
> >
> > +  sbc%?\\t%0, %1, #%B2"
>
> Indeed that was a typo on my part. Sorry about that.
>
> Ramana

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-29 13:15                 ` Carrot Wei
@ 2012-06-29 14:27                   ` Ramana Radhakrishnan
  2012-07-01 15:30                     ` Carrot Wei
  0 siblings, 1 reply; 13+ messages in thread
From: Ramana Radhakrishnan @ 2012-06-29 14:27 UTC (permalink / raw)
  To: Carrot Wei; +Cc: gcc-patches, Richard Earnshaw

On 29 June 2012 12:23, Carrot Wei <carrot@google.com> wrote:
> Hi
>
> So the following is updated patch. Tested on qemu with arm/thumb modes

Assuming this testing was with and without neon ? Because the patterns
changed are different whether you use Neon or not.

> without regression.

Can you add some tests for all 4 cases ? See comments inline below for
some changes ?

Ok with those changes if no regressions for above mentioned testing.

>
> thanks
> Carrot
>
>
> 2012-06-29  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * gcc.target/arm/pr53447-1.c: New testcase.
>        * gcc.target/arm/pr53447-2.c: New testcase.
>
>
> 2012-06-29  Wei Guozhi  <carrot@google.com>
>
>        PR target/53447
>        * config/arm/arm-protos.h (const_ok_for_dimode_op): New prototype.
>        * config/arm/arm.c (const_ok_for_dimode_op): New function.
>        * config/arm/constraints.md (Dd): New constraint.
>        * config/arm/predicates.md (arm_adddi_operand): New predicate.
>        * config/arm/arm.md (adddi3): Extend it to handle constants.
>        (arm_adddi3): Likewise.
>        (addsi3_carryin_<optab>): Extend it to handle sbc case.
>        * config/arm/neon.md (adddi3_neon): Extend it to handle constants.
>
>
> Index: testsuite/gcc.target/arm/pr53447-1.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-1.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p += 0x100000001;
> +}
> Index: testsuite/gcc.target/arm/pr53447-2.c
> ===================================================================
> --- testsuite/gcc.target/arm/pr53447-2.c        (revision 0)
> +++ testsuite/gcc.target/arm/pr53447-2.c        (revision 0)
> @@ -0,0 +1,8 @@
> +/* { dg-options "-O2" }  */
> +/* { dg-require-effective-target arm32 } */
> +/* { dg-final { scan-assembler-not "mov" } } */
> +
> +void t0p(long long * p)
> +{
> +  *p -= 0x100000008;
> +}
> Index: config/arm/arm.c
> ===================================================================
> --- config/arm/arm.c    (revision 187751)
> +++ config/arm/arm.c    (working copy)
> @@ -2497,6 +2497,28 @@
>     }
>  }
>
> +/* Return true if I is a valid di mode constant for the operation CODE.  */
> +int
> +const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
> +{
> +  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
> +  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
> +  rtx hi = GEN_INT (hi_val);
> +  rtx lo = GEN_INT (lo_val);
> +
> +  if (TARGET_THUMB1)
> +    return 0;
> +
> +  switch (code)
> +    {
> +    case PLUS:
> +      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
> +
> +    default:
> +      return 0;
> +    }
> +}
> +
>  /* Emit a sequence of insns to handle a large constant.
>    CODE is the code of the operation required, it can be any of SET, PLUS,
>    IOR, AND, XOR, MINUS;
> Index: config/arm/arm-protos.h
> ===================================================================
> --- config/arm/arm-protos.h     (revision 187751)
> +++ config/arm/arm-protos.h     (working copy)
> @@ -49,6 +49,7 @@
>  extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
>  extern int const_ok_for_arm (HOST_WIDE_INT);
>  extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
> +extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
>  extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
>                               HOST_WIDE_INT, rtx, rtx, int);
>  extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
> Index: config/arm/neon.md
> ===================================================================
> --- config/arm/neon.md  (revision 187751)
> +++ config/arm/neon.md  (working copy)
> @@ -588,9 +588,9 @@
>  )
>
>  (define_insn "adddi3_neon"
> -  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
> -        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
> -                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
> +  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
> +        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
> +                 (match_operand:DI 2 "arm_adddi_operand"
> "w,r,0,w,r,Dd,Dd")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_NEON"
>  {
> @@ -600,13 +600,16 @@
>     case 3: return "vadd.i64\t%P0, %P1, %P2";
>     case 1: return "#";
>     case 2: return "#";
> +    case 4: return "#";
> +    case 5: return "#";
> +    case 6: return "#";
>     default: gcc_unreachable ();
>     }
>  }
> -  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
> -   (set_attr "conds" "*,clob,clob,*")
> -   (set_attr "length" "*,8,8,*")
> -   (set_attr "arch" "nota8,*,*,onlya8")]
> +  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
> +   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
> +   (set_attr "length" "*,8,8,*,8,8,8")
> +   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
>  )
>
>  (define_insn "*sub<mode>3_neon"
> Index: config/arm/constraints.md
> ===================================================================
> --- config/arm/constraints.md   (revision 187751)
> +++ config/arm/constraints.md   (working copy)
> @@ -29,7 +29,7 @@
>  ;; in Thumb-1 state: I, J, K, L, M, N, O
>
>  ;; The following multi-letter normal constraints have been used:
> -;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
> +;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
>  ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
>  ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py
>
> @@ -251,6 +251,12 @@
>       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
>                   && !(optimize_size || arm_ld_sched)")))
>
> +(define_constraint "Dd"
> + "@internal
> +  In ARM/Thumb-2 state a const_int that can be used by insn adddi."
> + (and (match_code "const_int")
> +      (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
> +
>  (define_constraint "Di"
>  "@internal
>   In ARM/Thumb-2 state a const_int or const_double where both the high
> Index: config/arm/predicates.md
> ===================================================================
> --- config/arm/predicates.md    (revision 187751)
> +++ config/arm/predicates.md    (working copy)
> @@ -154,6 +154,11 @@
>   (ior (match_operand 0 "arm_rhs_operand")
>        (match_operand 0 "arm_neg_immediate_operand")))
>
> +(define_predicate "arm_adddi_operand"
> +  (ior (match_operand 0 "s_register_operand")
> +       (and (match_code "const_int")
> +           (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))
> +
>  (define_predicate "arm_addimm_operand"
>   (ior (match_operand 0 "arm_immediate_operand")
>        (match_operand 0 "arm_neg_immediate_operand")))
> Index: config/arm/arm.md
> ===================================================================
> --- config/arm/arm.md   (revision 187751)
> +++ config/arm/arm.md   (working copy)
> @@ -574,7 +574,7 @@
>  [(parallel
>    [(set (match_operand:DI           0 "s_register_operand" "")
>          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> -                  (match_operand:DI 2 "s_register_operand" "")))
> +                  (match_operand:DI 2 "arm_adddi_operand"  "")))
>     (clobber (reg:CC CC_REGNUM))])]
>   "TARGET_EITHER"
>   "
> @@ -609,10 +609,10 @@
>   [(set_attr "length" "4")]
>  )
>
> -(define_insn_and_split "*arm_adddi3"
> -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> +(define_insn_and_split "arm_adddi3"
> +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> +                (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
>    (clobber (reg:CC CC_REGNUM))]
>   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
>   "#"
> @@ -630,8 +630,17 @@
>     operands[0] = gen_lowpart (SImode, operands[0]);
>     operands[4] = gen_highpart (SImode, operands[1]);
>     operands[1] = gen_lowpart (SImode, operands[1]);
> -    operands[5] = gen_highpart (SImode, operands[2]);
> -    operands[2] = gen_lowpart (SImode, operands[2]);
> +    if (GET_CODE (operands[2]) == CONST_INT)
> +      {
> +       HOST_WIDE_INT v = INTVAL (operands[2]);
> +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> +      }
> +    else
> +      {
> +       operands[5] = gen_highpart (SImode, operands[2]);
> +       operands[2] = gen_lowpart (SImode, operands[2]);
> +      }

Instead


  operands[5] = gen_highpart_mode (SImode, DImode, operands[2]);
  operands[2] = gen_lowpart (SImode, operands[2]);

So you don't need a check there.



>   }"
>   [(set_attr "conds" "clob")
>    (set_attr "length" "8")]
> @@ -980,12 +989,14 @@
>  )
>
>  (define_insn "*addsi3_carryin_<optab>"
> -  [(set (match_operand:SI 0 "s_register_operand" "=r")
> -       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
> -                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
> +  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
> +       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r")
> +                         (match_operand:SI 2 "arm_not_operand" "rI,K"))
>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
>   "TARGET_32BIT"
> -  "adc%?\\t%0, %1, %2"
> +  "@
> +   adc%?\\t%0, %1, %2
> +   sbc%?\\t%0, %1, #%B2"
>   [(set_attr "conds" "use")]
>  )

Any reason why you didn't consider making these changes to the
*addsi3_carryin_alt2<optab> pattern ?


regards,
Ramana


>
>
>
>
> On Thu, Jun 28, 2012 at 7:48 PM, Ramana Radhakrishnan
> <ramana.radhakrishnan@linaro.org> wrote:
>>
>> >  subs -lo ; sbc ~hi - lower negative, upper negative
>> >  subs -lo ; adc hi  - lower negative, upper positive
>>
>> Yes.
>>
>> <snip>
>>
>> >>
>> >>>
>> >>>>                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
>> >>>>   "TARGET_32BIT"
>> >>>> -  "adc%?\\t%0, %1, %2"
>> >>>> +  "@
>> >>>> +  adc%?\\t%0, %1, %2
>> >>>> +  sbc%?\\t%0, %1, %#n2"
>> >
>> > Since constraint "K" is logical not, not negative, should the last
>> > line be following?
>> >
>> > +  sbc%?\\t%0, %1, #%B2"
>>
>> Indeed that was a typo on my part. Sorry about that.
>>
>> Ramana

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant
  2012-06-29 14:27                   ` Ramana Radhakrishnan
@ 2012-07-01 15:30                     ` Carrot Wei
  0 siblings, 0 replies; 13+ messages in thread
From: Carrot Wei @ 2012-07-01 15:30 UTC (permalink / raw)
  To: Ramana Radhakrishnan; +Cc: gcc-patches, Richard Earnshaw

On Fri, Jun 29, 2012 at 9:57 PM, Ramana Radhakrishnan
<ramana.radhakrishnan@linaro.org> wrote:
>
> On 29 June 2012 12:23, Carrot Wei <carrot@google.com> wrote:
> > Hi
> >
> > So the following is updated patch. Tested on qemu with arm/thumb modes
>
> Assuming this testing was with and without neon ? Because the patterns
> changed are different whether you use Neon or not.
>
Now the patch has been tested with all combination of arm/thumb
neon/non-neon modes.

> > without regression.
>
> Can you add some tests for all 4 cases ? See comments inline below for
> some changes ?
>
New test cases added.

> Ok with those changes if no regressions for above mentioned testing.
>
> > Index: config/arm/arm.md
> > ===================================================================
> > --- config/arm/arm.md   (revision 187751)
> > +++ config/arm/arm.md   (working copy)
> > @@ -574,7 +574,7 @@
> >  [(parallel
> >    [(set (match_operand:DI           0 "s_register_operand" "")
> >          (plus:DI (match_operand:DI 1 "s_register_operand" "")
> > -                  (match_operand:DI 2 "s_register_operand" "")))
> > +                  (match_operand:DI 2 "arm_adddi_operand"  "")))
> >     (clobber (reg:CC CC_REGNUM))])]
> >   "TARGET_EITHER"
> >   "
> > @@ -609,10 +609,10 @@
> >   [(set_attr "length" "4")]
> >  )
> >
> > -(define_insn_and_split "*arm_adddi3"
> > -  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
> > -       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
> > -                (match_operand:DI 2 "s_register_operand" "r,  0")))
> > +(define_insn_and_split "arm_adddi3"
> > +  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
> > +       (plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
> > +                (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
> >    (clobber (reg:CC CC_REGNUM))]
> >   "TARGET_32BIT && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) && !TARGET_NEON"
> >   "#"
> > @@ -630,8 +630,17 @@
> >     operands[0] = gen_lowpart (SImode, operands[0]);
> >     operands[4] = gen_highpart (SImode, operands[1]);
> >     operands[1] = gen_lowpart (SImode, operands[1]);
> > -    operands[5] = gen_highpart (SImode, operands[2]);
> > -    operands[2] = gen_lowpart (SImode, operands[2]);
> > +    if (GET_CODE (operands[2]) == CONST_INT)
> > +      {
> > +       HOST_WIDE_INT v = INTVAL (operands[2]);
> > +       operands[5] = GEN_INT (ARM_SIGN_EXTEND ((v >> 32) & 0xFFFFFFFF));
> > +       operands[2] = GEN_INT (ARM_SIGN_EXTEND (v & 0xFFFFFFFF));
> > +      }
> > +    else
> > +      {
> > +       operands[5] = gen_highpart (SImode, operands[2]);
> > +       operands[2] = gen_lowpart (SImode, operands[2]);
> > +      }
>
> Instead
>
>
>  operands[5] = gen_highpart_mode (SImode, DImode, operands[2]);
>  operands[2] = gen_lowpart (SImode, operands[2]);
>
> So you don't need a check there.
>
A good method.
>
>
> >   }"
> >   [(set_attr "conds" "clob")
> >    (set_attr "length" "8")]
> > @@ -980,12 +989,14 @@
> >  )
> >
> >  (define_insn "*addsi3_carryin_<optab>"
> > -  [(set (match_operand:SI 0 "s_register_operand" "=r")
> > -       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
> > -                         (match_operand:SI 2 "arm_rhs_operand" "rI"))
> > +  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
> > +       (plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r")
> > +                         (match_operand:SI 2 "arm_not_operand" "rI,K"))
> >                 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
> >   "TARGET_32BIT"
> > -  "adc%?\\t%0, %1, %2"
> > +  "@
> > +   adc%?\\t%0, %1, %2
> > +   sbc%?\\t%0, %1, #%B2"
> >   [(set_attr "conds" "use")]
> >  )
>
> Any reason why you didn't consider making these changes to the
> *addsi3_carryin_alt2<optab> pattern ?
>
It's simply because of my ignorance.

thanks
Carrot

The actually committed patch is as following


2012-07-01  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* gcc.target/arm/pr53447-1.c: New testcase.
	* gcc.target/arm/pr53447-2.c: New testcase.
	* gcc.target/arm/pr53447-3.c: New testcase.
	* gcc.target/arm/pr53447-4.c: New testcase.


2012-07-01  Wei Guozhi  <carrot@google.com>

	PR target/53447
	* config/arm/arm-protos.h (const_ok_for_dimode_op): New prototype.
	* config/arm/arm.c (const_ok_for_dimode_op): New function.
	* config/arm/constraints.md (Dd): New constraint.
	* config/arm/predicates.md (arm_adddi_operand): New predicate.
	* config/arm/arm.md (adddi3): Extend it to handle constants.
	(arm_adddi3): Likewise.
	(addsi3_carryin_<optab>): Extend it to handle sbc case.
	(addsi3_carryin_alt2_<optab>): Likewise.
	* config/arm/neon.md (adddi3_neon): Extend it to handle constants.



Index: testsuite/gcc.target/arm/pr53447-1.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-1.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p += 0x100000001;
+}
Index: testsuite/gcc.target/arm/pr53447-2.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-2.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-2.c	(revision 0)
@@ -0,0 +1,8 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+void t0p(long long * p)
+{
+  *p -= 0x100000008;
+}
Index: testsuite/gcc.target/arm/pr53447-3.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-3.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-3.c	(revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+
+void t0p(long long * p)
+{
+  *p +=0x1fffffff8;
+}
Index: testsuite/gcc.target/arm/pr53447-4.c
===================================================================
--- testsuite/gcc.target/arm/pr53447-4.c	(revision 0)
+++ testsuite/gcc.target/arm/pr53447-4.c	(revision 0)
@@ -0,0 +1,9 @@
+/* { dg-options "-O2" }  */
+/* { dg-require-effective-target arm32 } */
+/* { dg-final { scan-assembler-not "mov" } } */
+
+
+void t0p(long long * p)
+{
+  *p -=0x1fffffff8;
+}


Index: config/arm/arm.c
===================================================================
--- config/arm/arm.c	(revision 189101)
+++ config/arm/arm.c	(working copy)
@@ -2507,6 +2507,28 @@
     }
 }

+/* Return true if I is a valid di mode constant for the operation CODE.  */
+int
+const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
+{
+  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
+  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
+  rtx hi = GEN_INT (hi_val);
+  rtx lo = GEN_INT (lo_val);
+
+  if (TARGET_THUMB1)
+    return 0;
+
+  switch (code)
+    {
+    case PLUS:
+      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
+
+    default:
+      return 0;
+    }
+}
+
 /* Emit a sequence of insns to handle a large constant.
    CODE is the code of the operation required, it can be any of SET, PLUS,
    IOR, AND, XOR, MINUS;
Index: config/arm/arm-protos.h
===================================================================
--- config/arm/arm-protos.h	(revision 189101)
+++ config/arm/arm-protos.h	(working copy)
@@ -50,6 +50,7 @@
 extern bool arm_modes_tieable_p (enum machine_mode, enum machine_mode);
 extern int const_ok_for_arm (HOST_WIDE_INT);
 extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
+extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
 extern int arm_split_constant (RTX_CODE, enum machine_mode, rtx,
 			       HOST_WIDE_INT, rtx, rtx, int);
 extern RTX_CODE arm_canonicalize_comparison (RTX_CODE, rtx *, rtx *);
Index: config/arm/neon.md
===================================================================
--- config/arm/neon.md	(revision 189101)
+++ config/arm/neon.md	(working copy)
@@ -587,9 +587,9 @@
 )

 (define_insn "adddi3_neon"
-  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
-        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w")
-                 (match_operand:DI 2 "s_register_operand" "w,r,0,w")))
+  [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
+        (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
+                 (match_operand:DI 2 "arm_adddi_operand"
"w,r,0,w,r,Dd,Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_NEON"
 {
@@ -599,13 +599,16 @@
     case 3: return "vadd.i64\t%P0, %P1, %P2";
     case 1: return "#";
     case 2: return "#";
+    case 4: return "#";
+    case 5: return "#";
+    case 6: return "#";
     default: gcc_unreachable ();
     }
 }
-  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
-   (set_attr "conds" "*,clob,clob,*")
-   (set_attr "length" "*,8,8,*")
-   (set_attr "arch" "nota8,*,*,onlya8")]
+  [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
+   (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
+   (set_attr "length" "*,8,8,*,8,8,8")
+   (set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
 )

 (define_insn "*sub<mode>3_neon"
Index: config/arm/constraints.md
===================================================================
--- config/arm/constraints.md	(revision 189101)
+++ config/arm/constraints.md	(working copy)
@@ -31,7 +31,7 @@
 ;; 'H' was previously used for FPA.

 ;; The following multi-letter normal constraints have been used:
-;; in ARM/Thumb-2 state: Da, Db, Dc, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
+;; in ARM/Thumb-2 state: Da, Db, Dc, Dd, Dn, Dl, DL, Dv, Dy, Di, Dt, Dz
 ;; in Thumb-1 state: Pa, Pb, Pc, Pd, Pe
 ;; in Thumb-2 state: Pj, PJ, Ps, Pt, Pu, Pv, Pw, Px, Py

@@ -242,6 +242,12 @@
       (match_test "TARGET_32BIT && arm_const_double_inline_cost (op) == 4
 		   && !(optimize_size || arm_ld_sched)")))

+(define_constraint "Dd"
+ "@internal
+  In ARM/Thumb-2 state a const_int that can be used by insn adddi."
+ (and (match_code "const_int")
+      (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, PLUS)")))
+
 (define_constraint "Di"
  "@internal
   In ARM/Thumb-2 state a const_int or const_double where both the high
Index: config/arm/predicates.md
===================================================================
--- config/arm/predicates.md	(revision 189101)
+++ config/arm/predicates.md	(working copy)
@@ -141,6 +141,11 @@
   (ior (match_operand 0 "arm_rhs_operand")
        (match_operand 0 "arm_neg_immediate_operand")))

+(define_predicate "arm_adddi_operand"
+  (ior (match_operand 0 "s_register_operand")
+       (and (match_code "const_int")
+	    (match_test "const_ok_for_dimode_op (INTVAL (op), PLUS)"))))
+
 (define_predicate "arm_addimm_operand"
   (ior (match_operand 0 "arm_immediate_operand")
        (match_operand 0 "arm_neg_immediate_operand")))
Index: config/arm/arm.md
===================================================================
--- config/arm/arm.md	(revision 189101)
+++ config/arm/arm.md	(working copy)
@@ -604,7 +604,7 @@
  [(parallel
    [(set (match_operand:DI           0 "s_register_operand" "")
 	  (plus:DI (match_operand:DI 1 "s_register_operand" "")
-	           (match_operand:DI 2 "s_register_operand" "")))
+	           (match_operand:DI 2 "arm_adddi_operand"  "")))
     (clobber (reg:CC CC_REGNUM))])]
   "TARGET_EITHER"
   "
@@ -630,9 +630,9 @@
 )

 (define_insn_and_split "*arm_adddi3"
-  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r")
-	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0")
-		 (match_operand:DI 2 "s_register_operand" "r,  0")))
+  [(set (match_operand:DI          0 "s_register_operand" "=&r,&r,&r,&r,&r")
+	(plus:DI (match_operand:DI 1 "s_register_operand" "%0, 0, r, 0, r")
+		 (match_operand:DI 2 "arm_adddi_operand"  "r,  0, r, Dd, Dd")))
    (clobber (reg:CC CC_REGNUM))]
   "TARGET_32BIT && !TARGET_NEON"
   "#"
@@ -650,7 +650,7 @@
     operands[0] = gen_lowpart (SImode, operands[0]);
     operands[4] = gen_highpart (SImode, operands[1]);
     operands[1] = gen_lowpart (SImode, operands[1]);
-    operands[5] = gen_highpart (SImode, operands[2]);
+    operands[5] = gen_highpart_mode (SImode, DImode, operands[2]);
     operands[2] = gen_lowpart (SImode, operands[2]);
   }"
   [(set_attr "conds" "clob")
@@ -1001,22 +1001,26 @@
 )

 (define_insn "*addsi3_carryin_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
-	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r")
-			  (match_operand:SI 2 "arm_rhs_operand" "rI"))
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+	(plus:SI (plus:SI (match_operand:SI 1 "s_register_operand" "%r,r")
+			  (match_operand:SI 2 "arm_not_operand" "rI,K"))
 		 (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))))]
   "TARGET_32BIT"
-  "adc%?\\t%0, %1, %2"
+  "@
+   adc%?\\t%0, %1, %2
+   sbc%?\\t%0, %1, #%B2"
   [(set_attr "conds" "use")]
 )

 (define_insn "*addsi3_carryin_alt2_<optab>"
-  [(set (match_operand:SI 0 "s_register_operand" "=r")
+  [(set (match_operand:SI 0 "s_register_operand" "=r,r")
 	(plus:SI (plus:SI (LTUGEU:SI (reg:<cnb> CC_REGNUM) (const_int 0))
-			  (match_operand:SI 1 "s_register_operand" "%r"))
-		 (match_operand:SI 2 "arm_rhs_operand" "rI")))]
+			  (match_operand:SI 1 "s_register_operand" "%r,r"))
+		 (match_operand:SI 2 "arm_rhs_operand" "rI,K")))]
   "TARGET_32BIT"
-  "adc%?\\t%0, %1, %2"
+  "@
+   adc%?\\t%0, %1, %2
+   sbc%?\\t%0, %1, #%B2"
   [(set_attr "conds" "use")]
 )

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2012-07-01 15:30 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-05-26 13:43 [ARM Patch 1/n] PR53447: optimizations of 64bit ALU operation with constant Carrot Wei
2012-06-04  9:55 ` Carrot Wei
2012-06-06 11:26   ` Carrot Wei
2012-06-08 10:17     ` Carrot Wei
2012-06-27 16:14       ` Ramana Radhakrishnan
2012-06-28  9:37         ` Carrot Wei
2012-06-28 10:02           ` Ramana Radhakrishnan
2012-06-28 11:43             ` Carrot Wei
2012-06-28 12:30               ` Ramana Radhakrishnan
2012-06-29 13:15                 ` Carrot Wei
2012-06-29 14:27                   ` Ramana Radhakrishnan
2012-07-01 15:30                     ` Carrot Wei
2012-06-28 10:28           ` Richard Earnshaw

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).