public inbox for gcc-patches@gcc.gnu.org
* [PATCH][AArch64] Improve TI mode address offsets
@ 2016-11-10 17:16 Wilco Dijkstra
From: Wilco Dijkstra @ 2016-11-10 17:16 UTC
  To: GCC Patches; +Cc: nd

Improve TImode address offsets: a TImode access may be emitted either as
an LDP of two 64-bit registers or as a 128-bit LDR, so the legal offsets
are the intersection of the ranges both forms support.  When splitting a
large offset into base and offset, use a signed 9-bit unscaled offset.

Remove the Ump constraint on the movti and movtf instructions, as it
blocks the reload optimizer from merging address CSEs (is this supposed
to work only on 'm' constraints?).  The result is improved code size,
especially in wrf and gamess in SPEC2006.
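
Concretely, the check being implemented is the following - a minimal
standalone sketch rather than the GCC source, mirroring what
aarch64_offset_7bit_signed_scaled_p, offset_9bit_signed_unscaled_p and
offset_12bit_unsigned_scaled_p test in the patch; the helper names here
are illustrative and the ranges are the AArch64 immediate ranges:

  #include <stdbool.h>

  /* LDP/STP Xt, Xt2: signed 7-bit immediate, scaled by 8.  */
  static bool ldp_offset_ok (long long off)
  {
    return off >= -64 * 8 && off <= 63 * 8 && off % 8 == 0;
  }

  /* LDUR/STUR: signed 9-bit unscaled immediate.  */
  static bool unscaled_9bit_ok (long long off)
  {
    return off >= -256 && off <= 255;
  }

  /* LDR/STR Qt: unsigned 12-bit immediate, scaled by 16.  */
  static bool scaled_12bit_ok (long long off)
  {
    return off >= 0 && off <= 4095 * 16 && off % 16 == 0;
  }

  /* A TImode offset must suit the LDP form and one of the LDR forms.  */
  static bool timode_offset_ok (long long off)
  {
    return ldp_offset_ok (off)
           && (unscaled_9bit_ok (off) || scaled_12bit_ok (off));
  }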


int f (int x)
{
  __int128_t arr[100];
  arr[31] = 0;
  arr[48] = 0;
  arr[79] = 0;
  arr[65] = 0;
  arr[70] = 0;
  return arr[x];
}

Before patch (note the multiple redundant add x1, sp, 1024 instructions):
        sub     sp, sp, #1600
        sbfiz   x0, x0, 4, 32
        add     x1, sp, 256
        stp     xzr, xzr, [x1, 240]
        add     x1, sp, 768
        stp     xzr, xzr, [x1]
        add     x1, sp, 1024
        stp     xzr, xzr, [x1, 240]
        add     x1, sp, 1024
        stp     xzr, xzr, [x1, 16]
        add     x1, sp, 1024
        stp     xzr, xzr, [x1, 96]
        ldr     w0, [sp, x0]
        add     sp, sp, 1600
        ret

After patch:
        sub     sp, sp, #1600
        sbfiz   x0, x0, 4, 32
        add     x1, sp, 1024
        stp     xzr, xzr, [sp, 496]
        stp     xzr, xzr, [x1, -256]
        stp     xzr, xzr, [x1, 240]
        stp     xzr, xzr, [x1, 16]
        stp     xzr, xzr, [x1, 96]
        ldr     w0, [sp, x0]
        add     sp, sp, 1600
        ret
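
The key change visible here: an out-of-range TImode displacement is now
split by rounding to the nearest 512-byte boundary, so the remainder
fits the signed 9-bit unscaled range.  A sketch of the arithmetic (the
constants are from the patch; the function and variable names are
illustrative):

  /* Split an out-of-range displacement into anchor + 9-bit remainder.  */
  static void split_ti_offset (long long offset, long long *base,
                               long long *rest)
  {
    *base = (offset + 0x100) & ~0x1ffLL;  /* round to nearest 512 */
    *rest = offset - *base;               /* always in [-256, 255] */
  }

For example, arr[48] lives at sp+768: base = 1024 and rest = -256, hence
the stp xzr, xzr, [x1, -256] above; arr[79] at sp+1264 gives base = 1024
and rest = 240, so all four out-of-range stores share the single base
sp+1024.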


Bootstrap & regress OK.

ChangeLog:
2016-11-10  Wilco Dijkstra  <wdijkstr@arm.com>

    gcc/
	* config/aarch64/aarch64.md (movti_aarch64): Change Ump to m.
	(movtf_aarch64): Likewise.
	* config/aarch64/aarch64.c (aarch64_classify_address):
	Use correct intersection of offsets.
	(aarch64_legitimize_address_displacement): Use 9-bit signed offsets.
	(aarch64_legitimize_address): Use 9-bit signed offsets for TI/TF mode.
	Use 7-bit signed scaled offsets for modes > 16 bytes.

--
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	     instruction memory accesses.  */
 	  if (mode == TImode || mode == TFmode)
 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		    && offset_9bit_signed_unscaled_p (mode, offset));
+		    && (offset_9bit_signed_unscaled_p (mode, offset)
+			|| offset_12bit_unsigned_scaled_p (mode, offset)));
 
 	  /* A 7bit offset check because OImode will emit a ldp/stp
 	     instruction (only big endian will get here).
@@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
 /* Split an out-of-range address displacement into a base and offset.
    Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
    to increase opportunities for sharing the base address of different sizes.
-   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+  HOST_WIDE_INT offset = INTVAL (*disp);
+  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
 
-  if (mode == TImode || mode == TFmode ||
-      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
-    mask = 0xff;
+  if (mode == TImode || mode == TFmode
+      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
+    base = (offset + 0x100) & ~0x1ff;
 
-  *off = GEN_INT (INTVAL (*disp) & ~mask);
-  *disp = GEN_INT (INTVAL (*disp) & mask);
+  *off = GEN_INT (base);
+  *disp = GEN_INT (offset - base);
   return true;
 }
 
@@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a load/store-pair operation?  */
+      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
       HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16
-	  || mode == TImode)
-	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      if (GET_MODE_SIZE (mode) > 16)
+	base_offset = (offset + 0x400) & ~0x7f0;
       /* For offsets aren't a multiple of the access size, the limit is
 	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
@@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
       /* Small negative offsets are supported.  */
       else if (IN_RANGE (offset, -256, 0))
 	base_offset = 0;
+      else if (mode == TImode || mode == TFmode)
+	base_offset = (offset + 0x100) & ~0x1ff;
       /* Use 12-bit offset by access size.  */
       else
 	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1094,9 +1094,9 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
+	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
+	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1211,9 +1211,9 @@
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump


* Re: [PATCH][AArch64] Improve TI mode address offsets
From: Richard Earnshaw @ 2016-11-11 10:26 UTC
  To: Wilco Dijkstra, GCC Patches; +Cc: nd

On 10/11/16 17:16, Wilco Dijkstra wrote:
> [...]
> @@ -1211,9 +1211,9 @@
>  
>  (define_insn "*movtf_aarch64"
>    [(set (match_operand:TF 0
> -	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump
> 


Has this patch been truncated?  The last line above looks to be part-way
through a hunk.


* Re: [PATCH][AArch64] Improve TI mode address offsets
From: Wilco Dijkstra @ 2016-11-11 13:14 UTC
  To: Richard Earnshaw, GCC Patches; +Cc: nd

Richard Earnshaw wrote:

> Has this patch been truncated?  The last line above looks to be part-way
> through a hunk.

Oops sorry, it seems the last few lines are missing. Here is the full version:

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3045e6d6447d5c1860feb51708eeb2a21d2caca9..45f44e96ba9e9d3c8c41d977aa509fa13398a8fd 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4066,7 +4066,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	     instruction memory accesses.  */
 	  if (mode == TImode || mode == TFmode)
 	    return (aarch64_offset_7bit_signed_scaled_p (DImode, offset)
-		    && offset_9bit_signed_unscaled_p (mode, offset));
+		    && (offset_9bit_signed_unscaled_p (mode, offset)
+			|| offset_12bit_unsigned_scaled_p (mode, offset)));
 
 	  /* A 7bit offset check because OImode will emit a ldp/stp
 	     instruction (only big endian will get here).
@@ -4270,18 +4271,19 @@ aarch64_legitimate_address_p (machine_mode mode, rtx x,
 /* Split an out-of-range address displacement into a base and offset.
    Use 4KB range for 1- and 2-byte accesses and a 16KB range otherwise
    to increase opportunities for sharing the base address of different sizes.
-   For TI/TFmode and unaligned accesses use a 256-byte range.  */
+   For unaligned accesses and TI/TF mode use the signed 9-bit range.  */
 static bool
 aarch64_legitimize_address_displacement (rtx *disp, rtx *off, machine_mode mode)
 {
-  HOST_WIDE_INT mask = GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3fff;
+  HOST_WIDE_INT offset = INTVAL (*disp);
+  HOST_WIDE_INT base = offset & ~(GET_MODE_SIZE (mode) < 4 ? 0xfff : 0x3ffc);
 
-  if (mode == TImode || mode == TFmode ||
-      (INTVAL (*disp) & (GET_MODE_SIZE (mode) - 1)) != 0)
-    mask = 0xff;
+  if (mode == TImode || mode == TFmode
+      || (offset & (GET_MODE_SIZE (mode) - 1)) != 0)
+    base = (offset + 0x100) & ~0x1ff;
 
-  *off = GEN_INT (INTVAL (*disp) & ~mask);
-  *disp = GEN_INT (INTVAL (*disp) & mask);
+  *off = GEN_INT (base);
+  *disp = GEN_INT (offset - base);
   return true;
 }
 
@@ -5148,12 +5150,10 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
 	  x = gen_rtx_PLUS (Pmode, base, offset_rtx);
 	}
 
-      /* Does it look like we'll need a load/store-pair operation?  */
+      /* Does it look like we'll need a 16-byte load/store-pair operation?  */
       HOST_WIDE_INT base_offset;
-      if (GET_MODE_SIZE (mode) > 16
-	  || mode == TImode)
-	base_offset = ((offset + 64 * GET_MODE_SIZE (mode))
-		       & ~((128 * GET_MODE_SIZE (mode)) - 1));
+      if (GET_MODE_SIZE (mode) > 16)
+	base_offset = (offset + 0x400) & ~0x7f0;
       /* For offsets aren't a multiple of the access size, the limit is
 	 -256...255.  */
       else if (offset & (GET_MODE_SIZE (mode) - 1))
@@ -5167,6 +5167,8 @@ aarch64_legitimize_address (rtx x, rtx /* orig_x  */, machine_mode mode)
       /* Small negative offsets are supported.  */
       else if (IN_RANGE (offset, -256, 0))
 	base_offset = 0;
+      else if (mode == TImode || mode == TFmode)
+	base_offset = (offset + 0x100) & ~0x1ff;
       /* Use 12-bit offset by access size.  */
       else
 	base_offset = offset & (~0xfff * GET_MODE_SIZE (mode));
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 24b7288976dd0452f41475e40f02750fc56a2a20..62eda569f9b642ac569a61718d7debf7eae1b59e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -1094,9 +1094,9 @@
 
 (define_insn "*movti_aarch64"
   [(set (match_operand:TI 0
-	 "nonimmediate_operand"  "=r, *w,r ,*w,r  ,Ump,Ump,*w,m")
+	 "nonimmediate_operand"  "=r, *w,r ,*w,r,m,m,*w,m")
 	(match_operand:TI 1
-	 "aarch64_movti_operand" " rn,r ,*w,*w,Ump,r  ,Z  , m,*w"))]
+	 "aarch64_movti_operand" " rn,r ,*w,*w,m,r,Z, m,*w"))]
   "(register_operand (operands[0], TImode)
     || aarch64_reg_or_zero (operands[1], TImode))"
   "@
@@ -1211,9 +1211,9 @@
 
 (define_insn "*movtf_aarch64"
   [(set (match_operand:TF 0
-	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r ,Ump,Ump")
+	 "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m")
 	(match_operand:TF 1
-	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,Ump,?r ,Y"))]
+	 "general_operand"      " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))]
   "TARGET_FLOAT && (register_operand (operands[0], TFmode)
     || aarch64_reg_or_fp_zero (operands[1], TFmode))"
   "@
 


* Re: [PATCH][AArch64] Improve TI mode address offsets
From: Wilco Dijkstra @ 2016-12-06 15:26 UTC
  To: Richard Earnshaw, GCC Patches, James Greenhalgh; +Cc: nd


ping


From: Wilco Dijkstra
Sent: 11 November 2016 13:14
To: Richard Earnshaw; GCC Patches
Cc: nd
Subject: Re: [PATCH][AArch64] Improve TI mode address offsets
    
[...]


* Re: [PATCH][AArch64] Improve TI mode address offsets
From: James Greenhalgh @ 2016-12-06 17:00 UTC
  To: Wilco Dijkstra; +Cc: Richard Earnshaw, GCC Patches, nd

On Fri, Nov 11, 2016 at 01:14:15PM +0000, Wilco Dijkstra wrote:
> Richard Earnshaw wrote:
> 
> > Has this patch been truncated?  The last line above looks to be part-way
> > through a hunk.
> 
> Oops sorry, it seems the last few lines are missing. Here is the full version:

OK.

Thanks,
James

> [...]


* Re: [PATCH][AArch64] Improve TI mode address offsets
From: James Greenhalgh @ 2016-12-08 10:03 UTC
  To: Wilco Dijkstra; +Cc: Richard Earnshaw, GCC Patches, nd, doko

On Tue, Dec 06, 2016 at 05:00:25PM +0000, James Greenhalgh wrote:
> On Fri, Nov 11, 2016 at 01:14:15PM +0000, Wilco Dijkstra wrote:
> > Richard Earnshaw wrote:
> > 
> > > Has this patch been truncated?  The last line above looks to be part-way
> > > through a hunk.
> > 
> > Oops sorry, it seems the last few lines are missing. Here is the full version:
> 
> OK.

This patch has caused around 250 new failures when using the tiny memory
model or when using -mfix-cortex-a53-843419 (causing a bootstrap failure
with --enable-fix-cortex-a53-843419).

See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=78733 for more details.

Thanks,
James

> > [...]
