public inbox for gcc-patches@gcc.gnu.org
* [PATCH][Aarch64] v2: Arithmetic overflow common functions [Patch 1/4]
@ 2018-06-06 17:14 Michael Collison
  2018-06-08  0:19 ` James Greenhalgh
  0 siblings, 1 reply; 4+ messages in thread
From: Michael Collison @ 2018-06-06 17:14 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1378 bytes --]

This is a respin of an AArch64 patch that adds support for builtin arithmetic overflow operations. This update separates the patch into multiple pieces and addresses comments made by Richard Earnshaw here:

https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00249.html

Original patch and motivation for patch here:

https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01512.html

This patch primarily contains common functions in aarch64.c for generating TImode scratch registers, and common RTL functions utilized by the overflow patterns in aarch64.md. In addition, a new mode representing overflow, CC_Vmode, is introduced.
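
To make the motivation concrete, here is a minimal example (illustrative only, not part of the patch) of the source-level builtins these patterns target; __builtin_sub_overflow is the documented GCC builtin, and on a 128-bit type it expands through the TImode patterns added later in this series:

#include <stdbool.h>

/* Subtract two signed 128-bit values, returning true on signed
   overflow.  With these patterns the overflow check is intended to
   become a single conditional branch on the V flag rather than a
   wider comparison sequence.  */
bool
sub128_overflows (__int128 x, __int128 y, __int128 *res)
{
  return __builtin_sub_overflow (x, y, res);
}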

Bootstrapped and tested on aarch64-linux-gnu. Okay for trunk?

2018-05-31  Michael Collison  <michael.collison@arm.com>
        Richard Henderson <rth@redhat.com>

* config/aarch64/aarch64-modes.def (CC_V): New.
* config/aarch64/aarch64-protos.h
(aarch64_add_128bit_scratch_regs): Declare.
(aarch64_subv_128bit_scratch_regs): Declare.
(aarch64_expand_subvti): Declare.
(aarch64_gen_unlikely_cbranch): Declare.
* config/aarch64/aarch64.c (aarch64_select_cc_mode): Test
for signed overflow using CC_Vmode.
(aarch64_get_condition_code_1): Handle CC_Vmode.
(aarch64_gen_unlikely_cbranch): New function.
(aarch64_add_128bit_scratch_regs): New function.
(aarch64_subv_128bit_scratch_regs): New function.
(aarch64_expand_subvti): New function.

[-- Attachment #2: gnutools-6308-pt1.patch --]
[-- Type: application/octet-stream, Size: 8062 bytes --]

From 81802f087d0a15d8f1081917ebe75aba70897ec8 Mon Sep 17 00:00:00 2001
From: Michael Collison <michael.collison@arm.com>
Date: Fri, 1 Jun 2018 00:11:14 -0700
Subject: [PATCH] [PATCH 1/4] Gnutools 6308 common files

---
 gcc/config/aarch64/aarch64-modes.def |   1 +
 gcc/config/aarch64/aarch64-protos.h  |  13 +++-
 gcc/config/aarch64/aarch64.c         | 141 +++++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 1a05b6c..ea7ecc2 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -24,6 +24,7 @@ CC_MODE (CC_SWP);
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4ea50ac..090ed2c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -467,6 +467,16 @@ void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 bool aarch64_return_address_signing_enabled (void);
 void aarch64_save_restore_target_globals (tree);
+void aarch64_add_128bit_scratch_regs (rtx, rtx, rtx *,
+				      rtx *, rtx *,
+				      rtx *, rtx *,
+				      rtx *);
+void aarch64_subv_128bit_scratch_regs (rtx, rtx, rtx *,
+				       rtx *, rtx *,
+				       rtx *, rtx *, rtx *);
+void aarch64_expand_subvti (rtx, rtx, rtx,
+			    rtx, rtx, rtx, rtx);
+
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
@@ -493,7 +503,8 @@ void aarch64_split_simd_move (rtx, rtx);
 bool aarch64_float_const_representable_p (rtx);
 
 #if defined (RTX_CODE)
-
+void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
+				   rtx label_ref);
 bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
 				   aarch64_addr_query_type = ADDR_QUERY_M);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c94f709..af8b6cc 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6377,6 +6377,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (y) == ZERO_EXTEND)
     return CC_Cmode;
 
+  /* A test for signed overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == SIGN_EXTEND)
+    return CC_Vmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -6483,6 +6490,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case E_CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_VS;
+	case EQ: return AARCH64_VC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
     }
@@ -16337,6 +16353,131 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
   return true;
 }
 
+/* Generate RTL for a conditional branch with rtx comparison CODE in
+   mode CC_MODE.  The destination of the unlikely conditional branch
+   is LABEL_REF.  */
+
+void
+aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
+			      rtx label_ref)
+{
+  rtx x;
+  x = gen_rtx_fmt_ee (code, VOIDmode,
+		      gen_rtx_REG (cc_mode, CC_REGNUM),
+		      const0_rtx);
+
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
+			    pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+/* Generate DImode scratch registers for 128-bit (TImode) addition.
+
+   OP1 represents the TImode source operand 1
+   OP2 represents the TImode source operand 2
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+void
+aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				 rtx *low_in1, rtx *low_in2,
+				 rtx *high_dest, rtx *high_in1,
+				 rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = gen_lowpart (DImode, op1);
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+  *high_in1 = gen_highpart (DImode, op1);
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
+
+   This function differs from 'aarch64_add_128bit_scratch_regs' in that
+   OP1 can be an immediate constant (zero). We must call
+   subreg_highpart_offset with DImode and TImode arguments, otherwise
+   VOIDmode will be used for the const_int which generates an internal
+   error from subreg_size_highpart_offset which does not expect a size of zero.
+
+   OP1 represents the TImode source operand 1
+   OP2 represents the TImode source operand 2
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+
+void
+aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				  rtx *low_in1, rtx *low_in2,
+				  rtx *high_dest, rtx *high_in1,
+				  rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+
+  *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+/* Generate RTL for 128-bit (TImode) subtraction with overflow.
+
+   OP0 represents the TImode destination operand 0
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+void
+aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
+		       rtx low_in2, rtx high_dest, rtx high_in1,
+		       rtx high_in2)
+{
+  if (low_in2 == const0_rtx)
+    {
+      low_dest = low_in1;
+      emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
+				      force_reg (DImode, high_in2)));
+    }
+  else
+    {
+      if (CONST_INT_P (low_in2))
+	{
+	  low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
+	  high_in2 = force_reg (DImode, high_in2);
+	  emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
+	}
+      else
+	emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
+      emit_insn (gen_subdi3_carryinCV (high_dest,
+				       force_reg (DImode, high_in1),
+				       high_in2));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, op0), low_dest);
+  emit_move_insn (gen_highpart (DImode, op0), high_dest);
+
+}
+
 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
 
 static unsigned HOST_WIDE_INT
-- 
2.7.4



* Re: [PATCH][Aarch64] v2: Arithmetic overflow common functions [Patch 1/4]
  2018-06-06 17:14 [PATCH][Aarch64] v2: Arithmetic overflow common functions [Patch 1/4] Michael Collison
@ 2018-06-08  0:19 ` James Greenhalgh
  2018-06-08 21:45   ` Michael Collison
  0 siblings, 1 reply; 4+ messages in thread
From: James Greenhalgh @ 2018-06-08  0:19 UTC (permalink / raw)
  To: Michael Collison; +Cc: GCC Patches, nd

On Wed, Jun 06, 2018 at 12:14:03PM -0500, Michael Collison wrote:
> This is a respin of an AArch64 patch that adds support for builtin arithmetic overflow operations. This update separates the patch into multiple pieces and addresses comments made by Richard Earnshaw here:
> 
> https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00249.html
> 
> Original patch and motivation for patch here:
> 
> https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01512.html
> 
> This patch primarily contains common functions in aarch64.c for generating TImode scratch registers,
> and common RTL functions utilized by the overflow patterns in aarch64.md. In addition, a new mode representing overflow, CC_Vmode, is introduced.
> 
> Bootstrapped and tested on aarch64-linux-gnu. Okay for trunk?

Normally it is preferred that each patch in a series stands independent of
the others. So if I apply just 1/4 I should get a working toolchain. You
have some dependencies here between 1/4 and 3/4.

Rather than ask you to rework these patches, I think I'll instead ask you to
squash them all to a single commit after we're done with review. That will
save you some rebase work and maintain the property that trunk can be built
at most revisions.


> (aarch64_add_128bit_scratch_regs): Declare.
> (aarch64_subv_128bit_scratch_regs): Declare.

Why use 128bit in the function name rather than
call it aarch64_subvti_scratch_regs?


> @@ -16337,6 +16353,131 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
>    return true;
>  }
>  
> +/* Generate RTL for a conditional branch with rtx comparison CODE in
> +   mode CC_MODE.  The destination of the unlikely conditional branch
> +   is LABEL_REF.  */
> +
> +void
> +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
> +			      rtx label_ref)
> +{
> +  rtx x;
> +  x = gen_rtx_fmt_ee (code, VOIDmode,
> +		      gen_rtx_REG (cc_mode, CC_REGNUM),
> +		      const0_rtx);
> +
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
> +			    pc_rtx);
> +  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +}
> +

I'm a bit surprised this is AArch64 specific and there are no helper functions
to get you here. Not that it should block the patch, but if we can reuse
something I'd prefer we did.

> +void
> +aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
> +		       rtx low_in2, rtx high_dest, rtx high_in1,
> +		       rtx high_in2)
> +{
> +  if (low_in2 == const0_rtx)
> +    {
> +      low_dest = low_in1;
> +      emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
> +				      force_reg (DImode, high_in2)));
> +    }
> +  else
> +    {
> +      if (CONST_INT_P (low_in2))
> +	{
> +	  low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
> +	  high_in2 = force_reg (DImode, high_in2);
> +	  emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
> +	}
> +      else
> +	emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
> +      emit_insn (gen_subdi3_carryinCV (high_dest,
> +				       force_reg (DImode, high_in1),
> +				       high_in2));

This is where we'd break the build. gen_subdi3_carryinCV isn't defined
until 3/4.

The above points are minor.

This patch is OK with them cleaned up, once I've reviewed the other 3 parts
to this series.

James

> 
> 2018-05-31  Michael Collison  <michael.collison@arm.com>
>         Richard Henderson <rth@redhat.com>
> 
> * config/aarch64/aarch64-modes.def (CC_V): New.
> * config/aarch64/aarch64-protos.h
> (aarch64_add_128bit_scratch_regs): Declare.
> (aarch64_subv_128bit_scratch_regs): Declare.
> (aarch64_expand_subvti): Declare.
> (aarch64_gen_unlikely_cbranch): Declare.
> * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test
> for signed overflow using CC_Vmode.
> (aarch64_get_condition_code_1): Handle CC_Vmode.
> (aarch64_gen_unlikely_cbranch): New function.
> (aarch64_add_128bit_scratch_regs): New function.
> (aarch64_subv_128bit_scratch_regs): New function.
> (aarch64_expand_subvti): New function.



* RE: [PATCH][Aarch64] v2: Arithmetic overflow common functions [Patch 1/4]
  2018-06-08  0:19 ` James Greenhalgh
@ 2018-06-08 21:45   ` Michael Collison
  0 siblings, 0 replies; 4+ messages in thread
From: Michael Collison @ 2018-06-08 21:45 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: GCC Patches, nd

[-- Attachment #1: Type: text/plain, Size: 4721 bytes --]

Patch updated as requested:

- name changed from 'aarch64_add_128bit_scratch_regs' to 'aarch64_addti_scratch_regs'
- name changed from 'aarch64_subv_128bit_scratch_regs' to 'aarch64_subvti_scratch_regs'

I did not find any helper function to replace 'aarch64_gen_unlikely_cbranch'.
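
As an illustration of how the renamed helpers compose, here is a sketch of the C body of a subvti4 expander (reconstructed from this series; the actual pattern is added in patch 2/4 and its operand handling may differ):

/* operands[0] is the TImode result, operands[1] and operands[2] the
   TImode sources, operands[3] the label to branch to on overflow.  */
rtx low_dest, low_in1, low_in2, high_dest, high_in1, high_in2;

aarch64_subvti_scratch_regs (operands[1], operands[2],
			     &low_dest, &low_in1, &low_in2,
			     &high_dest, &high_in1, &high_in2);
aarch64_expand_subvti (operands[0], low_dest, low_in1, low_in2,
		       high_dest, high_in1, high_in2);

/* Unlikely branch to operands[3] if the V flag is set.  */
aarch64_gen_unlikely_cbranch (NE, CC_Vmode, operands[3]);
DONE;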

Okay for trunk?


-----Original Message-----
From: James Greenhalgh <james.greenhalgh@arm.com> 
Sent: Thursday, June 7, 2018 5:19 PM
To: Michael Collison <Michael.Collison@arm.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>; nd <nd@arm.com>
Subject: Re: [PATCH][Aarch64] v2: Arithmetic overflow common functions [Patch 1/4]

On Wed, Jun 06, 2018 at 12:14:03PM -0500, Michael Collison wrote:
> This is a respin of an AArch64 patch that adds support for builtin arithmetic overflow operations. This update separates the patch into multiple pieces and addresses comments made by Richard Earnshaw here:
> 
> https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00249.html
> 
> Original patch and motivation for patch here:
> 
> https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01512.html
> 
> This patch primarily contains common functions in aarch64.c for 
> generating TImode scratch registers, and common RTL functions utilized by the overflow patterns in aarch64.md. In addition, a new mode representing overflow, CC_Vmode, is introduced.
> 
> Bootstrapped and tested on aarch64-linux-gnu. Okay for trunk?

Normally it is preferred that each patch in a series stands independent of the others. So if I apply just 1/4 I should get a working toolchain. You have some dependencies here between 1/4 and 3/4.

Rather than ask you to rework these patches, I think I'll instead ask you to squash them all to a single commit after we're done with review. That will save you some rebase work and maintain the property that trunk can be built at most revisions.


> (aarch64_add_128bit_scratch_regs): Declare.
> (aarch64_subv_128bit_scratch_regs): Declare.

Why use 128bit in the function name rather than call it aarch64_subvti_scratch_regs?


> @@ -16337,6 +16353,131 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
>    return true;
>  }
>  
> +/* Generate RTL for a conditional branch with rtx comparison CODE in
> +   mode CC_MODE.  The destination of the unlikely conditional branch
> +   is LABEL_REF.  */
> +
> +void
> +aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
> +			      rtx label_ref)
> +{
> +  rtx x;
> +  x = gen_rtx_fmt_ee (code, VOIDmode,
> +		      gen_rtx_REG (cc_mode, CC_REGNUM),
> +		      const0_rtx);
> +
> +  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
> +			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
> +			    pc_rtx);
> +  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
> +}
> +

I'm a bit surprised this is AArch64 specific and there are no helper functions to get you here. Not that it should block the patch, but if we can reuse something I'd prefer we did.

> +void
> +aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
> +		       rtx low_in2, rtx high_dest, rtx high_in1,
> +		       rtx high_in2)
> +{
> +  if (low_in2 == const0_rtx)
> +    {
> +      low_dest = low_in1;
> +      emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
> +				      force_reg (DImode, high_in2)));
> +    }
> +  else
> +    {
> +      if (CONST_INT_P (low_in2))
> +	{
> +	  low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
> +	  high_in2 = force_reg (DImode, high_in2);
> +	  emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
> +	}
> +      else
> +	emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
> +      emit_insn (gen_subdi3_carryinCV (high_dest,
> +				       force_reg (DImode, high_in1),
> +				       high_in2));

This is where we'd break the build. gen_subdi3_carryinCV isn't defined until 3/4.

The above points are minor.

This patch is OK with them cleaned up, once I've reviewed the other 3 parts to this series.

James

> 
> 2018-05-31  Michael Collison  <michael.collison@arm.com>
>         Richard Henderson <rth@redhat.com>
> 
> * config/aarch64/aarch64-modes.def (CC_V): New.
> * config/aarch64/aarch64-protos.h
> (aarch64_add_128bit_scratch_regs): Declare.
> (aarch64_subv_128bit_scratch_regs): Declare.
> (aarch64_expand_subvti): Declare.
> (aarch64_gen_unlikely_cbranch): Declare.
> * config/aarch64/aarch64.c (aarch64_select_cc_mode): Test for signed 
> overflow using CC_Vmode.
> (aarch64_get_condition_code_1): Handle CC_Vmode.
> (aarch64_gen_unlikely_cbranch): New function.
> (aarch64_add_128bit_scratch_regs): New function.
> (aarch64_subv_128bit_scratch_regs): New function.
> (aarch64_expand_subvti): New function.



[-- Attachment #2: gnutools-6308-pt1.patch --]
[-- Type: application/octet-stream, Size: 8074 bytes --]

From 9288848378f4db80dbee4d7a7103332ecb77502e Mon Sep 17 00:00:00 2001
From: Michael Collison <michael.collison@arm.com>
Date: Fri, 1 Jun 2018 00:11:14 -0700
Subject: [PATCH] [PATCH 1/4] Gnutools 6308 common files

Rename functions to more descriptive names.

Fix comment.
---
 gcc/config/aarch64/aarch64-modes.def |   1 +
 gcc/config/aarch64/aarch64-protos.h  |  13 +++-
 gcc/config/aarch64/aarch64.c         | 141 +++++++++++++++++++++++++++++++++++
 3 files changed, 154 insertions(+), 1 deletion(-)

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 1a05b6c..ea7ecc2 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -24,6 +24,7 @@ CC_MODE (CC_SWP);
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 4ea50ac..ee8db68 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -467,6 +467,16 @@ void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 bool aarch64_return_address_signing_enabled (void);
 void aarch64_save_restore_target_globals (tree);
+void aarch64_addti_scratch_regs (rtx, rtx, rtx *,
+				 rtx *, rtx *,
+				 rtx *, rtx *,
+				 rtx *);
+void aarch64_subvti_scratch_regs (rtx, rtx, rtx *,
+				  rtx *, rtx *,
+				  rtx *, rtx *, rtx *);
+void aarch64_expand_subvti (rtx, rtx, rtx,
+			    rtx, rtx, rtx, rtx);
+
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
@@ -493,7 +503,8 @@ void aarch64_split_simd_move (rtx, rtx);
 bool aarch64_float_const_representable_p (rtx);
 
 #if defined (RTX_CODE)
-
+void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
+				   rtx label_ref);
 bool aarch64_legitimate_address_p (machine_mode, rtx, bool,
 				   aarch64_addr_query_type = ADDR_QUERY_M);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index c94f709..3b86360 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -6377,6 +6377,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (y) == ZERO_EXTEND)
     return CC_Cmode;
 
+  /* A test for signed overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == SIGN_EXTEND)
+    return CC_Vmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -6483,6 +6490,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case E_CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_VS;
+	case EQ: return AARCH64_VC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
     }
@@ -16337,6 +16353,131 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
   return true;
 }
 
+/* Generate RTL for a conditional branch with rtx comparison CODE in
+   mode CC_MODE.  The destination of the unlikely conditional branch
+   is LABEL_REF.  */
+
+void
+aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
+			      rtx label_ref)
+{
+  rtx x;
+  x = gen_rtx_fmt_ee (code, VOIDmode,
+		      gen_rtx_REG (cc_mode, CC_REGNUM),
+		      const0_rtx);
+
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
+			    pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+/* Generate DImode scratch registers for 128-bit (TImode) addition.
+
+   OP1 represents the TImode source operand 1
+   OP2 represents the TImode source operand 2
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+void
+aarch64_addti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+			    rtx *low_in1, rtx *low_in2,
+			    rtx *high_dest, rtx *high_in1,
+			    rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = gen_lowpart (DImode, op1);
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+  *high_in1 = gen_highpart (DImode, op1);
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
+
+   This function differs from 'aarch64_addti_scratch_regs' in that
+   OP1 can be an immediate constant (zero). We must call
+   subreg_highpart_offset with DImode and TImode arguments, otherwise
+   VOIDmode will be used for the const_int which generates an internal
+   error from subreg_size_highpart_offset which does not expect a size of zero.
+
+   OP1 represents the TImode source operand 1
+   OP2 represents the TImode source operand 2
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+
+void
+aarch64_subvti_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+			     rtx *low_in1, rtx *low_in2,
+			     rtx *high_dest, rtx *high_in1,
+			     rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+
+  *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+/* Generate RTL for 128-bit (TImode) subtraction with overflow.
+
+   OP0 represents the TImode destination operand 0
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+void
+aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
+		       rtx low_in2, rtx high_dest, rtx high_in1,
+		       rtx high_in2)
+{
+  if (low_in2 == const0_rtx)
+    {
+      low_dest = low_in1;
+      emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
+				      force_reg (DImode, high_in2)));
+    }
+  else
+    {
+      if (CONST_INT_P (low_in2))
+	{
+	  low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
+	  high_in2 = force_reg (DImode, high_in2);
+	  emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
+	}
+      else
+	emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
+      emit_insn (gen_subdi3_carryinCV (high_dest,
+				       force_reg (DImode, high_in1),
+				       high_in2));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, op0), low_dest);
+  emit_move_insn (gen_highpart (DImode, op0), high_dest);
+
+}
+
 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
 
 static unsigned HOST_WIDE_INT
-- 
2.7.4



* [PATCH][Aarch64] v2: Arithmetic overflow common functions [Patch 1/4]
@ 2017-11-15  7:28 Michael Collison
  0 siblings, 0 replies; 4+ messages in thread
From: Michael Collison @ 2017-11-15  7:28 UTC (permalink / raw)
  To: GCC Patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 1389 bytes --]

This is a respin of an AArch64 patch that adds support for builtin arithmetic overflow operations. This update separates the patch into multiple pieces and addresses comments made by Richard Earnshaw here:

https://gcc.gnu.org/ml/gcc-patches/2017-07/msg00249.html

Original patch and motivation for patch here:

https://gcc.gnu.org/ml/gcc-patches/2017-05/msg01512.html

This patch primarily contains common functions in aarch64.c for generating TImode scratch registers, and common RTL functions utilized by the overflow patterns in aarch64.md. In addition, a new mode representing overflow, CC_Vmode, is introduced.

Bootstrapped and tested on aarch64-linux-gnu. Okay for trunk?

2017-10-26  Michael Collison  <michael.collison@arm.com>
	    Richard Henderson <rth@redhat.com>

	* config/aarch64/aarch64-modes.def (CC_V): New.
	* config/aarch64/aarch64-protos.h
	(aarch64_add_128bit_scratch_regs): Declare.
	(aarch64_subv_128bit_scratch_regs): Declare.
	(aarch64_expand_subvti): Declare.
	(aarch64_gen_unlikely_cbranch): Declare.
	* config/aarch64/aarch64.c (aarch64_select_cc_mode): Test
	for signed overflow using CC_Vmode.
	(aarch64_get_condition_code_1): Handle CC_Vmode.
	(aarch64_gen_unlikely_cbranch): New function.
	(aarch64_add_128bit_scratch_regs): New function.
	(aarch64_subv_128bit_scratch_regs): New function.
	(aarch64_expand_subvti): New function.

[-- Attachment #2: gnutools-6308-common-v2.patch.patch --]
[-- Type: application/octet-stream, Size: 7576 bytes --]

diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def
index 195976c..3b9a151 100644
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -24,6 +24,7 @@ CC_MODE (CC_SWP);
 CC_MODE (CC_NZ);    /* Only N and Z bits of condition flags are valid.  */
 CC_MODE (CC_Z);     /* Only Z bit of condition flags is valid.  */
 CC_MODE (CC_C);     /* Only C bit of condition flags is valid.  */
+CC_MODE (CC_V);     /* Only V bit of condition flags is valid.  */
 
 /* Half-precision floating point for __fp16.  */
 FLOAT_MODE (HF, 2, 0);
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 5d7c5df..28618c3 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -415,6 +415,16 @@ void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 bool aarch64_return_address_signing_enabled (void);
 void aarch64_save_restore_target_globals (tree);
+void aarch64_add_128bit_scratch_regs (rtx, rtx, rtx *,
+				      rtx *, rtx *,
+				      rtx *, rtx *,
+				      rtx *);
+void aarch64_subv_128bit_scratch_regs (rtx, rtx, rtx *,
+				       rtx *, rtx *,
+				       rtx *, rtx *, rtx *);
+void aarch64_expand_subvti (rtx, rtx, rtx,
+			    rtx, rtx, rtx, rtx);
+
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
@@ -439,6 +449,8 @@ bool aarch64_float_const_representable_p (rtx);
 
 #if defined (RTX_CODE)
 
+void aarch64_gen_unlikely_cbranch (enum rtx_code, machine_mode cc_mode,
+				   rtx label_ref);
 bool aarch64_legitimate_address_p (machine_mode, rtx, RTX_CODE, bool);
 machine_mode aarch64_select_cc_mode (RTX_CODE, rtx, rtx);
 rtx aarch64_gen_compare_reg (RTX_CODE, rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index d1aaf19..061d139 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -5013,6 +5013,13 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && GET_CODE (y) == ZERO_EXTEND)
     return CC_Cmode;
 
+  /* A test for signed overflow.  */
+  if ((GET_MODE (x) == DImode || GET_MODE (x) == TImode)
+      && code == NE
+      && GET_CODE (x) == PLUS
+      && GET_CODE (y) == SIGN_EXTEND)
+    return CC_Vmode;
+
   /* For everything else, return CCmode.  */
   return CCmode;
 }
@@ -5119,6 +5126,15 @@ aarch64_get_condition_code_1 (machine_mode mode, enum rtx_code comp_code)
 	}
       break;
 
+    case E_CC_Vmode:
+      switch (comp_code)
+	{
+	case NE: return AARCH64_VS;
+	case EQ: return AARCH64_VC;
+	default: return -1;
+	}
+      break;
+
     default:
       return -1;
     }
@@ -14150,6 +14166,131 @@ aarch64_split_dimode_const_store (rtx dst, rtx src)
   return true;
 }
 
+/* Generate RTL for a conditional branch with rtx comparison CODE in
+   mode CC_MODE.  The destination of the unlikely conditional branch
+   is LABEL_REF.  */
+
+void
+aarch64_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
+			      rtx label_ref)
+{
+  rtx x;
+  x = gen_rtx_fmt_ee (code, VOIDmode,
+		      gen_rtx_REG (cc_mode, CC_REGNUM),
+		      const0_rtx);
+
+  x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
+			    gen_rtx_LABEL_REF (VOIDmode, label_ref),
+			    pc_rtx);
+  aarch64_emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
+}
+
+/* Generate DImode scratch registers for 128-bit (TImode) addition.
+
+   OP1 represents the TImode source operand 1
+   OP2 represents the TImode source operand 2
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+void
+aarch64_add_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				 rtx *low_in1, rtx *low_in2,
+				 rtx *high_dest, rtx *high_in1,
+				 rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = gen_lowpart (DImode, op1);
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+  *high_in1 = gen_highpart (DImode, op1);
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+/* Generate DImode scratch registers for 128-bit (TImode) subtraction.
+
+   This function differs from 'aarch64_add_128bit_scratch_regs' in that
+   OP1 can be an immediate constant (zero). We must call
+   subreg_highpart_offset with DImode and TImode arguments, otherwise
+   VOIDmode will be used for the const_int which generates an internal
+   error from subreg_size_highpart_offset which does not expect a size of zero.
+
+   OP1 represents the TImode source operand 1
+   OP2 represents the TImode source operand 2
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+
+void
+aarch64_subv_128bit_scratch_regs (rtx op1, rtx op2, rtx *low_dest,
+				  rtx *low_in1, rtx *low_in2,
+				  rtx *high_dest, rtx *high_in1,
+				  rtx *high_in2)
+{
+  *low_dest = gen_reg_rtx (DImode);
+  *low_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+
+  *low_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				  subreg_lowpart_offset (DImode, TImode));
+  *high_dest = gen_reg_rtx (DImode);
+
+  *high_in1 = simplify_gen_subreg (DImode, op1, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+  *high_in2 = simplify_gen_subreg (DImode, op2, TImode,
+				   subreg_highpart_offset (DImode, TImode));
+}
+
+/* Generate RTL for 128-bit (TImode) subtraction with overflow.
+
+   OP0 represents the TImode destination operand 0
+   LOW_DEST represents the low half (DImode) of TImode operand 0
+   LOW_IN1 represents the low half (DImode) of TImode operand 1
+   LOW_IN2 represents the low half (DImode) of TImode operand 2
+   HIGH_DEST represents the high half (DImode) of TImode operand 0
+   HIGH_IN1 represents the high half (DImode) of TImode operand 1
+   HIGH_IN2 represents the high half (DImode) of TImode operand 2.  */
+
+void
+aarch64_expand_subvti (rtx op0, rtx low_dest, rtx low_in1,
+		       rtx low_in2, rtx high_dest, rtx high_in1,
+		       rtx high_in2)
+{
+  if (low_in2 == const0_rtx)
+    {
+      low_dest = low_in1;
+      emit_insn (gen_subdi3_compare1 (high_dest, high_in1,
+				      force_reg (DImode, high_in2)));
+    }
+  else
+    {
+      if (CONST_INT_P (low_in2))
+	{
+	  low_in2 = force_reg (DImode, GEN_INT (-UINTVAL (low_in2)));
+	  high_in2 = force_reg (DImode, high_in2);
+	  emit_insn (gen_adddi3_compareC (low_dest, low_in1, low_in2));
+	}
+      else
+	emit_insn (gen_subdi3_compare1 (low_dest, low_in1, low_in2));
+      emit_insn (gen_subdi3_carryinCV (high_dest,
+				       force_reg (DImode, high_in1),
+				       high_in2));
+    }
+
+  emit_move_insn (gen_lowpart (DImode, op0), low_dest);
+  emit_move_insn (gen_highpart (DImode, op0), high_dest);
+
+}
+
 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook.  */
 
 static unsigned HOST_WIDE_INT

