public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64][1/2] PR rtl-optimization/68796 Add compare-of-zero_extract pattern
@ 2015-12-17 15:36 Kyrill Tkachov
  2015-12-17 17:24 ` James Greenhalgh
  0 siblings, 1 reply; 4+ messages in thread
From: Kyrill Tkachov @ 2015-12-17 15:36 UTC (permalink / raw)
  To: GCC Patches; +Cc: Marcus Shawcroft, Richard Earnshaw, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 2476 bytes --]

Hi all,

In this PR I'm trying to increase the use of the aarch64 instruction TST that performs a
bitwise AND with a bitmask and compares the result with zero.
GCC has many ways of representing these operations in RTL. Depending on the mask, the target
and the context it might be an AND-immediate, a ZERO_EXTRACT or a ZERO_EXTEND of a subreg.

aarch64.md already contains a pattern for the compare with and-immediate case, which is the most
general form of this, but it doesn't match in many common cases.

The documentation on canonicalization in md.texi says:
"Equality comparisons of a group of bits (usually a single bit) with zero
  will be written using @code{zero_extract} rather than the equivalent
  @code{and} or @code{sign_extract} operations."

This means that we should define a compare with a zero-extract pattern in aarch64,
which is what this patch does. It's fairly simple: it constructs the TST mask from
the operands of the zero_extract and updates the SELECT_CC_MODE implementation to
assign the correct CC_NZ mode to such comparisons.  Note that this is valid only
for equality comparisons against zero.

So for the testcase:
int
f1 (int x)
{
   if (x & 1)
     return 1;
   return x;
}

we now generate:
f1:
         tst     x0, 1
         csinc   w0, w0, wzr, eq
         ret

instead of the previous:
f1:
         and     w1, w0, 1
         cmp     w1, wzr
         csinc   w0, w0, wzr, eq
         ret


and for the testcase:
int
f2 (long x)
{
    return ((short) x >= 0) ? x : 0;
}

we now generate:
f2:
         tst     x0, 32768
         csel    x0, x0, xzr, eq
         ret

instead of:
f2:
         sxth    w1, w0
         cmp     w1, wzr
         csel    x0, x0, xzr, ge
         ret

i.e. we test the sign bit rather than perform the full comparison with zero.

Bootstrapped and tested on aarch64-none-linux-gnu.

Ok for trunk?

Thanks,
Kyrill

2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     PR rtl-optimization/68796
     * config/aarch64/aarch64.md (*and<mode>3nr_compare0_zextract):
     New pattern.
     * config/aarch64/aarch64.c (aarch64_select_cc_mode): Handle
     ZERO_EXTRACT comparison with zero.
     (aarch64_mask_from_zextract_ops): New function.
     * config/aarch64/aarch64-protos.h (aarch64_mask_from_zextract_ops):
     New prototype.

2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>

     PR rtl-optimization/68796
     * gcc.target/aarch64/tst_3.c: New test.
     * gcc.target/aarch64/tst_4.c: Likewise.

[-- Attachment #2: aarch64-cmp-zextract.patch --]
[-- Type: text/x-patch, Size: 4138 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 87d6eb1358845527d7068550925949802a7e48e2..febca98d38d5f09c97b0f79adc55bb29eca217b9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -330,6 +330,7 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
 rtx aarch64_final_eh_return_addr (void);
 rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
+rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
 rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index cb8955d5d6c909e8179bb1ab8203eb165f55e4b6..58a9fc68f391162ed9847d7fb79d70d3ee9919f5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4147,7 +4147,9 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && y == const0_rtx
       && (code == EQ || code == NE || code == LT || code == GE)
       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
-	  || GET_CODE (x) == NEG))
+	  || GET_CODE (x) == NEG
+	  || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
+	      && CONST_INT_P (XEXP (x, 2)))))
     return CC_NZmode;
 
   /* A compare with a shifted operand.  Because of canonicalization,
@@ -10757,6 +10759,21 @@ aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
   return x == CONST0_RTX (mode);
 }
 
+
+/* Return the bitmask CONST_INT to select the bits required by a zero extract
+   operation of width WIDTH at bit position POS.  */
+
+rtx
+aarch64_mask_from_zextract_ops (rtx width, rtx pos)
+{
+  gcc_assert (CONST_INT_P (width));
+  gcc_assert (CONST_INT_P (pos));
+
+  unsigned HOST_WIDE_INT mask
+    = ((unsigned HOST_WIDE_INT)1 << UINTVAL (width)) - 1;
+  return GEN_INT (mask << UINTVAL (pos));
+}
+
 bool
 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
 {
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4604fd2588be87944a72224dccb3dfb32e42a1ad..fd2b3ef64f1736545948eb49e5ac6dfbd206e3e9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3698,6 +3698,28 @@ (define_insn "*and<mode>3nr_compare0"
   [(set_attr "type" "logics_reg,logics_imm")]
 )
 
+(define_insn "*and<mode>3nr_compare0_zextract"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	 (zero_extract:GPI (match_operand:GPI 0 "register_operand" "r")
+		  (match_operand:GPI 1 "const_int_operand" "n")
+		  (match_operand:GPI 2 "const_int_operand" "n"))
+	 (const_int 0)))]
+  "INTVAL (operands[1]) > 0
+   && ((INTVAL (operands[1]) + INTVAL (operands[2]))
+	<= GET_MODE_BITSIZE (<MODE>mode))
+   && aarch64_bitmask_imm (
+	UINTVAL (aarch64_mask_from_zextract_ops (operands[1],
+						 operands[2])),
+	<MODE>mode)"
+  {
+    operands[1]
+      = aarch64_mask_from_zextract_ops (operands[1], operands[2]);
+    return "tst\\t%<w>0, %1";
+  }
+  [(set_attr "type" "logics_shift_imm")]
+)
+
 (define_insn "*and_<SHIFT:optab><mode>3nr_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_3.c b/gcc/testsuite/gcc.target/aarch64/tst_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..2204b33f3bc2ea974b3b0a7d1a5bdca7c6b37b82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tst_3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+f1 (int x)
+{
+  if (x & 1)
+    return 1;
+  return x;
+}
+
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*1" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_4.c b/gcc/testsuite/gcc.target/aarch64/tst_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..2b869c05c87ec120e1632a1420349a5eb98ff895
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tst_4.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+f1 (long x)
+{
+   return ((short) x >= 0) ? x : 0;
+}
+
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*32768\n" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64][1/2] PR rtl-optimization/68796 Add compare-of-zero_extract pattern
  2015-12-17 15:36 [PATCH][AArch64][1/2] PR rtl-optimization/68796 Add compare-of-zero_extract pattern Kyrill Tkachov
@ 2015-12-17 17:24 ` James Greenhalgh
  2015-12-17 17:38   ` Kyrill Tkachov
  2015-12-18  9:53   ` Kyrill Tkachov
  0 siblings, 2 replies; 4+ messages in thread
From: James Greenhalgh @ 2015-12-17 17:24 UTC (permalink / raw)
  To: Kyrill Tkachov; +Cc: GCC Patches, Marcus Shawcroft, Richard Earnshaw

On Thu, Dec 17, 2015 at 03:36:40PM +0000, Kyrill Tkachov wrote:
> 2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     PR rtl-optimization/68796
>     * config/aarch64/aarch64.md (*and<mode>3nr_compare0_zextract):
>     New pattern.
>     * config/aarch64/aarch64.c (aarch64_select_cc_mode): Handle
>     ZERO_EXTRACT comparison with zero.
>     (aarch64_mask_from_zextract_ops): New function.
>     * config/aarch64/aarch64-protos.h (aarch64_mask_from_zextract_ops):
>     New prototype.
> 
> 2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
> 
>     PR rtl-optimization/68796
>     * gcc.target/aarch64/tst_3.c: New test.
>     * gcc.target/aarch64/tst_4.c: Likewise.

Two comments.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 87d6eb1358845527d7068550925949802a7e48e2..febca98d38d5f09c97b0f79adc55bb29eca217b9 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -330,6 +330,7 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
>  int aarch64_vec_fpconst_pow_of_2 (rtx);
>  rtx aarch64_final_eh_return_addr (void);
>  rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
> +rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>  const char *aarch64_output_move_struct (rtx *operands);
>  rtx aarch64_return_addr (int, rtx);
>  rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index cb8955d5d6c909e8179bb1ab8203eb165f55e4b6..58a9fc68f391162ed9847d7fb79d70d3ee9919f5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -4147,7 +4147,9 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
>        && y == const0_rtx
>        && (code == EQ || code == NE || code == LT || code == GE)
>        && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
> -	  || GET_CODE (x) == NEG))
> +	  || GET_CODE (x) == NEG
> +	  || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
> +	      && CONST_INT_P (XEXP (x, 2)))))
>      return CC_NZmode;
>  
>    /* A compare with a shifted operand.  Because of canonicalization,
> @@ -10757,6 +10759,21 @@ aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
>    return x == CONST0_RTX (mode);
>  }
>  
> +
> +/* Return the bitmask CONST_INT to select the bits required by a zero extract
> +   operation of width WIDTH at bit position POS.  */
> +
> +rtx
> +aarch64_mask_from_zextract_ops (rtx width, rtx pos)
> +{

It is up to you, but would this not more naturally be:

  unsigned HOST_WIDE_INT
  aarch64_mask_from_zextract_ops (rtx width, rtx pos)

Given how it gets used elsewhere?

> +  gcc_assert (CONST_INT_P (width));
> +  gcc_assert (CONST_INT_P (pos));
> +
> +  unsigned HOST_WIDE_INT mask
> +    = ((unsigned HOST_WIDE_INT)1 << UINTVAL (width)) - 1;

Space between (unsigned HOST_WIDE_INT) and 1.

> +  return GEN_INT (mask << UINTVAL (pos));
> +}
> +
>  bool
>  aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
>  {

Otherwise, this is OK.

Thanks,
James

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64][1/2] PR rtl-optimization/68796 Add compare-of-zero_extract pattern
  2015-12-17 17:24 ` James Greenhalgh
@ 2015-12-17 17:38   ` Kyrill Tkachov
  2015-12-18  9:53   ` Kyrill Tkachov
  1 sibling, 0 replies; 4+ messages in thread
From: Kyrill Tkachov @ 2015-12-17 17:38 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: GCC Patches, Marcus Shawcroft, Richard Earnshaw

Hi James,

On 17/12/15 17:24, James Greenhalgh wrote:
> On Thu, Dec 17, 2015 at 03:36:40PM +0000, Kyrill Tkachov wrote:
>> 2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      PR rtl-optimization/68796
>>      * config/aarch64/aarch64.md (*and<mode>3nr_compare0_zextract):
>>      New pattern.
>>      * config/aarch64/aarch64.c (aarch64_select_cc_mode): Handle
>>      ZERO_EXTRACT comparison with zero.
>>      (aarch64_mask_from_zextract_ops): New function.
>>      * config/aarch64/aarch64-protos.h (aarch64_mask_from_zextract_ops):
>>      New prototype.
>>
>> 2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      PR rtl-optimization/68796
>>      * gcc.target/aarch64/tst_3.c: New test.
>>      * gcc.target/aarch64/tst_4.c: Likewise.
> Two comments.
>
>> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
>> index 87d6eb1358845527d7068550925949802a7e48e2..febca98d38d5f09c97b0f79adc55bb29eca217b9 100644
>> --- a/gcc/config/aarch64/aarch64-protos.h
>> +++ b/gcc/config/aarch64/aarch64-protos.h
>> @@ -330,6 +330,7 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
>>   int aarch64_vec_fpconst_pow_of_2 (rtx);
>>   rtx aarch64_final_eh_return_addr (void);
>>   rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
>> +rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>>   const char *aarch64_output_move_struct (rtx *operands);
>>   rtx aarch64_return_addr (int, rtx);
>>   rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index cb8955d5d6c909e8179bb1ab8203eb165f55e4b6..58a9fc68f391162ed9847d7fb79d70d3ee9919f5 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -4147,7 +4147,9 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
>>         && y == const0_rtx
>>         && (code == EQ || code == NE || code == LT || code == GE)
>>         && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
>> -	  || GET_CODE (x) == NEG))
>> +	  || GET_CODE (x) == NEG
>> +	  || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
>> +	      && CONST_INT_P (XEXP (x, 2)))))
>>       return CC_NZmode;
>>   
>>     /* A compare with a shifted operand.  Because of canonicalization,
>> @@ -10757,6 +10759,21 @@ aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
>>     return x == CONST0_RTX (mode);
>>   }
>>   
>> +
>> +/* Return the bitmask CONST_INT to select the bits required by a zero extract
>> +   operation of width WIDTH at bit position POS.  */
>> +
>> +rtx
>> +aarch64_mask_from_zextract_ops (rtx width, rtx pos)
>> +{
> It is up to you, but would this not more naturally be:
>
>    unsigned HOST_WIDE_INT
>    aarch64_mask_from_zextract_ops (rtx width, rtx pos)
>
> Given how it gets used elsewhere?

It gets used in exactly two places, once in the condition of the pattern
where we have to extract its UINTVAL and once when outputting the assembly
string where we want the rtx wrapper around it to assign it to operands[1],
so I'd argue it's a 50-50 choice.
So I'll leave it as it is unless you have a strong preference.

>> +  gcc_assert (CONST_INT_P (width));
>> +  gcc_assert (CONST_INT_P (pos));
>> +
>> +  unsigned HOST_WIDE_INT mask
>> +    = ((unsigned HOST_WIDE_INT)1 << UINTVAL (width)) - 1;
> Space between (unsigned HOST_WIDE_INT) and 1.
>

Consider it done.
Thanks,
Kyrill

>> +  return GEN_INT (mask << UINTVAL (pos));
>> +}
>> +
>>   bool
>>   aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
>>   {
> Otherwise, this is OK.
>
> Thanks,
> James
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH][AArch64][1/2] PR rtl-optimization/68796 Add compare-of-zero_extract pattern
  2015-12-17 17:24 ` James Greenhalgh
  2015-12-17 17:38   ` Kyrill Tkachov
@ 2015-12-18  9:53   ` Kyrill Tkachov
  1 sibling, 0 replies; 4+ messages in thread
From: Kyrill Tkachov @ 2015-12-18  9:53 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: GCC Patches, Marcus Shawcroft, Richard Earnshaw

[-- Attachment #1: Type: text/plain, Size: 3423 bytes --]


On 17/12/15 17:24, James Greenhalgh wrote:
> On Thu, Dec 17, 2015 at 03:36:40PM +0000, Kyrill Tkachov wrote:
>> 2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      PR rtl-optimization/68796
>>      * config/aarch64/aarch64.md (*and<mode>3nr_compare0_zextract):
>>      New pattern.
>>      * config/aarch64/aarch64.c (aarch64_select_cc_mode): Handle
>>      ZERO_EXTRACT comparison with zero.
>>      (aarch64_mask_from_zextract_ops): New function.
>>      * config/aarch64/aarch64-protos.h (aarch64_mask_from_zextract_ops):
>>      New prototype.
>>
>> 2015-12-17  Kyrylo Tkachov  <kyrylo.tkachov@arm.com>
>>
>>      PR rtl-optimization/68796
>>      * gcc.target/aarch64/tst_3.c: New test.
>>      * gcc.target/aarch64/tst_4.c: Likewise.
> Two comments.
>
>> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
>> index 87d6eb1358845527d7068550925949802a7e48e2..febca98d38d5f09c97b0f79adc55bb29eca217b9 100644
>> --- a/gcc/config/aarch64/aarch64-protos.h
>> +++ b/gcc/config/aarch64/aarch64-protos.h
>> @@ -330,6 +330,7 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
>>   int aarch64_vec_fpconst_pow_of_2 (rtx);
>>   rtx aarch64_final_eh_return_addr (void);
>>   rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
>> +rtx aarch64_mask_from_zextract_ops (rtx, rtx);
>>   const char *aarch64_output_move_struct (rtx *operands);
>>   rtx aarch64_return_addr (int, rtx);
>>   rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index cb8955d5d6c909e8179bb1ab8203eb165f55e4b6..58a9fc68f391162ed9847d7fb79d70d3ee9919f5 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -4147,7 +4147,9 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
>>         && y == const0_rtx
>>         && (code == EQ || code == NE || code == LT || code == GE)
>>         && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
>> -	  || GET_CODE (x) == NEG))
>> +	  || GET_CODE (x) == NEG
>> +	  || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
>> +	      && CONST_INT_P (XEXP (x, 2)))))
>>       return CC_NZmode;
>>   
>>     /* A compare with a shifted operand.  Because of canonicalization,
>> @@ -10757,6 +10759,21 @@ aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
>>     return x == CONST0_RTX (mode);
>>   }
>>   
>> +
>> +/* Return the bitmask CONST_INT to select the bits required by a zero extract
>> +   operation of width WIDTH at bit position POS.  */
>> +
>> +rtx
>> +aarch64_mask_from_zextract_ops (rtx width, rtx pos)
>> +{
> It is up to you, but would this not more naturally be:
>
>    unsigned HOST_WIDE_INT
>    aarch64_mask_from_zextract_ops (rtx width, rtx pos)
>
> Given how it gets used elsewhere?
>
>> +  gcc_assert (CONST_INT_P (width));
>> +  gcc_assert (CONST_INT_P (pos));
>> +
>> +  unsigned HOST_WIDE_INT mask
>> +    = ((unsigned HOST_WIDE_INT)1 << UINTVAL (width)) - 1;
> Space between (unsigned HOST_WIDE_INT) and 1.
>
>> +  return GEN_INT (mask << UINTVAL (pos));
>> +}
>> +
>>   bool
>>   aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
>>   {
> Otherwise, this is OK.

Thanks, I've chosen to keep the return type of aarch64_mask_from_zextract_ops as rtx
and fixed the whitespace. I'm committing this version to trunk.

Thanks,
Kyrill


> Thanks,
> James
>


[-- Attachment #2: aarch64-compare-zextract.patch --]
[-- Type: text/x-patch, Size: 4139 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 87d6eb1358845527d7068550925949802a7e48e2..febca98d38d5f09c97b0f79adc55bb29eca217b9 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -330,6 +330,7 @@ int aarch64_uxt_size (int, HOST_WIDE_INT);
 int aarch64_vec_fpconst_pow_of_2 (rtx);
 rtx aarch64_final_eh_return_addr (void);
 rtx aarch64_legitimize_reload_address (rtx *, machine_mode, int, int, int);
+rtx aarch64_mask_from_zextract_ops (rtx, rtx);
 const char *aarch64_output_move_struct (rtx *operands);
 rtx aarch64_return_addr (int, rtx);
 rtx aarch64_simd_gen_const_vector_dup (machine_mode, int);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 474dca4f4b98179b64cfc29aa689d71363c736cd..a174a4c2f12acc2b0558782798312dfa17cdf5d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -4147,7 +4147,9 @@ aarch64_select_cc_mode (RTX_CODE code, rtx x, rtx y)
       && y == const0_rtx
       && (code == EQ || code == NE || code == LT || code == GE)
       && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS || GET_CODE (x) == AND
-	  || GET_CODE (x) == NEG))
+	  || GET_CODE (x) == NEG
+	  || (GET_CODE (x) == ZERO_EXTRACT && CONST_INT_P (XEXP (x, 1))
+	      && CONST_INT_P (XEXP (x, 2)))))
     return CC_NZmode;
 
   /* A compare with a shifted operand.  Because of canonicalization,
@@ -10759,6 +10761,21 @@ aarch64_simd_imm_zero_p (rtx x, machine_mode mode)
   return x == CONST0_RTX (mode);
 }
 
+
+/* Return the bitmask CONST_INT to select the bits required by a zero extract
+   operation of width WIDTH at bit position POS.  */
+
+rtx
+aarch64_mask_from_zextract_ops (rtx width, rtx pos)
+{
+  gcc_assert (CONST_INT_P (width));
+  gcc_assert (CONST_INT_P (pos));
+
+  unsigned HOST_WIDE_INT mask
+    = ((unsigned HOST_WIDE_INT) 1 << UINTVAL (width)) - 1;
+  return GEN_INT (mask << UINTVAL (pos));
+}
+
 bool
 aarch64_simd_imm_scalar_p (rtx x, machine_mode mode ATTRIBUTE_UNUSED)
 {
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 4604fd2588be87944a72224dccb3dfb32e42a1ad..fd2b3ef64f1736545948eb49e5ac6dfbd206e3e9 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3698,6 +3698,28 @@ (define_insn "*and<mode>3nr_compare0"
   [(set_attr "type" "logics_reg,logics_imm")]
 )
 
+(define_insn "*and<mode>3nr_compare0_zextract"
+  [(set (reg:CC_NZ CC_REGNUM)
+	(compare:CC_NZ
+	 (zero_extract:GPI (match_operand:GPI 0 "register_operand" "r")
+		  (match_operand:GPI 1 "const_int_operand" "n")
+		  (match_operand:GPI 2 "const_int_operand" "n"))
+	 (const_int 0)))]
+  "INTVAL (operands[1]) > 0
+   && ((INTVAL (operands[1]) + INTVAL (operands[2]))
+	<= GET_MODE_BITSIZE (<MODE>mode))
+   && aarch64_bitmask_imm (
+	UINTVAL (aarch64_mask_from_zextract_ops (operands[1],
+						 operands[2])),
+	<MODE>mode)"
+  {
+    operands[1]
+      = aarch64_mask_from_zextract_ops (operands[1], operands[2]);
+    return "tst\\t%<w>0, %1";
+  }
+  [(set_attr "type" "logics_shift_imm")]
+)
+
 (define_insn "*and_<SHIFT:optab><mode>3nr_compare0"
   [(set (reg:CC_NZ CC_REGNUM)
 	(compare:CC_NZ
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_3.c b/gcc/testsuite/gcc.target/aarch64/tst_3.c
new file mode 100644
index 0000000000000000000000000000000000000000..2204b33f3bc2ea974b3b0a7d1a5bdca7c6b37b82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tst_3.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+f1 (int x)
+{
+  if (x & 1)
+    return 1;
+  return x;
+}
+
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*1" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/tst_4.c b/gcc/testsuite/gcc.target/aarch64/tst_4.c
new file mode 100644
index 0000000000000000000000000000000000000000..2b869c05c87ec120e1632a1420349a5eb98ff895
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/tst_4.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int
+f1 (long x)
+{
+   return ((short) x >= 0) ? x : 0;
+}
+
+/* { dg-final { scan-assembler "tst\t(x|w)\[0-9\]*.*32768\n" } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2015-12-18  9:53 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-17 15:36 [PATCH][AArch64][1/2] PR rtl-optimization/68796 Add compare-of-zero_extract pattern Kyrill Tkachov
2015-12-17 17:24 ` James Greenhalgh
2015-12-17 17:38   ` Kyrill Tkachov
2015-12-18  9:53   ` Kyrill Tkachov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).