public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [Aarch64][PATCH] Improve Logical And Immediate Expressions
@ 2016-10-27 20:44 Michael Collison
  2016-11-17 16:26 ` James Greenhalgh
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Collison @ 2016-10-27 20:44 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd

[-- Attachment #1: Type: text/plain, Size: 2357 bytes --]

This patch is designed to improve code generation for "and" instructions with certain immediate operands.

For the following test case:

int f2(int x)
{
   x &= 0x0ffffff8;

   x &= 0xff001fff;

   return x;
}

the trunk aarch64 compiler generates:

mov	w1, 8184
movk	w1, 0xf00, lsl 16
and	w0, w0, w1

We can generate one fewer instruction if we recognize certain constants. With the attached patch the current trunk compiler generates:

and	w0, w0, 268435448
and	w0, w0, -16769025

Bootstrapped and make check successfully completed with no regressions on aarch64-linux-gnu.

Okay for trunk?

Regards,

Michael Collison

------------------------------------------------------------------------------------

ChangeLog entries are as follows:

*** gcc/ChangeLog ***

2016-10-27  Michael Collison  <michael.collison@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_and_split_imm1, aarch64_and_split_imm2)
	(aarch64_and_bitmask_imm): New prototypes
	* config/aarch64/aarch64.c (aarch64_and_split_imm1):
	New overloaded function to create bit mask covering the
	lowest to highest bits set.
	(aarch64_and_split_imm2): New overloaded functions to create bit
	mask of zeros between first and last bit set.
	(aarch64_and_bitmask_imm): New function to determine if a integer
	is a valid two instruction "and" operation.
	* config/aarch64/aarch64.md:(and<mode>3): New define_insn and _split
	allowing wider range of constants with "and" operations.
	* (ior<mode>3, xor<mode>3): Use new LOGICAL2 iterator to prevent
	"and" operator from matching restricted constant range used for
	ior and xor operators.
	* config/aarch64/constraints.md (UsO constraint): New SImode constraint
	for constants in "and" operations.
	(UsP constraint): New DImode constraint for constants in "and" operations.
	* config/aarch64/iterators.md (lconst2): New mode iterator.
	(LOGICAL2): New code iterator.
	* config/aarch64/predicates.md (aarch64_logical_and_immediate): New
	predicate
	(aarch64_logical_and_operand): New predicate allowing extended constants
	for "and" operations.

*** gcc/testsuite/ChangeLog ***

2016-10-27  Michael Collison  <michael.collison@arm.com>

testsuite/
	* gcc.target/aarch64/and_const.c: New test to verify
	additional constants are recognized and fewer instructions generated.


[-- Attachment #2: gnutools_5860_ipreview3.patch --]
[-- Type: application/octet-stream, Size: 5849 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b..7ef8cdf 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
+unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
+bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
 int aarch64_branch_cost (bool, bool);
 enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
 bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3e663eb..db82c5c 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3600,6 +3600,46 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
   return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
 }
 
+/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.  */
+
+unsigned HOST_WIDE_INT
+aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
+{
+  int first_bit_set = ctz_hwi (val_in);
+  int last_bit_set = floor_log2 (val_in);
+  gcc_assert (first_bit_set < last_bit_set);
+
+  return ((HOST_WIDE_INT_UC (2) << last_bit_set) -
+	  (HOST_WIDE_INT_1U << first_bit_set));
+}
+
+/* Create constant with zero bits between first and last bit set and one
+   bits elsewhere.  */
+
+unsigned HOST_WIDE_INT
+aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
+{
+  return val_in | ~aarch64_and_split_imm1 (val_in);
+}
+
+/* Return true if VAL_IN is a valid 'and' bitmask immediate.  */
+
+bool
+aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
+{
+  if (aarch64_bitmask_imm (val_in, mode))
+    return false;
+
+  if (aarch64_move_imm (val_in, mode))
+    return false;
+
+  unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
+
+  if (!aarch64_bitmask_imm (imm2, mode))
+    return false;
+
+  return true;
+}
 
 /* Return true if val is an immediate that can be loaded into a
    register in a single instruction.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c95258b..462bf02 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3404,6 +3404,26 @@
 ;; Logical operations
 ;; -------------------------------------------------------------------
 
+
+(define_insn_and_split "*aarch64_and<mode>_imm2"
+  [(set (match_operand:GPI 0 "register_operand" "=rk")
+	(and:GPI (match_operand:GPI 1 "register_operand" "%r")
+		 (match_operand:GPI 2 "aarch64_logical_and_immediate" "<lconst2>")))]
+  ""
+  "#"
+  "true"
+  [(const_int 0)]
+  {
+     HOST_WIDE_INT val = INTVAL (operands[2]);
+     rtx imm1 = GEN_INT (aarch64_and_split_imm1 (val));
+     rtx imm2 = GEN_INT (aarch64_and_split_imm2 (val));
+
+     emit_insn (gen_and<mode>3 (operands[0], operands[1], imm1));
+     emit_insn (gen_and<mode>3 (operands[0], operands[0], imm2));
+     DONE;
+  }
+)
+
 (define_insn "<optab><mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,rk,w")
 	(LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r,w")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index d64a7eb..7a2847a 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -69,6 +69,16 @@
  (and (match_code "const_int")
       (match_test "aarch64_move_imm (ival, DImode)")))
 
+(define_constraint "UsO"
+ "A constant that can be used with a 32-bit and operation."
+ (and (match_code "const_int")
+      (match_test "aarch64_and_bitmask_imm (ival, SImode)")))
+
+(define_constraint "UsP"
+ "A constant that can be used with a 64-bit and operation."
+ (and (match_code "const_int")
+      (match_test "aarch64_and_bitmask_imm (ival, DImode)")))
+
 (define_constraint "S"
   "A constraint that matches an absolute symbolic address."
   (and (match_code "const,symbol_ref,label_ref")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 52f645a..3d63f34 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -440,6 +440,9 @@
 ;; Attribute to describe constants acceptable in logical operations
 (define_mode_attr lconst [(SI "K") (DI "L")])
 
+;; Attribute to describe constants acceptable in logical and operations
+(define_mode_attr lconst2 [(SI "UsO") (DI "UsP")])
+
 ;; Map a mode to a specific constraint character.
 (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index ebda6d8..9839d20 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -106,6 +106,10 @@
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_logical_immediate")))
 
+(define_predicate "aarch64_logical_and_immediate"
+  (and (match_code "const_int")
+       (match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)")))
+
 (define_predicate "aarch64_shift_imm_si"
   (and (match_code "const_int")
        (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 32")))
diff --git a/gcc/testsuite/gcc.target/aarch64/and_const.c b/gcc/testsuite/gcc.target/aarch64/and_const.c
new file mode 100644
index 0000000..9c377d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/and_const.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int f2 (int x)
+{
+   x &= 0x0ffffff8;
+
+   x &= 0xff001fff;
+
+   return x;
+}
+
+/* { dg-final { scan-assembler-times "and\t" 2 } } */
+/* { dg-final { scan-assembler-not "movk\t" } } */
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions
  2016-10-27 20:44 [Aarch64][PATCH] Improve Logical And Immediate Expressions Michael Collison
@ 2016-11-17 16:26 ` James Greenhalgh
  2016-11-18  7:42   ` Michael Collison
  0 siblings, 1 reply; 7+ messages in thread
From: James Greenhalgh @ 2016-11-17 16:26 UTC (permalink / raw)
  To: Michael Collison; +Cc: gcc-patches, nd

On Thu, Oct 27, 2016 at 08:44:02PM +0000, Michael Collison wrote:
> This patch is designed to improve code generation for "and" instructions with
> certain immediate operands.
> 
> For the following test case:
> 
> int f2(int x)
> {
>    x &= 0x0ffffff8;
> 
>    x &= 0xff001fff;
> 
>    return x;
> }
> 
> the trunk aarch64 compiler generates:
> 
> mov	w1, 8184
> movk	w1, 0xf00, lsl 16
> and	w0, w0, w1
> 
> We can generate one fewer instruction if we recognize certain constants. With
> the attached patch the current trunk compiler generates:
> 
> and	w0, w0, 268435448
> and	w0, w0, -16769025
> 
> Bootstrapped and make check successfully completed with no regressions on
> aarch64-linux-gnu.
> 
> Okay for trunk?

Hi Michael,

I have some minor comments in line.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 3cdd69b..7ef8cdf 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
>  int aarch64_get_condition_code (rtx);
>  bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
> +unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
> +bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
>  int aarch64_branch_cost (bool, bool);
>  enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
>  bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3e663eb..db82c5c 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3600,6 +3600,46 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
>    return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
>  }
>  
> +/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.  */
> +
> +unsigned HOST_WIDE_INT
> +aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
> +{
> +  int first_bit_set = ctz_hwi (val_in);
> +  int last_bit_set = floor_log2 (val_in);
> +  gcc_assert (first_bit_set < last_bit_set);

This assert can never trigger (by definition) unless VAL_IN == 0 (in which
case floor_log2 = -1, ctz_hwi = HOST_BITS_PER_WORD_SIZE). If that's the case
you're testing for, then this assert would be more explicit as

 gcc_assert (val_in != 0)

I'd suggest renaming these to lowest_bit_set and highest_bit_set, as first
and last are ambiguous (you have them the opposite way round from what
I'd consider first [highest, thus leading bits] and last
[lowest/trailing bits]).

> +
> +  return ((HOST_WIDE_INT_UC (2) << last_bit_set) -
> +	  (HOST_WIDE_INT_1U << first_bit_set));
> +}
> +
> +/* Create constant with zero bits between first and last bit set and one
> +   bits elsewhere.  */

I can't parse this comment in relation to what the code below does.
Looking at the code, you're building a new constant where the bits between
the first and last bits are unmodified, and all other bits are set to one.

i.e. for 0000 1010 1000 you'd generate 1111 1010 1111.

> +unsigned HOST_WIDE_INT
> +aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
> +{
> +  return val_in | ~aarch64_and_split_imm1 (val_in);
> +}
> +
> +/* Return true if VAL_IN is a valid 'and' bitmask immediate.  */
> +
> +bool
> +aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
> +{
> +  if (aarch64_bitmask_imm (val_in, mode))
> +    return false;
> +
> +  if (aarch64_move_imm (val_in, mode))
> +    return false;
> +
> +  unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
> +
> +  if (!aarch64_bitmask_imm (imm2, mode))
> +    return false;
> +
> +  return true;

You can replace these four lines with:

  return aarch64_bitmask_imm (imm2, mode);

> +}
>  
>  /* Return true if val is an immediate that can be loaded into a
>     register in a single instruction.  */

> diff --git a/gcc/testsuite/gcc.target/aarch64/and_const.c b/gcc/testsuite/gcc.target/aarch64/and_const.c
> new file mode 100644
> index 0000000..9c377d7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/and_const.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +int f2 (int x)
> +{
> +   x &= 0x0ffffff8;
> +
> +   x &= 0xff001fff;
> +
> +   return x;
> +}

It would be good to have a test for the DImode cases too (using long long).

Thanks,
James

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [Aarch64][PATCH] Improve Logical And Immediate Expressions
  2016-11-17 16:26 ` James Greenhalgh
@ 2016-11-18  7:42   ` Michael Collison
  2016-11-21  9:52     ` James Greenhalgh
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Collison @ 2016-11-18  7:42 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: gcc-patches, nd

[-- Attachment #1: Type: text/plain, Size: 6679 bytes --]

James,

I incorporated all your suggestions, and successfully bootstrapped and re-ran the testsuite.

Okay for trunk?

2016-11-18  Michael Collison  <michael.collison@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_and_split_imm1, aarch64_and_split_imm2)
	(aarch64_and_bitmask_imm): New prototypes
	* config/aarch64/aarch64.c (aarch64_and_split_imm1):
	New overloaded function to create bit mask covering the
	lowest to highest bits set.
	(aarch64_and_split_imm2): New overloaded functions to create bit
	mask of zeros between first and last bit set.
	(aarch64_and_bitmask_imm): New function to determine if a integer
	is a valid two instruction "and" operation.
	* config/aarch64/aarch64.md:(and<mode>3): New define_insn and _split
	allowing wider range of constants with "and" operations.
	* (ior<mode>3, xor<mode>3): Use new LOGICAL2 iterator to prevent
	"and" operator from matching restricted constant range used for
	ior and xor operators.
	* config/aarch64/constraints.md (UsO constraint): New SImode constraint
	for constants in "and" operantions.
	(UsP constraint): New DImode constraint for constants in "and" operations.
	* config/aarch64/iterators.md (lconst2): New mode iterator.
	(LOGICAL2): New code iterator.
	* config/aarch64/predicates.md (aarch64_logical_and_immediate): New
	predicate
	(aarch64_logical_and_operand): New predicate allowing extended constants
	for "and" operations.
	* testsuite/gcc.target/aarch64/and_const.c: New test to verify
	additional constants are recognized and fewer instructions generated.
	* testsuite/gcc.target/aarch64/and_const2.c: New test to verify
	additional constants are recognized and fewer instructions generated.

-----Original Message-----
From: James Greenhalgh [mailto:james.greenhalgh@arm.com] 
Sent: Thursday, November 17, 2016 9:26 AM
To: Michael Collison
Cc: gcc-patches@gcc.gnu.org; nd
Subject: Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions

On Thu, Oct 27, 2016 at 08:44:02PM +0000, Michael Collison wrote:
> This patch is designed to improve code generation for "and" 
> instructions with certain immediate operands.
> 
> For the following test case:
> 
> int f2(int x)
> {
>    x &= 0x0ffffff8;
> 
>    x &= 0xff001fff;
> 
>    return x;
> }
> 
> the trunk aarch64 compiler generates:
> 
> mov	w1, 8184
> movk	w1, 0xf00, lsl 16
> and	w0, w0, w1
> 
> We can generate one fewer instruction if we recognize certain 
> constants. With the attached patch the current trunk compiler generates:
> 
> and	w0, w0, 268435448
> and	w0, w0, -16769025
> 
> Bootstrapped and make check successfully completed with no regressions 
> on aarch64-linux-gnu.
> 
> Okay for trunk?

Hi Michael,

I have some minor comments in line.

> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 3cdd69b..7ef8cdf 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;  
> HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);  
> int aarch64_get_condition_code (rtx);  bool aarch64_bitmask_imm 
> (HOST_WIDE_INT val, machine_mode);
> +unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in); 
> +unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in); 
> +bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, 
> +machine_mode mode);
>  int aarch64_branch_cost (bool, bool);  enum aarch64_symbol_type 
> aarch64_classify_symbolic_expression (rtx);  bool 
> aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); diff --git 
> a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 
> 3e663eb..db82c5c 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3600,6 +3600,46 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
>    return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];  }
>  
> +/* Create mask of ones, covering the lowest to highest bits set in 
> +VAL_IN.  */
> +
> +unsigned HOST_WIDE_INT
> +aarch64_and_split_imm1 (HOST_WIDE_INT val_in) {
> +  int first_bit_set = ctz_hwi (val_in);
> +  int last_bit_set = floor_log2 (val_in);
> +  gcc_assert (first_bit_set < last_bit_set);

This assert can never trigger (by definition) unless VAL_IN == 0 (in which case floor_log2 = -1, ctz_hwi = HOST_BITS_PER_WORD_SIZE). If that's the case you're testing for, then this assert would be more explicit as

 gcc_assert (val_in != 0)

I'd suggest renaming these to lowest_bit_set and highest_bit_set, as first and last are ambiguous (you have them the opposite way round from what I'd consider first [highest, thus leading bits] and last [lowest/trailing bits]).

> +
> +  return ((HOST_WIDE_INT_UC (2) << last_bit_set) -
> +	  (HOST_WIDE_INT_1U << first_bit_set)); }
> +
> +/* Create constant with zero bits between first and last bit set and one
> +   bits elsewhere.  */

I can't parse this comment in relation to what the code below does.
Looking at the code, you're building a new constant where the bits between the first and last bits are unmodified, and all other bits are set to one.

i.e. for 0000 1010 1000 you'd generate 1111 1010 1111.

> +unsigned HOST_WIDE_INT
> +aarch64_and_split_imm2 (HOST_WIDE_INT val_in) {
> +  return val_in | ~aarch64_and_split_imm1 (val_in); }
> +
> +/* Return true if VAL_IN is a valid 'and' bitmask immediate.  */
> +
> +bool
> +aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode 
> +mode) {
> +  if (aarch64_bitmask_imm (val_in, mode))
> +    return false;
> +
> +  if (aarch64_move_imm (val_in, mode))
> +    return false;
> +
> +  unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
> +
> +  if (!aarch64_bitmask_imm (imm2, mode))
> +    return false;
> +
> +  return true;

You can replace these four lines with:

  return aarch64_bitmask_imm (imm2, mode);

> +}
>  
>  /* Return true if val is an immediate that can be loaded into a
>     register in a single instruction.  */

> diff --git a/gcc/testsuite/gcc.target/aarch64/and_const.c 
> b/gcc/testsuite/gcc.target/aarch64/and_const.c
> new file mode 100644
> index 0000000..9c377d7
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/and_const.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2" } */
> +
> +int f2 (int x)
> +{
> +   x &= 0x0ffffff8;
> +
> +   x &= 0xff001fff;
> +
> +   return x;
> +}

It would be good to have a test for the DImode cases too (using long long).

Thanks,
James


[-- Attachment #2: gnutools_5860_ipreview4.patch --]
[-- Type: application/octet-stream, Size: 6304 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 3cdd69b..7ef8cdf 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
 int aarch64_get_condition_code (rtx);
 bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
+unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
+unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
+bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
 int aarch64_branch_cost (bool, bool);
 enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
 bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 3e663eb..8e33c40 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -3600,6 +3600,43 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
   return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
 }
 
+/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.  */
+
+unsigned HOST_WIDE_INT
+aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
+{
+  int lowest_bit_set = ctz_hwi (val_in);
+  int highest_bit_set = floor_log2 (val_in);
+  gcc_assert (val_in != 0);
+
+  return ((HOST_WIDE_INT_UC (2) << highest_bit_set) -
+	  (HOST_WIDE_INT_1U << lowest_bit_set));
+}
+
+/* Create constant where bits outside of lowest bit set to highest bit set
+   are set to 1.  */
+
+unsigned HOST_WIDE_INT
+aarch64_and_split_imm2 (HOST_WIDE_INT val_in)
+{
+  return val_in | ~aarch64_and_split_imm1 (val_in);
+}
+
+/* Return true if VAL_IN is a valid 'and' bitmask immediate.  */
+
+bool
+aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode)
+{
+  if (aarch64_bitmask_imm (val_in, mode))
+    return false;
+
+  if (aarch64_move_imm (val_in, mode))
+    return false;
+
+  unsigned HOST_WIDE_INT imm2 = aarch64_and_split_imm2 (val_in);
+
+  return aarch64_bitmask_imm (imm2, mode);
+}
 
 /* Return true if val is an immediate that can be loaded into a
    register in a single instruction.  */
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c95258b..462bf02 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -3404,6 +3404,26 @@
 ;; Logical operations
 ;; -------------------------------------------------------------------
 
+
+(define_insn_and_split "*aarch64_and<mode>_imm2"
+  [(set (match_operand:GPI 0 "register_operand" "=rk")
+	(and:GPI (match_operand:GPI 1 "register_operand" "%r")
+		 (match_operand:GPI 2 "aarch64_logical_and_immediate" "<lconst2>")))]
+  ""
+  "#"
+  "true"
+  [(const_int 0)]
+  {
+     HOST_WIDE_INT val = INTVAL (operands[2]);
+     rtx imm1 = GEN_INT (aarch64_and_split_imm1 (val));
+     rtx imm2 = GEN_INT (aarch64_and_split_imm2 (val));
+
+     emit_insn (gen_and<mode>3 (operands[0], operands[1], imm1));
+     emit_insn (gen_and<mode>3 (operands[0], operands[0], imm2));
+     DONE;
+  }
+)
+
 (define_insn "<optab><mode>3"
   [(set (match_operand:GPI 0 "register_operand" "=r,rk,w")
 	(LOGICAL:GPI (match_operand:GPI 1 "register_operand" "%r,r,w")
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index d64a7eb..7a2847a 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -69,6 +69,16 @@
  (and (match_code "const_int")
       (match_test "aarch64_move_imm (ival, DImode)")))
 
+(define_constraint "UsO"
+ "A constant that can be used with a 32-bit and operation."
+ (and (match_code "const_int")
+      (match_test "aarch64_and_bitmask_imm (ival, SImode)")))
+
+(define_constraint "UsP"
+ "A constant that can be used with a 64-bit and operation."
+ (and (match_code "const_int")
+      (match_test "aarch64_and_bitmask_imm (ival, DImode)")))
+
 (define_constraint "S"
   "A constraint that matches an absolute symbolic address."
   (and (match_code "const,symbol_ref,label_ref")
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 52f645a..3d63f34 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -440,6 +440,9 @@
 ;; Attribute to describe constants acceptable in logical operations
 (define_mode_attr lconst [(SI "K") (DI "L")])
 
+;; Attribute to describe constants acceptable in logical and operations
+(define_mode_attr lconst2 [(SI "UsO") (DI "UsP")])
+
 ;; Map a mode to a specific constraint character.
 (define_mode_attr cmode [(QI "q") (HI "h") (SI "s") (DI "d")])
 
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index ebda6d8..9839d20 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -106,6 +106,10 @@
   (ior (match_operand 0 "register_operand")
        (match_operand 0 "aarch64_logical_immediate")))
 
+(define_predicate "aarch64_logical_and_immediate"
+  (and (match_code "const_int")
+       (match_test "aarch64_and_bitmask_imm (INTVAL (op), mode)")))
+
 (define_predicate "aarch64_shift_imm_si"
   (and (match_code "const_int")
        (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 32")))
diff --git a/gcc/testsuite/gcc.target/aarch64/and_const.c b/gcc/testsuite/gcc.target/aarch64/and_const.c
new file mode 100644
index 0000000..9c377d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/and_const.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int f2 (int x)
+{
+   x &= 0x0ffffff8;
+
+   x &= 0xff001fff;
+
+   return x;
+}
+
+/* { dg-final { scan-assembler-times "and\t" 2 } } */
+/* { dg-final { scan-assembler-not "movk\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/and_const2.c b/gcc/testsuite/gcc.target/aarch64/and_const2.c
new file mode 100644
index 0000000..e2b8924
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/and_const2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+long long f2 (long x)
+{
+   x &= 0x0ffffffffffffff8LL;
+
+   x &= 0xff001fffLL;
+
+   return x;
+}
+
+/* { dg-final { scan-assembler-times "and\t" 2 } } */
+/* { dg-final { scan-assembler-not "movk\t" } } */
-- 
1.9.1


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions
  2016-11-18  7:42   ` Michael Collison
@ 2016-11-21  9:52     ` James Greenhalgh
  2016-11-23 13:42       ` Christophe Lyon
  0 siblings, 1 reply; 7+ messages in thread
From: James Greenhalgh @ 2016-11-21  9:52 UTC (permalink / raw)
  To: Michael Collison; +Cc: gcc-patches, nd

On Fri, Nov 18, 2016 at 07:41:58AM +0000, Michael Collison wrote:
> James,
> 
> I incorporated all your suggestions, and successfully bootstrapped and re-ran
> the testsuite.
> 
> Okay for trunk?
> 
> 2016-11-18  Michael Collison  <michael.collison@arm.com>
> 
> 	* config/aarch64/aarch64-protos.h
> 	(aarch64_and_split_imm1, aarch64_and_split_imm2)
> 	(aarch64_and_bitmask_imm): New prototypes
> 	* config/aarch64/aarch64.c (aarch64_and_split_imm1):
> 	New overloaded function to create bit mask covering the
> 	lowest to highest bits set.
> 	(aarch64_and_split_imm2): New overloaded functions to create bit
> 	mask of zeros between first and last bit set.
> 	(aarch64_and_bitmask_imm): New function to determine if a integer
> 	is a valid two instruction "and" operation.
> 	* config/aarch64/aarch64.md:(and<mode>3): New define_insn and _split
> 	allowing wider range of constants with "and" operations.
> 	* (ior<mode>3, xor<mode>3): Use new LOGICAL2 iterator to prevent
> 	"and" operator from matching restricted constant range used for
> 	ior and xor operators.
> 	* config/aarch64/constraints.md (UsO constraint): New SImode constraint
> 	for constants in "and" operantions.
> 	(UsP constraint): New DImode constraint for constants in "and" operations.
> 	* config/aarch64/iterators.md (lconst2): New mode iterator.
> 	(LOGICAL2): New code iterator.
> 	* config/aarch64/predicates.md (aarch64_logical_and_immediate): New
> 	predicate
> 	(aarch64_logical_and_operand): New predicate allowing extended constants
> 	for "and" operations.
> 	* testsuite/gcc.target/aarch64/and_const.c: New test to verify
> 	additional constants are recognized and fewer instructions generated.
> 	* testsuite/gcc.target/aarch64/and_const2.c: New test to verify
> 	additional constants are recognized and fewer instructions generated.
> 

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 3cdd69b..7ef8cdf 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
>  int aarch64_get_condition_code (rtx);
>  bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
> +unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
> +unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
> +bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
>  int aarch64_branch_cost (bool, bool);
>  enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
>  bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 3e663eb..8e33c40 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -3600,6 +3600,43 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
>    return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
>  }
>  
> +/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.  */
> +
> +unsigned HOST_WIDE_INT
> +aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
> +{
> +  int lowest_bit_set = ctz_hwi (val_in);
> +  int highest_bit_set = floor_log2 (val_in);
> +  gcc_assert (val_in != 0);

The comment above the function should make this precondition clear. Or you
could pick a well defined behaviour for when val_in == 0 (return 0?), if
that fits the rest of the algorithm.

Otherwise, this patch looks OK to me.

Thanks,
James 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions
  2016-11-21  9:52     ` James Greenhalgh
@ 2016-11-23 13:42       ` Christophe Lyon
  2016-11-23 16:30         ` Michael Collison
  0 siblings, 1 reply; 7+ messages in thread
From: Christophe Lyon @ 2016-11-23 13:42 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: Michael Collison, gcc-patches, nd

Hi Michael,


On 21 November 2016 at 10:52, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Fri, Nov 18, 2016 at 07:41:58AM +0000, Michael Collison wrote:
>> James,
>>
>> I incorporated all your suggestions, and successfully bootstrapped and re-ran
>> the testsuite.
>>
>> Okay for trunk?
>>
>> 2016-11-18  Michael Collison  <michael.collison@arm.com>
>>
>>       * config/aarch64/aarch64-protos.h
>>       (aarch64_and_split_imm1, aarch64_and_split_imm2)
>>       (aarch64_and_bitmask_imm): New prototypes
>>       * config/aarch64/aarch64.c (aarch64_and_split_imm1):
>>       New overloaded function to create bit mask covering the
>>       lowest to highest bits set.
>>       (aarch64_and_split_imm2): New overloaded functions to create bit
>>       mask of zeros between first and last bit set.
>>       (aarch64_and_bitmask_imm): New function to determine if a integer
>>       is a valid two instruction "and" operation.
>>       * config/aarch64/aarch64.md:(and<mode>3): New define_insn and _split
>>       allowing wider range of constants with "and" operations.
>>       * (ior<mode>3, xor<mode>3): Use new LOGICAL2 iterator to prevent
>>       "and" operator from matching restricted constant range used for
>>       ior and xor operators.
>>       * config/aarch64/constraints.md (UsO constraint): New SImode constraint
>>       for constants in "and" operantions.
>>       (UsP constraint): New DImode constraint for constants in "and" operations.
>>       * config/aarch64/iterators.md (lconst2): New mode iterator.
>>       (LOGICAL2): New code iterator.
>>       * config/aarch64/predicates.md (aarch64_logical_and_immediate): New
>>       predicate
>>       (aarch64_logical_and_operand): New predicate allowing extended constants
>>       for "and" operations.
>>       * testsuite/gcc.target/aarch64/and_const.c: New test to verify
>>       additional constants are recognized and fewer instructions generated.
>>       * testsuite/gcc.target/aarch64/and_const2.c: New test to verify
>>       additional constants are recognized and fewer instructions generated.
>>
>
>> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
>> index 3cdd69b..7ef8cdf 100644
>> --- a/gcc/config/aarch64/aarch64-protos.h
>> +++ b/gcc/config/aarch64/aarch64-protos.h
>> @@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;
>>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
>>  int aarch64_get_condition_code (rtx);
>>  bool aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
>> +unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT val_in);
>> +unsigned HOST_WIDE_INT aarch64_and_split_imm2 (HOST_WIDE_INT val_in);
>> +bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
>>  int aarch64_branch_cost (bool, bool);
>>  enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
>>  bool aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT);
>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
>> index 3e663eb..8e33c40 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -3600,6 +3600,43 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
>>    return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
>>  }
>>
>> +/* Create mask of ones, covering the lowest to highest bits set in VAL_IN.  */
>> +
>> +unsigned HOST_WIDE_INT
>> +aarch64_and_split_imm1 (HOST_WIDE_INT val_in)
>> +{
>> +  int lowest_bit_set = ctz_hwi (val_in);
>> +  int highest_bit_set = floor_log2 (val_in);
>> +  gcc_assert (val_in != 0);
>
> The comment above the function should make this precondition clear. Or you
> could pick a well defined behaviour for when val_in == 0 (return 0?), if
> that fits the rest of the algorithm.
>
> Otherwise, this patch looks OK to me.
>
> Thanks,
> James
>

This patch (r242739) causes a regression on aarch64:

  gcc.dg/zero_bits_compound-2.c scan-assembler-times \\(and: 2
now fails.

Christophe.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* RE: [Aarch64][PATCH] Improve Logical And Immediate Expressions
  2016-11-23 13:42       ` Christophe Lyon
@ 2016-11-23 16:30         ` Michael Collison
  2016-11-23 18:43           ` Christophe Lyon
  0 siblings, 1 reply; 7+ messages in thread
From: Michael Collison @ 2016-11-23 16:30 UTC (permalink / raw)
  To: Christophe Lyon, James Greenhalgh; +Cc: gcc-patches, nd

Hi Christophe,

This is not a regression per se; the patch causes the test case to generate one less instruction overall, but one additional 'and'. Trunk before the patch (-O2):

foo:
	and	w0, w0, 255
	lsl	w1, w0, 20
	orr	w0, w1, w0, lsl 8
	mov	w1, 65024
	movk	w1, 0xfe0, lsl 16
	and	w0, w0, w1
	ret

The new trunk aarch64 compiler with the patch generates fewer instructions but one additional 'and'

foo:
	and	w0, w0, 255
	lsl	w1, w0, 20
	orr	w0, w1, w0, lsl 8
	and	x0, x0, 268434944
	and	x0, x0, -2031617
	ret

-----Original Message-----
From: Christophe Lyon [mailto:christophe.lyon@linaro.org] 
Sent: Wednesday, November 23, 2016 6:42 AM
To: James Greenhalgh
Cc: Michael Collison; gcc-patches@gcc.gnu.org; nd
Subject: Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions

Hi Michael,


On 21 November 2016 at 10:52, James Greenhalgh <james.greenhalgh@arm.com> wrote:
> On Fri, Nov 18, 2016 at 07:41:58AM +0000, Michael Collison wrote:
>> James,
>>
>> I incorporated all your suggestions, and successfully bootstrapped 
>> and re-ran the testsuite.
>>
>> Okay for trunk?
>>
>> 2016-11-18  Michael Collison  <michael.collison@arm.com>
>>
>>       * config/aarch64/aarch64-protos.h
>>       (aarch64_and_split_imm1, aarch64_and_split_imm2)
>>       (aarch64_and_bitmask_imm): New prototypes
>>       * config/aarch64/aarch64.c (aarch64_and_split_imm1):
>>       New overloaded function to create bit mask covering the
>>       lowest to highest bits set.
>>       (aarch64_and_split_imm2): New overloaded functions to create bit
>>       mask of zeros between first and last bit set.
>>       (aarch64_and_bitmask_imm): New function to determine if a integer
>>       is a valid two instruction "and" operation.
>>       * config/aarch64/aarch64.md:(and<mode>3): New define_insn and _split
>>       allowing wider range of constants with "and" operations.
>>       * (ior<mode>3, xor<mode>3): Use new LOGICAL2 iterator to prevent
>>       "and" operator from matching restricted constant range used for
>>       ior and xor operators.
>>       * config/aarch64/constraints.md (UsO constraint): New SImode constraint
>>       for constants in "and" operantions.
>>       (UsP constraint): New DImode constraint for constants in "and" operations.
>>       * config/aarch64/iterators.md (lconst2): New mode iterator.
>>       (LOGICAL2): New code iterator.
>>       * config/aarch64/predicates.md (aarch64_logical_and_immediate): New
>>       predicate
>>       (aarch64_logical_and_operand): New predicate allowing extended constants
>>       for "and" operations.
>>       * testsuite/gcc.target/aarch64/and_const.c: New test to verify
>>       additional constants are recognized and fewer instructions generated.
>>       * testsuite/gcc.target/aarch64/and_const2.c: New test to verify
>>       additional constants are recognized and fewer instructions generated.
>>
>
>> diff --git a/gcc/config/aarch64/aarch64-protos.h 
>> b/gcc/config/aarch64/aarch64-protos.h
>> index 3cdd69b..7ef8cdf 100644
>> --- a/gcc/config/aarch64/aarch64-protos.h
>> +++ b/gcc/config/aarch64/aarch64-protos.h
>> @@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;  
>> HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, 
>> unsigned);  int aarch64_get_condition_code (rtx);  bool 
>> aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
>> +unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT 
>> +val_in); unsigned HOST_WIDE_INT aarch64_and_split_imm2 
>> +(HOST_WIDE_INT val_in); bool aarch64_and_bitmask_imm (unsigned 
>> +HOST_WIDE_INT val_in, machine_mode mode);
>>  int aarch64_branch_cost (bool, bool);  enum aarch64_symbol_type 
>> aarch64_classify_symbolic_expression (rtx);  bool 
>> aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); diff --git 
>> a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 
>> 3e663eb..8e33c40 100644
>> --- a/gcc/config/aarch64/aarch64.c
>> +++ b/gcc/config/aarch64/aarch64.c
>> @@ -3600,6 +3600,43 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
>>    return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];  
>> }
>>
>> +/* Create mask of ones, covering the lowest to highest bits set in 
>> +VAL_IN.  */
>> +
>> +unsigned HOST_WIDE_INT
>> +aarch64_and_split_imm1 (HOST_WIDE_INT val_in) {
>> +  int lowest_bit_set = ctz_hwi (val_in);
>> +  int highest_bit_set = floor_log2 (val_in);
>> +  gcc_assert (val_in != 0);
>
> The comment above the function should make this precondition clear. Or 
> you could pick a well defined behaviour for when val_in == 0 (return 
> 0?), if that fits the rest of the algorithm.
>
> Otherwise, this patch looks OK to me.
>
> Thanks,
> James
>

This patch (r242739) causes a regression on aarch64:

  gcc.dg/zero_bits_compound-2.c scan-assembler-times \\(and: 2 now fails.

Christophe.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions
  2016-11-23 16:30         ` Michael Collison
@ 2016-11-23 18:43           ` Christophe Lyon
  0 siblings, 0 replies; 7+ messages in thread
From: Christophe Lyon @ 2016-11-23 18:43 UTC (permalink / raw)
  To: Michael Collison; +Cc: James Greenhalgh, gcc-patches, nd

On 23 November 2016 at 17:30, Michael Collison <Michael.Collison@arm.com> wrote:
> Hi Christophe,
>
> This is not a regression per se; the patch causes the test case to generate one less instruction overall, but one additional 'and'. Trunk before the patch (-O2):
>
> foo:
>         and     w0, w0, 255
>         lsl     w1, w0, 20
>         orr     w0, w1, w0, lsl 8
>         mov     w1, 65024
>         movk    w1, 0xfe0, lsl 16
>         and     w0, w0, w1
>         ret
>
> The new trunk aarch64 compiler with the patch generates fewer instructions but one additional 'and'
>
> foo:
>         and     w0, w0, 255
>         lsl     w1, w0, 20
>         orr     w0, w1, w0, lsl 8
>         and     x0, x0, 268434944
>         and     x0, x0, -2031617
>         ret
>

OK,
by "regression" I meant that a test used to pass and now fails.
It looks like you have to update it to take into account the new, better code.

Christophe

> -----Original Message-----
> From: Christophe Lyon [mailto:christophe.lyon@linaro.org]
> Sent: Wednesday, November 23, 2016 6:42 AM
> To: James Greenhalgh
> Cc: Michael Collison; gcc-patches@gcc.gnu.org; nd
> Subject: Re: [Aarch64][PATCH] Improve Logical And Immediate Expressions
>
> Hi Michael,
>
>
> On 21 November 2016 at 10:52, James Greenhalgh <james.greenhalgh@arm.com> wrote:
>> On Fri, Nov 18, 2016 at 07:41:58AM +0000, Michael Collison wrote:
>>> James,
>>>
>>> I incorporated all your suggestions, and successfully bootstrapped
>>> and re-ran the testsuite.
>>>
>>> Okay for trunk?
>>>
>>> 2016-11-18  Michael Collison  <michael.collison@arm.com>
>>>
>>>       * config/aarch64/aarch64-protos.h
>>>       (aarch64_and_split_imm1, aarch64_and_split_imm2)
>>>       (aarch64_and_bitmask_imm): New prototypes
>>>       * config/aarch64/aarch64.c (aarch64_and_split_imm1):
>>>       New overloaded function to create bit mask covering the
>>>       lowest to highest bits set.
>>>       (aarch64_and_split_imm2): New overloaded functions to create bit
>>>       mask of zeros between first and last bit set.
>>>       (aarch64_and_bitmask_imm): New function to determine if a integer
>>>       is a valid two instruction "and" operation.
>>>       * config/aarch64/aarch64.md:(and<mode>3): New define_insn and _split
>>>       allowing wider range of constants with "and" operations.
>>>       * (ior<mode>3, xor<mode>3): Use new LOGICAL2 iterator to prevent
>>>       "and" operator from matching restricted constant range used for
>>>       ior and xor operators.
>>>       * config/aarch64/constraints.md (UsO constraint): New SImode constraint
>>>       for constants in "and" operantions.
>>>       (UsP constraint): New DImode constraint for constants in "and" operations.
>>>       * config/aarch64/iterators.md (lconst2): New mode iterator.
>>>       (LOGICAL2): New code iterator.
>>>       * config/aarch64/predicates.md (aarch64_logical_and_immediate): New
>>>       predicate
>>>       (aarch64_logical_and_operand): New predicate allowing extended constants
>>>       for "and" operations.
>>>       * testsuite/gcc.target/aarch64/and_const.c: New test to verify
>>>       additional constants are recognized and fewer instructions generated.
>>>       * testsuite/gcc.target/aarch64/and_const2.c: New test to verify
>>>       additional constants are recognized and fewer instructions generated.
>>>
>>
>>> diff --git a/gcc/config/aarch64/aarch64-protos.h
>>> b/gcc/config/aarch64/aarch64-protos.h
>>> index 3cdd69b..7ef8cdf 100644
>>> --- a/gcc/config/aarch64/aarch64-protos.h
>>> +++ b/gcc/config/aarch64/aarch64-protos.h
>>> @@ -300,6 +300,9 @@ extern struct tune_params aarch64_tune_params;
>>> HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned,
>>> unsigned);  int aarch64_get_condition_code (rtx);  bool
>>> aarch64_bitmask_imm (HOST_WIDE_INT val, machine_mode);
>>> +unsigned HOST_WIDE_INT aarch64_and_split_imm1 (HOST_WIDE_INT
>>> +val_in); unsigned HOST_WIDE_INT aarch64_and_split_imm2
>>> +(HOST_WIDE_INT val_in); bool aarch64_and_bitmask_imm (unsigned
>>> +HOST_WIDE_INT val_in, machine_mode mode);
>>>  int aarch64_branch_cost (bool, bool);  enum aarch64_symbol_type
>>> aarch64_classify_symbolic_expression (rtx);  bool
>>> aarch64_const_vec_all_same_int_p (rtx, HOST_WIDE_INT); diff --git
>>> a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index
>>> 3e663eb..8e33c40 100644
>>> --- a/gcc/config/aarch64/aarch64.c
>>> +++ b/gcc/config/aarch64/aarch64.c
>>> @@ -3600,6 +3600,43 @@ aarch64_bitmask_imm (HOST_WIDE_INT val_in, machine_mode mode)
>>>    return val == mask * bitmask_imm_mul[__builtin_clz (bits) - 26];
>>> }
>>>
>>> +/* Create mask of ones, covering the lowest to highest bits set in
>>> +VAL_IN.  */
>>> +
>>> +unsigned HOST_WIDE_INT
>>> +aarch64_and_split_imm1 (HOST_WIDE_INT val_in) {
>>> +  int lowest_bit_set = ctz_hwi (val_in);
>>> +  int highest_bit_set = floor_log2 (val_in);
>>> +  gcc_assert (val_in != 0);
>>
>> The comment above the function should make this precondition clear. Or
>> you could pick a well defined behaviour for when val_in == 0 (return
>> 0?), if that fits the rest of the algorithm.
>>
>> Otherwise, this patch looks OK to me.
>>
>> Thanks,
>> James
>>
>
> This patch (r242739) causes a regression on aarch64:
>
>   gcc.dg/zero_bits_compound-2.c scan-assembler-times \\(and: 2 now fails.
>
> Christophe.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2016-11-23 18:43 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-10-27 20:44 [Aarch64][PATCH] Improve Logical And Immediate Expressions Michael Collison
2016-11-17 16:26 ` James Greenhalgh
2016-11-18  7:42   ` Michael Collison
2016-11-21  9:52     ` James Greenhalgh
2016-11-23 13:42       ` Christophe Lyon
2016-11-23 16:30         ` Michael Collison
2016-11-23 18:43           ` Christophe Lyon

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).