public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
@ 2017-04-18 16:51 Sudi Das
  2017-05-05 13:38 ` Richard Earnshaw (lists)
  0 siblings, 1 reply; 12+ messages in thread
From: Sudi Das @ 2017-04-18 16:51 UTC (permalink / raw)
  To: gcc-patches; +Cc: nd, Marcus Shawcroft, Richard Earnshaw, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 1606 bytes --]


Hello all

This patch adds the support for BIC (vector, immediate) and ORR (vector, immediate) SIMD patterns to the AArch64 backend.
One of the examples of this is : (with -O2 -ftree-vectorize)

void
bic_s (short *a)
{
  for (int i = 0; i < 1024; i++)
    a[i] &= ~(0xff);
}

which now produces :
bic_s:
	add	x1, x0, 2048
	.p2align 2
.L2:
	ldr	q0, [x0]
	bic	v0.8h, #255
	str	q0, [x0], 16
	cmp	x1, x0
	bne	.L2
	ret

instead of
bic_s:
	movi	v1.8h, 0xff, lsl 8
	add	x1, x0, 2048
	.p2align 2
.L2:
	ldr	q0, [x0]
	and	v0.16b, v0.16b, v1.16b
	str	q0, [x0], 16
	cmp	x1, x0
	bne	.L2
	ret

Added new tests and checked for regressions on bootstrapped aarch64-none-linux-gnu
Ok for stage 1?

Thanks 
Sudi

2017-04-04 Sudakshina Das  <sudi.das@arm.com>

	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
	for aarch64_simd_valid_immediate.
	(aarch64_output_simd_general_immediate): New declaration.
	(aarch64_simd_valid_immediate): Update prototype.

	* config/aarch64/aarch64-simd.md (*bic_imm_<mode>3): New pattern.
	(*ior_imm_<mode>3): Likewise.

	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
	for valid immediate for BIC and ORR based on new enum argument.
	(aarch64_output_simd_general_immediate): New function to output new BIC/ORR.
 
	* config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p) : New.
	(aarch64_simd_valid_orr_imm_p) : Likewise.

2017-04-04 Sudakshina Das  <sudi.das@arm.com>

	* gcc.target/aarch64/bic_imm_1.c: New test.
	* gcc.target/aarch64/orr_imm_1.c: Likewise.

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch-7260-2.diff --]
[-- Type: text/x-patch; name="patch-7260-2.diff", Size: 11730 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 9543f8c..89cc455 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -297,6 +297,15 @@ enum aarch64_parse_opt_result
   AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
 };
 
+/* Enum to distinguish which type of check is to be done in
+   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
+   has both bits set.  Adding new types would require changes accordingly.  */
+enum simd_immediate_check {
+  CHECK_I   = 1,	/* Perform only non-inverted immediate checks (ORR).  */
+  CHECK_NI  = 2,	/* Perform only inverted immediate checks (BIC).  */
+  CHECK_ALL = 3		/* Perform all checks (MOVI/MNVI).  */
+};
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -334,6 +343,8 @@ rtx aarch64_reverse_mask (enum machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
 char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
+char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
+					     const char*);
 bool aarch64_pad_arg_upward (machine_mode, const_tree);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
@@ -345,7 +356,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
-				   struct simd_immediate_info *);
+				   struct simd_immediate_info *,
+				   enum simd_immediate_check w = CHECK_ALL);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c462164..92275dc 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -280,6 +280,26 @@
   [(set_attr "type" "neon_logic<q>")]
 )
 
+(define_insn "*bic_imm_<mode>3"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
+		(match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))]
+ "TARGET_SIMD"
+ { return aarch64_output_simd_general_immediate (operands[2],
+			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); }
+  [(set_attr "type" "neon_logic<q>")]
+)
+
+(define_insn "*ior_imm_<mode>3"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=w")
+       (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
+		(match_operand:VDQ_I 2 "aarch64_simd_valid_orr_imm_p" "")))]
+ "TARGET_SIMD"
+ { return aarch64_output_simd_general_immediate (operands[2],
+			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); }
+  [(set_attr "type" "neon_logic<q>")]
+)
+
 (define_insn "add<mode>3"
   [(set (match_operand:VDQ_I 0 "register_operand" "=w")
         (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4f769a4..450c42d 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11066,7 +11066,8 @@ aarch64_vect_float_const_representable_p (rtx x)
 /* Return true for valid and false for invalid.  */
 bool
 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
-			      struct simd_immediate_info *info)
+			      struct simd_immediate_info *info,
+			      enum simd_immediate_check which)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -11130,54 +11131,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
 
   do
     {
-      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
+      if (which & CHECK_I)
+	{
+	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
 
-      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
+	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
 
-      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
+	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
 
-      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	}
 
-      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
+      if (which & CHECK_NI)
+	{
+	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
 
-      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
+	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
 
-      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
+	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
 
-      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	}
 
-      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+      /* Shifting ones / 8-bit / 64-bit variants only checked
+	 for 'ALL' (MOVI/MVNI).  */
+      if (which == CHECK_ALL)
+	{
+	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
+	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
 
-      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
-	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	}
     }
   while (0);
 
@@ -12598,6 +12610,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
   return templ;
 }
 
+/* This function is similar to aarch64_output_simd_mov_immediate, used for
+   immediate versions of 'bic' or 'orr'.  */
+char*
+aarch64_output_simd_general_immediate (rtx const_vector,
+				       machine_mode mode,
+				       unsigned width,
+				       const char *mnemonic)
+{
+  bool is_valid;
+  static char templ[40];
+  unsigned int lane_count = 0;
+  char element_char;
+
+  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
+
+  if (strcmp (mnemonic, "orr") == 0)
+    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					     &info, CHECK_I);
+  else
+    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					     &info, CHECK_NI);
+
+  gcc_assert (is_valid);
+  gcc_assert (CONST_INT_P (info.value));
+
+  element_char = sizetochar (info.element_width);
+  lane_count = width / info.element_width;
+
+  if (lane_count == 1)
+    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
+	      mnemonic, UINTVAL (info.value));
+  else if (info.shift)
+    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
+	      ", %s #%d", mnemonic, lane_count, element_char,
+	      UINTVAL (info.value), "lsl", info.shift);
+  else
+    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
+	      mnemonic, lane_count, element_char, UINTVAL (info.value));
+  return templ;
+}
+
 char*
 aarch64_output_scalar_simd_mov_immediate (rtx immediate,
 					  machine_mode mode)
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index e83d45b..fe65f2b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -307,6 +307,18 @@
   return aarch64_simd_shift_imm_p (op, mode, false);
 })
 
+(define_special_predicate "aarch64_simd_valid_bic_imm_p"
+  (match_code "const_vector")
+{
+  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_NI);
+})
+
+(define_special_predicate "aarch64_simd_valid_orr_imm_p"
+  (match_code "const_vector")
+{
+  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_I);
+})
+
 (define_predicate "aarch64_simd_reg_or_zero"
   (and (match_code "reg,subreg,const_int,const_double,const_vector")
        (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
new file mode 100644
index 0000000..d94dd90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
@@ -0,0 +1,26 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+bic_s (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff);
+}
+
+void
+bic_ss (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff00);
+}
+
+void
+bic_int (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff);
+}
+
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
new file mode 100644
index 0000000..919a6ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+orr_s (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+void
+orr_int (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-04-18 16:51 [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern Sudi Das
@ 2017-05-05 13:38 ` Richard Earnshaw (lists)
  2017-08-07 13:56   ` Sudi Das
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Earnshaw (lists) @ 2017-05-05 13:38 UTC (permalink / raw)
  To: Sudi Das, gcc-patches; +Cc: nd, Marcus Shawcroft, James Greenhalgh

On 18/04/17 17:39, Sudi Das wrote:
> 
> Hello all
> 
> This patch adds the support for BIC (vector, immediate) and ORR (vector, immediate) SIMD patterns to the AArch64 backend.
> One of the examples of this is : (with -O2 -ftree-vectorize)
> 
> void
> bic_s (short *a)
> {
>   for (int i = 0; i < 1024; i++)
>     a[i] &= ~(0xff);
> }
> 
> which now produces :
> bic_s:
> 	add	x1, x0, 2048
> 	.p2align 2
> .L2:
> 	ldr	q0, [x0]
> 	bic	v0.8h, #255
> 	str	q0, [x0], 16
> 	cmp	x1, x0
> 	bne	.L2
> 	ret
> 
> instead of
> bic_s:
> 	movi	v1.8h, 0xff, lsl 8
> 	add	x1, x0, 2048
> 	.p2align 2
> .L2:
> 	ldr	q0, [x0]
> 	and	v0.16b, v0.16b, v1.16b
> 	str	q0, [x0], 16
> 	cmp	x1, x0
> 	bne	.L2
> 	ret
> 
> Added new tests and checked for regressions on bootstrapped aarch64-none-linux-gnu
> Ok for stage 1?
> 
> Thanks 
> Sudi
> 
> 2017-04-04 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
> 	for aarch64_simd_valid_immediate.
> 	(aarch64_output_simd_general_immediate): New declaration.
> 	(aarch64_simd_valid_immediate): Update prototype.
> 
> 	* config/aarch64/aarch64-simd.md (*bic_imm_<mode>3): New pattern.
> 	(*ior_imm_<mode>3): Likewise.
> 
> 	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
> 	for valid immediate for BIC and ORR based on new enum argument.
> 	(aarch64_output_simd_general_immediate): New function to output new BIC/ORR.
>  
> 	* config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p) : New.
> 	(aarch64_simd_valid_orr_imm_p) : Likewise.
> 
> 2017-04-04 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* gcc.target/aarch64/bic_imm_1.c: New test.
> 	* gcc.target/aarch64/orr_imm_1.c: Likewise.
> 
> 
> patch-7260-2.diff
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 9543f8c..89cc455 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -297,6 +297,15 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
> +   has both bits set.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  CHECK_I   = 1,	/* Perform only non-inverted immediate checks (ORR).  */
> +  CHECK_NI  = 2,	/* Perform only inverted immediate checks (BIC).  */
> +  CHECK_ALL = 3		/* Perform all checks (MOVI/MNVI).  */
> +};
> +
>  extern struct tune_params aarch64_tune_params;
>  
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> @@ -334,6 +343,8 @@ rtx aarch64_reverse_mask (enum machine_mode);
>  bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
>  char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
>  char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
> +char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
> +					     const char*);
>  bool aarch64_pad_arg_upward (machine_mode, const_tree);
>  bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
>  bool aarch64_regno_ok_for_base_p (int, bool);
> @@ -345,7 +356,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
>  bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
>  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>  bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
> -				   struct simd_immediate_info *);
> +				   struct simd_immediate_info *,
> +				   enum simd_immediate_check w = CHECK_ALL);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
>  bool aarch64_uimm12_shift (HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index c462164..92275dc 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -280,6 +280,26 @@
>    [(set_attr "type" "neon_logic<q>")]
>  )
>  
> +(define_insn "*bic_imm_<mode>3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> +		(match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> +			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); }
> +  [(set_attr "type" "neon_logic<q>")]
> +)
> +
> +(define_insn "*ior_imm_<mode>3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> +		(match_operand:VDQ_I 2 "aarch64_simd_valid_orr_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> +			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); }
> +  [(set_attr "type" "neon_logic<q>")]
> +)

Both of these generate the same RTL constructs as the simd ior<mode>3
and and<mode>3 patterns, so should be merged as subcases of those
patterns (and handled with suitable constraint alternatives).

R.

> +
>  (define_insn "add<mode>3"
>    [(set (match_operand:VDQ_I 0 "register_operand" "=w")
>          (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 4f769a4..450c42d 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11066,7 +11066,8 @@ aarch64_vect_float_const_representable_p (rtx x)
>  /* Return true for valid and false for invalid.  */
>  bool
>  aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
> -			      struct simd_immediate_info *info)
> +			      struct simd_immediate_info *info,
> +			      enum simd_immediate_check which)
>  {
>  #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
>    matches = 1;						\
> @@ -11130,54 +11131,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
>  
>    do
>      {
> -      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
> +      if (which & CHECK_I)
> +	{
> +	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
>  
> -      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
> +	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
>  
> -      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
> +	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
>  
> -      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +	}
>  
> -      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
> +      if (which & CHECK_NI)
> +	{
> +	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
>  
> -      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
> +	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
>  
> -      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
> +	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
>  
> -      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +	}
>  
> -      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +      /* Shifting ones / 8-bit / 64-bit variants only checked
> +	 for 'ALL' (MOVI/MVNI).  */
> +      if (which == CHECK_ALL)
> +	{
> +	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
> +	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
>  
> -      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> -	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> +		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +	}
>      }
>    while (0);
>  
> @@ -12598,6 +12610,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>    return templ;
>  }
>  
> +/* This function is similar to aarch64_output_simd_mov_immediate, used for
> +   immediate versions of 'bic' or 'orr'.  */
> +char*
> +aarch64_output_simd_general_immediate (rtx const_vector,
> +				       machine_mode mode,
> +				       unsigned width,
> +				       const char *mnemonic)
> +{
> +  bool is_valid;
> +  static char templ[40];
> +  unsigned int lane_count = 0;
> +  char element_char;
> +
> +  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
> +
> +  if (strcmp (mnemonic, "orr") == 0)
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					     &info, CHECK_I);
> +  else
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					     &info, CHECK_NI);
> +
> +  gcc_assert (is_valid);
> +  gcc_assert (CONST_INT_P (info.value));
> +
> +  element_char = sizetochar (info.element_width);
> +  lane_count = width / info.element_width;
> +
> +  if (lane_count == 1)
> +    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
> +	      mnemonic, UINTVAL (info.value));
> +  else if (info.shift)
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
> +	      ", %s #%d", mnemonic, lane_count, element_char,
> +	      UINTVAL (info.value), "lsl", info.shift);
> +  else
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
> +	      mnemonic, lane_count, element_char, UINTVAL (info.value));
> +  return templ;
> +}
> +
>  char*
>  aarch64_output_scalar_simd_mov_immediate (rtx immediate,
>  					  machine_mode mode)
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index e83d45b..fe65f2b 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -307,6 +307,18 @@
>    return aarch64_simd_shift_imm_p (op, mode, false);
>  })
>  
> +(define_special_predicate "aarch64_simd_valid_bic_imm_p"
> +  (match_code "const_vector")
> +{
> +  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_NI);
> +})
> +
> +(define_special_predicate "aarch64_simd_valid_orr_imm_p"
> +  (match_code "const_vector")
> +{
> +  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_I);
> +})
> +
>  (define_predicate "aarch64_simd_reg_or_zero"
>    (and (match_code "reg,subreg,const_int,const_double,const_vector")
>         (ior (match_operand 0 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> new file mode 100644
> index 0000000..d94dd90
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> @@ -0,0 +1,26 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +bic_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +void
> +bic_ss (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff00);
> +}
> +
> +void
> +bic_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> new file mode 100644
> index 0000000..919a6ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> @@ -0,0 +1,18 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +orr_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +void
> +orr_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-05-05 13:38 ` Richard Earnshaw (lists)
@ 2017-08-07 13:56   ` Sudi Das
  2017-09-20 10:39     ` James Greenhalgh
  0 siblings, 1 reply; 12+ messages in thread
From: Sudi Das @ 2017-08-07 13:56 UTC (permalink / raw)
  To: Richard Earnshaw, gcc-patches; +Cc: nd, Marcus Shawcroft, James Greenhalgh

[-- Attachment #1: Type: text/plain, Size: 17000 bytes --]


Hi Richard

I have updated the patch according to your comments. Thanks for pointing it out and sorry for the delay!

Sudi

2017-08-07 Sudakshina Das  <sudi.das@arm.com>

	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
	for aarch64_simd_valid_immediate.
	(aarch64_output_simd_general_immediate): New declaration.
	(aarch64_simd_valid_immediate): Update prototype.

	* config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
	support for ORR-immediate.
	(and<mode>3): modified pattern to add support for BIC-immediate.

	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
	for valid immediate for BIC and ORR based on new enum argument.
	(aarch64_output_simd_general_immediate): New function to output new BIC/ORR.
 
	* config/aarch64/constraints.md (Do): New vector immediate constraint.
	(Db): Likewise.

2017-08-07 Sudakshina Das  <sudi.das@arm.com>

	* gcc.target/aarch64/bic_imm_1.c: New test.
	* gcc.target/aarch64/orr_imm_1.c: Likewise.





From: Richard Earnshaw (lists) <Richard.Earnshaw@arm.com>
Sent: Friday, May 5, 2017 2:30 PM
To: Sudi Das; gcc-patches@gcc.gnu.org
Cc: nd; Marcus Shawcroft; James Greenhalgh
Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
    
On 18/04/17 17:39, Sudi Das wrote:
> 
> Hello all
> 
> This patch adds the support for BIC (vector, immediate) and ORR (vector, immediate) SIMD patterns to the AArch64 backend.
> One of the examples of this is : (with -O2 -ftree-vectorize)
> 
> void
> bic_s (short *a)
> {
>   for (int i = 0; i < 1024; i++)
>     a[i] &= ~(0xff);
> }
> 
> which now produces :
> bic_s:
>        add     x1, x0, 2048
>        .p2align 2
> .L2:
>        ldr     q0, [x0]
>        bic     v0.8h, #255
>        str     q0, [x0], 16
>        cmp     x1, x0
>        bne     .L2
>        ret
> 
> instead of
> bic_s:
>        movi    v1.8h, 0xff, lsl 8
>        add     x1, x0, 2048
>        .p2align 2
> .L2:
>        ldr     q0, [x0]
>        and     v0.16b, v0.16b, v1.16b
>        str     q0, [x0], 16
>        cmp     x1, x0
>        bne     .L2
>        ret
> 
> Added new tests and checked for regressions on bootstrapped aarch64-none-linux-gnu
> Ok for stage 1?
> 
> Thanks 
> Sudi
> 
> 2017-04-04 Sudakshina Das  <sudi.das@arm.com>
> 
>        * config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
>        for aarch64_simd_valid_immediate.
>        (aarch64_output_simd_general_immediate): New declaration.
>        (aarch64_simd_valid_immediate): Update prototype.
> 
>        * config/aarch64/aarch64-simd.md (*bic_imm_<mode>3): New pattern.
>        (*ior_imm_<mode>3): Likewise.
> 
>        * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
>        for valid immediate for BIC and ORR based on new enum argument.
>        (aarch64_output_simd_general_immediate): New function to output new BIC/ORR.
>  
>        * config/aarch64/predicates.md (aarch64_simd_valid_bic_imm_p) : New.
>        (aarch64_simd_valid_orr_imm_p) : Likewise.
> 
> 2017-04-04 Sudakshina Das  <sudi.das@arm.com>
> 
>        * gcc.target/aarch64/bic_imm_1.c: New test.
>        * gcc.target/aarch64/orr_imm_1.c: Likewise.
> 
> 
> patch-7260-2.diff
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index 9543f8c..89cc455 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -297,6 +297,15 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG          /* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
> +   has both bits set.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  CHECK_I   = 1,     /* Perform only non-inverted immediate checks (ORR).  */
> +  CHECK_NI  = 2,     /* Perform only inverted immediate checks (BIC).  */
> +  CHECK_ALL = 3              /* Perform all checks (MOVI/MNVI).  */
> +};
> +
>  extern struct tune_params aarch64_tune_params;
>  
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> @@ -334,6 +343,8 @@ rtx aarch64_reverse_mask (enum machine_mode);
>  bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
>  char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
>  char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
> +char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
> +                                          const char*);
>  bool aarch64_pad_arg_upward (machine_mode, const_tree);
>  bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
>  bool aarch64_regno_ok_for_base_p (int, bool);
> @@ -345,7 +356,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
>  bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
>  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>  bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
> -                                struct simd_immediate_info *);
> +                                struct simd_immediate_info *,
> +                                enum simd_immediate_check w = CHECK_ALL);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
>  bool aarch64_uimm12_shift (HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index c462164..92275dc 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -280,6 +280,26 @@
>    [(set_attr "type" "neon_logic<q>")]
>  )
>  
> +(define_insn "*bic_imm_<mode>3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> +             (match_operand:VDQ_I 2 "aarch64_simd_valid_bic_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> +                     <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic"); }
> +  [(set_attr "type" "neon_logic<q>")]
> +)
> +
> +(define_insn "*ior_imm_<mode>3"
> + [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> +       (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "0")
> +             (match_operand:VDQ_I 2 "aarch64_simd_valid_orr_imm_p" "")))]
> + "TARGET_SIMD"
> + { return aarch64_output_simd_general_immediate (operands[2],
> +                     <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr"); }
> +  [(set_attr "type" "neon_logic<q>")]
> +)

Both of these generate the same RTL constructs as the simd ior<mode>3
and and<mode>3 patterns, so should be merged as subcases of those
patterns (and handled with suitable constraint alternatives).

R.

> +
>  (define_insn "add<mode>3"
>    [(set (match_operand:VDQ_I 0 "register_operand" "=w")
>          (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 4f769a4..450c42d 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11066,7 +11066,8 @@ aarch64_vect_float_const_representable_p (rtx x)
>  /* Return true for valid and false for invalid.  */
>  bool
>  aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
> -                           struct simd_immediate_info *info)
> +                           struct simd_immediate_info *info,
> +                           enum simd_immediate_check which)
>  {
>  #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)       \
>    matches = 1;                                               \
> @@ -11130,54 +11131,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
>  
>    do
>      {
> -      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> -          && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
> +      if (which & CHECK_I)
> +     {
> +       CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> +              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
>  
> -      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -          && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +       CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> -          && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +       CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> +              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> -          && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
> +       CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> +              && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
>  
> -      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
> +       CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
>  
> -      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +       CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +     }
>  
> -      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> -          && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
> +      if (which & CHECK_NI)
> +     {
> +       CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> +              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
>  
> -      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -          && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +       CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -          && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +       CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -          && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
> +       CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +              && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
>  
> -      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
> +       CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
>  
> -      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +       CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +     }
>  
> -      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -          && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +      /* Shifting ones / 8-bit / 64-bit variants only checked
> +      for 'ALL' (MOVI/MVNI).  */
> +      if (which == CHECK_ALL)
> +     {
> +       CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +              && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -          && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +       CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +              && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -          && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +       CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> -          && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +       CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> +              && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
> +       CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
>  
> -      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> -          && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +       CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> +              && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +     }
>      }
>    while (0);
>  
> @@ -12598,6 +12610,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>    return templ;
>  }
>  
> +/* This function is similar to aarch64_output_simd_mov_immediate, used for
> +   immediate versions of 'bic' or 'orr'.  */
> +char*
> +aarch64_output_simd_general_immediate (rtx const_vector,
> +                                    machine_mode mode,
> +                                    unsigned width,
> +                                    const char *mnemonic)
> +{
> +  bool is_valid;
> +  static char templ[40];
> +  unsigned int lane_count = 0;
> +  char element_char;
> +
> +  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
> +
> +  if (strcmp (mnemonic, "orr") == 0)
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +                                          &info, CHECK_I);
> +  else
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +                                          &info, CHECK_NI);
> +
> +  gcc_assert (is_valid);
> +  gcc_assert (CONST_INT_P (info.value));
> +
> +  element_char = sizetochar (info.element_width);
> +  lane_count = width / info.element_width;
> +
> +  if (lane_count == 1)
> +    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
> +           mnemonic, UINTVAL (info.value));
> +  else if (info.shift)
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
> +           ", %s #%d", mnemonic, lane_count, element_char,
> +           UINTVAL (info.value), "lsl", info.shift);
> +  else
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
> +           mnemonic, lane_count, element_char, UINTVAL (info.value));
> +  return templ;
> +}
> +
>  char*
>  aarch64_output_scalar_simd_mov_immediate (rtx immediate,
>                                          machine_mode mode)
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index e83d45b..fe65f2b 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -307,6 +307,18 @@
>    return aarch64_simd_shift_imm_p (op, mode, false);
>  })
>  
> +(define_special_predicate "aarch64_simd_valid_bic_imm_p"
> +  (match_code "const_vector")
> +{
> +  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_NI);
> +})
> +
> +(define_special_predicate "aarch64_simd_valid_orr_imm_p"
> +  (match_code "const_vector")
> +{
> +  return aarch64_simd_valid_immediate (op, mode, false, NULL, CHECK_I);
> +})
> +
>  (define_predicate "aarch64_simd_reg_or_zero"
>    (and (match_code "reg,subreg,const_int,const_double,const_vector")
>         (ior (match_operand 0 "register_operand")
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> new file mode 100644
> index 0000000..d94dd90
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> @@ -0,0 +1,26 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +bic_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +void
> +bic_ss (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff00);
> +}
> +
> +void
> +bic_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> new file mode 100644
> index 0000000..919a6ef
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> @@ -0,0 +1,18 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +orr_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +void
> +orr_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
> 

    

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch-7260-3.diff --]
[-- Type: text/x-patch; name="patch-7260-3.diff", Size: 12555 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index beff28e..5ae73c7 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -308,6 +308,15 @@ enum aarch64_parse_opt_result
   AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
 };
 
+/* Enum to distinguish which type of check is to be done in
+   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
+   has both bits set.  Adding new types would require changes accordingly.  */
+enum simd_immediate_check {
+  CHECK_I   = 1,	/* Perform only non-inverted immediate checks (ORR).  */
+  CHECK_NI  = 2,	/* Perform only inverted immediate checks (BIC).  */
+  CHECK_ALL = 3		/* Perform all checks (MOVI/MNVI).  */
+};
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -349,6 +358,8 @@ rtx aarch64_reverse_mask (machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, machine_mode);
 char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
+char *aarch64_output_simd_general_immediate (rtx, machine_mode, unsigned,
+					     const char*);
 bool aarch64_pad_arg_upward (machine_mode, const_tree);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
@@ -360,7 +371,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, machine_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
-				   struct simd_immediate_info *);
+				   struct simd_immediate_info *,
+				   enum simd_immediate_check w = CHECK_ALL);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 011fcec0..9e16cbd 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -516,21 +516,45 @@
   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 )
 
+;; For AND (vector, register) and BIC (vector, immediate)
 (define_insn "and<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))]
   "TARGET_SIMD"
-  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+   switch (which_alternative)
+     {
+     case 0:
+	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+     case 1:
+	return aarch64_output_simd_general_immediate (operands[2],
+			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "bic");
+     default:
+	gcc_unreachable ();
+    }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
+;; For ORR (vector, register) and ORR (vector, immediate)
 (define_insn "ior<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Do")))]
   "TARGET_SIMD"
-  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+   switch (which_alternative)
+     {
+     case 0:
+	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+     case 1:
+	return aarch64_output_simd_general_immediate (operands[2],
+			<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), "orr");
+     default:
+	gcc_unreachable ();
+    }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 055ebaf..2cce5af 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11440,7 +11440,8 @@ aarch64_vect_float_const_representable_p (rtx x)
 /* Return true for valid and false for invalid.  */
 bool
 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
-			      struct simd_immediate_info *info)
+			      struct simd_immediate_info *info,
+			      enum simd_immediate_check which)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -11504,54 +11505,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
 
   do
     {
-      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
+      if (which & CHECK_I)
+	{
+	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
 
-      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
+	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
 
-      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
+	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
 
-      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	}
 
-      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
+      if (which & CHECK_NI)
+	{
+	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
 
-      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
+	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
 
-      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
+	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
 
-      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	}
 
-      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+      /* Shifting ones / 8-bit / 64-bit variants only checked
+	 for 'ALL' (MOVI/MVNI).  */
+      if (which == CHECK_ALL)
+	{
+	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
+	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
 
-      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
-	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	}
     }
   while (0);
 
@@ -13029,6 +13041,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
   return templ;
 }
 
+/* This function is similar to aarch64_output_simd_mov_immediate, used for
+   immediate versions of 'bic' or 'orr'.  */
+char*
+aarch64_output_simd_general_immediate (rtx const_vector,
+				       machine_mode mode,
+				       unsigned width,
+				       const char *mnemonic)
+{
+  bool is_valid;
+  static char templ[40];
+  unsigned int lane_count = 0;
+  char element_char;
+
+  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
+
+  if (strcmp (mnemonic, "orr") == 0)
+    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					     &info, CHECK_I);
+  else
+    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					     &info, CHECK_NI);
+
+  gcc_assert (is_valid);
+  gcc_assert (CONST_INT_P (info.value));
+
+  element_char = sizetochar (info.element_width);
+  lane_count = width / info.element_width;
+
+  if (lane_count == 1)
+    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
+	     mnemonic, UINTVAL (info.value));
+  else if (info.shift)
+    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
+	     ", %s #%d", mnemonic, lane_count, element_char,
+	     UINTVAL (info.value), "lsl", info.shift);
+  else
+    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
+	     mnemonic, lane_count, element_char, UINTVAL (info.value));
+  return templ;
+}
+
 char*
 aarch64_output_scalar_simd_mov_immediate (rtx immediate,  machine_mode mode)
 {
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 9ce3d4e..e277170 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -189,6 +189,20 @@
       (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op),
 						 false, NULL)")))
 
+(define_constraint "Do"
+  "@internal
+ A constraint that matches vector of immediates for orr."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, CHECK_I)")))
+
+(define_constraint "Db"
+  "@internal
+ A constraint that matches vector of immediates for bic."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, CHECK_NI)")))
+
 (define_constraint "Dh"
   "@internal
  A constraint that matches an immediate operand valid for\
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
new file mode 100644
index 0000000..d94dd90
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
@@ -0,0 +1,26 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+bic_s (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff);
+}
+
+void
+bic_ss (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff00);
+}
+
+void
+bic_int (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xff);
+}
+
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
new file mode 100644
index 0000000..919a6ef
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
@@ -0,0 +1,18 @@
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+void
+orr_s (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+void
+orr_int (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #171" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-08-07 13:56   ` Sudi Das
@ 2017-09-20 10:39     ` James Greenhalgh
  2017-09-25 10:14       ` Sudi Das
  0 siblings, 1 reply; 12+ messages in thread
From: James Greenhalgh @ 2017-09-20 10:39 UTC (permalink / raw)
  To: Sudi Das; +Cc: Richard Earnshaw, gcc-patches, nd, Marcus Shawcroft

On Mon, Aug 07, 2017 at 02:56:09PM +0100, Sudi Das wrote:
> 
> Hi Richard
> 
> I have updated the patch according to your comments. Thanks for pointing it
> out and sorry for the delay!

Hi Sudi,

I've taken a look at your patch - at a high level, I think that adding
aarch64_output_simd_general_immediate seems like an over-complication, and
I'm not sure I follow why we need some of the logic you add. I think we
might be able to really simplify this patch, by adding a new character to
aarch64_print_operand for an (optionally) shifted immmediate to bic/orr.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index beff28e..5ae73c7 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -308,6 +308,15 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
> +   has both bits set.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  CHECK_I   = 1,	/* Perform only non-inverted immediate checks (ORR).  */
> +  CHECK_NI  = 2,	/* Perform only inverted immediate checks (BIC).  */

Are these comments the right way round? If so, why is I non-inverted and
NI inverted - those names seem back to front to me.

These should probably all be AARCH64_CHECK_* rather than just CHECK_*

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 055ebaf..2cce5af 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -13029,6 +13041,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>    return templ;
>  }
>  
> +/* This function is similar to aarch64_output_simd_mov_immediate, used for
> +   immediate versions of 'bic' or 'orr'.  */
> +char*
> +aarch64_output_simd_general_immediate (rtx const_vector,
> +				       machine_mode mode,
> +				       unsigned width,

You can drop this parameter - it is always GET_MODE_BITSIZE (mode) so just
calculate that in the function body.

> +				       const char *mnemonic)
> +{
> +  bool is_valid;
> +  static char templ[40];
> +  unsigned int lane_count = 0;
> +  char element_char;
> +
> +  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
> +
> +  if (strcmp (mnemonic, "orr") == 0)

For all the difference it makes, I'd just pass a bool and save save
yourself the short string comparison.

> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					     &info, CHECK_I);
> +  else
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					     &info, CHECK_NI);
> +
> +  gcc_assert (is_valid);
> +  gcc_assert (CONST_INT_P (info.value));
> +  element_char = sizetochar (info.element_width);
> +  lane_count = width / info.element_width;

Width is always GET_MODE_BITSIZE (mode), and mode is one of
V8QI V16QI V4HI V8HI V2SI V4SI V2DI - so width is either 64 or 128.
in the CHECK_I and CHECK_NI cases of aarch64_simd_valid_immediate the elsize
(used to set info.element_width) is only ever 16 or 32. That means lane count
is either going to be 2, 4 or 8...

> +  if (lane_count == 1)
> +    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
> +	     mnemonic, UINTVAL (info.value));

Which means this case never fires -- Which is a good thing, as my copy of
the Arm Architecture Reference Manual doesn't have scalar forms of these
instructions. Simplifying this down to two cases is what makes me think we
might be able to get away with doing this in aarch64_print_operand.

> +  else if (info.shift)
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
> +	     ", %s #%d", mnemonic, lane_count, element_char,
> +	     UINTVAL (info.value), "lsl", info.shift);
> +  else
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
> +	     mnemonic, lane_count, element_char, UINTVAL (info.value));

Here we need to know if we're creating a shifted immediate or not, that might
be the part that makes doing it in aarch64_print_operand hard.

Could you try it out, and see how the code ends up looking?

> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> new file mode 100644
> index 0000000..d94dd90
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> @@ -0,0 +1,26 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +bic_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +void
> +bic_ss (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff00);
> +}
> +
> +void
> +bic_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}

I'd like to see some more test cases, testing more of the shifted immediate
forms you support.

Additionally, I'd like these to be assemble tests, so we check we're
creating valid instructions.

Thanks,
James

> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-09-20 10:39     ` James Greenhalgh
@ 2017-09-25 10:14       ` Sudi Das
  2017-09-26 19:04         ` James Greenhalgh
  0 siblings, 1 reply; 12+ messages in thread
From: Sudi Das @ 2017-09-25 10:14 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: Richard Earnshaw, gcc-patches, nd, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 6707 bytes --]


Hi James

I put aarch64_output_simd_general_immediate looking at the similarities of the immediates
for mov/mvni and orr/bic. The CHECK macro in aarch64_simd_valid_immediate both checks
and converts the immediates in a manner that are needed for the instructions.

Having said that, I agree that maybe I could have refactored aarch64_output_simd_mov_immediate
to do the work rather than creating a new functions to do similar things. I have done so in this patch.

I have also changed the names of the enum simd_immediate_check to be better indicative of what they are doing. 

Lastly I have added more cases in the tests (according to all the possible CHECKs) and made
them dg-do assemble (although I had to add --save-temps so that the scan-assembler
would work). Do you think I should not put that option and rather create separate tests?

Thanks,
Sudi


From: James Greenhalgh <james.greenhalgh@arm.com>
Sent: Wednesday, September 20, 2017 11:39 AM
To: Sudi Das
Cc: Richard Earnshaw; gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
    
On Mon, Aug 07, 2017 at 02:56:09PM +0100, Sudi Das wrote:
> 
> Hi Richard
> 
> I have updated the patch according to your comments. Thanks for pointing it
> out and sorry for the delay!

Hi Sudi,

I've taken a look at your patch - at a high level, I think that adding
aarch64_output_simd_general_immediate seems like an over-complication, and
I'm not sure I follow why we need some of the logic you add. I think we
might be able to really simplify this patch, by adding a new character to
aarch64_print_operand for an (optionally) shifted immmediate to bic/orr.

> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index beff28e..5ae73c7 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -308,6 +308,15 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG          /* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where CHECK_ALL
> +   has both bits set.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  CHECK_I   = 1,     /* Perform only non-inverted immediate checks (ORR).  */
> +  CHECK_NI  = 2,     /* Perform only inverted immediate checks (BIC).  */

Are these comments the right way round? If so, why is I non-inverted and
NI inverted - those names seem back to front to me.

These should probably all be AARCH64_CHECK_* rather than just CHECK_*

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 055ebaf..2cce5af 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -13029,6 +13041,47 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>    return templ;
>  }
>  
> +/* This function is similar to aarch64_output_simd_mov_immediate, used for
> +   immediate versions of 'bic' or 'orr'.  */
> +char*
> +aarch64_output_simd_general_immediate (rtx const_vector,
> +                                    machine_mode mode,
> +                                    unsigned width,

You can drop this parameter - it is always GET_MODE_BITSIZE (mode) so just
calculate that in the function body.

> +                                    const char *mnemonic)
> +{
> +  bool is_valid;
> +  static char templ[40];
> +  unsigned int lane_count = 0;
> +  char element_char;
> +
> +  struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
> +
> +  if (strcmp (mnemonic, "orr") == 0)

For all the difference it makes, I'd just pass a bool and save save
yourself the short string comparison.

> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +                                          &info, CHECK_I);
> +  else
> +    is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +                                          &info, CHECK_NI);
> +
> +  gcc_assert (is_valid);
> +  gcc_assert (CONST_INT_P (info.value));
> +  element_char = sizetochar (info.element_width);
> +  lane_count = width / info.element_width;

Width is always GET_MODE_BITSIZE (mode), and mode is one of
V8QI V16QI V4HI V8HI V2SI V4SI V2DI - so width is either 64 or 128.
in the CHECK_I and CHECK_NI cases of aarch64_simd_valid_immediate the elsize
(used to set info.element_width) is only ever 16 or 32. That means lane count
is either going to be 2, 4 or 8...

> +  if (lane_count == 1)
> +    sprintf (templ, "%s\t%%d0, #" HOST_WIDE_INT_PRINT_DEC,
> +          mnemonic, UINTVAL (info.value));

Which means this case never fires -- Which is a good thing, as my copy of
the Arm Architecture Reference Manual doesn't have scalar forms of these
instructions. Simplifying this down to two cases is what makes me think we
might be able to get away with doing this in aarch64_print_operand.

> +  else if (info.shift)
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC
> +          ", %s #%d", mnemonic, lane_count, element_char,
> +          UINTVAL (info.value), "lsl", info.shift);
> +  else
> +    sprintf (templ, "%s\t%%0.%d%c, #" HOST_WIDE_INT_PRINT_DEC,
> +          mnemonic, lane_count, element_char, UINTVAL (info.value));

Here we need to know if we're creating a shifted immediate or not, that might
be the part that makes doing it in aarch64_print_operand hard.

Could you try it out, and see how the code ends up looking?

> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> new file mode 100644
> index 0000000..d94dd90
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> @@ -0,0 +1,26 @@
> +/* { dg-options "-O2 -ftree-vectorize" } */
> +
> +void
> +bic_s (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}
> +
> +void
> +bic_ss (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff00);
> +}
> +
> +void
> +bic_int (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xff);
> +}

I'd like to see some more test cases, testing more of the shifted immediate
forms you support.

Additionally, I'd like these to be assemble tests, so we check we're
creating valid instructions.

Thanks,
James

> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #255, lsl #8" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #255" } } */

    

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch-7260-4.diff --]
[-- Type: text/x-patch; name="patch-7260-4.diff", Size: 15395 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e67c2ed..ebef8d1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
   AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
 };
 
+/* Enum to distinguish which type of check is to be done in
+   aarch64_simd_valid_immediate.  This is used as a bitmask where
+   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
+   perform all checks.  Adding new types would require changes accordingly.  */
+enum simd_immediate_check {
+  AARCH64_CHECK_ORR  = 1,	/* Perform immediate checks for ORR.  */
+  AARCH64_CHECK_BIC  = 2,	/* Perform immediate checks for BIC.  */
+  AARCH64_CHECK_MOV  = 3	/* Perform all checks (used for MOVI/MNVI).  */
+};
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -345,7 +355,8 @@ bool aarch64_mov_operand_p (rtx, machine_mode);
 rtx aarch64_reverse_mask (machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
-char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
+char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned,
+			enum simd_immediate_check w = AARCH64_CHECK_MOV);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
 bool aarch64_regno_ok_for_index_p (int, bool);
@@ -356,7 +367,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
-				   struct simd_immediate_info *);
+			struct simd_immediate_info *,
+			enum simd_immediate_check w = AARCH64_CHECK_MOV);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 70e9339..0a7e5e8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -558,21 +558,45 @@
   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 )
 
+;; For AND (vector, register) and BIC (vector, immediate)
 (define_insn "and<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))]
   "TARGET_SIMD"
-  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+    switch (which_alternative)
+      {
+      case 0:
+	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+      case 1:
+	return aarch64_output_simd_mov_immediate (operands[2],
+	   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
+      default:
+	gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
+;; For ORR (vector, register) and ORR (vector, immediate)
 (define_insn "ior<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Do")))]
   "TARGET_SIMD"
-  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+    switch (which_alternative)
+      {
+      case 0:
+	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+      case 1:
+	return aarch64_output_simd_mov_immediate (operands[2],
+		<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
+      default:
+	gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5e26cb7..1ae18ef 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11471,7 +11471,8 @@ aarch64_vect_float_const_representable_p (rtx x)
 /* Return true for valid and false for invalid.  */
 bool
 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
-			      struct simd_immediate_info *info)
+			      struct simd_immediate_info *info,
+			      enum simd_immediate_check which)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -11539,54 +11540,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
 
   do
     {
-      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
+      if (which & AARCH64_CHECK_ORR)
+	{
+	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
 
-      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
+	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
 
-      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
+	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
 
-      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	}
 
-      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
+      if (which & AARCH64_CHECK_BIC)
+	{
+	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
 
-      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
+	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
 
-      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
+	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
 
-      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	}
 
-      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+      /* Shifting ones / 8-bit / 64-bit variants only checked
+	 for 'ALL' (MOVI/MVNI).  */
+      if (which == AARCH64_CHECK_MOV)
+	{
+	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
+	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
 
-      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
-	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	}
     }
   while (0);
 
@@ -13001,7 +13013,8 @@ aarch64_float_const_representable_p (rtx x)
 char*
 aarch64_output_simd_mov_immediate (rtx const_vector,
 				   machine_mode mode,
-				   unsigned width)
+				   unsigned width,
+				   enum simd_immediate_check which)
 {
   bool is_valid;
   static char templ[40];
@@ -13015,7 +13028,8 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
   /* This will return true to show const_vector is legal for use as either
      a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
      also update INFO to show how the immediate should be generated.  */
-  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
+  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					   &info, which);
   gcc_assert (is_valid);
 
   element_char = sizetochar (info.element_width);
@@ -13046,20 +13060,37 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
 	}
     }
 
-  mnemonic = info.mvn ? "mvni" : "movi";
-  shift_op = info.msl ? "msl" : "lsl";
-
   gcc_assert (CONST_INT_P (info.value));
-  if (lane_count == 1)
-    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
-	      mnemonic, UINTVAL (info.value));
-  else if (info.shift)
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
-	      ", %s %d", mnemonic, lane_count, element_char,
-	      UINTVAL (info.value), shift_op, info.shift);
+
+  if (which == AARCH64_CHECK_MOV)
+    {
+      mnemonic = info.mvn ? "mvni" : "movi";
+      shift_op = info.msl ? "msl" : "lsl";
+      if (lane_count == 1)
+	snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
+		  mnemonic, UINTVAL (info.value));
+      else if (info.shift)
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
+		  HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
+		  element_char, UINTVAL (info.value), shift_op, info.shift);
+      else
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
+		  HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
+		  element_char, UINTVAL (info.value));
+    }
   else
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
-	      mnemonic, lane_count, element_char, UINTVAL (info.value));
+    {
+      /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR.  */
+      mnemonic = info.mvn ? "bic" : "orr";
+      if (info.shift)
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
+		  HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
+		  element_char, UINTVAL (info.value), "lsl", info.shift);
+      else
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
+		  HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
+		  element_char, UINTVAL (info.value));
+    }
   return templ;
 }
 
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3649fb4..77c510c 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -190,6 +190,20 @@
   (and (match_code "const_double")
        (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
 
+(define_constraint "Do"
+  "@internal
+   A constraint that matches vector of immediates for orr."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, AARCH64_CHECK_ORR)")))
+
+(define_constraint "Db"
+  "@internal
+   A constraint that matches vector of immediates for bic."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, AARCH64_CHECK_BIC)")))
+
 (define_constraint "Dn"
   "@internal
  A constraint that matches vector of immediates."
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
new file mode 100644
index 0000000..b14f009
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
@@ -0,0 +1,56 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
+
+/* Each function uses the correspoding 'CLASS' in
+   Marco CHECK (aarch64_simd_valid_immediate).  */
+
+void
+bic_6 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xab);
+}
+
+void
+bic_7 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xcd00);
+}
+
+void
+bic_8 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xef0000);
+}
+
+void
+bic_9 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x12000000);
+}
+
+void
+bic_10 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x34);
+}
+
+
+void
+bic_11 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x5600);
+}
+
+
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #171" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #205, lsl #8" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #239, lsl #16" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #18, lsl #24" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #52" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #86, lsl #8" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
new file mode 100644
index 0000000..ff6f683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
@@ -0,0 +1,54 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
+
+/* Each function uses the correspoding 'CLASS' in
+   Marco CHECK (aarch64_simd_valid_immediate).  */
+
+void
+orr_0 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+void
+orr_1 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x0000cd00;
+}
+
+void
+orr_2 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x00ef0000;
+}
+
+void
+orr_3 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x12000000;
+}
+
+void
+orr_4 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x00340034;
+}
+
+void
+orr_5 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x56005600;
+}
+
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #205, lsl #8" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #239, lsl #16" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #18, lsl #24" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #52" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #86, lsl #8" } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-09-25 10:14       ` Sudi Das
@ 2017-09-26 19:04         ` James Greenhalgh
  2017-09-27 17:57           ` Sudi Das
  0 siblings, 1 reply; 12+ messages in thread
From: James Greenhalgh @ 2017-09-26 19:04 UTC (permalink / raw)
  To: Sudi Das; +Cc: Richard Earnshaw, gcc-patches, nd, Marcus Shawcroft

On Mon, Sep 25, 2017 at 11:13:57AM +0100, Sudi Das wrote:
> 
> Hi James
> 
> I put aarch64_output_simd_general_immediate looking at the similarities of
> the immediates for mov/mvni and orr/bic. The CHECK macro in
> aarch64_simd_valid_immediate both checks
> and converts the immediates in a manner that are needed for the instructions.
> 
> Having said that, I agree that maybe I could have refactored
> aarch64_output_simd_mov_immediate to do the work rather than creating a new
> functions to do similar things. I have done so in this patch.

Thanks, this looks much neater.

> I have also changed the names of the enum simd_immediate_check to be better
> indicative of what they are doing. 

Thanks, I'd tweak them to look more like the bitmasks you use them as, but
that is a small change for my personal preference.

> Lastly I have added more cases in the tests (according to all the possible
> CHECKs) and made them dg-do assemble (although I had to add --save-temps so
> that the scan-assembler would work). Do you think I should not put that
> option and rather create separate tests?

This is good - thanks.

I think clean up the enum definitions and this patch will be good.

> @@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where
> +   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
> +   perform all checks.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  AARCH64_CHECK_ORR  = 1,	/* Perform immediate checks for ORR.  */
> +  AARCH64_CHECK_BIC  = 2,	/* Perform immediate checks for BIC.  */
> +  AARCH64_CHECK_MOV  = 3	/* Perform all checks (used for MOVI/MNVI).  */

These are used in bit-mask style, so how about:

  AARCH64_CHECK_ORR = 1 << 0,
  AARCH64_CHECK_BIC = 1 << 1,
  AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC

Which is more self-documenting.

> @@ -13001,7 +13013,8 @@ aarch64_float_const_representable_p (rtx x)
>  char*
>  aarch64_output_simd_mov_immediate (rtx const_vector,
>  				   machine_mode mode,
> -				   unsigned width)
> +				   unsigned width,
> +				   enum simd_immediate_check which)

This function is sorely missing a comment explaining the parameters - it
would be very helpful if you could add one as part of this patch.

Thanks,
James

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-09-26 19:04         ` James Greenhalgh
@ 2017-09-27 17:57           ` Sudi Das
  2017-09-28  8:55             ` Richard Earnshaw (lists)
  0 siblings, 1 reply; 12+ messages in thread
From: Sudi Das @ 2017-09-27 17:57 UTC (permalink / raw)
  To: James Greenhalgh; +Cc: Richard Earnshaw, gcc-patches, nd, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 3955 bytes --]



Hi James

I have made the requested changes to the patch.


2017-09-27 Sudakshina Das  <sudi.das@arm.com>

	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
	for aarch64_simd_valid_immediate.
	(aarch64_output_simd_mov_immediate): Update prototype.
	(aarch64_simd_valid_immediate): Update prototype.

	* config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
	support for ORR-immediate.
	(and<mode>3): modified pattern to add support for BIC-immediate.

	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
	for valid immediate for BIC and ORR based on new enum argument.
	(aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
	as well based on new enum argument.
 
	* config/aarch64/constraints.md (Do): New vector immediate constraint.
	(Db): Likewise.

2017-09-27 Sudakshina Das  <sudi.das@arm.com>

	* gcc.target/aarch64/bic_imm_1.c: New test.
	* gcc.target/aarch64/orr_imm_1.c: Likewise.


Thanks
Sudi

  
From: James Greenhalgh <james.greenhalgh@arm.com>
Sent: Tuesday, September 26, 2017 8:04:38 PM
To: Sudi Das
Cc: Richard Earnshaw; gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
    
On Mon, Sep 25, 2017 at 11:13:57AM +0100, Sudi Das wrote:
> 
> Hi James
> 
> I put aarch64_output_simd_general_immediate looking at the similarities of
> the immediates for mov/mvni and orr/bic. The CHECK macro in
> aarch64_simd_valid_immediate both checks
> and converts the immediates in a manner that are needed for the instructions.
> 
> Having said that, I agree that maybe I could have refactored
> aarch64_output_simd_mov_immediate to do the work rather than creating a new
> functions to do similar things. I have done so in this patch.

Thanks, this looks much neater.

> I have also changed the names of the enum simd_immediate_check to be better
> indicative of what they are doing. 

Thanks, I'd tweak them to look more like the bitmasks you use them as, but
that is a small change for my personal preference.

> Lastly I have added more cases in the tests (according to all the possible
> CHECKs) and made them dg-do assemble (although I had to add --save-temps so
> that the scan-assembler would work). Do you think I should not put that
> option and rather create separate tests?

This is good - thanks.

I think clean up the enum definitions and this patch will be good.

> @@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG          /* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where
> +   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
> +   perform all checks.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  AARCH64_CHECK_ORR  = 1,    /* Perform immediate checks for ORR.  */
> +  AARCH64_CHECK_BIC  = 2,    /* Perform immediate checks for BIC.  */
> +  AARCH64_CHECK_MOV  = 3     /* Perform all checks (used for MOVI/MNVI).  */

These are used in bit-mask style, so how about:

  AARCH64_CHECK_ORR = 1 << 0,
  AARCH64_CHECK_BIC = 1 << 1,
  AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC

Which is more self-documenting.

> @@ -13001,7 +13013,8 @@ aarch64_float_const_representable_p (rtx x)
>  char*
>  aarch64_output_simd_mov_immediate (rtx const_vector,
>                                   machine_mode mode,
> -                                unsigned width)
> +                                unsigned width,
> +                                enum simd_immediate_check which)

This function is sorely missing a comment explaining the parameters - it
would be very helpful if you could add one as part of this patch.

Thanks,
James

    

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch-7260-5.diff --]
[-- Type: text/x-patch; name="patch-7260-5.diff", Size: 15867 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e67c2ed..5d7c5df 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
   AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
 };
 
+/* Enum to distinguish which type of check is to be done in
+   aarch64_simd_valid_immediate.  This is used as a bitmask where
+   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
+   perform all checks.  Adding new types would require changes accordingly.  */
+enum simd_immediate_check {
+  AARCH64_CHECK_ORR  = 1 << 0,
+  AARCH64_CHECK_BIC  = 1 << 1,
+  AARCH64_CHECK_MOV  = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
+};
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -345,7 +355,8 @@ bool aarch64_mov_operand_p (rtx, machine_mode);
 rtx aarch64_reverse_mask (machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
-char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
+char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned,
+			enum simd_immediate_check w = AARCH64_CHECK_MOV);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
 bool aarch64_regno_ok_for_index_p (int, bool);
@@ -356,7 +367,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
-				   struct simd_immediate_info *);
+			struct simd_immediate_info *,
+			enum simd_immediate_check w = AARCH64_CHECK_MOV);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 70e9339..0a7e5e8 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -558,21 +558,45 @@
   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 )
 
+;; For AND (vector, register) and BIC (vector, immediate)
 (define_insn "and<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))]
   "TARGET_SIMD"
-  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+    switch (which_alternative)
+      {
+      case 0:
+	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+      case 1:
+	return aarch64_output_simd_mov_immediate (operands[2],
+	   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
+      default:
+	gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
+;; For ORR (vector, register) and ORR (vector, immediate)
 (define_insn "ior<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Do")))]
   "TARGET_SIMD"
-  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+    switch (which_alternative)
+      {
+      case 0:
+	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+      case 1:
+	return aarch64_output_simd_mov_immediate (operands[2],
+		<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
+      default:
+	gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5e26cb7..6a6b3d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11471,7 +11471,8 @@ aarch64_vect_float_const_representable_p (rtx x)
 /* Return true for valid and false for invalid.  */
 bool
 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
-			      struct simd_immediate_info *info)
+			      struct simd_immediate_info *info,
+			      enum simd_immediate_check which)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -11539,54 +11540,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
 
   do
     {
-      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
+      if (which & AARCH64_CHECK_ORR)
+	{
+	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
 
-      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
+	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
 
-      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
+	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
 
-      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	}
 
-      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
+      if (which & AARCH64_CHECK_BIC)
+	{
+	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
 
-      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
+	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
 
-      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
+	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
 
-      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	}
 
-      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+      /* Shifting ones / 8-bit / 64-bit variants only checked
+	 for 'ALL' (MOVI/MVNI).  */
+      if (which == AARCH64_CHECK_MOV)
+	{
+	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
+	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
 
-      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
-	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	}
     }
   while (0);
 
@@ -12998,10 +13010,14 @@ aarch64_float_const_representable_p (rtx x)
   return (exponent >= 0 && exponent <= 7);
 }
 
+/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
+   immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether to
+   output MOVI/MVNI, ORR or BIC immediate.  */
 char*
 aarch64_output_simd_mov_immediate (rtx const_vector,
 				   machine_mode mode,
-				   unsigned width)
+				   unsigned width,
+				   enum simd_immediate_check which)
 {
   bool is_valid;
   static char templ[40];
@@ -13013,9 +13029,11 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
 
   /* This will return true to show const_vector is legal for use as either
-     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
-     also update INFO to show how the immediate should be generated.  */
-  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
+     a AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
+     It will also update INFO to show how the immediate should be generated.
+     WHICH selects whether to check for MOVI/MVNI, ORR or BIC.  */
+  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					   &info, which);
   gcc_assert (is_valid);
 
   element_char = sizetochar (info.element_width);
@@ -13046,20 +13064,37 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
 	}
     }
 
-  mnemonic = info.mvn ? "mvni" : "movi";
-  shift_op = info.msl ? "msl" : "lsl";
-
   gcc_assert (CONST_INT_P (info.value));
-  if (lane_count == 1)
-    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
-	      mnemonic, UINTVAL (info.value));
-  else if (info.shift)
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
-	      ", %s %d", mnemonic, lane_count, element_char,
-	      UINTVAL (info.value), shift_op, info.shift);
+
+  if (which == AARCH64_CHECK_MOV)
+    {
+      mnemonic = info.mvn ? "mvni" : "movi";
+      shift_op = info.msl ? "msl" : "lsl";
+      if (lane_count == 1)
+	snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
+		  mnemonic, UINTVAL (info.value));
+      else if (info.shift)
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
+		  HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
+		  element_char, UINTVAL (info.value), shift_op, info.shift);
+      else
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
+		  HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
+		  element_char, UINTVAL (info.value));
+    }
   else
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
-	      mnemonic, lane_count, element_char, UINTVAL (info.value));
+    {
+      /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR.  */
+      mnemonic = info.mvn ? "bic" : "orr";
+      if (info.shift)
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
+		  HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
+		  element_char, UINTVAL (info.value), "lsl", info.shift);
+      else
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
+		  HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
+		  element_char, UINTVAL (info.value));
+    }
   return templ;
 }
 
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3649fb4..77c510c 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -190,6 +190,20 @@
   (and (match_code "const_double")
        (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
 
+(define_constraint "Do"
+  "@internal
+   A constraint that matches vector of immediates for orr."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, AARCH64_CHECK_ORR)")))
+
+(define_constraint "Db"
+  "@internal
+   A constraint that matches vector of immediates for bic."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, AARCH64_CHECK_BIC)")))
+
 (define_constraint "Dn"
   "@internal
  A constraint that matches vector of immediates."
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
new file mode 100644
index 0000000..b14f009
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
@@ -0,0 +1,56 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
+
+/* Each function uses the correspoding 'CLASS' in
+   Marco CHECK (aarch64_simd_valid_immediate).  */
+
+void
+bic_6 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xab);
+}
+
+void
+bic_7 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xcd00);
+}
+
+void
+bic_8 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xef0000);
+}
+
+void
+bic_9 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x12000000);
+}
+
+void
+bic_10 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x34);
+}
+
+
+void
+bic_11 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x5600);
+}
+
+
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #171" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #205, lsl #8" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #239, lsl #16" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #18, lsl #24" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #52" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #86, lsl #8" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
new file mode 100644
index 0000000..ff6f683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
@@ -0,0 +1,54 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
+
+/* Each function uses the correspoding 'CLASS' in
+   Marco CHECK (aarch64_simd_valid_immediate).  */
+
+void
+orr_0 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+void
+orr_1 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x0000cd00;
+}
+
+void
+orr_2 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x00ef0000;
+}
+
+void
+orr_3 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x12000000;
+}
+
+void
+orr_4 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x00340034;
+}
+
+void
+orr_5 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x56005600;
+}
+
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #205, lsl #8" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #239, lsl #16" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #18, lsl #24" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #52" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #86, lsl #8" } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-09-27 17:57           ` Sudi Das
@ 2017-09-28  8:55             ` Richard Earnshaw (lists)
  2017-10-02  9:05               ` Sudi Das
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Earnshaw (lists) @ 2017-09-28  8:55 UTC (permalink / raw)
  To: Sudi Das, James Greenhalgh; +Cc: gcc-patches, nd, Marcus Shawcroft

On 27/09/17 18:57, Sudi Das wrote:
> 
> 
> Hi James
> 
> I have made the requested changes to the patch.
> 
> 
> 2017-09-27 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
> 	for aarch64_simd_valid_immediate.
> 	(aarch64_output_simd_mov_immediate): Update prototype.
> 	(aarch64_simd_valid_immediate): Update prototype.
> 
> 	* config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
> 	support for ORR-immediate.
> 	(and<mode>3): modified pattern to add support for BIC-immediate.
> 
> 	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
> 	for valid immediate for BIC and ORR based on new enum argument.
> 	(aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
> 	as well based on new enum argument.
>  
> 	* config/aarch64/constraints.md (Do): New vector immediate constraint.
> 	(Db): Likewise.
> 
> 2017-09-27 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* gcc.target/aarch64/bic_imm_1.c: New test.
> 	* gcc.target/aarch64/orr_imm_1.c: Likewise.
> 
> 
> Thanks
> Sudi
> 
>   
> From: James Greenhalgh <james.greenhalgh@arm.com>
> Sent: Tuesday, September 26, 2017 8:04:38 PM
> To: Sudi Das
> Cc: Richard Earnshaw; gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
> Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
>     
> On Mon, Sep 25, 2017 at 11:13:57AM +0100, Sudi Das wrote:
>>
>> Hi James
>>
>> I put aarch64_output_simd_general_immediate looking at the similarities of
>> the immediates for mov/mvni and orr/bic. The CHECK macro in
>> aarch64_simd_valid_immediate both checks
>> and converts the immediates in a manner that are needed for the instructions.
>>
>> Having said that, I agree that maybe I could have refactored
>> aarch64_output_simd_mov_immediate to do the work rather than creating a new
>> functions to do similar things. I have done so in this patch.
> 
> Thanks, this looks much neater.
> 
>> I have also changed the names of the enum simd_immediate_check to be better
>> indicative of what they are doing. 
> 
> Thanks, I'd tweak them to look more like the bitmasks you use them as, but
> that is a small change for my personal preference.
> 
>> Lastly I have added more cases in the tests (according to all the possible
>> CHECKs) and made them dg-do assemble (although I had to add --save-temps so
>> that the scan-assembler would work). Do you think I should not put that
>> option and rather create separate tests?
> 
> This is good - thanks.
> 
> I think clean up the enum definitions and this patch will be good.
> 
>> @@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
>>     AARCH64_PARSE_INVALID_ARG          /* Invalid arch, tune, cpu arg.  */
>>   };
>>   
>> +/* Enum to distinguish which type of check is to be done in
>> +   aarch64_simd_valid_immediate.  This is used as a bitmask where
>> +   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
>> +   perform all checks.  Adding new types would require changes accordingly.  */
>> +enum simd_immediate_check {
>> +  AARCH64_CHECK_ORR  = 1,    /* Perform immediate checks for ORR.  */
>> +  AARCH64_CHECK_BIC  = 2,    /* Perform immediate checks for BIC.  */
>> +  AARCH64_CHECK_MOV  = 3     /* Perform all checks (used for MOVI/MNVI).  */
> 
> These are used in bit-mask style, so how about:
> 
>   AARCH64_CHECK_ORR = 1 << 0,
>   AARCH64_CHECK_BIC = 1 << 1,
>   AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
> 
> Which is more self-documenting.
> 
>> @@ -13001,7 +13013,8 @@ aarch64_float_const_representable_p (rtx x)
>>   char*
>>   aarch64_output_simd_mov_immediate (rtx const_vector,
>>                                    machine_mode mode,
>> -                                unsigned width)
>> +                                unsigned width,
>> +                                enum simd_immediate_check which)
> 
> This function is sorely missing a comment explaining the parameters - it
> would be very helpful if you could add one as part of this patch.
> 
> Thanks,
> James
> 
>     
> 
+;; For AND (vector, register) and BIC (vector, immediate)
 (define_insn "and<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		 (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))]

You should define a new predicate operation for operand 2 that accepts
just registers or the valid constants.  Otherwise you'll may get
spilling during register allocation.

Similarly for the other pattern.

R.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-09-28  8:55             ` Richard Earnshaw (lists)
@ 2017-10-02  9:05               ` Sudi Das
  2017-10-04 15:41                 ` Richard Earnshaw (lists)
  0 siblings, 1 reply; 12+ messages in thread
From: Sudi Das @ 2017-10-02  9:05 UTC (permalink / raw)
  To: Richard Earnshaw, James Greenhalgh; +Cc: gcc-patches, nd, Marcus Shawcroft

[-- Attachment #1: Type: text/plain, Size: 6372 bytes --]


Hi Richard

Thanks, I have made the change to the patch.


2017-10-02 Sudakshina Das  <sudi.das@arm.com>

	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
	for aarch64_simd_valid_immediate.
	(aarch64_output_simd_mov_immediate): Update prototype.
	(aarch64_simd_valid_immediate): Update prototype.

	* config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
	support for ORR-immediate.
	(and<mode>3): modified pattern to add support for BIC-immediate.

	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
	for valid immediate for BIC and ORR based on new enum argument.
	(aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
	as well based on new enum argument.
 
	* config/aarch64/constraints.md (Do): New vector immediate constraint.
	(Db) : Likewise.

	* config/aarch64/predicates.md (aarch64_reg_or_orr_imm): New predicate.
	(aarch64_reg_or_bic_imm): Likewise.


2017-10-02 Sudakshina Das  <sudi.das@arm.com>

	* gcc.target/aarch64/bic_imm_1.c: New test.
	* gcc.target/aarch64/orr_imm_1.c: Likewise.


From: Richard Earnshaw (lists) <Richard.Earnshaw@arm.com>
Sent: Thursday, September 28, 2017 9:55 AM
To: Sudi Das; James Greenhalgh
Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
    
On 27/09/17 18:57, Sudi Das wrote:
> 
> 
> Hi James
> 
> I have made the requested changes to the patch.
> 
> 
> 2017-09-27 Sudakshina Das  <sudi.das@arm.com>
> 
>        * config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
>        for aarch64_simd_valid_immediate.
>        (aarch64_output_simd_mov_immediate): Update prototype.
>        (aarch64_simd_valid_immediate): Update prototype.
> 
>        * config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
>        support for ORR-immediate.
>        (and<mode>3): modified pattern to add support for BIC-immediate.
> 
>        * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
>        for valid immediate for BIC and ORR based on new enum argument.
>        (aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
>        as well based on new enum argument.
>  
>        * config/aarch64/constraints.md (Do): New vector immediate constraint.
>        (Db): Likewise.
> 
> 2017-09-27 Sudakshina Das  <sudi.das@arm.com>
> 
>        * gcc.target/aarch64/bic_imm_1.c: New test.
>        * gcc.target/aarch64/orr_imm_1.c: Likewise.
> 
> 
> Thanks
> Sudi
> 
>   
> From: James Greenhalgh <james.greenhalgh@arm.com>
> Sent: Tuesday, September 26, 2017 8:04:38 PM
> To: Sudi Das
> Cc: Richard Earnshaw; gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
> Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
>     
> On Mon, Sep 25, 2017 at 11:13:57AM +0100, Sudi Das wrote:
>>
>> Hi James
>>
>> I put aarch64_output_simd_general_immediate looking at the similarities of
>> the immediates for mov/mvni and orr/bic. The CHECK macro in
>> aarch64_simd_valid_immediate both checks
>> and converts the immediates in a manner that are needed for the instructions.
>>
>> Having said that, I agree that maybe I could have refactored
>> aarch64_output_simd_mov_immediate to do the work rather than creating a new
>> functions to do similar things. I have done so in this patch.
> 
> Thanks, this looks much neater.
> 
>> I have also changed the names of the enum simd_immediate_check to be better
>> indicative of what they are doing. 
> 
> Thanks, I'd tweak them to look more like the bitmasks you use them as, but
> that is a small change for my personal preference.
> 
>> Lastly I have added more cases in the tests (according to all the possible
>> CHECKs) and made them dg-do assemble (although I had to add --save-temps so
>> that the scan-assembler would work). Do you think I should not put that
>> option and rather create separate tests?
> 
> This is good - thanks.
> 
> I think clean up the enum definitions and this patch will be good.
> 
>> @@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
>>     AARCH64_PARSE_INVALID_ARG          /* Invalid arch, tune, cpu arg.  */
>>   };
>>   
>> +/* Enum to distinguish which type of check is to be done in
>> +   aarch64_simd_valid_immediate.  This is used as a bitmask where
>> +   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
>> +   perform all checks.  Adding new types would require changes accordingly.  */
>> +enum simd_immediate_check {
>> +  AARCH64_CHECK_ORR  = 1,    /* Perform immediate checks for ORR.  */
>> +  AARCH64_CHECK_BIC  = 2,    /* Perform immediate checks for BIC.  */
>> +  AARCH64_CHECK_MOV  = 3     /* Perform all checks (used for MOVI/MNVI).  */
> 
> These are used in bit-mask style, so how about:
> 
>   AARCH64_CHECK_ORR = 1 << 0,
>   AARCH64_CHECK_BIC = 1 << 1,
>   AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
> 
> Which is more self-documenting.
> 
>> @@ -13001,7 +13013,8 @@ aarch64_float_const_representable_p (rtx x)
>>   char*
>>   aarch64_output_simd_mov_immediate (rtx const_vector,
>>                                    machine_mode mode,
>> -                                unsigned width)
>> +                                unsigned width,
>> +                                enum simd_immediate_check which)
> 
> This function is sorely missing a comment explaining the parameters - it
> would be very helpful if you could add one as part of this patch.
> 
> Thanks,
> James
> 
>     
> 
+;; For AND (vector, register) and BIC (vector, immediate)
 (define_insn "and<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-                (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+                (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))]

You should define a new predicate operation for operand 2 that accepts
just registers or the valid constants.  Otherwise you'll may get
spilling during register allocation.

Similarly for the other pattern.

R.
    

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: patch-7260-6.diff --]
[-- Type: text/x-patch; name="patch-7260-6.diff", Size: 16699 bytes --]

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index e67c2ed..5d7c5df 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
   AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
 };
 
+/* Enum to distinguish which type of check is to be done in
+   aarch64_simd_valid_immediate.  This is used as a bitmask where
+   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
+   perform all checks.  Adding new types would require changes accordingly.  */
+enum simd_immediate_check {
+  AARCH64_CHECK_ORR  = 1 << 0,
+  AARCH64_CHECK_BIC  = 1 << 1,
+  AARCH64_CHECK_MOV  = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
+};
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -345,7 +355,8 @@ bool aarch64_mov_operand_p (rtx, machine_mode);
 rtx aarch64_reverse_mask (machine_mode);
 bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
-char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
+char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned,
+			enum simd_immediate_check w = AARCH64_CHECK_MOV);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
 bool aarch64_regno_ok_for_index_p (int, bool);
@@ -356,7 +367,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
 bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
 bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
 bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
-				   struct simd_immediate_info *);
+			struct simd_immediate_info *,
+			enum simd_immediate_check w = AARCH64_CHECK_MOV);
 bool aarch64_split_dimode_const_store (rtx, rtx);
 bool aarch64_symbolic_address_p (rtx);
 bool aarch64_uimm12_shift (HOST_WIDE_INT);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 70e9339..12da8be 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -558,21 +558,45 @@
   [(set_attr "type" "neon_fp_abd_<stype><q>")]
 )
 
+;; For AND (vector, register) and BIC (vector, immediate)
 (define_insn "and<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
   "TARGET_SIMD"
-  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+    switch (which_alternative)
+      {
+      case 0:
+	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+      case 1:
+	return aarch64_output_simd_mov_immediate (operands[2],
+	   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
+      default:
+	gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
+;; For ORR (vector, register) and ORR (vector, immediate)
 (define_insn "ior<mode>3"
-  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
-        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
-		 (match_operand:VDQ_I 2 "register_operand" "w")))]
+  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
+	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
+		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
   "TARGET_SIMD"
-  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+  {
+    switch (which_alternative)
+      {
+      case 0:
+	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
+      case 1:
+	return aarch64_output_simd_mov_immediate (operands[2],
+		<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
+      default:
+	gcc_unreachable ();
+      }
+  }
   [(set_attr "type" "neon_logic<q>")]
 )
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5e26cb7..6a6b3d5 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -11471,7 +11471,8 @@ aarch64_vect_float_const_representable_p (rtx x)
 /* Return true for valid and false for invalid.  */
 bool
 aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
-			      struct simd_immediate_info *info)
+			      struct simd_immediate_info *info,
+			      enum simd_immediate_check which)
 {
 #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
   matches = 1;						\
@@ -11539,54 +11540,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
 
   do
     {
-      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
+      if (which & AARCH64_CHECK_ORR)
+	{
+	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
 
-      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
+	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
 
-      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
+	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
 
-      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
+	}
 
-      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
+      if (which & AARCH64_CHECK_BIC)
+	{
+	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
 
-      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
+	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
 
-      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
+	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
 
-      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
+	}
 
-      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
+      /* Shifting ones / 8-bit / 64-bit variants only checked
+	 for 'ALL' (MOVI/MVNI).  */
+      if (which == AARCH64_CHECK_MOV)
+	{
+	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
 
-      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
-	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
+	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
+		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
 
-      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
+	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
 
-      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
-	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
+	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
+		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
 
-      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
+	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
 
-      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
-	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
+		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
+	}
     }
   while (0);
 
@@ -12998,10 +13010,14 @@ aarch64_float_const_representable_p (rtx x)
   return (exponent >= 0 && exponent <= 7);
 }
 
+/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
+   immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether to
+   output MOVI/MVNI, ORR or BIC immediate.  */
 char*
 aarch64_output_simd_mov_immediate (rtx const_vector,
 				   machine_mode mode,
-				   unsigned width)
+				   unsigned width,
+				   enum simd_immediate_check which)
 {
   bool is_valid;
   static char templ[40];
@@ -13013,9 +13029,11 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
   struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
 
   /* This will return true to show const_vector is legal for use as either
-     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
-     also update INFO to show how the immediate should be generated.  */
-  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
+     a AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
+     It will also update INFO to show how the immediate should be generated.
+     WHICH selects whether to check for MOVI/MVNI, ORR or BIC.  */
+  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
+					   &info, which);
   gcc_assert (is_valid);
 
   element_char = sizetochar (info.element_width);
@@ -13046,20 +13064,37 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
 	}
     }
 
-  mnemonic = info.mvn ? "mvni" : "movi";
-  shift_op = info.msl ? "msl" : "lsl";
-
   gcc_assert (CONST_INT_P (info.value));
-  if (lane_count == 1)
-    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
-	      mnemonic, UINTVAL (info.value));
-  else if (info.shift)
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
-	      ", %s %d", mnemonic, lane_count, element_char,
-	      UINTVAL (info.value), shift_op, info.shift);
+
+  if (which == AARCH64_CHECK_MOV)
+    {
+      mnemonic = info.mvn ? "mvni" : "movi";
+      shift_op = info.msl ? "msl" : "lsl";
+      if (lane_count == 1)
+	snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
+		  mnemonic, UINTVAL (info.value));
+      else if (info.shift)
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
+		  HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
+		  element_char, UINTVAL (info.value), shift_op, info.shift);
+      else
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
+		  HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
+		  element_char, UINTVAL (info.value));
+    }
   else
-    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
-	      mnemonic, lane_count, element_char, UINTVAL (info.value));
+    {
+      /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR.  */
+      mnemonic = info.mvn ? "bic" : "orr";
+      if (info.shift)
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
+		  HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
+		  element_char, UINTVAL (info.value), "lsl", info.shift);
+      else
+	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
+		  HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
+		  element_char, UINTVAL (info.value));
+    }
   return templ;
 }
 
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 3649fb4..77c510c 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -190,6 +190,20 @@
   (and (match_code "const_double")
        (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
 
+(define_constraint "Do"
+  "@internal
+   A constraint that matches vector of immediates for orr."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, AARCH64_CHECK_ORR)")))
+
+(define_constraint "Db"
+  "@internal
+   A constraint that matches vector of immediates for bic."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_immediate (op, mode, false,
+						 NULL, AARCH64_CHECK_BIC)")))
+
 (define_constraint "Dn"
   "@internal
  A constraint that matches vector of immediates."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 11243c4..887a13e 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -69,6 +69,16 @@
 		 (ior (match_test "op == constm1_rtx")
 		      (match_test "op == const1_rtx"))))))
 
+(define_predicate "aarch64_reg_or_orr_imm"
+   (ior (match_operand 0 "register_operand")
+	(match_test "aarch64_simd_valid_immediate (op, mode, false,
+						   NULL, AARCH64_CHECK_ORR)")))
+
+(define_predicate "aarch64_reg_or_bic_imm"
+   (ior (match_operand 0 "register_operand")
+	(match_test "aarch64_simd_valid_immediate (op, mode, false,
+						   NULL, AARCH64_CHECK_BIC)")))
+
 (define_predicate "aarch64_fp_compare_operand"
   (ior (match_operand 0 "register_operand")
        (and (match_code "const_double")
diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
new file mode 100644
index 0000000..b14f009
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
@@ -0,0 +1,56 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
+
+/* Each function uses the correspoding 'CLASS' in
+   Marco CHECK (aarch64_simd_valid_immediate).  */
+
+void
+bic_6 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xab);
+}
+
+void
+bic_7 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xcd00);
+}
+
+void
+bic_8 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0xef0000);
+}
+
+void
+bic_9 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x12000000);
+}
+
+void
+bic_10 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x34);
+}
+
+
+void
+bic_11 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] &= ~(0x5600);
+}
+
+
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #171" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #205, lsl #8" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #239, lsl #16" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #18, lsl #24" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #52" } } */
+/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #86, lsl #8" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
new file mode 100644
index 0000000..ff6f683
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
@@ -0,0 +1,54 @@
+/* { dg-do assemble } */
+/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
+
+/* Each function uses the correspoding 'CLASS' in
+   Marco CHECK (aarch64_simd_valid_immediate).  */
+
+void
+orr_0 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0xab;
+}
+
+void
+orr_1 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x0000cd00;
+}
+
+void
+orr_2 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x00ef0000;
+}
+
+void
+orr_3 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x12000000;
+}
+
+void
+orr_4 (short *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x00340034;
+}
+
+void
+orr_5 (int *a)
+{
+  for (int i = 0; i < 1024; i++)
+    a[i] |= 0x56005600;
+}
+
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #205, lsl #8" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #239, lsl #16" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #18, lsl #24" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #52" } } */
+/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #86, lsl #8" } } */

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-10-02  9:05               ` Sudi Das
@ 2017-10-04 15:41                 ` Richard Earnshaw (lists)
  2017-10-04 23:05                   ` Steve Ellcey
  0 siblings, 1 reply; 12+ messages in thread
From: Richard Earnshaw (lists) @ 2017-10-04 15:41 UTC (permalink / raw)
  To: Sudi Das, James Greenhalgh; +Cc: gcc-patches, nd, Marcus Shawcroft

On 02/10/17 10:05, Sudi Das wrote:
> 
> Hi Richard
> 
> Thanks, I have made the change to the patch.
> 
> 
> 2017-10-02 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
> 	for aarch64_simd_valid_immediate.
> 	(aarch64_output_simd_mov_immediate): Update prototype.
> 	(aarch64_simd_valid_immediate): Update prototype.
> 
> 	* config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
> 	support for ORR-immediate.
> 	(and<mode>3): modified pattern to add support for BIC-immediate.
> 
> 	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
> 	for valid immediate for BIC and ORR based on new enum argument.
> 	(aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
> 	as well based on new enum argument.
>  
> 	* config/aarch64/constraints.md (Do): New vector immediate constraint.
> 	(Db) : Likewise.
> 
> 	* config/aarch64/predicates.md (aarch64_reg_or_orr_imm): New predicate.
> 	(aarch64_reg_or_bic_imm): Likewise.
> 
> 
> 2017-10-02 Sudakshina Das  <sudi.das@arm.com>
> 
> 	* gcc.target/aarch64/bic_imm_1.c: New test.
> 	* gcc.target/aarch64/orr_imm_1.c: Likewise.
> 

OK.

R.

> 
> From: Richard Earnshaw (lists) <Richard.Earnshaw@arm.com>
> Sent: Thursday, September 28, 2017 9:55 AM
> To: Sudi Das; James Greenhalgh
> Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
> Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
>     
> On 27/09/17 18:57, Sudi Das wrote:
>>
>>
>> Hi James
>>
>> I have made the requested changes to the patch.
>>
>>
>> 2017-09-27 Sudakshina Das  <sudi.das@arm.com>
>>
>>         * config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
>>         for aarch64_simd_valid_immediate.
>>         (aarch64_output_simd_mov_immediate): Update prototype.
>>         (aarch64_simd_valid_immediate): Update prototype.
>>
>>         * config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
>>         support for ORR-immediate.
>>         (and<mode>3): modified pattern to add support for BIC-immediate.
>>
>>         * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
>>         for valid immediate for BIC and ORR based on new enum argument.
>>         (aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
>>         as well based on new enum argument.
>>   
>>         * config/aarch64/constraints.md (Do): New vector immediate constraint.
>>         (Db): Likewise.
>>
>> 2017-09-27 Sudakshina Das  <sudi.das@arm.com>
>>
>>         * gcc.target/aarch64/bic_imm_1.c: New test.
>>         * gcc.target/aarch64/orr_imm_1.c: Likewise.
>>
>>
>> Thanks
>> Sudi
>>
>>    
>> From: James Greenhalgh <james.greenhalgh@arm.com>
>> Sent: Tuesday, September 26, 2017 8:04:38 PM
>> To: Sudi Das
>> Cc: Richard Earnshaw; gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
>> Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
>>      
>> On Mon, Sep 25, 2017 at 11:13:57AM +0100, Sudi Das wrote:
>>>
>>> Hi James
>>>
>>> I put aarch64_output_simd_general_immediate looking at the similarities of
>>> the immediates for mov/mvni and orr/bic. The CHECK macro in
>>> aarch64_simd_valid_immediate both checks
>>> and converts the immediates in a manner that are needed for the instructions.
>>>
>>> Having said that, I agree that maybe I could have refactored
>>> aarch64_output_simd_mov_immediate to do the work rather than creating a new
>>> functions to do similar things. I have done so in this patch.
>>
>> Thanks, this looks much neater.
>>
>>> I have also changed the names of the enum simd_immediate_check to be better
>>> indicative of what they are doing. 
>>
>> Thanks, I'd tweak them to look more like the bitmasks you use them as, but
>> that is a small change for my personal preference.
>>
>>> Lastly I have added more cases in the tests (according to all the possible
>>> CHECKs) and made them dg-do assemble (although I had to add --save-temps so
>>> that the scan-assembler would work). Do you think I should not put that
>>> option and rather create separate tests?
>>
>> This is good - thanks.
>>
>> I think clean up the enum definitions and this patch will be good.
>>
>>> @@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
>>>      AARCH64_PARSE_INVALID_ARG          /* Invalid arch, tune, cpu arg.  */
>>>    };
>>>    
>>> +/* Enum to distinguish which type of check is to be done in
>>> +   aarch64_simd_valid_immediate.  This is used as a bitmask where
>>> +   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
>>> +   perform all checks.  Adding new types would require changes accordingly.  */
>>> +enum simd_immediate_check {
>>> +  AARCH64_CHECK_ORR  = 1,    /* Perform immediate checks for ORR.  */
>>> +  AARCH64_CHECK_BIC  = 2,    /* Perform immediate checks for BIC.  */
>>> +  AARCH64_CHECK_MOV  = 3     /* Perform all checks (used for MOVI/MNVI).  */
>>
>> These are used in bit-mask style, so how about:
>>
>>    AARCH64_CHECK_ORR = 1 << 0,
>>    AARCH64_CHECK_BIC = 1 << 1,
>>    AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
>>
>> Which is more self-documenting.
>>
>>> @@ -13001,7 +13013,8 @@ aarch64_float_const_representable_p (rtx x)
>>>    char*
>>>    aarch64_output_simd_mov_immediate (rtx const_vector,
>>>                                     machine_mode mode,
>>> -                                unsigned width)
>>> +                                unsigned width,
>>> +                                enum simd_immediate_check which)
>>
>> This function is sorely missing a comment explaining the parameters - it
>> would be very helpful if you could add one as part of this patch.
>>
>> Thanks,
>> James
>>
>>      
>>
> +;; For AND (vector, register) and BIC (vector, immediate)
>  (define_insn "and<mode>3"
> -  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> -        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
> -                (match_operand:VDQ_I 2 "register_operand" "w")))]
> +  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
> +       (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
> +                (match_operand:VDQ_I 2 "nonmemory_operand" "w,Db")))]
> 
> You should define a new predicate operation for operand 2 that accepts
> just registers or the valid constants.  Otherwise you'll may get
> spilling during register allocation.
> 
> Similarly for the other pattern.
> 
> R.
>     
> 
> 
> patch-7260-6.diff
> 
> 
> diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
> index e67c2ed..5d7c5df 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -308,6 +308,16 @@ enum aarch64_parse_opt_result
>    AARCH64_PARSE_INVALID_ARG		/* Invalid arch, tune, cpu arg.  */
>  };
>  
> +/* Enum to distinguish which type of check is to be done in
> +   aarch64_simd_valid_immediate.  This is used as a bitmask where
> +   AARCH64_CHECK_MOV has both bits set.  Thus AARCH64_CHECK_MOV will
> +   perform all checks.  Adding new types would require changes accordingly.  */
> +enum simd_immediate_check {
> +  AARCH64_CHECK_ORR  = 1 << 0,
> +  AARCH64_CHECK_BIC  = 1 << 1,
> +  AARCH64_CHECK_MOV  = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
> +};
> +
>  extern struct tune_params aarch64_tune_params;
>  
>  HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
> @@ -345,7 +355,8 @@ bool aarch64_mov_operand_p (rtx, machine_mode);
>  rtx aarch64_reverse_mask (machine_mode);
>  bool aarch64_offset_7bit_signed_scaled_p (machine_mode, HOST_WIDE_INT);
>  char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
> -char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned);
> +char *aarch64_output_simd_mov_immediate (rtx, machine_mode, unsigned,
> +			enum simd_immediate_check w = AARCH64_CHECK_MOV);
>  bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
>  bool aarch64_regno_ok_for_base_p (int, bool);
>  bool aarch64_regno_ok_for_index_p (int, bool);
> @@ -356,7 +367,8 @@ bool aarch64_simd_imm_zero_p (rtx, machine_mode);
>  bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
>  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
>  bool aarch64_simd_valid_immediate (rtx, machine_mode, bool,
> -				   struct simd_immediate_info *);
> +			struct simd_immediate_info *,
> +			enum simd_immediate_check w = AARCH64_CHECK_MOV);
>  bool aarch64_split_dimode_const_store (rtx, rtx);
>  bool aarch64_symbolic_address_p (rtx);
>  bool aarch64_uimm12_shift (HOST_WIDE_INT);
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 70e9339..12da8be 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -558,21 +558,45 @@
>    [(set_attr "type" "neon_fp_abd_<stype><q>")]
>  )
>  
> +;; For AND (vector, register) and BIC (vector, immediate)
>  (define_insn "and<mode>3"
> -  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> -        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
> -		 (match_operand:VDQ_I 2 "register_operand" "w")))]
> +  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
> +	(and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
> +		   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
>    "TARGET_SIMD"
> -  "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
> +  {
> +    switch (which_alternative)
> +      {
> +      case 0:
> +	return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
> +      case 1:
> +	return aarch64_output_simd_mov_immediate (operands[2],
> +	   <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC);
> +      default:
> +	gcc_unreachable ();
> +      }
> +  }
>    [(set_attr "type" "neon_logic<q>")]
>  )
>  
> +;; For ORR (vector, register) and ORR (vector, immediate)
>  (define_insn "ior<mode>3"
> -  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
> -        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
> -		 (match_operand:VDQ_I 2 "register_operand" "w")))]
> +  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
> +	(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
> +		   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
>    "TARGET_SIMD"
> -  "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
> +  {
> +    switch (which_alternative)
> +      {
> +      case 0:
> +	return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
> +      case 1:
> +	return aarch64_output_simd_mov_immediate (operands[2],
> +		<MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR);
> +      default:
> +	gcc_unreachable ();
> +      }
> +  }
>    [(set_attr "type" "neon_logic<q>")]
>  )
>  
> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 5e26cb7..6a6b3d5 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -11471,7 +11471,8 @@ aarch64_vect_float_const_representable_p (rtx x)
>  /* Return true for valid and false for invalid.  */
>  bool
>  aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
> -			      struct simd_immediate_info *info)
> +			      struct simd_immediate_info *info,
> +			      enum simd_immediate_check which)
>  {
>  #define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG)	\
>    matches = 1;						\
> @@ -11539,54 +11540,65 @@ aarch64_simd_valid_immediate (rtx op, machine_mode mode, bool inverse,
>  
>    do
>      {
> -      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
> +      if (which & AARCH64_CHECK_ORR)
> +	{
> +	  CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 0, 0);
>  
> -      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +	  CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +	  CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
> +	  CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3], 24, 0);
>  
> -      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
> +	  CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0, 0, 0);
>  
> -      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +	  CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1], 8, 0);
> +	}
>  
> -      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
> +      if (which & AARCH64_CHECK_BIC)
> +	{
> +	  CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 0, 1);
>  
> -      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +	  CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +	  CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
> +	  CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3], 24, 1);
>  
> -      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
> +	  CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff, 0, 1);
>  
> -      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +	  CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1], 8, 1);
> +	}
>  
> -      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
> +      /* Shifting ones / 8-bit / 64-bit variants only checked
> +	 for 'ALL' (MOVI/MVNI).  */
> +      if (which == AARCH64_CHECK_MOV)
> +	{
> +	  CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0 && bytes[i + 3] == 0, 8, 0);
>  
> -      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> -	     && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
> +	  CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
> +		 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff, 8, 1);
>  
> -      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
> +	  CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0, 16, 0);
>  
> -      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> -	     && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
> +	  CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
> +		 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff, 16, 1);
>  
> -      CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
> +	  CHECK (1, 8, 16, bytes[i] == bytes[0], 0, 0);
>  
> -      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> -	     && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +	  CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
> +		 && bytes[i] == bytes[(i + 8) % idx], 0, 0);
> +	}
>      }
>    while (0);
>  
> @@ -12998,10 +13010,14 @@ aarch64_float_const_representable_p (rtx x)
>    return (exponent >= 0 && exponent <= 7);
>  }
>  
> +/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
> +   immediate with a CONST_VECTOR of MODE and WIDTH.  WHICH selects whether to
> +   output MOVI/MVNI, ORR or BIC immediate.  */
>  char*
>  aarch64_output_simd_mov_immediate (rtx const_vector,
>  				   machine_mode mode,
> -				   unsigned width)
> +				   unsigned width,
> +				   enum simd_immediate_check which)
>  {
>    bool is_valid;
>    static char templ[40];
> @@ -13013,9 +13029,11 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>    struct simd_immediate_info info = { NULL_RTX, 0, 0, false, false };
>  
>    /* This will return true to show const_vector is legal for use as either
> -     a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate.  It will
> -     also update INFO to show how the immediate should be generated.  */
> -  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
> +     a AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
> +     It will also update INFO to show how the immediate should be generated.
> +     WHICH selects whether to check for MOVI/MVNI, ORR or BIC.  */
> +  is_valid = aarch64_simd_valid_immediate (const_vector, mode, false,
> +					   &info, which);
>    gcc_assert (is_valid);
>  
>    element_char = sizetochar (info.element_width);
> @@ -13046,20 +13064,37 @@ aarch64_output_simd_mov_immediate (rtx const_vector,
>  	}
>      }
>  
> -  mnemonic = info.mvn ? "mvni" : "movi";
> -  shift_op = info.msl ? "msl" : "lsl";
> -
>    gcc_assert (CONST_INT_P (info.value));
> -  if (lane_count == 1)
> -    snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
> -	      mnemonic, UINTVAL (info.value));
> -  else if (info.shift)
> -    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
> -	      ", %s %d", mnemonic, lane_count, element_char,
> -	      UINTVAL (info.value), shift_op, info.shift);
> +
> +  if (which == AARCH64_CHECK_MOV)
> +    {
> +      mnemonic = info.mvn ? "mvni" : "movi";
> +      shift_op = info.msl ? "msl" : "lsl";
> +      if (lane_count == 1)
> +	snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
> +		  mnemonic, UINTVAL (info.value));
> +      else if (info.shift)
> +	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
> +		  HOST_WIDE_INT_PRINT_HEX ", %s %d", mnemonic, lane_count,
> +		  element_char, UINTVAL (info.value), shift_op, info.shift);
> +      else
> +	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, "
> +		  HOST_WIDE_INT_PRINT_HEX, mnemonic, lane_count,
> +		  element_char, UINTVAL (info.value));
> +    }
>    else
> -    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
> -	      mnemonic, lane_count, element_char, UINTVAL (info.value));
> +    {
> +      /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR.  */
> +      mnemonic = info.mvn ? "bic" : "orr";
> +      if (info.shift)
> +	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
> +		  HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
> +		  element_char, UINTVAL (info.value), "lsl", info.shift);
> +      else
> +	snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
> +		  HOST_WIDE_INT_PRINT_DEC, mnemonic, lane_count,
> +		  element_char, UINTVAL (info.value));
> +    }
>    return templ;
>  }
>  
> diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
> index 3649fb4..77c510c 100644
> --- a/gcc/config/aarch64/constraints.md
> +++ b/gcc/config/aarch64/constraints.md
> @@ -190,6 +190,20 @@
>    (and (match_code "const_double")
>         (match_test "aarch64_can_const_movi_rtx_p (op, GET_MODE (op))")))
>  
> +(define_constraint "Do"
> +  "@internal
> +   A constraint that matches vector of immediates for orr."
> + (and (match_code "const_vector")
> +      (match_test "aarch64_simd_valid_immediate (op, mode, false,
> +						 NULL, AARCH64_CHECK_ORR)")))
> +
> +(define_constraint "Db"
> +  "@internal
> +   A constraint that matches vector of immediates for bic."
> + (and (match_code "const_vector")
> +      (match_test "aarch64_simd_valid_immediate (op, mode, false,
> +						 NULL, AARCH64_CHECK_BIC)")))
> +
>  (define_constraint "Dn"
>    "@internal
>   A constraint that matches vector of immediates."
> diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
> index 11243c4..887a13e 100644
> --- a/gcc/config/aarch64/predicates.md
> +++ b/gcc/config/aarch64/predicates.md
> @@ -69,6 +69,16 @@
>  		 (ior (match_test "op == constm1_rtx")
>  		      (match_test "op == const1_rtx"))))))
>  
> +(define_predicate "aarch64_reg_or_orr_imm"
> +   (ior (match_operand 0 "register_operand")
> +	(match_test "aarch64_simd_valid_immediate (op, mode, false,
> +						   NULL, AARCH64_CHECK_ORR)")))
> +
> +(define_predicate "aarch64_reg_or_bic_imm"
> +   (ior (match_operand 0 "register_operand")
> +	(match_test "aarch64_simd_valid_immediate (op, mode, false,
> +						   NULL, AARCH64_CHECK_BIC)")))
> +
>  (define_predicate "aarch64_fp_compare_operand"
>    (ior (match_operand 0 "register_operand")
>         (and (match_code "const_double")
> diff --git a/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> new file mode 100644
> index 0000000..b14f009
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/bic_imm_1.c
> @@ -0,0 +1,56 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +
> +/* Each function uses the correspoding 'CLASS' in
> +   Marco CHECK (aarch64_simd_valid_immediate).  */
> +
> +void
> +bic_6 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xab);
> +}
> +
> +void
> +bic_7 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xcd00);
> +}
> +
> +void
> +bic_8 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0xef0000);
> +}
> +
> +void
> +bic_9 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0x12000000);
> +}
> +
> +void
> +bic_10 (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0x34);
> +}
> +
> +
> +void
> +bic_11 (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] &= ~(0x5600);
> +}
> +
> +
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #171" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #205, lsl #8" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #239, lsl #16" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.4s, #18, lsl #24" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #52" } } */
> +/* { dg-final { scan-assembler "bic\\tv\[0-9\]+.8h, #86, lsl #8" } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> new file mode 100644
> index 0000000..ff6f683
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/orr_imm_1.c
> @@ -0,0 +1,54 @@
> +/* { dg-do assemble } */
> +/* { dg-options "-O2 --save-temps -ftree-vectorize" } */
> +
> +/* Each function uses the correspoding 'CLASS' in
> +   Marco CHECK (aarch64_simd_valid_immediate).  */
> +
> +void
> +orr_0 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0xab;
> +}
> +
> +void
> +orr_1 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0x0000cd00;
> +}
> +
> +void
> +orr_2 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0x00ef0000;
> +}
> +
> +void
> +orr_3 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0x12000000;
> +}
> +
> +void
> +orr_4 (short *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0x00340034;
> +}
> +
> +void
> +orr_5 (int *a)
> +{
> +  for (int i = 0; i < 1024; i++)
> +    a[i] |= 0x56005600;
> +}
> +
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #171" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #205, lsl #8" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #239, lsl #16" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.4s, #18, lsl #24" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #52" } } */
> +/* { dg-final { scan-assembler "orr\\tv\[0-9\]+.8h, #86, lsl #8" } } */
> 

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-10-04 15:41                 ` Richard Earnshaw (lists)
@ 2017-10-04 23:05                   ` Steve Ellcey
  2017-10-05 15:41                     ` Sudi Das
  0 siblings, 1 reply; 12+ messages in thread
From: Steve Ellcey @ 2017-10-04 23:05 UTC (permalink / raw)
  To: Richard Earnshaw (lists), Sudi Das, James Greenhalgh
  Cc: gcc-patches, nd, Marcus Shawcroft

On Wed, 2017-10-04 at 16:41 +0100, Richard Earnshaw (lists) wrote:
> On 02/10/17 10:05, Sudi Das wrote:
> > 
> > 2017-10-02 Sudakshina Das  <sudi.das@arm.com>
> > 
> > 	* config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
> > 	for aarch64_simd_valid_immediate.
> > 	(aarch64_output_simd_mov_immediate): Update prototype.
> > 	(aarch64_simd_valid_immediate): Update prototype.
> > 
> > 	* config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
> > 	support for ORR-immediate.
> > 	(and<mode>3): modified pattern to add support for BIC-immediate.
> > 
> > 	* config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
> > 	for valid immediate for BIC and ORR based on new enum argument.
> > 	(aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
> > 	as well based on new enum argument.
> >  
> > 	* config/aarch64/constraints.md (Do): New vector immediate constraint.
> > 	(Db) : Likewise.
> > 
> > 	* config/aarch64/predicates.md (aarch64_reg_or_orr_imm): New
> > predicate.
> > 	(aarch64_reg_or_bic_imm): Likewise.

I think this patch is causing a bunch of test failures on aarch64.  I
had to apply the patch for PR82396 (that was reverted) in order to
build ToT GCC, but when I did that and ran the testsuite I got a bunch
of failures like:

/home/sellcey/cavium-pr-27386/src/gcc/gcc/testsuite/gcc.c-
torture/compile/pr54713-1.c:45:18: internal compiler error: in
aarch64_simd_valid_immediate, at config/aarch64/aarch64.c:11539
0xf2227b aarch64_simd_valid_immediate(rtx_def*, machine_mode, bool,
simd_immediate_info*, simd_immediate_check)
        ../../../src/gcc/gcc/config/aarch64/aarch64.c:11539
0x11047b3 aarch64_reg_or_bic_imm(rtx_def*, machine_mode)
        ../../../src/gcc/gcc/config/aarch64/predicates.md:79
0xab29ab insn_operand_matches(insn_code, unsigned int, rtx_def*)
        ../../../src/gcc/gcc/optabs.c:6891
0xab29ab maybe_legitimize_operand_same_code
        ../../../src/gcc/gcc/optabs.c:6919
0xab545f maybe_legitimize_operand
        ../../../src/gcc/gcc/optabs.c:6990
0xab545f maybe_legitimize_operands(insn_code, unsigned int, unsigned
int, expand_operand*)
        ../../../src/gcc/gcc/optabs.c:7055
0xab5a8f maybe_gen_insn(insn_code, unsigned int, expand_operand*)
        ../../../src/gcc/gcc/optabs.c:7073
0xab8503 expand_binop_directly
        ../../../src/gcc/gcc/optabs.c:1075
0xab87af expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*,
rtx_def*, int, optab_methods)
        ../../../src/gcc/gcc/optabs.c:1156
0x8736d7 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode,
expand_modifier)
        ../../../src/gcc/gcc/expr.c:9582
0x749027 expand_gimple_stmt_1
        ../../../src/gcc/gcc/cfgexpand.c:3691
0x749027 expand_gimple_stmt
        ../../../src/gcc/gcc/cfgexpand.c:3751
0x750387 expand_gimple_basic_block
        ../../../src/gcc/gcc/cfgexpand.c:5750
0x751ef7 execute
        ../../../src/gcc/gcc/cfgexpand.c:6357

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
  2017-10-04 23:05                   ` Steve Ellcey
@ 2017-10-05 15:41                     ` Sudi Das
  0 siblings, 0 replies; 12+ messages in thread
From: Sudi Das @ 2017-10-05 15:41 UTC (permalink / raw)
  To: Richard Earnshaw, James Greenhalgh, sellcey
  Cc: gcc-patches, nd, Marcus Shawcroft


Hi Steve

Sorry about this.
I am on it. I have a fix and I am running tests on it right now.

Sudi


From: Steve Ellcey <sellcey@cavium.com>
Sent: Thursday, October 5, 2017 12:05 AM
To: Richard Earnshaw; Sudi Das; James Greenhalgh
Cc: gcc-patches@gcc.gnu.org; nd; Marcus Shawcroft
Subject: Re: [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern
    
On Wed, 2017-10-04 at 16:41 +0100, Richard Earnshaw (lists) wrote:
> On 02/10/17 10:05, Sudi Das wrote:
> > 
> > 2017-10-02 Sudakshina Das  <sudi.das@arm.com>
> > 
> >      * config/aarch64/aarch64-protos.h (enum simd_immediate_check): New check type
> >      for aarch64_simd_valid_immediate.
> >      (aarch64_output_simd_mov_immediate): Update prototype.
> >      (aarch64_simd_valid_immediate): Update prototype.
> > 
> >      * config/aarch64/aarch64-simd.md (orr<mode>3): modified pattern to add
> >      support for ORR-immediate.
> >      (and<mode>3): modified pattern to add support for BIC-immediate.
> > 
> >      * config/aarch64/aarch64.c (aarch64_simd_valid_immediate): Function now checks
> >      for valid immediate for BIC and ORR based on new enum argument.
> >      (aarch64_output_simd_mov_immediate): Function now used to output BIC/ORR imm
> >      as well based on new enum argument.
> >  
> >      * config/aarch64/constraints.md (Do): New vector immediate constraint.
> >      (Db) : Likewise.
> > 
> >      * config/aarch64/predicates.md (aarch64_reg_or_orr_imm): New
> > predicate.
> >      (aarch64_reg_or_bic_imm): Likewise.

I think this patch is causing a bunch of test failures on aarch64.  I
had to apply the patch for PR82396 (that was reverted) in order to
build ToT GCC, but when I did that and ran the testsuite I got a bunch
of failures like:

/home/sellcey/cavium-pr-27386/src/gcc/gcc/testsuite/gcc.c-
torture/compile/pr54713-1.c:45:18: internal compiler error: in
aarch64_simd_valid_immediate, at config/aarch64/aarch64.c:11539
0xf2227b aarch64_simd_valid_immediate(rtx_def*, machine_mode, bool,
simd_immediate_info*, simd_immediate_check)
        ../../../src/gcc/gcc/config/aarch64/aarch64.c:11539
0x11047b3 aarch64_reg_or_bic_imm(rtx_def*, machine_mode)
        ../../../src/gcc/gcc/config/aarch64/predicates.md:79
0xab29ab insn_operand_matches(insn_code, unsigned int, rtx_def*)
        ../../../src/gcc/gcc/optabs.c:6891
0xab29ab maybe_legitimize_operand_same_code
        ../../../src/gcc/gcc/optabs.c:6919
0xab545f maybe_legitimize_operand
        ../../../src/gcc/gcc/optabs.c:6990
0xab545f maybe_legitimize_operands(insn_code, unsigned int, unsigned
int, expand_operand*)
        ../../../src/gcc/gcc/optabs.c:7055
0xab5a8f maybe_gen_insn(insn_code, unsigned int, expand_operand*)
        ../../../src/gcc/gcc/optabs.c:7073
0xab8503 expand_binop_directly
        ../../../src/gcc/gcc/optabs.c:1075
0xab87af expand_binop(machine_mode, optab_tag, rtx_def*, rtx_def*,
rtx_def*, int, optab_methods)
        ../../../src/gcc/gcc/optabs.c:1156
0x8736d7 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode,
expand_modifier)
        ../../../src/gcc/gcc/expr.c:9582
0x749027 expand_gimple_stmt_1
        ../../../src/gcc/gcc/cfgexpand.c:3691
0x749027 expand_gimple_stmt
        ../../../src/gcc/gcc/cfgexpand.c:3751
0x750387 expand_gimple_basic_block
        ../../../src/gcc/gcc/cfgexpand.c:5750
0x751ef7 execute
        ../../../src/gcc/gcc/cfgexpand.c:6357
    

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2017-10-05 15:41 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-04-18 16:51 [PATCH][AArch64] Add BIC-imm and ORR-imm SIMD pattern Sudi Das
2017-05-05 13:38 ` Richard Earnshaw (lists)
2017-08-07 13:56   ` Sudi Das
2017-09-20 10:39     ` James Greenhalgh
2017-09-25 10:14       ` Sudi Das
2017-09-26 19:04         ` James Greenhalgh
2017-09-27 17:57           ` Sudi Das
2017-09-28  8:55             ` Richard Earnshaw (lists)
2017-10-02  9:05               ` Sudi Das
2017-10-04 15:41                 ` Richard Earnshaw (lists)
2017-10-04 23:05                   ` Steve Ellcey
2017-10-05 15:41                     ` Sudi Das

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).