[PATCH] aarch64: Model zero-high-half semantics of XTN instruction in RTL

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH]  aarch64: Model zero-high-half semantics of XTN instruction in RTL
@ 2021-06-15  9:45 Jonathan Wright
  2021-06-16  9:04 ` [PATCH V2] " Jonathan Wright
  0 siblings, 1 reply; 3+ messages in thread
From: Jonathan Wright @ 2021-06-15  9:45 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1071 bytes --]

Hi,

Modeling the zero-high-half semantics of the XTN narrowing
instruction in RTL indicates to the compiler that this is a totally
destructive operation. This enables more RTL simplifications and also
prevents some register allocation issues.

Regression tested and bootstrapped on aarch64-none-linux-gnu - no
issues.

Ok for master?

Thanks,
Jonathan

---

gcc/ChangeLog:

2021-06-11  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le):
	Define - modeling zero-high-half semantics.
	(aarch64_xtn<mode>): Change to an expander that emits the
	appropriate instruction depending on endianness.
	(aarch64_xtn<mode>_insn_be): Define - modeling zero-high-half
	semantics.
	(aarch64_xtn2<mode>_le): Rename to...
	(aarch64_xtn2<mode>_insn_le): This.
	(aarch64_xtn2<mode>_be): Rename to...
	(aarch64_xtn2<mode>_insn_be): This.
	(vec_pack_trunc_<mode>): Emit truncation instruction instead
	of aarch64_xtn.
	* config/aarch64/iterators.md (Vnarrowd): Add Vnarrowd mode
	attribute iterator.

[-- Attachment #2: rb14563.patch --]
[-- Type: application/octet-stream, Size: 5773 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e750faed1dbd940cdfa216d858b98f3bc25bba42..b23556b551cbbef420950007e9714acf190a534d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1690,17 +1690,48 @@
 
 ;; Narrowing operations.
 
-;; For doubles.
+(define_insn "aarch64_xtn<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "xtn\\t%0.<Vntype>, %1.<Vtype>"
+  [(set_attr "type" "neon_move_narrow_q")]
+)
 
-(define_insn "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
-  "TARGET_SIMD"
+(define_insn "aarch64_xtn<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
+	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "xtn\\t%0.<Vntype>, %1.<Vtype>"
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
-(define_insn "aarch64_xtn2<mode>_le"
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+    else
+      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+
+    /* The intrinsic expects a narrow result, so emit a subreg that will get
+       optimized away as appropriate.  */
+    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+						 <VNARROWQ2>mode));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
@@ -1710,7 +1741,7 @@
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
-(define_insn "aarch64_xtn2<mode>_be"
+(define_insn "aarch64_xtn2<mode>_insn_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
@@ -1727,15 +1758,17 @@
   "TARGET_SIMD"
   {
     if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
-					     operands[2]));
+      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
+						 operands[2]));
     else
-      emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
-					     operands[2]));
+      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
+						 operands[2]));
     DONE;
   }
 )
 
+;; Packing doubles.
+
 (define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand")
   (match_operand:VDN 1 "register_operand")
@@ -1748,10 +1781,35 @@
 
   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
-  emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg));
+  emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
   DONE;
 })
 
+;; Packing quads.
+
+(define_expand "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+       (vec_concat:<VNARROWQ2>
+	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
+	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
+ "TARGET_SIMD"
+ {
+   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
+   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
+   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
+
+   emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
+
+   if (BYTES_BIG_ENDIAN)
+     emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
+						operands[hi]));
+   else
+     emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
+						operands[hi]));
+   DONE;
+ }
+)
+
 (define_insn "aarch64_shrn<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
@@ -1936,29 +1994,6 @@
   }
 )
 
-;; For quads.
-
-(define_expand "vec_pack_trunc_<mode>"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand")
-       (vec_concat:<VNARROWQ2>
-	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
-	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
- "TARGET_SIMD"
- {
-   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
-   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
-   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
-
-   emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo]));
-
-   if (BYTES_BIG_ENDIAN)
-     emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, operands[hi]));
-   else
-     emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, operands[hi]));
-   DONE;
- }
-)
-
 ;; Widening operations.
 
 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e9047d00d979411752d9aeddaadb05ec38e3a145..caa42f8f169fbf2cf46a90cf73dee05619acc300 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1257,6 +1257,8 @@
 ;; Narrowed modes for VDN.
 (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
 			    (DI   "V2SI")])
+(define_mode_attr Vnarrowd [(V4HI "v8qi") (V2SI "v4hi")
+			    (DI   "v2si")])
 
 ;; Narrowed double-modes for VQN (Used for XTN).
 (define_mode_attr VNARROWQ [(V8HI "V8QI") (V4SI "V4HI")

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [PATCH V2]  aarch64: Model zero-high-half semantics of XTN instruction in RTL
  2021-06-15  9:45 [PATCH] aarch64: Model zero-high-half semantics of XTN instruction in RTL Jonathan Wright
@ 2021-06-16  9:04 ` Jonathan Wright
  2021-06-16 12:28   ` Richard Sandiford
  0 siblings, 1 reply; 3+ messages in thread
From: Jonathan Wright @ 2021-06-16  9:04 UTC (permalink / raw)
  To: gcc-patches; +Cc: Kyrylo Tkachov, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 2405 bytes --]

Hi,

Version 2 of this patch adds tests to verify the benefit of this change.

Ok for master?

Thanks,
Jonathan

---

gcc/ChangeLog:

2021-06-11  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le):
	Define - modeling zero-high-half semantics.
	(aarch64_xtn<mode>): Change to an expander that emits the
	appropriate instruction depending on endianness.
	(aarch64_xtn<mode>_insn_be): Define - modeling zero-high-half
	semantics.
	(aarch64_xtn2<mode>_le): Rename to...
	(aarch64_xtn2<mode>_insn_le): This.
	(aarch64_xtn2<mode>_be): Rename to...
	(aarch64_xtn2<mode>_insn_be): This.
	(vec_pack_trunc_<mode>): Emit truncation instruction instead
	of aarch64_xtn.
	* config/aarch64/iterators.md (Vnarrowd): Add Vnarrowd mode
	attribute iterator.

gcc/testsuite/ChangeLog:

	* gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.


From: Gcc-patches <gcc-patches-bounces+jonathan.wright=arm.com@gcc.gnu.org> on behalf of Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org>
Sent: 15 June 2021 10:45
To: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>
Subject: [PATCH] aarch64: Model zero-high-half semantics of XTN instruction in RTL 
 
Hi,

Modeling the zero-high-half semantics of the XTN narrowing
instruction in RTL indicates to the compiler that this is a totally
destructive operation. This enables more RTL simplifications and also
prevents some register allocation issues.

Regression tested and bootstrapped on aarch64-none-linux-gnu - no
issues.

Ok for master?

Thanks,
Jonathan

---

gcc/ChangeLog:

2021-06-11  Jonathan Wright  <jonathan.wright@arm.com>

        * config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le):
        Define - modeling zero-high-half semantics.
        (aarch64_xtn<mode>): Change to an expander that emits the
        appropriate instruction depending on endianness.
        (aarch64_xtn<mode>_insn_be): Define - modeling zero-high-half
        semantics.
        (aarch64_xtn2<mode>_le): Rename to...
        (aarch64_xtn2<mode>_insn_le): This.
        (aarch64_xtn2<mode>_be): Rename to...
        (aarch64_xtn2<mode>_insn_be): This.
        (vec_pack_trunc_<mode>): Emit truncation instruction instead
        of aarch64_xtn.
        * config/aarch64/iterators.md (Vnarrowd): Add Vnarrowd mode
        attribute iterator.

[-- Attachment #2: rb14563.patch --]
[-- Type: application/octet-stream, Size: 7288 bytes --]

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index e750faed1dbd940cdfa216d858b98f3bc25bba42..b23556b551cbbef420950007e9714acf190a534d 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1690,17 +1690,48 @@
 
 ;; Narrowing operations.
 
-;; For doubles.
+(define_insn "aarch64_xtn<mode>_insn_le"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
+  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
+  "xtn\\t%0.<Vntype>, %1.<Vtype>"
+  [(set_attr "type" "neon_move_narrow_q")]
+)
 
-(define_insn "aarch64_xtn<mode>"
-  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
-	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
-  "TARGET_SIMD"
+(define_insn "aarch64_xtn<mode>_insn_be"
+  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
+	(vec_concat:<VNARROWQ2>
+	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
+	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
+  "TARGET_SIMD && BYTES_BIG_ENDIAN"
   "xtn\\t%0.<Vntype>, %1.<Vtype>"
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
-(define_insn "aarch64_xtn2<mode>_le"
+(define_expand "aarch64_xtn<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
+  "TARGET_SIMD"
+  {
+    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
+    if (BYTES_BIG_ENDIAN)
+      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+    else
+      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
+				CONST0_RTX (<VNARROWQ>mode)));
+
+    /* The intrinsic expects a narrow result, so emit a subreg that will get
+       optimized away as appropriate.  */
+    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
+						 <VNARROWQ2>mode));
+    DONE;
+  }
+)
+
+(define_insn "aarch64_xtn2<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
@@ -1710,7 +1741,7 @@
   [(set_attr "type" "neon_move_narrow_q")]
 )
 
-(define_insn "aarch64_xtn2<mode>_be"
+(define_insn "aarch64_xtn2<mode>_insn_be"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
 	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
@@ -1727,15 +1758,17 @@
   "TARGET_SIMD"
   {
     if (BYTES_BIG_ENDIAN)
-      emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
-					     operands[2]));
+      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
+						 operands[2]));
     else
-      emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
-					     operands[2]));
+      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
+						 operands[2]));
     DONE;
   }
 )
 
+;; Packing doubles.
+
 (define_expand "vec_pack_trunc_<mode>"
  [(match_operand:<VNARROWD> 0 "register_operand")
   (match_operand:VDN 1 "register_operand")
@@ -1748,10 +1781,35 @@
 
   emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
   emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
-  emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg));
+  emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
   DONE;
 })
 
+;; Packing quads.
+
+(define_expand "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand")
+       (vec_concat:<VNARROWQ2>
+	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
+	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
+ "TARGET_SIMD"
+ {
+   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
+   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
+   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
+
+   emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
+
+   if (BYTES_BIG_ENDIAN)
+     emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
+						operands[hi]));
+   else
+     emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
+						operands[hi]));
+   DONE;
+ }
+)
+
 (define_insn "aarch64_shrn<mode>_insn_le"
   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
 	(vec_concat:<VNARROWQ2>
@@ -1936,29 +1994,6 @@
   }
 )
 
-;; For quads.
-
-(define_expand "vec_pack_trunc_<mode>"
- [(set (match_operand:<VNARROWQ2> 0 "register_operand")
-       (vec_concat:<VNARROWQ2>
-	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
-	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
- "TARGET_SIMD"
- {
-   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
-   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
-   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
-
-   emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo]));
-
-   if (BYTES_BIG_ENDIAN)
-     emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, operands[hi]));
-   else
-     emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, operands[hi]));
-   DONE;
- }
-)
-
 ;; Widening operations.
 
 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index e9047d00d979411752d9aeddaadb05ec38e3a145..caa42f8f169fbf2cf46a90cf73dee05619acc300 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1257,6 +1257,8 @@
 ;; Narrowed modes for VDN.
 (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
 			    (DI   "V2SI")])
+(define_mode_attr Vnarrowd [(V4HI "v8qi") (V2SI "v4hi")
+			    (DI   "v2si")])
 
 ;; Narrowed double-modes for VQN (Used for XTN).
 (define_mode_attr VNARROWQ [(V8HI "V8QI") (V4SI "V4HI")
diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
index 27fa0e640ab2b37781376c40ce4ca37602c72393..78c474f3025cbf56d14323d8f05bfb73e003ebfd 100644
--- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
+++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
@@ -48,6 +48,21 @@ TEST_SHIFT (vqrshrun_n, uint8x16_t, int16x8_t, s16, u8)
 TEST_SHIFT (vqrshrun_n, uint16x8_t, int32x4_t, s32, u16)
 TEST_SHIFT (vqrshrun_n, uint32x4_t, int64x2_t, s64, u32)
 
+#define TEST_UNARY(name, rettype, intype, fs, rs) \
+  rettype test_ ## name ## _ ## fs ## _zero_high \
+		(intype a) \
+	{ \
+		return vcombine_ ## rs (name ## _ ## fs (a), \
+					vdup_n_ ## rs (0)); \
+	}
+
+TEST_UNARY (vmovn, int8x16_t, int16x8_t, s16, s8)
+TEST_UNARY (vmovn, int16x8_t, int32x4_t, s32, s16)
+TEST_UNARY (vmovn, int32x4_t, int64x2_t, s64, s32)
+TEST_UNARY (vmovn, uint8x16_t, uint16x8_t, u16, u8)
+TEST_UNARY (vmovn, uint16x8_t, uint32x4_t, u32, u16)
+TEST_UNARY (vmovn, uint32x4_t, uint64x2_t, u64, u32)
+
 /* { dg-final { scan-assembler-not "dup\\t" } } */
 
 /* { dg-final { scan-assembler-times "\\trshrn\\tv" 6} }  */
@@ -58,3 +73,4 @@ TEST_SHIFT (vqrshrun_n, uint32x4_t, int64x2_t, s64, u32)
 /* { dg-final { scan-assembler-times "\\tuqshrn\\tv" 3} }  */
 /* { dg-final { scan-assembler-times "\\tsqrshrn\\tv" 3} }  */
 /* { dg-final { scan-assembler-times "\\tuqrshrn\\tv" 3} }  */
+/* { dg-final { scan-assembler-times "\\txtn\\tv" 6} }  */

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH V2] aarch64: Model zero-high-half semantics of XTN instruction in RTL
  2021-06-16  9:04 ` [PATCH V2] " Jonathan Wright
@ 2021-06-16 12:28   ` Richard Sandiford
  0 siblings, 0 replies; 3+ messages in thread
From: Richard Sandiford @ 2021-06-16 12:28 UTC (permalink / raw)
  To: Jonathan Wright; +Cc: gcc-patches, Kyrylo Tkachov

Jonathan Wright <Jonathan.Wright@arm.com> writes:
> Hi,
>
> Version 2 of this patch adds tests to verify the benefit of this change.
>
> Ok for master?
>
> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-06-11  Jonathan Wright  <jonathan.wright@arm.com>
>
>         * config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le):
>         Define - modeling zero-high-half semantics.
>         (aarch64_xtn<mode>): Change to an expander that emits the
>         appropriate instruction depending on endianness.
>         (aarch64_xtn<mode>_insn_be): Define - modeling zero-high-half
>         semantics.
>         (aarch64_xtn2<mode>_le): Rename to...
>         (aarch64_xtn2<mode>_insn_le): This.
>         (aarch64_xtn2<mode>_be): Rename to...
>         (aarch64_xtn2<mode>_insn_be): This.
>         (vec_pack_trunc_<mode>): Emit truncation instruction instead
>         of aarch64_xtn.
>         * config/aarch64/iterators.md (Vnarrowd): Add Vnarrowd mode
>         attribute iterator.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/aarch64/narrow_zero_high_half.c: Add new tests.

OK, thanks.

> From: Gcc-patches <gcc-patches-bounces+jonathan.wright=arm.com@gcc.gnu.org> on behalf of Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org>
> Sent: 15 June 2021 10:45
> To: gcc-patches@gcc.gnu.org <gcc-patches@gcc.gnu.org>
> Subject: [PATCH] aarch64: Model zero-high-half semantics of XTN instruction in RTL
>
> Hi,
>
> Modeling the zero-high-half semantics of the XTN narrowing
> instruction in RTL indicates to the compiler that this is a totally
> destructive operation. This enables more RTL simplifications and also
> prevents some register allocation issues.
>
> Regression tested and bootstrapped on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?
>
> Thanks,
> Jonathan
>
> ---
>
> gcc/ChangeLog:
>
> 2021-06-11  Jonathan Wright  <jonathan.wright@arm.com>
>
>         * config/aarch64/aarch64-simd.md (aarch64_xtn<mode>_insn_le):
>         Define - modeling zero-high-half semantics.
>         (aarch64_xtn<mode>): Change to an expander that emits the
>         appropriate instruction depending on endianness.
>         (aarch64_xtn<mode>_insn_be): Define - modeling zero-high-half
>         semantics.
>         (aarch64_xtn2<mode>_le): Rename to...
>         (aarch64_xtn2<mode>_insn_le): This.
>         (aarch64_xtn2<mode>_be): Rename to...
>         (aarch64_xtn2<mode>_insn_be): This.
>         (vec_pack_trunc_<mode>): Emit truncation instruction instead
>         of aarch64_xtn.
>         * config/aarch64/iterators.md (Vnarrowd): Add Vnarrowd mode
>         attribute iterator.
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index e750faed1dbd940cdfa216d858b98f3bc25bba42..b23556b551cbbef420950007e9714acf190a534d 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -1690,17 +1690,48 @@
>  
>  ;; Narrowing operations.
>  
> -;; For doubles.
> +(define_insn "aarch64_xtn<mode>_insn_le"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +	(vec_concat:<VNARROWQ2>
> +	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
> +	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")))]
> +  "TARGET_SIMD && !BYTES_BIG_ENDIAN"
> +  "xtn\\t%0.<Vntype>, %1.<Vtype>"
> +  [(set_attr "type" "neon_move_narrow_q")]
> +)
>  
> -(define_insn "aarch64_xtn<mode>"
> -  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
> -	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
> -  "TARGET_SIMD"
> +(define_insn "aarch64_xtn<mode>_insn_be"
> +  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
> +	(vec_concat:<VNARROWQ2>
> +	  (match_operand:<VNARROWQ> 2 "aarch64_simd_or_scalar_imm_zero")
> +	  (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))))]
> +  "TARGET_SIMD && BYTES_BIG_ENDIAN"
>    "xtn\\t%0.<Vntype>, %1.<Vtype>"
>    [(set_attr "type" "neon_move_narrow_q")]
>  )
>  
> -(define_insn "aarch64_xtn2<mode>_le"
> +(define_expand "aarch64_xtn<mode>"
> +  [(set (match_operand:<VNARROWQ> 0 "register_operand")
> +	(truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand")))]
> +  "TARGET_SIMD"
> +  {
> +    rtx tmp = gen_reg_rtx (<VNARROWQ2>mode);
> +    if (BYTES_BIG_ENDIAN)
> +      emit_insn (gen_aarch64_xtn<mode>_insn_be (tmp, operands[1],
> +				CONST0_RTX (<VNARROWQ>mode)));
> +    else
> +      emit_insn (gen_aarch64_xtn<mode>_insn_le (tmp, operands[1],
> +				CONST0_RTX (<VNARROWQ>mode)));
> +
> +    /* The intrinsic expects a narrow result, so emit a subreg that will get
> +       optimized away as appropriate.  */
> +    emit_move_insn (operands[0], lowpart_subreg (<VNARROWQ>mode, tmp,
> +						 <VNARROWQ2>mode));
> +    DONE;
> +  }
> +)
> +
> +(define_insn "aarch64_xtn2<mode>_insn_le"
>    [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
>  	(vec_concat:<VNARROWQ2>
>  	  (match_operand:<VNARROWQ> 1 "register_operand" "0")
> @@ -1710,7 +1741,7 @@
>    [(set_attr "type" "neon_move_narrow_q")]
>  )
>  
> -(define_insn "aarch64_xtn2<mode>_be"
> +(define_insn "aarch64_xtn2<mode>_insn_be"
>    [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
>  	(vec_concat:<VNARROWQ2>
>  	  (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))
> @@ -1727,15 +1758,17 @@
>    "TARGET_SIMD"
>    {
>      if (BYTES_BIG_ENDIAN)
> -      emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], operands[1],
> -					     operands[2]));
> +      emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], operands[1],
> +						 operands[2]));
>      else
> -      emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], operands[1],
> -					     operands[2]));
> +      emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], operands[1],
> +						 operands[2]));
>      DONE;
>    }
>  )
>  
> +;; Packing doubles.
> +
>  (define_expand "vec_pack_trunc_<mode>"
>   [(match_operand:<VNARROWD> 0 "register_operand")
>    (match_operand:VDN 1 "register_operand")
> @@ -1748,10 +1781,35 @@
>  
>    emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
>    emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
> -  emit_insn (gen_aarch64_xtn<Vdbl> (operands[0], tempreg));
> +  emit_insn (gen_trunc<Vdbl><Vnarrowd>2 (operands[0], tempreg));
>    DONE;
>  })
>  
> +;; Packing quads.
> +
> +(define_expand "vec_pack_trunc_<mode>"
> + [(set (match_operand:<VNARROWQ2> 0 "register_operand")
> +       (vec_concat:<VNARROWQ2>
> +	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
> +	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
> + "TARGET_SIMD"
> + {
> +   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
> +   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
> +   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
> +
> +   emit_insn (gen_trunc<mode><Vnarrowq>2 (tmpreg, operands[lo]));
> +
> +   if (BYTES_BIG_ENDIAN)
> +     emit_insn (gen_aarch64_xtn2<mode>_insn_be (operands[0], tmpreg,
> +						operands[hi]));
> +   else
> +     emit_insn (gen_aarch64_xtn2<mode>_insn_le (operands[0], tmpreg,
> +						operands[hi]));
> +   DONE;
> + }
> +)
> +
>  (define_insn "aarch64_shrn<mode>_insn_le"
>    [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
>  	(vec_concat:<VNARROWQ2>
> @@ -1936,29 +1994,6 @@
>    }
>  )
>  
> -;; For quads.
> -
> -(define_expand "vec_pack_trunc_<mode>"
> - [(set (match_operand:<VNARROWQ2> 0 "register_operand")
> -       (vec_concat:<VNARROWQ2>
> -	 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand"))
> -	 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand"))))]
> - "TARGET_SIMD"
> - {
> -   rtx tmpreg = gen_reg_rtx (<VNARROWQ>mode);
> -   int lo = BYTES_BIG_ENDIAN ? 2 : 1;
> -   int hi = BYTES_BIG_ENDIAN ? 1 : 2;
> -
> -   emit_insn (gen_aarch64_xtn<mode> (tmpreg, operands[lo]));
> -
> -   if (BYTES_BIG_ENDIAN)
> -     emit_insn (gen_aarch64_xtn2<mode>_be (operands[0], tmpreg, operands[hi]));
> -   else
> -     emit_insn (gen_aarch64_xtn2<mode>_le (operands[0], tmpreg, operands[hi]));
> -   DONE;
> - }
> -)
> -
>  ;; Widening operations.
>  
>  (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
> diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
> index e9047d00d979411752d9aeddaadb05ec38e3a145..caa42f8f169fbf2cf46a90cf73dee05619acc300 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1257,6 +1257,8 @@
>  ;; Narrowed modes for VDN.
>  (define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
>  			    (DI   "V2SI")])
> +(define_mode_attr Vnarrowd [(V4HI "v8qi") (V2SI "v4hi")
> +			    (DI   "v2si")])
>  
>  ;; Narrowed double-modes for VQN (Used for XTN).
>  (define_mode_attr VNARROWQ [(V8HI "V8QI") (V4SI "V4HI")
> diff --git a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
> index 27fa0e640ab2b37781376c40ce4ca37602c72393..78c474f3025cbf56d14323d8f05bfb73e003ebfd 100644
> --- a/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
> +++ b/gcc/testsuite/gcc.target/aarch64/narrow_zero_high_half.c
> @@ -48,6 +48,21 @@ TEST_SHIFT (vqrshrun_n, uint8x16_t, int16x8_t, s16, u8)
>  TEST_SHIFT (vqrshrun_n, uint16x8_t, int32x4_t, s32, u16)
>  TEST_SHIFT (vqrshrun_n, uint32x4_t, int64x2_t, s64, u32)
>  
> +#define TEST_UNARY(name, rettype, intype, fs, rs) \
> +  rettype test_ ## name ## _ ## fs ## _zero_high \
> +		(intype a) \
> +	{ \
> +		return vcombine_ ## rs (name ## _ ## fs (a), \
> +					vdup_n_ ## rs (0)); \
> +	}
> +
> +TEST_UNARY (vmovn, int8x16_t, int16x8_t, s16, s8)
> +TEST_UNARY (vmovn, int16x8_t, int32x4_t, s32, s16)
> +TEST_UNARY (vmovn, int32x4_t, int64x2_t, s64, s32)
> +TEST_UNARY (vmovn, uint8x16_t, uint16x8_t, u16, u8)
> +TEST_UNARY (vmovn, uint16x8_t, uint32x4_t, u32, u16)
> +TEST_UNARY (vmovn, uint32x4_t, uint64x2_t, u64, u32)
> +
>  /* { dg-final { scan-assembler-not "dup\\t" } } */
>  
>  /* { dg-final { scan-assembler-times "\\trshrn\\tv" 6} }  */
> @@ -58,3 +73,4 @@ TEST_SHIFT (vqrshrun_n, uint32x4_t, int64x2_t, s64, u32)
>  /* { dg-final { scan-assembler-times "\\tuqshrn\\tv" 3} }  */
>  /* { dg-final { scan-assembler-times "\\tsqrshrn\\tv" 3} }  */
>  /* { dg-final { scan-assembler-times "\\tuqrshrn\\tv" 3} }  */
> +/* { dg-final { scan-assembler-times "\\txtn\\tv" 6} }  */

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-06-16 12:28 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-15  9:45 [PATCH] aarch64: Model zero-high-half semantics of XTN instruction in RTL Jonathan Wright
2021-06-16  9:04 ` [PATCH V2] " Jonathan Wright
2021-06-16 12:28   ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).