public inbox for gcc-patches@gcc.gnu.org
* [PATCH] aarch64: Refactor TBL/TBX RTL patterns
From: Jonathan Wright @ 2021-07-19 16:43 UTC
  To: gcc-patches

Hi,

As subject, this patch renames the two-source-register TBL/TBX RTL
patterns so that their names better reflect what they do, rather than
suggesting, as the old tbl3 and tbx4 names did, that they operate on
three or four source registers. It also uses the correct "neon_tbl2"
type attribute for both patterns.

Rename single-source-register TBL/TBX patterns for consistency.

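For illustration only (not part of the patch): the new names track the
number of table registers an intrinsic consumes. The sketch below uses
the two-register lookup/extension intrinsics from arm_neon.h that map
onto the renamed patterns; the function names are invented for the
example, and the assembly in the comments is indicative only, since
register allocation will vary.

  #include <arm_neon.h>

  /* Two-register lookup: expands through the renamed
     aarch64_qtbl2<mode> pattern (formerly aarch64_tbl3<mode>).  */
  uint8x16_t
  lookup2 (uint8x16x2_t tab, uint8x16_t idx)
  {
    /* tbl  v0.16b, {v1.16b - v2.16b}, v3.16b  */
    return vqtbl2q_u8 (tab, idx);
  }

  /* Two-register extension: expands through the renamed
     aarch64_qtbx2<mode> pattern (formerly aarch64_tbx4<mode>).  */
  uint8x16_t
  extend2 (uint8x16_t r, uint8x16x2_t tab, uint8x16_t idx)
  {
    /* tbx  v0.16b, {v1.16b - v2.16b}, v3.16b  */
    return vqtbx2q_u8 (r, tab, idx);
  }
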
Bootstrapped and regression tested on aarch64-none-linux-gnu - no
issues.

Ok for master?

Thanks,
Jonathan

---

gcc/ChangeLog:

2021-07-08  Jonathan Wright  <jonathan.wright@arm.com>

	* config/aarch64/aarch64-simd-builtins.def: Use two variant
	generators for all TBL/TBX intrinsics and rename to
	consistent forms: qtbl[1234] or qtbx[1234].
	* config/aarch64/aarch64-simd.md (aarch64_tbl1<mode>):
	Rename to...
	(aarch64_qtbl1<mode>): This.
	(aarch64_tbx1<mode>): Rename to...
	(aarch64_qtbx1<mode>): This.
	(aarch64_tbl2v16qi): Delete.
	(aarch64_tbl3<mode>): Rename to...
	(aarch64_qtbl2<mode>): This.
	(aarch64_tbx4<mode>): Rename to...
	(aarch64_qtbx2<mode>): This.
	* config/aarch64/aarch64.c (aarch64_expand_vec_perm_1): Use
	renamed qtbl1 and qtbl2 RTL patterns.
	* config/aarch64/arm_neon.h (vqtbl1_p8): Use renamed qtbl1
	RTL pattern.
	(vqtbl1_s8): Likewise.
	(vqtbl1_u8): Likewise.
	(vqtbl1q_p8): Likewise.
	(vqtbl1q_s8): Likewise.
	(vqtbl1q_u8): Likewise.
	(vqtbx1_s8): Use renamed qtbx1 RTL pattern.
	(vqtbx1_u8): Likewise.
	(vqtbx1_p8): Likewise.
	(vqtbx1q_s8): Likewise.
	(vqtbx1q_u8): Likewise.
	(vqtbx1q_p8): Likewise.
	(vtbl1_s8): Use renamed qtbl1 RTL pattern.
	(vtbl1_u8): Likewise.
	(vtbl1_p8): Likewise.
	(vtbl2_s8): Likewise.
	(vtbl2_u8): Likewise.
	(vtbl2_p8): Likewise.
	(vtbl3_s8): Use renamed qtbl2 RTL pattern.
	(vtbl3_u8): Likewise.
	(vtbl3_p8): Likewise.
	(vtbl4_s8): Likewise.
	(vtbl4_u8): Likewise.
	(vtbl4_p8): Likewise.
	(vtbx2_s8): Use renamed qtbx2 RTL pattern.
	(vtbx2_u8): Likewise.
	(vtbx2_p8): Likewise.
	(vqtbl2_s8): Use renamed qtbl2 RTL pattern.
	(vqtbl2_u8): Likewise.
	(vqtbl2_p8): Likewise.
	(vqtbl2q_s8): Likewise.
	(vqtbl2q_u8): Likewise.
	(vqtbl2q_p8): Likewise.
	(vqtbx2_s8): Use renamed qtbx2 RTL pattern.
	(vqtbx2_u8): Likewise.
	(vqtbx2_p8): Likewise.
	(vqtbx2q_s8): Likewise.
	(vqtbx2q_u8): Likewise.
	(vqtbx2q_p8): Likewise.
	(vtbx4_s8): Likewise.
	(vtbx4_u8): Likewise.
	(vtbx4_p8): Likewise.

diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index 063f503ebd96657f017dfaa067cb231991376bda..b7f1237b1ffd0d4ca283c853be1cc94b9fc35260 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -718,37 +718,31 @@
   VAR1 (BINOPP, crypto_pmull, 0, NONE, di)
   VAR1 (BINOPP, crypto_pmull, 0, NONE, v2di)
 
-  /* Implemented by aarch64_tbl3<mode>.  */
-  VAR1 (BINOP, tbl3, 0, NONE, v8qi)
-  VAR1 (BINOP, tbl3, 0, NONE, v16qi)
+  /* Implemented by aarch64_qtbl1<mode>.  */
+  VAR2 (BINOP, qtbl1, 0, NONE, v8qi, v16qi)
+  VAR2 (BINOPU, qtbl1, 0, NONE, v8qi, v16qi)
 
-  /* Implemented by aarch64_tbl1<mode>.  */
-  VAR2 (BINOP, tbl1, 0, NONE, v8qi, v16qi)
-  VAR2 (BINOPU, tbl1, 0, NONE, v8qi, v16qi)
+  /* Implemented by aarch64_qtbl2<mode>.  */
+  VAR2 (BINOP, qtbl2, 0, NONE, v8qi, v16qi)
 
   /* Implemented by aarch64_qtbl3<mode>.  */
-  VAR1 (BINOP, qtbl3, 0, NONE, v8qi)
-  VAR1 (BINOP, qtbl3, 0, NONE, v16qi)
+  VAR2 (BINOP, qtbl3, 0, NONE, v8qi, v16qi)
 
   /* Implemented by aarch64_qtbl4<mode>.  */
-  VAR1 (BINOP, qtbl4, 0, NONE, v8qi)
-  VAR1 (BINOP, qtbl4, 0, NONE, v16qi)
+  VAR2 (BINOP, qtbl4, 0, NONE, v8qi, v16qi)
 
-  /* Implemented by aarch64_tbx1<mode>.  */
-  VAR2 (TERNOP, tbx1, 0, NONE, v8qi, v16qi)
-  VAR2 (TERNOPU, tbx1, 0, NONE, v8qi, v16qi)
+  /* Implemented by aarch64_qtbx1<mode>.  */
+  VAR2 (TERNOP, qtbx1, 0, NONE, v8qi, v16qi)
+  VAR2 (TERNOPU, qtbx1, 0, NONE, v8qi, v16qi)
 
-  /* Implemented by aarch64_tbx4<mode>.  */
-  VAR1 (TERNOP, tbx4, 0, NONE, v8qi)
-  VAR1 (TERNOP, tbx4, 0, NONE, v16qi)
+  /* Implemented by aarch64_qtbx2<mode>.  */
+  VAR2 (TERNOP, qtbx2, 0, NONE, v8qi, v16qi)
 
   /* Implemented by aarch64_qtbx3<mode>.  */
-  VAR1 (TERNOP, qtbx3, 0, NONE, v8qi)
-  VAR1 (TERNOP, qtbx3, 0, NONE, v16qi)
+  VAR2 (TERNOP, qtbx3, 0, NONE, v8qi, v16qi)
 
   /* Implemented by aarch64_qtbx4<mode>.  */
-  VAR1 (TERNOP, qtbx4, 0, NONE, v8qi)
-  VAR1 (TERNOP, qtbx4, 0, NONE, v16qi)
+  VAR2 (TERNOP, qtbx4, 0, NONE, v8qi, v16qi)
 
   /* Builtins for ARMv8.1-A Adv.SIMD instructions.  */
 
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 74890989cb3045798bf8d0241467eaaf72238297..7332a735d35846e0d9375ad2686ed7ecdb09cd29 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -6948,7 +6948,7 @@
     {
       rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
       rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
-      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
+      emit_insn (gen_aarch64_qtbl1v16qi (op0, op1, operands[2]));
     }
   DONE;
 }
@@ -7425,7 +7425,7 @@
   DONE;
 })
 
-(define_insn "aarch64_tbl1<mode>"
+(define_insn "aarch64_qtbl1<mode>"
   [(set (match_operand:VB 0 "register_operand" "=w")
 	(unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
 		    (match_operand:VB 2 "register_operand" "w")]
@@ -7435,7 +7435,7 @@
   [(set_attr "type" "neon_tbl1<q>")]
 )
 
-(define_insn "aarch64_tbx1<mode>"
+(define_insn "aarch64_qtbx1<mode>"
   [(set (match_operand:VB 0 "register_operand" "=w")
 	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
 		    (match_operand:V16QI 2 "register_operand" "w")
@@ -7448,27 +7448,17 @@
 
 ;; Two source registers.
 
-(define_insn "aarch64_tbl2v16qi"
-  [(set (match_operand:V16QI 0 "register_operand" "=w")
-	(unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
-		       (match_operand:V16QI 2 "register_operand" "w")]
-		      UNSPEC_TBL))]
-  "TARGET_SIMD"
-  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
-  [(set_attr "type" "neon_tbl2_q")]
-)
-
-(define_insn "aarch64_tbl3<mode>"
+(define_insn "aarch64_qtbl2<mode>"
   [(set (match_operand:VB 0 "register_operand" "=w")
 	(unspec:VB [(match_operand:OI 1 "register_operand" "w")
 		      (match_operand:VB 2 "register_operand" "w")]
 		      UNSPEC_TBL))]
   "TARGET_SIMD"
   "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
-  [(set_attr "type" "neon_tbl3")]
+  [(set_attr "type" "neon_tbl2")]
 )
 
-(define_insn "aarch64_tbx4<mode>"
+(define_insn "aarch64_qtbx2<mode>"
   [(set (match_operand:VB 0 "register_operand" "=w")
 	(unspec:VB [(match_operand:VB 1 "register_operand" "0")
 		      (match_operand:OI 2 "register_operand" "w")
@@ -7476,7 +7466,7 @@
 		      UNSPEC_TBX))]
   "TARGET_SIMD"
   "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
-  [(set_attr "type" "neon_tbl4")]
+  [(set_attr "type" "neon_tbl2")]
 )
 
 ;; Three source registers.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index f5b25a7f7041645921e6ad85714efda73b993492..3bdf19d71b54d0ade8e5648323f6e1f012bc4f8f 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -22047,11 +22047,11 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
 	  /* Expand the argument to a V16QI mode by duplicating it.  */
 	  rtx pair = gen_reg_rtx (V16QImode);
 	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op0));
-	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
+	  emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel));
 	}
       else
 	{
-	  emit_insn (gen_aarch64_tbl1v16qi (target, op0, sel));
+	  emit_insn (gen_aarch64_qtbl1v16qi (target, op0, sel));
 	}
     }
   else
@@ -22062,13 +22062,13 @@ aarch64_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
 	{
 	  pair = gen_reg_rtx (V16QImode);
 	  emit_insn (gen_aarch64_combinev8qi (pair, op0, op1));
-	  emit_insn (gen_aarch64_tbl1v8qi (target, pair, sel));
+	  emit_insn (gen_aarch64_qtbl1v8qi (target, pair, sel));
 	}
       else
 	{
 	  pair = gen_reg_rtx (OImode);
 	  emit_insn (gen_aarch64_combinev16qi (pair, op0, op1));
-	  emit_insn (gen_aarch64_tbl2v16qi (target, pair, sel));
+	  emit_insn (gen_aarch64_qtbl2v16qi (target, pair, sel));
 	}
     }
 }
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 00d76ea937ace5763746478cbdfadf6479e0b15a..1048d7c7eaac14554142eaa7544159a50929b7f1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -9534,90 +9534,90 @@ __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl1_p8 (poly8x16_t __tab, uint8x8_t __idx)
 {
-  return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __tab,
-						 (int8x8_t) __idx);
+  return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __tab,
+						  (int8x8_t) __idx);
 }
 
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl1_s8 (int8x16_t __tab, uint8x8_t __idx)
 {
-  return __builtin_aarch64_tbl1v8qi (__tab, (int8x8_t) __idx);
+  return __builtin_aarch64_qtbl1v8qi (__tab, (int8x8_t) __idx);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl1_u8 (uint8x16_t __tab, uint8x8_t __idx)
 {
-  return __builtin_aarch64_tbl1v8qi_uuu (__tab, __idx);
+  return __builtin_aarch64_qtbl1v8qi_uuu (__tab, __idx);
 }
 
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl1q_p8 (poly8x16_t __tab, uint8x16_t __idx)
 {
-  return (poly8x16_t) __builtin_aarch64_tbl1v16qi ((int8x16_t) __tab,
-						   (int8x16_t) __idx);
+  return (poly8x16_t) __builtin_aarch64_qtbl1v16qi ((int8x16_t) __tab,
+						    (int8x16_t) __idx);
 }
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl1q_s8 (int8x16_t __tab, uint8x16_t __idx)
 {
-  return __builtin_aarch64_tbl1v16qi (__tab, (int8x16_t) __idx);
+  return __builtin_aarch64_qtbl1v16qi (__tab, (int8x16_t) __idx);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbl1q_u8 (uint8x16_t __tab, uint8x16_t __idx)
 {
-  return __builtin_aarch64_tbl1v16qi_uuu (__tab, __idx);
+  return __builtin_aarch64_qtbl1v16qi_uuu (__tab, __idx);
 }
 
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1_s8 (int8x8_t __r, int8x16_t __tab, uint8x8_t __idx)
 {
-  return __builtin_aarch64_tbx1v8qi (__r, __tab, (int8x8_t) __idx);
+  return __builtin_aarch64_qtbx1v8qi (__r, __tab, (int8x8_t) __idx);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1_u8 (uint8x8_t __r, uint8x16_t __tab, uint8x8_t __idx)
 {
-  return __builtin_aarch64_tbx1v8qi_uuuu (__r, __tab, __idx);
+  return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __tab, __idx);
 }
 
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1_p8 (poly8x8_t __r, poly8x16_t __tab, uint8x8_t __idx)
 {
-  return (poly8x8_t) __builtin_aarch64_tbx1v8qi ((int8x8_t) __r,
-						 (int8x16_t) __tab,
-						 (int8x8_t) __idx);
+  return (poly8x8_t) __builtin_aarch64_qtbx1v8qi ((int8x8_t) __r,
+						  (int8x16_t) __tab,
+						  (int8x8_t) __idx);
 }
 
 __extension__ extern __inline int8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1q_s8 (int8x16_t __r, int8x16_t __tab, uint8x16_t __idx)
 {
-  return __builtin_aarch64_tbx1v16qi (__r, __tab, (int8x16_t) __idx);
+  return __builtin_aarch64_qtbx1v16qi (__r, __tab, (int8x16_t) __idx);
 }
 
 __extension__ extern __inline uint8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1q_u8 (uint8x16_t __r, uint8x16_t __tab, uint8x16_t __idx)
 {
-  return __builtin_aarch64_tbx1v16qi_uuuu (__r, __tab, __idx);
+  return __builtin_aarch64_qtbx1v16qi_uuuu (__r, __tab, __idx);
 }
 
 __extension__ extern __inline poly8x16_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vqtbx1q_p8 (poly8x16_t __r, poly8x16_t __tab, uint8x16_t __idx)
 {
-  return (poly8x16_t) __builtin_aarch64_tbx1v16qi ((int8x16_t) __r,
-						   (int8x16_t) __tab,
-						   (int8x16_t) __idx);
+  return (poly8x16_t) __builtin_aarch64_qtbx1v16qi ((int8x16_t) __r,
+						    (int8x16_t) __tab,
+						    (int8x16_t) __idx);
 }
 
 /* V7 legacy table intrinsics.  */
@@ -9628,7 +9628,7 @@ vtbl1_s8 (int8x8_t __tab, int8x8_t __idx)
 {
   int8x16_t __temp = vcombine_s8 (__tab,
 				  vcreate_s8 (__AARCH64_UINT64_C (0x0)));
-  return __builtin_aarch64_tbl1v8qi (__temp, __idx);
+  return __builtin_aarch64_qtbl1v8qi (__temp, __idx);
 }
 
 __extension__ extern __inline uint8x8_t
@@ -9637,7 +9637,7 @@ vtbl1_u8 (uint8x8_t __tab, uint8x8_t __idx)
 {
   uint8x16_t __temp = vcombine_u8 (__tab,
 				   vcreate_u8 (__AARCH64_UINT64_C (0x0)));
-  return __builtin_aarch64_tbl1v8qi_uuu (__temp, __idx);
+  return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx);
 }
 
 __extension__ extern __inline poly8x8_t
@@ -9646,8 +9646,8 @@ vtbl1_p8 (poly8x8_t __tab, uint8x8_t __idx)
 {
   poly8x16_t __temp = vcombine_p8 (__tab,
 				   vcreate_p8 (__AARCH64_UINT64_C (0x0)));
-  return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __temp,
-						 (int8x8_t) __idx);
+  return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __temp,
+						  (int8x8_t) __idx);
 }
 
 __extension__ extern __inline int8x8_t
@@ -9655,7 +9655,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl2_s8 (int8x8x2_t __tab, int8x8_t __idx)
 {
   int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]);
-  return __builtin_aarch64_tbl1v8qi (__temp, __idx);
+  return __builtin_aarch64_qtbl1v8qi (__temp, __idx);
 }
 
 __extension__ extern __inline uint8x8_t
@@ -9663,7 +9663,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl2_u8 (uint8x8x2_t __tab, uint8x8_t __idx)
 {
   uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]);
-  return __builtin_aarch64_tbl1v8qi_uuu (__temp, __idx);
+  return __builtin_aarch64_qtbl1v8qi_uuu (__temp, __idx);
 }
 
 __extension__ extern __inline poly8x8_t
@@ -9671,15 +9671,14 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl2_p8 (poly8x8x2_t __tab, uint8x8_t __idx)
 {
   poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]);
-  return (poly8x8_t) __builtin_aarch64_tbl1v8qi ((int8x16_t) __temp,
-						 (int8x8_t) __idx);
+  return (poly8x8_t) __builtin_aarch64_qtbl1v8qi ((int8x16_t) __temp,
+						  (int8x8_t) __idx);
 }
 
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
@@ -9688,15 +9687,13 @@ vtbl3_s8 (int8x8x3_t __tab, int8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = __builtin_aarch64_tbl3v8qi (__o, __idx);
-  return __result;
+  return __builtin_aarch64_qtbl2v8qi (__o, __idx);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
@@ -9705,15 +9702,13 @@ vtbl3_u8 (uint8x8x3_t __tab, uint8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
@@ -9722,15 +9717,13 @@ vtbl3_p8 (poly8x8x3_t __tab, uint8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
@@ -9739,15 +9732,13 @@ vtbl4_s8 (int8x8x4_t __tab, int8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = __builtin_aarch64_tbl3v8qi (__o, __idx);
-  return __result;
+  return __builtin_aarch64_qtbl2v8qi (__o, __idx);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
@@ -9756,15 +9747,13 @@ vtbl4_u8 (uint8x8x4_t __tab, uint8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
@@ -9773,8 +9762,7 @@ vtbl4_p8 (poly8x8x4_t __tab, uint8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
-  return __result;
+  return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline int8x8_t
@@ -9782,7 +9770,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx2_s8 (int8x8_t __r, int8x8x2_t __tab, int8x8_t __idx)
 {
   int8x16_t __temp = vcombine_s8 (__tab.val[0], __tab.val[1]);
-  return __builtin_aarch64_tbx1v8qi (__r, __temp, __idx);
+  return __builtin_aarch64_qtbx1v8qi (__r, __temp, __idx);
 }
 
 __extension__ extern __inline uint8x8_t
@@ -9790,7 +9778,7 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx2_u8 (uint8x8_t __r, uint8x8x2_t __tab, uint8x8_t __idx)
 {
   uint8x16_t __temp = vcombine_u8 (__tab.val[0], __tab.val[1]);
-  return __builtin_aarch64_tbx1v8qi_uuuu (__r, __temp, __idx);
+  return __builtin_aarch64_qtbx1v8qi_uuuu (__r, __temp, __idx);
 }
 
 __extension__ extern __inline poly8x8_t
@@ -9798,9 +9786,9 @@ __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx2_p8 (poly8x8_t __r, poly8x8x2_t __tab, uint8x8_t __idx)
 {
   poly8x16_t __temp = vcombine_p8 (__tab.val[0], __tab.val[1]);
-  return (poly8x8_t) __builtin_aarch64_tbx1v8qi ((int8x8_t) __r,
-						 (int8x16_t) __temp,
-						 (int8x8_t) __idx);
+  return (poly8x8_t) __builtin_aarch64_qtbx1v8qi ((int8x8_t) __r,
+						  (int8x16_t) __temp,
+						  (int8x8_t) __idx);
 }
 
 /* End of temporary inline asm.  */
@@ -23335,7 +23323,7 @@ vqtbl2_s8 (int8x16x2_t __tab, uint8x8_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
-  return __builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline uint8x8_t
@@ -23345,7 +23333,7 @@ vqtbl2_u8 (uint8x16x2_t __tab, uint8x8_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
+  return (uint8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline poly8x8_t
@@ -23355,7 +23343,7 @@ vqtbl2_p8 (poly8x16x2_t __tab, uint8x8_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x8_t)__builtin_aarch64_tbl3v8qi (__o, (int8x8_t)__idx);
+  return (poly8x8_t)__builtin_aarch64_qtbl2v8qi (__o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline int8x16_t
@@ -23365,7 +23353,7 @@ vqtbl2q_s8 (int8x16x2_t __tab, uint8x16_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return __builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx);
 }
 
 __extension__ extern __inline uint8x16_t
@@ -23375,7 +23363,7 @@ vqtbl2q_u8 (uint8x16x2_t __tab, uint8x16_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
+  return (uint8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx);
 }
 
 __extension__ extern __inline poly8x16_t
@@ -23385,7 +23373,7 @@ vqtbl2q_p8 (poly8x16x2_t __tab, uint8x16_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x16_t)__builtin_aarch64_tbl3v16qi (__o, (int8x16_t)__idx);
+  return (poly8x16_t)__builtin_aarch64_qtbl2v16qi (__o, (int8x16_t)__idx);
 }
 
 /* vqtbl3 */
@@ -23539,7 +23527,7 @@ vqtbx2_s8 (int8x8_t __r, int8x16x2_t __tab, uint8x8_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
-  return __builtin_aarch64_tbx4v8qi (__r, __o, (int8x8_t)__idx);
+  return __builtin_aarch64_qtbx2v8qi (__r, __o, (int8x8_t)__idx);
 }
 
 __extension__ extern __inline uint8x8_t
@@ -23549,8 +23537,8 @@ vqtbx2_u8 (uint8x8_t __r, uint8x16x2_t __tab, uint8x8_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-						(int8x8_t)__idx);
+  return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
+						 (int8x8_t)__idx);
 }
 
 __extension__ extern __inline poly8x8_t
@@ -23560,8 +23548,8 @@ vqtbx2_p8 (poly8x8_t __r, poly8x16x2_t __tab, uint8x8_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-						(int8x8_t)__idx);
+  return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
+						 (int8x8_t)__idx);
 }
 
 __extension__ extern __inline int8x16_t
@@ -23571,7 +23559,7 @@ vqtbx2q_s8 (int8x16_t __r, int8x16x2_t __tab, uint8x16_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, __tab.val[1], 1);
-  return __builtin_aarch64_tbx4v16qi (__r, __o, (int8x16_t)__idx);
+  return __builtin_aarch64_qtbx2v16qi (__r, __o, (int8x16_t)__idx);
 }
 
 __extension__ extern __inline uint8x16_t
@@ -23581,7 +23569,7 @@ vqtbx2q_u8 (uint8x16_t __r, uint8x16x2_t __tab, uint8x16_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (uint8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o,
+  return (uint8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o,
 						  (int8x16_t)__idx);
 }
 
@@ -23592,8 +23580,8 @@ vqtbx2q_p8 (poly8x16_t __r, poly8x16x2_t __tab, uint8x16_t __idx)
   __builtin_aarch64_simd_oi __o;
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o, (int8x16_t)__tab.val[1], 1);
-  return (poly8x16_t)__builtin_aarch64_tbx4v16qi ((int8x16_t)__r, __o,
-						  (int8x16_t)__idx);
+  return (poly8x16_t)__builtin_aarch64_qtbx2v16qi ((int8x16_t)__r, __o,
+						   (int8x16_t)__idx);
 }
 
 /* vqtbx3 */
@@ -28511,7 +28499,6 @@ __extension__ extern __inline int8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
 {
-  int8x8_t __result;
   int8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_s8 (__tab.val[0], __tab.val[1]);
@@ -28520,15 +28507,13 @@ vtbx4_s8 (int8x8_t __r, int8x8x4_t __tab, int8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = __builtin_aarch64_tbx4v8qi (__r, __o, __idx);
-  return __result;
+  return __builtin_aarch64_qtbx2v8qi (__r, __o, __idx);
 }
 
 __extension__ extern __inline uint8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
 {
-  uint8x8_t __result;
   uint8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_u8 (__tab.val[0], __tab.val[1]);
@@ -28537,16 +28522,14 @@ vtbx4_u8 (uint8x8_t __r, uint8x8x4_t __tab, uint8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = (uint8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-						  (int8x8_t)__idx);
-  return __result;
+  return (uint8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
+						 (int8x8_t)__idx);
 }
 
 __extension__ extern __inline poly8x8_t
 __attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
 vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
 {
-  poly8x8_t __result;
   poly8x16x2_t __temp;
   __builtin_aarch64_simd_oi __o;
   __temp.val[0] = vcombine_p8 (__tab.val[0], __tab.val[1]);
@@ -28555,9 +28538,8 @@ vtbx4_p8 (poly8x8_t __r, poly8x8x4_t __tab, uint8x8_t __idx)
 					   (int8x16_t) __temp.val[0], 0);
   __o = __builtin_aarch64_set_qregoiv16qi (__o,
 					   (int8x16_t) __temp.val[1], 1);
-  __result = (poly8x8_t)__builtin_aarch64_tbx4v8qi ((int8x8_t)__r, __o,
-						  (int8x8_t)__idx);
-  return __result;
+  return (poly8x8_t)__builtin_aarch64_qtbx2v8qi ((int8x8_t)__r, __o,
+						 (int8x8_t)__idx);
 }
 
 /* vtrn */

* Re: [PATCH] aarch64: Refactor TBL/TBX RTL patterns
From: Richard Sandiford @ 2021-07-20  6:53 UTC
  To: Jonathan Wright via Gcc-patches

Jonathan Wright via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
> Hi,
>
> As subject, this patch renames the two-source-register TBL/TBX RTL
> patterns so that their names better reflect what they do, rather than
> suggesting, as the old tbl3 and tbx4 names did, that they operate on
> three or four source registers. It also uses the correct "neon_tbl2"
> type attribute for both patterns.
>
> Rename single-source-register TBL/TBX patterns for consistency.
>
> Bootstrapped and regression tested on aarch64-none-linux-gnu - no
> issues.
>
> Ok for master?

OK.  Nice clean-up, thanks.

Richard
