public inbox for gcc-patches@gcc.gnu.org
Subject: [SVE ACLE] Fix a typo and dot_prod_optab interface
From: Richard Sandiford @ 2019-01-25 12:45 UTC
  To: gcc-patches; +Cc: Kugan Vivekanandarajah, Prathamesh Kulkarni

[-- Attachment #1: Type: text/plain, Size: 217 bytes --]

I've applied the patches below to aarch64/sve-acle-branch.  The first
fixes the modes and operand order of the dot_prod patterns, and the second
fixes a typo in one of the vector duplicate patterns.

Thanks,
Richard



[-- Attachment #2: dot-prod-modes.patch --]
[-- Type: text/x-diff, Size: 6134 bytes --]

[SVE ACLE] Fix sdot/udot modes

This patch makes the dot_prod patterns have the mode and operand
order expected by the optabs.  I'll leave proper use of the patterns
in autovec for later, since it's not really ACLE work.
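
To make the interface mismatch concrete: as I understand the optab,
<su>dot_prod is keyed on the mode of the two multiplication inputs,
which come first, with the accumulator as the final input operand,
whereas svdot takes the accumulator first.  Here is a standalone
sketch (illustrative only, not part of the patch; the helper mirrors
the rotate_inputs_left added below but works on a plain vector):

  #include <cassert>
  #include <vector>

  /* args[start] becomes args[end - 1]; everything in between shifts
     one position to the left.  */
  static void
  rotate_inputs_left (std::vector<int> &args, unsigned int start,
		      unsigned int end)
  {
    int new_last = args[start];
    for (unsigned int i = start; i < end - 1; ++i)
      args[i] = args[i + 1];
    args[end - 1] = new_last;
  }

  int
  main ()
  {
    enum { ACC, A, B };
    std::vector<int> args = { ACC, A, B };	/* svdot argument order.  */
    rotate_inputs_left (args, 0, 3);
    /* Optab operand order: inputs first, accumulator last.  */
    assert (args[0] == A && args[1] == B && args[2] == ACC);
    return 0;
  }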


diff --git a/gcc/config/aarch64/aarch64-sve-builtins.c b/gcc/config/aarch64/aarch64-sve-builtins.c
index 0e3db669422..ed06db9b7c6 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.c
+++ b/gcc/config/aarch64/aarch64-sve-builtins.c
@@ -501,7 +501,7 @@ private:
   rtx expand_signed_pred_op (rtx_code, rtx_code, int,
 			     unsigned int = DEFAULT_MERGE_ARGNO);
   rtx expand_signed_pred_op (int, int, int);
-  rtx expand_via_unpred_direct_optab (optab);
+  rtx expand_via_unpred_direct_optab (optab, machine_mode = VOIDmode);
   rtx expand_via_unpred_insn (insn_code);
   rtx expand_via_pred_direct_optab (optab, unsigned int = DEFAULT_MERGE_ARGNO);
   rtx expand_via_sel_insn (insn_code);
@@ -512,9 +512,11 @@ private:
 
   void require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
 
+  void rotate_inputs_left (unsigned int, unsigned int);
   bool try_negating_argument (unsigned int, machine_mode);
 
   machine_mode get_mode (unsigned int);
+  machine_mode get_quarter_mode (unsigned int);
   machine_mode get_pred_mode (unsigned int);
   rtx get_fallback_value (machine_mode, unsigned int,
 			  unsigned int, unsigned int &);
@@ -2127,10 +2129,14 @@ function_expander::expand_div (bool reversed_p)
 rtx
 function_expander::expand_dot ()
 {
+  /* In the optab, the multiplication operands come before the
+     accumulator operand.  */
+  rotate_inputs_left (0, 3);
+  machine_mode mode = get_quarter_mode (0);
   if (type_suffixes[m_fi.types[0]].unsigned_p)
-    return expand_via_unpred_direct_optab (udot_prod_optab);
+    return expand_via_unpred_direct_optab (udot_prod_optab, mode);
   else
-    return expand_via_unpred_direct_optab (sdot_prod_optab);
+    return expand_via_unpred_direct_optab (sdot_prod_optab, mode);
 }
 
 /* Expand a call to svdup.  */
@@ -2264,10 +2270,7 @@ function_expander::expand_msb (unsigned int merge_argno)
 rtx
 function_expander::expand_mla ()
 {
-  rtx t = m_args[1];
-  m_args[1] = m_args[2];
-  m_args[2] = m_args[3];
-  m_args[3] = t;
+  rotate_inputs_left (1, 4);
   return expand_mad (3);
 }
 
@@ -2282,10 +2285,7 @@ function_expander::expand_mla ()
 rtx
 function_expander::expand_mls ()
 {
-  rtx t = m_args[1];
-  m_args[1] = m_args[2];
-  m_args[2] = m_args[3];
-  m_args[3] = t;
+  rotate_inputs_left (1, 4);
   return expand_msb (3);
 }
 
@@ -2401,11 +2401,14 @@ function_expander::expand_sub (bool reversed_p)
 }
 
 /* Implement the call using optab OP, which is an unpredicated direct
-   (i.e. single-mode) optab.  */
+   (i.e. single-mode) optab.  MODE is the mode of the operation,
+   or VOIDmode if the mode associated with type suffix 0 is correct.  */
 rtx
-function_expander::expand_via_unpred_direct_optab (optab op)
+function_expander::expand_via_unpred_direct_optab (optab op,
+						   machine_mode mode)
 {
-  machine_mode mode = get_mode (0);
+  if (mode == VOIDmode)
+    mode = get_mode (0);
   insn_code icode = direct_optab_handler (op, mode);
   return expand_via_unpred_insn (icode);
 }
@@ -2664,6 +2667,17 @@ function_expander::require_immediate_range (unsigned int argno,
   m_args[argno] = GEN_INT (min);
 }
 
+/* Rotate inputs m_args[START:END] one position to the left, so that
+   m_args[START] becomes m_args[END - 1].  */
+void
+function_expander::rotate_inputs_left (unsigned int start, unsigned int end)
+{
+  rtx new_last = m_args[start];
+  for (unsigned int i = start; i < end - 1; ++i)
+    m_args[i] = m_args[i + 1];
+  m_args[end - 1] = new_last;
+}
+
 /* Return true if argument I is a constant argument that can be negated
    at compile time, replacing it with the negated value if so.  MODE is the
    associated vector mode, but the argument could be a single element.  */
@@ -2689,6 +2703,14 @@ function_expander::get_mode (unsigned int i)
   return TYPE_MODE (m_fi.vector_type (i));
 }
 
+/* Return the vector mode for elements that are a quarter the size of integer
+   type suffix I.  */
+machine_mode
+function_expander::get_quarter_mode (unsigned int i)
+{
+  return TYPE_MODE (m_fi.quarter_vector_type (i));
+}
+
 /* Return the predicate mode associated with type suffix I.  */
 machine_mode
 function_expander::get_pred_mode (unsigned int i)
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index 47a94882ea2..b08029085b5 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3904,15 +3904,15 @@
 )
 
 ;; Unpredicated DOT product.
-(define_insn "<sur>dot_prod<mode>"
+(define_insn "<sur>dot_prod<vsi2qi>"
   [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w")
-	(plus:SVE_SDI (unspec:SVE_SDI [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
-				       (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
+	(plus:SVE_SDI (unspec:SVE_SDI [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
+				       (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
 		       DOTPROD)
-	(match_operand:SVE_SDI 1 "register_operand" "0, w")))]
+	(match_operand:SVE_SDI 3 "register_operand" "0, w")))]
   "TARGET_SVE"
   "@
-   <sur>dot\\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>
-   movprfx\t%0, %1\;<sur>dot\\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>"
+   <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
+   movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
   [(set_attr "movprfx" "*,yes")]
 )
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 5baa9d53448..3067bea8a33 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1032,7 +1032,8 @@
 		      (V2SF "p") (V4SF  "v")
 		      (V4HF "v") (V8HF  "v")])
 
-(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")])
+(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")
+			  (VNx4SI "vnx16qi") (VNx2DI "vnx8hi")])
 (define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")
 			  (VNx4SI "VNx16QI") (VNx2DI "VNx8HI")])
 

[-- Attachment #3: vcond-mask-patterns.patch --]
[-- Type: text/x-diff, Size: 32370 bytes --]

[SVE ACLE] Fix @vcond_mask_<mode><vpred> patterns

This started as a fix for a typo in @vcond_mask_<mode><vpred>:

   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1

Operand 1 here is a vector, not a scalar, so it should be referenced
using %1.<Vetype> rather than %<Vetype>1.  This was causing several
execution failures on the branch.

But vector MOV /M is an alias of SEL, which doesn't take MOVPRFX,
so just changing the operand syntax wouldn't give a valid alternative.
The patch therefore drops the alternative instead.  To keep the pre-RA
instruction selection tighter, we now need to exclude the old alternative
using instruction conditions.

Also, I'd originally thought Dn was the right choice of constraint
for the float immediate patterns, but that includes DUPM constants too,
which can't be predicated.  The patch therefore tightens the predicates
and constraints to only accept things accepted by CPY and FCPY, and makes
sure that "movprfx" is only set to "yes" for alternatives that need it.


diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index ba3fa071e8a..a758b23ae86 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -410,7 +410,7 @@ char *aarch64_output_sve_inc_dec_immediate (const char *, rtx);
 char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
 char *aarch64_output_simd_mov_immediate (rtx, unsigned,
 			enum simd_immediate_check w = AARCH64_CHECK_MOV);
-char *aarch64_output_sve_mov_immediate (rtx, int = -1, bool = false);
+char *aarch64_output_sve_mov_immediate (rtx);
 bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
 bool aarch64_regno_ok_for_base_p (int, bool);
 bool aarch64_regno_ok_for_index_p (int, bool);
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index b08029085b5..bd635645050 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -1903,37 +1903,39 @@
 ;; vcond_mask operand order: true, false, mask
 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
 ;; SEL operand order:        mask, true, false
-(define_insn "@vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w, w, ?&w")
-	(unspec:SVE_I
-	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upl, Upa, Upl")
-	   (match_operand:SVE_I 1 "aarch64_sve_dup_reg_or_imm" "w, vss, w, vss, vss")
-	   (match_operand:SVE_I 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, Dz, w")]
+(define_expand "@vcond_mask_<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "register_operand")
+	(unspec:SVE_ALL
+	  [(match_operand:<VPRED> 3 "register_operand")
+	   (match_operand:SVE_ALL 1 "aarch64_sve_dup_reg_or_imm")
+	   (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
 	  UNSPEC_SEL))]
   "TARGET_SVE"
-  "@
-   sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
-   mov\t%0.<Vetype>, %3/m, #%1
-   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %1.<Vetype>
-   mov\t%0.<Vetype>, %3/z, #%1
-   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%1"
-  [(set_attr "movprfx" "*,*,yes,*,yes")]
+  {
+    if (register_operand (operands[1], <MODE>mode))
+      operands[2] = force_reg (<MODE>mode, operands[2]);
+  }
 )
 
-(define_insn "@vcond_mask_<mode><vpred>"
-  [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w, ?&w")
-	(unspec:SVE_F
-	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upl, Upl, Upl")
-	   (match_operand:SVE_F 1 "aarch64_nonmemory_operand" "w, Dn, w, Dn")
-	   (match_operand:SVE_F 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, Dz")]
+(define_insn "*vcond_mask_<mode><vpred>"
+  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
+	(unspec:SVE_ALL
+	  [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
+	   (match_operand:SVE_ALL 1 "aarch64_sve_dup_reg_or_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
+	   (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
 	  UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE
+   && (!register_operand (operands[1], <MODE>mode)
+       || register_operand (operands[2], <MODE>mode))"
   "@
    sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
-   * return aarch64_output_sve_mov_immediate (operands[1], 3, true);
-   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
-   * return aarch64_output_sve_mov_immediate (operands[1], 3, false);"
-  [(set_attr "movprfx" "*,yes,yes,yes")]
+   mov\t%0.<Vetype>, %3/m, #%I1
+   mov\t%0.<Vetype>, %3/z, #%I1
+   fmov\t%0.<Vetype>, %3/m, #%1
+   movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
+   movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
+   movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
+  [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
 )
 
 (define_insn "@aarch64_sel_dup<mode>"
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 02cb1af3246..b786850d4db 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1171,6 +1171,16 @@ aarch64_dbx_register_number (unsigned regno)
    return DWARF_FRAME_REGISTERS;
 }
 
+/* If X is a CONST_DOUBLE, return its bit representation as a constant
+   integer, otherwise return X unmodified.  */
+static rtx
+aarch64_bit_representation (rtx x)
+{
+  if (CONST_DOUBLE_P (x))
+    x = gen_lowpart (int_mode_for_mode (GET_MODE (x)).require (), x);
+  return x;
+}
+
 /* Return true if MODE is any of the Advanced SIMD structure modes.  */
 static bool
 aarch64_advsimd_struct_mode_p (machine_mode mode)
@@ -6562,7 +6572,8 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
   if (negate)
     r = real_value_negate (&r);
 
-  /* We only handle the SVE single-bit immediates here.  */
+  /* Handle the SVE single-bit immediates specially, since they have a
+     fixed form in the assembly syntax.  */
   if (real_equal (&r, &dconst0))
     asm_fprintf (f, "0.0");
   else if (real_equal (&r, &dconst2))
@@ -6572,7 +6583,13 @@ aarch64_print_vector_float_operand (FILE *f, rtx x, bool negate)
   else if (real_equal (&r, &dconsthalf))
     asm_fprintf (f, "0.5");
   else
-    return false;
+    {
+      const int buf_size = 20;
+      char float_buf[buf_size] = {'\0'};
+      real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size,
+				1, GET_MODE (elt));
+      asm_fprintf (f, "%s", float_buf);
+    }
 
   return true;
 }
@@ -6601,6 +6618,11 @@ sizetochar (int size)
 			and print it as an unsigned integer, in decimal.
      'e':		Print the sign/zero-extend size as a character 8->b,
 			16->h, 32->w.
+     'I':		If the operand is a duplicated vector constant,
+			replace it with the duplicated scalar.  If the
+			operand is then a floating-point constant, replace
+			it with the integer bit representation.  Print the
+			transformed constant as a signed decimal number.
      'p':		Prints N such that 2^N == X (X must be power of 2 and
 			const int).
      'P':		Print the number of non-zero bits in X (a const_int).
@@ -6727,6 +6749,19 @@ aarch64_print_operand (FILE *f, rtx x, int code)
       asm_fprintf (f, "%s", reg_names [REGNO (x) + 1]);
       break;
 
+    case 'I':
+      {
+	x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+	if (CONST_INT_P (x))
+	  asm_fprintf (f, "%wd", INTVAL (x));
+	else
+	  {
+	    output_operand_lossage ("invalid operand for '%%%c'", code);
+	    return;
+	  }
+	break;
+      }
+
     case 'M':
     case 'm':
       {
@@ -13028,13 +13063,11 @@ aarch64_sve_bitmask_immediate_p (rtx x)
 bool
 aarch64_sve_dup_immediate_p (rtx x)
 {
-  rtx elt;
-
-  if (!const_vec_duplicate_p (x, &elt)
-      || !CONST_INT_P (elt))
+  x = aarch64_bit_representation (unwrap_const_vec_duplicate (x));
+  if (!CONST_INT_P (x))
     return false;
 
-  HOST_WIDE_INT val = INTVAL (elt);
+  HOST_WIDE_INT val = INTVAL (x);
   if (val & 0xff)
     return IN_RANGE (val, -0x80, 0x7f);
   return IN_RANGE (val, -0x8000, 0x7f00);
@@ -14676,6 +14709,7 @@ aarch64_float_const_representable_p (rtx x)
   REAL_VALUE_TYPE r, m;
   bool fail;
 
+  x = unwrap_const_vec_duplicate (x);
   if (!CONST_DOUBLE_P (x))
     return false;
 
@@ -14852,15 +14886,12 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
 }
 
 /* Return the output string to use for moving immediate CONST_VECTOR
-   into an SVE register.  If the move is predicated, PRED_REG is the
-   number of the operand that contains the predicate register,
-   otherwise it is -1.  MERGE_P is true if a predicated move should
-   use merge predication rather than zero predication.  */
+   into an SVE register.  */
 
 char *
-aarch64_output_sve_mov_immediate (rtx const_vector, int pred_reg, bool merge_p)
+aarch64_output_sve_mov_immediate (rtx const_vector)
 {
-  static char templ[60];
+  static char templ[40];
   struct simd_immediate_info info;
   char element_char;
 
@@ -14904,40 +14935,14 @@ aarch64_output_sve_mov_immediate (rtx const_vector, int pred_reg, bool merge_p)
 				    CONST_DOUBLE_REAL_VALUE (info.value),
 				    buf_size, buf_size, 1, info.elt_mode);
 
-	  if (pred_reg == -1)
-	    snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
-		      element_char, float_buf);
-	  else
-	    {
-	      if (merge_p)
-		snprintf (templ, sizeof (templ), "fmov\t%%0.%c, %%%d/m, #%s",
-			  element_char,
-			  pred_reg,
-			  float_buf);
-	      else
-		snprintf (templ, sizeof (templ),
-			  "movprfx\t%%0.%c, %%%d/z, %%0.%c\n"
-			  "\tfmov\t%%0.%c, %%%d/m, #%s",
-			  element_char,
-			  pred_reg,
-			  element_char,
-			  element_char,
-			  pred_reg,
-			  float_buf);
-	    }
+	  snprintf (templ, sizeof (templ), "fmov\t%%0.%c, #%s",
+		    element_char, float_buf);
 	  return templ;
 	}
     }
 
-  if (pred_reg == -1)
-    snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
-	      element_char, INTVAL (info.value));
-  else
-    snprintf (templ, sizeof (templ),
-	      "mov\t%%0.%c, %%%d%s, #" HOST_WIDE_INT_PRINT_DEC,
-	      element_char,
-	      pred_reg, merge_p ? "/m" : "/z",
-	      INTVAL (info.value));
+  snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
+	    element_char, INTVAL (info.value));
   return templ;
 }
 
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index f4660d872cb..f2639537caa 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -278,7 +278,7 @@
 (define_constraint "Ufc"
   "A floating point constant which can be used with an\
    FMOV immediate operation."
-  (and (match_code "const_double")
+  (and (match_code "const_double,const_vector")
        (match_test "aarch64_float_const_representable_p (op)")))
 
 (define_constraint "Uvi"
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index fa7085a59cd..3c644409869 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -543,7 +543,8 @@
 
 (define_predicate "aarch64_sve_dup_immediate"
   (and (match_code "const,const_vector")
-       (match_test "aarch64_sve_dup_immediate_p (op)")))
+       (ior (match_test "aarch64_sve_dup_immediate_p (op)")
+	    (match_test "aarch64_float_const_representable_p (op)"))))
 
 (define_predicate "aarch64_sve_dup_reg_or_imm"
   (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_f16.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_f16.c
index 1e5c43a622b..6acbf9b71b1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_f16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_f16.c
@@ -70,7 +70,7 @@ TEST_UNIFORM_ZS (dup_h0_f16, svfloat16_t, __fp16,
 
 /*
 ** dup_1_f16_m:
-**	fmov	z0\.h, p0/m, #1.0(e\+0)?
+**	mov	z0\.h, p0/m, #15360
 **	ret
 */
 TEST_UNIFORM_Z (dup_1_f16_m, svfloat16_t,
@@ -88,7 +88,7 @@ TEST_UNIFORM_Z (dup_0_f16_m, svfloat16_t,
 
 /*
 ** dup_8_f16_m:
-**	fmov	z0\.h, p0/m, #8.0(e\+0)?
+**	mov	z0\.h, p0/m, #18432
 **	ret
 */
 TEST_UNIFORM_Z (dup_8_f16_m, svfloat16_t,
@@ -132,8 +132,7 @@ TEST_UNIFORM_ZS (dup_h0_f16_m, svfloat16_t, __fp16,
 
 /*
 ** dup_1_f16_z:
-**	movprfx	z0\.h, p0/z, z0\.h
-**	fmov	z0\.h, p0/m, #1.0(e\+0)?
+**	mov	z0\.h, p0/z, #15360
 **	ret
 */
 TEST_UNIFORM_Z (dup_1_f16_z, svfloat16_t,
@@ -151,8 +150,7 @@ TEST_UNIFORM_Z (dup_0_f16_z, svfloat16_t,
 
 /*
 ** dup_8_f16_z:
-**	movprfx	z0\.h, p0/z, z0\.h
-**	fmov	z0\.h, p0/m, #8.0(e\+0)?
+**	mov	z0\.h, p0/z, #18432
 **	ret
 */
 TEST_UNIFORM_Z (dup_8_f16_z, svfloat16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c
index 6a5af81ed3a..cd082cdea8f 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s16.c
@@ -631,9 +631,9 @@ TEST_UNIFORM_Z (dup_127_s16_z, svint16_t,
 
 /*
 ** dup_128_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #128
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_s16_z, svint16_t,
@@ -652,9 +652,9 @@ TEST_UNIFORM_Z (dup_253_s16_z, svint16_t,
 
 /*
 ** dup_254_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #254
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_s16_z, svint16_t,
@@ -663,9 +663,9 @@ TEST_UNIFORM_Z (dup_254_s16_z, svint16_t,
 
 /*
 ** dup_255_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #255
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_s16_z, svint16_t,
@@ -683,9 +683,9 @@ TEST_UNIFORM_Z (dup_256_s16_z, svint16_t,
 
 /*
 ** dup_257_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+)\.b, #1
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1\.h
+**	sel	z0\.h, p0, \2\.h, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_257_s16_z, svint16_t,
@@ -722,9 +722,9 @@ TEST_UNIFORM_Z (dup_7ffd_s16_z, svint16_t,
 
 /*
 ** dup_7ffe_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #32766
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_s16_z, svint16_t,
@@ -733,9 +733,9 @@ TEST_UNIFORM_Z (dup_7ffe_s16_z, svint16_t,
 
 /*
 ** dup_7fff_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #32767
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_s16_z, svint16_t,
@@ -762,9 +762,9 @@ TEST_UNIFORM_Z (dup_m128_s16_z, svint16_t,
 
 /*
 ** dup_m129_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-129
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_s16_z, svint16_t,
@@ -783,9 +783,9 @@ TEST_UNIFORM_Z (dup_m254_s16_z, svint16_t,
 
 /*
 ** dup_m255_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-255
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_s16_z, svint16_t,
@@ -803,9 +803,9 @@ TEST_UNIFORM_Z (dup_m256_s16_z, svint16_t,
 
 /*
 ** dup_m257_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-257
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_s16_z, svint16_t,
@@ -814,9 +814,9 @@ TEST_UNIFORM_Z (dup_m257_s16_z, svint16_t,
 
 /*
 ** dup_m258_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+)\.b, #-2
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1\.h
+**	sel	z0\.h, p0, \2\.h, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m258_s16_z, svint16_t,
@@ -848,9 +848,9 @@ TEST_UNIFORM_Z (dup_m7f00_s16_z, svint16_t,
 
 /*
 ** dup_m7f01_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-32513
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_s16_z, svint16_t,
@@ -869,9 +869,9 @@ TEST_UNIFORM_Z (dup_m7ffe_s16_z, svint16_t,
 
 /*
 ** dup_m7fff_s16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-32767
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_s16_z, svint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c
index 90c5a3a04e1..5951f60de5a 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s32.c
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_s32_z, svint32_t,
 
 /*
 ** dup_128_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #128
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_s32_z, svint32_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_s32_z, svint32_t,
 
 /*
 ** dup_254_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #254
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_s32_z, svint32_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_s32_z, svint32_t,
 
 /*
 ** dup_255_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #255
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_s32_z, svint32_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_s32_z, svint32_t,
 
 /*
 ** dup_7ffe_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #32766
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_s32_z, svint32_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_s32_z, svint32_t,
 
 /*
 ** dup_7fff_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #32767
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_s32_z, svint32_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_s32_z, svint32_t,
 
 /*
 ** dup_m129_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-129
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_s32_z, svint32_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_s32_z, svint32_t,
 
 /*
 ** dup_m255_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-255
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_s32_z, svint32_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_s32_z, svint32_t,
 
 /*
 ** dup_m257_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-257
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_s32_z, svint32_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_s32_z, svint32_t,
 
 /*
 ** dup_m7f01_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-32513
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_s32_z, svint32_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_s32_z, svint32_t,
 
 /*
 ** dup_m7fff_s32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-32767
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_s32_z, svint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c
index 2c694e9d87e..4104c34f1ee 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_s64.c
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_s64_z, svint64_t,
 
 /*
 ** dup_128_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #128
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_s64_z, svint64_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_s64_z, svint64_t,
 
 /*
 ** dup_254_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #254
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_s64_z, svint64_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_s64_z, svint64_t,
 
 /*
 ** dup_255_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #255
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_s64_z, svint64_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_s64_z, svint64_t,
 
 /*
 ** dup_7ffe_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #32766
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_s64_z, svint64_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_s64_z, svint64_t,
 
 /*
 ** dup_7fff_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #32767
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_s64_z, svint64_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_s64_z, svint64_t,
 
 /*
 ** dup_m129_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-129
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_s64_z, svint64_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_s64_z, svint64_t,
 
 /*
 ** dup_m255_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-255
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_s64_z, svint64_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_s64_z, svint64_t,
 
 /*
 ** dup_m257_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-257
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_s64_z, svint64_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_s64_z, svint64_t,
 
 /*
 ** dup_m7f01_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-32513
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_s64_z, svint64_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_s64_z, svint64_t,
 
 /*
 ** dup_m7fff_s64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-32767
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_s64_z, svint64_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c
index 9871ac2fdbf..d8ec1aa2c02 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u16.c
@@ -631,9 +631,9 @@ TEST_UNIFORM_Z (dup_127_u16_z, svuint16_t,
 
 /*
 ** dup_128_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #128
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_u16_z, svuint16_t,
@@ -652,9 +652,9 @@ TEST_UNIFORM_Z (dup_253_u16_z, svuint16_t,
 
 /*
 ** dup_254_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #254
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_u16_z, svuint16_t,
@@ -663,9 +663,9 @@ TEST_UNIFORM_Z (dup_254_u16_z, svuint16_t,
 
 /*
 ** dup_255_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #255
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_u16_z, svuint16_t,
@@ -683,9 +683,9 @@ TEST_UNIFORM_Z (dup_256_u16_z, svuint16_t,
 
 /*
 ** dup_257_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+)\.b, #1
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1\.h
+**	sel	z0\.h, p0, \2\.h, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_257_u16_z, svuint16_t,
@@ -722,9 +722,9 @@ TEST_UNIFORM_Z (dup_7ffd_u16_z, svuint16_t,
 
 /*
 ** dup_7ffe_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #32766
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_u16_z, svuint16_t,
@@ -733,9 +733,9 @@ TEST_UNIFORM_Z (dup_7ffe_u16_z, svuint16_t,
 
 /*
 ** dup_7fff_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #32767
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_u16_z, svuint16_t,
@@ -762,9 +762,9 @@ TEST_UNIFORM_Z (dup_m128_u16_z, svuint16_t,
 
 /*
 ** dup_m129_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-129
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_u16_z, svuint16_t,
@@ -783,9 +783,9 @@ TEST_UNIFORM_Z (dup_m254_u16_z, svuint16_t,
 
 /*
 ** dup_m255_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-255
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_u16_z, svuint16_t,
@@ -803,9 +803,9 @@ TEST_UNIFORM_Z (dup_m256_u16_z, svuint16_t,
 
 /*
 ** dup_m257_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-257
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_u16_z, svuint16_t,
@@ -814,9 +814,9 @@ TEST_UNIFORM_Z (dup_m257_u16_z, svuint16_t,
 
 /*
 ** dup_m258_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+)\.b, #-2
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1\.h
+**	sel	z0\.h, p0, \2\.h, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m258_u16_z, svuint16_t,
@@ -848,9 +848,9 @@ TEST_UNIFORM_Z (dup_m7f00_u16_z, svuint16_t,
 
 /*
 ** dup_m7f01_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-32513
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_u16_z, svuint16_t,
@@ -869,9 +869,9 @@ TEST_UNIFORM_Z (dup_m7ffe_u16_z, svuint16_t,
 
 /*
 ** dup_m7fff_u16_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.h), #-32767
-**	movprfx	z0\.h, p0/z, z0\.h
-**	mov	z0\.h, p0/m, \1
+**	sel	z0\.h, p0, \2, \1\.h
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_u16_z, svuint16_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c
index 1fd5be9ccbd..e673b5f16c3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u32.c
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_u32_z, svuint32_t,
 
 /*
 ** dup_128_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #128
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_u32_z, svuint32_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_u32_z, svuint32_t,
 
 /*
 ** dup_254_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #254
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_u32_z, svuint32_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_u32_z, svuint32_t,
 
 /*
 ** dup_255_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #255
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_u32_z, svuint32_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_u32_z, svuint32_t,
 
 /*
 ** dup_7ffe_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #32766
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_u32_z, svuint32_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_u32_z, svuint32_t,
 
 /*
 ** dup_7fff_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #32767
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_u32_z, svuint32_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_u32_z, svuint32_t,
 
 /*
 ** dup_m129_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-129
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_u32_z, svuint32_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_u32_z, svuint32_t,
 
 /*
 ** dup_m255_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-255
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_u32_z, svuint32_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_u32_z, svuint32_t,
 
 /*
 ** dup_m257_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-257
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_u32_z, svuint32_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_u32_z, svuint32_t,
 
 /*
 ** dup_m7f01_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-32513
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_u32_z, svuint32_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_u32_z, svuint32_t,
 
 /*
 ** dup_m7fff_u32_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.s), #-32767
-**	movprfx	z0\.s, p0/z, z0\.s
-**	mov	z0\.s, p0/m, \1
+**	sel	z0\.s, p0, \2, \1\.s
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_u32_z, svuint32_t,
diff --git a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c
index afac1b0ea91..0baa183b3d3 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve-acle/asm/dup_u64.c
@@ -623,9 +623,9 @@ TEST_UNIFORM_Z (dup_127_u64_z, svuint64_t,
 
 /*
 ** dup_128_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #128
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_128_u64_z, svuint64_t,
@@ -644,9 +644,9 @@ TEST_UNIFORM_Z (dup_253_u64_z, svuint64_t,
 
 /*
 ** dup_254_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #254
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_254_u64_z, svuint64_t,
@@ -655,9 +655,9 @@ TEST_UNIFORM_Z (dup_254_u64_z, svuint64_t,
 
 /*
 ** dup_255_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #255
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_255_u64_z, svuint64_t,
@@ -708,9 +708,9 @@ TEST_UNIFORM_Z (dup_7ffd_u64_z, svuint64_t,
 
 /*
 ** dup_7ffe_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #32766
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7ffe_u64_z, svuint64_t,
@@ -719,9 +719,9 @@ TEST_UNIFORM_Z (dup_7ffe_u64_z, svuint64_t,
 
 /*
 ** dup_7fff_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #32767
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_7fff_u64_z, svuint64_t,
@@ -748,9 +748,9 @@ TEST_UNIFORM_Z (dup_m128_u64_z, svuint64_t,
 
 /*
 ** dup_m129_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-129
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m129_u64_z, svuint64_t,
@@ -769,9 +769,9 @@ TEST_UNIFORM_Z (dup_m254_u64_z, svuint64_t,
 
 /*
 ** dup_m255_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-255
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m255_u64_z, svuint64_t,
@@ -789,9 +789,9 @@ TEST_UNIFORM_Z (dup_m256_u64_z, svuint64_t,
 
 /*
 ** dup_m257_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-257
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m257_u64_z, svuint64_t,
@@ -828,9 +828,9 @@ TEST_UNIFORM_Z (dup_m7f00_u64_z, svuint64_t,
 
 /*
 ** dup_m7f01_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-32513
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7f01_u64_z, svuint64_t,
@@ -849,9 +849,9 @@ TEST_UNIFORM_Z (dup_m7ffe_u64_z, svuint64_t,
 
 /*
 ** dup_m7fff_u64_z:
+**	mov	(z[0-9]+)\.b, #0
 **	mov	(z[0-9]+\.d), #-32767
-**	movprfx	z0\.d, p0/z, z0\.d
-**	mov	z0\.d, p0/m, \1
+**	sel	z0\.d, p0, \2, \1\.d
 **	ret
 */
 TEST_UNIFORM_Z (dup_m7fff_u64_z, svuint64_t,
