public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] AArch64: Use LDP/STP for large struct types
@ 2024-05-15 10:13 Wilco Dijkstra
  2024-05-16 19:31 ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Wilco Dijkstra @ 2024-05-15 10:13 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Sandiford, Richard Earnshaw

Use LDP/STP for large struct types as they have useful immediate offsets and are typically faster.
This removes differences between little and big endian and allows use of LDP/STP without UNSPEC.

Passes regress and bootstrap, OK for commit?

gcc:
        * config/aarch64/aarch64.cc (aarch64_classify_address): Treat SIMD structs identically
        in little and big-endian.
        * config/aarch64/aarch64.md (aarch64_mov<mode>): Remove VSTRUCT instructions.
        (aarch64_be_mov<mode>): Allow little-endian, rename to aarch64_mov<mode>.
        (aarch64_be_movoi): Allow little-endian, rename to aarch64_movoi.
        (aarch64_be_movci): Allow little-endian, rename to aarch64_movci.
        (aarch64_be_movxi): Allow little-endian, rename to aarch64_movxi.
        Remove big-endian special case in define_split variants.

gcc/testsuite:
        * gcc.target/aarch64/torture/simd-abi-8.c: Update to check for LDP/STP.

---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 16b7445d9f72f77a98ab262e21fd24e6cc97eba0..bb8b6963fd5117be82afe6ccd7154ae5302c3691 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7917,32 +7917,6 @@
   [(set_attr "type" "neon_store1_4reg<q>")]
 )
 
-(define_insn "*aarch64_mov<mode>"
-  [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
-	(match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && (register_operand (operands[0], <MODE>mode)
-       || register_operand (operands[1], <MODE>mode))"
-  {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
-     [ w        , w   ; multiple                       , <insn_count>  ] #
-     [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
-     [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
-  }
-)
-
-(define_insn "*aarch64_mov<mode>"
-  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
-	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && (register_operand (operands[0], <MODE>mode)
-       || register_operand (operands[1], <MODE>mode))"
-  {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
-     [ w        , w   ; multiple                       , <insn_count>  ] #
-     [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
-     [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
-  }
-)
-
 (define_insn "*aarch64_movv8di"
   [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
 	(match_operand:V8DI 1 "general_operand" " r,r,m"))]
@@ -7972,11 +7946,10 @@
   [(set_attr "type" "neon_store1_1reg<q>")]
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
 	(match_operand:VSTRUCT_2D 1 "general_operand"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1 ; attrs: type , length ]
@@ -7986,11 +7959,10 @@
   }
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
 	(match_operand:VSTRUCT_2Q 1 "general_operand"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
@@ -8000,11 +7972,10 @@
   }
 )
 
-(define_insn "*aarch64_be_movoi"
+(define_insn "*aarch64_movoi"
   [(set (match_operand:OI 0 "nonimmediate_operand")
 	(match_operand:OI 1 "general_operand"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], OImode)
        || register_operand (operands[1], OImode))"
   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
@@ -8014,11 +7985,10 @@
   }
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   "#"
@@ -8027,11 +7997,10 @@
    (set_attr "length" "12,8,8")]
 )
 
-(define_insn "*aarch64_be_movci"
+(define_insn "*aarch64_movci"
   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:CI 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], CImode)
        || register_operand (operands[1], CImode))"
   "#"
@@ -8040,11 +8009,10 @@
    (set_attr "length" "12,8,8")]
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   "#"
@@ -8053,11 +8021,10 @@
    (set_attr "length" "16,8,8")]
 )
 
-(define_insn "*aarch64_be_movxi"
+(define_insn "*aarch64_movxi"
   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:XI 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], XImode)
        || register_operand (operands[1], XImode))"
   "#"
@@ -8094,11 +8061,8 @@
 {
   if (register_operand (operands[0], <MODE>mode)
       && register_operand (operands[1], <MODE>mode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
+  else
     {
       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -8116,10 +8080,8 @@
 							operands[1],
 							<MODE>mode,
 							2 * elt_size)));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
@@ -8130,11 +8092,8 @@
 {
   if (register_operand (operands[0], CImode)
       && register_operand (operands[1], CImode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
+  else
     {
       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
 		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
@@ -8144,10 +8103,8 @@
 		      gen_lowpart (V16QImode,
 				   simplify_gen_subreg (TImode, operands[1],
 							CImode, 32)));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
@@ -8158,11 +8115,8 @@
 {
   if (register_operand (operands[0], <MODE>mode)
       && register_operand (operands[1], <MODE>mode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
+  else
     {
       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -8174,10 +8128,8 @@
 					   <MODE>mode, 2 * elt_size),
 		      simplify_gen_subreg (pair_mode, operands[1],
 					   <MODE>mode, 2 * elt_size));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
@@ -8188,20 +8140,15 @@
 {
   if (register_operand (operands[0], XImode)
       && register_operand (operands[1], XImode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
+  else
     {
       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
 		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
 		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8751d8741b8888a7c4ff24b2cd1fb3848c3f8e06..284e0a8cfa55b5d5e2dfac459fd3eb59acf6cb75 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10439,7 +10439,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
   unsigned int vec_flags = aarch64_classify_vector_mode (mode);
   vec_flags &= ~VEC_PARTIAL;
 
-  /* On BE, we use load/store pair for all large int mode load/stores.
+  /* We use load/store pair for all large int mode load/stores.
      TI/TF/TDmode may also use a load/store pair.  */
   bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
   bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
@@ -10447,8 +10447,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
 			    || mode == TImode
 			    || mode == TFmode
 			    || mode == TDmode
-			    || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
-				&& advsimd_struct_p));
+			    || advsimd_struct_p);
   /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
      corresponds to the actual size of the memory being loaded/stored and the
      mode of the corresponding addressing mode is half of that.  */
@@ -10482,14 +10481,6 @@ aarch64_classify_address (struct aarch64_address_info *info,
       && (code != REG && code != PLUS))
     return false;
 
-  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
-     REG addressing.  */
-  if (advsimd_struct_p
-      && TARGET_SIMD
-      && !BYTES_BIG_ENDIAN
-      && (code != POST_INC && code != REG))
-    return false;
-
   gcc_checking_assert (GET_MODE (x) == VOIDmode
 		       || SCALAR_INT_MODE_P (GET_MODE (x)));
 
diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
index 2b278caaa4e5815869cf35e88c121d79a7b1d647..e8871c7f22134de8ced14a5a8d9669c33efda4f8 100644
--- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
@@ -17,7 +17,5 @@ g (int64x2x4_t *ptr)
   *ptr = save;
 }
 
-/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */
-/* { dg-final { scan-assembler-not {\tld[pr]\tq} } } */
-/* { dg-final { scan-assembler-not {\tst[pr]\tq} } } */
+/* { dg-final { scan-assembler {\tld[pr]\tq} } } */
+/* { dg-final { scan-assembler {\tst[pr]\tq} } } */


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] AArch64: Use LDP/STP for large struct types
  2024-05-15 10:13 [PATCH] AArch64: Use LDP/STP for large struct types Wilco Dijkstra
@ 2024-05-16 19:31 ` Richard Sandiford
  2024-05-16 19:41   ` Richard Sandiford
  0 siblings, 1 reply; 4+ messages in thread
From: Richard Sandiford @ 2024-05-16 19:31 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, Richard Earnshaw

Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
> Use LDP/STP for large struct types as they have useful immediate offsets and are typically faster.
> This removes differences between little and big endian and allows use of LDP/STP without UNSPEC.
>
> Passes regress and bootstrap, OK for commit?
>
> gcc:
>         * config/aarch64/aarch64.cc (aarch64_classify_address): Treat SIMD structs identically
>         in little and bigendian.
>         * config/aarch64/aarch64.md (aarch64_mov<mode>): Remove VSTRUCT instructions.
>         (aarch64_be_mov<mode>): Allow little-endian, rename to aarch64_mov<mode>.
>         (aarch64_be_movoi): Allow little-endian, rename to aarch64_movoi.
>         (aarch64_be_movci): Allow little-endian, rename to aarch64_movci.
>         (aarch64_be_movxi): Allow little-endian, rename to aarch64_movxi.
>         Remove big-endian special case in define_split variants.
>
> gcc/testsuite:
>         * gcc.target/aarch64/torture/simd-abi-8.c: Update to check for LDP/STP.

I'm nervous about approving the removal of something that was deliberately
added by the initial commits. :)  But, even ignoring the extra offset range,
using LDP/STP makes strong intuitive sense for 2-register modes.  And for
3- and 4-registers modes, it's not surprising if the split that the
patch performs is (at worst) equivalent to what the hardware would do
itself or (at best) something that the hardware handles slightly better.

It's also a significant clean-up.

My only concern is that the main uses of these modes are for LD[234] and
ST[234].  By imposing the LD1/ST1 restrictions, the current little-endian
definition of "m" also corresponds to what LD[234] and ST[234] expect.
This in turn means that ivopts will optimise induction variable selection
to account for the fact that LD[234] and ST[234] do not support offsets.

I think the effect of the patch will be to make ivopts optimise LD[234]
and ST[234] on the assumption that they have the same range as LDP/STP.
We could avoid that if we

(1) Keep:

> @@ -10482,14 +10481,6 @@ aarch64_classify_address (struct aarch64_address_info *info,
>        && (code != REG && code != PLUS))
>      return false;
>  
> -  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
> -     REG addressing.  */
> -  if (advsimd_struct_p
> -      && TARGET_SIMD
> -      && !BYTES_BIG_ENDIAN
> -      && (code != POST_INC && code != REG))
> -    return false;
> -
>    gcc_checking_assert (GET_MODE (x) == VOIDmode
>  		       || SCALAR_INT_MODE_P (GET_MODE (x)));
>  

but drop the !BYTES_BIG_ENDIAN condition.

(2) Make Ump a defined_relaxed_memory_constraint (so that it accepts
more than "m" does).

(3) Use Ump instead of "o" in the move patterns.

Of course, this might make pure gimple-level data-shuffling worse.
I suppose it could also make RTL passes handle your memcpy use case
more pessimistically, although I'm not sure whether that would be for
legitimate reasons.

So another alternative would be to go with the patch as-is,
but add a new mechanism for gimple to query the valid addresses
for IFN_(MASK_)LOAD_LANES and IFN_(MASK_)STORE_LANES, rather than
relying purely on the legitimate address mechanism.  Ideally, the new
interface would be generic enough that we could use it for target (md)
builtins as well, to better optimise ACLE code.

So the patch is OK as-is from my POV, but I think it's relatively
important that we try to fix the ivopts handling before GCC 15.

Thanks,
Richard

> ---
>
> diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
> index 16b7445d9f72f77a98ab262e21fd24e6cc97eba0..bb8b6963fd5117be82afe6ccd7154ae5302c3691 100644
> --- a/gcc/config/aarch64/aarch64-simd.md
> +++ b/gcc/config/aarch64/aarch64-simd.md
> @@ -7917,32 +7917,6 @@
>    [(set_attr "type" "neon_store1_4reg<q>")]
>  )
>  
> -(define_insn "*aarch64_mov<mode>"
> -  [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
> -	(match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
> -  "TARGET_SIMD && !BYTES_BIG_ENDIAN
> -   && (register_operand (operands[0], <MODE>mode)
> -       || register_operand (operands[1], <MODE>mode))"
> -  {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
> -     [ w        , w   ; multiple                       , <insn_count>  ] #
> -     [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
> -     [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
> -  }
> -)
> -
> -(define_insn "*aarch64_mov<mode>"
> -  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
> -	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
> -  "TARGET_SIMD && !BYTES_BIG_ENDIAN
> -   && (register_operand (operands[0], <MODE>mode)
> -       || register_operand (operands[1], <MODE>mode))"
> -  {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
> -     [ w        , w   ; multiple                       , <insn_count>  ] #
> -     [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
> -     [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
> -  }
> -)
> -
>  (define_insn "*aarch64_movv8di"
>    [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
>  	(match_operand:V8DI 1 "general_operand" " r,r,m"))]
> @@ -7972,11 +7946,10 @@
>    [(set_attr "type" "neon_store1_1reg<q>")]
>  )
>  
> -(define_insn "*aarch64_be_mov<mode>"
> +(define_insn "*aarch64_mov<mode>"
>    [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
>  	(match_operand:VSTRUCT_2D 1 "general_operand"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], <MODE>mode)
>         || register_operand (operands[1], <MODE>mode))"
>    {@ [ cons: =0 , 1 ; attrs: type , length ]
> @@ -7986,11 +7959,10 @@
>    }
>  )
>  
> -(define_insn "*aarch64_be_mov<mode>"
> +(define_insn "*aarch64_mov<mode>"
>    [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
>  	(match_operand:VSTRUCT_2Q 1 "general_operand"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], <MODE>mode)
>         || register_operand (operands[1], <MODE>mode))"
>    {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
> @@ -8000,11 +7972,10 @@
>    }
>  )
>  
> -(define_insn "*aarch64_be_movoi"
> +(define_insn "*aarch64_movoi"
>    [(set (match_operand:OI 0 "nonimmediate_operand")
>  	(match_operand:OI 1 "general_operand"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], OImode)
>         || register_operand (operands[1], OImode))"
>    {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
> @@ -8014,11 +7985,10 @@
>    }
>  )
>  
> -(define_insn "*aarch64_be_mov<mode>"
> +(define_insn "*aarch64_mov<mode>"
>    [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
>  	(match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], <MODE>mode)
>         || register_operand (operands[1], <MODE>mode))"
>    "#"
> @@ -8027,11 +7997,10 @@
>     (set_attr "length" "12,8,8")]
>  )
>  
> -(define_insn "*aarch64_be_movci"
> +(define_insn "*aarch64_movci"
>    [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
>  	(match_operand:CI 1 "general_operand"      " w,w,o"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], CImode)
>         || register_operand (operands[1], CImode))"
>    "#"
> @@ -8040,11 +8009,10 @@
>     (set_attr "length" "12,8,8")]
>  )
>  
> -(define_insn "*aarch64_be_mov<mode>"
> +(define_insn "*aarch64_mov<mode>"
>    [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
>  	(match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], <MODE>mode)
>         || register_operand (operands[1], <MODE>mode))"
>    "#"
> @@ -8053,11 +8021,10 @@
>     (set_attr "length" "16,8,8")]
>  )
>  
> -(define_insn "*aarch64_be_movxi"
> +(define_insn "*aarch64_movxi"
>    [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
>  	(match_operand:XI 1 "general_operand"      " w,w,o"))]
>    "TARGET_FLOAT
> -   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
>     && (register_operand (operands[0], XImode)
>         || register_operand (operands[1], XImode))"
>    "#"
> @@ -8094,11 +8061,8 @@
>  {
>    if (register_operand (operands[0], <MODE>mode)
>        && register_operand (operands[1], <MODE>mode))
> -    {
> -      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
> -      DONE;
> -    }
> -  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
> +    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
> +  else
>      {
>        int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
>        machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
> @@ -8116,10 +8080,8 @@
>  							operands[1],
>  							<MODE>mode,
>  							2 * elt_size)));
> -      DONE;
>      }
> -  else
> -    FAIL;
> +  DONE;
>  })
>  
>  (define_split
> @@ -8130,11 +8092,8 @@
>  {
>    if (register_operand (operands[0], CImode)
>        && register_operand (operands[1], CImode))
> -    {
> -      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
> -      DONE;
> -    }
> -  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
> +    aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
> +  else
>      {
>        emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
>  		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
> @@ -8144,10 +8103,8 @@
>  		      gen_lowpart (V16QImode,
>  				   simplify_gen_subreg (TImode, operands[1],
>  							CImode, 32)));
> -      DONE;
>      }
> -  else
> -    FAIL;
> +  DONE;
>  })
>  
>  (define_split
> @@ -8158,11 +8115,8 @@
>  {
>    if (register_operand (operands[0], <MODE>mode)
>        && register_operand (operands[1], <MODE>mode))
> -    {
> -      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
> -      DONE;
> -    }
> -  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
> +    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
> +  else
>      {
>        int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
>        machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
> @@ -8174,10 +8128,8 @@
>  					   <MODE>mode, 2 * elt_size),
>  		      simplify_gen_subreg (pair_mode, operands[1],
>  					   <MODE>mode, 2 * elt_size));
> -      DONE;
>      }
> -  else
> -    FAIL;
> +  DONE;
>  })
>  
>  (define_split
> @@ -8188,20 +8140,15 @@
>  {
>    if (register_operand (operands[0], XImode)
>        && register_operand (operands[1], XImode))
> -    {
> -      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
> -      DONE;
> -    }
> -  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
> +    aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
> +  else
>      {
>        emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
>  		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
>        emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
>  		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
> -      DONE;
>      }
> -  else
> -    FAIL;
> +  DONE;
>  })
>  
>  (define_split
> diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
> index 8751d8741b8888a7c4ff24b2cd1fb3848c3f8e06..284e0a8cfa55b5d5e2dfac459fd3eb59acf6cb75 100644
> --- a/gcc/config/aarch64/aarch64.cc
> +++ b/gcc/config/aarch64/aarch64.cc
> @@ -10439,7 +10439,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
>    unsigned int vec_flags = aarch64_classify_vector_mode (mode);
>    vec_flags &= ~VEC_PARTIAL;
>  
> -  /* On BE, we use load/store pair for all large int mode load/stores.
> +  /* We use load/store pair for all large int mode load/stores.
>       TI/TF/TDmode may also use a load/store pair.  */
>    bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
>    bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
> @@ -10447,8 +10447,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
>  			    || mode == TImode
>  			    || mode == TFmode
>  			    || mode == TDmode
> -			    || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
> -				&& advsimd_struct_p));
> +			    || advsimd_struct_p);
>    /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
>       corresponds to the actual size of the memory being loaded/stored and the
>       mode of the corresponding addressing mode is half of that.  */
> @@ -10482,14 +10481,6 @@ aarch64_classify_address (struct aarch64_address_info *info,
>        && (code != REG && code != PLUS))
>      return false;
>  
> -  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
> -     REG addressing.  */
> -  if (advsimd_struct_p
> -      && TARGET_SIMD
> -      && !BYTES_BIG_ENDIAN
> -      && (code != POST_INC && code != REG))
> -    return false;
> -
>    gcc_checking_assert (GET_MODE (x) == VOIDmode
>  		       || SCALAR_INT_MODE_P (GET_MODE (x)));
>  
> diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
> index 2b278caaa4e5815869cf35e88c121d79a7b1d647..e8871c7f22134de8ced14a5a8d9669c33efda4f8 100644
> --- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
> +++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
> @@ -17,7 +17,5 @@ g (int64x2x4_t *ptr)
>    *ptr = save;
>  }
>  
> -/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */
> -/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */
> -/* { dg-final { scan-assembler-not {\tld[pr]\tq} } } */
> -/* { dg-final { scan-assembler-not {\tst[pr]\tq} } } */
> +/* { dg-final { scan-assembler {\tld[pr]\tq} } } */
> +/* { dg-final { scan-assembler {\tst[pr]\tq} } } */

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] AArch64: Use LDP/STP for large struct types
  2024-05-16 19:31 ` Richard Sandiford
@ 2024-05-16 19:41   ` Richard Sandiford
  0 siblings, 0 replies; 4+ messages in thread
From: Richard Sandiford @ 2024-05-16 19:41 UTC (permalink / raw)
  To: Wilco Dijkstra; +Cc: GCC Patches, Richard Earnshaw

Richard Sandiford <richard.sandiford@arm.com> writes:
> Wilco Dijkstra <Wilco.Dijkstra@arm.com> writes:
>> Use LDP/STP for large struct types as they have useful immediate offsets and are typically faster.
>> This removes differences between little and big endian and allows use of LDP/STP without UNSPEC.
>>
>> Passes regress and bootstrap, OK for commit?
>>
>> gcc:
>>         * config/aarch64/aarch64.cc (aarch64_classify_address): Treat SIMD structs identically
>>         in little and bigendian.
>>         * config/aarch64/aarch64.md (aarch64_mov<mode>): Remove VSTRUCT instructions.
>>         (aarch64_be_mov<mode>): Allow little-endian, rename to aarch64_mov<mode>.
>>         (aarch64_be_movoi): Allow little-endian, rename to aarch64_movoi.
>>         (aarch64_be_movci): Allow little-endian, rename to aarch64_movci.
>>         (aarch64_be_movxi): Allow little-endian, rename to aarch64_movxi.
>>         Remove big-endian special case in define_split variants.
>>
>> gcc/testsuite:
>>         * gcc.target/aarch64/torture/simd-abi-8.c: Update to check for LDP/STP.
>
> [...]
> So another alternative would be to go with the patch as-is,
> but add a new mechanism for gimple to query the valid addresses
> for IFN_(MASK_)LOAD_LANES and IFN_(MASK_)STORE_LANES, rather than
> relying purely on the legitimate address mechanism,.  Ideally, the new
> interface would be generic enough that we could use it for target (md)
> builtins as well, to better optimise ACLE code.

Gah, just realised after sending that there's another potential problem.
Currently inline asms can assume that "m" will only include the LD1/ST1
range for little-endian.  We might need to consider using
TARGET_MEM_CONSTRAINT, so that we continue to present the same
interface to asms, but can use the wider range internally.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH] AArch64: Use LDP/STP for large struct types
@ 2024-05-15 10:13 Wilco Dijkstra
  0 siblings, 0 replies; 4+ messages in thread
From: Wilco Dijkstra @ 2024-05-15 10:13 UTC (permalink / raw)
  To: GCC Patches; +Cc: Richard Sandiford, Richard Earnshaw

Use LDP/STP for large struct types as they have useful immediate offsets and are typically faster.
This removes differences between little and big endian and allows use of LDP/STP without UNSPEC.

Passes regress and bootstrap, OK for commit?

gcc:
        * config/aarch64/aarch64.cc (aarch64_classify_address): Treat SIMD structs identically
        in little and big-endian.
        * config/aarch64/aarch64.md (aarch64_mov<mode>): Remove VSTRUCT instructions.
        (aarch64_be_mov<mode>): Allow little-endian, rename to aarch64_mov<mode>.
        (aarch64_be_movoi): Allow little-endian, rename to aarch64_movoi.
        (aarch64_be_movci): Allow little-endian, rename to aarch64_movci.
        (aarch64_be_movxi): Allow little-endian, rename to aarch64_movxi.
        Remove big-endian special case in define_split variants.

gcc/testsuite:
        * gcc.target/aarch64/torture/simd-abi-8.c: Update to check for LDP/STP.

---

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 16b7445d9f72f77a98ab262e21fd24e6cc97eba0..bb8b6963fd5117be82afe6ccd7154ae5302c3691 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -7917,32 +7917,6 @@
   [(set_attr "type" "neon_store1_4reg<q>")]
 )
 
-(define_insn "*aarch64_mov<mode>"
-  [(set (match_operand:VSTRUCT_QD 0 "aarch64_simd_nonimmediate_operand")
-	(match_operand:VSTRUCT_QD 1 "aarch64_simd_general_operand"))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && (register_operand (operands[0], <MODE>mode)
-       || register_operand (operands[1], <MODE>mode))"
-  {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
-     [ w        , w   ; multiple                       , <insn_count>  ] #
-     [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.<Vtype> - %<Vendreg>1.<Vtype>}, %0
-     [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.<Vtype> - %<Vendreg>0.<Vtype>}, %1
-  }
-)
-
-(define_insn "*aarch64_mov<mode>"
-  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand")
-	(match_operand:VSTRUCT 1 "aarch64_simd_general_operand"))]
-  "TARGET_SIMD && !BYTES_BIG_ENDIAN
-   && (register_operand (operands[0], <MODE>mode)
-       || register_operand (operands[1], <MODE>mode))"
-  {@ [ cons: =0 , 1   ; attrs: type                    , length        ]
-     [ w        , w   ; multiple                       , <insn_count>  ] #
-     [ Utv      , w   ; neon_store<nregs>_<nregs>reg_q , 4             ] st1\t{%S1.16b - %<Vendreg>1.16b}, %0
-     [ w        , Utv ; neon_load<nregs>_<nregs>reg_q  , 4             ] ld1\t{%S0.16b - %<Vendreg>0.16b}, %1
-  }
-)
-
 (define_insn "*aarch64_movv8di"
   [(set (match_operand:V8DI 0 "nonimmediate_operand" "=r,m,r")
 	(match_operand:V8DI 1 "general_operand" " r,r,m"))]
@@ -7972,11 +7946,10 @@
   [(set_attr "type" "neon_store1_1reg<q>")]
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_2D 0 "nonimmediate_operand")
 	(match_operand:VSTRUCT_2D 1 "general_operand"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1 ; attrs: type , length ]
@@ -7986,11 +7959,10 @@
   }
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_2Q 0 "nonimmediate_operand")
 	(match_operand:VSTRUCT_2Q 1 "general_operand"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
@@ -8000,11 +7972,10 @@
   }
 )
 
-(define_insn "*aarch64_be_movoi"
+(define_insn "*aarch64_movoi"
   [(set (match_operand:OI 0 "nonimmediate_operand")
 	(match_operand:OI 1 "general_operand"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], OImode)
        || register_operand (operands[1], OImode))"
   {@ [ cons: =0 , 1 ; attrs: type , arch , length ]
@@ -8014,11 +7985,10 @@
   }
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_3QD 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:VSTRUCT_3QD 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   "#"
@@ -8027,11 +7997,10 @@
    (set_attr "length" "12,8,8")]
 )
 
-(define_insn "*aarch64_be_movci"
+(define_insn "*aarch64_movci"
   [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:CI 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], CImode)
        || register_operand (operands[1], CImode))"
   "#"
@@ -8040,11 +8009,10 @@
    (set_attr "length" "12,8,8")]
 )
 
-(define_insn "*aarch64_be_mov<mode>"
+(define_insn "*aarch64_mov<mode>"
   [(set (match_operand:VSTRUCT_4QD 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:VSTRUCT_4QD 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], <MODE>mode)
        || register_operand (operands[1], <MODE>mode))"
   "#"
@@ -8053,11 +8021,10 @@
    (set_attr "length" "16,8,8")]
 )
 
-(define_insn "*aarch64_be_movxi"
+(define_insn "*aarch64_movxi"
   [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
 	(match_operand:XI 1 "general_operand"      " w,w,o"))]
   "TARGET_FLOAT
-   && (!TARGET_SIMD || BYTES_BIG_ENDIAN)
    && (register_operand (operands[0], XImode)
        || register_operand (operands[1], XImode))"
   "#"
@@ -8094,11 +8061,8 @@
 {
   if (register_operand (operands[0], <MODE>mode)
       && register_operand (operands[1], <MODE>mode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 3);
+  else
     {
       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -8116,10 +8080,8 @@
 							operands[1],
 							<MODE>mode,
 							2 * elt_size)));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
@@ -8130,11 +8092,8 @@
 {
   if (register_operand (operands[0], CImode)
       && register_operand (operands[1], CImode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
+  else
     {
       emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
 		      simplify_gen_subreg (OImode, operands[1], CImode, 0));
@@ -8144,10 +8103,8 @@
 		      gen_lowpart (V16QImode,
 				   simplify_gen_subreg (TImode, operands[1],
 							CImode, 32)));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
@@ -8158,11 +8115,8 @@
 {
   if (register_operand (operands[0], <MODE>mode)
       && register_operand (operands[1], <MODE>mode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, <VSTRUCT_ELT>mode, 4);
+  else
     {
       int elt_size = GET_MODE_SIZE (<MODE>mode).to_constant () / <nregs>;
       machine_mode pair_mode = elt_size == 16 ? V2x16QImode : V2x8QImode;
@@ -8174,10 +8128,8 @@
 					   <MODE>mode, 2 * elt_size),
 		      simplify_gen_subreg (pair_mode, operands[1],
 					   <MODE>mode, 2 * elt_size));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
@@ -8188,20 +8140,15 @@
 {
   if (register_operand (operands[0], XImode)
       && register_operand (operands[1], XImode))
-    {
-      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
-      DONE;
-    }
-  else if (!TARGET_SIMD || BYTES_BIG_ENDIAN)
+    aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
+  else
     {
       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
 		      simplify_gen_subreg (OImode, operands[1], XImode, 0));
       emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
 		      simplify_gen_subreg (OImode, operands[1], XImode, 32));
-      DONE;
     }
-  else
-    FAIL;
+  DONE;
 })
 
 (define_split
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 8751d8741b8888a7c4ff24b2cd1fb3848c3f8e06..284e0a8cfa55b5d5e2dfac459fd3eb59acf6cb75 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -10439,7 +10439,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
   unsigned int vec_flags = aarch64_classify_vector_mode (mode);
   vec_flags &= ~VEC_PARTIAL;
 
-  /* On BE, we use load/store pair for all large int mode load/stores.
+  /* We use load/store pair for all large int mode load/stores.
      TI/TF/TDmode may also use a load/store pair.  */
   bool advsimd_struct_p = (vec_flags == (VEC_ADVSIMD | VEC_STRUCT));
   bool load_store_pair_p = (type == ADDR_QUERY_LDP_STP
@@ -10447,8 +10447,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
 			    || mode == TImode
 			    || mode == TFmode
 			    || mode == TDmode
-			    || ((!TARGET_SIMD || BYTES_BIG_ENDIAN)
-				&& advsimd_struct_p));
+			    || advsimd_struct_p);
   /* If we are dealing with ADDR_QUERY_LDP_STP_N that means the incoming mode
      corresponds to the actual size of the memory being loaded/stored and the
      mode of the corresponding addressing mode is half of that.  */
@@ -10482,14 +10481,6 @@ aarch64_classify_address (struct aarch64_address_info *info,
       && (code != REG && code != PLUS))
     return false;
 
-  /* On LE, for AdvSIMD, don't support anything other than POST_INC or
-     REG addressing.  */
-  if (advsimd_struct_p
-      && TARGET_SIMD
-      && !BYTES_BIG_ENDIAN
-      && (code != POST_INC && code != REG))
-    return false;
-
   gcc_checking_assert (GET_MODE (x) == VOIDmode
 		       || SCALAR_INT_MODE_P (GET_MODE (x)));
 
diff --git a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
index 2b278caaa4e5815869cf35e88c121d79a7b1d647..e8871c7f22134de8ced14a5a8d9669c33efda4f8 100644
--- a/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
+++ b/gcc/testsuite/gcc.target/aarch64/torture/simd-abi-8.c
@@ -17,7 +17,5 @@ g (int64x2x4_t *ptr)
   *ptr = save;
 }
 
-/* { dg-final { scan-assembler-times {\tld1\t} 1 } } */
-/* { dg-final { scan-assembler-times {\tst1\t} 1 } } */
-/* { dg-final { scan-assembler-not {\tld[pr]\tq} } } */
-/* { dg-final { scan-assembler-not {\tst[pr]\tq} } } */
+/* { dg-final { scan-assembler {\tld[pr]\tq} } } */
+/* { dg-final { scan-assembler {\tst[pr]\tq} } } */


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2024-05-16 19:41 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-05-15 10:13 [PATCH] AArch64: Use LDP/STP for large struct types Wilco Dijkstra
2024-05-16 19:31 ` Richard Sandiford
2024-05-16 19:41   ` Richard Sandiford
  -- strict thread matches above, loose matches on Subject: below --
2024-05-15 10:13 Wilco Dijkstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).