public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-9323] aarch64: Remove SME2.1 forms of LUTI2/4
@ 2024-03-05 17:51 Richard Sandiford
  0 siblings, 0 replies; only message in thread
From: Richard Sandiford @ 2024-03-05 17:51 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8776468d9e57ace5f832c1368243a6dbce9984d5

commit r14-9323-g8776468d9e57ace5f832c1368243a6dbce9984d5
Author: Richard Sandiford <richard.sandiford@arm.com>
Date:   Tue Mar 5 17:51:24 2024 +0000

    aarch64: Remove SME2.1 forms of LUTI2/4
    
    I was over-eager when adding support for strided SME2 instructions
    and accidentally included forms of LUTI2 and LUTI4 that are only
    available with SME2.1, not SME2.  This patch removes them for now.
    We're planning to add proper support for SME2.1 in the GCC 15
    timeframe.
    
    Sorry for the blunder :(
    
    gcc/
            * config/aarch64/aarch64.md (stride_type): Remove luti_consecutive
            and luti_strided.
            * config/aarch64/aarch64-sme.md
            (@aarch64_sme_lut<LUTI_BITS><mode>): Remove stride_type attribute.
            (@aarch64_sme_lut<LUTI_BITS><mode>_strided2): Delete.
            (@aarch64_sme_lut<LUTI_BITS><mode>_strided4): Likewise.
            * config/aarch64/aarch64-early-ra.cc (is_stride_candidate)
            (early_ra::maybe_convert_to_strided_access): Remove support for
            strided LUTI2 and LUTI4.
    
    gcc/testsuite/
            * gcc.target/aarch64/sme/strided_1.c (test5): Remove.

Diff:
---
 gcc/config/aarch64/aarch64-early-ra.cc           | 20 +------
 gcc/config/aarch64/aarch64-sme.md                | 70 ------------------------
 gcc/config/aarch64/aarch64.md                    |  3 +-
 gcc/testsuite/gcc.target/aarch64/sme/strided_1.c | 55 -------------------
 4 files changed, 3 insertions(+), 145 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 8530b0ae41e..1e2c823cb2e 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -1060,8 +1060,7 @@ is_stride_candidate (rtx_insn *insn)
     return false;
 
   auto stride_type = get_attr_stride_type (insn);
-  return (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
-	  || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
+  return (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE
 	  || stride_type == STRIDE_TYPE_ST1_CONSECUTIVE);
 }
 
@@ -3212,8 +3211,7 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
   auto stride_type = get_attr_stride_type (insn);
   rtx pat = PATTERN (insn);
   rtx op;
-  if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE
-      || stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
+  if (stride_type == STRIDE_TYPE_LD1_CONSECUTIVE)
     op = SET_DEST (pat);
   else if (stride_type == STRIDE_TYPE_ST1_CONSECUTIVE)
     op = XVECEXP (SET_SRC (pat), 0, 1);
@@ -3263,20 +3261,6 @@ early_ra::maybe_convert_to_strided_access (rtx_insn *insn)
       XVECEXP (SET_SRC (pat), 0, XVECLEN (SET_SRC (pat), 0) - 1)
 	= *recog_data.dup_loc[0];
     }
-  else if (stride_type == STRIDE_TYPE_LUTI_CONSECUTIVE)
-    {
-      auto bits = INTVAL (XVECEXP (SET_SRC (pat), 0, 4));
-      if (range.count == 2)
-	pat = gen_aarch64_sme_lut_strided2 (bits, single_mode,
-					    regs[0], regs[1],
-					    recog_data.operand[1],
-					    recog_data.operand[2]);
-      else
-	pat = gen_aarch64_sme_lut_strided4 (bits, single_mode,
-					    regs[0], regs[1], regs[2], regs[3],
-					    recog_data.operand[1],
-					    recog_data.operand[2]);
-    }
   else
     gcc_unreachable ();
   PATTERN (insn) = pat;
diff --git a/gcc/config/aarch64/aarch64-sme.md b/gcc/config/aarch64/aarch64-sme.md
index c95d4aa696c..78ad2fc699f 100644
--- a/gcc/config/aarch64/aarch64-sme.md
+++ b/gcc/config/aarch64/aarch64-sme.md
@@ -1939,74 +1939,4 @@
   "TARGET_STREAMING_SME2
    && !(<LUTI_BITS> == 4 && <vector_count> == 4 && <elem_bits> == 8)"
   "luti<LUTI_BITS>\t%0, zt0, %1[%2]"
-  [(set_attr "stride_type" "luti_consecutive")]
-)
-
-(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided2"
-  [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwd")
-	(unspec:SVE_FULL_BHS
-	  [(reg:V8DI ZT0_REGNUM)
-	   (reg:DI SME_STATE_REGNUM)
-	   (match_operand:VNx16QI 2 "register_operand" "w")
-	   (match_operand:DI 3 "const_int_operand")
-	   (const_int LUTI_BITS)
-	   (const_int 0)]
-	  UNSPEC_SME_LUTI))
-   (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w")
-	(unspec:SVE_FULL_BHS
-	  [(reg:V8DI ZT0_REGNUM)
-	   (reg:DI SME_STATE_REGNUM)
-	   (match_dup 2)
-	   (match_dup 3)
-	   (const_int LUTI_BITS)
-	   (const_int 1)]
-	  UNSPEC_SME_LUTI))]
-  "TARGET_STREAMING_SME2
-   && aarch64_strided_registers_p (operands, 2, 8)"
-  "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>}, zt0, %2[%3]"
-  [(set_attr "stride_type" "luti_strided")]
-)
-
-(define_insn "@aarch64_sme_lut<LUTI_BITS><mode>_strided4"
-  [(set (match_operand:SVE_FULL_BHS 0 "aarch64_simd_register" "=Uwt")
-	(unspec:SVE_FULL_BHS
-	  [(reg:V8DI ZT0_REGNUM)
-	   (reg:DI SME_STATE_REGNUM)
-	   (match_operand:VNx16QI 4 "register_operand" "w")
-	   (match_operand:DI 5 "const_int_operand")
-	   (const_int LUTI_BITS)
-	   (const_int 0)]
-	  UNSPEC_SME_LUTI))
-   (set (match_operand:SVE_FULL_BHS 1 "aarch64_simd_register" "=w")
-	(unspec:SVE_FULL_BHS
-	  [(reg:V8DI ZT0_REGNUM)
-	   (reg:DI SME_STATE_REGNUM)
-	   (match_dup 4)
-	   (match_dup 5)
-	   (const_int LUTI_BITS)
-	   (const_int 1)]
-	  UNSPEC_SME_LUTI))
-   (set (match_operand:SVE_FULL_BHS 2 "aarch64_simd_register" "=w")
-	(unspec:SVE_FULL_BHS
-	  [(reg:V8DI ZT0_REGNUM)
-	   (reg:DI SME_STATE_REGNUM)
-	   (match_dup 4)
-	   (match_dup 5)
-	   (const_int LUTI_BITS)
-	   (const_int 2)]
-	  UNSPEC_SME_LUTI))
-   (set (match_operand:SVE_FULL_BHS 3 "aarch64_simd_register" "=w")
-	(unspec:SVE_FULL_BHS
-	  [(reg:V8DI ZT0_REGNUM)
-	   (reg:DI SME_STATE_REGNUM)
-	   (match_dup 4)
-	   (match_dup 5)
-	   (const_int LUTI_BITS)
-	   (const_int 3)]
-	  UNSPEC_SME_LUTI))]
-  "TARGET_STREAMING_SME2
-   && !(<LUTI_BITS> == 4 && <elem_bits> == 8)
-   && aarch64_strided_registers_p (operands, 4, 4)"
-  "luti<LUTI_BITS>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, zt0, %4[%5]"
-  [(set_attr "stride_type" "luti_strided")]
 )
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 33fbe1b2e8d..7d51d923bf6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -553,8 +553,7 @@
 ;; The RTL mapping therefore applies at LD1 granularity, rather than
 ;; being broken down into individual types of load.
 (define_attr "stride_type"
-  "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided,
-   luti_consecutive,luti_strided"
+  "none,ld1_consecutive,ld1_strided,st1_consecutive,st1_strided"
   (const_string "none"))
 
 ;; Attribute used to identify load pair and store pair instructions.
diff --git a/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c b/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c
index 3620fff3668..73aac0683ea 100644
--- a/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sme/strided_1.c
@@ -180,61 +180,6 @@ void test4(int32_t *dest, int32_t *src) __arm_streaming
 		       svget4(l2, 3), svget4(l3, 3)));
 }
 
-/*
-** test5:
-**	ptrue	[^\n]+
-**	ld1b	[^\n]+
-**	ld1b	[^\n]+
-**	ptrue	([^\n]+)\.s
-**	ld1w	[^\n]+, \1/z, \[x0\]
-**	luti4	{z16\.s, z20\.s, z24\.s, z28\.s}, zt0, z[0-9]+\[0\]
-**	luti4	{z17\.s, z21\.s, z25\.s, z29\.s}, zt0, z[0-9]+\[1\]
-**	luti4	{z18\.s, z22\.s, z26\.s, z30\.s}, zt0, z[0-9]+\[0\]
-**	luti4	{z19\.s, z23\.s, z27\.s, z31\.s}, zt0, z[0-9]+\[1\]
-**	uclamp	{z16\.s - z19\.s}, z[0-9]+\.s, z[0-9]+\.s
-**	uclamp	{z20\.s - z23\.s}, z[0-9]+\.s, z[0-9]+\.s
-**	uclamp	{z24\.s - z27\.s}, z[0-9]+\.s, z[0-9]+\.s
-**	uclamp	{z28\.s - z31\.s}, z[0-9]+\.s, z[0-9]+\.s
-**	st1w	{z16\.s - z19\.s}, \1, \[x0\]
-**	st1w	{z20\.s - z23\.s}, \1, \[x0, #4, mul vl\]
-**	st1w	{z24\.s - z27\.s}, \1, \[x0, #8, mul vl\]
-**	st1w	{z28\.s - z31\.s}, \1, \[x0, #12, mul vl\]
-**	ret
-*/
-void test5(uint32_t *dest, uint8_t *indices)
-  __arm_streaming __arm_preserves("za") __arm_inout("zt0")
-{
-  svuint8_t indices1 = svld1_vnum(svptrue_b8(), indices, 0);
-  svuint8_t indices2 = svld1_vnum(svptrue_b8(), indices, 2);
-
-  svcount_t pg = svptrue_c32();
-  svuint32x4_t bounds = svld1_x4(pg, dest);
-
-  svuint32x4_t x0 = svluti4_lane_zt_u32_x4(0, indices1, 0);
-  svuint32x4_t x1 = svluti4_lane_zt_u32_x4(0, indices1, 1);
-  svuint32x4_t x2 = svluti4_lane_zt_u32_x4(0, indices2, 0);
-  svuint32x4_t x3 = svluti4_lane_zt_u32_x4(0, indices2, 1);
-
-  svuint32x4_t y0 = svcreate4(svget4(x0, 0), svget4(x1, 0),
-			      svget4(x2, 0), svget4(x3, 0));
-  svuint32x4_t y1 = svcreate4(svget4(x0, 1), svget4(x1, 1),
-			      svget4(x2, 1), svget4(x3, 1));
-  svuint32x4_t y2 = svcreate4(svget4(x0, 2), svget4(x1, 2),
-			      svget4(x2, 2), svget4(x3, 2));
-  svuint32x4_t y3 = svcreate4(svget4(x0, 3), svget4(x1, 3),
-			      svget4(x2, 3), svget4(x3, 3));
-
-  y0 = svclamp(y0, svget4(bounds, 0), svget4(bounds, 1));
-  y1 = svclamp(y1, svget4(bounds, 2), svget4(bounds, 3));
-  y2 = svclamp(y2, svget4(bounds, 0), svget4(bounds, 1));
-  y3 = svclamp(y3, svget4(bounds, 2), svget4(bounds, 3));
-
-  svst1_vnum(pg, dest, 0, y0);
-  svst1_vnum(pg, dest, 4, y1);
-  svst1_vnum(pg, dest, 8, y2);
-  svst1_vnum(pg, dest, 12, y3);
-}
-
 /*
 ** test6:
 **	ptrue	[^\n]+

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2024-03-05 17:51 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-03-05 17:51 [gcc r14-9323] aarch64: Remove SME2.1 forms of LUTI2/4 Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).