From: Tamar Christina
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r13-5046] Revert "aarch64: Make existing V2HF be usable."
Date: Fri, 6 Jan 2023 12:59:09 +0000 (GMT)

https://gcc.gnu.org/g:a40c22c377c7cc657b0feaf0119d84d0d142a318

commit r13-5046-ga40c22c377c7cc657b0feaf0119d84d0d142a318
Author: Tamar Christina
Date:   Fri Jan 6 12:57:50 2023 +0000

    Revert "aarch64: Make existing V2HF be usable."

    This reverts commit 2cba118e538ba0b7582af7f9fb5ba2dfbb772f8e.

Diff:
---
 gcc/config/aarch64/aarch64-simd.md           | 88 +++++++++-------------------
 gcc/config/aarch64/aarch64.cc                |  1 -
 gcc/config/aarch64/iterators.md              | 30 +++-------
 gcc/config/arm/types.md                      |  6 +-
 gcc/testsuite/gcc.target/aarch64/sve/slp_1.c | 10 ++--
 5 files changed, 45 insertions(+), 90 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c0e6164b3bd..104088f67d2 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -19,10 +19,10 @@
 ;; <http://www.gnu.org/licenses/>.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
-	(match_operand:VMOVE 1 "general_operand"))]
+  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
+	(match_operand:VALL_F16 1 "general_operand"))]
   "TARGET_FLOAT"
-{
+  "
   /* Force the operand into a register if it is not an
      immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
@@ -46,11 +46,12 @@
       aarch64_expand_vector_init (operands[0], operands[1]);
       DONE;
     }
-})
+  "
+)
 
 (define_expand "movmisalign<mode>"
-  [(set (match_operand:VMOVE 0 "nonimmediate_operand")
-	(match_operand:VMOVE 1 "general_operand"))]
+  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
+	(match_operand:VALL_F16 1 "general_operand"))]
   "TARGET_FLOAT && !STRICT_ALIGNMENT"
 {
   /* This pattern is not permitted to fail during expansion: if both arguments
@@ -72,16 +73,6 @@
   [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
 )
 
-(define_insn "aarch64_simd_dupv2hf"
-  [(set (match_operand:V2HF 0 "register_operand" "=w")
-	(vec_duplicate:V2HF
-	  (match_operand:HF 1 "register_operand" "0")))]
-  "TARGET_SIMD"
-  "@
-   sli\\t%d0, %d0, 16"
-  [(set_attr "type" "neon_shift_imm")]
-)
-
 (define_insn "aarch64_simd_dup<mode>"
   [(set (match_operand:VDQF_F16 0 "register_operand" "=w,w")
	(vec_duplicate:VDQF_F16
@@ -94,10 +85,10 @@
 )
 
 (define_insn "aarch64_dup_lane<mode>"
-  [(set (match_operand:VMOVE 0 "register_operand" "=w")
-	(vec_duplicate:VMOVE
+  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
+	(vec_duplicate:VALL_F16
	  (vec_select:<VEL>
-	    (match_operand:VMOVE 1 "register_operand" "w")
+	    (match_operand:VALL_F16 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "immediate_operand" "i")])
	  )))]
   "TARGET_SIMD"
@@ -159,29 +150,6 @@
    (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
 )
 
-(define_insn "*aarch64_simd_movv2hf"
-  [(set (match_operand:V2HF 0 "nonimmediate_operand"
-		"=w, m,  m,  w, ?r, ?w, ?r, w, w")
-	(match_operand:V2HF 1 "general_operand"
-		"m,  Dz, w,  w,  w,  r,  r,  Dz, Dn"))]
-  "TARGET_SIMD_F16INST
-   && (register_operand (operands[0], V2HFmode)
-       || aarch64_simd_reg_or_zero (operands[1], V2HFmode))"
-   "@
-    ldr\\t%s0, %1
-    str\\twzr, %0
-    str\\t%s1, %0
-    mov\\t%0.2s[0], %1.2s[0]
-    umov\\t%w0, %1.s[0]
-    fmov\\t%s0, %w1
-    mov\\t%w0, %w1
-    movi\\t%d0, 0
-    * return aarch64_output_simd_mov_immediate (operands[1], 32);"
-  [(set_attr "type" "neon_load1_1reg, store_8, neon_store1_1reg,\
-		     neon_logic, neon_to_gp, f_mcr,\
-		     mov_reg, neon_move, neon_move")]
-)
-
 (define_insn "*aarch64_simd_mov<mode>"
   [(set (match_operand:VQMOV 0 "nonimmediate_operand"
		"=w, Umn,  m,  w, ?r, ?w, ?r, w, w")
@@ -225,7 +193,7 @@
 
 (define_insn "aarch64_store_lane0<mode>"
   [(set (match_operand:<VEL> 0 "memory_operand" "=m")
-	(vec_select:<VEL> (match_operand:VMOVE 1 "register_operand" "w")
+	(vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
			(parallel [(match_operand 2 "const_int_operand" "n")])))]
   "TARGET_SIMD
    && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
@@ -1090,11 +1058,11 @@
 )
 
 (define_insn "aarch64_simd_vec_set<mode>"
-  [(set (match_operand:VMOVE 0 "register_operand" "=w,w,w")
-	(vec_merge:VMOVE
-	    (vec_duplicate:VMOVE
+  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
+	(vec_merge:VALL_F16
+	    (vec_duplicate:VALL_F16
		(match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand" "w,?r,Utv"))
-	    (match_operand:VMOVE 3 "register_operand" "0,0,0")
+	    (match_operand:VALL_F16 3 "register_operand" "0,0,0")
	    (match_operand:SI 2 "immediate_operand" "i,i,i")))]
   "TARGET_SIMD"
 {
@@ -1116,14 +1084,14 @@
 )
 
 (define_insn "@aarch64_simd_vec_copy_lane<mode>"
-  [(set (match_operand:VMOVE 0 "register_operand" "=w")
-	(vec_merge:VMOVE
-	    (vec_duplicate:VMOVE
+  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
+	(vec_merge:VALL_F16
+	    (vec_duplicate:VALL_F16
	      (vec_select:<VEL>
-		(match_operand:VMOVE 3 "register_operand" "w")
+		(match_operand:VALL_F16 3 "register_operand" "w")
		(parallel
		  [(match_operand:SI 4 "immediate_operand" "i")])))
-	    (match_operand:VMOVE 1 "register_operand" "0")
+	    (match_operand:VALL_F16 1 "register_operand" "0")
	    (match_operand:SI 2 "immediate_operand" "i")))]
   "TARGET_SIMD"
 {
@@ -1431,7 +1399,7 @@
 )
 
 (define_expand "vec_set<mode>"
-  [(match_operand:VMOVE 0 "register_operand")
+  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:<VEL> 1 "aarch64_simd_nonimmediate_operand")
   (match_operand:SI 2 "immediate_operand")]
   "TARGET_SIMD"
@@ -3550,7 +3518,7 @@
 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function.  (This is FP smax/smin).
 (define_expand "reduc_<optab>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
-   (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
+   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINV)]
   "TARGET_SIMD"
   {
@@ -3565,7 +3533,7 @@
 
 (define_expand "reduc_<fmaxmin>_scal_<mode>"
   [(match_operand:<VEL> 0 "register_operand")
-   (unspec:<VEL> [(match_operand:VHSDF_P 1 "register_operand")]
+   (unspec:<VEL> [(match_operand:VHSDF 1 "register_operand")]
		 FMAXMINNMV)]
   "TARGET_SIMD"
   {
@@ -3609,8 +3577,8 @@
 )
 
 (define_insn "aarch64_reduc_<optab>_internal<mode>"
-  [(set (match_operand:VHSDF_P 0 "register_operand" "=w")
-	(unspec:VHSDF_P [(match_operand:VHSDF_P 1 "register_operand" "w")]
+  [(set (match_operand:VHSDF 0 "register_operand" "=w")
+	(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
		       FMAXMINV))]
   "TARGET_SIMD"
   "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
@@ -4255,7 +4223,7 @@
 (define_insn_and_split "aarch64_get_lane<mode>"
   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
	(vec_select:<VEL>
-	  (match_operand:VMOVE 1 "register_operand" "w, w, w")
+	  (match_operand:VALL_F16 1 "register_operand" "w, w, w")
	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
   "TARGET_SIMD"
 {
@@ -8060,7 +8028,7 @@
 
 ;; Standard pattern name vec_init<mode>.
 (define_expand "vec_init<mode>"
-  [(match_operand:VMOVE 0 "register_operand")
+  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand 1 "" "")]
   "TARGET_SIMD"
 {
@@ -8139,7 +8107,7 @@
 
 (define_expand "vec_extract<mode><Vel>"
   [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand")
-   (match_operand:VMOVE 1 "register_operand")
+   (match_operand:VALL_F16 1 "register_operand")
   (match_operand:SI 2 "immediate_operand")]
   "TARGET_SIMD"
 {
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 9a79a9e7928..7591a66e008 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -3634,7 +3634,6 @@ aarch64_classify_vector_mode (machine_mode mode)
     case E_V8BFmode:
     case E_V4SFmode:
     case E_V2DFmode:
-    case E_V2HFmode:
       return TARGET_FLOAT ? VEC_ADVSIMD : 0;
 
     default:
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index a521dbde1ec..5b26443e5b6 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -160,10 +160,6 @@
 (define_mode_iterator VHSDF [(V4HF "TARGET_SIMD_F16INST")
			     (V8HF "TARGET_SIMD_F16INST")
			     V2SF V4SF V2DF])
-;; Advanced SIMD Float modes suitable for reduction or pairwise operations
-(define_mode_iterator VHSDF_P [(V4HF "TARGET_SIMD_F16INST")
-			       (V8HF "TARGET_SIMD_F16INST")
-			       V2SF V4SF V2DF (V2HF "TARGET_SIMD_F16INST")])
 
 ;; Advanced SIMD Float modes, and DF.
 (define_mode_iterator VDQF_DF [V2SF V4SF V2DF DF])
@@ -192,22 +188,15 @@
 (define_mode_iterator VALLF [V2SF V4SF V2DF SF DF])
 
 ;; Advanced SIMD Float modes with 2 elements.
-(define_mode_iterator V2F [V2SF V2DF V2HF])
+(define_mode_iterator V2F [V2SF V2DF])
 
 ;; All Advanced SIMD modes on which we support any arithmetic operations.
 (define_mode_iterator VALL [V8QI V16QI V4HI V8HI V2SI V4SI V2DI V2SF V4SF V2DF])
 
-;; The set of all modes for which vld1 intrinsics are provided.
+;; All Advanced SIMD modes suitable for moving, loading, and storing.
 (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
				V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
 
-;; All Advanced SIMD modes suitable for moving, loading, and storing
-;; including V2HF
-(define_mode_iterator VMOVE [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
-			     V4HF V8HF V4BF V8BF V2SF V4SF V2DF
-			     (V2HF "TARGET_SIMD_F16INST")])
-
-
 ;; The VALL_F16 modes except the 128-bit 2-element ones.
 (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
				V4HF V8HF V2SF V4SF])
@@ -1090,7 +1079,7 @@
		  (V2SF "2") (V4SF "4")
		  (V1DF "1") (V2DF "2")
		  (DI "1") (DF "1")
-		  (V8DI "8") (V2HF "2")])
+		  (V8DI "8")])
 
 ;; Map a mode to the number of bits in it, if the size of the mode
 ;; is constant.
@@ -1209,7 +1198,7 @@
 (define_mode_attr Vetype [(V8QI "b") (V16QI "b")
			  (V4HI "h") (V8HI  "h")
			  (V2SI "s") (V4SI  "s")
-			  (V2DI "d") (V2HF  "h")
+			  (V2DI "d")
			  (V4HF "h") (V8HF  "h")
			  (V2SF "s") (V4SF  "s")
			  (V2DF "d")
@@ -1301,7 +1290,7 @@
 ;; more accurately.
 (define_mode_attr stype [(V8QI "b") (V16QI "b") (V4HI "s") (V8HI "s")
			 (V2SI "s") (V4SI "s") (V2DI "d") (V4HF "s")
-			 (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d") (V2HF "s")
+			 (V8HF "s") (V2SF "s") (V4SF "s") (V2DF "d")
			 (HF "s") (SF "s") (DF "d") (QI "b") (HI "s")
			 (SI "s") (DI "d")])
 
@@ -1376,8 +1365,8 @@
			(V4HF "HF") (V8HF  "HF")
			(V2SF "SF") (V4SF  "SF")
			(DF   "DF") (V2DF  "DF")
-			(SI   "SI") (V2HF  "HF")
-			(QI   "QI") (HI    "HI")
+			(SI   "SI") (HI    "HI")
+			(QI   "QI")
			(V4BF "BF") (V8BF "BF")
			(VNx16QI "QI") (VNx8QI "QI") (VNx4QI "QI") (VNx2QI "QI")
			(VNx8HI "HI") (VNx4HI "HI") (VNx2HI "HI")
@@ -1397,7 +1386,7 @@
		       (V2SF "sf") (V4SF "sf")
		       (V2DF "df") (DF   "df")
		       (SI   "si") (HI   "hi")
-		       (QI   "qi") (V2HF "hf")
+		       (QI   "qi")
		       (V4BF "bf") (V8BF "bf")
		       (VNx16QI "qi") (VNx8QI "qi") (VNx4QI "qi") (VNx2QI "qi")
		       (VNx8HI "hi") (VNx4HI "hi") (VNx2HI "hi")
@@ -1882,7 +1871,7 @@
		     (V4HF "") (V8HF "_q")
		     (V4BF "") (V8BF "_q")
		     (V2SF "") (V4SF  "_q")
-		     (V2HF "") (V2DF  "_q")
+		     (V2DF "_q")
		     (QI "") (HI "") (SI "") (DI "") (HF "") (SF "") (DF "")
		     (V2x8QI "") (V2x16QI "_q")
		     (V2x4HI "") (V2x8HI "_q")
@@ -1921,7 +1910,6 @@
		      (V2SI "p") (V4SI  "v")
		      (V2DI "p") (V2DF  "p")
		      (V2SF "p") (V4SF  "v")
-		      (V2HF "p")
		      (V4HF "v") (V8HF  "v")])
 
 (define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")
diff --git a/gcc/config/arm/types.md b/gcc/config/arm/types.md
index 0a656bd792d..83e29563c8e 100644
--- a/gcc/config/arm/types.md
+++ b/gcc/config/arm/types.md
@@ -484,7 +484,6 @@
 ; neon_fp_minmax_s_q
 ; neon_fp_minmax_d
 ; neon_fp_minmax_d_q
-; neon_fp_reduc_add_h
 ; neon_fp_reduc_add_s
 ; neon_fp_reduc_add_s_q
 ; neon_fp_reduc_add_d
@@ -1035,7 +1034,6 @@
   neon_fp_minmax_d,\
   neon_fp_minmax_d_q,\
 \
-  neon_fp_reduc_add_h,\
   neon_fp_reduc_add_s,\
   neon_fp_reduc_add_s_q,\
   neon_fp_reduc_add_d,\
@@ -1260,8 +1258,8 @@
	  neon_fp_compare_d, neon_fp_compare_d_q, neon_fp_minmax_s,\
	  neon_fp_minmax_s_q, neon_fp_minmax_d, neon_fp_minmax_d_q,\
	  neon_fp_neg_s, neon_fp_neg_s_q, neon_fp_neg_d, neon_fp_neg_d_q,\
-	  neon_fp_reduc_add_h, neon_fp_reduc_add_s, neon_fp_reduc_add_s_q,\
-	  neon_fp_reduc_add_d, neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,\
+	  neon_fp_reduc_add_s, neon_fp_reduc_add_s_q, neon_fp_reduc_add_d,\
+	  neon_fp_reduc_add_d_q, neon_fp_reduc_minmax_s,\
	  neon_fp_reduc_minmax_s_q, neon_fp_reduc_minmax_d,\
	  neon_fp_reduc_minmax_d_q,\
	  neon_fp_cvt_narrow_s_q, neon_fp_cvt_narrow_d_q,\
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
index e6021c5a427..07d71a63414 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -30,9 +30,11 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n)	\
 TEST_ALL (VEC_PERM)
 
 /* We should use one DUP for each of the 8-, 16- and 32-bit types,
-   We should use two DUPs for each of the three 64-bit types.  */
+   although we currently use LD1RW for _Float16.  We should use two
+   DUPs for each of the three 64-bit types.  */
 /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 3 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
+/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
 /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
 /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
 /* { dg-final { scan-assembler-not {\tzip2\t} } } */
@@ -51,7 +53,7 @@ TEST_ALL (VEC_PERM)
 
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
 /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
 /* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-not {\tstr} } } */
-/* { dg-final { scan-assembler-not {\tstr\th[0-9]+} } } */
+/* { dg-final { scan-assembler-times {\tstr} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
 /* { dg-final { scan-assembler-not {\tuqdec} } } */
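
For context, here is a minimal C sketch of the source-level constructs the
reverted support touched. It is not from the patch; the type alias v2hf, the
function names, and the flag choices are illustrative assumptions using GCC's
vector extensions. With TARGET_SIMD_F16INST (e.g. -march=armv8.2-a+fp16) the
reverted commit made a two-element _Float16 vector a native V2HFmode value;
after this revert, aarch64_classify_vector_mode no longer returns VEC_ADVSIMD
for V2HF, so such vectors fall back to generic (element-wise) lowering. The
second function mirrors the shape of the slp_1.c kernel and needs SVE
(e.g. -O2 -march=armv8.2-a+sve).

/* Illustrative sketch only; not part of the commit.  */
typedef _Float16 v2hf __attribute__ ((vector_size (4))); /* V2HFmode */

v2hf
dup_v2hf (_Float16 x)
{
  /* A vec_duplicate:V2HF.  The reverted commit matched this with the
     "aarch64_simd_dupv2hf" insn ("sli %d0, %d0, 16"); after the revert
     it is expanded element by element.  */
  return (v2hf) { x, x };
}

/* Shaped like slp_1.c's vec_slp_##TYPE kernel (names assumed).  Per the
   restored test expectations, SLP for _Float16 again spills the b/c pair
   with two "str h..." stores and broadcasts it with one LD1RW, instead of
   building the pair with a DUP on the .s view.  */
void
vec_slp_f16 (_Float16 *restrict a, _Float16 b, _Float16 c, int n)
{
  for (int i = 0; i < n; ++i)
    {
      a[i * 2] = b;
      a[i * 2 + 1] = c;
    }
}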