public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern
@ 2023-11-09  8:22 Stefan Schulze Frielinghaus
  2023-11-09  8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
                   ` (2 more replies)
  0 siblings, 3 replies; 6+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-11-09  8:22 UTC (permalink / raw)
  To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus

Deal with cases where vpdi and vmr{l,h} are still applicable if the
operands of those instructions are swapped.  For example, currently for

V2DI foo (V2DI x)
{
  return (V2DI) {x[1], x[0]};
}

the assembler sequence

vlgvg   %r1,%v24,1
vzero   %v0
vlvgg   %v0,%r1,0
vmrhg   %v24,%v0,%v24

is emitted.  With this patch a single vpdi is emitted.

Extensive tests are included in a subsequent patch of this series where
more cases are covered.

Bootstrapped and regtested on s390.  Ok for mainline?

gcc/ChangeLog:

	* config/s390/s390.cc (expand_perm_with_merge): Deal with cases
	where vmr{l,h} are still applicable if the operands are swapped.
	(expand_perm_with_vpdi): Likewise for vpdi.
---
 gcc/config/s390/s390.cc | 118 ++++++++++++++++++++++++++++++----------
 1 file changed, 90 insertions(+), 28 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 64f56d8effa..185eb59f8b8 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17532,40 +17532,86 @@ struct expand_vec_perm_d
 static bool
 expand_perm_with_merge (const struct expand_vec_perm_d &d)
 {
-  bool merge_lo_p = true;
-  bool merge_hi_p = true;
-
-  if (d.nelt % 2)
+  static const unsigned char hi_perm_di[2] = {0, 2};
+  static const unsigned char hi_perm_si[4] = {0, 4, 1, 5};
+  static const unsigned char hi_perm_hi[8] = {0, 8, 1, 9, 2, 10, 3, 11};
+  static const unsigned char hi_perm_qi[16]
+    = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
+
+  static const unsigned char hi_perm_di_swap[2] = {2, 0};
+  static const unsigned char hi_perm_si_swap[4] = {4, 0, 6, 2};
+  static const unsigned char hi_perm_hi_swap[8] = {8, 0, 10, 2, 12, 4, 14, 6};
+  static const unsigned char hi_perm_qi_swap[16]
+    = {16, 0, 18, 2, 20, 4, 22, 6, 24, 8, 26, 10, 28, 12, 30, 14};
+
+  static const unsigned char lo_perm_di[2] = {1, 3};
+  static const unsigned char lo_perm_si[4] = {2, 6, 3, 7};
+  static const unsigned char lo_perm_hi[8] = {4, 12, 5, 13, 6, 14, 7, 15};
+  static const unsigned char lo_perm_qi[16]
+    = {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
+
+  static const unsigned char lo_perm_di_swap[2] = {3, 1};
+  static const unsigned char lo_perm_si_swap[4] = {5, 1, 7, 3};
+  static const unsigned char lo_perm_hi_swap[8] = {9, 1, 11, 3, 13, 5, 15, 7};
+  static const unsigned char lo_perm_qi_swap[16]
+    = {17, 1, 19, 3, 21, 5, 23, 7, 25, 9, 27, 11, 29, 13, 31, 15};
+
+  bool merge_lo_p = false;
+  bool merge_hi_p = false;
+  bool swap_operands_p = false;
+
+  if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
+      || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
+      || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi, 8) == 0)
+      || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi, 16) == 0))
+    {
+      merge_hi_p = true;
+    }
+  else if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di_swap, 2) == 0)
+	   || (d.nelt == 4 && memcmp (d.perm, hi_perm_si_swap, 4) == 0)
+	   || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi_swap, 8) == 0)
+	   || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi_swap, 16) == 0))
+    {
+      merge_hi_p = true;
+      swap_operands_p = true;
+    }
+  else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di, 2) == 0)
+	   || (d.nelt == 4 && memcmp (d.perm, lo_perm_si, 4) == 0)
+	   || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi, 8) == 0)
+	   || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi, 16) == 0))
+    {
+      merge_lo_p = true;
+    }
+  else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di_swap, 2) == 0)
+	   || (d.nelt == 4 && memcmp (d.perm, lo_perm_si_swap, 4) == 0)
+	   || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi_swap, 8) == 0)
+	   || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi_swap, 16) == 0))
+    {
+      merge_lo_p = true;
+      swap_operands_p = true;
+    }
+
+  if (!merge_lo_p && !merge_hi_p)
     return false;
 
-  // For V4SI this checks for: { 0, 4, 1, 5 }
-  for (int telt = 0; telt < d.nelt; telt++)
-    if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
-      {
-	merge_hi_p = false;
-	break;
-      }
+  if (d.testing_p)
+    return merge_lo_p || merge_hi_p;
 
-  if (!merge_hi_p)
+  rtx op0, op1;
+  if (swap_operands_p)
     {
-      // For V4SI this checks for: { 2, 6, 3, 7 }
-      for (int telt = 0; telt < d.nelt; telt++)
-	if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
-	  {
-	    merge_lo_p = false;
-	    break;
-	  }
+      op0 = d.op1;
+      op1 = d.op0;
     }
   else
-    merge_lo_p = false;
-
-  if (d.testing_p)
-    return merge_lo_p || merge_hi_p;
+    {
+      op0 = d.op0;
+      op1 = d.op1;
+    }
 
-  if (merge_lo_p || merge_hi_p)
-    s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
+  s390_expand_merge (d.target, op0, op1, merge_hi_p);
 
-  return merge_lo_p || merge_hi_p;
+  return true;
 }
 
 /* Try to expand the vector permute operation described by D using the
@@ -17582,6 +17628,7 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
 {
   bool vpdi1_p = false;
   bool vpdi4_p = false;
+  bool swap_operands_p = false;
   rtx op0_reg, op1_reg;
 
   // Only V2DI and V2DF are supported here.
@@ -17590,11 +17637,20 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
 
   if (d.perm[0] == 0 && d.perm[1] == 3)
     vpdi1_p = true;
-
-  if ((d.perm[0] == 1 && d.perm[1] == 2)
+  else if (d.perm[0] == 2 && d.perm[1] == 1)
+    {
+      vpdi1_p = true;
+      swap_operands_p = true;
+    }
+  else if ((d.perm[0] == 1 && d.perm[1] == 2)
       || (d.perm[0] == 1 && d.perm[1] == 0)
       || (d.perm[0] == 3 && d.perm[1] == 2))
     vpdi4_p = true;
+  else if (d.perm[0] == 3 && d.perm[1] == 0)
+    {
+      vpdi4_p = true;
+      swap_operands_p = true;
+    }
 
   if (!vpdi1_p && !vpdi4_p)
     return false;
@@ -17611,6 +17667,12 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
     op1_reg = op0_reg;
   else if (d.only_op1)
     op0_reg = op1_reg;
+  else if (swap_operands_p)
+    {
+      rtx tmp = op0_reg;
+      op0_reg = op1_reg;
+      op1_reg = tmp;
+    }
 
   if (vpdi1_p)
     emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));
-- 
2.41.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 2/3] s390: Add expand_perm_reverse_elements
  2023-11-09  8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
@ 2023-11-09  8:22 ` Stefan Schulze Frielinghaus
  2023-11-09  8:27   ` Andreas Krebbel
  2023-11-09  8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
  2023-11-09  8:27 ` [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Andreas Krebbel
  2 siblings, 1 reply; 6+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-11-09  8:22 UTC (permalink / raw)
  To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus

Replace expand_perm_with_rot, expand_perm_with_vster, and
expand_perm_with_vstbrq with a general implementation
expand_perm_reverse_elements.

Bootstrapped and regtested on s390.  Ok for mainline?

gcc/ChangeLog:

	* config/s390/s390.cc (expand_perm_with_rot): Remove.
	(expand_perm_reverse_elements): New.
	(expand_perm_with_vster): Remove.
	(expand_perm_with_vstbrq): Remove.
	(vectorize_vec_perm_const_1): Replace removed functions with new
	one.
---
 gcc/config/s390/s390.cc | 88 ++++++++---------------------------------
 1 file changed, 16 insertions(+), 72 deletions(-)

diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 185eb59f8b8..e36efec8ddc 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17693,78 +17693,28 @@ is_reverse_perm_mask (const struct expand_vec_perm_d &d)
   return true;
 }
 
-/* The case of reversing a four-element vector [0, 1, 2, 3]
-   can be handled by first permuting the doublewords
-   [2, 3, 0, 1] and subsequently rotating them by 32 bits.  */
 static bool
-expand_perm_with_rot (const struct expand_vec_perm_d &d)
+expand_perm_reverse_elements (const struct expand_vec_perm_d &d)
 {
-  if (d.nelt != 4)
+  if (d.op0 != d.op1 || !is_reverse_perm_mask (d))
     return false;
 
-  if (d.op0 == d.op1 && is_reverse_perm_mask (d))
-    {
-      if (d.testing_p)
-	return true;
-
-      rtx tmp = gen_reg_rtx (d.vmode);
-      rtx op0_reg = force_reg (GET_MODE (d.op0), d.op0);
-
-      emit_insn (gen_vpdi4_2 (d.vmode, tmp, op0_reg, op0_reg));
-      if (d.vmode == V4SImode)
-	emit_insn (gen_rotlv4si3_di (d.target, tmp));
-      else if (d.vmode == V4SFmode)
-	emit_insn (gen_rotlv4sf3_di (d.target, tmp));
-
-      return true;
-    }
-
-  return false;
-}
+  if (d.testing_p)
+    return true;
 
-/* If we just reverse the elements, emit an eltswap if we have
-   vler/vster.  */
-static bool
-expand_perm_with_vster (const struct expand_vec_perm_d &d)
-{
-  if (TARGET_VXE2 && d.op0 == d.op1 && is_reverse_perm_mask (d)
-      && (d.vmode == V2DImode || d.vmode == V2DFmode
-	  || d.vmode == V4SImode || d.vmode == V4SFmode
-	  || d.vmode == V8HImode))
+  switch (d.vmode)
     {
-      if (d.testing_p)
-	return true;
-
-      if (d.vmode == V2DImode)
-	emit_insn (gen_eltswapv2di (d.target, d.op0));
-      else if (d.vmode == V2DFmode)
-	emit_insn (gen_eltswapv2df (d.target, d.op0));
-      else if (d.vmode == V4SImode)
-	emit_insn (gen_eltswapv4si (d.target, d.op0));
-      else if (d.vmode == V4SFmode)
-	emit_insn (gen_eltswapv4sf (d.target, d.op0));
-      else if (d.vmode == V8HImode)
-	emit_insn (gen_eltswapv8hi (d.target, d.op0));
-      return true;
+    case V1TImode: emit_move_insn (d.target, d.op0); break;
+    case V2DImode: emit_insn (gen_eltswapv2di (d.target, d.op0)); break;
+    case V4SImode: emit_insn (gen_eltswapv4si (d.target, d.op0)); break;
+    case V8HImode: emit_insn (gen_eltswapv8hi (d.target, d.op0)); break;
+    case V16QImode: emit_insn (gen_eltswapv16qi (d.target, d.op0)); break;
+    case V2DFmode: emit_insn (gen_eltswapv2df (d.target, d.op0)); break;
+    case V4SFmode: emit_insn (gen_eltswapv4sf (d.target, d.op0)); break;
+    default: gcc_unreachable();
     }
-  return false;
-}
 
-/* If we reverse a byte-vector this is the same as
-   byte reversing it which can be done with vstbrq.  */
-static bool
-expand_perm_with_vstbrq (const struct expand_vec_perm_d &d)
-{
-  if (TARGET_VXE2 && d.op0 == d.op1 && is_reverse_perm_mask (d)
-      && d.vmode == V16QImode)
-    {
-      if (d.testing_p)
-	return true;
-
-      emit_insn (gen_eltswapv16qi (d.target, d.op0));
-      return true;
-    }
-  return false;
+  return true;
 }
 
 /* Try to emit vlbr/vstbr.  Note, this is only a candidate insn since
@@ -17826,21 +17776,15 @@ expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
 static bool
 vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
 {
-  if (expand_perm_with_merge (d))
-    return true;
-
-  if (expand_perm_with_vster (d))
+  if (expand_perm_reverse_elements (d))
     return true;
 
-  if (expand_perm_with_vstbrq (d))
+  if (expand_perm_with_merge (d))
     return true;
 
   if (expand_perm_with_vpdi (d))
     return true;
 
-  if (expand_perm_with_rot (d))
-    return true;
-
   if (expand_perm_as_a_vlbr_vstbr_candidate (d))
     return true;
 
-- 
2.41.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH 3/3] s390: Revise vector reverse elements
  2023-11-09  8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
  2023-11-09  8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
@ 2023-11-09  8:22 ` Stefan Schulze Frielinghaus
  2023-11-09  8:27   ` Andreas Krebbel
  2023-11-09  8:27 ` [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Andreas Krebbel
  2 siblings, 1 reply; 6+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-11-09  8:22 UTC (permalink / raw)
  To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus

Replace UNSPEC_VEC_ELTSWAP with a vec_select implementation.

Furthermore, for a vector reverse elements operation between registers
of mode V8HI perform three rotates instead of a vperm operation since
the latter involves loading the permutation vector from the literal
pool.

Prior to z15, instead of
  larl + vl + vl + vperm
prefer
  vl + vpdi (+ verllg (+ verllf))
for a load operation.

Likewise, prior to z15, instead of
  larl + vl + vperm + vst
prefer
  vpdi (+ verllg (+ verllf)) + vst
for a store operation.

Bootstrapped and regtested on s390.  Ok for mainline?

gcc/ChangeLog:

	* config/s390/s390.md: Remove UNSPEC_VEC_ELTSWAP.
	* config/s390/vector.md (eltswapv16qi): New expander.
	(*eltswapv16qi): New insn and splitter.
	(eltswapv8hi): New insn and splitter.
	(eltswap<mode>): New insn and splitter for modes V_HW_4 as well
	as V_HW_2.
	* config/s390/vx-builtins.md (eltswap<mode>): Remove.
	(*eltswapv16qi): Remove.
	(*eltswap<mode>): Remove.
	(*eltswap<mode>_emu): Remove.

gcc/testsuite/ChangeLog:

	* gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: Remove
	vperm and substitute by vpdi et al.
	* gcc.target/s390/zvector/vec-reve-load-halfword.c: Likewise.
	* gcc.target/s390/vector/reverse-elements-1.c: New test.
	* gcc.target/s390/vector/reverse-elements-2.c: New test.
	* gcc.target/s390/vector/reverse-elements-3.c: New test.
	* gcc.target/s390/vector/reverse-elements-4.c: New test.
	* gcc.target/s390/vector/reverse-elements-5.c: New test.
	* gcc.target/s390/vector/reverse-elements-6.c: New test.
	* gcc.target/s390/vector/reverse-elements-7.c: New test.
---
 gcc/config/s390/s390.md                       |   2 -
 gcc/config/s390/vector.md                     | 146 ++++++++++++++++++
 gcc/config/s390/vx-builtins.md                | 143 -----------------
 .../s390/vector/reverse-elements-1.c          |  46 ++++++
 .../s390/vector/reverse-elements-2.c          |  16 ++
 .../s390/vector/reverse-elements-3.c          |  56 +++++++
 .../s390/vector/reverse-elements-4.c          |  67 ++++++++
 .../s390/vector/reverse-elements-5.c          |  56 +++++++
 .../s390/vector/reverse-elements-6.c          |  67 ++++++++
 .../s390/vector/reverse-elements-7.c          |  67 ++++++++
 .../s390/zvector/vec-reve-load-halfword-z14.c |   4 +-
 .../s390/zvector/vec-reve-load-halfword.c     |   4 +-
 12 files changed, 527 insertions(+), 147 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c
 create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c

diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 3f29ba21442..f5e559c1ba4 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -241,8 +241,6 @@
    UNSPEC_VEC_VFMIN
    UNSPEC_VEC_VFMAX
 
-   UNSPEC_VEC_ELTSWAP
-
    UNSPEC_NNPA_VCLFNHS_V8HI
    UNSPEC_NNPA_VCLFNLS_V8HI
    UNSPEC_NNPA_VCRNFS_V8HI
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 7d1eb36e844..c478fce09df 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -948,6 +948,152 @@
   operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
 })
 
+;; VECTOR REVERSE ELEMENTS V16QI
+
+(define_expand "eltswapv16qi"
+  [(parallel
+    [(set (match_operand:V16QI  0 "nonimmediate_operand")
+	  (vec_select:V16QI
+	   (match_operand:V16QI 1 "nonimmediate_operand")
+	   (match_dup 2)))
+     (use (match_dup 3))])]
+  "TARGET_VX"
+{
+  rtvec vec = rtvec_alloc (16);
+  for (int i = 0; i < 16; ++i)
+    RTVEC_ELT (vec, i) = GEN_INT (15 - i);
+  operands[2] = gen_rtx_PARALLEL (VOIDmode, vec);
+  operands[3] = gen_rtx_CONST_VECTOR (V16QImode, vec);
+})
+
+(define_insn_and_split "*eltswapv16qi"
+  [(set (match_operand:V16QI  0 "nonimmediate_operand" "=v,^R,^v")
+	(vec_select:V16QI
+	 (match_operand:V16QI 1 "nonimmediate_operand"  "v,^v,^R")
+	 (parallel [(const_int 15)
+		    (const_int 14)
+		    (const_int 13)
+		    (const_int 12)
+		    (const_int 11)
+		    (const_int 10)
+		    (const_int 9)
+		    (const_int 8)
+		    (const_int 7)
+		    (const_int 6)
+		    (const_int 5)
+		    (const_int 4)
+		    (const_int 3)
+		    (const_int 2)
+		    (const_int 1)
+		    (const_int 0)])))
+   (use (match_operand:V16QI 2 "permute_pattern_operand" "v,X,X"))]
+  "TARGET_VX"
+  "@
+   #
+   vstbrq\t%v1,%0
+   vlbrq\t%v0,%1"
+  "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+  [(set (match_dup 0)
+	(unspec:V16QI [(match_dup 1)
+		       (match_dup 1)
+		       (match_dup 2)]
+		      UNSPEC_VEC_PERM))]
+  ""
+  [(set_attr "cpu_facility" "*,vxe2,vxe2")
+   (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V8HI
+
+(define_insn_and_split "eltswapv8hi"
+  [(set (match_operand:V8HI  0 "nonimmediate_operand" "=v,R,v")
+	(vec_select:V8HI
+	 (match_operand:V8HI 1 "nonimmediate_operand"  "v,v,R")
+	 (parallel [(const_int 7)
+		    (const_int 6)
+		    (const_int 5)
+		    (const_int 4)
+		    (const_int 3)
+		    (const_int 2)
+		    (const_int 1)
+		    (const_int 0)])))
+   (clobber (match_scratch:V2DI 2 "=&v,X,X"))
+   (clobber (match_scratch:V4SI 3 "=&v,X,X"))]
+  "TARGET_VX"
+  "@
+   #
+   vsterh\t%v1,%0
+   vlerh\t%v0,%1"
+  "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+  [(set (match_dup 2)
+	(subreg:V2DI (match_dup 1) 0))
+   (set (match_dup 2)
+	(vec_select:V2DI
+	 (match_dup 2)
+	 (parallel [(const_int 1) (const_int 0)])))
+   (set (match_dup 2)
+	(rotate:V2DI
+	 (match_dup 2)
+	 (const_int 32)))
+   (set (match_dup 3)
+	(subreg:V4SI (match_dup 2) 0))
+   (set (match_dup 3)
+	(rotate:V4SI
+	 (match_dup 3)
+	 (const_int 16)))
+   (set (match_dup 0)
+	(subreg:V8HI (match_dup 3) 0))]
+  ""
+  [(set_attr "cpu_facility" "*,vxe2,vxe2")
+   (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V4SI / V4SF
+
+(define_insn_and_split "eltswap<mode>"
+  [(set (match_operand:V_HW_4  0 "nonimmediate_operand" "=v,R,v")
+	(vec_select:V_HW_4
+	 (match_operand:V_HW_4 1 "nonimmediate_operand"  "v,v,R")
+	 (parallel [(const_int 3)
+		    (const_int 2)
+		    (const_int 1)
+		    (const_int 0)])))
+   (clobber (match_scratch:V2DI 2 "=&v,X,X"))]
+  "TARGET_VX"
+  "@
+   #
+   vsterf\t%v1,%0
+   vlerf\t%v0,%1"
+  "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+  [(set (match_dup 2)
+	(subreg:V2DI (match_dup 1) 0))
+   (set (match_dup 2)
+	(vec_select:V2DI
+	 (match_dup 2)
+	 (parallel [(const_int 1) (const_int 0)])))
+   (set (match_dup 2)
+	(rotate:V2DI
+	 (match_dup 2)
+	 (const_int 32)))
+   (set (match_dup 0)
+	(subreg:V_HW_4 (match_dup 2) 0))]
+  ""
+  [(set_attr "cpu_facility" "*,vxe2,vxe2")
+   (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V2DI / V2DF
+
+(define_insn "eltswap<mode>"
+  [(set (match_operand:V_HW_2  0 "nonimmediate_operand" "=v,R,v")
+	(vec_select:V_HW_2
+	 (match_operand:V_HW_2 1 "nonimmediate_operand"  "v,v,R")
+	 (parallel [(const_int 1)
+		    (const_int 0)])))]
+  "TARGET_VX"
+  "@
+   vpdi\t%v0,%v1,%v1,4
+   vsterg\t%v1,%0
+   vlerg\t%v0,%1"
+  [(set_attr "cpu_facility" "vx,vxe2,vxe2")
+   (set_attr "op_type" "VRR,VRX,VRX")])
 
 ;;
 ;; Vector integer arithmetic instructions
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 10eae76777f..6f42c91e8ae 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -2163,149 +2163,6 @@
   "<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
   [(set_attr "op_type" "VRR")])
 
-; The element reversal builtins introduced with z15 have been made
-; available also for older CPUs down to z13.
-(define_expand "eltswap<mode>"
-  [(set (match_operand:VEC_HW                 0 "nonimmediate_operand" "")
-	(unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "")]
-		       UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VX")
-
-; The byte element reversal is implemented as 128 bit byte swap.
-; Alternatively this could be emitted as bswap:V1TI but the required
-; subregs appear to confuse combine.
-(define_insn "*eltswapv16qi"
-  [(set (match_operand:V16QI                0 "nonimmediate_operand" "=v,v,R")
-	(unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand"  "v,R,v")]
-		      UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VXE2"
-  "@
-   #
-   vlbrq\t%v0,%v1
-   vstbrq\t%v1,%v0"
-  [(set_attr "op_type" "*,VRX,VRX")])
-
-; vlerh, vlerf, vlerg, vsterh, vsterf, vsterg
-(define_insn "*eltswap<mode>"
-  [(set (match_operand:V_HW_HSD                   0 "nonimmediate_operand" "=v,v,R")
-	(unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "nonimmediate_operand"  "v,R,v")]
-			 UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VXE2"
-  "@
-   #
-   vler<bhfgq>\t%v0,%v1
-   vster<bhfgq>\t%v1,%v0"
-  [(set_attr "op_type" "*,VRX,VRX")])
-
-; The emulation pattern below will also accept
-;  vst (eltswap (vl))
-; i.e. both operands in memory, which reload needs to fix.
-; Split into
-;  vl
-;  vster (=vst (eltswap))
-; since we prefer vster over vler as long as the latter
-; does not support alignment hints.
-(define_split
-  [(set (match_operand:VEC_HW                 0 "memory_operand" "")
-	(unspec:VEC_HW [(match_operand:VEC_HW 1 "memory_operand" "")]
-		       UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VXE2 && can_create_pseudo_p ()"
-  [(set (match_dup 2) (match_dup 1))
-   (set (match_dup 0)
-	(unspec:VEC_HW [(match_dup 2)] UNSPEC_VEC_ELTSWAP))]
-{
-  operands[2] = gen_reg_rtx (<MODE>mode);
-})
-
-
-; Swapping v2df/v2di can be done via vpdi on z13 and z14.
-(define_split
-  [(set (match_operand:V_HW_2                 0 "register_operand" "")
-	(unspec:V_HW_2 [(match_operand:V_HW_2 1 "register_operand" "")]
-		       UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VX && can_create_pseudo_p ()"
-  [(set (match_operand:V_HW_2     0 "register_operand" "=v")
-	(vec_select:V_HW_2
-	 (vec_concat:<vec_2x_nelts>
-	  (match_operand:V_HW_2 1 "register_operand"  "v")
-	  (match_dup 1))
-	 (parallel [(const_int 1) (const_int 2)])))]
-)
-
-
-; Swapping v4df/v4si can be done via vpdi and rot.
-(define_split
-  [(set (match_operand:V_HW_4                 0 "register_operand" "")
-	(unspec:V_HW_4 [(match_operand:V_HW_4 1 "register_operand" "")]
-		       UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VX && can_create_pseudo_p ()"
-  [(set (match_dup 2)
-	(vec_select:V_HW_4
-	 (vec_concat:<vec_2x_nelts>
-	  (match_dup 1)
-	  (match_dup 1))
-	 (parallel [(const_int 2) (const_int 3) (const_int 4) (const_int 5)])))
- (set (match_dup 3)
-  (subreg:V2DI (match_dup 2) 0))
- (set (match_dup 4)
-  (rotate:V2DI
-   (match_dup 3)
-   (const_int 32)))
- (set (match_operand:V_HW_4 0)
-  (subreg:V_HW_4 (match_dup 4) 0))]
-{
-  operands[2] = gen_reg_rtx (<MODE>mode);
-  operands[3] = gen_reg_rtx (V2DImode);
-  operands[4] = gen_reg_rtx (V2DImode);
-})
-
-; z15 has instructions for doing element reversal from mem to reg
-; or the other way around.  For reg to reg or on pre z15 machines
-; we have to emulate it with vector permute.
-(define_insn_and_split "*eltswap<mode>_emu"
-  [(set (match_operand:VEC_HW                 0 "nonimmediate_operand" "=vR")
-	(unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "vR")]
-		       UNSPEC_VEC_ELTSWAP))]
-  "TARGET_VX && can_create_pseudo_p ()"
-  "#"
-  "&& ((!memory_operand (operands[0], <MODE>mode)
-        && !memory_operand (operands[1], <MODE>mode))
-       || !TARGET_VXE2)"
-  [(set (match_dup 3)
-	(unspec:V16QI [(match_dup 4)
-		       (match_dup 4)
-		       (match_dup 2)]
-		      UNSPEC_VEC_PERM))
-   (set (match_dup 0) (subreg:VEC_HW (match_dup 3) 0))]
-{
-  static char p[4][16] =
-    { { 15, 14, 13, 12, 11, 10, 9,  8,  7,  6,  5,  4,  3,  2,  1,  0 },   /* Q */
-      { 14, 15, 12, 13, 10, 11, 8,  9,  6,  7,  4,  5,  2,  3,  0,  1 },   /* H */
-      { 12, 13, 14, 15, 8,  9,  10, 11, 4,  5,  6,  7,  0,  1,  2,  3 },   /* S */
-      { 8,  9,  10, 11, 12, 13, 14, 15, 0,  1,  2,  3,  4,  5,  6,  7 } }; /* D */
-  char *perm;
-  rtx perm_rtx[16], constv;
-
-  switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
-    {
-    case 1: perm = p[0]; break;
-    case 2: perm = p[1]; break;
-    case 4: perm = p[2]; break;
-    case 8: perm = p[3]; break;
-    default: gcc_unreachable ();
-    }
-
-  for (int i = 0; i < 16; i++)
-    perm_rtx[i] = GEN_INT (perm[i]);
-
-  operands[1] = force_reg (<MODE>mode, operands[1]);
-  operands[2] = gen_reg_rtx (V16QImode);
-  operands[3] = gen_reg_rtx (V16QImode);
-  operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0);
-  constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)));
-  emit_move_insn (operands[2], constv);
-})
-
 ; vec_insert (__builtin_bswap32 (*a), b, 1)        set-element-bswap-2.c
 ; b[1] = __builtin_bswap32 (*a)                    set-element-bswap-3.c
 ; vlebrh, vlebrf, vlebrg
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c
new file mode 100644
index 00000000000..4a2541b7ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c
@@ -0,0 +1,46 @@
+/* { dg-compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 4 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+V8HI
+v8hi (V8HI x)
+{
+  V8HI y;
+  for (int i = 0; i < 8; ++i)
+    y[i] = x[7 - i];
+  return y;
+}
+
+V4SI
+v4si (V4SI x)
+{
+  V4SI y;
+  for (int i = 0; i < 4; ++i)
+    y[i] = x[3 - i];
+  return y;
+}
+
+V2DI
+v2di (V2DI x)
+{
+  V2DI y;
+  for (int i = 0; i < 2; ++i)
+    y[i] = x[1 - i];
+  return y;
+}
+
+V2DF
+v2df (V2DF x)
+{
+  V2DF y;
+  for (int i = 0; i < 2; ++i)
+    y[i] = x[1 - i];
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c
new file mode 100644
index 00000000000..ec0d1da7d57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c
@@ -0,0 +1,16 @@
+/* { dg-compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef float __attribute__ ((vector_size (16))) V4SF;
+
+V4SF
+v4sf (V4SF x)
+{
+  V4SF y;
+  for (int i = 0; i < 4; ++i)
+    y[i] = x[3 - i];
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c
new file mode 100644
index 00000000000..3f69db8831c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c
@@ -0,0 +1,56 @@
+/* { dg-compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+V8HI
+v8hi (V8HI *x)
+{
+  V8HI y;
+  for (int i = 0; i < 8; ++i)
+    y[i] = (*x)[7 - i];
+  return y;
+}
+
+V4SI
+v4si (V4SI *x)
+{
+  V4SI y;
+  for (int i = 0; i < 4; ++i)
+    y[i] = (*x)[3 - i];
+  return y;
+}
+
+V2DI
+v2di (V2DI *x)
+{
+  V2DI y;
+  for (int i = 0; i < 2; ++i)
+    y[i] = (*x)[1 - i];
+  return y;
+}
+
+V4SF
+v4sf (V4SF *x)
+{
+  V4SF y;
+  for (int i = 0; i < 4; ++i)
+    y[i] = (*x)[3 - i];
+  return y;
+}
+
+V2DF
+v2df (V2DF *x)
+{
+  V2DF y;
+  for (int i = 0; i < 2; ++i)
+    y[i] = (*x)[1 - i];
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c
new file mode 100644
index 00000000000..5027ed55f50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c
@@ -0,0 +1,67 @@
+/* { dg-compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvlbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvler[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+V16QI
+v16qi (V16QI *x)
+{
+  V16QI y;
+  for (int i = 0; i < 16; ++i)
+    y[i] = (*x)[15 - i];
+  return y;
+}
+
+V8HI
+v8hi (V8HI *x)
+{
+  V8HI y;
+  for (int i = 0; i < 8; ++i)
+    y[i] = (*x)[7 - i];
+  return y;
+}
+
+V4SI
+v4si (V4SI *x)
+{
+  V4SI y;
+  for (int i = 0; i < 4; ++i)
+    y[i] = (*x)[3 - i];
+  return y;
+}
+
+V2DI
+v2di (V2DI *x)
+{
+  V2DI y;
+  for (int i = 0; i < 2; ++i)
+    y[i] = (*x)[1 - i];
+  return y;
+}
+
+V4SF
+v4sf (V4SF *x)
+{
+  V4SF y;
+  for (int i = 0; i < 4; ++i)
+    y[i] = (*x)[3 - i];
+  return y;
+}
+
+V2DF
+v2df (V2DF *x)
+{
+  V2DF y;
+  for (int i = 0; i < 2; ++i)
+    y[i] = (*x)[1 - i];
+  return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c
new file mode 100644
index 00000000000..8c250aa681b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c
@@ -0,0 +1,56 @@
+/* { dg-compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+void
+v8hi (V8HI *x, V8HI y)
+{
+  V8HI z;
+  for (int i = 0; i < 8; ++i)
+    z[i] = y[7 - i];
+  *x = z;
+}
+
+void
+v4si (V4SI *x, V4SI y)
+{
+  V4SI z;
+  for (int i = 0; i < 4; ++i)
+    z[i] = y[3 - i];
+  *x = z;
+}
+
+void
+v2di (V2DI *x, V2DI y)
+{
+  V2DI z;
+  for (int i = 0; i < 2; ++i)
+    z[i] = y[1 - i];
+  *x = z;
+}
+
+void
+v4sf (V4SF *x, V4SF y)
+{
+  V4SF z;
+  for (int i = 0; i < 4; ++i)
+    z[i] = y[3 - i];
+  *x = z;
+}
+
+void
+v2df (V2DF *x, V2DF y)
+{
+  V2DF z;
+  for (int i = 0; i < 2; ++i)
+    z[i] = y[1 - i];
+  *x = z;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c
new file mode 100644
index 00000000000..7e2b2356788
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvstbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvster[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+void
+v16qi (V16QI *x, V16QI y)  /* *x = y with elements 0..15 reversed.  */
+{
+  V16QI z;
+  for (int i = 0; i < 16; ++i)
+    z[i] = y[15 - i];  /* Mirror index: i <-> 15 - i.  */
+  *x = z;
+}
+
+void
+v8hi (V8HI *x, V8HI y)  /* *x = y with elements 0..7 reversed.  */
+{
+  V8HI z;
+  for (int i = 0; i < 8; ++i)
+    z[i] = y[7 - i];  /* Mirror index: i <-> 7 - i.  */
+  *x = z;
+}
+
+void
+v4si (V4SI *x, V4SI y)  /* *x = y with elements 0..3 reversed.  */
+{
+  V4SI z;
+  for (int i = 0; i < 4; ++i)
+    z[i] = y[3 - i];  /* Mirror index: 0<->3, 1<->2.  */
+  *x = z;
+}
+
+void
+v2di (V2DI *x, V2DI y)  /* *x = y with elements 0 and 1 swapped.  */
+{
+  V2DI z;
+  for (int i = 0; i < 2; ++i)
+    z[i] = y[1 - i];  /* Mirror index: 0<->1.  */
+  *x = z;
+}
+
+void
+v4sf (V4SF *x, V4SF y)  /* *x = y with elements 0..3 reversed.  */
+{
+  V4SF z;
+  for (int i = 0; i < 4; ++i)
+    z[i] = y[3 - i];  /* Mirror index: 0<->3, 1<->2.  */
+  *x = z;
+}
+
+void
+v2df (V2DF *x, V2DF y)  /* *x = y with elements 0 and 1 swapped.  */
+{
+  V2DF z;
+  for (int i = 0; i < 2; ++i)
+    z[i] = y[1 - i];  /* Mirror index: 0<->1.  */
+  *x = z;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c
new file mode 100644
index 00000000000..046fcc0790a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvstbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvster[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+void
+v16qi (V16QI *x, V16QI *y)  /* *x = *y with elements 0..15 reversed.  */
+{
+  V16QI z;
+  for (int i = 0; i < 16; ++i)
+    z[i] = (*y)[15 - i];  /* Mirror index: i <-> 15 - i.  */
+  *x = z;
+}
+
+void
+v8hi (V8HI *x, V8HI *y)  /* *x = *y with elements 0..7 reversed.  */
+{
+  V8HI z;
+  for (int i = 0; i < 8; ++i)
+    z[i] = (*y)[7 - i];  /* Mirror index: i <-> 7 - i.  */
+  *x = z;
+}
+
+void
+v4si (V4SI *x, V4SI *y)  /* *x = *y with elements 0..3 reversed.  */
+{
+  V4SI z;
+  for (int i = 0; i < 4; ++i)
+    z[i] = (*y)[3 - i];  /* Mirror index: 0<->3, 1<->2.  */
+  *x = z;
+}
+
+void
+v2di (V2DI *x, V2DI *y)  /* *x = *y with elements 0 and 1 swapped.  */
+{
+  V2DI z;
+  for (int i = 0; i < 2; ++i)
+    z[i] = (*y)[1 - i];  /* Mirror index: 0<->1.  */
+  *x = z;
+}
+
+void
+v4sf (V4SF *x, V4SF *y)  /* *x = *y with elements 0..3 reversed.  */
+{
+  V4SF z;
+  for (int i = 0; i < 4; ++i)
+    z[i] = (*y)[3 - i];  /* Mirror index: 0<->3, 1<->2.  */
+  *x = z;
+}
+
+void
+v2df (V2DF *x, V2DF *y)  /* *x = *y with elements 0 and 1 swapped.  */
+{
+  V2DF z;
+  for (int i = 0; i < 2; ++i)
+    z[i] = (*y)[1 - i];  /* Mirror index: 0<->1.  */
+  *x = z;
+}
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
index 4938ac20613..3c1e9338f80 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
@@ -21,4 +21,6 @@ baz (signed short *x)
   return vec_reve (vec_xl (0, x));
 }
 
-/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
+/* { dg-final { scan-assembler-times "vpdi\t" 3 } } */
+/* { dg-final { scan-assembler-times "verllg\t" 3 } } */
+/* { dg-final { scan-assembler-times "verllf\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
index 3c9229922ec..7b1c3f885cd 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
@@ -9,7 +9,9 @@ foo (vector signed short x)
   return vec_reve (x);
 }
 
-/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
+/* { dg-final { scan-assembler-times "vpdi\t" 1 } } */
+/* { dg-final { scan-assembler-times "verllg\t" 1 } } */
+/* { dg-final { scan-assembler-times "verllf\t" 1 } } */
 
 
 vector signed short
-- 
2.41.0


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern
  2023-11-09  8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
  2023-11-09  8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
  2023-11-09  8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
@ 2023-11-09  8:27 ` Andreas Krebbel
  2 siblings, 0 replies; 6+ messages in thread
From: Andreas Krebbel @ 2023-11-09  8:27 UTC (permalink / raw)
  To: Stefan Schulze Frielinghaus, gcc-patches

On 11/9/23 09:22, Stefan Schulze Frielinghaus wrote:
> Deal with cases where vpdi and vmr{l,h} are still applicable if the
> operands of those instructions are swapped.  For example, currently for
> 
> V2DI foo (V2DI x)
> {
>   return (V2DI) {x[1], x[0]};
> }
> 
> the assembler sequence
> 
> vlgvg   %r1,%v24,1
> vzero   %v0
> vlvgg   %v0,%r1,0
> vmrhg   %v24,%v0,%v24
> 
> is emitted.  With this patch a single vpdi is emitted.
> 
> Extensive tests are included in a subsequent patch of this series where
> more cases are covered.
> 
> Bootstrapped and regtested on s390.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.cc (expand_perm_with_merge): Deal with cases
> 	where vmr{l,h} are still applicable if the operands are swapped.
> 	(expand_perm_with_vpdi): Likewise for vpdi.

Ok, Thanks!

Andreas


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 2/3] s390: Add expand_perm_reverse_elements
  2023-11-09  8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
@ 2023-11-09  8:27   ` Andreas Krebbel
  0 siblings, 0 replies; 6+ messages in thread
From: Andreas Krebbel @ 2023-11-09  8:27 UTC (permalink / raw)
  To: Stefan Schulze Frielinghaus, gcc-patches

On 11/9/23 09:22, Stefan Schulze Frielinghaus wrote:
> Replace expand_perm_with_rot, expand_perm_with_vster, and
> expand_perm_with_vstbrq with a general implementation
> expand_perm_reverse_elements.
> 
> Bootstrapped and regtested on s390.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.cc (expand_perm_with_rot): Remove.
> 	(expand_perm_reverse_elements): New.
> 	(expand_perm_with_vster): Remove.
> 	(expand_perm_with_vstbrq): Remove.
> 	(vectorize_vec_perm_const_1): Replace removed functions with new
> 	one.

Ok, thanks!

Andreas


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH 3/3] s390: Revise vector reverse elements
  2023-11-09  8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
@ 2023-11-09  8:27   ` Andreas Krebbel
  0 siblings, 0 replies; 6+ messages in thread
From: Andreas Krebbel @ 2023-11-09  8:27 UTC (permalink / raw)
  To: Stefan Schulze Frielinghaus, gcc-patches

On 11/9/23 09:22, Stefan Schulze Frielinghaus wrote:
> Replace UNSPEC_VEC_ELTSWAP with a vec_select implementation.
> 
> Furthermore, for a vector reverse elements operation between registers
> of mode V8HI perform three rotates instead of a vperm operation since
> the latter involves loading the permutation vector from the literal
> pool.
> 
> Prior to z15, instead of
>   larl + vl + vl + vperm
> prefer
>   vl + vpdi (+ verllg (+ verllf))
> for a load operation.
> 
> Likewise, prior to z15, instead of
>   larl + vl + vperm + vst
> prefer
>   vpdi (+ verllg (+ verllf)) + vst
> for a store operation.
> 
> Bootstrapped and regtested on s390.  Ok for mainline?
> 
> gcc/ChangeLog:
> 
> 	* config/s390/s390.md: Remove UNSPEC_VEC_ELTSWAP.
> 	* config/s390/vector.md (eltswapv16qi): New expander.
> 	(*eltswapv16qi): New insn and splitter.
> 	(eltswapv8hi): New insn and splitter.
> 	(eltswap<mode>): New insn and splitter for modes V_HW_4 as well
> 	as V_HW_2.
> 	* config/s390/vx-builtins.md (eltswap<mode>): Remove.
> 	(*eltswapv16qi): Remove.
> 	(*eltswap<mode>): Remove.
> 	(*eltswap<mode>_emu): Remove.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: Remove
> 	vperm and substitute by vpdi et al.
> 	* gcc.target/s390/zvector/vec-reve-load-halfword.c: Likewise.
> 	* gcc.target/s390/vector/reverse-elements-1.c: New test.
> 	* gcc.target/s390/vector/reverse-elements-2.c: New test.
> 	* gcc.target/s390/vector/reverse-elements-3.c: New test.
> 	* gcc.target/s390/vector/reverse-elements-4.c: New test.
> 	* gcc.target/s390/vector/reverse-elements-5.c: New test.
> 	* gcc.target/s390/vector/reverse-elements-6.c: New test.
> 	* gcc.target/s390/vector/reverse-elements-7.c: New test.

Ok, thanks!

Andreas


^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2023-11-09  8:28 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-09  8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
2023-11-09  8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
2023-11-09  8:27   ` Andreas Krebbel
2023-11-09  8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
2023-11-09  8:27   ` Andreas Krebbel
2023-11-09  8:27 ` [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Andreas Krebbel

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).