* [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern
@ 2023-11-09 8:22 Stefan Schulze Frielinghaus
2023-11-09 8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
` (2 more replies)
0 siblings, 3 replies; 6+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-11-09 8:22 UTC (permalink / raw)
To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus
Deal with cases where vpdi and vmr{l,h} are still applicable if the
operands of those instructions are swapped. For example, currently for
V2DI foo (V2DI x)
{
return (V2DI) {x[1], x[0]};
}
the assembler sequence
vlgvg %r1,%v24,1
vzero %v0
vlvgg %v0,%r1,0
vmrhg %v24,%v0,%v24
is emitted. With this patch a single vpdi is emitted.
Extensive tests are included in a subsequent patch of this series where
more cases are covered.
Bootstrapped and regtested on s390. Ok for mainline?
gcc/ChangeLog:
* config/s390/s390.cc (expand_perm_with_merge): Deal with cases
where vmr{l,h} are still applicable if the operands are swapped.
(expand_perm_with_vpdi): Likewise for vpdi.
---
gcc/config/s390/s390.cc | 118 ++++++++++++++++++++++++++++++----------
1 file changed, 90 insertions(+), 28 deletions(-)
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 64f56d8effa..185eb59f8b8 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17532,40 +17532,86 @@ struct expand_vec_perm_d
static bool
expand_perm_with_merge (const struct expand_vec_perm_d &d)
{
- bool merge_lo_p = true;
- bool merge_hi_p = true;
-
- if (d.nelt % 2)
+ static const unsigned char hi_perm_di[2] = {0, 2};
+ static const unsigned char hi_perm_si[4] = {0, 4, 1, 5};
+ static const unsigned char hi_perm_hi[8] = {0, 8, 1, 9, 2, 10, 3, 11};
+ static const unsigned char hi_perm_qi[16]
+ = {0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23};
+ /* vmrh (op1, op0): { op1[0], op0[0], op1[1], op0[1], ... }.  */
+ static const unsigned char hi_perm_di_swap[2] = {2, 0};
+ static const unsigned char hi_perm_si_swap[4] = {4, 0, 5, 1};
+ static const unsigned char hi_perm_hi_swap[8] = {8, 0, 9, 1, 10, 2, 11, 3};
+ static const unsigned char hi_perm_qi_swap[16]
+ = {16, 0, 17, 1, 18, 2, 19, 3, 20, 4, 21, 5, 22, 6, 23, 7};
+ /* vmrl (op0, op1): interleave the low halves, op0 element first.  */
+ static const unsigned char lo_perm_di[2] = {1, 3};
+ static const unsigned char lo_perm_si[4] = {2, 6, 3, 7};
+ static const unsigned char lo_perm_hi[8] = {4, 12, 5, 13, 6, 14, 7, 15};
+ static const unsigned char lo_perm_qi[16]
+ = {8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31};
+ /* vmrl (op1, op0): interleave the low halves, op1 element first.  */
+ static const unsigned char lo_perm_di_swap[2] = {3, 1};
+ static const unsigned char lo_perm_si_swap[4] = {6, 2, 7, 3};
+ static const unsigned char lo_perm_hi_swap[8] = {12, 4, 13, 5, 14, 6, 15, 7};
+ static const unsigned char lo_perm_qi_swap[16]
+ = {24, 8, 25, 9, 26, 10, 27, 11, 28, 12, 29, 13, 30, 14, 31, 15};
+
+ bool merge_lo_p = false;
+ bool merge_hi_p = false;
+ bool swap_operands_p = false;
+
+ if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, hi_perm_si, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi, 16) == 0))
+ {
+ merge_hi_p = true;
+ }
+ else if ((d.nelt == 2 && memcmp (d.perm, hi_perm_di_swap, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, hi_perm_si_swap, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, hi_perm_hi_swap, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, hi_perm_qi_swap, 16) == 0))
+ {
+ merge_hi_p = true;
+ swap_operands_p = true;
+ }
+ else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, lo_perm_si, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi, 16) == 0))
+ {
+ merge_lo_p = true;
+ }
+ else if ((d.nelt == 2 && memcmp (d.perm, lo_perm_di_swap, 2) == 0)
+ || (d.nelt == 4 && memcmp (d.perm, lo_perm_si_swap, 4) == 0)
+ || (d.nelt == 8 && memcmp (d.perm, lo_perm_hi_swap, 8) == 0)
+ || (d.nelt == 16 && memcmp (d.perm, lo_perm_qi_swap, 16) == 0))
+ {
+ merge_lo_p = true;
+ swap_operands_p = true;
+ }
+
+ if (!merge_lo_p && !merge_hi_p)
return false;
- // For V4SI this checks for: { 0, 4, 1, 5 }
- for (int telt = 0; telt < d.nelt; telt++)
- if (d.perm[telt] != telt / 2 + (telt % 2) * d.nelt)
- {
- merge_hi_p = false;
- break;
- }
+ if (d.testing_p)
+ return merge_lo_p || merge_hi_p;
- if (!merge_hi_p)
+ rtx op0, op1;
+ if (swap_operands_p)
{
- // For V4SI this checks for: { 2, 6, 3, 7 }
- for (int telt = 0; telt < d.nelt; telt++)
- if (d.perm[telt] != (telt + d.nelt) / 2 + (telt % 2) * d.nelt)
- {
- merge_lo_p = false;
- break;
- }
+ op0 = d.op1;
+ op1 = d.op0;
}
else
- merge_lo_p = false;
-
- if (d.testing_p)
- return merge_lo_p || merge_hi_p;
+ {
+ op0 = d.op0;
+ op1 = d.op1;
+ }
- if (merge_lo_p || merge_hi_p)
- s390_expand_merge (d.target, d.op0, d.op1, merge_hi_p);
+ s390_expand_merge (d.target, op0, op1, merge_hi_p);
- return merge_lo_p || merge_hi_p;
+ return true;
}
/* Try to expand the vector permute operation described by D using the
@@ -17582,6 +17628,7 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
{
bool vpdi1_p = false;
bool vpdi4_p = false;
+ bool swap_operands_p = false;
rtx op0_reg, op1_reg;
// Only V2DI and V2DF are supported here.
@@ -17590,11 +17637,20 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
if (d.perm[0] == 0 && d.perm[1] == 3)
vpdi1_p = true;
-
- if ((d.perm[0] == 1 && d.perm[1] == 2)
+ else if (d.perm[0] == 2 && d.perm[1] == 1)
+ {
+ vpdi1_p = true;
+ swap_operands_p = true;
+ }
+ else if ((d.perm[0] == 1 && d.perm[1] == 2)
|| (d.perm[0] == 1 && d.perm[1] == 0)
|| (d.perm[0] == 3 && d.perm[1] == 2))
vpdi4_p = true;
+ else if (d.perm[0] == 3 && d.perm[1] == 0)
+ {
+ vpdi4_p = true;
+ swap_operands_p = true;
+ }
if (!vpdi1_p && !vpdi4_p)
return false;
@@ -17611,6 +17667,12 @@ expand_perm_with_vpdi (const struct expand_vec_perm_d &d)
op1_reg = op0_reg;
else if (d.only_op1)
op0_reg = op1_reg;
+ else if (swap_operands_p)
+ {
+ rtx tmp = op0_reg;
+ op0_reg = op1_reg;
+ op1_reg = tmp;
+ }
if (vpdi1_p)
emit_insn (gen_vpdi1 (d.vmode, d.target, op0_reg, op1_reg));
--
2.41.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 2/3] s390: Add expand_perm_reverse_elements
2023-11-09 8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
@ 2023-11-09 8:22 ` Stefan Schulze Frielinghaus
2023-11-09 8:27 ` Andreas Krebbel
2023-11-09 8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
2023-11-09 8:27 ` [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Andreas Krebbel
2 siblings, 1 reply; 6+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-11-09 8:22 UTC (permalink / raw)
To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus
Replace expand_perm_with_rot, expand_perm_with_vster, and
expand_perm_with_vstbrq with a general implementation
expand_perm_reverse_elements.
Bootstrapped and regtested on s390. Ok for mainline?
gcc/ChangeLog:
* config/s390/s390.cc (expand_perm_with_rot): Remove.
(expand_perm_reverse_elements): New.
(expand_perm_with_vster): Remove.
(expand_perm_with_vstbrq): Remove.
(vectorize_vec_perm_const_1): Replace removed functions with new
one.
---
gcc/config/s390/s390.cc | 88 ++++++++---------------------------------
1 file changed, 16 insertions(+), 72 deletions(-)
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 185eb59f8b8..e36efec8ddc 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -17693,78 +17693,28 @@ is_reverse_perm_mask (const struct expand_vec_perm_d &d)
return true;
}
-/* The case of reversing a four-element vector [0, 1, 2, 3]
- can be handled by first permuting the doublewords
- [2, 3, 0, 1] and subsequently rotating them by 32 bits. */
static bool
-expand_perm_with_rot (const struct expand_vec_perm_d &d)
+expand_perm_reverse_elements (const struct expand_vec_perm_d &d)
{
- if (d.nelt != 4)
+ if (d.op0 != d.op1 || !is_reverse_perm_mask (d))
return false;
- if (d.op0 == d.op1 && is_reverse_perm_mask (d))
- {
- if (d.testing_p)
- return true;
-
- rtx tmp = gen_reg_rtx (d.vmode);
- rtx op0_reg = force_reg (GET_MODE (d.op0), d.op0);
-
- emit_insn (gen_vpdi4_2 (d.vmode, tmp, op0_reg, op0_reg));
- if (d.vmode == V4SImode)
- emit_insn (gen_rotlv4si3_di (d.target, tmp));
- else if (d.vmode == V4SFmode)
- emit_insn (gen_rotlv4sf3_di (d.target, tmp));
-
- return true;
- }
-
- return false;
-}
+ if (d.testing_p)
+ return true;
-/* If we just reverse the elements, emit an eltswap if we have
- vler/vster. */
-static bool
-expand_perm_with_vster (const struct expand_vec_perm_d &d)
-{
- if (TARGET_VXE2 && d.op0 == d.op1 && is_reverse_perm_mask (d)
- && (d.vmode == V2DImode || d.vmode == V2DFmode
- || d.vmode == V4SImode || d.vmode == V4SFmode
- || d.vmode == V8HImode))
+ switch (d.vmode)
{
- if (d.testing_p)
- return true;
-
- if (d.vmode == V2DImode)
- emit_insn (gen_eltswapv2di (d.target, d.op0));
- else if (d.vmode == V2DFmode)
- emit_insn (gen_eltswapv2df (d.target, d.op0));
- else if (d.vmode == V4SImode)
- emit_insn (gen_eltswapv4si (d.target, d.op0));
- else if (d.vmode == V4SFmode)
- emit_insn (gen_eltswapv4sf (d.target, d.op0));
- else if (d.vmode == V8HImode)
- emit_insn (gen_eltswapv8hi (d.target, d.op0));
- return true;
+ case V1TImode: emit_move_insn (d.target, d.op0); break;
+ case V2DImode: emit_insn (gen_eltswapv2di (d.target, d.op0)); break;
+ case V4SImode: emit_insn (gen_eltswapv4si (d.target, d.op0)); break;
+ case V8HImode: emit_insn (gen_eltswapv8hi (d.target, d.op0)); break;
+ case V16QImode: emit_insn (gen_eltswapv16qi (d.target, d.op0)); break;
+ case V2DFmode: emit_insn (gen_eltswapv2df (d.target, d.op0)); break;
+ case V4SFmode: emit_insn (gen_eltswapv4sf (d.target, d.op0)); break;
+ default: gcc_unreachable();
}
- return false;
-}
-/* If we reverse a byte-vector this is the same as
- byte reversing it which can be done with vstbrq. */
-static bool
-expand_perm_with_vstbrq (const struct expand_vec_perm_d &d)
-{
- if (TARGET_VXE2 && d.op0 == d.op1 && is_reverse_perm_mask (d)
- && d.vmode == V16QImode)
- {
- if (d.testing_p)
- return true;
-
- emit_insn (gen_eltswapv16qi (d.target, d.op0));
- return true;
- }
- return false;
+ return true;
}
/* Try to emit vlbr/vstbr. Note, this is only a candidate insn since
@@ -17826,21 +17776,15 @@ expand_perm_as_a_vlbr_vstbr_candidate (const struct expand_vec_perm_d &d)
static bool
vectorize_vec_perm_const_1 (const struct expand_vec_perm_d &d)
{
- if (expand_perm_with_merge (d))
- return true;
-
- if (expand_perm_with_vster (d))
+ if (expand_perm_reverse_elements (d))
return true;
- if (expand_perm_with_vstbrq (d))
+ if (expand_perm_with_merge (d))
return true;
if (expand_perm_with_vpdi (d))
return true;
- if (expand_perm_with_rot (d))
- return true;
-
if (expand_perm_as_a_vlbr_vstbr_candidate (d))
return true;
--
2.41.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH 3/3] s390: Revise vector reverse elements
2023-11-09 8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
2023-11-09 8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
@ 2023-11-09 8:22 ` Stefan Schulze Frielinghaus
2023-11-09 8:27 ` Andreas Krebbel
2023-11-09 8:27 ` [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Andreas Krebbel
2 siblings, 1 reply; 6+ messages in thread
From: Stefan Schulze Frielinghaus @ 2023-11-09 8:22 UTC (permalink / raw)
To: krebbel, gcc-patches; +Cc: Stefan Schulze Frielinghaus
Replace UNSPEC_VEC_ELTSWAP with a vec_select implementation.
Furthermore, for a vector reverse elements operation between registers
of mode V8HI perform three rotates instead of a vperm operation since
the latter involves loading the permutation vector from the literal
pool.
Prior to z15, instead of
larl + vl + vl + vperm
prefer
vl + vpdi (+ verllg (+ verllf))
for a load operation.
Likewise, prior to z15, instead of
larl + vl + vperm + vst
prefer
vpdi (+ verllg (+ verllf)) + vst
for a store operation.
Bootstrapped and regtested on s390. Ok for mainline?
gcc/ChangeLog:
* config/s390/s390.md: Remove UNSPEC_VEC_ELTSWAP.
* config/s390/vector.md (eltswapv16qi): New expander.
(*eltswapv16qi): New insn and splitter.
(eltswapv8hi): New insn and splitter.
(eltswap<mode>): New insn and splitter for modes V_HW_4 as well
as V_HW_2.
* config/s390/vx-builtins.md (eltswap<mode>): Remove.
(*eltswapv16qi): Remove.
(*eltswap<mode>): Remove.
(*eltswap<mode>_emu): Remove.
gcc/testsuite/ChangeLog:
* gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: Remove
vperm and substitute by vpdi et al.
* gcc.target/s390/zvector/vec-reve-load-halfword.c: Likewise.
* gcc.target/s390/vector/reverse-elements-1.c: New test.
* gcc.target/s390/vector/reverse-elements-2.c: New test.
* gcc.target/s390/vector/reverse-elements-3.c: New test.
* gcc.target/s390/vector/reverse-elements-4.c: New test.
* gcc.target/s390/vector/reverse-elements-5.c: New test.
* gcc.target/s390/vector/reverse-elements-6.c: New test.
* gcc.target/s390/vector/reverse-elements-7.c: New test.
---
gcc/config/s390/s390.md | 2 -
gcc/config/s390/vector.md | 146 ++++++++++++++++++
gcc/config/s390/vx-builtins.md | 143 -----------------
.../s390/vector/reverse-elements-1.c | 46 ++++++
.../s390/vector/reverse-elements-2.c | 16 ++
.../s390/vector/reverse-elements-3.c | 56 +++++++
.../s390/vector/reverse-elements-4.c | 67 ++++++++
.../s390/vector/reverse-elements-5.c | 56 +++++++
.../s390/vector/reverse-elements-6.c | 67 ++++++++
.../s390/vector/reverse-elements-7.c | 67 ++++++++
.../s390/zvector/vec-reve-load-halfword-z14.c | 4 +-
.../s390/zvector/vec-reve-load-halfword.c | 4 +-
12 files changed, 527 insertions(+), 147 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c
create mode 100644 gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 3f29ba21442..f5e559c1ba4 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -241,8 +241,6 @@
UNSPEC_VEC_VFMIN
UNSPEC_VEC_VFMAX
- UNSPEC_VEC_ELTSWAP
-
UNSPEC_NNPA_VCLFNHS_V8HI
UNSPEC_NNPA_VCLFNLS_V8HI
UNSPEC_NNPA_VCRNFS_V8HI
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index 7d1eb36e844..c478fce09df 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -948,6 +948,152 @@
operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
})
+;; VECTOR REVERSE ELEMENTS V16QI
+
+(define_expand "eltswapv16qi"
+ [(parallel
+ [(set (match_operand:V16QI 0 "nonimmediate_operand")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand")
+ (match_dup 2)))
+ (use (match_dup 3))])]
+ "TARGET_VX"
+{
+ rtvec vec = rtvec_alloc (16);
+ for (int i = 0; i < 16; ++i)
+ RTVEC_ELT (vec, i) = GEN_INT (15 - i);
+ operands[2] = gen_rtx_PARALLEL (VOIDmode, vec);
+ operands[3] = gen_rtx_CONST_VECTOR (V16QImode, vec);
+})
+
+(define_insn_and_split "*eltswapv16qi"
+ [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,^R,^v")
+ (vec_select:V16QI
+ (match_operand:V16QI 1 "nonimmediate_operand" "v,^v,^R")
+ (parallel [(const_int 15)
+ (const_int 14)
+ (const_int 13)
+ (const_int 12)
+ (const_int 11)
+ (const_int 10)
+ (const_int 9)
+ (const_int 8)
+ (const_int 7)
+ (const_int 6)
+ (const_int 5)
+ (const_int 4)
+ (const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))
+ (use (match_operand:V16QI 2 "permute_pattern_operand" "v,X,X"))]
+ "TARGET_VX"
+ "@
+ #
+ vstbrq\t%v1,%0
+ vlbrq\t%v0,%1"
+ "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+ [(set (match_dup 0)
+ (unspec:V16QI [(match_dup 1)
+ (match_dup 1)
+ (match_dup 2)]
+ UNSPEC_VEC_PERM))]
+ ""
+ [(set_attr "cpu_facility" "*,vxe2,vxe2")
+ (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V8HI
+
+(define_insn_and_split "eltswapv8hi"
+ [(set (match_operand:V8HI 0 "nonimmediate_operand" "=v,R,v")
+ (vec_select:V8HI
+ (match_operand:V8HI 1 "nonimmediate_operand" "v,v,R")
+ (parallel [(const_int 7)
+ (const_int 6)
+ (const_int 5)
+ (const_int 4)
+ (const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))
+ (clobber (match_scratch:V2DI 2 "=&v,X,X"))
+ (clobber (match_scratch:V4SI 3 "=&v,X,X"))]
+ "TARGET_VX"
+ "@
+ #
+ vsterh\t%v1,%0
+ vlerh\t%v0,%1"
+ "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+ [(set (match_dup 2)
+ (subreg:V2DI (match_dup 1) 0))
+ (set (match_dup 2)
+ (vec_select:V2DI
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 2)
+ (rotate:V2DI
+ (match_dup 2)
+ (const_int 32)))
+ (set (match_dup 3)
+ (subreg:V4SI (match_dup 2) 0))
+ (set (match_dup 3)
+ (rotate:V4SI
+ (match_dup 3)
+ (const_int 16)))
+ (set (match_dup 0)
+ (subreg:V8HI (match_dup 3) 0))]
+ ""
+ [(set_attr "cpu_facility" "*,vxe2,vxe2")
+ (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V4SI / V4SF
+
+(define_insn_and_split "eltswap<mode>"
+ [(set (match_operand:V_HW_4 0 "nonimmediate_operand" "=v,R,v")
+ (vec_select:V_HW_4
+ (match_operand:V_HW_4 1 "nonimmediate_operand" "v,v,R")
+ (parallel [(const_int 3)
+ (const_int 2)
+ (const_int 1)
+ (const_int 0)])))
+ (clobber (match_scratch:V2DI 2 "=&v,X,X"))]
+ "TARGET_VX"
+ "@
+ #
+ vsterf\t%v1,%0
+ vlerf\t%v0,%1"
+ "&& reload_completed && REG_P (operands[0]) && REG_P (operands[1])"
+ [(set (match_dup 2)
+ (subreg:V2DI (match_dup 1) 0))
+ (set (match_dup 2)
+ (vec_select:V2DI
+ (match_dup 2)
+ (parallel [(const_int 1) (const_int 0)])))
+ (set (match_dup 2)
+ (rotate:V2DI
+ (match_dup 2)
+ (const_int 32)))
+ (set (match_dup 0)
+ (subreg:V_HW_4 (match_dup 2) 0))]
+ ""
+ [(set_attr "cpu_facility" "*,vxe2,vxe2")
+ (set_attr "op_type" "*,VRX,VRX")])
+
+;; VECTOR REVERSE ELEMENTS V2DI / V2DF
+
+(define_insn "eltswap<mode>"
+ [(set (match_operand:V_HW_2 0 "nonimmediate_operand" "=v,R,v")
+ (vec_select:V_HW_2
+ (match_operand:V_HW_2 1 "nonimmediate_operand" "v,v,R")
+ (parallel [(const_int 1)
+ (const_int 0)])))]
+ "TARGET_VX"
+ "@
+ vpdi\t%v0,%v1,%v1,4
+ vsterg\t%v1,%0
+ vlerg\t%v0,%1"
+ [(set_attr "cpu_facility" "vx,vxe2,vxe2")
+ (set_attr "op_type" "VRR,VRX,VRX")])
;;
;; Vector integer arithmetic instructions
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 10eae76777f..6f42c91e8ae 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -2163,149 +2163,6 @@
"<vw>fmax<sdx>b\t%v0,%v1,%v2,%b3"
[(set_attr "op_type" "VRR")])
-; The element reversal builtins introduced with z15 have been made
-; available also for older CPUs down to z13.
-(define_expand "eltswap<mode>"
- [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "")
- (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX")
-
-; The byte element reversal is implemented as 128 bit byte swap.
-; Alternatively this could be emitted as bswap:V1TI but the required
-; subregs appear to confuse combine.
-(define_insn "*eltswapv16qi"
- [(set (match_operand:V16QI 0 "nonimmediate_operand" "=v,v,R")
- (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "v,R,v")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VXE2"
- "@
- #
- vlbrq\t%v0,%v1
- vstbrq\t%v1,%v0"
- [(set_attr "op_type" "*,VRX,VRX")])
-
-; vlerh, vlerf, vlerg, vsterh, vsterf, vsterg
-(define_insn "*eltswap<mode>"
- [(set (match_operand:V_HW_HSD 0 "nonimmediate_operand" "=v,v,R")
- (unspec:V_HW_HSD [(match_operand:V_HW_HSD 1 "nonimmediate_operand" "v,R,v")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VXE2"
- "@
- #
- vler<bhfgq>\t%v0,%v1
- vster<bhfgq>\t%v1,%v0"
- [(set_attr "op_type" "*,VRX,VRX")])
-
-; The emulation pattern below will also accept
-; vst (eltswap (vl))
-; i.e. both operands in memory, which reload needs to fix.
-; Split into
-; vl
-; vster (=vst (eltswap))
-; since we prefer vster over vler as long as the latter
-; does not support alignment hints.
-(define_split
- [(set (match_operand:VEC_HW 0 "memory_operand" "")
- (unspec:VEC_HW [(match_operand:VEC_HW 1 "memory_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VXE2 && can_create_pseudo_p ()"
- [(set (match_dup 2) (match_dup 1))
- (set (match_dup 0)
- (unspec:VEC_HW [(match_dup 2)] UNSPEC_VEC_ELTSWAP))]
-{
- operands[2] = gen_reg_rtx (<MODE>mode);
-})
-
-
-; Swapping v2df/v2di can be done via vpdi on z13 and z14.
-(define_split
- [(set (match_operand:V_HW_2 0 "register_operand" "")
- (unspec:V_HW_2 [(match_operand:V_HW_2 1 "register_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX && can_create_pseudo_p ()"
- [(set (match_operand:V_HW_2 0 "register_operand" "=v")
- (vec_select:V_HW_2
- (vec_concat:<vec_2x_nelts>
- (match_operand:V_HW_2 1 "register_operand" "v")
- (match_dup 1))
- (parallel [(const_int 1) (const_int 2)])))]
-)
-
-
-; Swapping v4df/v4si can be done via vpdi and rot.
-(define_split
- [(set (match_operand:V_HW_4 0 "register_operand" "")
- (unspec:V_HW_4 [(match_operand:V_HW_4 1 "register_operand" "")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX && can_create_pseudo_p ()"
- [(set (match_dup 2)
- (vec_select:V_HW_4
- (vec_concat:<vec_2x_nelts>
- (match_dup 1)
- (match_dup 1))
- (parallel [(const_int 2) (const_int 3) (const_int 4) (const_int 5)])))
- (set (match_dup 3)
- (subreg:V2DI (match_dup 2) 0))
- (set (match_dup 4)
- (rotate:V2DI
- (match_dup 3)
- (const_int 32)))
- (set (match_operand:V_HW_4 0)
- (subreg:V_HW_4 (match_dup 4) 0))]
-{
- operands[2] = gen_reg_rtx (<MODE>mode);
- operands[3] = gen_reg_rtx (V2DImode);
- operands[4] = gen_reg_rtx (V2DImode);
-})
-
-; z15 has instructions for doing element reversal from mem to reg
-; or the other way around. For reg to reg or on pre z15 machines
-; we have to emulate it with vector permute.
-(define_insn_and_split "*eltswap<mode>_emu"
- [(set (match_operand:VEC_HW 0 "nonimmediate_operand" "=vR")
- (unspec:VEC_HW [(match_operand:VEC_HW 1 "nonimmediate_operand" "vR")]
- UNSPEC_VEC_ELTSWAP))]
- "TARGET_VX && can_create_pseudo_p ()"
- "#"
- "&& ((!memory_operand (operands[0], <MODE>mode)
- && !memory_operand (operands[1], <MODE>mode))
- || !TARGET_VXE2)"
- [(set (match_dup 3)
- (unspec:V16QI [(match_dup 4)
- (match_dup 4)
- (match_dup 2)]
- UNSPEC_VEC_PERM))
- (set (match_dup 0) (subreg:VEC_HW (match_dup 3) 0))]
-{
- static char p[4][16] =
- { { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }, /* Q */
- { 14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1 }, /* H */
- { 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 }, /* S */
- { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7 } }; /* D */
- char *perm;
- rtx perm_rtx[16], constv;
-
- switch (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)))
- {
- case 1: perm = p[0]; break;
- case 2: perm = p[1]; break;
- case 4: perm = p[2]; break;
- case 8: perm = p[3]; break;
- default: gcc_unreachable ();
- }
-
- for (int i = 0; i < 16; i++)
- perm_rtx[i] = GEN_INT (perm[i]);
-
- operands[1] = force_reg (<MODE>mode, operands[1]);
- operands[2] = gen_reg_rtx (V16QImode);
- operands[3] = gen_reg_rtx (V16QImode);
- operands[4] = simplify_gen_subreg (V16QImode, operands[1], <MODE>mode, 0);
- constv = force_const_mem (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm_rtx)));
- emit_move_insn (operands[2], constv);
-})
-
; vec_insert (__builtin_bswap32 (*a), b, 1) set-element-bswap-2.c
; b[1] = __builtin_bswap32 (*a) set-element-bswap-3.c
; vlebrh, vlebrf, vlebrg
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c
new file mode 100644
index 00000000000..4a2541b7ae6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-1.c
@@ -0,0 +1,46 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z13" } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 4 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+V8HI
+v8hi (V8HI x)
+{
+ V8HI y;
+ for (int i = 0; i < 8; ++i)
+ y[i] = x[7 - i];
+ return y;
+}
+
+V4SI
+v4si (V4SI x)
+{
+ V4SI y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = x[3 - i];
+ return y;
+}
+
+V2DI
+v2di (V2DI x)
+{
+ V2DI y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = x[1 - i];
+ return y;
+}
+
+V2DF
+v2df (V2DF x)
+{
+ V2DF y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = x[1 - i];
+ return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c
new file mode 100644
index 00000000000..ec0d1da7d57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-2.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 1 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef float __attribute__ ((vector_size (16))) V4SF;
+
+V4SF
+v4sf (V4SF x)
+{
+ V4SF y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = x[3 - i];
+ return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c
new file mode 100644
index 00000000000..3f69db8831c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-3.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+V8HI
+v8hi (V8HI *x)
+{
+ V8HI y;
+ for (int i = 0; i < 8; ++i)
+ y[i] = (*x)[7 - i];
+ return y;
+}
+
+V4SI
+v4si (V4SI *x)
+{
+ V4SI y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+
+V2DI
+v2di (V2DI *x)
+{
+ V2DI y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
+
+V4SF
+v4sf (V4SF *x)
+{
+ V4SF y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+
+V2DF
+v2df (V2DF *x)
+{
+ V2DF y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c
new file mode 100644
index 00000000000..5027ed55f50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-4.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvlbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvler[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+V16QI
+v16qi (V16QI *x)
+{
+ V16QI y;
+ for (int i = 0; i < 16; ++i)
+ y[i] = (*x)[15 - i];
+ return y;
+}
+
+V8HI
+v8hi (V8HI *x)
+{
+ V8HI y;
+ for (int i = 0; i < 8; ++i)
+ y[i] = (*x)[7 - i];
+ return y;
+}
+
+V4SI
+v4si (V4SI *x)
+{
+ V4SI y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+
+V2DI
+v2di (V2DI *x)
+{
+ V2DI y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
+
+V4SF
+v4sf (V4SF *x)
+{
+ V4SF y;
+ for (int i = 0; i < 4; ++i)
+ y[i] = (*x)[3 - i];
+ return y;
+}
+
+V2DF
+v2df (V2DF *x)
+{
+ V2DF y;
+ for (int i = 0; i < 2; ++i)
+ y[i] = (*x)[1 - i];
+ return y;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c
new file mode 100644
index 00000000000..8c250aa681b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-5.c
@@ -0,0 +1,56 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z14" } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-final { scan-assembler-times {\tvpdi\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+void
+v8hi (V8HI *x, V8HI y)
+{
+ V8HI z;
+ for (int i = 0; i < 8; ++i)
+ z[i] = y[7 - i];
+ *x = z;
+}
+
+void
+v4si (V4SI *x, V4SI y)
+{
+ V4SI z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+
+void
+v2di (V2DI *x, V2DI y)
+{
+ V2DI z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
+
+void
+v4sf (V4SF *x, V4SF y)
+{
+ V4SF z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+
+void
+v2df (V2DF *x, V2DF y)
+{
+ V2DF z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c
new file mode 100644
index 00000000000..7e2b2356788
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-6.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvstbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvster[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+void
+v16qi (V16QI *x, V16QI y)
+{
+ V16QI z;
+ for (int i = 0; i < 16; ++i)
+ z[i] = y[15 - i];
+ *x = z;
+}
+
+void
+v8hi (V8HI *x, V8HI y)
+{
+ V8HI z;
+ for (int i = 0; i < 8; ++i)
+ z[i] = y[7 - i];
+ *x = z;
+}
+
+void
+v4si (V4SI *x, V4SI y)
+{
+ V4SI z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+
+void
+v2di (V2DI *x, V2DI y)
+{
+ V2DI z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
+
+void
+v4sf (V4SF *x, V4SF y)
+{
+ V4SF z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = y[3 - i];
+ *x = z;
+}
+
+void
+v2df (V2DF *x, V2DF y)
+{
+ V2DF z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = y[1 - i];
+ *x = z;
+}
diff --git a/gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c
new file mode 100644
index 00000000000..046fcc0790a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/vector/reverse-elements-7.c
@@ -0,0 +1,67 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mzarch -march=z15" } */
+/* { dg-require-effective-target s390_vxe2 } */
+/* { dg-final { scan-assembler-times {\tvstbrq\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tvster[hfg]\t} 5 } } */
+/* { dg-final { scan-assembler-not {\tvperm\t} } } */
+
+typedef signed char __attribute__ ((vector_size (16))) V16QI;
+typedef short __attribute__ ((vector_size (16))) V8HI;
+typedef int __attribute__ ((vector_size (16))) V4SI;
+typedef long long __attribute__ ((vector_size (16))) V2DI;
+typedef float __attribute__ ((vector_size (16))) V4SF;
+typedef double __attribute__ ((vector_size (16))) V2DF;
+
+void
+v16qi (V16QI *x, V16QI *y)
+{
+ V16QI z;
+ for (int i = 0; i < 16; ++i)
+ z[i] = (*y)[15 - i];
+ *x = z;
+}
+
+void
+v8hi (V8HI *x, V8HI *y)
+{
+ V8HI z;
+ for (int i = 0; i < 8; ++i)
+ z[i] = (*y)[7 - i];
+ *x = z;
+}
+
+void
+v4si (V4SI *x, V4SI *y)
+{
+ V4SI z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = (*y)[3 - i];
+ *x = z;
+}
+
+void
+v2di (V2DI *x, V2DI *y)
+{
+ V2DI z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = (*y)[1 - i];
+ *x = z;
+}
+
+void
+v4sf (V4SF *x, V4SF *y)
+{
+ V4SF z;
+ for (int i = 0; i < 4; ++i)
+ z[i] = (*y)[3 - i];
+ *x = z;
+}
+
+void
+v2df (V2DF *x, V2DF *y)
+{
+ V2DF z;
+ for (int i = 0; i < 2; ++i)
+ z[i] = (*y)[1 - i];
+ *x = z;
+}
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
index 4938ac20613..3c1e9338f80 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword-z14.c
@@ -21,4 +21,6 @@ baz (signed short *x)
return vec_reve (vec_xl (0, x));
}
-/* { dg-final { scan-assembler-times "vperm\t" 3 } } */
+/* { dg-final { scan-assembler-times "vpdi\t" 3 } } */
+/* { dg-final { scan-assembler-times "verllg\t" 3 } } */
+/* { dg-final { scan-assembler-times "verllf\t" 3 } } */
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
index 3c9229922ec..7b1c3f885cd 100644
--- a/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec-reve-load-halfword.c
@@ -9,7 +9,9 @@ foo (vector signed short x)
return vec_reve (x);
}
-/* { dg-final { scan-assembler-times "vperm\t" 1 } } */
+/* { dg-final { scan-assembler-times "vpdi\t" 1 } } */
+/* { dg-final { scan-assembler-times "verllg\t" 1 } } */
+/* { dg-final { scan-assembler-times "verllf\t" 1 } } */
vector signed short
--
2.41.0
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern
2023-11-09 8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
2023-11-09 8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
2023-11-09 8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
@ 2023-11-09 8:27 ` Andreas Krebbel
2 siblings, 0 replies; 6+ messages in thread
From: Andreas Krebbel @ 2023-11-09 8:27 UTC (permalink / raw)
To: Stefan Schulze Frielinghaus, gcc-patches
On 11/9/23 09:22, Stefan Schulze Frielinghaus wrote:
> Deal with cases where vpdi and vmr{l,h} are still applicable if the
> operands of those instructions are swapped. For example, currently for
>
> V2DI foo (V2DI x)
> {
> return (V2DI) {x[1], x[0]};
> }
>
> the assembler sequence
>
> vlgvg %r1,%v24,1
> vzero %v0
> vlvgg %v0,%r1,0
> vmrhg %v24,%v0,%v24
>
> is emitted. With this patch a single vpdi is emitted.
>
> Extensive tests are included in a subsequent patch of this series where
> more cases are covered.
>
> Bootstrapped and regtested on s390. Ok for mainline?
>
> gcc/ChangeLog:
>
> * config/s390/s390.cc (expand_perm_with_merge): Deal with cases
> where vmr{l,h} are still applicable if the operands are swapped.
> (expand_perm_with_vpdi): Likewise for vpdi.
Ok, Thanks!
Andreas
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 2/3] s390: Add expand_perm_reverse_elements
2023-11-09 8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
@ 2023-11-09 8:27 ` Andreas Krebbel
0 siblings, 0 replies; 6+ messages in thread
From: Andreas Krebbel @ 2023-11-09 8:27 UTC (permalink / raw)
To: Stefan Schulze Frielinghaus, gcc-patches
On 11/9/23 09:22, Stefan Schulze Frielinghaus wrote:
> Replace expand_perm_with_rot, expand_perm_with_vster, and
> expand_perm_with_vstbrq with a general implementation
> expand_perm_reverse_elements.
>
> Bootstrapped and regtested on s390. Ok for mainline?
>
> gcc/ChangeLog:
>
> * config/s390/s390.cc (expand_perm_with_rot): Remove.
> (expand_perm_reverse_elements): New.
> (expand_perm_with_vster): Remove.
> (expand_perm_with_vstbrq): Remove.
> (vectorize_vec_perm_const_1): Replace removed functions with new
> one.
Ok, thanks!
Andreas
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH 3/3] s390: Revise vector reverse elements
2023-11-09 8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
@ 2023-11-09 8:27 ` Andreas Krebbel
0 siblings, 0 replies; 6+ messages in thread
From: Andreas Krebbel @ 2023-11-09 8:27 UTC (permalink / raw)
To: Stefan Schulze Frielinghaus, gcc-patches
On 11/9/23 09:22, Stefan Schulze Frielinghaus wrote:
> Replace UNSPEC_VEC_ELTSWAP with a vec_select implementation.
>
> Furthermore, for a vector reverse elements operation between registers
> of mode V8HI perform three rotates instead of a vperm operation since
> the latter involves loading the permutation vector from the literal
> pool.
>
> Prior to z15, instead of
> larl + vl + vl + vperm
> prefer
> vl + vpdi (+ verllg (+ verllf))
> for a load operation.
>
> Likewise, prior to z15, instead of
> larl + vl + vperm + vst
> prefer
> vpdi (+ verllg (+ verllf)) + vst
> for a store operation.
>
> Bootstrapped and regtested on s390. Ok for mainline?
>
> gcc/ChangeLog:
>
> * config/s390/s390.md: Remove UNSPEC_VEC_ELTSWAP.
> * config/s390/vector.md (eltswapv16qi): New expander.
> (*eltswapv16qi): New insn and splitter.
> (eltswapv8hi): New insn and splitter.
> (eltswap<mode>): New insn and splitter for modes V_HW_4 as well
> as V_HW_2.
> * config/s390/vx-builtins.md (eltswap<mode>): Remove.
> (*eltswapv16qi): Remove.
> (*eltswap<mode>): Remove.
> (*eltswap<mode>_emu): Remove.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/s390/zvector/vec-reve-load-halfword-z14.c: Remove
> vperm and substitute by vpdi et al.
> * gcc.target/s390/zvector/vec-reve-load-halfword.c: Likewise.
> * gcc.target/s390/vector/reverse-elements-1.c: New test.
> * gcc.target/s390/vector/reverse-elements-2.c: New test.
> * gcc.target/s390/vector/reverse-elements-3.c: New test.
> * gcc.target/s390/vector/reverse-elements-4.c: New test.
> * gcc.target/s390/vector/reverse-elements-5.c: New test.
> * gcc.target/s390/vector/reverse-elements-6.c: New test.
> * gcc.target/s390/vector/reverse-elements-7.c: New test.
Ok, thanks!
Andreas
^ permalink raw reply [flat|nested] 6+ messages in thread
end of thread, other threads:[~2023-11-09 8:28 UTC | newest]
Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-11-09 8:22 [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Stefan Schulze Frielinghaus
2023-11-09 8:22 ` [PATCH 2/3] s390: Add expand_perm_reverse_elements Stefan Schulze Frielinghaus
2023-11-09 8:27 ` Andreas Krebbel
2023-11-09 8:22 ` [PATCH 3/3] s390: Revise vector reverse elements Stefan Schulze Frielinghaus
2023-11-09 8:27 ` Andreas Krebbel
2023-11-09 8:27 ` [PATCH 1/3] s390: Recognize further vpdi and vmr{l,h} pattern Andreas Krebbel
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).