public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/vendors/redhat/heads/gcc-8-branch)] PR94613: Fix vec_sel builtin for IBM Z
@ 2020-09-17 17:00 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2020-09-17 17:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b027538774a21691b2a560162a2134837846d41a

commit b027538774a21691b2a560162a2134837846d41a
Author: Andreas Krebbel <krebbel@linux.ibm.com>
Date:   Mon May 4 10:43:02 2020 +0200

    PR94613: Fix vec_sel builtin for IBM Z
    
    The vsel instruction is a bit-wise select instruction.  Using an
    IF_THEN_ELSE to express it in RTL is wrong and leads to wrong code being
    generated in the combine pass.
    
    With the patch the pattern is written using bit operations.  However,
    I've just noticed that the manual still demands a fixed point mode for
    AND/IOR and friends although several targets emit bit ops on floating
    point vectors (including i386, Power, and s390). So I assume this is a
    safe thing to do?!
    
    gcc/ChangeLog:
    
    2020-05-04  Andreas Krebbel  <krebbel@linux.ibm.com>
    
            Backport from mainline
            2020-04-20  Andreas Krebbel  <krebbel@linux.ibm.com>
    
            PR target/94613
            * config/s390/s390-builtin-types.def: Add 3 new function modes.
            * config/s390/s390-builtins.def: Add mode dependent low-level
            builtin and map the overloaded builtins to these.
            * config/s390/vx-builtins.md ("vec_sel<mode>"): Rename to ...
            ("vsel<mode>"): ... this and rewrite the pattern with bitops.
    
    gcc/testsuite/ChangeLog:
    
    2020-05-04  Andreas Krebbel  <krebbel@linux.ibm.com>
    
            Backport from mainline
            2020-04-20  Andreas Krebbel  <krebbel@linux.ibm.com>
    
            PR target/94613
            * gcc.target/s390/zvector/pr94613.c: New test.
            * gcc.target/s390/zvector/vec_sel-1.c: New test.

Diff:
---
 gcc/ChangeLog                                     |  12 ++
 gcc/config/s390/s390-builtin-types.def            |   3 +
 gcc/config/s390/s390-builtins.def                 |  65 ++++---
 gcc/config/s390/vx-builtins.md                    |  27 ++-
 gcc/testsuite/ChangeLog                           |   9 +
 gcc/testsuite/gcc.target/s390/zvector/pr94613.c   |  38 ++++
 gcc/testsuite/gcc.target/s390/zvector/vec_sel-1.c | 211 ++++++++++++++++++++++
 7 files changed, 321 insertions(+), 44 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d44ef73395a..bdc3a9c348b 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,15 @@
+2020-05-04  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+	Backport from mainline
+	2020-04-20  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+	PR target/94613
+	* config/s390/s390-builtin-types.def: Add 3 new function modes.
+	* config/s390/s390-builtins.def: Add mode dependent low-level
+	builtin and map the overloaded builtins to these.
+	* config/s390/vx-builtins.md ("vec_sel<mode>"): Rename to ...
+	("vsel<mode>"): ... this and rewrite the pattern with bitops.
+
 2020-04-29  Andre Vieira  <andre.simoesdiasvieira@arm.com>
 
 	Backport from mainline.
diff --git a/gcc/config/s390/s390-builtin-types.def b/gcc/config/s390/s390-builtin-types.def
index 6f257c6de77..9779666a733 100644
--- a/gcc/config/s390/s390-builtin-types.def
+++ b/gcc/config/s390/s390-builtin-types.def
@@ -293,6 +293,7 @@ DEF_FN_TYPE_3 (BT_FN_UV16QI_UV2DI_UV2DI_UV16QI, BT_UV16QI, BT_UV2DI, BT_UV2DI, B
 DEF_FN_TYPE_3 (BT_FN_UV16QI_UV8HI_UV8HI_INTPTR, BT_UV16QI, BT_UV8HI, BT_UV8HI, BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_ULONGLONG_INT, BT_UV2DI, BT_UV2DI, BT_ULONGLONG, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI_INT, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_INT)
+DEF_FN_TYPE_3 (BT_FN_UV2DI_UV2DI_UV2DI_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI, BT_UV2DI)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_UV4SI_UV4SI_UV2DI, BT_UV2DI, BT_UV4SI, BT_UV4SI, BT_UV2DI)
 DEF_FN_TYPE_3 (BT_FN_UV2DI_V2DF_INT_INT, BT_UV2DI, BT_V2DF, BT_INT, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_UV4SI_UV2DI_UV2DI_INTPTR, BT_UV4SI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
@@ -316,6 +317,7 @@ DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_DBL_INT, BT_V2DF, BT_V2DF, BT_DBL, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_UCHAR_UCHAR, BT_V2DF, BT_V2DF, BT_UCHAR, BT_UCHAR)
 DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_UINT_UINT, BT_V2DF, BT_V2DF, BT_UINT, BT_UINT)
 DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF_INT, BT_V2DF, BT_V2DF, BT_V2DF, BT_INT)
+DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF_UV2DI, BT_V2DF, BT_V2DF, BT_V2DF, BT_UV2DI)
 DEF_FN_TYPE_3 (BT_FN_V2DF_V2DF_V2DF_V2DF, BT_V2DF, BT_V2DF, BT_V2DF, BT_V2DF)
 DEF_FN_TYPE_3 (BT_FN_V2DF_V2DI_INT_INT, BT_V2DF, BT_V2DI, BT_INT, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_V2DI_UV2DI_UV2DI_INTPTR, BT_V2DI, BT_UV2DI, BT_UV2DI, BT_INTPTR)
@@ -328,6 +330,7 @@ DEF_FN_TYPE_3 (BT_FN_V4SF_V2DF_INT_INT, BT_V4SF, BT_V2DF, BT_INT, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_FLT_INT, BT_V4SF, BT_V4SF, BT_FLT, BT_INT)
 DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_UCHAR_UCHAR, BT_V4SF, BT_V4SF, BT_UCHAR, BT_UCHAR)
 DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_V4SF_INT, BT_V4SF, BT_V4SF, BT_V4SF, BT_INT)
+DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_V4SF_UV4SI, BT_V4SF, BT_V4SF, BT_V4SF, BT_UV4SI)
 DEF_FN_TYPE_3 (BT_FN_V4SF_V4SF_V4SF_V4SF, BT_V4SF, BT_V4SF, BT_V4SF, BT_V4SF)
 DEF_FN_TYPE_3 (BT_FN_V4SI_UV4SI_UV4SI_INTPTR, BT_V4SI, BT_UV4SI, BT_UV4SI, BT_INTPTR)
 DEF_FN_TYPE_3 (BT_FN_V4SI_V2DI_V2DI_INTPTR, BT_V4SI, BT_V2DI, BT_V2DI, BT_INTPTR)
diff --git a/gcc/config/s390/s390-builtins.def b/gcc/config/s390/s390-builtins.def
index 365e794316a..006669718fa 100644
--- a/gcc/config/s390/s390-builtins.def
+++ b/gcc/config/s390/s390-builtins.def
@@ -685,36 +685,41 @@ B_DEF      (s390_vsceg,                 vec_scatter_elementv2di,0,
 
 /* First two operands are swapped in s390-c.c */
 OB_DEF     (s390_vec_sel,               s390_vec_sel_b8_a,  s390_vec_sel_dbl_b, B_VX,               BT_FN_OV4SI_OV4SI_OV4SI_OV4SI)
-OB_DEF_VAR (s390_vec_sel_b8_a,          s390_vsel,          0,                  0,                  BT_OV_BV16QI_BV16QI_BV16QI_UV16QI)
-OB_DEF_VAR (s390_vec_sel_b8_b,          s390_vsel,          0,                  0,                  BT_OV_BV16QI_BV16QI_BV16QI_BV16QI)
-OB_DEF_VAR (s390_vec_sel_s8_a,          s390_vsel,          0,                  0,                  BT_OV_V16QI_V16QI_V16QI_UV16QI)
-OB_DEF_VAR (s390_vec_sel_s8_b,          s390_vsel,          0,                  0,                  BT_OV_V16QI_V16QI_V16QI_BV16QI)
-OB_DEF_VAR (s390_vec_sel_u8_a,          s390_vsel,          0,                  0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
-OB_DEF_VAR (s390_vec_sel_u8_b,          s390_vsel,          0,                  0,                  BT_OV_UV16QI_UV16QI_UV16QI_BV16QI)
-OB_DEF_VAR (s390_vec_sel_b16_a,         s390_vsel,          0,                  0,                  BT_OV_BV8HI_BV8HI_BV8HI_UV8HI)
-OB_DEF_VAR (s390_vec_sel_b16_b,         s390_vsel,          0,                  0,                  BT_OV_BV8HI_BV8HI_BV8HI_BV8HI)
-OB_DEF_VAR (s390_vec_sel_s16_a,         s390_vsel,          0,                  0,                  BT_OV_V8HI_V8HI_V8HI_UV8HI)
-OB_DEF_VAR (s390_vec_sel_s16_b,         s390_vsel,          0,                  0,                  BT_OV_V8HI_V8HI_V8HI_BV8HI)
-OB_DEF_VAR (s390_vec_sel_u16_a,         s390_vsel,          0,                  0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
-OB_DEF_VAR (s390_vec_sel_u16_b,         s390_vsel,          0,                  0,                  BT_OV_UV8HI_UV8HI_UV8HI_BV8HI)
-OB_DEF_VAR (s390_vec_sel_b32_a,         s390_vsel,          0,                  0,                  BT_OV_BV4SI_BV4SI_BV4SI_UV4SI)
-OB_DEF_VAR (s390_vec_sel_b32_b,         s390_vsel,          0,                  0,                  BT_OV_BV4SI_BV4SI_BV4SI_BV4SI)
-OB_DEF_VAR (s390_vec_sel_s32_a,         s390_vsel,          0,                  0,                  BT_OV_V4SI_V4SI_V4SI_UV4SI)
-OB_DEF_VAR (s390_vec_sel_s32_b,         s390_vsel,          0,                  0,                  BT_OV_V4SI_V4SI_V4SI_BV4SI)
-OB_DEF_VAR (s390_vec_sel_u32_a,         s390_vsel,          0,                  0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
-OB_DEF_VAR (s390_vec_sel_u32_b,         s390_vsel,          0,                  0,                  BT_OV_UV4SI_UV4SI_UV4SI_BV4SI)
-OB_DEF_VAR (s390_vec_sel_b64_a,         s390_vsel,          0,                  0,                  BT_OV_BV2DI_BV2DI_BV2DI_UV2DI)
-OB_DEF_VAR (s390_vec_sel_b64_b,         s390_vsel,          0,                  0,                  BT_OV_BV2DI_BV2DI_BV2DI_BV2DI)
-OB_DEF_VAR (s390_vec_sel_s64_a,         s390_vsel,          0,                  0,                  BT_OV_V2DI_V2DI_V2DI_UV2DI)
-OB_DEF_VAR (s390_vec_sel_s64_b,         s390_vsel,          0,                  0,                  BT_OV_V2DI_V2DI_V2DI_BV2DI)
-OB_DEF_VAR (s390_vec_sel_u64_a,         s390_vsel,          0,                  0,                  BT_OV_UV2DI_UV2DI_UV2DI_UV2DI)
-OB_DEF_VAR (s390_vec_sel_u64_b,         s390_vsel,          0,                  0,                  BT_OV_UV2DI_UV2DI_UV2DI_BV2DI)
-OB_DEF_VAR (s390_vec_sel_flt_a,         s390_vsel,          B_VXE,              0,                  BT_OV_V4SF_V4SF_V4SF_UV4SI)
-OB_DEF_VAR (s390_vec_sel_flt_b,         s390_vsel,          B_VXE,              0,                  BT_OV_V4SF_V4SF_V4SF_BV4SI)
-OB_DEF_VAR (s390_vec_sel_dbl_a,         s390_vsel,          0,                  0,                  BT_OV_V2DF_V2DF_V2DF_UV2DI)
-OB_DEF_VAR (s390_vec_sel_dbl_b,         s390_vsel,          0,                  0,                  BT_OV_V2DF_V2DF_V2DF_BV2DI)
-
-B_DEF      (s390_vsel,                  vec_selv16qi,       0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_b8_a,          s390_vselb,         0,                  0,                  BT_OV_BV16QI_BV16QI_BV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_b8_b,          s390_vselb,         0,                  0,                  BT_OV_BV16QI_BV16QI_BV16QI_BV16QI)
+OB_DEF_VAR (s390_vec_sel_s8_a,          s390_vselb,         0,                  0,                  BT_OV_V16QI_V16QI_V16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_s8_b,          s390_vselb,         0,                  0,                  BT_OV_V16QI_V16QI_V16QI_BV16QI)
+OB_DEF_VAR (s390_vec_sel_u8_a,          s390_vselb,         0,                  0,                  BT_OV_UV16QI_UV16QI_UV16QI_UV16QI)
+OB_DEF_VAR (s390_vec_sel_u8_b,          s390_vselb,         0,                  0,                  BT_OV_UV16QI_UV16QI_UV16QI_BV16QI)
+OB_DEF_VAR (s390_vec_sel_b16_a,         s390_vselh,         0,                  0,                  BT_OV_BV8HI_BV8HI_BV8HI_UV8HI)
+OB_DEF_VAR (s390_vec_sel_b16_b,         s390_vselh,         0,                  0,                  BT_OV_BV8HI_BV8HI_BV8HI_BV8HI)
+OB_DEF_VAR (s390_vec_sel_s16_a,         s390_vselh,         0,                  0,                  BT_OV_V8HI_V8HI_V8HI_UV8HI)
+OB_DEF_VAR (s390_vec_sel_s16_b,         s390_vselh,         0,                  0,                  BT_OV_V8HI_V8HI_V8HI_BV8HI)
+OB_DEF_VAR (s390_vec_sel_u16_a,         s390_vselh,         0,                  0,                  BT_OV_UV8HI_UV8HI_UV8HI_UV8HI)
+OB_DEF_VAR (s390_vec_sel_u16_b,         s390_vselh,         0,                  0,                  BT_OV_UV8HI_UV8HI_UV8HI_BV8HI)
+OB_DEF_VAR (s390_vec_sel_b32_a,         s390_vself,         0,                  0,                  BT_OV_BV4SI_BV4SI_BV4SI_UV4SI)
+OB_DEF_VAR (s390_vec_sel_b32_b,         s390_vself,         0,                  0,                  BT_OV_BV4SI_BV4SI_BV4SI_BV4SI)
+OB_DEF_VAR (s390_vec_sel_s32_a,         s390_vself,         0,                  0,                  BT_OV_V4SI_V4SI_V4SI_UV4SI)
+OB_DEF_VAR (s390_vec_sel_s32_b,         s390_vself,         0,                  0,                  BT_OV_V4SI_V4SI_V4SI_BV4SI)
+OB_DEF_VAR (s390_vec_sel_u32_a,         s390_vself,         0,                  0,                  BT_OV_UV4SI_UV4SI_UV4SI_UV4SI)
+OB_DEF_VAR (s390_vec_sel_u32_b,         s390_vself,         0,                  0,                  BT_OV_UV4SI_UV4SI_UV4SI_BV4SI)
+OB_DEF_VAR (s390_vec_sel_b64_a,         s390_vselg,         0,                  0,                  BT_OV_BV2DI_BV2DI_BV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_sel_b64_b,         s390_vselg,         0,                  0,                  BT_OV_BV2DI_BV2DI_BV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_sel_s64_a,         s390_vselg,         0,                  0,                  BT_OV_V2DI_V2DI_V2DI_UV2DI)
+OB_DEF_VAR (s390_vec_sel_s64_b,         s390_vselg,         0,                  0,                  BT_OV_V2DI_V2DI_V2DI_BV2DI)
+OB_DEF_VAR (s390_vec_sel_u64_a,         s390_vselg,         0,                  0,                  BT_OV_UV2DI_UV2DI_UV2DI_UV2DI)
+OB_DEF_VAR (s390_vec_sel_u64_b,         s390_vselg,         0,                  0,                  BT_OV_UV2DI_UV2DI_UV2DI_BV2DI)
+OB_DEF_VAR (s390_vec_sel_flt_a,         s390_vself_flt,     B_VXE,              0,                  BT_OV_V4SF_V4SF_V4SF_UV4SI)
+OB_DEF_VAR (s390_vec_sel_flt_b,         s390_vself_flt,     B_VXE,              0,                  BT_OV_V4SF_V4SF_V4SF_BV4SI)
+OB_DEF_VAR (s390_vec_sel_dbl_a,         s390_vselg_dbl,     0,                  0,                  BT_OV_V2DF_V2DF_V2DF_UV2DI)
+OB_DEF_VAR (s390_vec_sel_dbl_b,         s390_vselg_dbl,     0,                  0,                  BT_OV_V2DF_V2DF_V2DF_BV2DI)
+
+B_DEF      (s390_vselb,                 vselv16qi,          0,                  B_VX,               0,                  BT_FN_UV16QI_UV16QI_UV16QI_UV16QI)
+B_DEF      (s390_vselh,                 vselv8hi,           0,                  B_VX,               0,                  BT_FN_UV8HI_UV8HI_UV8HI_UV8HI)
+B_DEF      (s390_vself,                 vselv4si,           0,                  B_VX,               0,                  BT_FN_UV4SI_UV4SI_UV4SI_UV4SI)
+B_DEF      (s390_vselg,                 vselv2di,           0,                  B_VX,               0,                  BT_FN_UV2DI_UV2DI_UV2DI_UV2DI)
+B_DEF      (s390_vself_flt,             vselv4sf,           0,                  B_VXE,              0,                  BT_FN_V4SF_V4SF_V4SF_UV4SI)
+B_DEF      (s390_vselg_dbl,             vselv2df,           0,                  B_VX,               0,                  BT_FN_V2DF_V2DF_V2DF_UV2DI)
 
 OB_DEF     (s390_vec_extend_s64,        s390_vec_extend_s64_s8,s390_vec_extend_s64_s32,B_VX,        BT_FN_OV4SI_OV4SI)
 OB_DEF_VAR (s390_vec_extend_s64_s8,     s390_vsegb,         0,                  0,                  BT_OV_V2DI_V16QI)
diff --git a/gcc/config/s390/vx-builtins.md b/gcc/config/s390/vx-builtins.md
index 37a64ab58f9..210cfdbaba2 100644
--- a/gcc/config/s390/vx-builtins.md
+++ b/gcc/config/s390/vx-builtins.md
@@ -487,23 +487,22 @@
 
 ; Vector select
 
-; Operand 3 selects bits from either OP1 (0) or OP2 (1)
+; for all b in bits op0[b] = op3[b] == 0 ? op2[b] : op1[b]
+; implemented as: op0 = (op1 & op3) | (op2 & ~op3)
 
-; Comparison operator should not matter as long as we always use the same ?!
+; Used to expand the vec_sel builtin.  Operands op1 and op2 have
+; already been swapped in s390-c.c by the time we get here.
 
-; Operands 1 and 2 are swapped in order to match the altivec builtin.
-; If operand 3 is a const_int bitmask this would be vec_merge
-(define_expand "vec_sel<mode>"
-  [(set (match_operand:V_HW 0 "register_operand" "")
-	(if_then_else:V_HW
-	 (eq (match_operand:<tointvec> 3 "register_operand"  "")
-	     (match_dup 4))
-	 (match_operand:V_HW 2 "register_operand"  "")
-	 (match_operand:V_HW 1 "register_operand"  "")))]
+(define_insn "vsel<mode>"
+  [(set (match_operand:V_HW                      0 "register_operand" "=v")
+	(ior:V_HW
+	 (and:V_HW (match_operand:V_HW           1 "register_operand"  "v")
+		   (match_operand:V_HW           3 "register_operand"  "v"))
+	 (and:V_HW (not:V_HW (match_dup 3))
+		   (match_operand:V_HW           2 "register_operand"  "v"))))]
   "TARGET_VX"
-{
-  operands[4] = CONST0_RTX (<tointvec>mode);
-})
+  "vsel\t%v0,%1,%2,%3"
+  [(set_attr "op_type" "VRR")])
 
 
 ; Vector sign extend to doubleword
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index b454fe17b8c..263e999b6dd 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,12 @@
+2020-05-04  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+	Backport from mainline
+	2020-04-20  Andreas Krebbel  <krebbel@linux.ibm.com>
+
+	PR target/94613
+	* gcc.target/s390/zvector/pr94613.c: New test.
+	* gcc.target/s390/zvector/vec_sel-1.c: New test.
+
 2020-05-01  Thomas Koenig  <tkoenig@gcc.gnu.org>
 
 	PR fortran/93956
diff --git a/gcc/testsuite/gcc.target/s390/zvector/pr94613.c b/gcc/testsuite/gcc.target/s390/zvector/pr94613.c
new file mode 100644
index 00000000000..c3581d82ede
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/pr94613.c
@@ -0,0 +1,38 @@
+/* { dg-do run } */
+/* { dg-require-effective-target s390_vx } */
+/* { dg-options "-O3 -mzarch -march=z13 -mzvector -std=gnu99 --save-temps" } */
+
+#include <vecintrin.h>
+
+/* The initial implementation of vec_sel used an IF_THEN_ELSE rtx.
+   This did NOT match what the vsel instruction does.  vsel is a
+   bit-wise operation.  Using IF_THEN_ELSE caused the + operation to
+   be simplified away in combine.  A plus operation affects other bits
+   in the same element.  Hence per-element simplifications are wrong
+   for vsel.  */
+vector unsigned char __attribute__((noinline))
+foo (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+  return vec_sel (a + b, c, a);
+}
+
+/* FIXME: The OR operation should still be optimized away in that case.  */
+vector unsigned char __attribute__((noinline))
+bar (vector unsigned char a, vector unsigned char b, vector unsigned char c)
+{
+  return vec_sel (a | b, c, a);
+}
+
+int
+main ()
+{
+  vector unsigned char v = (vector unsigned char){ 1 };
+
+  if (foo (v, v, v)[0] != 3)
+      __builtin_abort ();
+
+  if (bar (v, v, v)[0] != 1)
+    __builtin_abort ();
+
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/s390/zvector/vec_sel-1.c b/gcc/testsuite/gcc.target/s390/zvector/vec_sel-1.c
new file mode 100644
index 00000000000..d310f70d3a5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/s390/zvector/vec_sel-1.c
@@ -0,0 +1,211 @@
+/* { dg-do run } */
+/* { dg-require-effective-target s390_vxe } */
+/* { dg-options "-O3 -mzarch -march=z14 -mzvector --save-temps -Wno-attributes" } */
+
+#include <string.h>
+#include <vecintrin.h>
+
+typedef vector signed char v16qi;
+typedef vector unsigned char uv16qi;
+typedef vector bool char bv16qi;
+
+typedef vector signed short int v8hi;
+typedef vector unsigned short int uv8hi;
+typedef vector bool short int bv8hi;
+
+typedef vector signed int v4si;
+typedef vector unsigned int uv4si;
+typedef vector bool int bv4si;
+
+typedef vector signed long long v2di;
+typedef vector unsigned long long uv2di;
+typedef vector bool long long bv2di;
+
+typedef vector float v4sf;
+typedef vector double v2df;
+
+#define NUM_CONSTS 8
+
+const v16qi v16qi_vals[NUM_CONSTS] =
+  { (v16qi){ 1 },
+    (v16qi){ 2 },
+    (v16qi){ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 },
+    (v16qi){ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2 },
+    (v16qi){ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1 },
+    (v16qi){ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 },
+    (v16qi){ 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 },
+    (v16qi){ 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1 }
+  };
+
+const v8hi v8hi_vals[NUM_CONSTS] =
+  { (v8hi){ 1 },
+    (v8hi){ 2 },
+    (v8hi){ 1,1,1,1,1,1,1,1 },
+    (v8hi){ 2,2,2,2,2,2,2,2 },
+    (v8hi){ -1,-1,-1,-1,-1,-1,-1,-1 },
+    (v8hi){ 0,0,0,0,0,0,0,0 },
+    (v8hi){ 1,2,3,4,5,6,7,8 },
+    (v8hi){ 8,7,6,5,4,3,2,1 }
+  };
+
+const v4si v4si_vals[NUM_CONSTS] =
+  { (v4si){ 1 },
+    (v4si){ 2 },
+    (v4si){ 1,1,1,1 },
+    (v4si){ 2,2,2,2 },
+    (v4si){ -1,-1,-1,-1 },
+    (v4si){ 0,0,0,0 },
+    (v4si){ 1,2,3,4 },
+    (v4si){ 4,3,2,1 }
+  };
+
+const v2di v2di_vals[NUM_CONSTS] =
+  { (v2di){ 1 },
+    (v2di){ 2 },
+    (v2di){ 1,1 },
+    (v2di){ 2,2 },
+    (v2di){ -1,-1 },
+    (v2di){ 0,0 },
+    (v2di){ 1,2 },
+    (v2di){ 2,1 }
+  };
+
+const v4sf v4sf_vals[NUM_CONSTS] =
+  { (v4sf){ 1.0f },
+    (v4sf){ 2.0f },
+    (v4sf){ 1.0f,1.0f,1.0f,1.0f },
+    (v4sf){ 2.0f,2.0f,2.0f,2.0f },
+    (v4sf){ -1.0f,-1.0f,-1.0f,-1.0f },
+    (v4sf){ 0.0f,0.0f,0.0f,0.0f },
+    (v4sf){ 1.1f,2.1f,3.1f,4.1f },
+    (v4sf){ 4.1f,3.1f,2.1f,1.1f }
+  };
+
+const v2df v2df_vals[NUM_CONSTS] =
+  { (v2df){ 1.0 },
+    (v2df){ 2.0 },
+    (v2df){ 1.0,1.0 },
+    (v2df){ 2.0,2.0 },
+    (v2df){ -1.0,-1.0 },
+    (v2df){ 0.0,0.0 },
+    (v2df){ 1.1,2.1 },
+    (v2df){ 2.1,1.1 }
+  };
+
+/* Each bit of the result vector has the value of the corresponding
+   bit of A if the corresponding bit of C is 0, or the value of the
+   corresponding bit of B otherwise.  */
+void __attribute__((noinline, noclone, target ("arch=zEC12")))
+emul (unsigned char *result, unsigned char *a,
+      unsigned char *b, unsigned char *c)
+{
+  for (int i = 0; i < 16; i++)
+    result[i] = (a[i] & ~c[i]) | (b[i] & c[i]);
+}
+
+#define GENFUNC(NAME, T1, T2)						\
+  T1 __attribute__((noinline, noclone))					\
+  NAME##_reg (T1 a, T1 b, T2 c) { return vec_sel (a, b, c); }		\
+  void __attribute__((noinline, noclone))				\
+  NAME##_mem (T1 *a, T1 *b, T2 *c, T1 *out) { *out = vec_sel (*a, *b, *c); } \
+  T1 __attribute__((always_inline))					\
+  NAME##_const (T1 a, T1 b, T2 c) { return vec_sel (a, b, c); }
+
+GENFUNC (vec_sel_b8_a, bv16qi, uv16qi)
+GENFUNC (vec_sel_b8_b, bv16qi, bv16qi)
+GENFUNC (vec_sel_s8_a,  v16qi, uv16qi)
+GENFUNC (vec_sel_s8_b,  v16qi, bv16qi)
+GENFUNC (vec_sel_u8_a, uv16qi, uv16qi)
+GENFUNC (vec_sel_u8_b, uv16qi, bv16qi)
+
+GENFUNC (vec_sel_b16_a, bv8hi, uv8hi)
+GENFUNC (vec_sel_b16_b, bv8hi, bv8hi)
+GENFUNC (vec_sel_s16_a,  v8hi, uv8hi)
+GENFUNC (vec_sel_s16_b,  v8hi, bv8hi)
+GENFUNC (vec_sel_u16_a, uv8hi, uv8hi)
+GENFUNC (vec_sel_u16_b, uv8hi, bv8hi)
+
+GENFUNC (vec_sel_b32_a, bv4si, uv4si)
+GENFUNC (vec_sel_b32_b, bv4si, bv4si)
+GENFUNC (vec_sel_s32_a,  v4si, uv4si)
+GENFUNC (vec_sel_s32_b,  v4si, bv4si)
+GENFUNC (vec_sel_u32_a, uv4si, uv4si)
+GENFUNC (vec_sel_u32_b, uv4si, bv4si)
+
+GENFUNC (vec_sel_b64_a, bv2di, uv2di)
+GENFUNC (vec_sel_b64_b, bv2di, bv2di)
+GENFUNC (vec_sel_s64_a,  v2di, uv2di)
+GENFUNC (vec_sel_s64_b,  v2di, bv2di)
+GENFUNC (vec_sel_u64_a, uv2di, uv2di)
+GENFUNC (vec_sel_u64_b, uv2di, bv2di)
+
+GENFUNC (vec_sel_flt_a,  v4sf, uv4si)
+GENFUNC (vec_sel_flt_b,  v4sf, bv4si)
+
+GENFUNC (vec_sel_dbl_a,  v2df, uv2di)
+GENFUNC (vec_sel_dbl_b,  v2df, bv2di)
+
+#define TESTFUNC(NAME, T1, T2, VAL_TYPE)				\
+  for (int i = 0; i < NUM_CONSTS; i++)					\
+    for (int j = 0; j < NUM_CONSTS; j++)				\
+      for (int k = 0; k < NUM_CONSTS; k++)				\
+	{								\
+	  unsigned char result[16];					\
+	  T1 in1 = (T1)VAL_TYPE##_vals[i];				\
+	  T1 in2 = (T1)VAL_TYPE##_vals[j];				\
+	  T2 in3 = (T2)VAL_TYPE##_vals[k];				\
+	  emul (result, (char*)&in1, (char*)&in2, (char*)&in3);		\
+									\
+	  T1 reg = NAME##_reg (in1, in2, in3);				\
+	  if (memcmp ((char*)&reg, result, 16) != 0)			\
+	    __builtin_abort ();						\
+									\
+	  T1 mem;							\
+	  NAME##_mem (&in1, &in2, &in3, &mem);				\
+	  if (memcmp ((char*)&mem, result, 16) != 0)			\
+	    __builtin_abort ();						\
+									\
+	  T1 cons = NAME##_const (in1, in2, in3);			\
+	  if (memcmp ((char*)&cons, result, 16) != 0)			\
+	    __builtin_abort ();						\
+	}
+
+int
+main ()
+{
+  TESTFUNC (vec_sel_b8_a, bv16qi, uv16qi, v16qi);
+  TESTFUNC (vec_sel_b8_b, bv16qi, bv16qi, v16qi);
+  TESTFUNC (vec_sel_s8_a,  v16qi, uv16qi, v16qi);
+  TESTFUNC (vec_sel_s8_b,  v16qi, bv16qi, v16qi);
+  TESTFUNC (vec_sel_u8_a, uv16qi, uv16qi, v16qi);
+  TESTFUNC (vec_sel_u8_b, uv16qi, bv16qi, v16qi);
+
+  TESTFUNC (vec_sel_b16_a, bv8hi, uv8hi, v8hi);
+  TESTFUNC (vec_sel_b16_b, bv8hi, bv8hi, v8hi);
+  TESTFUNC (vec_sel_s16_a,  v8hi, uv8hi, v8hi);
+  TESTFUNC (vec_sel_s16_b,  v8hi, bv8hi, v8hi);
+  TESTFUNC (vec_sel_u16_a, uv8hi, uv8hi, v8hi);
+  TESTFUNC (vec_sel_u16_b, uv8hi, bv8hi, v8hi);
+
+  TESTFUNC (vec_sel_b32_a, bv4si, uv4si, v4si);
+  TESTFUNC (vec_sel_b32_b, bv4si, bv4si, v4si);
+  TESTFUNC (vec_sel_s32_a,  v4si, uv4si, v4si);
+  TESTFUNC (vec_sel_s32_b,  v4si, bv4si, v4si);
+  TESTFUNC (vec_sel_u32_a, uv4si, uv4si, v4si);
+  TESTFUNC (vec_sel_u32_b, uv4si, bv4si, v4si);
+
+  TESTFUNC (vec_sel_b64_a, bv2di, uv2di, v2di);
+  TESTFUNC (vec_sel_b64_b, bv2di, bv2di, v2di);
+  TESTFUNC (vec_sel_s64_a,  v2di, uv2di, v2di);
+  TESTFUNC (vec_sel_s64_b,  v2di, bv2di, v2di);
+  TESTFUNC (vec_sel_u64_a, uv2di, uv2di, v2di);
+  TESTFUNC (vec_sel_u64_b, uv2di, bv2di, v2di);
+
+  TESTFUNC (vec_sel_flt_a,  v4sf, uv4si, v4sf);
+  TESTFUNC (vec_sel_flt_b,  v4sf, bv4si, v4sf);
+
+  TESTFUNC (vec_sel_dbl_a,  v2df, uv2di, v2df);
+  TESTFUNC (vec_sel_dbl_b,  v2df, bv2di, v2df);
+}
+
+/* { dg-final { scan-assembler {\n\tvsel\t} } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2020-09-17 17:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-17 17:00 [gcc(refs/vendors/redhat/heads/gcc-8-branch)] PR94613: Fix vec_sel builtin for IBM Z Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).