public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work066)] Generate XXSPLTI32DX on power10.
@ 2021-08-26  4:00 Michael Meissner
  0 siblings, 0 replies; only message in thread
From: Michael Meissner @ 2021-08-26  4:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:87fff6c1187d3ce3f9c3f387ed6ea7dda47aa0c0

commit 87fff6c1187d3ce3f9c3f387ed6ea7dda47aa0c0
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Aug 26 00:00:26 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    At the present time, XXSPLTI32DX is not enabled by default.
    
    2021-08-25  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraints.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxsplti32dx_constant_float_p): New
            helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (prefixed_permute_p): Constants loaded with XXSPLTI32DX are
            prefixed.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
            count.
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            Add -mxxsplti32dx option.
            * gcc.target/powerpc/vec-splat-constant-df.c: Likewise.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Likewise.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 +
 gcc/config/rs6000/predicates.md                    |  19 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 122 ++++++++++++++++++++-
 gcc/config/rs6000/rs6000.md                        |  61 ++++++++---
 gcc/config/rs6000/rs6000.opt                       |   5 +
 gcc/config/rs6000/vsx.md                           |  74 +++++++++++++
 .../gcc.target/powerpc/pr86731-fwrapv-longlong.c   |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-df.c     |  11 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   7 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 11 files changed, 293 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 82fecca4a91..6317f928dc4 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b601f73600f..a5d51f4ad84 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -611,6 +611,11 @@
   if (lxvkq_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -678,6 +683,17 @@
   return lxvkq_constant_p (op, mode, &immediate);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF/V2DI CONST_VECTOR that
+;; can be loaded via a pair of ISA 3.1 XXSPLTI32DX instructions.  Do not return
+;; true if the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB
+;; to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -697,6 +713,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 181d20d7e05..744b1ad73e6 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -34,6 +34,8 @@ extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
 extern bool xxspltiw_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern bool lxvkq_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 16c225a604f..46ff68bf3bd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6869,6 +6869,119 @@ lxvkq_constant_p (rtx op,
   return false;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instructions.  If the constant is zero, can be loaded with
+   XXSPLTIDP, or (for V2DI) fits in a signed byte, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = (value >> 32) & 0xffffffff;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6934,6 +7047,9 @@ output_vec_const_move (rtx *operands)
 	  return "lxvkq %x0,%2";
 	}
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -26612,7 +26728,11 @@ prefixed_permute_p (rtx_insn *insn)
     case DFmode:
     case SFmode:
     case V2DFmode:
-      return xxspltidp_operand (src, mode);
+      return (xxspltidp_operand (src, mode)
+	      || xxsplti32dx_operand (src, mode));
+
+    case V2DImode:
+      return xxsplti32dx_operand (src, mode);
 
     default:
       break;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf3bfed3b88..d6af66a1728 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7728,17 +7728,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7761,15 +7761,24 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")])
+	 *,          *,         *,          *,         p10,       p10")
+   (set_attr "max_prefixed_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8029,18 +8038,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8058,20 +8067,29 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")])
+             *,           *,          *,          p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8098,19 +8116,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8133,18 +8151,29 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")])
+             *,           p8v,        p8v,        p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index ebdf7cd036d..ffdffaf6160 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -648,6 +648,11 @@ mxxspltiw
 Target Undocumented Var(TARGET_XXSPLTIW) Init(1) Save
 Generate (do not generate) XXSPLTIW instructions.
 
+; Do not enable this by default at the present time.
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX) Init(0) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 mlxvkq
 Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
 Generate (do not generate) LXVKQ instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4f716a9f2d2..9a99ea9331c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -376,6 +376,7 @@
    UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6620,6 +6621,79 @@
   [(set_attr "type" "vecperm")
    (set_attr "prefixed" "yes")])
 
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand" "eD"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB/XXLXOR/XXLORC instruction instead of the first
+     XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxlxor %x0,%x0,%x0
+   xxlorc %x0,%x0,%x0
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "veclogical,veclogical,vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
+
+
 ;; XXBLEND built-in function support
 (define_insn "xxblend_<mode>"
   [(set (match_operand:VM3 0 "register_operand" "=wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
index bd1502bb30a..7fb6ec978a6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
@@ -26,9 +26,10 @@ vector signed long long splats4(void)
 
 /* Codegen will consist of splat and shift instructions for most types.
    If folding is enabled, the vec_sl tests using vector long long type will
-   generate a lvx instead of a vspltisw+vsld pair.  */
+   generate a lvx instead of a vspltisw+vsld pair.  On power10, it may
+   generate an xxsplti32dx instead of a load.  */
 
 /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
 /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
-/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxsplti32dx\M} 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..27ded2b6d1d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..74b2466f74c 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index 82ffc86f8aa..f438c3787a0 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLVX.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
   return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLVX.  */
+			   (double)0x1p-149f };	/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-08-26  4:00 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-08-26  4:00 [gcc(refs/users/meissner/heads/work066)] Generate XXSPLTI32DX on power10 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).