public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/meissner/heads/work061)] Generate XXSPLTI32DX on power10.
@ 2021-07-22  1:09 Michael Meissner
  0 siblings, 0 replies; 5+ messages in thread
From: Michael Meissner @ 2021-07-22  1:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4374b3b8c2c72e871d1c031768c0054aae707910

commit 4374b3b8c2c72e871d1c031768c0054aae707910
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Jul 21 21:09:34 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    2021-07-21  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraint.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000-cpus.h (ISA_3_1_MASKS_SERVER): Add
            -mxxsplti32dx.
            (POWERPC_MASKS): Add -mxxsplti32dx.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            support for -mxxsplti32dx.
            (xxsplti32dx_constant_float_p): New helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (rs6000_opt_masks): Add -mxxsplti32dx.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-df.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Update insn
            count.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 +
 gcc/config/rs6000/predicates.md                    |  18 +++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 127 +++++++++++++++++++++
 gcc/config/rs6000/rs6000.md                        |  67 ++++++++---
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  72 ++++++++++++
 .../gcc.target/powerpc/vec-splat-constant-df.c     |   9 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 10 files changed, 293 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 5d84723e6a1..c74ac6ac9f9 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
   if (xxspltidp_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
   return xxspltidp_constant_p (op, mode, &value);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair f ISA 3.1 XXSPLTI32DX instructions.  Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index ce8e993100e..3015c1330ed 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 383157c22aa..79e7a0419e2 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4312,6 +4312,17 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_PCREL;
     }
 
+  /* Validate -mxxsplti32dx, which is a prefixed instruction.  We also check
+     for VSX because you need the basic VSX infrastruction for this
+     instruction.  */
+  if (TARGET_XXSPLTI32DX < 0)
+    TARGET_XXSPLTI32DX = TARGET_PREFIXED && TARGET_VSX;
+  else if (TARGET_XXSPLTI32DX > 0 && (!TARGET_PREFIXED || !TARGET_VSX))
+    {
+      error ("%qs requires %qs and %qs", "-mxxspli32dx", "-mprefixed", "-mvsx");
+      TARGET_XXSPLTIDP = 0;
+    }
+
   /* Validate -mxxspltidp, which is a prefixed instruction.  We also check for
      VSX because you need the basic VSX infrastruction for this
      instruction.  */
@@ -6639,6 +6650,119 @@ xxspltidp_constant_p (rtx op,
   return true;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instruction.  If the instruction can be synthesized with
+   XXSPLTIDP or is 0/-1, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = value >> 32;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6687,6 +6811,9 @@ output_vec_const_move (rtx *operands)
 	  || xxspltidp_operand (vec, mode))
 	return "#";
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 0d2a9b56f6c..75a8a680395 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7723,17 +7723,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7756,19 +7756,28 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")
+	 *,          *,         *,          *,         p10,       p10")
    (set_attr "prefixed"
 	"*,          *,         *,          *,         *,         *,
 	 *,          *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         yes")])
+	 *,          *,         *,          *,         yes,       yes")
+   (set_attr "max_prefixed_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8028,18 +8037,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8057,24 +8066,33 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")
+             *,           *,          *,          p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8101,19 +8119,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8136,23 +8154,34 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")
+             *,           p8v,        p8v,        p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index b5234676c03..0c5d334f31c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -647,3 +647,7 @@ Generate (do not generate) XXSPLTIW instructions.
 mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP) Init(-1) Save
 Generate (do not generate) XXSPLTIDP instructions.
+
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX) Init(-1) Save
+Generate (do not generate) XXSPLTI32DX instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index c0b8d49e1d9..d3eb3c469b6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -375,6 +375,7 @@
    UNSPEC_XXEVAL
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6699,3 +6700,74 @@
   "xxspltidp %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
+
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB instruction instead the first XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxspltib %x0,0
+   xxspltib %x0,255
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..1435ef4ef4f 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXSPLTIB, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..e9a45d5159d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index d509459292c..d81198b163d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLFD.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
-  return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLFD.  */
+  return (vector double) { (double)0x1p-149f,		/* XXSPLTIB, */
+			   (double)0x1p-149f };		/* XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/meissner/heads/work061)] Generate XXSPLTI32DX on power10.
@ 2021-07-22  5:54 Michael Meissner
  0 siblings, 0 replies; 5+ messages in thread
From: Michael Meissner @ 2021-07-22  5:54 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:44d9a87a11b7fc5fd68800dd9e162ad5ea69fd88

commit 44d9a87a11b7fc5fd68800dd9e162ad5ea69fd88
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Jul 22 01:53:48 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    2021-07-22  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraint.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000-cpus.h (ISA_3_1_MASKS_SERVER): Add
            -mxxsplti32dx.
            (POWERPC_MASKS): Add -mxxsplti32dx.
            * config/rs6000/rs6000.c (xxsplti32dx_constant_float_p): New
            helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            * config/rs6000/rs6000.h (TARGET_XXSPLTI32DX): New macro.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
            count.
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-df.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Update insn
            count.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 ++
 gcc/config/rs6000/predicates.md                    |  18 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 116 +++++++++++++++++++++
 gcc/config/rs6000/rs6000.h                         |   2 +
 gcc/config/rs6000/rs6000.md                        |  67 ++++++++----
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  72 +++++++++++++
 .../gcc.target/powerpc/pr86731-fwrapv-longlong.c   |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-df.c     |   9 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 12 files changed, 287 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 5d84723e6a1..c74ac6ac9f9 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
   if (xxspltidp_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
   return xxspltidp_constant_p (op, mode, &value);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair f ISA 3.1 XXSPLTI32DX instructions.  Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index ce8e993100e..3015c1330ed 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3b41fc6230a..20ad9d0165e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6617,6 +6617,119 @@ xxspltidp_constant_p (rtx op,
   return true;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instruction.  If the instruction can be synthesized with
+   XXSPLTIDP or is 0/-1, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = value >> 32;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6665,6 +6778,9 @@ output_vec_const_move (rtx *operands)
 	  || xxspltidp_operand (vec, mode))
 	return "#";
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9cd0e654c81..9e1c09c3a64 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -512,6 +512,8 @@ extern int rs6000_vector_align[];
 
 /* Whether we can generate the XXSPLTI* prefixed instructions.  We also need
    VSX instructions to be generated.  */
+#define TARGET_XXSPLTI32DX	(TARGET_XXSPLTI32DX_DEBUG && TARGET_PREFIXED \
+				 && TARGET_VSX)
 #define TARGET_XXSPLTIDP	(TARGET_XXSPLTIDP_DEBUG && TARGET_PREFIXED \
 				 && TARGET_VSX)
 #define TARGET_XXSPLTIW		(TARGET_XXSPLTIW_DEBUG && TARGET_PREFIXED \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 0d2a9b56f6c..75a8a680395 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7723,17 +7723,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7756,19 +7756,28 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")
+	 *,          *,         *,          *,         p10,       p10")
    (set_attr "prefixed"
 	"*,          *,         *,          *,         *,         *,
 	 *,          *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         yes")])
+	 *,          *,         *,          *,         yes,       yes")
+   (set_attr "max_prefixed_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8028,18 +8037,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8057,24 +8066,33 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")
+             *,           *,          *,          p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8101,19 +8119,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8136,23 +8154,34 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")
+             *,           p8v,        p8v,        p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3d6266f9ba0..c57d8f9d138 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -640,6 +640,10 @@ mprivileged
 Target Var(rs6000_privileged) Init(0)
 Generate code that will run in privileged state.
 
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX_DEBUG) Init(1) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP_DEBUG) Init(1) Save
 Generate (do not generate) XXSPLTIDP instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3adf7e194d2..9ec143d285d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -375,6 +375,7 @@
    UNSPEC_XXEVAL
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6699,3 +6700,74 @@
   "xxspltidp %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
+
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB instruction instead the first XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxspltib %x0,0
+   xxspltib %x0,255
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
index bd1502bb30a..7fb6ec978a6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
@@ -26,9 +26,10 @@ vector signed long long splats4(void)
 
 /* Codegen will consist of splat and shift instructions for most types.
    If folding is enabled, the vec_sl tests using vector long long type will
-   generate a lvx instead of a vspltisw+vsld pair.  */
+   generate a lvx instead of a vspltisw+vsld pair.  On power10, it may
+   generate an xxsplti32dx instead of a load.  */
 
 /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
 /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
-/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxsplti32dx\M} 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..1435ef4ef4f 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXSPLTIB, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..e9a45d5159d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index d509459292c..d81198b163d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLFD.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
-  return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLFD.  */
+  return (vector double) { (double)0x1p-149f,		/* XXSPLTIB, */
+			   (double)0x1p-149f };		/* XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/meissner/heads/work061)] Generate XXSPLTI32DX on power10.
@ 2021-07-22  3:28 Michael Meissner
  0 siblings, 0 replies; 5+ messages in thread
From: Michael Meissner @ 2021-07-22  3:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6fdc179c7e7dae2001c5efd68ce8e185d7f81f44

commit 6fdc179c7e7dae2001c5efd68ce8e185d7f81f44
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Jul 21 23:21:06 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    2021-07-21  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraint.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000-cpus.h (ISA_3_1_MASKS_SERVER): Add
            -mxxsplti32dx.
            (POWERPC_MASKS): Add -mxxsplti32dx.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            support for -mxxsplti32dx.
            (xxsplti32dx_constant_float_p): New helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-df.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Update insn
            count.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 ++
 gcc/config/rs6000/predicates.md                    |  18 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 116 +++++++++++++++++++++
 gcc/config/rs6000/rs6000.h                         |   2 +
 gcc/config/rs6000/rs6000.md                        |  67 ++++++++----
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  72 +++++++++++++
 .../gcc.target/powerpc/vec-splat-constant-df.c     |   9 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 11 files changed, 284 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 5d84723e6a1..c74ac6ac9f9 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
   if (xxspltidp_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
   return xxspltidp_constant_p (op, mode, &value);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair f ISA 3.1 XXSPLTI32DX instructions.  Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index ce8e993100e..3015c1330ed 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3b41fc6230a..20ad9d0165e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6617,6 +6617,119 @@ xxspltidp_constant_p (rtx op,
   return true;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instruction.  If the instruction can be synthesized with
+   XXSPLTIDP or is 0/-1, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = value >> 32;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6665,6 +6778,9 @@ output_vec_const_move (rtx *operands)
 	  || xxspltidp_operand (vec, mode))
 	return "#";
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9cd0e654c81..9e1c09c3a64 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -512,6 +512,8 @@ extern int rs6000_vector_align[];
 
 /* Whether we can generate the XXSPLTI* prefixed instructions.  We also need
    VSX instructions to be generated.  */
+#define TARGET_XXSPLTI32DX	(TARGET_XXSPLTI32DX_DEBUG && TARGET_PREFIXED \
+				 && TARGET_VSX)
 #define TARGET_XXSPLTIDP	(TARGET_XXSPLTIDP_DEBUG && TARGET_PREFIXED \
 				 && TARGET_VSX)
 #define TARGET_XXSPLTIW		(TARGET_XXSPLTIW_DEBUG && TARGET_PREFIXED \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 0d2a9b56f6c..75a8a680395 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7723,17 +7723,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7756,19 +7756,28 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")
+	 *,          *,         *,          *,         p10,       p10")
    (set_attr "prefixed"
 	"*,          *,         *,          *,         *,         *,
 	 *,          *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         yes")])
+	 *,          *,         *,          *,         yes,       yes")
+   (set_attr "max_prefixed_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8028,18 +8037,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8057,24 +8066,33 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")
+             *,           *,          *,          p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8101,19 +8119,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8136,23 +8154,34 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")
+             *,           p8v,        p8v,        p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3d6266f9ba0..c57d8f9d138 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -640,6 +640,10 @@ mprivileged
 Target Var(rs6000_privileged) Init(0)
 Generate code that will run in privileged state.
 
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX_DEBUG) Init(1) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP_DEBUG) Init(1) Save
 Generate (do not generate) XXSPLTIDP instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3adf7e194d2..9ec143d285d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -375,6 +375,7 @@
    UNSPEC_XXEVAL
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6699,3 +6700,74 @@
   "xxspltidp %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
+
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB instruction instead the first XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxspltib %x0,0
+   xxspltib %x0,255
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..1435ef4ef4f 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXSPLTIB, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..e9a45d5159d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index d509459292c..d81198b163d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLFD.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
-  return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLFD.  */
+  return (vector double) { (double)0x1p-149f,		/* XXSPLTIB, */
+			   (double)0x1p-149f };		/* XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/meissner/heads/work061)] Generate XXSPLTI32DX on power10.
@ 2021-07-22  3:21 Michael Meissner
  0 siblings, 0 replies; 5+ messages in thread
From: Michael Meissner @ 2021-07-22  3:21 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:62c8d79422bdb30f19abe4ecedde350aae4ebc34

commit 62c8d79422bdb30f19abe4ecedde350aae4ebc34
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Jul 21 23:21:06 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    2021-07-21  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraint.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000-cpus.h (ISA_3_1_MASKS_SERVER): Add
            -mxxsplti32dx.
            (POWERPC_MASKS): Add -mxxsplti32dx.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            support for -mxxsplti32dx.
            (xxsplti32dx_constant_float_p): New helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (rs6000_opt_masks): Add -mxxsplti32dx.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-df.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Update insn
            count.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 ++
 gcc/config/rs6000/predicates.md                    |  18 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 116 +++++++++++++++++++++
 gcc/config/rs6000/rs6000.h                         |   2 +
 gcc/config/rs6000/rs6000.md                        |  67 ++++++++----
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  72 +++++++++++++
 .../gcc.target/powerpc/vec-splat-constant-df.c     |   9 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 11 files changed, 284 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 5d84723e6a1..c74ac6ac9f9 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
   if (xxspltidp_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
   return xxspltidp_constant_p (op, mode, &value);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair f ISA 3.1 XXSPLTI32DX instructions.  Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index ce8e993100e..3015c1330ed 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 3b41fc6230a..20ad9d0165e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6617,6 +6617,119 @@ xxspltidp_constant_p (rtx op,
   return true;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instruction.  If the instruction can be synthesized with
+   XXSPLTIDP or is 0/-1, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = value >> 32;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6665,6 +6778,9 @@ output_vec_const_move (rtx *operands)
 	  || xxspltidp_operand (vec, mode))
 	return "#";
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 9cd0e654c81..9e1c09c3a64 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -512,6 +512,8 @@ extern int rs6000_vector_align[];
 
 /* Whether we can generate the XXSPLTI* prefixed instructions.  We also need
    VSX instructions to be generated.  */
+#define TARGET_XXSPLTI32DX	(TARGET_XXSPLTI32DX_DEBUG && TARGET_PREFIXED \
+				 && TARGET_VSX)
 #define TARGET_XXSPLTIDP	(TARGET_XXSPLTIDP_DEBUG && TARGET_PREFIXED \
 				 && TARGET_VSX)
 #define TARGET_XXSPLTIW		(TARGET_XXSPLTIW_DEBUG && TARGET_PREFIXED \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 0d2a9b56f6c..75a8a680395 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7723,17 +7723,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7756,19 +7756,28 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")
+	 *,          *,         *,          *,         p10,       p10")
    (set_attr "prefixed"
 	"*,          *,         *,          *,         *,         *,
 	 *,          *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         yes")])
+	 *,          *,         *,          *,         yes,       yes")
+   (set_attr "max_prefixed_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8028,18 +8037,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8057,24 +8066,33 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")
+             *,           *,          *,          p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8101,19 +8119,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8136,23 +8154,34 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")
+             *,           p8v,        p8v,        p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3d6266f9ba0..c57d8f9d138 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -640,6 +640,10 @@ mprivileged
 Target Var(rs6000_privileged) Init(0)
 Generate code that will run in privileged state.
 
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX_DEBUG) Init(1) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP_DEBUG) Init(1) Save
 Generate (do not generate) XXSPLTIDP instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 3adf7e194d2..9ec143d285d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -375,6 +375,7 @@
    UNSPEC_XXEVAL
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6699,3 +6700,74 @@
   "xxspltidp %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
+
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB instruction instead the first XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxspltib %x0,0
+   xxspltib %x0,255
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..1435ef4ef4f 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXSPLTIB, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..e9a45d5159d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index d509459292c..d81198b163d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLFD.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
-  return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLFD.  */
+  return (vector double) { (double)0x1p-149f,		/* XXSPLTIB, */
+			   (double)0x1p-149f };		/* XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

* [gcc(refs/users/meissner/heads/work061)] Generate XXSPLTI32DX on power10.
@ 2021-07-21 22:57 Michael Meissner
  0 siblings, 0 replies; 5+ messages in thread
From: Michael Meissner @ 2021-07-21 22:57 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:2491ef79ae519b6a8668569e8023513b04c1150b

commit 2491ef79ae519b6a8668569e8023513b04c1150b
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Wed Jul 21 18:57:22 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    2021-07-21  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraint.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000-cpus.h (ISA_3_1_MASKS_SERVER): Add
            -mxxsplti32dx.
            (POWERPC_MASKS): Add -mxxsplti32dx.
            * config/rs6000/rs6000.c (rs6000_option_override_internal): Add
            support for -mxxsplti32dx.
            (xxsplti32dx_constant_float_p): New helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (rs6000_opt_masks): Add -mxxsplti32dx.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-df.c: Update insn count.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Update insn
            count.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 +
 gcc/config/rs6000/predicates.md                    |  18 +++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 124 +++++++++++++++++++++
 gcc/config/rs6000/rs6000.md                        |  67 +++++++----
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  72 ++++++++++++
 .../gcc.target/powerpc/vec-splat-constant-df.c     |   9 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 10 files changed, 290 insertions(+), 27 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index e1fadd63580..d665e2a94db 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/DI/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 5d84723e6a1..c74ac6ac9f9 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
   if (xxspltidp_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -684,6 +689,16 @@
   return xxspltidp_constant_p (op, mode, &value);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF CONST_VECTOR that can be
+;; loaded via a pair f ISA 3.1 XXSPLTI32DX instructions.  Do not return true if
+;; the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -703,6 +718,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index ce8e993100e..3015c1330ed 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,8 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 475a5f2a08c..da7e3368611 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -4312,6 +4312,14 @@ rs6000_option_override_internal (bool global_init_p)
       rs6000_isa_flags &= ~OPTION_MASK_PCREL;
     }
 
+  if (TARGET_XXSPLTI32DX < 0)
+    TARGET_XXSPLTI32DX = TARGET_PREFIXED;
+  else if (TARGET_XXSPLTI32DX > 0 && !TARGET_PREFIXED)
+    {
+      error ("%qs requires %qs", "-mxxspli32dx", "-mprefixed");
+      TARGET_XXSPLTI32DX = 0;
+    }
+
   if (TARGET_XXSPLTIDP < 0)
     TARGET_XXSPLTIDP = TARGET_PREFIXED;
   else if (TARGET_XXSPLTIDP > 0 && !TARGET_PREFIXED)
@@ -6633,6 +6641,119 @@ xxspltidp_constant_p (rtx op,
   return true;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instruction.  If the instruction can be synthesized with
+   XXSPLTIDP or is 0/-1, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	rtx ele = const_vector_element_all_same (op);
+	if (!ele)
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = value >> 32;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6681,6 +6802,9 @@ output_vec_const_move (rtx *operands)
 	  || xxspltidp_operand (vec, mode))
 	return "#";
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 0d2a9b56f6c..75a8a680395 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7723,17 +7723,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7756,19 +7756,28 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")
+	 *,          *,         *,          *,         p10,       p10")
    (set_attr "prefixed"
 	"*,          *,         *,          *,         *,         *,
 	 *,          *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         yes")])
+	 *,          *,         *,          *,         yes,       yes")
+   (set_attr "max_prefixed_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+	"*,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         *,
+	 *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8028,18 +8037,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8057,24 +8066,33 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")
+             *,           *,          *,          p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8101,19 +8119,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8136,23 +8154,34 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")
+             *,           p8v,        p8v,        p10,        p10")
    (set_attr "prefixed"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           *,          *,          yes")])
+             *,           *,          *,          yes,        yes")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index b5234676c03..0c5d334f31c 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -647,3 +647,7 @@ Generate (do not generate) XXSPLTIW instructions.
 mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP) Init(-1) Save
 Generate (do not generate) XXSPLTIDP instructions.
+
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX) Init(-1) Save
+Generate (do not generate) XXSPLTI32DX instructions.
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index c0b8d49e1d9..d3eb3c469b6 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -375,6 +375,7 @@
    UNSPEC_XXEVAL
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6699,3 +6700,74 @@
   "xxspltidp %x0,%1"
  [(set_attr "type" "vecperm")
   (set_attr "prefixed" "yes")])
+
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB instruction instead the first XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxspltib %x0,0
+   xxspltib %x0,255
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..1435ef4ef4f 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXSPLTIB, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..e9a45d5159d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index d509459292c..d81198b163d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLFD.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
-  return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLFD.  */
+  return (vector double) { (double)0x1p-149f,		/* XXSPLTIB, */
+			   (double)0x1p-149f };		/* XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2021-07-22  5:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-22  1:09 [gcc(refs/users/meissner/heads/work061)] Generate XXSPLTI32DX on power10 Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2021-07-22  5:54 Michael Meissner
2021-07-22  3:28 Michael Meissner
2021-07-22  3:21 Michael Meissner
2021-07-21 22:57 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).