[gcc(refs/users/meissner/heads/work067)] Generate XXSPLTI32DX on power10.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

* [gcc(refs/users/meissner/heads/work067)] Generate XXSPLTI32DX on power10.
@ 2021-09-07  2:15 Michael Meissner
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2021-09-07  2:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fc2edca14c8197df42539514ff832d6f787ea16e

commit fc2edca14c8197df42539514ff832d6f787ea16e
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Sep 6 22:14:47 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    At the present time, XXSPLTI32DX is not enabled by default.
    
    2021-09-06  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/constraints.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxsplti32dx_constant_float_p): New
            helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (prefixed_permute_p): Constants loaded with XXSPLTI32DX are
            prefixed.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
            count.
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            Add -mxxsplti32dx option.
            * gcc.target/powerpc/vec-splat-constant-df.c: Likewise.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Likewise.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 +
 gcc/config/rs6000/predicates.md                    |  19 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 122 ++++++++++++++++++++-
 gcc/config/rs6000/rs6000.md                        |  61 ++++++++---
 gcc/config/rs6000/rs6000.opt                       |   5 +
 gcc/config/rs6000/vsx.md                           |  74 +++++++++++++
 .../gcc.target/powerpc/pr86731-fwrapv-longlong.c   |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-df.c     |  11 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   7 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 11 files changed, 293 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 82fecca4a91..6317f928dc4 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b601f73600f..a5d51f4ad84 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -611,6 +611,11 @@
   if (lxvkq_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -678,6 +683,17 @@
   return lxvkq_constant_p (op, mode, &immediate);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF/V2DI CONST_VECTOR that
+;; can be loaded via a pair of ISA 3.1 XXSPLTI32DX instructions.  Do not return
+;; true if the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB
+;; to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -697,6 +713,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 181d20d7e05..744b1ad73e6 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -34,6 +34,8 @@ extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
 extern bool xxspltiw_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern bool lxvkq_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ba33ec5bc60..7bbf29a3e1c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6926,6 +6926,119 @@ lxvkq_constant_p (rtx op,
   return false;
 }
 
+/* Return true if OP is a floating point constant that can be loaded with the
+   XXSPLTI32DX instruction.  If the constant can be loaded with the simpler
+   XXSPLTIDP (constants that can fit as SFmode constants) or XXSPLTIB (0.0)
+   instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  We need to undo
+     this for XXSPLTI32DX.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with ISA 3.1
+   XXSPLTI32DX instruction.  If the constant can be synthesized with
+   XXSPLTIDP or is 0/-1, return false.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+
+     2) vector long long constants where each element is the same;
+
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = (value >> 32) & 0xffffffff;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6991,6 +7104,9 @@ output_vec_const_move (rtx *operands)
 	  return "lxvkq %x0,%2";
 	}
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -26770,7 +26886,11 @@ prefixed_permute_p (rtx_insn *insn)
     case DFmode:
     case SFmode:
     case V2DFmode:
-      return xxspltidp_operand (src, mode);
+      return (xxspltidp_operand (src, mode)
+	      || xxsplti32dx_operand (src, mode));
+
+    case V2DImode:
+      return xxsplti32dx_operand (src, mode);
 
     default:
       break;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf3bfed3b88..d6af66a1728 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7728,17 +7728,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7761,15 +7761,24 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")])
+	 *,          *,         *,          *,         p10,       p10")
+   (set_attr "max_prefixed_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8029,18 +8038,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8058,20 +8067,29 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")])
+             *,           *,          *,          p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8098,19 +8116,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8133,18 +8151,29 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")])
+             *,           p8v,        p8v,        p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3a9011f98f9..77ece90c710 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -653,6 +653,11 @@ mxxspltiw
 Target Undocumented Var(TARGET_XXSPLTIW) Init(0) Save
 Generate (do not generate) XXSPLTIW instructions.
 
+; Do not enable this by default at the present time.
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX) Init(0) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 ; Do not enable this by default at the present time.
 mlxvkq
 Target Undocumented Var(TARGET_LXVKQ) Init(0) Save
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4f716a9f2d2..9a99ea9331c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -376,6 +376,7 @@
    UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6620,6 +6621,79 @@
   [(set_attr "type" "vecperm")
    (set_attr "prefixed" "yes")])
 
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand" "eD"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB/XXLXOR/XXLORC instruction instead of the first
+     XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxlxor %x0,%x0,%x0
+   xxlorc %x0,%x0,%x0
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "veclogical,veclogical,vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
+
+
 ;; XXBLEND built-in function support
 (define_insn "xxblend_<mode>"
   [(set (match_operand:VM3 0 "register_operand" "=wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
index bd1502bb30a..7fb6ec978a6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
@@ -26,9 +26,10 @@ vector signed long long splats4(void)
 
 /* Codegen will consist of splat and shift instructions for most types.
    If folding is enabled, the vec_sl tests using vector long long type will
-   generate a lvx instead of a vspltisw+vsld pair.  */
+   generate a lvx instead of a vspltisw+vsld pair.  On power10, it may
+   generate an xxsplti32dx instead of a load.  */
 
 /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
 /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
-/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxsplti32dx\M} 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..daa9b584845 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..74b2466f74c 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index 82ffc86f8aa..f438c3787a0 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLVX.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
   return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLVX.  */
+			   (double)0x1p-149f };	/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/meissner/heads/work067)] Generate XXSPLTI32DX on power10.
@ 2021-09-07  0:26 Michael Meissner
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2021-09-07  0:26 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:a411bb50a9960637500a029558b4b23bd98daa69

commit a411bb50a9960637500a029558b4b23bd98daa69
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Sep 6 20:25:57 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    At the present time, XXSPLTI32DX is not enabled by default.
    
    2021-09-06  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/constraints.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxsplti32dx_constant_float_p): New
            helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (prefixed_permute_p): Constants loaded with XXSPLTI32DX are
            prefixed.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
            count.
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            Add -mxxsplti32dx option.
            * gcc.target/powerpc/vec-splat-constant-df.c: Likewise.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Likewise.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 +
 gcc/config/rs6000/predicates.md                    |  19 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 122 ++++++++++++++++++++-
 gcc/config/rs6000/rs6000.md                        |  61 ++++++++---
 gcc/config/rs6000/rs6000.opt                       |   5 +
 gcc/config/rs6000/vsx.md                           |  74 +++++++++++++
 .../gcc.target/powerpc/pr86731-fwrapv-longlong.c   |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-df.c     |  11 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   7 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 11 files changed, 293 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 82fecca4a91..6317f928dc4 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b601f73600f..a5d51f4ad84 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -611,6 +611,11 @@
   if (lxvkq_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -678,6 +683,17 @@
   return lxvkq_constant_p (op, mode, &immediate);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF/V2DI CONST_VECTOR that
+;; can be loaded via a pair of ISA 3.1 XXSPLTI32DX instructions.  Do not return
+;; true if the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB
+;; to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -697,6 +713,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 181d20d7e05..744b1ad73e6 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -34,6 +34,8 @@ extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
 extern bool xxspltiw_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern bool lxvkq_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ba33ec5bc60..7bbf29a3e1c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6926,6 +6926,119 @@ lxvkq_constant_p (rtx op,
   return false;
 }
 
+/* Return true if OP is a SFmode or DFmode CONST_DOUBLE that should be loaded
+   with a pair of XXSPLTI32DX instructions.  If the constant can be loaded
+   with the simpler XXSPLTIDP (constants that can fit as SFmode constants) or
+   XXSPLTIB (0.0) instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  They are only written when true is returned.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  On little endian
+     targets swap the words so element 0 is the one returned via HIGH_PTR.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with a pair of
+   ISA 3.1 XXSPLTI32DX instructions.  Return false if the constant is zero or
+   if one of the other sequences mentioned below can be used instead.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+     2) vector long long constants where each element is the same and is
+        outside of the range -128..127 (XXSPLTIB and VEXTSB2D handle those);
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  Both are cleared before any check is made, so
+   they are 0 whenever false is returned.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = (value >> 32) & 0xffffffff;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6991,6 +7104,9 @@ output_vec_const_move (rtx *operands)
 	  return "lxvkq %x0,%2";
 	}
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -26770,7 +26886,11 @@ prefixed_permute_p (rtx_insn *insn)
     case DFmode:
     case SFmode:
     case V2DFmode:
-      return xxspltidp_operand (src, mode);
+      return (xxspltidp_operand (src, mode)
+	      || xxsplti32dx_operand (src, mode));
+
+    case V2DImode:
+      return xxsplti32dx_operand (src, mode);
 
     default:
       break;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf3bfed3b88..d6af66a1728 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7728,17 +7728,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7761,15 +7761,24 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")])
+	 *,          *,         *,          *,         p10,       p10")
+   (set_attr "max_prefixed_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8029,18 +8038,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8058,20 +8067,29 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")])
+             *,           *,          *,          p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8098,19 +8116,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8133,18 +8151,29 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")])
+             *,           p8v,        p8v,        p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3a9011f98f9..77ece90c710 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -653,6 +653,11 @@ mxxspltiw
 Target Undocumented Var(TARGET_XXSPLTIW) Init(0) Save
 Generate (do not generate) XXSPLTIW instructions.
 
+; Do not enable this by default at the present time.
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX) Init(0) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 ; Do not enable this by default at the present time.
 mlxvkq
 Target Undocumented Var(TARGET_LXVKQ) Init(0) Save
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4f716a9f2d2..9a99ea9331c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -376,6 +376,7 @@
    UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6620,6 +6621,79 @@
   [(set_attr "type" "vecperm")
    (set_attr "prefixed" "yes")])
 
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand" "eD"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB/XXLXOR/XXLORC instruction instead of the first
+     XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxlxor %x0,%x0,%x0
+   xxlorc %x0,%x0,%x0
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "veclogical,veclogical,vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
+
+
 ;; XXBLEND built-in function support
 (define_insn "xxblend_<mode>"
   [(set (match_operand:VM3 0 "register_operand" "=wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
index bd1502bb30a..7fb6ec978a6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
@@ -26,9 +26,10 @@ vector signed long long splats4(void)
 
 /* Codegen will consist of splat and shift instructions for most types.
    If folding is enabled, the vec_sl tests using vector long long type will
-   generate a lvx instead of a vspltisw+vsld pair.  */
+   generate a lvx instead of a vspltisw+vsld pair.  On power10, it may
+   generate an xxsplti32dx instead of a load.  */
 
 /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
 /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
-/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxsplti32dx\M} 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..daa9b584845 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..74b2466f74c 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index 82ffc86f8aa..f438c3787a0 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLVX.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
   return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLVX.  */
+			   (double)0x1p-149f };	/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 3+ messages in thread

* [gcc(refs/users/meissner/heads/work067)] Generate XXSPLTI32DX on power10.
@ 2021-09-02 23:34 Michael Meissner
  0 siblings, 0 replies; 3+ messages in thread
From: Michael Meissner @ 2021-09-02 23:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8722f501aa5e7aa95ae78ee9dfe5308955708a8f

commit 8722f501aa5e7aa95ae78ee9dfe5308955708a8f
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Thu Sep 2 19:34:02 2021 -0400

    Generate XXSPLTI32DX on power10.
    
    This patch generates XXSPLTI32DX for SF/DF floating point constants that
    cannot be generated with the XXSPLTIDP instruction.  In addition, it adds
    support for using XXSPLTI32DX to load up V2DF constants, where both constants
    are the same.
    
    At the present time, XXSPLTI32DX is not enabled by default.
    
    2021-09-02  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
            * config/rs6000/constraints.md (eD): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If the constant
            can be loaded with XXSPLTI32DX, it is easy.
            (xxsplti32dx_operand): New predicate.
            (easy_vector_constant): If the constant can be loaded with
            XXSPLTI32DX, it is easy.
            * config/rs6000/rs6000-protos.h (xxsplti32dx_constant_p): New
            declaration.
            * config/rs6000/rs6000.c (xxsplti32dx_constant_float_p): New
            helper function.
            (xxsplti32dx_constant_p): New function.
            (output_vec_const_move): If the operand can be loaded with
            XXSPLTI32DX, split it.
            (prefixed_permute_p): Constants loaded with XXSPLTI32DX are
            prefixed.
            * config/rs6000/rs6000.md (movsf_hardfloat): Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat32, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            (mov<mode>_hardfloat64, FMOVE64 iterator):  Add support for
            constants loaded with XXSPLTI32DX.
            * config/rs6000/rs6000.opt (-mxxsplti32dx): New option.
            * config/rs6000/vsx.md (UNSPEC_XXSPLTI32DX_CONST): New unspec.
            (XXSPLTI32DX): New mode iterator.
            (xxsplti32dx_<mode>): New insn and splitter for XXSPLTI32DX.
            (xxsplti32dx_<mode>_first): New insn.
            (xxsplti32dx_<mode>_second): New insn.
    
    gcc/testsuite/
            * gcc.target/powerpc/pr86731-fwrapv-longlong.c: Update insn
            count.
            * gcc.target/powerpc/vec-splat-constant-sf.c: Update insn count.
            Add -mxxsplti32dx option.
            * gcc.target/powerpc/vec-splat-constant-df.c: Likewise.
            * gcc.target/powerpc/vec-splat-constant-v2df.c: Likewise.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   6 +
 gcc/config/rs6000/predicates.md                    |  19 ++++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         | 122 ++++++++++++++++++++-
 gcc/config/rs6000/rs6000.md                        |  61 ++++++++---
 gcc/config/rs6000/rs6000.opt                       |   5 +
 gcc/config/rs6000/vsx.md                           |  74 +++++++++++++
 .../gcc.target/powerpc/pr86731-fwrapv-longlong.c   |   5 +-
 .../gcc.target/powerpc/vec-splat-constant-df.c     |  11 +-
 .../gcc.target/powerpc/vec-splat-constant-sf.c     |   7 +-
 .../gcc.target/powerpc/vec-splat-constant-v2df.c   |  10 +-
 11 files changed, 293 insertions(+), 29 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 82fecca4a91..6317f928dc4 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -208,6 +208,12 @@
   (and (match_code "const_int")
        (match_test "((- (unsigned HOST_WIDE_INT) ival) + 0x8000) < 0x10000")))
 
+;; SF/DF/V2DF/V2DI scalar or vector constant that can be loaded with a pair
+;; of XXSPLTI32DX instructions.
+(define_constraint "eD"
+  "A vector constant that can be loaded with XXSPLTI32DX instructions."
+  (match_operand 0 "xxsplti32dx_operand"))
+
 ;; SF/DF/V2DF scalar or vector constant that can be loaded with XXSPLTIDP
 (define_constraint "eF"
   "A vector constant that can be loaded with the XXSPLTIDP instruction."
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index b601f73600f..a5d51f4ad84 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -611,6 +611,11 @@
   if (lxvkq_operand (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 XXSPLTI32DX instruction, see if the constant can
+     be loaded with a pair of those instructions.  */
+  if (xxsplti32dx_operand (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -678,6 +683,17 @@
   return lxvkq_constant_p (op, mode, &immediate);
 })
 
+;; Return 1 if operand is a SF/DF CONST_DOUBLE or V2DF/V2DI CONST_VECTOR that
+;; can be loaded via a pair of ISA 3.1 XXSPLTI32DX instructions.  Do not return
+;; true if the value can be loaded with the XXSPLTIDP instruction or XXSPLTIB
+;; to load 0.
+(define_predicate "xxsplti32dx_operand"
+  (match_code "const_double,const_vector,vec_duplicate")
+{
+  HOST_WIDE_INT high = 0, low = 0;
+  return xxsplti32dx_constant_p (op, mode, &high, &low);
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
@@ -697,6 +713,9 @@
       if (xxspltidp_operand (op, mode))
 	return true;
 
+      if (xxsplti32dx_operand (op, mode))
+	return true;
+
       if (TARGET_P9_VECTOR
           && xxspltib_constant_p (op, mode, &num_insns, &value))
 	return true;
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 181d20d7e05..744b1ad73e6 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -34,6 +34,8 @@ extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern bool xxspltidp_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
 extern bool xxspltiw_constant_p (rtx, machine_mode, HOST_WIDE_INT *);
+extern bool xxsplti32dx_constant_p (rtx, machine_mode, HOST_WIDE_INT *,
+				    HOST_WIDE_INT *);
 extern bool lxvkq_constant_p (rtx, machine_mode, int *);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2e768a8cbf0..b9ebd56c993 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6926,6 +6926,119 @@ lxvkq_constant_p (rtx op,
   return false;
 }
 
+/* Return true if OP is a SFmode or DFmode CONST_DOUBLE that should be loaded
+   with a pair of XXSPLTI32DX instructions.  If the constant can be loaded
+   with the simpler XXSPLTIDP (constants that can fit as SFmode constants) or
+   XXSPLTIB (0.0) instructions, return false.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  They are only written when true is returned.  */
+
+static bool
+xxsplti32dx_constant_float_p (rtx op,
+			      machine_mode mode,
+			      HOST_WIDE_INT *high_ptr,
+			      HOST_WIDE_INT *low_ptr)
+{
+  HOST_WIDE_INT xxspltidp_value = 0;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  if (mode != SFmode && mode != DFmode)
+    return false;
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  if (xxspltidp_constant_p (op, mode, &xxspltidp_value))
+    return false;
+
+  long high_low[2];
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  REAL_VALUE_TO_TARGET_DOUBLE (*rv, high_low);
+
+  /* The double precision value is laid out in memory order.  On little endian
+     targets swap the words so element 0 is the one returned via HIGH_PTR.  */
+  if (!BYTES_BIG_ENDIAN)
+    std::swap (high_low[0], high_low[1]);
+
+  *high_ptr = high_low[0];
+  *low_ptr = high_low[1];
+  return true;
+}
+
+/* Return true if OP is of the given MODE and can be synthesized with a pair of
+   ISA 3.1 XXSPLTI32DX instructions.  Return false if the constant is zero or
+   if one of the other sequences mentioned below can be used instead.
+
+   We handle the following types of constants:
+
+     1) vector double constants where each element is the same and you can't
+        load the constant with XXSPLTIDP;
+     2) vector long long constants where each element is the same and is
+        outside of the range -128..127 (XXSPLTIB and VEXTSB2D handle those);
+     3) Scalar floating point constants that can't be loaded with XXSPLTIDP.
+
+   Return the two 32-bit constants to use in the two XXSPLTI32DX instructions
+   via HIGH_PTR and LOW_PTR.  Both are cleared before any check is made, so
+   they are 0 whenever false is returned.  */
+
+bool
+xxsplti32dx_constant_p (rtx op,
+			machine_mode mode,
+			HOST_WIDE_INT *high_ptr,
+			HOST_WIDE_INT *low_ptr)
+{
+  *high_ptr = *low_ptr = 0;
+
+  if (!TARGET_PREFIXED || !TARGET_VSX || !TARGET_XXSPLTI32DX)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (op == CONST0_RTX (mode))
+    return false;
+
+  switch (mode)
+    {
+    default:
+      break;
+
+    case E_V2DFmode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	return xxsplti32dx_constant_float_p (ele, DFmode, high_ptr, low_ptr);
+      }
+
+    case E_SFmode:
+    case E_DFmode:
+      return xxsplti32dx_constant_float_p (op, mode, high_ptr, low_ptr);
+
+    case E_V2DImode:
+      {
+	if (!const_vector_all_elements_equal_p (op, mode))
+	  return false;
+
+	/* If we can generate XXSPLTIB and VEXTSB2D, don't return true.  */
+	rtx ele = CONST_VECTOR_ELT (op, 0);
+	HOST_WIDE_INT value = INTVAL (ele);
+	if (IN_RANGE (value, -128, 127))
+	  return false;
+
+	*high_ptr = (value >> 32) & 0xffffffff;
+	*low_ptr = value & 0xffffffff;
+	return true;
+      }
+    }
+
+  return false;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -6991,6 +7104,9 @@ output_vec_const_move (rtx *operands)
 	  return "lxvkq %x0,%2";
 	}
 
+      if (xxsplti32dx_operand (vec, mode))
+	return "#";
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -26760,7 +26876,11 @@ prefixed_permute_p (rtx_insn *insn)
     case DFmode:
     case SFmode:
     case V2DFmode:
-      return xxspltidp_operand (src, mode);
+      return (xxspltidp_operand (src, mode)
+	      || xxsplti32dx_operand (src, mode));
+
+    case V2DImode:
+      return xxsplti32dx_operand (src, mode);
 
     default:
       break;
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index bf3bfed3b88..d6af66a1728 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -7728,17 +7728,17 @@
 ;;
 ;;	LWZ          LFS        LXSSP       LXSSPX     STFS       STXSSP
 ;;	STXSSPX      STW        XXLXOR      LI         FMR        XSCPSGNDP
-;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP
+;;	MR           MT<x>      MF<x>       NOP        XXSPLTIDP  XXSPLTI32DX
 
 (define_insn "movsf_hardfloat"
   [(set (match_operand:SF 0 "nonimmediate_operand"
 	 "=!r,       f,         v,          wa,        m,         wY,
 	  Z,         m,         wa,         !r,        f,         wa,
-	  !r,        *c*l,      !r,         *h,        wa")
+	  !r,        *c*l,      !r,         *h,        wa,        wa")
 	(match_operand:SF 1 "input_operand"
 	 "m,         m,         wY,         Z,         f,         v,
 	  wa,        r,         j,          j,         f,         wa,
-	  r,         r,         *h,         0,         eF"))]
+	  r,         r,         *h,         0,         eF,        eD"))]
   "(register_operand (operands[0], SFmode)
    || register_operand (operands[1], SFmode))
    && TARGET_HARD_FLOAT
@@ -7761,15 +7761,24 @@
    mt%0 %1
    mf%1 %0
    nop
+   #
    #"
   [(set_attr "type"
 	"load,       fpload,    fpload,     fpload,    fpstore,   fpstore,
 	 fpstore,    store,     veclogical, integer,   fpsimple,  fpsimple,
-	 *,          mtjmpr,    mfjmpr,     *,         vecperm")
+	 *,          mtjmpr,    mfjmpr,     *,         vecperm,   vecperm")
    (set_attr "isa"
 	"*,          *,         p9v,        p8v,       *,         p9v,
 	 p8v,        *,         *,          *,         *,         *,
-	 *,          *,         *,          *,         p10")])
+	 *,          *,         *,          *,         p10,       p10")
+   (set_attr "max_prefixed_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")
+   (set_attr "num_insns"
+        "*,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         *,
+         *,          *,         *,          *,         *,         2")])
 
 ;;	LWZ          LFIWZX     STW        STFIWX     MTVSRWZ    MFVSRWZ
 ;;	FMR          MR         MT%0       MF%1       NOP
@@ -8029,18 +8038,18 @@
 
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSD         STXSD       XXLOR       XXLXOR      GPR<-0
-;;           LWZ          STW         MR          XXSPLTIDP
+;;           LWZ          STW         MR          XXSPLTIDP   XXSPLTI32DX
 
 
 (define_insn "*mov<mode>_hardfloat32"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
             "=m,          d,          d,          <f64_p9>,   wY,
               <f64_av>,   Z,          <f64_vsx>,  <f64_vsx>,  !r,
-              Y,          r,          !r,         wa")
+              Y,          r,          !r,         wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
              "d,          m,          d,          wY,         <f64_p9>,
               Z,          <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
-              r,          Y,          r,          eF"))]
+              r,          Y,          r,          eF,         eD"))]
   "! TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8058,20 +8067,29 @@
    #
    #
    #
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, two,
-             store,       load,       two,        vecperm")
+             store,       load,       two,        vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "length"
             "*,           *,          *,          *,          *,
              *,           *,          *,          *,          8,
-             8,           8,          8,          *")
+             8,           8,          8,          *,          *")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
-             *,           *,          *,          p10")])
+             *,           *,          *,          p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STW      LWZ     MR      G-const H-const F-const
 
@@ -8098,19 +8116,19 @@
 ;;           STFD         LFD         FMR         LXSD        STXSD
 ;;           LXSDX        STXSDX      XXLOR       XXLXOR      LI 0
 ;;           STD          LD          MR          MT{CTR,LR}  MF{CTR,LR}
-;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP
+;;           NOP          MFVSRD      MTVSRD      XXSPLTIDP   XXSPLTI32DX
 
 (define_insn "*mov<mode>_hardfloat64"
   [(set (match_operand:FMOVE64 0 "nonimmediate_operand"
            "=m,           d,          d,          <f64_p9>,   wY,
              <f64_av>,    Z,          <f64_vsx>,  <f64_vsx>,  !r,
              YZ,          r,          !r,         *c*l,       !r,
-            *h,           r,          <f64_dm>,   wa")
+            *h,           r,          <f64_dm>,   wa,         wa")
 	(match_operand:FMOVE64 1 "input_operand"
             "d,           m,          d,          wY,         <f64_p9>,
              Z,           <f64_av>,   <f64_vsx>,  <zero_fp>,  <zero_fp>,
              r,           YZ,         r,          r,          *h,
-             0,           <f64_dm>,   r,          eF"))]
+             0,           <f64_dm>,   r,          eF,         eD"))]
   "TARGET_POWERPC64 && TARGET_HARD_FLOAT
    && (gpc_reg_operand (operands[0], <MODE>mode)
        || gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8133,18 +8151,29 @@
    nop
    mfvsrd %0,%x1
    mtvsrd %x0,%1
+   #
    #"
   [(set_attr "type"
             "fpstore,     fpload,     fpsimple,   fpload,     fpstore,
              fpload,      fpstore,    veclogical, veclogical, integer,
              store,       load,       *,          mtjmpr,     mfjmpr,
-             *,           mfvsr,      mtvsr,      vecperm")
+             *,           mfvsr,      mtvsr,      vecperm,    vecperm")
    (set_attr "size" "64")
    (set_attr "isa"
             "*,           *,          *,          p9v,        p9v,
              p7v,         p7v,        *,          *,          *,
              *,           *,          *,          *,          *,
-             *,           p8v,        p8v,        p10")])
+             *,           p8v,        p8v,        p10,        p10")
+   (set_attr "max_prefixed_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")
+   (set_attr "num_insns"
+            "*,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          *,
+             *,           *,          *,          *,          2")])
 
 ;;           STD      LD       MR      MT<SPR> MF<SPR> G-const
 ;;           H-const  F-const  Special
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 3a9011f98f9..77ece90c710 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -653,6 +653,11 @@ mxxspltiw
 Target Undocumented Var(TARGET_XXSPLTIW) Init(0) Save
 Generate (do not generate) XXSPLTIW instructions.
 
+; Do not enable this by default at the present time.
+mxxsplti32dx
+Target Undocumented Var(TARGET_XXSPLTI32DX) Init(0) Save
+Generate (do not generate) XXSPLTI32DX instructions.
+
 ; Do not enable this by default at the present time.
 mlxvkq
 Target Undocumented Var(TARGET_LXVKQ) Init(0) Save
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4f716a9f2d2..9a99ea9331c 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -376,6 +376,7 @@
    UNSPEC_XXSPLTIW
    UNSPEC_XXSPLTIDP
    UNSPEC_XXSPLTI32DX
+   UNSPEC_XXSPLTI32DX_CONST
    UNSPEC_XXBLEND
    UNSPEC_XXPERMX
   ])
@@ -6620,6 +6621,79 @@
   [(set_attr "type" "vecperm")
    (set_attr "prefixed" "yes")])
 
+;; XXSPLTI32DX used to create 64-bit constants or vector constants where the
+;; even elements match and the odd elements match.
+(define_mode_iterator XXSPLTI32DX [SF DF V2DF V2DI])
+
+(define_insn_and_split "*xxsplti32dx_<mode>"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(match_operand:XXSPLTI32DX 1 "xxsplti32dx_operand" "eD"))]
+  "TARGET_XXSPLTI32DX"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 2)
+			     (match_dup 3)] UNSPEC_XXSPLTI32DX_CONST))
+   (set (match_dup 0)
+	(unspec:XXSPLTI32DX [(match_dup 0)
+			     (match_dup 4)
+			     (match_dup 5)] UNSPEC_XXSPLTI32DX_CONST))]
+{
+  HOST_WIDE_INT high = 0, low = 0;
+
+  if (!xxsplti32dx_constant_p (operands[1], <MODE>mode, &high, &low))
+    gcc_unreachable ();
+
+  /* If the low bits are 0 or all 1s, initialize that word first.  This way we
+     can use a smaller XXSPLTIB/XXLXOR/XXLORC instruction instead of the first
+     XXSPLTI32DX.  */
+  if (low == 0 || low ==  -1)
+    {
+      operands[2] = const1_rtx;
+      operands[3] = GEN_INT (low);
+      operands[4] = const0_rtx;
+      operands[5] = GEN_INT (high);
+    }
+  else
+    {
+      operands[2] = const0_rtx;
+      operands[3] = GEN_INT (high);
+      operands[4] = const1_rtx;
+      operands[5] = GEN_INT (low);
+    }
+}
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")
+   (set_attr "num_insns" "2")
+   (set_attr "max_prefixed_insns" "2")])
+
+;; First word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_first"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa,wa,wa")
+	(unspec:XXSPLTI32DX [(match_operand 1 "u1bit_cint_operand" "n,n,n")
+			     (match_operand 2 "const_int_operand" "O,wM,n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "@
+   xxlxor %x0,%x0,%x0
+   xxlorc %x0,%x0,%x0
+   xxsplti32dx %x0,%1,%2"
+  [(set_attr "type" "veclogical,veclogical,vecperm")
+   (set_attr "prefixed" "*,*,yes")])
+
+;; Second word of XXSPLTI32DX
+(define_insn "*xxsplti32dx_<mode>_second"
+  [(set (match_operand:XXSPLTI32DX 0 "vsx_register_operand" "=wa")
+	(unspec:XXSPLTI32DX [(match_operand:XXSPLTI32DX 1 "vsx_register_operand" "0")
+			     (match_operand 2 "u1bit_cint_operand" "n")
+			     (match_operand 3 "const_int_operand" "n")]
+			    UNSPEC_XXSPLTI32DX_CONST))]
+  "TARGET_XXSPLTI32DX"
+  "xxsplti32dx %x0,%2,%3"
+  [(set_attr "type" "vecperm")
+   (set_attr "prefixed" "yes")])
+
+
 ;; XXBLEND built-in function support
 (define_insn "xxblend_<mode>"
   [(set (match_operand:VM3 0 "register_operand" "=wa")
diff --git a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
index bd1502bb30a..7fb6ec978a6 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr86731-fwrapv-longlong.c
@@ -26,9 +26,10 @@ vector signed long long splats4(void)
 
 /* Codegen will consist of splat and shift instructions for most types.
    If folding is enabled, the vec_sl tests using vector long long type will
-   generate a lvx instead of a vspltisw+vsld pair.  */
+   generate a lvx instead of a vspltisw+vsld pair.  On power10, it may
+   generate an xxsplti32dx instead of a load.  */
 
 /* { dg-final { scan-assembler-times {\mvspltis[bhw]\M} 0 } } */
 /* { dg-final { scan-assembler-times {\mvsl[bhwd]\M} 0 } } */
-/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M} 2 } } */
+/* { dg-final { scan-assembler-times {\mp?lxv\M|\mlxv\M|\mlxvd2x\M|\mxxsplti32dx\M} 2 } } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
index 8f6e176f9af..27ded2b6d1d 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -48,13 +48,16 @@ scalar_double_m_inf (void)	/* XXSPLTIDP.  */
 double
 scalar_double_pi (void)
 {
-  return M_PI;			/* PLFD.  */
+  return M_PI;			/* 2x XXSPLTI32DX.  */
 }
 
 double
 scalar_double_denorm (void)
 {
-  return 0x1p-149f;		/* PLFD.  */
+  return 0x1p-149f;		/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplfd\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxsd\M}         } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
index 72504bdfbbd..74b2466f74c 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-sf.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -57,4 +57,7 @@ scalar_float_denorm (void)
   return 0x1p-149f;		/* PLFS.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   6 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 1 } } */
+/* { dg-final { scan-assembler-not   {\mplfs\M}          } } */
+/* { dg-final { scan-assembler-not   {\mplxssp\M}        } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
index 82ffc86f8aa..f438c3787a0 100644
--- a/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
+++ b/gcc/testsuite/gcc.target/powerpc/vec-splat-constant-v2df.c
@@ -1,6 +1,6 @@
 /* { dg-do compile } */
 /* { dg-require-effective-target power10_ok } */
-/* { dg-options "-mdejagnu-cpu=power10 -O2" } */
+/* { dg-options "-mdejagnu-cpu=power10 -mxxsplti32dx -O2" } */
 
 #include <math.h>
 
@@ -51,14 +51,16 @@ v2df_double_m_inf (void)
 vector double
 v2df_double_pi (void)
 {
-  return (vector double) { M_PI, M_PI };		/* PLVX.  */
+  return (vector double) { M_PI, M_PI };		/* 2x XXSPLTI32DX.  */
 }
 
 vector double
 v2df_double_denorm (void)
 {
   return (vector double) { (double)0x1p-149f,
-			   (double)0x1p-149f };		/* PLVX.  */
+			   (double)0x1p-149f };	/* XXLXOR, XXSPLTI32DX.  */
 }
 
-/* { dg-final { scan-assembler-times {\mxxspltidp\M} 5 } } */
+/* { dg-final { scan-assembler-times {\mxxspltidp\M}   5 } } */
+/* { dg-final { scan-assembler-times {\mxxsplti32dx\M} 3 } } */
+/* { dg-final { scan-assembler-not   {\mplxv\M}          } } */


^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2021-09-07  2:15 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-09-07  2:15 [gcc(refs/users/meissner/heads/work067)] Generate XXSPLTI32DX on power10 Michael Meissner
  -- strict thread matches above, loose matches on Subject: below --
2021-09-07  0:26 Michael Meissner
2021-09-02 23:34 Michael Meissner

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).