From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <meissner@sourceware.org>
Received: by sourceware.org (Postfix, from userid 1005)
 id E80743858402; Mon,  4 Oct 2021 21:16:45 +0000 (GMT)
DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org E80743858402
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work070)] Add LXVKQ support.
X-Act-Checkin: gcc
X-Git-Author: Michael Meissner <meissner@linux.ibm.com>
X-Git-Refname: refs/users/meissner/heads/work070
X-Git-Oldrev: f62b92fd93d8124fe9773cd3202de6bbf117d656
X-Git-Newrev: 461a8404822977cad2150489fbfac172b75ad1aa
Message-Id: <20211004211645.E80743858402@sourceware.org>
Date: Mon,  4 Oct 2021 21:16:45 +0000 (GMT)
X-BeenThere: gcc-cvs@gcc.gnu.org
X-Mailman-Version: 2.1.29
Precedence: list
List-Id: Gcc-cvs mailing list <gcc-cvs.gcc.gnu.org>
List-Unsubscribe: <https://gcc.gnu.org/mailman/options/gcc-cvs>,
 <mailto:gcc-cvs-request@gcc.gnu.org?subject=unsubscribe>
List-Archive: <https://gcc.gnu.org/pipermail/gcc-cvs/>
List-Help: <mailto:gcc-cvs-request@gcc.gnu.org?subject=help>
List-Subscribe: <https://gcc.gnu.org/mailman/listinfo/gcc-cvs>,
 <mailto:gcc-cvs-request@gcc.gnu.org?subject=subscribe>
X-List-Received-Date: Mon, 04 Oct 2021 21:16:46 -0000

https://gcc.gnu.org/g:461a8404822977cad2150489fbfac172b75ad1aa

commit 461a8404822977cad2150489fbfac172b75ad1aa
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Oct 4 17:15:15 2021 -0400

    Add LXVKQ support.
    
    This patch adds support to generate the LXVKQ instruction to load specific
    IEEE-128 floating point constants.
    
    2021-10-04  Michael Meissner  <meissner@linux.ibm.com>
    
    gcc/
    
            * config/rs6000/constraints.md (eQ): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): If we can use
            LXVKQ, it is an easy floating point constant.
            (easy_fp_constant_ieee128): New predicate.
            * config/rs6000/rs6000-protos.h (xxspltidp_constant_immediate):
            New declaration.
            * config/rs6000/rs6000.c (xxspltidp_constant_immediate): New
            function.
            (output_vec_const_move): Add support for LXVKQ.
            (rs6000_output_move_128bit): Likewise.
            * config/rs6000/rs6000.opt (-mlxvkq): New debug option.
            * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
            LXVKQ.
            (vsx_mov<mode>_32bit): Likewise.
            * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
            eQ constraint.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/float128-constant.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   5 +
 gcc/config/rs6000/predicates.md                    |  78 +++++++++++
 gcc/config/rs6000/rs6000-protos.h                  |   1 +
 gcc/config/rs6000/rs6000.c                         | 102 +++++++++++++++
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  28 ++--
 gcc/doc/md.texi                                    |   3 +
 .../gcc.target/powerpc/float128-constant.c         | 144 +++++++++++++++++++++
 8 files changed, 351 insertions(+), 14 deletions(-)
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 1ff46c9f4fc..1700657abe9 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -223,6 +223,11 @@
   "A 128-bit vector constant that can be loaded with the XXSPLTIDP instruction."
   (match_operand 0 "easy_vector_constant_64bit_element"))
 
+;; KF/TF scalar than can be loaded with LXVKQ
+(define_constraint "eQ"
+  "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+  (match_operand 0 "easy_fp_constant_ieee128"))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 7544ac87700..30e89ec79f0 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,11 @@
   if (easy_fp_constant_64bit_scalar (op, mode))
     return 1;
 
+  /* If we have the ISA 3.1 LXVKQ instruction, see if the constant can be loaded
+     with that instruction.  */
+  if (easy_fp_constant_ieee128 (op, mode))
+    return 1;
+
   /* Otherwise consider floating point constants hard, so that the
      constant gets pushed to memory during the early RTL phases.  This
      has the advantage that double precision constants that can be
@@ -777,6 +782,79 @@
   return num_insns == 1;
 })
 
+;; Return 1 if the operand is an IEEE 128-bit special constant that can be
+;; loaded with the LXVKQ instruction.
+(define_predicate "easy_fp_constant_ieee128"
+  (match_code "const_double")
+{
+  if (!TARGET_LXVKQ || !TARGET_POWER10 || !TARGET_VSX || !TARGET_FLOAT128_HW)
+    return false;
+
+  if (mode == VOIDmode)
+    mode = GET_MODE (op);
+
+  if (!FLOAT128_IEEE_P (mode))
+    return false;
+
+  if (!CONST_DOUBLE_P (op))
+    return false;
+
+  /* Special values (+/-infinity, -0.0.  */
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+  if (real_isinf (rv) || real_isnegzero (rv))
+    return true;
+
+  /* Only recognize the normal NaN.  Do not recognize NaNs with the negative
+     sign, signaling NaNs, or NaNs that have non-zero mantissa.  */
+  if (real_isnan (rv))
+    {
+      long w[4];
+
+      real_to_target (&w[0], rv, mode);
+      return (BYTES_BIG_ENDIAN
+	      ? (w[0] == 0x7fff8000 && w[1] == 0 && w[2] == 0 && w[3] == 0)
+	      : (w[3] == 0x7fff8000 && w[2] == 0 && w[1] == 0 && w[0] == 0));
+    }
+
+  if (real_issignaling_nan (rv))
+    return false;
+
+  /* All of the values generated can be expressed as SFmode values, if it
+     doesn't fit in SFmode, exit.  */
+  if (!exact_real_truncate (SFmode, rv))
+    return false;
+
+  /* The other values are all integers 1..7, and -1..-7.  */
+  if (!real_isinteger (rv, mode))
+    return false;
+
+  HOST_WIDE_INT value = real_to_integer (rv);
+  switch (value)
+    {
+    default:
+      break;
+
+    case 1:
+    case 2:
+    case 3:
+    case 4:
+    case 5:
+    case 6:
+    case 7:
+    case -1:
+    case -2:
+    case -3:
+    case -4:
+    case -5:
+    case -6:
+    case -7:
+      return true;
+    }
+
+  /* We can't load the value with LXVKQ.  */
+  return false;
+})
+
 ;; Return 1 if the operand is a CONST_VECTOR and can be loaded into a
 ;; vector register without using memory.
 (define_predicate "easy_vector_constant"
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index e9be9c4d99f..a21fa08b367 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -33,6 +33,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, int, int, int,
 extern int easy_altivec_constant (rtx, machine_mode);
 extern bool xxspltib_constant_p (rtx, machine_mode, int *, int *);
 extern long xxspltidp_constant_immediate (rtx, machine_mode);
+extern int lxvkq_constant_immediate (rtx, machine_mode);
 extern int vspltis_shifted (rtx);
 extern HOST_WIDE_INT const_vector_elt_as_int (rtx, unsigned int);
 extern bool macho_lo_sum_memory_operand (rtx, machine_mode);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 83d243269e3..7b0b5357f0b 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -7000,6 +7000,96 @@ xxspltidp_constant_immediate (rtx op, machine_mode mode)
   return ret;
 }
 
+/* Return the constant that will go in the LXVKQ instruction.  */
+
+/* LXVKQ immediates.  */
+enum {
+  LXVKQ_ONE		= 1,
+  LXVKQ_TWO		= 2,
+  LXVKQ_THREE		= 3,
+  LXVKQ_FOUR		= 4,
+  LXVKQ_FIVE		= 5,
+  LXVKQ_SIX		= 6,
+  LXVKQ_SEVEN		= 7,
+  LXVKQ_INF		= 8,
+  LXVKQ_NAN		= 9,
+  LXVKQ_NEG_ZERO	= 16,
+  LXVKQ_NEG_ONE		= 17,
+  LXVKQ_NEG_TWO		= 18,
+  LXVKQ_NEG_THREE	= 19,
+  LXVKQ_NEG_FOUR	= 20,
+  LXVKQ_NEG_FIVE	= 21,
+  LXVKQ_NEG_SIX		= 22,
+  LXVKQ_NEG_SEVEN	= 23,
+  LXVKQ_NEG_INF		= 24
+};
+
+int
+lxvkq_constant_immediate (rtx op, machine_mode mode)
+{
+  int ret = -1;
+  gcc_assert (easy_fp_constant_ieee128 (op, mode));
+
+  const struct real_value *rv = CONST_DOUBLE_REAL_VALUE (op);
+
+  gcc_assert (!real_issignaling_nan (rv));
+
+  /* Special values (infinity, nan, -0.0.  */
+  if (real_isinf (rv))
+    ret = real_isneg (rv) ? LXVKQ_NEG_INF : LXVKQ_INF;
+
+  /* Only recognize the normal NaN.  Do not recognize NaNs with the negative
+     sign, signaling NaNs, or NaNs that have non-zero mantissa.  */
+  else if (real_isnan (rv))
+    {
+      long w[4];
+
+      real_to_target (&w[0], rv, mode);
+      gcc_assert (BYTES_BIG_ENDIAN
+		  ? (w[0] == 0x7fff8000 && w[1] == 0 && w[2] == 0
+		     && w[3] == 0)
+		  : (w[3] == 0x7fff8000 && w[2] == 0 && w[1] == 0
+		     && w[0] == 0));
+
+      ret = LXVKQ_NAN;
+    }
+
+  else if (real_isnegzero (rv))
+    ret = LXVKQ_NEG_ZERO;
+
+  else
+    {
+      HOST_WIDE_INT value = real_to_integer (rv);
+      switch (value)
+	{
+	default:
+	  gcc_unreachable ();
+
+	case 1:
+	case 2:
+	case 3:
+	case 4:
+	case 5:
+	case 6:
+	case 7:
+	  ret = LXVKQ_ONE + (value - 1);
+	  break;
+
+	case -1:
+	case -2:
+	case -3:
+	case -4:
+	case -5:
+	case -6:
+	case -7:
+	  ret = LXVKQ_NEG_ONE + (-value - 1);
+	  break;
+	}
+    }
+
+  return ret;
+}
+
 const char *
 output_vec_const_move (rtx *operands)
 {
@@ -7051,6 +7141,12 @@ output_vec_const_move (rtx *operands)
 	  return "xxspltidp %x0,%2";
 	}
 
+      if (easy_fp_constant_ieee128 (vec, mode))
+	{
+	  operands[2] = GEN_INT (lxvkq_constant_immediate (vec, mode));
+	  return "lxvkq %x0,%2";
+	}
+
       if (TARGET_P9_VECTOR
 	  && xxspltib_constant_p (vec, mode, &num_insns, &xxspltib_value))
 	{
@@ -13893,6 +13989,12 @@ rs6000_output_move_128bit (rtx operands[])
     }
 
   /* Constants.  */
+  else if (dest_vsx_p && easy_fp_constant_ieee128 (src, mode))
+    {
+      operands[2] = GEN_INT (lxvkq_constant_immediate (src, mode));
+      return "lxvkq %x0,%2";
+    }
+
   else if (dest_regno >= 0
 	   && (CONST_INT_P (src)
 	       || CONST_WIDE_INT_P (src)
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 1d7ce4cc94a..c9eb78952d6 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -644,6 +644,10 @@ mxxspltidp
 Target Undocumented Var(TARGET_XXSPLTIDP) Init(1) Save
 Generate (do not generate) XXSPLTIDP instructions.
 
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
 -param=rs6000-density-pct-threshold=
 Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
 When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index fa33c9d9fbf..d7e58654ded 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1191,19 +1191,19 @@
 ;; instruction). But generate XXLXOR/XXLORC if it will avoid a register move.
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
-;;              XXSPLTIDP
+;;              XXSPLTIDP  LXVKQ
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
 ;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        r,         we,        ?wQ,
-                wa,
+                wa,        wa,
                 ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
                 ?wa,       v,         <??r>,     wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        we,        r,         r,
-                eV,
+                eV,        eQ,
                 wQ,        Y,         r,         r,         wE,        jwM,
                 ?jwM,      W,         <nW>,      v,         wZ"))]
 
@@ -1215,44 +1215,44 @@
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
-                vecperm,
+                vecperm,   vecperm,
                 store,     load,      store,     *,         vecsimple, vecsimple,
                 vecsimple, *,         *,         vecstore,  vecload")
    (set_attr "num_insns"
                "*,         *,         *,         2,         *,         2,
-                *,
+                *,         *,
                 2,         2,         2,         2,         *,         *,
                 *,         5,         2,         *,         *")
    (set_attr "max_prefixed_insns"
                "*,         *,         *,         *,         *,         2,
-                *,
+                *,         *,
                 2,         2,         2,         2,         *,         *,
                 *,         *,         *,         *,         *")
    (set_attr "length"
                "*,         *,         *,         8,         *,         8,
-                *,
+                *,         *,
                 8,         8,         8,         8,         *,         *,
                 *,         20,        8,         *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
-                p10,
+                p10,       p10,
                 *,         *,         *,         *,         p9v,       *,
                 <VSisa>,   *,         *,         *,         *")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
-;;              XXSPLTIDP
+;;              XXSPLTIDP  LXVKQ
 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1   VMX const  GPR const
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
-                wa,
+                wa,        wa,
                 wa,        v,         ?wa,       v,         <??r>,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        Y,         r,         r,
-                eV,
+                eV,        eQ,
                 wE,        jwM,       ?jwM,      W,         <nW>,
                 v,         wZ"))]
 
@@ -1264,17 +1264,17 @@
 }
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
-                vecperm,
+                vecperm,   vecperm,
                 vecsimple, vecsimple, vecsimple, *,         *,
                 vecstore,  vecload")
    (set_attr "length"
                "*,         *,         *,         16,        16,        16,
-                *,
+                *,         *,
                 *,         *,         *,         20,        16,
                 *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
-                p10,
+                p10,       p10,
                 p9v,       *,         <VSisa>,   *,         *,
                 *,         *")])
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 5035a3fd604..813d6316d8c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3339,6 +3339,9 @@ A 64-bit scalar constant that can be loaded with the XXSPLTIDP instruction.
 @item eI
 A signed 34-bit integer constant if prefixed instructions are supported.
 
+@item eQ
+An IEEE 128-bit constant that can be loaded with the LXVKQ instruction.
+
 @item eV
 A 128-bit vector constant that can be loaded with the XXSPLTIDP instruction.
 
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..23ee7e85d84
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,144 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+   constants.  */
+
+_Float128
+return_0 (void)
+{
+  return 0.0f128;			/* XXSPLTIB 34,0.  */
+}
+
+_Float128
+return_1 (void)
+{
+  return 1.0f128;			/* LXVKQ 34,1.  */
+}
+
+_Float128
+return_2 (void)
+{
+  return 2.0f128;			/* LXVKQ 34,2.  */
+}
+
+_Float128
+return_3 (void)
+{
+  return 3.0f128;			/* LXVKQ 34,3.  */
+}
+
+_Float128
+return_4 (void)
+{
+  return 4.0f128;			/* LXVKQ 34,4.  */
+}
+
+_Float128
+return_5 (void)
+{
+  return 5.0f128;			/* LXVKQ 34,5.  */
+}
+
+_Float128
+return_6 (void)
+{
+  return 6.0f128;			/* LXVKQ 34,6.  */
+}
+
+_Float128
+return_7 (void)
+{
+  return 7.0f128;			/* LXVKQ 34,7.  */
+}
+
+_Float128
+return_m0 (void)
+{
+  return -0.0f128;			/* LXVKQ 34,16.  */
+}
+
+_Float128
+return_m1 (void)
+{
+  return -1.0f128;			/* LXVKQ 34,17.  */
+}
+
+_Float128
+return_m2 (void)
+{
+  return -2.0f128;			/* LXVKQ 34,18.  */
+}
+
+_Float128
+return_m3 (void)
+{
+  return -3.0f128;			/* LXVKQ 34,19.  */
+}
+
+_Float128
+return_m4 (void)
+{
+  return -4.0f128;			/* LXVKQ 34,20.  */
+}
+
+_Float128
+return_m5 (void)
+{
+  return -5.0f128;			/* LXVKQ 34,21.  */
+}
+
+_Float128
+return_m6 (void)
+{
+  return -6.0f128;			/* LXVKQ 34,22.  */
+}
+
+_Float128
+return_m7 (void)
+{
+  return -7.0f128;			/* LXVKQ 34,23.  */
+}
+
+_Float128
+return_inf (void)
+{
+  return __builtin_inff128 ();		/* LXVKQ 34,8.  */
+}
+
+_Float128
+return_minf (void)
+{
+  return - __builtin_inff128 ();	/* LXVKQ 34,24.  */
+}
+
+_Float128
+return_nan (void)
+{
+  return __builtin_nanf128 ("");	/* LXVKQ 34,9.  */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction.  */
+_Float128
+return_mnan (void)
+{
+  return - __builtin_nanf128 ("");	/* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+  return __builtin_nanf128 ("1");	/* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+  return __builtin_nansf128 ("");	/* PLXV 34,... */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M}    18 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M}      3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M}  1 } } */
+