[gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.

public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed

From: Michael Meissner <meissner@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc(refs/users/meissner/heads/work071)] Add LXVKQ support.
Date: Mon, 18 Oct 2021 19:12:12 +0000 (GMT)	[thread overview]
Message-ID: <20211018191212.F40B83858C27@sourceware.org> (raw)

https://gcc.gnu.org/g:86ae6e0d17cb144be310c96dd425383000534e45

commit 86ae6e0d17cb144be310c96dd425383000534e45
Author: Michael Meissner <meissner@linux.ibm.com>
Date:   Mon Oct 18 15:09:45 2021 -0400

    Add LXVKQ support.
    
    This patch adds support to generate the LXVKQ instruction to load specific
    IEEE-128 floating point constants.
    
    Compared to the last time I submitted this patch, I modified it so that it
    uses the bit pattern of the vector to see if it can generate the LXVKQ
    instruction.  This means on a little endian Power10 system, the
    following code will generate a LXVKQ 34,16 instruction:
    
        vector long long foo (void)
        {
          return (vector long long) { 0x0000000000000000, 0x8000000000000000 };
        }
    
    because that vector pattern is the same bit pattern as -0.0F128.
    
    2021-10-18  Michael Meissner  <meissner@the-meissners.org>
    
    gcc/
    
            * config/rs6000/constraints.md (eQ): New constraint.
            * config/rs6000/predicates.md (easy_fp_constant): Add support for
            generating the LXVKQ instruction.
            (easy_vector_constant_ieee128): New predicate.
            (easy_vector_constant): Add support for generating the LXVKQ
            instruction.
            * config/rs6000/rs6000-protos.h (rs6000_vec_const): Add
            lxvkq_immediate field for generating LXVKQ.
            (vec_const_use_lxvkq): New declaration.
            * config/rs6000/rs6000.c (output_vec_const_move): Add support for
            generating LXVKQ.
            (vec_const_use_lxvkq): New function.
            * config/rs6000/rs6000.opt (-mlxvkq): New debug option.
            * config/rs6000/vsx.md (vsx_mov<mode>_64bit): Add support for
            generating LXVKQ.
            (vsx_mov<mode>_32bit): Likewise.
            * doc/md.texi (PowerPC and IBM RS6000 constraints): Document the
            eQ constraint.
    
    gcc/testsuite/
    
            * gcc.target/powerpc/float128-constant.c: New test.

Diff:
---
 gcc/config/rs6000/constraints.md                   |   5 +
 gcc/config/rs6000/predicates.md                    |  22 +++
 gcc/config/rs6000/rs6000-protos.h                  |   2 +
 gcc/config/rs6000/rs6000.c                         |  56 ++++++++
 gcc/config/rs6000/rs6000.opt                       |   4 +
 gcc/config/rs6000/vsx.md                           |  28 ++--
 gcc/doc/md.texi                                    |   3 +
 .../gcc.target/powerpc/float128-constant.c         | 160 +++++++++++++++++++++
 8 files changed, 266 insertions(+), 14 deletions(-)

diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 7d594872a78..f5b524254ab 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -219,6 +219,11 @@
   "A constant that can be loaded into a VSX register with one prefixed insn."
   (match_operand 0 "vsx_prefixed_constant"))
 
+;; IEEE 128-bit constant that can be loaded with LXVKQ
+(define_constraint "eQ"
+  "An IEEE 128-bit constant that can be loaded with the LXVKQ instruction."
+  (match_operand 0 "easy_vector_constant_ieee128"))
+
 ;; Floating-point constraints.  These two are defined so that insn
 ;; length attributes can be calculated exactly.
 
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 40c4cba68ff..c271b379f6d 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -606,6 +606,9 @@
 
   if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
     {
+      if (vec_const_use_lxvkq (&vec_const))
+	return true;
+
       if (vec_const_use_xxspltidp (&vec_const))
 	return true;
 
@@ -661,6 +664,22 @@
   return false;
 })
 
+;; Return 1 if the operand is a special IEEE 128-bit value that can be loaded
+;; via the LXVKQ instruction.
+
+(define_predicate "easy_vector_constant_ieee128"
+  (match_code "const_vector,const_double")
+{
+  rs6000_vec_const vec_const;
+
+  /* Can we generate the LXVKQ instruction?  */
+  if (!TARGET_LXVKQ || !TARGET_FLOAT128_HW || !TARGET_POWER10 || !TARGET_VSX)
+    return false;
+
+  return (vec_const_to_bytes (op, mode, &vec_const)
+	  && vec_const_use_lxvkq (&vec_const));
+})
+
 ;; Return 1 if the operand is a constant that can loaded with a XXSPLTIB
 ;; instruction and then a VUPKHSB, VECSB2W or VECSB2D instruction.
 
@@ -715,6 +734,9 @@
 
       if (TARGET_POWER10 && vec_const_to_bytes (op, mode, &vec_const))
 	{
+	  if (vec_const_use_lxvkq (&vec_const))
+	    return true;
+
 	  if (vec_const_use_xxspltidp (&vec_const))
 	    return true;
 
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index b12f6b10c13..40a796d4461 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -240,6 +240,7 @@ typedef struct {
   unsigned char bytes[VECTOR_CONST_BYTES];
 
   unsigned int xxspltidp_immediate;	/* Immediate value for XXSPLTIDP.  */
+  unsigned int lxvkq_immediate;		/* Immediate value for LXVKQ.  */
   bool fp_constant_p;			/* Is the constant floating point?  */
   bool all_double_words_same;		/* Are the double words all equal?  */
   bool all_words_same;			/* Are the words all equal?  */
@@ -250,6 +251,7 @@ typedef struct {
 extern bool vec_const_to_bytes (rtx, machine_mode, rs6000_vec_const *);
 extern bool vec_const_use_xxspltidp (rs6000_vec_const *);
 extern bool vec_const_use_xxspltiw (rs6000_vec_const *);
+extern bool vec_const_use_lxvkq (rs6000_vec_const *);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 20226169ba2..fbcd307177c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -6998,6 +6998,12 @@ output_vec_const_move (rtx *operands)
       rs6000_vec_const vec_const;
       if (TARGET_POWER10 && vec_const_to_bytes (vec, mode, &vec_const))
 	{
+	  if (vec_const_use_lxvkq (&vec_const))
+	    {
+	      operands[2] = GEN_INT (vec_const.lxvkq_immediate);
+	      return "lxvkq %x0,%2";
+	    }
+
 	  if (vec_const_use_xxspltidp (&vec_const))
 	    {
 	      operands[2] = GEN_INT (vec_const.xxspltidp_immediate);
@@ -28827,6 +28833,56 @@ vec_const_use_xxspltiw (rs6000_vec_const *vec_const)
   return true;
 }
 
+/* Determine if a vector constant can be loaded with LXVKQ.  If so, fill out
+   the fields used to generate the instruction.  */
+
+bool
+vec_const_use_lxvkq (rs6000_vec_const *vec_const)
+{
+  unsigned immediate;
+
+  if (!TARGET_LXVKQ || !TARGET_PREFIXED || !TARGET_VSX)
+    return false;
+
+  /* Verify that all of the bottom 3 words in the constants loaded by the
+     LXVKQ instruction are zero.  */
+  for (size_t i = 1; i < VECTOR_CONST_WORDS; i++)
+    if (vec_const->words[i] != 0)
+      return false;
+
+  /* See if we have a match.  */
+  switch (vec_const->words[0])
+    {
+    case 0x3FFF0000U: immediate = 1;  break;	/* IEEE 128-bit +1.0.  */
+    case 0x40000000U: immediate = 2;  break;	/* IEEE 128-bit +2.0.  */
+    case 0x40008000U: immediate = 3;  break;	/* IEEE 128-bit +3.0.  */
+    case 0x40010000U: immediate = 4;  break;	/* IEEE 128-bit +4.0.  */
+    case 0x40014000U: immediate = 5;  break;	/* IEEE 128-bit +5.0.  */
+    case 0x40018000U: immediate = 6;  break;	/* IEEE 128-bit +6.0.  */
+    case 0x4001C000U: immediate = 7;  break;	/* IEEE 128-bit +7.0.  */
+    case 0x7FFF0000U: immediate = 8;  break;	/* IEEE 128-bit +Infinity.  */
+    case 0x7FFF8000U: immediate = 9;  break;	/* IEEE 128-bit quiet NaN.  */
+    case 0x80000000U: immediate = 16; break;	/* IEEE 128-bit -0.0.  */
+    case 0xBFFF0000U: immediate = 17; break;	/* IEEE 128-bit -1.0.  */
+    case 0xC0000000U: immediate = 18; break;	/* IEEE 128-bit -2.0.  */
+    case 0xC0008000U: immediate = 19; break;	/* IEEE 128-bit -3.0.  */
+    case 0xC0010000U: immediate = 20; break;	/* IEEE 128-bit -4.0.  */
+    case 0xC0014000U: immediate = 21; break;	/* IEEE 128-bit -5.0.  */
+    case 0xC0018000U: immediate = 22; break;	/* IEEE 128-bit -6.0.  */
+    case 0xC001C000U: immediate = 23; break;	/* IEEE 128-bit -7.0.  */
+    case 0xFFFF0000U: immediate = 24; break;	/* IEEE 128-bit -Infinity.  */
+
+      /* Anything else cannot be loaded with LXVKQ.  */
+    default:
+      return false;
+    }
+
+  /* We can use the LXVKQ instruction, record the immediate needed for the
+     instruction.  */
+  vec_const->lxvkq_immediate = immediate;
+  return true;
+}
+
 /* Convert a vector constant to an internal structure, breaking it out to
    bytes, half words, words, and double words.  Return true if we have
    successfully broken it out.  */
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 332f61be0ba..015bf91b6d5 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -648,6 +648,10 @@ mxxspltiw
 Target Undocumented Var(TARGET_XXSPLTIW) Init(1) Save
 Generate (do not generate) XXSPLTIW instructions.
 
+mlxvkq
+Target Undocumented Var(TARGET_LXVKQ) Init(1) Save
+Generate (do not generate) LXVKQ instructions.
+
 -param=rs6000-density-pct-threshold=
 Target Undocumented Joined UInteger Var(rs6000_density_pct_threshold) Init(85) IntegerRange(0, 100) Param
 When costing for loop vectorization, we probably need to penalize the loop body
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 0ceecc1975c..ce8402101ef 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -1192,19 +1192,19 @@
 
 ;;              VSX store  VSX load   VSX move  VSX->GPR   GPR->VSX    LQ (GPR)
 ;;              STQ (GPR)  GPR load   GPR store GPR move   XXSPLTIB    VSPLTISW
-;;              XXLSPLTI*
+;;              XXSPLTI*   LXVKQ
 ;;              VSX 0/-1   VMX const  GPR const LVX (VMX)  STVX (VMX)
 (define_insn "vsx_mov<mode>_64bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        r,         we,        ?wQ,
                 ?&r,       ??r,       ??Y,       <??r>,     wa,        v,
-                wa,
+                wa,        wa,
                 ?wa,       v,         <??r>,     wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        we,        r,         r,
                 wQ,        Y,         r,         r,         wE,        jwM,
-                eP,
+                eP,        eQ,
                 ?jwM,      W,         <nW>,      v,         wZ"))]
 
   "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
@@ -1216,46 +1216,46 @@
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, mtvsr,     mfvsr,     load,
                 store,     load,      store,     *,         vecsimple, vecsimple,
-                vecperm,
+                vecperm,   vecperm,
                 vecsimple, *,         *,         vecstore,  vecload")
    (set_attr "num_insns"
                "*,         *,         *,         2,         *,         2,
                 2,         2,         2,         2,         *,         *,
-                *,
+                *,         *,
                 *,         5,         2,         *,         *")
    (set_attr "max_prefixed_insns"
                "*,         *,         *,         *,         *,         2,
                 2,         2,         2,         2,         *,         *,
-                *,
+                *,         *,
                 *,         *,         *,         *,         *")
    (set_attr "length"
                "*,         *,         *,         8,         *,         8,
                 8,         8,         8,         8,         *,         *,
-                *,
+                *,         *,
                 *,         20,        8,         *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                 *,         *,         *,         *,         p9v,       *,
-                p10,
+                p10,       p10,
                 <VSisa>,   *,         *,         *,         *")])
 
 ;;              VSX store  VSX load   VSX move   GPR load   GPR store  GPR move
 ;;              XXSPLTIB   VSPLTISW   VSX 0/-1
-;;              XXSPLTI*
+;;              XXSPLTI*   LXVKQ
 ;;              VMX const  GPR const
 ;;              LVX (VMX)  STVX (VMX)
 (define_insn "*vsx_mov<mode>_32bit"
   [(set (match_operand:VSX_M 0 "nonimmediate_operand"
                "=ZwO,      wa,        wa,        ??r,       ??Y,       <??r>,
                 wa,        v,         ?wa,
-                wa,
+                wa,        wa,
                 v,         <??r>,
                 wZ,        v")
 
 	(match_operand:VSX_M 1 "input_operand" 
                "wa,        ZwO,       wa,        Y,         r,         r,
                 wE,        jwM,       ?jwM,
-                eP,
+                eP,        eQ,
                 W,         <nW>,
                 v,         wZ"))]
 
@@ -1268,19 +1268,19 @@
   [(set_attr "type"
                "vecstore,  vecload,   vecsimple, load,      store,    *,
                 vecsimple, vecsimple, vecsimple,
-                vecperm,
+                vecperm,   vecperm,
                 *,         *,
                 vecstore,  vecload")
    (set_attr "length"
                "*,         *,         *,         16,        16,        16,
                 *,         *,         *,
-                *,
+                *,         *,
                 20,        16,
                 *,         *")
    (set_attr "isa"
                "<VSisa>,   <VSisa>,   <VSisa>,   *,         *,         *,
                 p9v,       *,         <VSisa>,
-                p10,
+                p10,       p10,
                 *,         *,
                 *,         *")])
 
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index 13b56279565..cd70e170955 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -3340,6 +3340,9 @@ A signed 34-bit integer constant if prefixed instructions are supported.
 A scalar floating point constant or a vector constant that can be
 loaded with one prefixed instruction to a VSX register.
 
+@item eQ
+A constant that can be loaded with the LXVKQ instruction.
+
 @ifset INTERNALS
 @item G
 A floating point constant that can be loaded into a register with one
diff --git a/gcc/testsuite/gcc.target/powerpc/float128-constant.c b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
new file mode 100644
index 00000000000..f6becac1075
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/float128-constant.c
@@ -0,0 +1,160 @@
+/* { dg-require-effective-target ppc_float128_hw } */
+/* { dg-require-effective-target power10_ok } */
+/* { dg-options "-mdejagnu-cpu=power10 -mlxvkq -O2" } */
+
+/* Test whether the LXVKQ instruction is generated to load special IEEE 128-bit
+   constants.  */
+
+_Float128
+return_0 (void)
+{
+  return 0.0f128;			/* XXSPLTIB 34,0.  */
+}
+
+_Float128
+return_1 (void)
+{
+  return 1.0f128;			/* LXVKQ 34,1.  */
+}
+
+_Float128
+return_2 (void)
+{
+  return 2.0f128;			/* LXVKQ 34,2.  */
+}
+
+_Float128
+return_3 (void)
+{
+  return 3.0f128;			/* LXVKQ 34,3.  */
+}
+
+_Float128
+return_4 (void)
+{
+  return 4.0f128;			/* LXVKQ 34,4.  */
+}
+
+_Float128
+return_5 (void)
+{
+  return 5.0f128;			/* LXVKQ 34,5.  */
+}
+
+_Float128
+return_6 (void)
+{
+  return 6.0f128;			/* LXVKQ 34,6.  */
+}
+
+_Float128
+return_7 (void)
+{
+  return 7.0f128;			/* LXVKQ 34,7.  */
+}
+
+_Float128
+return_m0 (void)
+{
+  return -0.0f128;			/* LXVKQ 34,16.  */
+}
+
+_Float128
+return_m1 (void)
+{
+  return -1.0f128;			/* LXVKQ 34,17.  */
+}
+
+_Float128
+return_m2 (void)
+{
+  return -2.0f128;			/* LXVKQ 34,18.  */
+}
+
+_Float128
+return_m3 (void)
+{
+  return -3.0f128;			/* LXVKQ 34,19.  */
+}
+
+_Float128
+return_m4 (void)
+{
+  return -4.0f128;			/* LXVKQ 34,20.  */
+}
+
+_Float128
+return_m5 (void)
+{
+  return -5.0f128;			/* LXVKQ 34,21.  */
+}
+
+_Float128
+return_m6 (void)
+{
+  return -6.0f128;			/* LXVKQ 34,22.  */
+}
+
+_Float128
+return_m7 (void)
+{
+  return -7.0f128;			/* LXVKQ 34,23.  */
+}
+
+_Float128
+return_inf (void)
+{
+  return __builtin_inff128 ();		/* LXVKQ 34,8.  */
+}
+
+_Float128
+return_minf (void)
+{
+  return - __builtin_inff128 ();	/* LXVKQ 34,24.  */
+}
+
+_Float128
+return_nan (void)
+{
+  return __builtin_nanf128 ("");	/* LXVKQ 34,9.  */
+}
+
+/* Note, the following NaNs should not generate a LXVKQ instruction.  */
+_Float128
+return_mnan (void)
+{
+  return - __builtin_nanf128 ("");	/* PLXV 34,... */
+}
+
+_Float128
+return_nan2 (void)
+{
+  return __builtin_nanf128 ("1");	/* PLXV 34,... */
+}
+
+_Float128
+return_nans (void)
+{
+  return __builtin_nansf128 ("");	/* PLXV 34,... */
+}
+
+vector long long
+return_longlong_neg_0 (void)
+{
+  /* This vector is the same pattern as -0.0F128.  */
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define FIRST	0x8000000000000000
+#define SECOND	0x0000000000000000
+
+#else
+#define FIRST	0x0000000000000000
+#define SECOND	0x8000000000000000
+#endif
+
+  return (vector long long) { FIRST, SECOND };	/* LXVKQ 34,16.  */
+}
+
+/* { dg-final { scan-assembler-times {\mlxvkq\M}    19 } } */
+/* { dg-final { scan-assembler-times {\mplxv\M}      3 } } */
+/* { dg-final { scan-assembler-times {\mxxspltib\M}  1 } } */
+

next             reply	other threads:[~2021-10-18 19:12 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-10-18 19:12 Michael Meissner [this message]
  -- strict thread matches above, loose matches on Subject: below --
2021-10-21  2:54 Michael Meissner
2021-10-21  2:39 Michael Meissner
2021-10-21  2:20 Michael Meissner
2021-10-15  3:39 Michael Meissner
2021-10-14 16:51 Michael Meissner
2021-10-14 16:50 Michael Meissner
2021-10-14 16:46 Michael Meissner
2021-10-14 15:32 Michael Meissner
2021-10-14  3:39 Michael Meissner
2021-10-14  1:56 Michael Meissner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20211018191212.F40B83858C27@sourceware.org \
    --to=meissner@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).