public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Support 2/4/8 byte constant vector stores
@ 2022-06-30 14:50 H.J. Lu
  2022-07-01 15:31 ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2022-06-30 14:50 UTC (permalink / raw)
  To: gcc-patches

1. Add a predicate for constant vectors which can be converted to integer
constants suitable for constant integer stores.  For an 8-byte constant
vector, the converted 64-bit integer must be valid for store with 64-bit
immediate, which is a 64-bit integer sign-extended from a 32-bit integer.
2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector
stores, like

(set (mem:V2HI (reg:DI 84))
     (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])]))

3. After reload, convert constant vector stores to constant integer
stores, like

(set (mem:SI (reg:DI 5 di [84]))
     (const_int 65536 [0x10000]))

For

void
foo (short * c)
{
  c[0] = 0;
  c[1] = 1;
}

it generates

	movl	$65536, (%rdi)

instead of

	movl	.LC0(%rip), %eax
	movl	%eax, (%rdi)

gcc/

	PR target/106022
	* config/i386/i386-protos.h (ix86_convert_const_vector_to_integer):
	New.
	* config/i386/i386.cc (ix86_convert_const_vector_to_integer):
	New.
	* config/i386/mmx.md (V_16_32_64): New.
	(*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit
	and 64-bit constant vector.
	* config/i386/predicates.md (x86_64_const_vector_operand): New.

gcc/testsuite/

	PR target/106022
	* gcc.target/i386/pr106022-1.c: New test.
	* gcc.target/i386/pr106022-2.c: Likewise.
	* gcc.target/i386/pr106022-3.c: Likewise.
	* gcc.target/i386/pr106022-4.c: Likewise.
---
 gcc/config/i386/i386-protos.h              |  2 +
 gcc/config/i386/i386.cc                    | 47 ++++++++++++++++++++++
 gcc/config/i386/mmx.md                     | 37 +++++++++++++++++
 gcc/config/i386/predicates.md              | 11 +++++
 gcc/testsuite/gcc.target/i386/pr106022-1.c | 13 ++++++
 gcc/testsuite/gcc.target/i386/pr106022-2.c | 14 +++++++
 gcc/testsuite/gcc.target/i386/pr106022-3.c | 14 +++++++
 gcc/testsuite/gcc.target/i386/pr106022-4.c | 14 +++++++
 8 files changed, 152 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-4.c

diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3596ce81ecf..cf847751ac5 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -122,6 +122,8 @@ extern void ix86_expand_unary_operator (enum rtx_code, machine_mode,
 					rtx[]);
 extern rtx ix86_build_const_vector (machine_mode, bool, rtx);
 extern rtx ix86_build_signbit_mask (machine_mode, bool, bool);
+extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx,
+							   machine_mode);
 extern void ix86_split_convert_uns_si_sse (rtx[]);
 extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
 extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index b15b4893bb9..0cfe9962f75 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -15723,6 +15723,53 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
   return force_reg (vec_mode, v);
 }
 
+/* Pack const vector OP of MODE into one HOST_WIDE_INT, element 0 lowest.  */
+
+HOST_WIDE_INT
+ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
+{
+  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+    gcc_unreachable ();  /* Vectors wider than a word are not supported.  */
+
+  int nunits = GET_MODE_NUNITS (mode);
+  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
+  machine_mode innermode = GET_MODE_INNER (mode);
+  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
+
+  switch (mode)
+    {
+    case E_V2QImode:
+    case E_V4QImode:
+    case E_V2HImode:
+    case E_V8QImode:
+    case E_V4HImode:
+    case E_V2SImode:  /* Integer elements: take each CONST_INT value.  */
+      for (int i = 0; i < nunits; ++i)
+	{
+	  int v = INTVAL (XVECEXP (op, 0, i));
+	  wide_int wv = wi::shwi (v, innermode_bits);
+	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
+	}
+      break;
+    case E_V2HFmode:
+    case E_V4HFmode:
+    case E_V2SFmode:  /* FP elements: take real_to_target's encoding.  */
+      for (int i = 0; i < nunits; ++i)
+	{
+	  rtx x = XVECEXP (op, 0, i);
+	  int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
+				  REAL_MODE_FORMAT (innermode));
+	  wide_int wv = wi::shwi (v, innermode_bits);
+	  val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return val.to_shwi ();
+}
+
 /* Return TRUE or FALSE depending on whether the first SET in INSN
    has source and destination with matching CC modes, and that the
    CC mode is at least as constrained as REQ_MODE.  */
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index ba53007a35e..3294c1e6274 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -69,6 +69,12 @@ (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
 ;; 4-byte and 2-byte QImode vector modes
 (define_mode_iterator VI1_16_32 [V4QI V2QI])
 
+;; 2-byte, 4-byte and (TARGET_64BIT only) 8-byte multi-element vector modes
+(define_mode_iterator V_16_32_64
+   [V2QI V4QI V2HI V2HF
+    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
+    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
+
 ;; V2S* modes
 (define_mode_iterator V2FI [V2SF V2SI])
 
@@ -331,6 +337,37 @@ (define_insn "*mov<mode>_internal"
 	   ]
 	   (symbol_ref "true")))])
 
+;; 16-bit, 32-bit and 64-bit constant vector stores to memory.  After reload,
+;; split them into immediate integer stores of the same size.
+(define_insn_and_split "*mov<mode>_imm"
+  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
+	(match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
+  ""
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))]
+{
+  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
+							    <MODE>mode);
+  operands[1] = GEN_INT (val);
+  machine_mode mode;
+  switch (GET_MODE_SIZE (<MODE>mode))
+    {
+    case 2:
+      mode = HImode;
+      break;
+    case 4:
+      mode = SImode;
+      break;
+    case 8:
+      mode = DImode;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode);
+})
+
 ;; For TARGET_64BIT we always round up to 8 bytes.
 (define_insn "*push<mode>2_rex64"
   [(set (match_operand:V_32 0 "push_operand" "=X,X")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 128144f1050..c71c453cceb 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1194,6 +1194,17 @@ (define_predicate "reg_or_const_vector_operand"
   (ior (match_operand 0 "register_operand")
        (match_code "const_vector")))
 
+;; Return true when OP is a CONST_VECTOR no wider than a word whose
+;; integer conversion is a sign-extended 32-bit integer.
+(define_predicate "x86_64_const_vector_operand"
+  (match_code "const_vector")
+{
+  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
+    return false;
+  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op, mode);
+  return trunc_int_for_mode (val, SImode) == val;
+})
+
 ;; Return true when OP is nonimmediate or standard SSE constant.
 (define_predicate "nonimmediate_or_sse_const_operand"
   (ior (match_operand 0 "nonimmediate_operand")
diff --git a/gcc/testsuite/gcc.target/i386/pr106022-1.c b/gcc/testsuite/gcc.target/i386/pr106022-1.c
new file mode 100644
index 00000000000..6643b4c30f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106022-1.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+void
+foo (char *c)
+{
+  c[0] = 0;
+  c[1] = 1;
+  c[2] = 2;
+  c[3] = 3;  /* Bytes 0,1,2,3 combine to 0x03020100 == 50462976.  */
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$50462976," 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c
new file mode 100644
index 00000000000..0e79fb53297
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+void
+foo (int *c)
+{
+  c = __builtin_assume_aligned (c, 16);
+  c[0] = -1;
+  c[1] = -1;  /* Both halves -1: the 64-bit value is -1, a valid imm32.  */
+}
+
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr106022-3.c b/gcc/testsuite/gcc.target/i386/pr106022-3.c
new file mode 100644
index 00000000000..8b0c2a8f6d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106022-3.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+void
+foo (int *c)
+{
+  c[0] = 0;
+  c[1] = 1;
+  c[2] = 2;
+  c[3] = 3;  /* 16 bytes of constants: expected to still go through xmm.  */
+}
+
+/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */
+/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr106022-4.c b/gcc/testsuite/gcc.target/i386/pr106022-4.c
new file mode 100644
index 00000000000..8ecda170af3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr106022-4.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=x86-64" } */
+
+void
+foo (float *c)
+{
+  c[0] = 2.3;  /* Expected immediate: 0x40133333 == 1075000115.  */
+  c[1] = 0.0;
+}
+
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x40133333" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x00000000" 1 { target { ia32 } } } } */
+/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$1075000115," 1 { target { ! ia32 } } } } */
+/* { dg-final { scan-assembler-not "xmm" } } */
-- 
2.36.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH] x86: Support 2/4/8 byte constant vector stores
  2022-06-30 14:50 [PATCH] x86: Support 2/4/8 byte constant vector stores H.J. Lu
@ 2022-07-01 15:31 ` Uros Bizjak
  2022-07-27 14:24   ` [GCC 12] " H.J. Lu
  0 siblings, 1 reply; 4+ messages in thread
From: Uros Bizjak @ 2022-07-01 15:31 UTC (permalink / raw)
  To: H.J. Lu; +Cc: gcc-patches, Noah Goldstein, Hongtao Liu

On Thu, Jun 30, 2022 at 4:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> 1. Add a predicate for constant vectors which can be converted to integer
> constants suitable for constant integer stores.  For a 8-byte constant
> vector, the converted 64-bit integer must be valid for store with 64-bit
> immediate, which is a 64-bit integer sign-extended from a 32-bit integer.
> 2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector
> stores, like
>
> (set (mem:V2HI (reg:DI 84))
>      (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])]))
>
> 3. After reload, convert constant vector stores to constant integer
> stores, like
>
> (set (mem:SI (reg:DI 5 di [84]))
>      (const_int 65536 [0x10000]))
>
> For
>
> void
> foo (short * c)
> {
>   c[0] = 0;
>   c[1] = 1;
> }
>
> it generates
>
>         movl    $65536, (%rdi)
>
> instead of
>
>         movl    .LC0(%rip), %eax
>         movl    %eax, (%rdi)
>
> gcc/
>
>         PR target/106022
>         * config/i386/i386-protos.h (ix86_convert_const_vector_to_integer):
>         New.
>         * config/i386/i386.cc (ix86_convert_const_vector_to_integer):
>         New.
>         * config/i386/mmx.md (V_16_32_64): New.
>         (*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit
>         and 64-bit constant vector.
>         * config/i386/predicates.md (x86_64_const_vector_operand): New.
>
> gcc/testsuite/
>
>         PR target/106022
>         * gcc.target/i386/pr106022-1.c: New test.
>         * gcc.target/i386/pr106022-2.c: Likewise.
>         * gcc.target/i386/pr106022-3.c: Likewise.
>         * gcc.target/i386/pr106022-4.c: Likewise.

OK.

Thanks,
Uros.

> ---
>  gcc/config/i386/i386-protos.h              |  2 +
>  gcc/config/i386/i386.cc                    | 47 ++++++++++++++++++++++
>  gcc/config/i386/mmx.md                     | 37 +++++++++++++++++
>  gcc/config/i386/predicates.md              | 11 +++++
>  gcc/testsuite/gcc.target/i386/pr106022-1.c | 13 ++++++
>  gcc/testsuite/gcc.target/i386/pr106022-2.c | 14 +++++++
>  gcc/testsuite/gcc.target/i386/pr106022-3.c | 14 +++++++
>  gcc/testsuite/gcc.target/i386/pr106022-4.c | 14 +++++++
>  8 files changed, 152 insertions(+)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-4.c
>
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 3596ce81ecf..cf847751ac5 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -122,6 +122,8 @@ extern void ix86_expand_unary_operator (enum rtx_code, machine_mode,
>                                         rtx[]);
>  extern rtx ix86_build_const_vector (machine_mode, bool, rtx);
>  extern rtx ix86_build_signbit_mask (machine_mode, bool, bool);
> +extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx,
> +                                                          machine_mode);
>  extern void ix86_split_convert_uns_si_sse (rtx[]);
>  extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
>  extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index b15b4893bb9..0cfe9962f75 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -15723,6 +15723,53 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
>    return force_reg (vec_mode, v);
>  }
>
> +/* Return HOST_WIDE_INT for const vector OP in MODE.  */
> +
> +HOST_WIDE_INT
> +ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
> +{
> +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> +    gcc_unreachable ();
> +
> +  int nunits = GET_MODE_NUNITS (mode);
> +  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
> +  machine_mode innermode = GET_MODE_INNER (mode);
> +  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
> +
> +  switch (mode)
> +    {
> +    case E_V2QImode:
> +    case E_V4QImode:
> +    case E_V2HImode:
> +    case E_V8QImode:
> +    case E_V4HImode:
> +    case E_V2SImode:
> +      for (int i = 0; i < nunits; ++i)
> +       {
> +         int v = INTVAL (XVECEXP (op, 0, i));
> +         wide_int wv = wi::shwi (v, innermode_bits);
> +         val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
> +       }
> +      break;
> +    case E_V2HFmode:
> +    case E_V4HFmode:
> +    case E_V2SFmode:
> +      for (int i = 0; i < nunits; ++i)
> +       {
> +         rtx x = XVECEXP (op, 0, i);
> +         int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
> +                                 REAL_MODE_FORMAT (innermode));
> +         wide_int wv = wi::shwi (v, innermode_bits);
> +         val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
> +       }
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  return val.to_shwi ();
> +}
> +
>  /* Return TRUE or FALSE depending on whether the first SET in INSN
>     has source and destination with matching CC modes, and that the
>     CC mode is at least as constrained as REQ_MODE.  */
> diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> index ba53007a35e..3294c1e6274 100644
> --- a/gcc/config/i386/mmx.md
> +++ b/gcc/config/i386/mmx.md
> @@ -69,6 +69,12 @@ (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
>  ;; 4-byte and 2-byte QImode vector modes
>  (define_mode_iterator VI1_16_32 [V4QI V2QI])
>
> +;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
> +(define_mode_iterator V_16_32_64
> +   [V2QI V4QI V2HI V2HF
> +    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
> +    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
> +
>  ;; V2S* modes
>  (define_mode_iterator V2FI [V2SF V2SI])
>
> @@ -331,6 +337,37 @@ (define_insn "*mov<mode>_internal"
>            ]
>            (symbol_ref "true")))])
>
> +;; 16-bit, 32-bit and 64-bit constant vector stores.  After reload,
> +;; convert them to immediate integer stores.
> +(define_insn_and_split "*mov<mode>_imm"
> +  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
> +       (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
> +  ""
> +  "#"
> +  "&& reload_completed"
> +  [(set (match_dup 0) (match_dup 1))]
> +{
> +  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
> +                                                           <MODE>mode);
> +  operands[1] = GEN_INT (val);
> +  machine_mode mode;
> +  switch (GET_MODE_SIZE (<MODE>mode))
> +    {
> +    case 2:
> +      mode = HImode;
> +      break;
> +    case 4:
> +      mode = SImode;
> +      break;
> +    case 8:
> +      mode = DImode;
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode);
> +})
> +
>  ;; For TARGET_64BIT we always round up to 8 bytes.
>  (define_insn "*push<mode>2_rex64"
>    [(set (match_operand:V_32 0 "push_operand" "=X,X")
> diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> index 128144f1050..c71c453cceb 100644
> --- a/gcc/config/i386/predicates.md
> +++ b/gcc/config/i386/predicates.md
> @@ -1194,6 +1194,17 @@ (define_predicate "reg_or_const_vector_operand"
>    (ior (match_operand 0 "register_operand")
>         (match_code "const_vector")))
>
> +;; Return true when OP is CONST_VECTOR which can be converted to a
> +;; sign extended 32-bit integer.
> +(define_predicate "x86_64_const_vector_operand"
> +  (match_code "const_vector")
> +{
> +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> +    return false;
> +  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op, mode);
> +  return trunc_int_for_mode (val, SImode) == val;
> +})
> +
>  ;; Return true when OP is nonimmediate or standard SSE constant.
>  (define_predicate "nonimmediate_or_sse_const_operand"
>    (ior (match_operand 0 "nonimmediate_operand")
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-1.c b/gcc/testsuite/gcc.target/i386/pr106022-1.c
> new file mode 100644
> index 00000000000..6643b4c30f1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-1.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (char *c)
> +{
> +  c[0] = 0;
> +  c[1] = 1;
> +  c[2] = 2;
> +  c[3] = 3;
> +}
> +
> +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$50462976," 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c
> new file mode 100644
> index 00000000000..0e79fb53297
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (int *c)
> +{
> +  c = __builtin_assume_aligned (c, 16);
> +  c[0] = -1;
> +  c[1] = -1;
> +}
> +
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-3.c b/gcc/testsuite/gcc.target/i386/pr106022-3.c
> new file mode 100644
> index 00000000000..8b0c2a8f6d8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-3.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (int *c)
> +{
> +  c[0] = 0;
> +  c[1] = 1;
> +  c[2] = 2;
> +  c[3] = 3;
> +}
> +
> +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr106022-4.c b/gcc/testsuite/gcc.target/i386/pr106022-4.c
> new file mode 100644
> index 00000000000..8ecda170af3
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr106022-4.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=x86-64" } */
> +
> +void
> +foo (float *c)
> +{
> +  c[0] = 2.3;
> +  c[1] = 0.0;
> +}
> +
> +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x40133333" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x00000000" 1 { target { ia32 } } } } */
> +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$1075000115," 1 { target { ! ia32 } } } } */
> +/* { dg-final { scan-assembler-not "xmm" } } */
> --
> 2.36.1
>

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [GCC 12] [PATCH] x86: Support 2/4/8 byte constant vector stores
  2022-07-01 15:31 ` Uros Bizjak
@ 2022-07-27 14:24   ` H.J. Lu
  2022-07-31 17:00     ` Uros Bizjak
  0 siblings, 1 reply; 4+ messages in thread
From: H.J. Lu @ 2022-07-27 14:24 UTC (permalink / raw)
  To: Uros Bizjak, Richard Biener; +Cc: gcc-patches, Noah Goldstein, Hongtao Liu

On Fri, Jul 1, 2022 at 8:31 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Jun 30, 2022 at 4:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > 1. Add a predicate for constant vectors which can be converted to integer
> > constants suitable for constant integer stores.  For a 8-byte constant
> > vector, the converted 64-bit integer must be valid for store with 64-bit
> > immediate, which is a 64-bit integer sign-extended from a 32-bit integer.
> > 2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector
> > stores, like
> >
> > (set (mem:V2HI (reg:DI 84))
> >      (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])]))
> >
> > 3. After reload, convert constant vector stores to constant integer
> > stores, like
> >
> > (set (mem:SI (reg:DI 5 di [84]))
> >      (const_int 65536 [0x10000]))
> >
> > For
> >
> > void
> > foo (short * c)
> > {
> >   c[0] = 0;
> >   c[1] = 1;
> > }
> >
> > it generates
> >
> >         movl    $65536, (%rdi)
> >
> > instead of
> >
> >         movl    .LC0(%rip), %eax
> >         movl    %eax, (%rdi)
> >
> > gcc/
> >
> >         PR target/106022
> >         * config/i386/i386-protos.h (ix86_convert_const_vector_to_integer):
> >         New.
> >         * config/i386/i386.cc (ix86_convert_const_vector_to_integer):
> >         New.
> >         * config/i386/mmx.md (V_16_32_64): New.
> >         (*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit
> >         and 64-bit constant vector.
> >         * config/i386/predicates.md (x86_64_const_vector_operand): New.
> >
> > gcc/testsuite/
> >
> >         PR target/106022
> >         * gcc.target/i386/pr106022-1.c: New test.
> >         * gcc.target/i386/pr106022-2.c: Likewise.
> >         * gcc.target/i386/pr106022-3.c: Likewise.
> >         * gcc.target/i386/pr106022-4.c: Likewise.
>
> OK.

OK to backport to GCC 12 branch?

> Thanks,
> Uros.
>
> > ---
> >  gcc/config/i386/i386-protos.h              |  2 +
> >  gcc/config/i386/i386.cc                    | 47 ++++++++++++++++++++++
> >  gcc/config/i386/mmx.md                     | 37 +++++++++++++++++
> >  gcc/config/i386/predicates.md              | 11 +++++
> >  gcc/testsuite/gcc.target/i386/pr106022-1.c | 13 ++++++
> >  gcc/testsuite/gcc.target/i386/pr106022-2.c | 14 +++++++
> >  gcc/testsuite/gcc.target/i386/pr106022-3.c | 14 +++++++
> >  gcc/testsuite/gcc.target/i386/pr106022-4.c | 14 +++++++
> >  8 files changed, 152 insertions(+)
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-1.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-2.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-3.c
> >  create mode 100644 gcc/testsuite/gcc.target/i386/pr106022-4.c
> >
> > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> > index 3596ce81ecf..cf847751ac5 100644
> > --- a/gcc/config/i386/i386-protos.h
> > +++ b/gcc/config/i386/i386-protos.h
> > @@ -122,6 +122,8 @@ extern void ix86_expand_unary_operator (enum rtx_code, machine_mode,
> >                                         rtx[]);
> >  extern rtx ix86_build_const_vector (machine_mode, bool, rtx);
> >  extern rtx ix86_build_signbit_mask (machine_mode, bool, bool);
> > +extern HOST_WIDE_INT ix86_convert_const_vector_to_integer (rtx,
> > +                                                          machine_mode);
> >  extern void ix86_split_convert_uns_si_sse (rtx[]);
> >  extern void ix86_expand_convert_uns_didf_sse (rtx, rtx);
> >  extern void ix86_expand_convert_uns_sixf_sse (rtx, rtx);
> > diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> > index b15b4893bb9..0cfe9962f75 100644
> > --- a/gcc/config/i386/i386.cc
> > +++ b/gcc/config/i386/i386.cc
> > @@ -15723,6 +15723,53 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
> >    return force_reg (vec_mode, v);
> >  }
> >
> > +/* Return HOST_WIDE_INT for const vector OP in MODE.  */
> > +
> > +HOST_WIDE_INT
> > +ix86_convert_const_vector_to_integer (rtx op, machine_mode mode)
> > +{
> > +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> > +    gcc_unreachable ();
> > +
> > +  int nunits = GET_MODE_NUNITS (mode);
> > +  wide_int val = wi::zero (GET_MODE_BITSIZE (mode));
> > +  machine_mode innermode = GET_MODE_INNER (mode);
> > +  unsigned int innermode_bits = GET_MODE_BITSIZE (innermode);
> > +
> > +  switch (mode)
> > +    {
> > +    case E_V2QImode:
> > +    case E_V4QImode:
> > +    case E_V2HImode:
> > +    case E_V8QImode:
> > +    case E_V4HImode:
> > +    case E_V2SImode:
> > +      for (int i = 0; i < nunits; ++i)
> > +       {
> > +         int v = INTVAL (XVECEXP (op, 0, i));
> > +         wide_int wv = wi::shwi (v, innermode_bits);
> > +         val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
> > +       }
> > +      break;
> > +    case E_V2HFmode:
> > +    case E_V4HFmode:
> > +    case E_V2SFmode:
> > +      for (int i = 0; i < nunits; ++i)
> > +       {
> > +         rtx x = XVECEXP (op, 0, i);
> > +         int v = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (x),
> > +                                 REAL_MODE_FORMAT (innermode));
> > +         wide_int wv = wi::shwi (v, innermode_bits);
> > +         val = wi::insert (val, wv, innermode_bits * i, innermode_bits);
> > +       }
> > +      break;
> > +    default:
> > +      gcc_unreachable ();
> > +    }
> > +
> > +  return val.to_shwi ();
> > +}
> > +
> >  /* Return TRUE or FALSE depending on whether the first SET in INSN
> >     has source and destination with matching CC modes, and that the
> >     CC mode is at least as constrained as REQ_MODE.  */
> > diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
> > index ba53007a35e..3294c1e6274 100644
> > --- a/gcc/config/i386/mmx.md
> > +++ b/gcc/config/i386/mmx.md
> > @@ -69,6 +69,12 @@ (define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
> >  ;; 4-byte and 2-byte QImode vector modes
> >  (define_mode_iterator VI1_16_32 [V4QI V2QI])
> >
> > +;; All 2-byte, 4-byte and 8-byte vector modes with more than 1 element
> > +(define_mode_iterator V_16_32_64
> > +   [V2QI V4QI V2HI V2HF
> > +    (V8QI "TARGET_64BIT") (V4HI "TARGET_64BIT") (V4HF "TARGET_64BIT")
> > +    (V2SI "TARGET_64BIT") (V2SF "TARGET_64BIT")])
> > +
> >  ;; V2S* modes
> >  (define_mode_iterator V2FI [V2SF V2SI])
> >
> > @@ -331,6 +337,37 @@ (define_insn "*mov<mode>_internal"
> >            ]
> >            (symbol_ref "true")))])
> >
> > +;; 16-bit, 32-bit and 64-bit constant vector stores.  After reload,
> > +;; convert them to immediate integer stores.
> > +(define_insn_and_split "*mov<mode>_imm"
> > +  [(set (match_operand:V_16_32_64 0 "memory_operand" "=m")
> > +       (match_operand:V_16_32_64 1 "x86_64_const_vector_operand" "i"))]
> > +  ""
> > +  "#"
> > +  "&& reload_completed"
> > +  [(set (match_dup 0) (match_dup 1))]
> > +{
> > +  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (operands[1],
> > +                                                           <MODE>mode);
> > +  operands[1] = GEN_INT (val);
> > +  machine_mode mode;
> > +  switch (GET_MODE_SIZE (<MODE>mode))
> > +    {
> > +    case 2:
> > +      mode = HImode;
> > +      break;
> > +    case 4:
> > +      mode = SImode;
> > +      break;
> > +    case 8:
> > +      mode = DImode;
> > +      break;
> > +    default:
> > +      gcc_unreachable ();
> > +    }
> > +  operands[0] = lowpart_subreg (mode, operands[0], <MODE>mode);
> > +})
> > +
> >  ;; For TARGET_64BIT we always round up to 8 bytes.
> >  (define_insn "*push<mode>2_rex64"
> >    [(set (match_operand:V_32 0 "push_operand" "=X,X")
> > diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
> > index 128144f1050..c71c453cceb 100644
> > --- a/gcc/config/i386/predicates.md
> > +++ b/gcc/config/i386/predicates.md
> > @@ -1194,6 +1194,17 @@ (define_predicate "reg_or_const_vector_operand"
> >    (ior (match_operand 0 "register_operand")
> >         (match_code "const_vector")))
> >
> > +;; Return true when OP is CONST_VECTOR which can be converted to a
> > +;; sign extended 32-bit integer.
> > +(define_predicate "x86_64_const_vector_operand"
> > +  (match_code "const_vector")
> > +{
> > +  if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
> > +    return false;
> > +  HOST_WIDE_INT val = ix86_convert_const_vector_to_integer (op, mode);
> > +  return trunc_int_for_mode (val, SImode) == val;
> > +})
> > +
> >  ;; Return true when OP is nonimmediate or standard SSE constant.
> >  (define_predicate "nonimmediate_or_sse_const_operand"
> >    (ior (match_operand 0 "nonimmediate_operand")
> > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-1.c b/gcc/testsuite/gcc.target/i386/pr106022-1.c
> > new file mode 100644
> > index 00000000000..6643b4c30f1
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr106022-1.c
> > @@ -0,0 +1,13 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=x86-64" } */
> > +
> > +void
> > +foo (char *c)
> > +{
> > +  c[0] = 0;
> > +  c[1] = 1;
> > +  c[2] = 2;
> > +  c[3] = 3;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$50462976," 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-2.c b/gcc/testsuite/gcc.target/i386/pr106022-2.c
> > new file mode 100644
> > index 00000000000..0e79fb53297
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr106022-2.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=x86-64" } */
> > +
> > +void
> > +foo (int *c)
> > +{
> > +  c = __builtin_assume_aligned (c, 16);
> > +  c[0] = -1;
> > +  c[1] = -1;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\[^\n\]*%xmm" 2 { target { ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$-1," 1 { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-not "xmm" { target { ! ia32 } } } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-3.c b/gcc/testsuite/gcc.target/i386/pr106022-3.c
> > new file mode 100644
> > index 00000000000..8b0c2a8f6d8
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr106022-3.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=x86-64" } */
> > +
> > +void
> > +foo (int *c)
> > +{
> > +  c[0] = 0;
> > +  c[1] = 1;
> > +  c[2] = 2;
> > +  c[3] = 3;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "movdqa\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> > +/* { dg-final { scan-assembler-times "movups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
> > diff --git a/gcc/testsuite/gcc.target/i386/pr106022-4.c b/gcc/testsuite/gcc.target/i386/pr106022-4.c
> > new file mode 100644
> > index 00000000000..8ecda170af3
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/i386/pr106022-4.c
> > @@ -0,0 +1,14 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O2 -march=x86-64" } */
> > +
> > +void
> > +foo (float *c)
> > +{
> > +  c[0] = 2.3;
> > +  c[1] = 0.0;
> > +}
> > +
> > +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x40133333" 1 { target { ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "movl\[ \\t\]+\\\$0x00000000" 1 { target { ia32 } } } } */
> > +/* { dg-final { scan-assembler-times "movq\[ \\t\]+\\\$1075000115," 1 { target { ! ia32 } } } } */
> > +/* { dg-final { scan-assembler-not "xmm" } } */
> > --
> > 2.36.1
> >



-- 
H.J.

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [GCC 12] [PATCH] x86: Support 2/4/8 byte constant vector stores
  2022-07-27 14:24   ` [GCC 12] " H.J. Lu
@ 2022-07-31 17:00     ` Uros Bizjak
  0 siblings, 0 replies; 4+ messages in thread
From: Uros Bizjak @ 2022-07-31 17:00 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Richard Biener, gcc-patches, Noah Goldstein, Hongtao Liu

On Wed, Jul 27, 2022 at 4:24 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Fri, Jul 1, 2022 at 8:31 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Thu, Jun 30, 2022 at 4:50 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > 1. Add a predicate for constant vectors which can be converted to integer
> > > constants suitable for constant integer stores.  For a 8-byte constant
> > > vector, the converted 64-bit integer must be valid for store with 64-bit
> > > immediate, which is a 64-bit integer sign-extended from a 32-bit integer.
> > > 2. Add a new pattern to allow 2-byte, 4-byte and 8-byte constant vector
> > > stores, like
> > >
> > > (set (mem:V2HI (reg:DI 84))
> > >      (const_vector:V2HI [(const_int 0 [0]) (const_int 1 [0x1])]))
> > >
> > > 3. After reload, convert constant vector stores to constant integer
> > > stores, like
> > >
> > > (set (mem:SI (reg:DI 5 di [84]))
> > >      (const_int 65536 [0x10000]))
> > >
> > > For
> > >
> > > void
> > > foo (short * c)
> > > {
> > >   c[0] = 0;
> > >   c[1] = 1;
> > > }
> > >
> > > it generates
> > >
> > >         movl    $65536, (%rdi)
> > >
> > > instead of
> > >
> > >         movl    .LC0(%rip), %eax
> > >         movl    %eax, (%rdi)
> > >
> > > gcc/
> > >
> > >         PR target/106022
> > >         * config/i386/i386-protos.h (ix86_convert_const_vector_to_integer):
> > >         New.
> > >         * config/i386/i386.cc (ix86_convert_const_vector_to_integer):
> > >         New.
> > >         * config/i386/mmx.md (V_16_32_64): New.
> > >         (*mov<mode>_imm): New patterns for stores with 16-bit, 32-bit
> > >         and 64-bit constant vector.
> > >         * config/i386/predicates.md (x86_64_const_vector_operand): New.
> > >
> > > gcc/testsuite/
> > >
> > >         PR target/106022
> > >         * gcc.target/i386/pr106022-1.c: New test.
> > >         * gcc.target/i386/pr106022-2.c: Likewise.
> > >         * gcc.target/i386/pr106022-3.c: Likewise.
> > >         * gcc.target/i386/pr106022-4.c: Likewise.
> >
> > OK.
>
> OK to backport to GCC 12 branch?

Let's keep this in mainline only. It isn't something that makes a lot
of difference.

Uros.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-07-31 17:00 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-06-30 14:50 [PATCH] x86: Support 2/4/8 byte constant vector stores H.J. Lu
2022-07-01 15:31 ` Uros Bizjak
2022-07-27 14:24   ` [GCC 12] " H.J. Lu
2022-07-31 17:00     ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).