public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
From: Uros Bizjak <uros@gcc.gnu.org>
To: gcc-cvs@gcc.gnu.org
Subject: [gcc r12-1197] i386: Add insert and extract patterns for 4-byte vectors [PR100637]
Date: Thu,  3 Jun 2021 18:06:29 +0000 (GMT)	[thread overview]
Message-ID: <20210603180629.C1AE8386FC25@sourceware.org> (raw)

https://gcc.gnu.org/g:5883e567564c5b3caecba0c13e8a360a14cdc846

commit r12-1197-g5883e567564c5b3caecba0c13e8a360a14cdc846
Author: Uros Bizjak <ubizjak@gmail.com>
Date:   Thu Jun 3 20:05:31 2021 +0200

    i386: Add insert and extract patterns for 4-byte vectors [PR100637]
    
    The patch introduces insert and extract patterns for 4-byte vectors.
    It only emits PINSR and PEXTR instructions when they are available;
    otherwise it falls back to generic code that emulates these instructions
    via inserts, extracts, logic operations and shifts in integer registers.
    
    Please note that the generic fallback produces better code than the current
    approach of constructing a new vector in memory (due to a store forwarding
    stall), so QImode 8-byte vector inserts and extracts are now also enabled
    only with TARGET_SSE4_1.
    
    2021-06-03  Uroš Bizjak  <ubizjak@gmail.com>
    
    gcc/
            PR target/100637
            * config/i386/i386-expand.c (ix86_expand_vector_set):
            Handle V2HI and V4QI modes.
            (ix86_expand_vector_extract): Ditto.
            * config/i386/mmx.md (*pinsrw): New insn pattern.
            (*pinsrb): Ditto.
            (*pextrw): Ditto.
            (*pextrw_zext): Ditto.
            (*pextrb): Ditto.
            (*pextrb_zext): Ditto.
            (vec_setv2hi): New expander.
            (vec_extractv2hihi): Ditto.
            (vec_setv4qi): Ditto.
            (vec_extractv4qiqi): Ditto.
    
            (vec_setv8qi): Enable only for TARGET_SSE4_1.
            (vec_extractv8qiqi): Ditto.
    
    gcc/testsuite/
    
            PR target/100637
            * gcc.target/i386/vperm-v2hi.c: New test.
            * gcc.target/i386/vperm-v4qi.c: Ditto.

Diff:
---
 gcc/config/i386/i386-expand.c              |   6 +
 gcc/config/i386/mmx.md                     | 176 ++++++++++++++++++++++++++++-
 gcc/testsuite/gcc.target/i386/vperm-v2hi.c |  41 +++++++
 gcc/testsuite/gcc.target/i386/vperm-v4qi.c |  47 ++++++++
 4 files changed, 268 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 4185f58eed5..eb7cdb0c14f 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -14968,6 +14968,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
       return;
 
     case E_V8HImode:
+    case E_V2HImode:
       use_vec_merge = TARGET_SSE2;
       break;
     case E_V4HImode:
@@ -14975,6 +14976,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
       break;
 
     case E_V16QImode:
+    case E_V4QImode:
       use_vec_merge = TARGET_SSE4_1;
       break;
 
@@ -15274,6 +15276,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
       break;
 
     case E_V8HImode:
+    case E_V2HImode:
       use_vec_extr = TARGET_SSE2;
       break;
     case E_V4HImode:
@@ -15294,6 +15297,9 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
 	  return;
 	}
       break;
+    case E_V4QImode:
+      use_vec_extr = TARGET_SSE4_1;
+      break;
 
     case E_V8SFmode:
       if (TARGET_AVX)
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index f39e062ddfc..914e5e91e90 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3092,7 +3092,7 @@
   [(match_operand:V8QI 0 "register_operand")
    (match_operand:QI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_set (TARGET_MMX_WITH_SSE, operands[0], operands[1],
 			  INTVAL (operands[2]));
@@ -3103,7 +3103,7 @@
   [(match_operand:QI 0 "register_operand")
    (match_operand:V8QI 1 "register_operand")
    (match_operand 2 "const_int_operand")]
-  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
 {
   ix86_expand_vector_extract (TARGET_MMX_WITH_SSE, operands[0],
 			      operands[1], INTVAL (operands[2]));
@@ -3120,6 +3120,178 @@
   DONE;
 })
 
+(define_insn "*pinsrw"  ;; Insert HImode operand 2 into one element of V2HI operand 1.
+  [(set (match_operand:V2HI 0 "register_operand" "=x,YW")
+        (vec_merge:V2HI
+          (vec_duplicate:V2HI
+            (match_operand:HI 2 "nonimmediate_operand" "rm,rm"))
+	  (match_operand:V2HI 1 "register_operand" "0,YW")
+          (match_operand:SI 3 "const_int_operand")))]
+  "TARGET_SSE2
+   && ((unsigned) exact_log2 (INTVAL (operands[3]))
+       < GET_MODE_NUNITS (V2HImode))"  ;; exact_log2 of merge mask operand 3 must be a valid V2HI element index.
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));  /* Replace the merge mask by its exact_log2, printed as the immediate.  */
+  switch (which_alternative)
+    {
+    case 1:  /* Alternative tagged "avx": operand 1 is printed as a separate source.  */
+      if (MEM_P (operands[2]))
+	return "vpinsrw\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+      else
+	return "vpinsrw\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+    case 0:  /* Alternative tagged "noavx": operand 1 is tied to operand 0 by the "0" constraint.  */
+      if (MEM_P (operands[2]))
+	return "pinsrw\t{%3, %2, %0|%0, %2, %3}";
+      else
+	return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sselog")
+   (set_attr "length_immediate" "1")
+   (set_attr "mode" "TI")])
+
+(define_insn "*pinsrb"  ;; Insert QImode operand 2 into one element of V4QI operand 1.
+  [(set (match_operand:V4QI 0 "register_operand" "=x,YW")
+        (vec_merge:V4QI
+          (vec_duplicate:V4QI
+            (match_operand:QI 2 "nonimmediate_operand" "rm,rm"))
+	  (match_operand:V4QI 1 "register_operand" "0,YW")
+          (match_operand:SI 3 "const_int_operand")))]
+  "TARGET_SSE4_1
+   && ((unsigned) exact_log2 (INTVAL (operands[3]))
+       < GET_MODE_NUNITS (V4QImode))"  ;; exact_log2 of merge mask operand 3 must be a valid V4QI element index.
+{
+  operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));  /* Replace the merge mask by its exact_log2, printed as the immediate.  */
+  switch (which_alternative)
+    {
+    case 1:  /* Alternative tagged "avx": operand 1 is printed as a separate source.  */
+      if (MEM_P (operands[2]))
+	return "vpinsrb\t{%3, %2, %1, %0|%0, %1, %2, %3}";
+      else
+	return "vpinsrb\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
+    case 0:  /* Alternative tagged "noavx": operand 1 is tied to operand 0 by the "0" constraint.  */
+      if (MEM_P (operands[2]))
+	return "pinsrb\t{%3, %2, %0|%0, %2, %3}";
+      else
+	return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "isa" "noavx,avx")
+   (set_attr "type" "sselog")
+   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "orig,vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "*pextrw"  ;; Extract HImode element operand 2 (0 or 1) of V2HI operand 1.
+  [(set (match_operand:HI 0 "register_sse4nonimm_operand" "=r,m")
+	(vec_select:HI
+	  (match_operand:V2HI 1 "register_operand" "YW,YW")
+	  (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n,n")])))]
+  "TARGET_SSE2"
+  "@
+   %vpextrw\t{%2, %1, %k0|%k0, %1, %2}
+   %vpextrw\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "isa" "*,sse4")  ;; Only the memory-destination alternative is tagged sse4.
+   (set_attr "type" "sselog1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "*pextrw_zext"  ;; Extract HImode element operand 2 of V2HI operand 1, zero-extended to an SWI48 mode.
+  [(set (match_operand:SWI48 0 "register_operand" "=r")
+	(zero_extend:SWI48
+	  (vec_select:HI
+	    (match_operand:V2HI 1 "register_operand" "YW")
+	    (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")]))))]
+  "TARGET_SSE2"
+  "%vpextrw\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "*pextrb"  ;; Extract QImode element operand 2 (0 to 3) of V4QI operand 1.
+  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,m")  ;; Destination is a general register or memory.
+	(vec_select:QI
+	  (match_operand:V4QI 1 "register_operand" "YW,YW")
+	  (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n,n")])))]
+  "TARGET_SSE4_1"
+  "@
+   %vpextrb\t{%2, %1, %k0|%k0, %1, %2}
+   %vpextrb\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
+
+(define_insn "*pextrb_zext"  ;; Extract QImode element operand 2 of V4QI operand 1, zero-extended to an SWI248 mode.
+  [(set (match_operand:SWI248 0 "register_operand" "=r")
+	(zero_extend:SWI248
+	  (vec_select:QI
+	    (match_operand:V4QI 1 "register_operand" "YW")
+	    (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")]))))]
+  "TARGET_SSE4_1"
+  "%vpextrb\t{%2, %1, %k0|%k0, %1, %2}"
+  [(set_attr "type" "sselog1")
+   (set_attr "prefix_data16" "1")
+   (set_attr "prefix_extra" "1")
+   (set_attr "length_immediate" "1")
+   (set_attr "prefix" "maybe_vex")
+   (set_attr "mode" "TI")])
+
+(define_expand "vec_setv2hi"  ;; Operands: 0 = V2HI vector, 1 = HImode value, 2 = constant element index.
+  [(match_operand:V2HI 0 "register_operand")
+   (match_operand:HI 1 "register_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_SSE2"
+{
+  ix86_expand_vector_set (false, operands[0], operands[1],  /* Arguments: mmx_ok = false, target, val, elt.  */
+			  INTVAL (operands[2]));
+  DONE;
+})
+
+(define_expand "vec_extractv2hihi"  ;; Operands: 0 = HImode result, 1 = V2HI vector, 2 = constant element index.
+  [(match_operand:HI 0 "register_operand")
+   (match_operand:V2HI 1 "register_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_SSE2"
+{
+  ix86_expand_vector_extract (false, operands[0],  /* Arguments: mmx_ok = false, target, vec, elt.  */
+			      operands[1], INTVAL (operands[2]));
+  DONE;
+})
+
+(define_expand "vec_setv4qi"  ;; Operands: 0 = V4QI vector, 1 = QImode value, 2 = constant element index.
+  [(match_operand:V4QI 0 "register_operand")
+   (match_operand:QI 1 "register_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_SSE4_1"
+{
+  ix86_expand_vector_set (false, operands[0], operands[1],  /* Arguments: mmx_ok = false, target, val, elt.  */
+			  INTVAL (operands[2]));
+  DONE;
+})
+
+(define_expand "vec_extractv4qiqi"  ;; Operands: 0 = QImode result, 1 = V4QI vector, 2 = constant element index.
+  [(match_operand:QI 0 "register_operand")
+   (match_operand:V4QI 1 "register_operand")
+   (match_operand 2 "const_int_operand")]
+  "TARGET_SSE4_1"
+{
+  ix86_expand_vector_extract (false, operands[0],  /* Arguments: mmx_ok = false, target, vec, elt.  */
+			      operands[1], INTVAL (operands[2]));
+  DONE;
+})
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;
 ;; Miscellaneous
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v2hi.c b/gcc/testsuite/gcc.target/i386/vperm-v2hi.c
new file mode 100644
index 00000000000..0af94f2c6b1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v2hi.c
@@ -0,0 +1,41 @@
+/* { dg-do run } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef short S;  /* Scalar element type.  */
+typedef short V __attribute__((vector_size(4)));  /* 4-byte vector of shorts: the shuffled data.  */
+typedef short IV __attribute__((vector_size(4)));  /* 4-byte vector of shorts: the shuffle selector.  */
+typedef union { S s[2]; V v; } U;  /* Element-array view and vector view of the same object.  */
+
+static U i[2], b, c;  /* i: shuffle inputs; b: shuffle result; c: expected result built per element.  */
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))  /* Trap when T is false.  */
+
+#define TEST(E0, E1) \
+  b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1}); \
+  c.s[0] = i[0].s[E0]; \
+  c.s[1] = i[0].s[E1]; \
+  __asm__("" : : : "memory"); \
+  assert (memcmp (&b, &c, sizeof(c)) == 0);  /* Shuffle result must equal the element-wise reference.  */
+
+#include "vperm-2-2.inc"  /* Presumably defines check () in terms of TEST; not visible here -- confirm.  */
+
+int main()
+{
+  check_isa ();
+
+  if (!sse_os_support ())
+    exit (0);  /* Exit successfully (skip) when sse_os_support () returns zero.  */
+
+  i[0].s[0] = 0;
+  i[0].s[1] = 1;
+  i[0].s[2] = 2;  /* NOTE(review): s[] has 2 elements; indices 2-3 presumably reach i[1] through the adjacent array element -- confirm intended.  */
+  i[0].s[3] = 3;
+
+  check();
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/vperm-v4qi.c b/gcc/testsuite/gcc.target/i386/vperm-v4qi.c
new file mode 100644
index 00000000000..57fa547b9d5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/vperm-v4qi.c
@@ -0,0 +1,47 @@
+/* { dg-do run } */
+/* { dg-options "-O -msse2" } */
+/* { dg-require-effective-target sse2 } */
+
+#include "isa-check.h"
+#include "sse-os-support.h"
+
+typedef char S;  /* Scalar element type.  */
+typedef char V __attribute__((vector_size(4)));  /* 4-byte vector of chars: the shuffled data.  */
+typedef char IV __attribute__((vector_size(4)));  /* 4-byte vector of chars: the shuffle selector.  */
+typedef union { S s[4]; V v; } U;  /* Element-array view and vector view of the same object.  */
+
+static U i[2], b, c;  /* i: shuffle inputs; b: shuffle result; c: expected result built per element.  */
+
+extern int memcmp (const void *, const void *, __SIZE_TYPE__);
+#define assert(T) ((T) || (__builtin_trap (), 0))  /* Trap when T is false.  */
+
+#define TEST(E0, E1, E2, E3) \
+  b.v = __builtin_shuffle (i[0].v, i[1].v, (IV){E0, E1, E2, E3}); \
+  c.s[0] = i[0].s[E0]; \
+  c.s[1] = i[0].s[E1]; \
+  c.s[2] = i[0].s[E2]; \
+  c.s[3] = i[0].s[E3]; \
+  __asm__("" : : : "memory"); \
+  assert (memcmp (&b, &c, sizeof(c)) == 0);  /* Shuffle result must equal the element-wise reference.  */
+
+#include "vperm-4-2.inc"  /* Presumably defines check () in terms of TEST; not visible here -- confirm.  */
+
+int main()
+{
+  check_isa ();
+
+  if (!sse_os_support ())
+    exit (0);  /* Exit successfully (skip) when sse_os_support () returns zero.  */
+
+  i[0].s[0] = 0;
+  i[0].s[1] = 1;
+  i[0].s[2] = 2;
+  i[0].s[3] = 3;
+  i[0].s[4] = 4;  /* NOTE(review): s[] has 4 elements; indices 4-7 presumably reach i[1] through the adjacent array element -- confirm intended.  */
+  i[0].s[5] = 5;
+  i[0].s[6] = 6;
+  i[0].s[7] = 7;
+
+  check();
+  return 0;
+}


                 reply	other threads:[~2021-06-03 18:06 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210603180629.C1AE8386FC25@sourceware.org \
    --to=uros@gcc.gnu.org \
    --cc=gcc-cvs@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).