public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-1916] Reimplement packuswb/packusdw with UNSPEC_US_TRUNCATE instead of original us_truncate.
@ 2023-06-19  1:34 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2023-06-19  1:34 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:58e61a3ab1c13b6d5b07d86a30cf48a46e0345c8

commit r14-1916-g58e61a3ab1c13b6d5b07d86a30cf48a46e0345c8
Author: liuhongt <hongtao.liu@intel.com>
Date:   Wed Jun 14 10:34:32 2023 +0800

    Reimplement packuswb/packusdw with UNSPEC_US_TRUNCATE instead of original us_truncate.
    
    packuswb/packusdw does unsigned saturation for signed source, but rtl
    us_truncate means does unsigned saturation for unsigned source.
    So for value -1, packuswb will produce 0, but us_truncate produces
    255. The patch reimplement those related patterns and functions with
    UNSPEC_US_TRUNCATE instead of us_truncate.
    
    gcc/ChangeLog:
    
            PR target/110235
            * config/i386/i386-expand.cc (ix86_split_mmx_pack): Use
            UNSPEC_US_TRUNCATE instead of original us_truncate for
            packusdw/packuswb.
            * config/i386/mmx.md (mmx_pack<s_trunsuffix>swb): Substitute
            with ..
            (mmx_packsswb): .. this and ..
            (mmx_packuswb): .. this.
            (mmx_packusdw): Use UNSPEC_US_TRUNCATE instead of original
            us_truncate.
            (s_trunsuffix): Removed code iterator.
            (any_s_truncate): Ditto.
            * config/i386/sse.md (<sse2_avx2>_packuswb<mask_name>): Use
            UNSPEC_US_TRUNCATE instead of original us_truncate.
            (<sse4_1_avx2>_packusdw<mask_name>): Ditto.
            * config/i386/i386.md (UNSPEC_US_TRUNCATE): New unspec_c_enum.

Diff:
---
 gcc/config/i386/i386-expand.cc | 20 ++++++++++++++-----
 gcc/config/i386/i386.md        |  4 ++++
 gcc/config/i386/mmx.md         | 45 ++++++++++++++++++++++++++++--------------
 gcc/config/i386/sse.md         | 20 +++++++++----------
 4 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 98c43c6704c..ad2cd079fcb 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -1021,6 +1021,7 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   rtx op0 = operands[0];
   rtx op1 = operands[1];
   rtx op2 = operands[2];
+  rtx src;
 
   machine_mode dmode = GET_MODE (op0);
   machine_mode smode = GET_MODE (op1);
@@ -1044,11 +1045,20 @@ ix86_split_mmx_pack (rtx operands[], enum rtx_code code)
   op1 = lowpart_subreg (sse_smode, op1, GET_MODE (op1));
   op2 = lowpart_subreg (sse_smode, op2, GET_MODE (op2));
 
-  op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
-  op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
-  rtx insn = gen_rtx_SET (dest, gen_rtx_VEC_CONCAT (sse_dmode,
-						    op1, op2));
-  emit_insn (insn);
+  /* paskusdw/packuswb does unsigned saturation of a signed source
+     which is different from generic us_truncate RTX.  */
+  if (code == US_TRUNCATE)
+    src = gen_rtx_UNSPEC (sse_dmode,
+			  gen_rtvec (2, op1, op2),
+			  UNSPEC_US_TRUNCATE);
+  else
+    {
+      op1 = gen_rtx_fmt_e (code, sse_half_dmode, op1);
+      op2 = gen_rtx_fmt_e (code, sse_half_dmode, op2);
+      src = gen_rtx_VEC_CONCAT (sse_dmode, op1, op2);
+    }
+
+  emit_move_insn (dest, src);
 
   ix86_move_vector_high_sse_to_mmx (op0);
 }
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index bfdd54f54ba..95a6653cd13 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -129,6 +129,10 @@
   UNSPEC_RSQRT
   UNSPEC_PSADBW
 
+  ;; Different from generic us_truncate RTX
+  ;; as it does unsigned saturation of signed source.
+  UNSPEC_US_TRUNCATE
+
   ;; For AVX/AVX512F support
   UNSPEC_SCALEF
   UNSPEC_PCMP
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index 6fbe3909c8b..12b103a06a7 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -3337,27 +3337,43 @@
 ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 
-;; Used in signed and unsigned truncations with saturation.
-(define_code_iterator any_s_truncate [ss_truncate us_truncate])
-;; Instruction suffix for truncations with saturation.
-(define_code_attr s_trunsuffix [(ss_truncate "s") (us_truncate "u")])
-
-(define_insn_and_split "mmx_pack<s_trunsuffix>swb"
+(define_insn_and_split "mmx_packsswb"
   [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
 	(vec_concat:V8QI
-	  (any_s_truncate:V4QI
+	  (ss_truncate:V4QI
 	    (match_operand:V4HI 1 "register_operand" "0,0,Yw"))
-	  (any_s_truncate:V4QI
+	  (ss_truncate:V4QI
 	    (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw"))))]
   "TARGET_MMX || TARGET_MMX_WITH_SSE"
   "@
-   pack<s_trunsuffix>swb\t{%2, %0|%0, %2}
+   packsswb\t{%2, %0|%0, %2}
+   #
+   #"
+  "&& reload_completed
+   && SSE_REGNO_P (REGNO (operands[0]))"
+  [(const_int 0)]
+  "ix86_split_mmx_pack (operands, SS_TRUNCATE); DONE;"
+  [(set_attr "mmx_isa" "native,sse_noavx,avx")
+   (set_attr "type" "mmxshft,sselog,sselog")
+   (set_attr "mode" "DI,TI,TI")])
+
+;; This instruction does unsigned saturation of signed source
+;; and is different from generic us_truncate RTX.
+(define_insn_and_split "mmx_packuswb"
+  [(set (match_operand:V8QI 0 "register_operand" "=y,x,Yw")
+	(unspec:V8QI
+	  [(match_operand:V4HI 1 "register_operand" "0,0,Yw")
+	   (match_operand:V4HI 2 "register_mmxmem_operand" "ym,x,Yw")]
+	  UNSPEC_US_TRUNCATE))]
+  "TARGET_MMX || TARGET_MMX_WITH_SSE"
+  "@
+   packuswb\t{%2, %0|%0, %2}
    #
    #"
   "&& reload_completed
    && SSE_REGNO_P (REGNO (operands[0]))"
   [(const_int 0)]
-  "ix86_split_mmx_pack (operands, <any_s_truncate:CODE>); DONE;"
+  "ix86_split_mmx_pack (operands, US_TRUNCATE); DONE;"
   [(set_attr "mmx_isa" "native,sse_noavx,avx")
    (set_attr "type" "mmxshft,sselog,sselog")
    (set_attr "mode" "DI,TI,TI")])
@@ -3384,11 +3400,10 @@
 
 (define_insn_and_split "mmx_packusdw"
   [(set (match_operand:V4HI 0 "register_operand" "=Yr,*x,Yw")
-	(vec_concat:V4HI
-	  (us_truncate:V2HI
-	    (match_operand:V2SI 1 "register_operand" "0,0,Yw"))
-	  (us_truncate:V2HI
-	    (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw"))))]
+	(unspec:V4HI
+	  [(match_operand:V2SI 1 "register_operand" "0,0,Yw")
+	   (match_operand:V2SI 2 "register_operand" "Yr,*x,Yw")]
+	   UNSPEC_US_TRUNCATE))]
   "TARGET_SSE4_1 && TARGET_MMX_WITH_SSE"
   "#"
   "&& reload_completed"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 70d7410f7a9..35b46a54130 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -17796,13 +17796,14 @@
    (set_attr "prefix" "orig,<mask_prefix>")
    (set_attr "mode" "<sseinsnmode>")])
 
+;; This instruction does unsigned saturation of signed source
+;; and is different from generic us_truncate RTX.
 (define_insn "<sse2_avx2>_packuswb<mask_name>"
   [(set (match_operand:VI1_AVX512 0 "register_operand" "=x,<v_Yw>")
-	(vec_concat:VI1_AVX512
-	  (us_truncate:<ssehalfvecmode>
-	    (match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>"))
-	  (us_truncate:<ssehalfvecmode>
-	    (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m"))))]
+	(unspec:VI1_AVX512
+	  [(match_operand:<sseunpackmode> 1 "register_operand" "0,<v_Yw>")
+	   (match_operand:<sseunpackmode> 2 "vector_operand" "xBm,<v_Yw>m")]
+	  UNSPEC_US_TRUNCATE))]
   "TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    packuswb\t{%2, %0|%0, %2}
@@ -21889,11 +21890,10 @@
 
 (define_insn "<sse4_1_avx2>_packusdw<mask_name>"
   [(set (match_operand:VI2_AVX2 0 "register_operand" "=Yr,*x,<v_Yw>")
-	(vec_concat:VI2_AVX2
-	  (us_truncate:<ssehalfvecmode>
-	    (match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>"))
-	  (us_truncate:<ssehalfvecmode>
-	    (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m"))))]
+	(unspec:VI2_AVX2
+	  [(match_operand:<sseunpackmode> 1 "register_operand" "0,0,<v_Yw>")
+	   (match_operand:<sseunpackmode> 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")]
+	   UNSPEC_US_TRUNCATE))]
   "TARGET_SSE4_1 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
    packusdw\t{%2, %0|%0, %2}

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-19  1:34 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-19  1:34 [gcc r14-1916] Reimplement packuswb/packusdw with UNSPEC_US_TRUNCATE instead of original us_truncate hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).