public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-2684] i386: Double-word sign-extension missed-optimization [PR110717]
@ 2023-07-20 18:56 Uros Bizjak
  0 siblings, 0 replies; only message in thread
From: Uros Bizjak @ 2023-07-20 18:56 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b50a851eef4b70aabf28fa875d9b2a302d17b66a

commit r14-2684-gb50a851eef4b70aabf28fa875d9b2a302d17b66a
Author: Uros Bizjak <ubizjak@gmail.com>
Date:   Thu Jul 20 20:54:51 2023 +0200

    i386: Double-word sign-extension missed-optimization [PR110717]
    
    When sign-extending the value in a double-word register pair using an
    ashift and ashiftrt sequence with the same immediate count that is less
    than the word width, there is no need to shift the lower word of the
    value.  The sign-extension could be limited to the upper word, but we
    uselessly shift the lower word with it as well:
            movq    %rdi, %rax
            movq    %rsi, %rdx
            shldq   $59, %rdi, %rdx
            salq    $59, %rax
            shrdq   $59, %rdx, %rax
            sarq    $59, %rdx
            ret
    for -m64 and
            movl    4(%esp), %eax
            movl    8(%esp), %edx
            shldl   $27, %eax, %edx
            sall    $27, %eax
            shrdl   $27, %edx, %eax
            sarl    $27, %edx
            ret
    for -m32.
    
    The patch introduces a new post-reload splitter to provide the combined
    ASHIFTRT/ASHIFT instruction pattern.  The instruction is split into a
    sequence of SAL and SAR insns with the same immediate count operand:
            movq    %rsi, %rdx
            movq    %rdi, %rax
            salq    $59, %rdx
            sarq    $59, %rdx
            ret
    
    Some complication is required to properly handle the STV transform, where
    we emit a sequence with DImode PSLLQ and PSRAQ insns for 32-bit AVX512VL
    targets when profitable.
    
    The patch also fixes a small oversight and enables STV transform of SImode
    ASHIFTRT to PSRAD also for SSE2 targets.
    
            PR target/110717
    
    gcc/ChangeLog:
    
            * config/i386/i386-features.cc
            (general_scalar_chain::compute_convert_gain): Calculate gain
            for extend highpart case.
            (general_scalar_chain::convert_op): Handle
            ASHIFTRT/ASHIFT combined RTX.
            (general_scalar_to_vector_candidate_p): Enable ASHIFTRT for
            SImode for SSE2 targets.  Handle ASHIFTRT/ASHIFT combined RTX.
            * config/i386/i386.md (*extend<dwi>2_doubleword_highpart):
            New define_insn_and_split pattern.
            (*extendv2di2_highpart_stv): Ditto.
    
    gcc/testsuite/ChangeLog:
    
            * gcc.target/i386/pr110717.c: New test.

Diff:
---
 gcc/config/i386/i386-features.cc         | 16 +++++++++++++--
 gcc/config/i386/i386.md                  | 35 ++++++++++++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr110717.c | 21 +++++++++++++++++++
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index 4d69251d4f5..f801a8fc94a 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -572,6 +572,9 @@ general_scalar_chain::compute_convert_gain ()
 	      {
 		if (INTVAL (XEXP (src, 1)) >= 32)
 		  igain += ix86_cost->add;
+		/* Gain for extend highpart case.  */
+		else if (GET_CODE (XEXP (src, 0)) == ASHIFT)
+		  igain += ix86_cost->shift_const - ix86_cost->sse_op;
 		else
 		  igain += ix86_cost->shift_const;
 	      }
@@ -951,7 +954,8 @@ general_scalar_chain::convert_op (rtx *op, rtx_insn *insn)
 {
   *op = copy_rtx_if_shared (*op);
 
-  if (GET_CODE (*op) == NOT)
+  if (GET_CODE (*op) == NOT
+      || GET_CODE (*op) == ASHIFT)
     {
       convert_op (&XEXP (*op, 0), insn);
       PUT_MODE (*op, vmode);
@@ -2120,7 +2124,7 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
   switch (GET_CODE (src))
     {
     case ASHIFTRT:
-      if (!TARGET_AVX512VL)
+      if (mode == DImode && !TARGET_AVX512VL)
 	return false;
       /* FALLTHRU */
 
@@ -2131,6 +2135,14 @@ general_scalar_to_vector_candidate_p (rtx_insn *insn, enum machine_mode mode)
       if (!CONST_INT_P (XEXP (src, 1))
 	  || !IN_RANGE (INTVAL (XEXP (src, 1)), 0, GET_MODE_BITSIZE (mode)-1))
 	return false;
+
+      /* Check for extend highpart case.  */
+      if (mode != DImode
+	  || GET_CODE (src) != ASHIFTRT
+	  || GET_CODE (XEXP (src, 0)) != ASHIFT)
+	break;
+
+      src = XEXP (src, 0);
       break;
 
     case SMAX:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8c54aa5e981..4db210cc795 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -15292,6 +15292,41 @@
        (const_string "0")
        (const_string "*")))
    (set_attr "mode" "QI")])
+
+(define_insn_and_split "*extend<dwi>2_doubleword_highpart"
+  [(set (match_operand:<DWI> 0 "register_operand" "=r")
+	(ashiftrt:<DWI>
+	  (ashift:<DWI> (match_operand:<DWI> 1 "nonimmediate_operand" "0")
+		        (match_operand:QI 2 "const_int_operand"))
+	  (match_operand:QI 3 "const_int_operand")))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[2]) == INTVAL (operands[3])
+   && UINTVAL (operands[2]) < <MODE_SIZE> * BITS_PER_UNIT"
+  "#"
+  "&& reload_completed"
+  [(parallel [(set (match_dup 4)
+		   (ashift:DWIH (match_dup 4) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])
+   (parallel [(set (match_dup 4)
+		   (ashiftrt:DWIH (match_dup 4) (match_dup 2)))
+	      (clobber (reg:CC FLAGS_REG))])]
+  "split_double_mode (<DWI>mode, &operands[0], 1, &operands[0], &operands[4]);")
+
+(define_insn_and_split "*extendv2di2_highpart_stv"
+  [(set (match_operand:V2DI 0 "register_operand" "=v")
+	(ashiftrt:V2DI
+	  (ashift:V2DI (match_operand:V2DI 1 "nonimmediate_operand" "vm")
+		       (match_operand:QI 2 "const_int_operand"))
+	  (match_operand:QI 3 "const_int_operand")))]
+  "!TARGET_64BIT && TARGET_STV && TARGET_AVX512VL
+   && INTVAL (operands[2]) == INTVAL (operands[3])
+   && UINTVAL (operands[2]) < 32"
+  "#"
+  "&& reload_completed"
+  [(set (match_dup 0)
+	(ashift:V2DI (match_dup 1) (match_dup 2)))
+   (set (match_dup 0)
+	(ashiftrt:V2DI (match_dup 0) (match_dup 2)))])
 \f
 ;; Rotate instructions
 
diff --git a/gcc/testsuite/gcc.target/i386/pr110717.c b/gcc/testsuite/gcc.target/i386/pr110717.c
new file mode 100644
index 00000000000..233f0eae5b5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr110717.c
@@ -0,0 +1,21 @@
+/* PR target/110717 */
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+#ifdef __SIZEOF_INT128__
+unsigned __int128
+foo (unsigned __int128 x)
+{
+  x <<= 59;
+  return ((__int128) x) >> 59;
+}
+#else
+unsigned long long
+foo (unsigned long long x)
+{
+  x <<= 27;
+  return ((long long) x) >> 27;
+}
+#endif
+
+/* { dg-final { scan-assembler-not "sh\[lr\]d" } } */

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-07-20 18:56 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-07-20 18:56 [gcc r14-2684] i386: Double-word sign-extension missed-optimization [PR110717] Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).