public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-5558] x86: Fix up x86_{,64_}sh{l,r}d patterns [PR103431]
@ 2021-11-27 12:02 Jakub Jelinek
  0 siblings, 0 replies; only message in thread
From: Jakub Jelinek @ 2021-11-27 12:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f7e4f57f1c7883721b6f5ad48953e10ebfb5a756

commit r12-5558-gf7e4f57f1c7883721b6f5ad48953e10ebfb5a756
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Sat Nov 27 13:02:06 2021 +0100

    x86: Fix up x86_{,64_}sh{l,r}d patterns [PR103431]
    
    The following testcase is miscompiled because the x86_{,64_}sh{l,r}d
    patterns don't properly describe what the instructions do.  One thing
    is left out, in particular that there is initial count &= 63 for
    sh{l,r}dq and initial count &= 31 for sh{l,r}d{l,w}.  Another thing is
    not described properly, in particular the behavior when count (after the
    masking) is 0.  The pattern says it is e.g.
    res = (op0 << op2) | (op1 >> (64 - op2))
    but that triggers UB on op1 >> 64.  For op2 0 we actually want
    res = (op0 << op2) | 0
    When constants are propagated to these patterns during RTL optimizations,
    both such problems trigger wrong-code issues.
    This patch represents the patterns as e.g.
    res = (op0 << (op2 & 63)) | (unsigned long long) ((uint128_t) op1 >> (64 - (op2 & 63)))
    so that the initial masking is represented and op2 == 0 results in
    zero being ored.
    The patch introduces alternate patterns for constant op2, where
    simplify-rtx.c will fold those expressions into simple numbers,
    and define_insn_and_split pre-reload splitters that convert the old
    form of the patterns into the new form, so that combine can still
    pattern match computations that assumed the shift amount will be in
    the range of 1 .. bitsize-1.
    
    2021-11-27  Jakub Jelinek  <jakub@redhat.com>
    
            PR middle-end/103431
            * config/i386/i386.md (x86_64_shld, x86_shld, x86_64_shrd, x86_shrd):
            Change insn pattern to accurately describe the instructions.
            (*x86_64_shld_1, *x86_shld_1, *x86_64_shrd_1, *x86_shrd_1): New
            define_insn patterns.
            (*x86_64_shld_2, *x86_shld_2, *x86_64_shrd_2, *x86_shrd_2): New
            define_insn_and_split patterns.
            (*ashl<dwi>3_doubleword_mask, *ashl<dwi>3_doubleword_mask_1,
            *<insn><dwi>3_doubleword_mask, *<insn><dwi>3_doubleword_mask_1,
            ix86_rotl<dwi>3_doubleword, ix86_rotr<dwi>3_doubleword): Adjust
            splitters for x86_{,64_}sh{l,r}d pattern changes.
    
            * gcc.dg/pr103431.c: New test.

Diff:
---
 gcc/config/i386/i386.md         | 302 ++++++++++++++++++++++++++++++++++------
 gcc/testsuite/gcc.dg/pr103431.c |  21 +++
 2 files changed, 281 insertions(+), 42 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 68606e57e60..24368c73edc 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -11301,9 +11301,12 @@
   "&& 1"
   [(parallel
      [(set (match_dup 6)
-	   (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
-		     (lshiftrt:DWIH (match_dup 5)
-		       (minus:QI (match_dup 8) (match_dup 2)))))
+	   (ior:DWIH (ashift:DWIH (match_dup 6)
+		       (and:QI (match_dup 2) (match_dup 8)))
+		     (subreg:DWIH
+		       (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
+			 (minus:QI (match_dup 9)
+				   (and:QI (match_dup 2) (match_dup 8)))) 0)))
       (clobber (reg:CC FLAGS_REG))])
    (parallel
      [(set (match_dup 4)
@@ -11312,7 +11315,8 @@
 {
   split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
 
-  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
+  operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
 
   if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
       != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@@ -11342,9 +11346,12 @@
   "&& 1"
   [(parallel
      [(set (match_dup 6)
-	   (ior:DWIH (ashift:DWIH (match_dup 6) (match_dup 2))
-		     (lshiftrt:DWIH (match_dup 5)
-		       (minus:QI (match_dup 8) (match_dup 2)))))
+	   (ior:DWIH (ashift:DWIH (match_dup 6)
+		       (and:QI (match_dup 2) (match_dup 8)))
+		     (subreg:DWIH
+		       (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
+			 (minus:QI (match_dup 9)
+				   (and:QI (match_dup 2) (match_dup 8)))) 0)))
       (clobber (reg:CC FLAGS_REG))])
    (parallel
      [(set (match_dup 4)
@@ -11353,7 +11360,8 @@
 {
   split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
 
-  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
+  operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
 
   if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
       != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@@ -11404,9 +11412,14 @@
 (define_insn "x86_64_shld"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (ashift:DI (match_dup 0)
-		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
-		(lshiftrt:DI (match_operand:DI 1 "register_operand" "r")
-		  (minus:QI (const_int 64) (match_dup 2)))))
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "shld{q}\t{%s2%1, %0|%0, %1, %2}"
@@ -11417,12 +11430,58 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "*x86_64_shld_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (ashift:DI (match_dup 0)
+			   (match_operand:QI 2 "const_0_to_63_operand" "J"))
+		(subreg:DI
+		  (lshiftrt:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_255_operand" "N")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
+  "shld{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn_and_split "*x86_64_shld_2"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (ashift:DI (match_dup 0)
+			   (match_operand:QI 2 "nonmemory_operand"))
+		(lshiftrt:DI (match_operand:DI 1 "register_operand")
+			     (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (ior:DI (ashift:DI (match_dup 0)
+				      (and:QI (match_dup 2) (const_int 63)))
+			   (subreg:DI
+			     (lshiftrt:TI
+			       (zero_extend:TI (match_dup 1))
+				 (minus:QI (const_int 64)
+					   (and:QI (match_dup 2)
+						   (const_int 63)))) 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "x86_shld"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (ashift:SI (match_dup 0)
-		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
-		(lshiftrt:SI (match_operand:SI 1 "register_operand" "r")
-		  (minus:QI (const_int 32) (match_dup 2)))))
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "shld{l}\t{%s2%1, %0|%0, %1, %2}"
@@ -11434,6 +11493,47 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "*x86_shld_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (ashift:SI (match_dup 0)
+			   (match_operand:QI 2 "const_0_to_31_operand" "I"))
+		(subreg:SI
+		  (lshiftrt:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_63_operand" "J")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+  "shld{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn_and_split "*x86_shld_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (ashift:SI (match_dup 0)
+			   (match_operand:QI 2 "nonmemory_operand"))
+		(lshiftrt:SI (match_operand:SI 1 "register_operand")
+			     (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (ior:SI (ashift:SI (match_dup 0)
+				      (and:QI (match_dup 2) (const_int 31)))
+			   (subreg:SI
+			     (lshiftrt:DI
+			       (zero_extend:DI (match_dup 1))
+				 (minus:QI (const_int 32)
+					   (and:QI (match_dup 2)
+						   (const_int 31)))) 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
 (define_expand "@x86_shift<mode>_adj_1"
   [(set (reg:CCZ FLAGS_REG)
 	(compare:CCZ (and:QI (match_operand:QI 2 "register_operand")
@@ -12080,9 +12180,12 @@
   "&& 1"
   [(parallel
      [(set (match_dup 4)
-	   (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
-		     (ashift:DWIH (match_dup 7)
-		       (minus:QI (match_dup 8) (match_dup 2)))))
+	   (ior:DWIH (lshiftrt:DWIH (match_dup 4)
+		       (and:QI (match_dup 2) (match_dup 8)))
+		     (subreg:DWIH
+		       (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
+			 (minus:QI (match_dup 9)
+				   (and:QI (match_dup 2) (match_dup 8)))) 0)))
       (clobber (reg:CC FLAGS_REG))])
    (parallel
      [(set (match_dup 6)
@@ -12091,7 +12194,8 @@
 {
   split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
 
-  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
+  operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
 
   if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
       != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@@ -12121,9 +12225,12 @@
   "&& 1"
   [(parallel
      [(set (match_dup 4)
-	   (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
-		     (ashift:DWIH (match_dup 7)
-		       (minus:QI (match_dup 8) (match_dup 2)))))
+	   (ior:DWIH (lshiftrt:DWIH (match_dup 4)
+		       (and:QI (match_dup 2) (match_dup 8)))
+		     (subreg:DWIH
+		       (ashift:<DWI> (zero_extend:<DWI> (match_dup 7))
+			 (minus:QI (match_dup 9)
+				   (and:QI (match_dup 2) (match_dup 8)))) 0)))
       (clobber (reg:CC FLAGS_REG))])
    (parallel
      [(set (match_dup 6)
@@ -12132,7 +12239,8 @@
 {
   split_double_mode (<DWI>mode, &operands[0], 2, &operands[4], &operands[6]);
 
-  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
+  operands[8] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT - 1);
+  operands[9] = GEN_INT (<MODE_SIZE> * BITS_PER_UNIT);
 
   if ((INTVAL (operands[3]) & ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
       != ((<MODE_SIZE> * BITS_PER_UNIT) - 1))
@@ -12177,9 +12285,14 @@
 (define_insn "x86_64_shrd"
   [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
         (ior:DI (lshiftrt:DI (match_dup 0)
-		  (match_operand:QI 2 "nonmemory_operand" "Jc"))
-		(ashift:DI (match_operand:DI 1 "register_operand" "r")
-		  (minus:QI (const_int 64) (match_dup 2)))))
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Jc")
+			  (const_int 63)))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (minus:QI (const_int 64)
+			      (and:QI (match_dup 2) (const_int 63)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   "TARGET_64BIT"
   "shrd{q}\t{%s2%1, %0|%0, %1, %2}"
@@ -12190,12 +12303,58 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "*x86_64_shrd_1"
+  [(set (match_operand:DI 0 "nonimmediate_operand" "+r*m")
+        (ior:DI (lshiftrt:DI (match_dup 0)
+			     (match_operand:QI 2 "const_0_to_63_operand" "J"))
+		(subreg:DI
+		  (ashift:TI
+		    (zero_extend:TI
+		      (match_operand:DI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_255_operand" "N")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT
+   && INTVAL (operands[3]) == 64 - INTVAL (operands[2])"
+  "shrd{q}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "DI")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn_and_split "*x86_64_shrd_2"
+  [(set (match_operand:DI 0 "nonimmediate_operand")
+	(ior:DI (lshiftrt:DI (match_dup 0)
+			     (match_operand:QI 2 "nonmemory_operand"))
+		(ashift:DI (match_operand:DI 1 "register_operand")
+			   (minus:QI (const_int 64) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (ior:DI (lshiftrt:DI (match_dup 0)
+					(and:QI (match_dup 2) (const_int 63)))
+			   (subreg:DI
+			     (ashift:TI
+			       (zero_extend:TI (match_dup 1))
+				 (minus:QI (const_int 64)
+					   (and:QI (match_dup 2)
+						   (const_int 63)))) 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
 (define_insn "x86_shrd"
   [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
         (ior:SI (lshiftrt:SI (match_dup 0)
-		  (match_operand:QI 2 "nonmemory_operand" "Ic"))
-		(ashift:SI (match_operand:SI 1 "register_operand" "r")
-		  (minus:QI (const_int 32) (match_dup 2)))))
+		  (and:QI (match_operand:QI 2 "nonmemory_operand" "Ic")
+			  (const_int 31)))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (minus:QI (const_int 32)
+			      (and:QI (match_dup 2) (const_int 31)))) 0)))
    (clobber (reg:CC FLAGS_REG))]
   ""
   "shrd{l}\t{%s2%1, %0|%0, %1, %2}"
@@ -12207,6 +12366,47 @@
    (set_attr "amdfam10_decode" "vector")
    (set_attr "bdver1_decode" "vector")])
 
+(define_insn "*x86_shrd_1"
+  [(set (match_operand:SI 0 "nonimmediate_operand" "+r*m")
+        (ior:SI (lshiftrt:SI (match_dup 0)
+			     (match_operand:QI 2 "const_0_to_31_operand" "I"))
+		(subreg:SI
+		  (ashift:DI
+		    (zero_extend:DI
+		      (match_operand:SI 1 "register_operand" "r"))
+		    (match_operand:QI 3 "const_0_to_63_operand" "J")) 0)))
+   (clobber (reg:CC FLAGS_REG))]
+  "INTVAL (operands[3]) == 32 - INTVAL (operands[2])"
+  "shrd{l}\t{%2, %1, %0|%0, %1, %2}"
+  [(set_attr "type" "ishift")
+   (set_attr "prefix_0f" "1")
+   (set_attr "mode" "SI")
+   (set_attr "pent_pair" "np")
+   (set_attr "athlon_decode" "vector")
+   (set_attr "amdfam10_decode" "vector")
+   (set_attr "bdver1_decode" "vector")])
+
+(define_insn_and_split "*x86_shrd_2"
+  [(set (match_operand:SI 0 "nonimmediate_operand")
+	(ior:SI (lshiftrt:SI (match_dup 0)
+			     (match_operand:QI 2 "nonmemory_operand"))
+		(ashift:SI (match_operand:SI 1 "register_operand")
+			   (minus:QI (const_int 32) (match_dup 2)))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && ix86_pre_reload_split ()"
+  "#"
+  "&& 1"
+  [(parallel [(set (match_dup 0)
+		   (ior:SI (lshiftrt:SI (match_dup 0)
+					(and:QI (match_dup 2) (const_int 31)))
+			   (subreg:SI
+			     (ashift:DI
+			       (zero_extend:DI (match_dup 1))
+				 (minus:QI (const_int 32)
+					   (and:QI (match_dup 2)
+						   (const_int 31)))) 0)))
+	      (clobber (reg:CC FLAGS_REG))])])
+
 ;; Base name for insn mnemonic.
 (define_mode_attr cvt_mnemonic
   [(SI "{cltd|cdq}") (DI "{cqto|cqo}")])
@@ -12784,18 +12984,27 @@
  [(set (match_dup 3) (match_dup 4))
   (parallel
    [(set (match_dup 4)
-	 (ior:DWIH (ashift:DWIH (match_dup 4) (match_dup 2))
-		   (lshiftrt:DWIH (match_dup 5)
-				  (minus:QI (match_dup 6) (match_dup 2)))))
+	 (ior:DWIH (ashift:DWIH (match_dup 4)
+				(and:QI (match_dup 2) (match_dup 6)))
+		   (subreg:DWIH
+		     (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 5))
+				     (minus:QI (match_dup 7)
+					       (and:QI (match_dup 2)
+						       (match_dup 6)))) 0)))
     (clobber (reg:CC FLAGS_REG))])
   (parallel
    [(set (match_dup 5)
-	 (ior:DWIH (ashift:DWIH (match_dup 5) (match_dup 2))
-		   (lshiftrt:DWIH (match_dup 3)
-				  (minus:QI (match_dup 6) (match_dup 2)))))
+	 (ior:DWIH (ashift:DWIH (match_dup 5)
+				(and:QI (match_dup 2) (match_dup 6)))
+		   (subreg:DWIH
+		     (lshiftrt:<DWI> (zero_extend:<DWI> (match_dup 3))
+				     (minus:QI (match_dup 7)
+					       (and:QI (match_dup 2)
+						       (match_dup 6)))) 0)))
     (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+  operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
 
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
@@ -12812,18 +13021,27 @@
  [(set (match_dup 3) (match_dup 4))
   (parallel
    [(set (match_dup 4)
-	 (ior:DWIH (lshiftrt:DWIH (match_dup 4) (match_dup 2))
-		   (ashift:DWIH (match_dup 5)
-				(minus:QI (match_dup 6) (match_dup 2)))))
+	 (ior:DWIH (lshiftrt:DWIH (match_dup 4)
+				  (and:QI (match_dup 2) (match_dup 6)))
+		   (subreg:DWIH
+		     (ashift:<DWI> (zero_extend:<DWI> (match_dup 5))
+				   (minus:QI (match_dup 7)
+					     (and:QI (match_dup 2)
+						     (match_dup 6)))) 0)))
     (clobber (reg:CC FLAGS_REG))])
   (parallel
    [(set (match_dup 5)
-	 (ior:DWIH (lshiftrt:DWIH (match_dup 5) (match_dup 2))
-		   (ashift:DWIH (match_dup 3)
-				(minus:QI (match_dup 6) (match_dup 2)))))
+	 (ior:DWIH (lshiftrt:DWIH (match_dup 5)
+				  (and:QI (match_dup 2) (match_dup 6)))
+		   (subreg:DWIH
+		     (ashift:<DWI> (zero_extend:<DWI> (match_dup 3))
+				   (minus:QI (match_dup 7)
+					     (and:QI (match_dup 2)
+						     (match_dup 6)))) 0)))
     (clobber (reg:CC FLAGS_REG))])]
 {
-  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
+  operands[6] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode) - 1);
+  operands[7] = GEN_INT (GET_MODE_BITSIZE (<MODE>mode));
 
   split_double_mode (<DWI>mode, &operands[0], 1, &operands[4], &operands[5]);
 })
diff --git a/gcc/testsuite/gcc.dg/pr103431.c b/gcc/testsuite/gcc.dg/pr103431.c
new file mode 100644
index 00000000000..09f224a3903
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr103431.c
@@ -0,0 +1,21 @@
+/* PR middle-end/103431 */
+/* { dg-do run { target int128 } } */
+/* { dg-options "-O -fno-tree-bit-ccp -fno-tree-dominator-opts" } */
+
+__attribute__((noipa))
+void foo (unsigned short a)
+{
+  __uint128_t b = 5;
+  int size = __SIZEOF_INT128__ * __CHAR_BIT__ - 1;
+  a /= 0xfffffffd;
+  __uint128_t c = (b << (a & size) | b >> (-(a & size) & size));
+  if (c != 5)
+    __builtin_abort ();
+}
+
+int
+main ()
+{
+  foo (0);
+  return 0;
+}


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-11-27 12:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-27 12:02 [gcc r12-5558] x86: Fix up x86_{,64_}sh{l,r}d patterns [PR103431] Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).