public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r13-4914] Provide zero_extend versions/variants of several patterns on x86.
@ 2022-12-28 19:32 Roger Sayle
  0 siblings, 0 replies; only message in thread
From: Roger Sayle @ 2022-12-28 19:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4003e470a7ff8d8cfc94d28f193033fd53cbd1cf

commit r13-4914-g4003e470a7ff8d8cfc94d28f193033fd53cbd1cf
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Wed Dec 28 19:30:17 2022 +0000

    Provide zero_extend versions/variants of several patterns on x86.
    
    The middle-end doesn't have a preferred canonical form for expressing
    zero-extension, sometimes using an AND, sometimes pairs of SHIFTs,
    and sometimes using zero_extend.  Pending changes to RTL simplification
    will/may alter some of these representations, so a few additional
    patterns are required to recognize these alternate representations
    and avoid any testsuite regressions.
    
    As an example, *popcountsi2_zext is currently represented as:
      [(set (match_operand:DI 0 "register_operand" "=r")
            (and:DI
              (subreg:DI
                (popcount:SI
                  (match_operand:SI 1 "nonimmediate_operand" "rm")) 0)
              (const_int 63)))
       (clobber (reg:CC FLAGS_REG))]
    
    this patch adds an alternate/equivalent pattern that matches:
      [(set (match_operand:DI 0 "register_operand" "=r")
           (zero_extend:DI
             (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
       (clobber (reg:CC FLAGS_REG))]
    
    Another example is *popcounthi2 which is currently represented as:
      [(set (match_operand:SI 0 "register_operand")
            (popcount:SI
              (zero_extend:SI (match_operand:HI 1 "nonimmediate_operand"))))
       (clobber (reg:CC FLAGS_REG))]
    
    this patch adds an alternate/equivalent pattern that matches:
      [(set (match_operand:SI 0 "register_operand")
            (zero_extend:SI
              (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
       (clobber (reg:CC FLAGS_REG))]
    
    The contents of the machine description definitions remain the same;
    it's just that the expected RTL is slightly different but equivalent.
    Providing both forms makes the backend more robust to middle-end
    changes [and possibly catches some missed optimizations].
    
    2022-12-28  Roger Sayle  <roger@nextmovesoftware.com>
    
    gcc/ChangeLog
            * config/i386/i386.md (*clzsi2_lzcnt_zext_2): New define_insn_and_split
            to match ZERO_EXTEND form of *clzsi2_lzcnt_zext.
            (*clzsi2_lzcnt_zext_2_falsedep): Likewise, new define_insn to match
            ZERO_EXTEND form of *clzsi2_lzcnt_zext_falsedep.
            (*bmi2_bzhi_zero_extendsidi_5): Likewise, new define_insn to match
            ZERO_EXTEND form of *bmi2_bzhi_zero_extendsidi.
            (*popcountsi2_zext_2): Likewise, new define_insn_and_split to match
            ZERO_EXTEND form of *popcountsi2_zext.
            (*popcountsi2_zext_2_falsedep): Likewise, new define_insn to match
            ZERO_EXTEND form of *popcountsi2_zext_falsedep.
            (*popcounthi2_2): Likewise, new define_insn_and_split to match
            ZERO_EXTEND form of *popcounthi2.
            (define_peephole2): ZERO_EXTEND variant of HImode popcount&1 using
            parity flag peephole2.

Diff:
---
 gcc/config/i386/i386.md | 151 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 151 insertions(+)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 06267529245..ca40c4fee17 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17419,6 +17419,42 @@
    (set_attr "type" "bitmanip")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*clzsi2_lzcnt_zext_2" ; Matches zero_extend:DI of clz:SI.
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_LZCNT && TARGET_64BIT"
+  "lzcnt{l}\t{%1, %k0|%k0, %1}" ; %k0 prints operand 0 under its SImode register name.
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])" ; Split only when the destination is not also an input.
+  [(parallel ; Same SET, plus an UNSPEC_INSN_FALSE_DEP marker on operand 0.
+    [(set (match_dup 0)
+	  (zero_extend:DI (clz:SI (match_dup 1))))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+     (clobber (reg:CC FLAGS_REG))])]
+  "ix86_expand_clear (operands[0]);" ; Preparation statement; presumably emits a clear of operand 0 -- confirm in i386 backend.
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
+; False dependency happens when destination is only updated by tzcnt,
+; lzcnt or popcnt.  There is no false dependency when destination is
+; also used in source.  Operand 2 is tied to operand 0 ("0" constraint).
+(define_insn "*clzsi2_lzcnt_zext_2_falsedep"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (clz:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+   (unspec [(match_operand:DI 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_LZCNT && TARGET_64BIT"
+  "lzcnt{l}\t{%1, %k0|%k0, %1}"
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
 (define_int_iterator LT_ZCNT
 	[(UNSPEC_TZCNT "TARGET_BMI")
 	 (UNSPEC_LZCNT "TARGET_LZCNT")])
@@ -17737,6 +17773,22 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "DI")])
 
+(define_insn "*bmi2_bzhi_zero_extendsidi_5" ; AND of operand 1 with a zero-extended SImode bit mask.
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(and:DI
+	  (zero_extend:DI
+	    (plus:SI
+	      (ashift:SI (const_int 1)
+			 (match_operand:QI 2 "register_operand" "r"))
+	      (const_int -1))) ; Mask is (1 << operand 2) - 1, computed in SImode.
+	  (match_operand:DI 1 "nonimmediate_operand" "rm")))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_64BIT && TARGET_BMI2"
+  "bzhi\t{%q2, %q1, %q0|%q0, %q1, %q2}" ; All three operands are printed with the %q modifier.
+  [(set_attr "type" "bitmanip")
+   (set_attr "prefix" "vex")
+   (set_attr "mode" "DI")])
+
 (define_insn "bmi2_pdep_<mode>3"
   [(set (match_operand:SWI48 0 "register_operand" "=r")
         (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r")
@@ -17999,6 +18051,54 @@
    (set_attr "type" "bitmanip")
    (set_attr "mode" "SI")])
 
+(define_insn_and_split "*popcountsi2_zext_2" ; Matches zero_extend:DI of popcount:SI.
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT && TARGET_64BIT"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %k0|%k0, %1}"; /* Mach-O variant: no {l} suffix.  */
+#else
+  return "popcnt{l}\t{%1, %k0|%k0, %1}";
+#endif
+}
+  "&& TARGET_AVOID_FALSE_DEP_FOR_BMI && epilogue_completed
+   && optimize_function_for_speed_p (cfun)
+   && !reg_mentioned_p (operands[0], operands[1])" ; Split only when the destination is not also an input.
+  [(parallel ; Same SET, plus an UNSPEC_INSN_FALSE_DEP marker on operand 0.
+    [(set (match_dup 0)
+	  (zero_extend:DI (popcount:SI (match_dup 1))))
+     (unspec [(match_dup 0)] UNSPEC_INSN_FALSE_DEP)
+     (clobber (reg:CC FLAGS_REG))])]
+  "ix86_expand_clear (operands[0]);" ; Preparation statement; presumably emits a clear of operand 0 -- confirm in i386 backend.
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
+; False dependency happens when destination is only updated by tzcnt,
+; lzcnt or popcnt.  There is no false dependency when destination is
+; also used in source.  Operand 2 is tied to operand 0 ("0" constraint).
+(define_insn "*popcountsi2_zext_2_falsedep"
+  [(set (match_operand:DI 0 "register_operand" "=r")
+	(zero_extend:DI
+	  (popcount:SI (match_operand:SI 1 "nonimmediate_operand" "rm"))))
+   (unspec [(match_operand:DI 2 "register_operand" "0")]
+	   UNSPEC_INSN_FALSE_DEP)
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT && TARGET_64BIT"
+{
+#if TARGET_MACHO
+  return "popcnt\t{%1, %k0|%k0, %1}"; /* Mach-O variant: no {l} suffix.  */
+#else
+  return "popcnt{l}\t{%1, %k0|%k0, %1}";
+#endif
+}
+  [(set_attr "prefix_rep" "1")
+   (set_attr "type" "bitmanip")
+   (set_attr "mode" "SI")])
+
 (define_insn_and_split "*popcounthi2_1"
   [(set (match_operand:SI 0 "register_operand")
 	(popcount:SI
@@ -18017,6 +18117,24 @@
   DONE;
 })
 
+(define_insn_and_split "*popcounthi2_2" ; Matches zero_extend:SI of popcount:HI.
+  [(set (match_operand:SI 0 "register_operand")
+	(zero_extend:SI
+	  (popcount:HI (match_operand:HI 1 "nonimmediate_operand"))))
+   (clobber (reg:CC FLAGS_REG))]
+  "TARGET_POPCNT
+   && ix86_pre_reload_split ()"
+  "#" ; No assembly template of its own; the insn is always split.
+  "&& 1" ; Split unconditionally once the insn condition holds.
+  [(const_int 0)]
+{
+  rtx tmp = gen_reg_rtx (HImode); /* Fresh HImode pseudo for the count.  */
+
+  emit_insn (gen_popcounthi2 (tmp, operands[1]));
+  emit_insn (gen_zero_extendhisi2 (operands[0], tmp)); /* Widen the HImode count into operand 0.  */
+  DONE;
+})
+
 (define_insn "popcounthi2"
   [(set (match_operand:HI 0 "register_operand" "=r")
 	(popcount:HI
@@ -18336,6 +18454,39 @@
   PUT_CODE (operands[5], GET_CODE (operands[5]) == EQ ? UNORDERED : ORDERED);
 })
 
+;; Eliminate HImode popcount&1 using parity flag (variant 2: popcount:HI result, low bit tested in QImode).
+(define_peephole2
+  [(match_scratch:HI 0 "Q")
+   (parallel [(set (match_operand:HI 1 "register_operand")
+		   (popcount:HI
+		    (match_operand:HI 2 "nonimmediate_operand")))
+	      (clobber (reg:CC FLAGS_REG))])
+   (set (reg:CCZ FLAGS_REG)
+        (compare:CCZ (and:QI (match_operand:QI 3 "register_operand")
+			     (const_int 1))
+		     (const_int 0)))
+   (set (pc) (if_then_else (match_operator 4 "bt_comparison_operator"
+			    [(reg:CCZ FLAGS_REG)
+			     (const_int 0)])
+			   (label_ref (match_operand 5))
+			   (pc)))]
+  "REGNO (operands[1]) == REGNO (operands[3])
+   && peep2_reg_dead_p (2, operands[1])
+   && peep2_reg_dead_p (2, operands[3])
+   && peep2_regno_dead_p (3, FLAGS_REG)" ; Operands 1 and 3 share a register number and pass the dead checks at position 2; FLAGS_REG passes at position 3.
+  [(set (match_dup 0) (match_dup 2)) ; Copy the popcount input into the scratch register.
+   (parallel [(set (reg:CC FLAGS_REG)
+		   (unspec:CC [(match_dup 0)] UNSPEC_PARITY))
+	      (clobber (match_dup 0))])
+   (set (pc) (if_then_else (match_op_dup 4 [(reg:CC FLAGS_REG)
+					    (const_int 0)])
+			   (label_ref (match_dup 5))
+			   (pc)))]
+{
+  operands[4] = shallow_copy_rtx (operands[4]); /* Copy before changing the comparison code.  */
+  PUT_CODE (operands[4], GET_CODE (operands[4]) == EQ ? UNORDERED : ORDERED); /* EQ becomes UNORDERED, any other code becomes ORDERED.  */
+})
+
 \f
 ;; Thread-local storage patterns for ELF.
 ;;

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-12-28 19:32 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-12-28 19:32 [gcc r13-4914] Provide zero_extend versions/variants of several patterns on x86 Roger Sayle

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).