public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 2/2] Remove x86 cmpstrnsi
  2014-09-27 18:11 [PATCH 1/2] Remove i386 cmpstrnsi peephole Andi Kleen
@ 2014-09-27 18:11 ` Andi Kleen
  2014-09-27 18:45   ` Oleg Endo
  0 siblings, 1 reply; 5+ messages in thread
From: Andi Kleen @ 2014-09-27 18:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

In my tests the optimized glibc out of line strcmp is always faster than
using inline rep ; cmpsb, even for small strings. The Intel optimization manual
also recommends to not use it. So remove the cmpstrnsi instruction.

Tested on Sandy Bridge, Westmere Intel CPUs.

gcc/:

2014-09-27  Andi Kleen	<ak@linux.intel.com>

	* config/i386/i386.md (cmpstrnsi, cmpintqi): Remove expanders.
---
 gcc/config/i386/i386.md | 85 -------------------------------------------------
 1 file changed, 85 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 98df8e1..1d2f1a5 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16097,91 +16097,6 @@
 	  (const_string "*")))
    (set_attr "mode" "QI")])
 
-(define_expand "cmpstrnsi"
-  [(set (match_operand:SI 0 "register_operand")
-	(compare:SI (match_operand:BLK 1 "general_operand")
-		    (match_operand:BLK 2 "general_operand")))
-   (use (match_operand 3 "general_operand"))
-   (use (match_operand 4 "immediate_operand"))]
-  ""
-{
-  rtx addr1, addr2, out, outlow, count, countreg, align;
-
-  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
-    FAIL;
-
-  /* Can't use this if the user has appropriated ecx, esi or edi.  */
-  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
-    FAIL;
-
-  out = operands[0];
-  if (!REG_P (out))
-    out = gen_reg_rtx (SImode);
-
-  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
-  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
-  if (addr1 != XEXP (operands[1], 0))
-    operands[1] = replace_equiv_address_nv (operands[1], addr1);
-  if (addr2 != XEXP (operands[2], 0))
-    operands[2] = replace_equiv_address_nv (operands[2], addr2);
-
-  count = operands[3];
-  countreg = ix86_zero_extend_to_Pmode (count);
-
-  /* %%% Iff we are testing strict equality, we can use known alignment
-     to good advantage.  This may be possible with combine, particularly
-     once cc0 is dead.  */
-  align = operands[4];
-
-  if (CONST_INT_P (count))
-    {
-      if (INTVAL (count) == 0)
-	{
-	  emit_move_insn (operands[0], const0_rtx);
-	  DONE;
-	}
-      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
-				     operands[1], operands[2]));
-    }
-  else
-    {
-      rtx (*gen_cmp) (rtx, rtx);
-
-      gen_cmp = (TARGET_64BIT
-		 ? gen_cmpdi_1 : gen_cmpsi_1);
-
-      emit_insn (gen_cmp (countreg, countreg));
-      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
-				  operands[1], operands[2]));
-    }
-
-  outlow = gen_lowpart (QImode, out);
-  emit_insn (gen_cmpintqi (outlow));
-  emit_move_insn (out, gen_rtx_SIGN_EXTEND (SImode, outlow));
-
-  if (operands[0] != out)
-    emit_move_insn (operands[0], out);
-
-  DONE;
-})
-
-;; Produce a tri-state integer (-1, 0, 1) from condition codes.
-
-(define_expand "cmpintqi"
-  [(set (match_dup 1)
-	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (match_dup 2)
-	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (parallel [(set (match_operand:QI 0 "register_operand")
-		   (minus:QI (match_dup 1)
-			     (match_dup 2)))
-	      (clobber (reg:CC FLAGS_REG))])]
-  ""
-{
-  operands[1] = gen_reg_rtx (QImode);
-  operands[2] = gen_reg_rtx (QImode);
-})
-
 ;; memcmp recognizers.  The `cmpsb' opcode does nothing if the count is
 ;; zero.  Emit extra code to make sure that a zero-length compare is EQ.
 
-- 
2.1.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] Remove i386 cmpstrnsi peephole
@ 2014-09-27 18:11 Andi Kleen
  2014-09-27 18:11 ` [PATCH 2/2] Remove x86 cmpstrnsi Andi Kleen
  0 siblings, 1 reply; 5+ messages in thread
From: Andi Kleen @ 2014-09-27 18:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

The peephole that removes the code to compute a tristate for cmpstrnsi
when only a boolean jump is needed never triggers in my tests. Just
remove it.

gcc/:

2014-09-27  Andi Kleen	<ak@linux.intel.com>

	* config/i386/i386.md: Remove peepholes for cmpstrn*.
---
 gcc/config/i386/i386.md | 77 -------------------------------------------------
 1 file changed, 77 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 004302d..98df8e1 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16297,83 +16297,6 @@
 	  (const_string "0")
 	  (const_string "*")))
    (set_attr "prefix_rep" "1")])
-
-;; Peephole optimizations to clean up after cmpstrn*.  This should be
-;; handled in combine, but it is not currently up to the task.
-;; When used for their truth value, the cmpstrn* expanders generate
-;; code like this:
-;;
-;;   repz cmpsb
-;;   seta 	%al
-;;   setb 	%dl
-;;   cmpb 	%al, %dl
-;;   jcc	label
-;;
-;; The intermediate three instructions are unnecessary.
-
-;; This one handles cmpstrn*_nz_1...
-(define_peephole2
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (compare:CC (mem:BLK (match_operand 4 "register_operand"))
-		      (mem:BLK (match_operand 5 "register_operand"))))
-     (use (match_operand 6 "register_operand"))
-     (use (match_operand:SI 3 "immediate_operand"))
-     (clobber (match_operand 0 "register_operand"))
-     (clobber (match_operand 1 "register_operand"))
-     (clobber (match_operand 2 "register_operand"))])
-   (set (match_operand:QI 7 "register_operand")
-	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (match_operand:QI 8 "register_operand")
-	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (reg FLAGS_REG)
-	(compare (match_dup 7) (match_dup 8)))
-  ]
-  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (compare:CC (mem:BLK (match_dup 4))
-		      (mem:BLK (match_dup 5))))
-     (use (match_dup 6))
-     (use (match_dup 3))
-     (clobber (match_dup 0))
-     (clobber (match_dup 1))
-     (clobber (match_dup 2))])])
-
-;; ...and this one handles cmpstrn*_1.
-(define_peephole2
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (if_then_else:CC (ne (match_operand 6 "register_operand")
-			       (const_int 0))
-	    (compare:CC (mem:BLK (match_operand 4 "register_operand"))
-		        (mem:BLK (match_operand 5 "register_operand")))
-	    (const_int 0)))
-     (use (match_operand:SI 3 "immediate_operand"))
-     (use (reg:CC FLAGS_REG))
-     (clobber (match_operand 0 "register_operand"))
-     (clobber (match_operand 1 "register_operand"))
-     (clobber (match_operand 2 "register_operand"))])
-   (set (match_operand:QI 7 "register_operand")
-	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (match_operand:QI 8 "register_operand")
-	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (reg FLAGS_REG)
-	(compare (match_dup 7) (match_dup 8)))
-  ]
-  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (if_then_else:CC (ne (match_dup 6)
-			       (const_int 0))
-	    (compare:CC (mem:BLK (match_dup 4))
-			(mem:BLK (match_dup 5)))
-	    (const_int 0)))
-     (use (match_dup 3))
-     (use (reg:CC FLAGS_REG))
-     (clobber (match_dup 0))
-     (clobber (match_dup 1))
-     (clobber (match_dup 2))])])
 \f
 ;; Conditional move instructions.
 
-- 
2.1.1

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] Remove x86 cmpstrnsi
  2014-09-27 18:11 ` [PATCH 2/2] Remove x86 cmpstrnsi Andi Kleen
@ 2014-09-27 18:45   ` Oleg Endo
  2014-09-27 18:54     ` Andi Kleen
  0 siblings, 1 reply; 5+ messages in thread
From: Oleg Endo @ 2014-09-27 18:45 UTC (permalink / raw)
  To: Andi Kleen; +Cc: gcc-patches, Andi Kleen

On Sat, 2014-09-27 at 11:10 -0700, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> In my tests the optimized glibc out of line strcmp is always faster than
> using inline rep ; cmpsb, even for small strings. The Intel optimization manual
> also recommends to not use it. So remove the cmpstrnsi instruction.
> 
> Tested on Sandy Bridge, Westmere Intel CPUs.
> 
> gcc/:
> 
> 2014-09-27  Andi Kleen	<ak@linux.intel.com>
> 
> 	* config/i386/i386.md (cmpstrnsi, cmpintqi): Remove expanders.

This has been mentioned a while ago, e.g.
https://gcc.gnu.org/ml/gcc/2002-10/msg01616.html
https://gcc.gnu.org/ml/gcc/2003-04/msg00166.html

Instead of just completely removing it, how about disabling it for newer
CPU types if not optimizing for size?

Cheers,
Oleg

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH 2/2] Remove x86 cmpstrnsi
  2014-09-27 18:45   ` Oleg Endo
@ 2014-09-27 18:54     ` Andi Kleen
  0 siblings, 0 replies; 5+ messages in thread
From: Andi Kleen @ 2014-09-27 18:54 UTC (permalink / raw)
  To: Oleg Endo; +Cc: Andi Kleen, gcc-patches, Andi Kleen

On Sat, Sep 27, 2014 at 08:45:18PM +0200, Oleg Endo wrote:
> On Sat, 2014-09-27 at 11:10 -0700, Andi Kleen wrote:
> > From: Andi Kleen <ak@linux.intel.com>
> > 
> > In my tests the optimized glibc out of line strcmp is always faster than
> > using inline rep ; cmpsb, even for small strings. The Intel optimization manual
> > also recommends to not use it. So remove the cmpstrnsi instruction.
> > 
> > Tested on Sandy Bridge, Westmere Intel CPUs.
> > 
> > gcc/:
> > 
> > 2014-09-27  Andi Kleen	<ak@linux.intel.com>
> > 
> > 	* config/i386/i386.md (cmpstrnsi, cmpintqi): Remove expanders.
> 
> This has been mentioned a while ago, e.g.
> https://gcc.gnu.org/ml/gcc/2002-10/msg01616.html
> https://gcc.gnu.org/ml/gcc/2003-04/msg00166.html
> 
> Instead of just completely removing it, how about disabling it for newer
> CPU types if not optimizing for size?

I believe it was slow even on old CPUs. But back then glibc may have
been even slower. Not sure it is worth keeping it for -Os, especially
given that parts of it have already bitrotted.

-Andi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* [PATCH 1/2] Remove i386 cmpstrnsi peephole
@ 2014-07-04  5:14 Andi Kleen
  0 siblings, 0 replies; 5+ messages in thread
From: Andi Kleen @ 2014-07-04  5:14 UTC (permalink / raw)
  To: gcc-patches; +Cc: Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

The peephole that removes the code to compute a tristate for cmpstrnsi
when only a boolean jump is needed never triggers in my tests. Just
remove it.

gcc/:

2014-07-02  Andi Kleen	<ak@linux.intel.com>

	* config/i386/i386.md: Remove peepholes for cmpstrn*.
---
 gcc/config/i386/i386.md | 77 -------------------------------------------------
 1 file changed, 77 deletions(-)

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 695b981..5f32a24 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -16078,83 +16078,6 @@
 	  (const_string "0")
 	  (const_string "*")))
    (set_attr "prefix_rep" "1")])
-
-;; Peephole optimizations to clean up after cmpstrn*.  This should be
-;; handled in combine, but it is not currently up to the task.
-;; When used for their truth value, the cmpstrn* expanders generate
-;; code like this:
-;;
-;;   repz cmpsb
-;;   seta 	%al
-;;   setb 	%dl
-;;   cmpb 	%al, %dl
-;;   jcc	label
-;;
-;; The intermediate three instructions are unnecessary.
-
-;; This one handles cmpstrn*_nz_1...
-(define_peephole2
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (compare:CC (mem:BLK (match_operand 4 "register_operand"))
-		      (mem:BLK (match_operand 5 "register_operand"))))
-     (use (match_operand 6 "register_operand"))
-     (use (match_operand:SI 3 "immediate_operand"))
-     (clobber (match_operand 0 "register_operand"))
-     (clobber (match_operand 1 "register_operand"))
-     (clobber (match_operand 2 "register_operand"))])
-   (set (match_operand:QI 7 "register_operand")
-	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (match_operand:QI 8 "register_operand")
-	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (reg FLAGS_REG)
-	(compare (match_dup 7) (match_dup 8)))
-  ]
-  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (compare:CC (mem:BLK (match_dup 4))
-		      (mem:BLK (match_dup 5))))
-     (use (match_dup 6))
-     (use (match_dup 3))
-     (clobber (match_dup 0))
-     (clobber (match_dup 1))
-     (clobber (match_dup 2))])])
-
-;; ...and this one handles cmpstrn*_1.
-(define_peephole2
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (if_then_else:CC (ne (match_operand 6 "register_operand")
-			       (const_int 0))
-	    (compare:CC (mem:BLK (match_operand 4 "register_operand"))
-		        (mem:BLK (match_operand 5 "register_operand")))
-	    (const_int 0)))
-     (use (match_operand:SI 3 "immediate_operand"))
-     (use (reg:CC FLAGS_REG))
-     (clobber (match_operand 0 "register_operand"))
-     (clobber (match_operand 1 "register_operand"))
-     (clobber (match_operand 2 "register_operand"))])
-   (set (match_operand:QI 7 "register_operand")
-	(gtu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (match_operand:QI 8 "register_operand")
-	(ltu:QI (reg:CC FLAGS_REG) (const_int 0)))
-   (set (reg FLAGS_REG)
-	(compare (match_dup 7) (match_dup 8)))
-  ]
-  "peep2_reg_dead_p (4, operands[7]) && peep2_reg_dead_p (4, operands[8])"
-  [(parallel[
-     (set (reg:CC FLAGS_REG)
-	  (if_then_else:CC (ne (match_dup 6)
-			       (const_int 0))
-	    (compare:CC (mem:BLK (match_dup 4))
-			(mem:BLK (match_dup 5)))
-	    (const_int 0)))
-     (use (match_dup 3))
-     (use (reg:CC FLAGS_REG))
-     (clobber (match_dup 0))
-     (clobber (match_dup 1))
-     (clobber (match_dup 2))])])
 \f
 ;; Conditional move instructions.
 
-- 
2.0.0

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2014-09-27 18:54 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-27 18:11 [PATCH 1/2] Remove i386 cmpstrnsi peephole Andi Kleen
2014-09-27 18:11 ` [PATCH 2/2] Remove x86 cmpstrnsi Andi Kleen
2014-09-27 18:45   ` Oleg Endo
2014-09-27 18:54     ` Andi Kleen
  -- strict thread matches above, loose matches on Subject: below --
2014-07-04  5:14 [PATCH 1/2] Remove i386 cmpstrnsi peephole Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).