public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] x86: Add cmpmemsi for -mgeneral-regs-only
@ 2020-05-17 17:06 H.J. Lu
  2020-05-19  8:48 ` Uros Bizjak
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2020-05-17 17:06 UTC (permalink / raw)
  To: gcc-patches; +Cc: Uros Bizjak

Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
the length argument of cmpmem is guaranteed to be less than or equal to
lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
memcmp function implemented with vector instruction, see

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052

expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.

Tested on Linux/x86 and Linux/x86-64.  OK for master?

Thanks.

H.J.
---
gcc/

	PR target/95151
	* config/i386/i386-expand.c (ix86_expand_cmpstrn_or_cmpmem): New
	function.  Duplicated from the cmpstrn pattern.  Expand memcmp
	to "repz cmpsb" only with -mgeneral-regs-only.
	* config/i386/i386-protos.h (ix86_expand_cmpstrn_or_cmpmem): New
	prototype.
	* config/i386/i386.md (cmpmemsi): New pattern.
	(cmpstrnsi): Use ix86_expand_cmpstrn_or_cmpmem.

gcc/testsuite/

	PR target/95151
	* gcc.target/i386/pr95151-1.c: New test.
	* gcc.target/i386/pr95151-2.c: Likewise.
	* gcc.target/i386/pr95151-3.c: Likewise.
	* gcc.target/i386/pr95151-4.c: Likewise.
---
 gcc/config/i386/i386-expand.c             | 80 +++++++++++++++++++++++
 gcc/config/i386/i386-protos.h             |  1 +
 gcc/config/i386/i386.md                   | 80 ++++++-----------------
 gcc/testsuite/gcc.target/i386/pr95151-1.c | 18 +++++
 gcc/testsuite/gcc.target/i386/pr95151-2.c | 11 ++++
 gcc/testsuite/gcc.target/i386/pr95151-3.c | 18 +++++
 gcc/testsuite/gcc.target/i386/pr95151-4.c | 11 ++++
 7 files changed, 160 insertions(+), 59 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-4.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 26531585c5f..05d3c33720c 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -7651,6 +7651,86 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   return true;
 }
 
+/* Expand cmpstrn or memcmp.  */
+
+bool
+ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx src2,
+			       rtx length, rtx align, bool is_cmpstrn)
+{
+  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+    return false;
+
+  /* Can't use this if the user has appropriated ecx, esi or edi.  */
+  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
+    return false;
+
+  if (is_cmpstrn)
+    {
+      /* For strncmp, length is the maximum length, which can be larger
+	 than actual string lengths.  We can expand the cmpstrn pattern
+	 to "repz cmpsb" only if one of the strings is a constant so
+	 that expand_builtin_strncmp() can write the length argument to
+	 be the minimum of the const string length and the actual length
+	 argument.  Otherwise, "repz cmpsb" may pass the 0 byte.  */
+      tree t1 = MEM_EXPR (src1);
+      tree t2 = MEM_EXPR (src2);
+      if (!((t1 && TREE_CODE (t1) == MEM_REF
+	     && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
+	     && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0))
+		 == STRING_CST))
+	    || (t2 && TREE_CODE (t2) == MEM_REF
+		&& TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
+		&& (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0))
+		    == STRING_CST))))
+	return false;
+    }
+  else
+    {
+      /* Expand memcmp to "repz cmpsb" only for -mgeneral-regs-only
+	 since "repz cmpsb" can be much slower than memcmp function
+	 implemented with vector instructions, see
+
+	 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
+       */
+      if (!TARGET_GENERAL_REGS_ONLY)
+	return false;
+    }
+
+  rtx addr1 = copy_addr_to_reg (XEXP (src1, 0));
+  rtx addr2 = copy_addr_to_reg (XEXP (src2, 0));
+  if (addr1 != XEXP (src1, 0))
+    src1 = replace_equiv_address_nv (src1, addr1);
+  if (addr2 != XEXP (src2, 0))
+    src2 = replace_equiv_address_nv (src2, addr2);
+
+  rtx lengthreg = ix86_zero_extend_to_Pmode (length);
+
+  /* If we are testing strict equality, we can use known alignment to
+     good advantage.  This may be possible with combine, particularly
+     once cc0 is dead.  */
+  if (CONST_INT_P (length))
+    {
+      if (length == const0_rtx)
+	{
+	  emit_move_insn (result, const0_rtx);
+	  return true;
+	}
+      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, lengthreg, align,
+				     src1, src2));
+    }
+  else
+    {
+      emit_insn (gen_cmp_1 (Pmode, lengthreg, lengthreg));
+      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, lengthreg, align,
+				  src1, src2));
+    }
+
+  rtx out = gen_lowpart (QImode, result);
+  emit_insn (gen_cmpintqi (out));
+  emit_move_insn (result, gen_rtx_SIGN_EXTEND (SImode, out));
+
+  return true;
+}
 
 /* Expand the appropriate insns for doing strlen if not just doing
    repnz; scasb
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 39fcaa0ad5f..238aa650b61 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -71,6 +71,7 @@ extern int avx_vperm2f128_parallel (rtx par, machine_mode mode);
 extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
 extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
 				       rtx, rtx, rtx, rtx, bool);
+extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool);
 
 extern bool constant_address_p (rtx);
 extern bool legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1bf0c1a7f01..7fb97db1e6e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17761,6 +17761,22 @@ (define_insn "*rep_stosqi"
 	  (const_string "*")))
    (set_attr "mode" "QI")])
 
+(define_expand "cmpmemsi"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (compare:SI (match_operand:BLK 1 "memory_operand" "")
+                    (match_operand:BLK 2 "memory_operand" "") ) )
+   (use (match_operand 3 "general_operand"))
+   (use (match_operand 4 "immediate_operand"))]
+  ""
+{
+  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
+				     operands[2], operands[3],
+				     operands[4], false))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_expand "cmpstrnsi"
   [(set (match_operand:SI 0 "register_operand")
 	(compare:SI (match_operand:BLK 1 "general_operand")
@@ -17769,66 +17785,12 @@ (define_expand "cmpstrnsi"
    (use (match_operand 4 "immediate_operand"))]
   ""
 {
-  rtx addr1, addr2, countreg, align, out;
-
-  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
-    FAIL;
-
-  /* Can't use this if the user has appropriated ecx, esi or edi.  */
-  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
-    FAIL;
-
-  /* One of the strings must be a constant.  If so, expand_builtin_strncmp()
-     will have rewritten the length arg to be the minimum of the const string
-     length and the actual length arg.  If both strings are the same and
-     shorter than the length arg, repz cmpsb will not stop at the 0 byte and
-     will incorrectly base the results on chars past the 0 byte.  */
-  tree t1 = MEM_EXPR (operands[1]);
-  tree t2 = MEM_EXPR (operands[2]);
-  if (!((t1 && TREE_CODE (t1) == MEM_REF
-         && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
-         && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) == STRING_CST)
-      || (t2 && TREE_CODE (t2) == MEM_REF
-          && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
-          && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) == STRING_CST)))
-    FAIL;
-
-  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
-  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
-  if (addr1 != XEXP (operands[1], 0))
-    operands[1] = replace_equiv_address_nv (operands[1], addr1);
-  if (addr2 != XEXP (operands[2], 0))
-    operands[2] = replace_equiv_address_nv (operands[2], addr2);
-
-  countreg = ix86_zero_extend_to_Pmode (operands[3]);
-
-  /* %%% Iff we are testing strict equality, we can use known alignment
-     to good advantage.  This may be possible with combine, particularly
-     once cc0 is dead.  */
-  align = operands[4];
-
-  if (CONST_INT_P (operands[3]))
-    {
-      if (operands[3] == const0_rtx)
-	{
-	  emit_move_insn (operands[0], const0_rtx);
-	  DONE;
-	}
-      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
-				     operands[1], operands[2]));
-    }
+  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
+				     operands[2], operands[3],
+				     operands[4], true))
+    DONE;
   else
-    {
-      emit_insn (gen_cmp_1 (Pmode, countreg, countreg));
-      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
-				  operands[1], operands[2]));
-    }
-
-  out = gen_lowpart (QImode, operands[0]);
-  emit_insn (gen_cmpintqi (out));
-  emit_move_insn (operands[0], gen_rtx_SIGN_EXTEND (SImode, out));
-
-  DONE;
+    FAIL;
 })
 
 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-1.c b/gcc/testsuite/gcc.target/i386/pr95151-1.c
new file mode 100644
index 00000000000..f7ed88e508f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-1.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mgeneral-regs-only -minline-all-stringops" } */
+
+struct foo
+{
+  char array[257];
+};
+
+extern struct foo x;
+
+int
+func (struct foo i)
+{
+  return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler "repz cmpsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-2.c b/gcc/testsuite/gcc.target/i386/pr95151-2.c
new file mode 100644
index 00000000000..afd811ae7fb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mgeneral-regs-only -minline-all-stringops" } */
+
+int
+func (void *d, void *s, unsigned int l)
+{
+  return __builtin_memcmp (d, s, l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler "repz cmpsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-3.c b/gcc/testsuite/gcc.target/i386/pr95151-3.c
new file mode 100644
index 00000000000..bd62db5e640
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -minline-all-stringops" } */
+
+struct foo
+{
+  char array[257];
+};
+
+extern struct foo x;
+
+int
+func (struct foo i)
+{
+  return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler-not "repz cmpsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-4.c b/gcc/testsuite/gcc.target/i386/pr95151-4.c
new file mode 100644
index 00000000000..b8258f2530a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -minline-all-stringops" } */
+
+int
+func (void *d, void *s, unsigned int l)
+{
+  return __builtin_memcmp (d, s, l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler-not "repz cmpsb" } } */
-- 
2.26.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: [PATCH] x86: Add cmpmemsi for -mgeneral-regs-only
  2020-05-17 17:06 [PATCH] x86: Add cmpmemsi for -mgeneral-regs-only H.J. Lu
@ 2020-05-19  8:48 ` Uros Bizjak
  2020-05-19 12:14   ` [PATCH] x86: Add cmpmemsi for -minline-all-stringops H.J. Lu
  0 siblings, 1 reply; 10+ messages in thread
From: Uros Bizjak @ 2020-05-19  8:48 UTC (permalink / raw)
  To: gcc-patches; +Cc: H.J. Lu, Jan Hubicka

On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> the length argument of cmpmem is guaranteed to be less than or equal to
> lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> memcmp function implemented with vector instruction, see
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
>
> expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.

If there is no benefit compared to the library implementation, then
enable these patterns only when -minline-all-stringops is used.

Eventually these should be reimplemented with SSE4 string instructions.

Honza is the author of the block handling x86 system, I'll leave the
review to him.

Uros.

> Tested on Linux/x86 and Linux/x86-64.  OK for master?
>
> Thanks.
>
> H.J.
> ---
> gcc/
>
>         PR target/95151
>         * config/i386/i386-expand.c (ix86_expand_cmpstrn_or_cmpmem): New
>         function.  Duplicated from the cmpstrn pattern.  Expand memcmp
>         to "repz cmpsb" only with -mgeneral-regs-only.
>         * config/i386/i386-protos.h (ix86_expand_cmpstrn_or_cmpmem): New
>         prototype.
>         * config/i386/i386.md (cmpmemsi): New pattern.
>         (cmpstrnsi): Use ix86_expand_cmpstrn_or_cmpmem.
>
> gcc/testsuite/
>
>         PR target/95151
>         * gcc.target/i386/pr95151-1.c: New test.
>         * gcc.target/i386/pr95151-2.c: Likewise.
>         * gcc.target/i386/pr95151-3.c: Likewise.
>         * gcc.target/i386/pr95151-4.c: Likewise.
> ---
>  gcc/config/i386/i386-expand.c             | 80 +++++++++++++++++++++++
>  gcc/config/i386/i386-protos.h             |  1 +
>  gcc/config/i386/i386.md                   | 80 ++++++-----------------
>  gcc/testsuite/gcc.target/i386/pr95151-1.c | 18 +++++
>  gcc/testsuite/gcc.target/i386/pr95151-2.c | 11 ++++
>  gcc/testsuite/gcc.target/i386/pr95151-3.c | 18 +++++
>  gcc/testsuite/gcc.target/i386/pr95151-4.c | 11 ++++
>  7 files changed, 160 insertions(+), 59 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-1.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-2.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-4.c
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index 26531585c5f..05d3c33720c 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -7651,6 +7651,86 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
>    return true;
>  }
>
> +/* Expand cmpstrn or memcmp.  */
> +
> +bool
> +ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx src2,
> +                              rtx length, rtx align, bool is_cmpstrn)
> +{
> +  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
> +    return false;
> +
> +  /* Can't use this if the user has appropriated ecx, esi or edi.  */
> +  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
> +    return false;
> +
> +  if (is_cmpstrn)
> +    {
> +      /* For strncmp, length is the maximum length, which can be larger
> +        than actual string lengths.  We can expand the cmpstrn pattern
> +        to "repz cmpsb" only if one of the strings is a constant so
> +        that expand_builtin_strncmp() can write the length argument to
> +        be the minimum of the const string length and the actual length
> +        argument.  Otherwise, "repz cmpsb" may pass the 0 byte.  */
> +      tree t1 = MEM_EXPR (src1);
> +      tree t2 = MEM_EXPR (src2);
> +      if (!((t1 && TREE_CODE (t1) == MEM_REF
> +            && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
> +            && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0))
> +                == STRING_CST))
> +           || (t2 && TREE_CODE (t2) == MEM_REF
> +               && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
> +               && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0))
> +                   == STRING_CST))))
> +       return false;
> +    }
> +  else
> +    {
> +      /* Expand memcmp to "repz cmpsb" only for -mgeneral-regs-only
> +        since "repz cmpsb" can be much slower than memcmp function
> +        implemented with vector instructions, see
> +
> +        https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> +       */
> +      if (!TARGET_GENERAL_REGS_ONLY)
> +       return false;
> +    }
> +
> +  rtx addr1 = copy_addr_to_reg (XEXP (src1, 0));
> +  rtx addr2 = copy_addr_to_reg (XEXP (src2, 0));
> +  if (addr1 != XEXP (src1, 0))
> +    src1 = replace_equiv_address_nv (src1, addr1);
> +  if (addr2 != XEXP (src2, 0))
> +    src2 = replace_equiv_address_nv (src2, addr2);
> +
> +  rtx lengthreg = ix86_zero_extend_to_Pmode (length);
> +
> +  /* If we are testing strict equality, we can use known alignment to
> +     good advantage.  This may be possible with combine, particularly
> +     once cc0 is dead.  */
> +  if (CONST_INT_P (length))
> +    {
> +      if (length == const0_rtx)
> +       {
> +         emit_move_insn (result, const0_rtx);
> +         return true;
> +       }
> +      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, lengthreg, align,
> +                                    src1, src2));
> +    }
> +  else
> +    {
> +      emit_insn (gen_cmp_1 (Pmode, lengthreg, lengthreg));
> +      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, lengthreg, align,
> +                                 src1, src2));
> +    }
> +
> +  rtx out = gen_lowpart (QImode, result);
> +  emit_insn (gen_cmpintqi (out));
> +  emit_move_insn (result, gen_rtx_SIGN_EXTEND (SImode, out));
> +
> +  return true;
> +}
>
>  /* Expand the appropriate insns for doing strlen if not just doing
>     repnz; scasb
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 39fcaa0ad5f..238aa650b61 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -71,6 +71,7 @@ extern int avx_vperm2f128_parallel (rtx par, machine_mode mode);
>  extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
>  extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
>                                        rtx, rtx, rtx, rtx, bool);
> +extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool);
>
>  extern bool constant_address_p (rtx);
>  extern bool legitimate_pic_operand_p (rtx);
> diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
> index 1bf0c1a7f01..7fb97db1e6e 100644
> --- a/gcc/config/i386/i386.md
> +++ b/gcc/config/i386/i386.md
> @@ -17761,6 +17761,22 @@ (define_insn "*rep_stosqi"
>           (const_string "*")))
>     (set_attr "mode" "QI")])
>
> +(define_expand "cmpmemsi"
> +  [(set (match_operand:SI 0 "register_operand" "")
> +        (compare:SI (match_operand:BLK 1 "memory_operand" "")
> +                    (match_operand:BLK 2 "memory_operand" "") ) )
> +   (use (match_operand 3 "general_operand"))
> +   (use (match_operand 4 "immediate_operand"))]
> +  ""
> +{
> +  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
> +                                    operands[2], operands[3],
> +                                    operands[4], false))
> +    DONE;
> +  else
> +    FAIL;
> +})
> +
>  (define_expand "cmpstrnsi"
>    [(set (match_operand:SI 0 "register_operand")
>         (compare:SI (match_operand:BLK 1 "general_operand")
> @@ -17769,66 +17785,12 @@ (define_expand "cmpstrnsi"
>     (use (match_operand 4 "immediate_operand"))]
>    ""
>  {
> -  rtx addr1, addr2, countreg, align, out;
> -
> -  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
> -    FAIL;
> -
> -  /* Can't use this if the user has appropriated ecx, esi or edi.  */
> -  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
> -    FAIL;
> -
> -  /* One of the strings must be a constant.  If so, expand_builtin_strncmp()
> -     will have rewritten the length arg to be the minimum of the const string
> -     length and the actual length arg.  If both strings are the same and
> -     shorter than the length arg, repz cmpsb will not stop at the 0 byte and
> -     will incorrectly base the results on chars past the 0 byte.  */
> -  tree t1 = MEM_EXPR (operands[1]);
> -  tree t2 = MEM_EXPR (operands[2]);
> -  if (!((t1 && TREE_CODE (t1) == MEM_REF
> -         && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
> -         && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) == STRING_CST)
> -      || (t2 && TREE_CODE (t2) == MEM_REF
> -          && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
> -          && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) == STRING_CST)))
> -    FAIL;
> -
> -  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
> -  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
> -  if (addr1 != XEXP (operands[1], 0))
> -    operands[1] = replace_equiv_address_nv (operands[1], addr1);
> -  if (addr2 != XEXP (operands[2], 0))
> -    operands[2] = replace_equiv_address_nv (operands[2], addr2);
> -
> -  countreg = ix86_zero_extend_to_Pmode (operands[3]);
> -
> -  /* %%% Iff we are testing strict equality, we can use known alignment
> -     to good advantage.  This may be possible with combine, particularly
> -     once cc0 is dead.  */
> -  align = operands[4];
> -
> -  if (CONST_INT_P (operands[3]))
> -    {
> -      if (operands[3] == const0_rtx)
> -       {
> -         emit_move_insn (operands[0], const0_rtx);
> -         DONE;
> -       }
> -      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
> -                                    operands[1], operands[2]));
> -    }
> +  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
> +                                    operands[2], operands[3],
> +                                    operands[4], true))
> +    DONE;
>    else
> -    {
> -      emit_insn (gen_cmp_1 (Pmode, countreg, countreg));
> -      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
> -                                 operands[1], operands[2]));
> -    }
> -
> -  out = gen_lowpart (QImode, operands[0]);
> -  emit_insn (gen_cmpintqi (out));
> -  emit_move_insn (operands[0], gen_rtx_SIGN_EXTEND (SImode, out));
> -
> -  DONE;
> +    FAIL;
>  })
>
>  ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
> diff --git a/gcc/testsuite/gcc.target/i386/pr95151-1.c b/gcc/testsuite/gcc.target/i386/pr95151-1.c
> new file mode 100644
> index 00000000000..f7ed88e508f
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95151-1.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mgeneral-regs-only -minline-all-stringops" } */
> +
> +struct foo
> +{
> +  char array[257];
> +};
> +
> +extern struct foo x;
> +
> +int
> +func (struct foo i)
> +{
> +  return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
> +}
> +
> +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
> +/* { dg-final { scan-assembler "repz cmpsb" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95151-2.c b/gcc/testsuite/gcc.target/i386/pr95151-2.c
> new file mode 100644
> index 00000000000..afd811ae7fb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95151-2.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mgeneral-regs-only -minline-all-stringops" } */
> +
> +int
> +func (void *d, void *s, unsigned int l)
> +{
> +  return __builtin_memcmp (d, s, l) ? 1 : 2;
> +}
> +
> +/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
> +/* { dg-final { scan-assembler "repz cmpsb" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95151-3.c b/gcc/testsuite/gcc.target/i386/pr95151-3.c
> new file mode 100644
> index 00000000000..bd62db5e640
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95151-3.c
> @@ -0,0 +1,18 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -minline-all-stringops" } */
> +
> +struct foo
> +{
> +  char array[257];
> +};
> +
> +extern struct foo x;
> +
> +int
> +func (struct foo i)
> +{
> +  return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
> +}
> +
> +/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
> +/* { dg-final { scan-assembler-not "repz cmpsb" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr95151-4.c b/gcc/testsuite/gcc.target/i386/pr95151-4.c
> new file mode 100644
> index 00000000000..b8258f2530a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr95151-4.c
> @@ -0,0 +1,11 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -msse2 -minline-all-stringops" } */
> +
> +int
> +func (void *d, void *s, unsigned int l)
> +{
> +  return __builtin_memcmp (d, s, l) ? 1 : 2;
> +}
> +
> +/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
> +/* { dg-final { scan-assembler-not "repz cmpsb" } } */
> --
> 2.26.2
>

^ permalink raw reply	[flat|nested] 10+ messages in thread

* [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-05-19  8:48 ` Uros Bizjak
@ 2020-05-19 12:14   ` H.J. Lu
  2020-08-19 13:09     ` PING " H.J. Lu
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2020-05-19 12:14 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jan Hubicka

[-- Attachment #1: Type: text/plain, Size: 1623 bytes --]

On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > the length argument of cmpmem is guaranteed to be less than or equal to
> > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > memcmp function implemented with vector instruction, see
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> >
> > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
>
> If there is no benefit compared to the library implementation, then
> enable these patterns only when -minline-all-stringops is used.

Fixed.

> Eventually these should be reimplemented with SSE4 string instructions.
>
> Honza is the author of the block handling x86 system, I'll leave the
> review to him.

We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
by

commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
Author: Nick Clifton <nickc@redhat.com>
Date:   Fri Aug 12 16:26:11 2011 +0000

    builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.

            * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
            pattern.
            * doc/md.texi (cmpstrn): Note that the comparison stops if both
            fetched bytes are zero.
            (cmpstr): Likewise.
            (cmpmem): Note that the comparison does not stop if both of the
            fetched bytes are zero.

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151

is a regression.

Honza, can you take a look at this?

Thanks.

--
H.J.

[-- Attachment #2: 0001-x86-Add-cmpmemsi-for-minline-all-stringops.patch --]
[-- Type: text/x-patch, Size: 11702 bytes --]

From ec91a57f3f91168034f7f5eb391949c301741680 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Thu, 14 May 2020 13:06:23 -0700
Subject: [PATCH] x86: Add cmpmemsi for -minline-all-stringops
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
by

commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
Author: Nick Clifton <nickc@redhat.com>
Date:   Fri Aug 12 16:26:11 2011 +0000

    builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.

            * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
            pattern.
            * doc/md.texi (cmpstrn): Note that the comparison stops if both
            fetched bytes are zero.
            (cmpstr): Likewise.
            (cmpmem): Note that the comparison does not stop if both of the
            fetched bytes are zero.

Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
the length argument of cmpmem is guaranteed to be less than or equal to
lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
memcmp function implemented with vector instruction, see

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052

expand cmpmem to "repz cmpsb" only for -minline-all-stringops.

gcc/

	PR target/95151
	* config/i386/i386-expand.c (ix86_expand_cmpstrn_or_cmpmem): New
	function.
	* config/i386/i386-protos.h (ix86_expand_cmpstrn_or_cmpmem): New
	prototype.
	* config/i386/i386.md (cmpmemsi): New pattern.

gcc/testsuite/

	PR target/95151
	* gcc.target/i386/pr95151-1.c: New test.
	* gcc.target/i386/pr95151-2.c: Likewise.
	* gcc.target/i386/pr95151-3.c: Likewise.
	* gcc.target/i386/pr95151-4.c: Likewise.
---
 gcc/config/i386/i386-expand.c             | 80 +++++++++++++++++++++++
 gcc/config/i386/i386-protos.h             |  1 +
 gcc/config/i386/i386.md                   | 80 ++++++-----------------
 gcc/testsuite/gcc.target/i386/pr95151-1.c | 17 +++++
 gcc/testsuite/gcc.target/i386/pr95151-2.c | 10 +++
 gcc/testsuite/gcc.target/i386/pr95151-3.c | 18 +++++
 gcc/testsuite/gcc.target/i386/pr95151-4.c | 11 ++++
 7 files changed, 158 insertions(+), 59 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr95151-4.c

diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
index 79f827fd653..d3f4280ad58 100644
--- a/gcc/config/i386/i386-expand.c
+++ b/gcc/config/i386/i386-expand.c
@@ -7656,6 +7656,86 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
   return true;
 }
 
+/* Expand cmpstrn or memcmp.  */
+
+bool
+ix86_expand_cmpstrn_or_cmpmem (rtx result, rtx src1, rtx src2,
+			       rtx length, rtx align, bool is_cmpstrn)
+{
+  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
+    return false;
+
+  /* Can't use this if the user has appropriated ecx, esi or edi.  */
+  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
+    return false;
+
+  if (is_cmpstrn)
+    {
+      /* For strncmp, length is the maximum length, which can be larger
+	 than actual string lengths.  We can expand the cmpstrn pattern
+	 to "repz cmpsb" only if one of the strings is a constant so
+	 that expand_builtin_strncmp() can write the length argument to
+	 be the minimum of the const string length and the actual length
+	 argument.  Otherwise, "repz cmpsb" may pass the 0 byte.  */
+      tree t1 = MEM_EXPR (src1);
+      tree t2 = MEM_EXPR (src2);
+      if (!((t1 && TREE_CODE (t1) == MEM_REF
+	     && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
+	     && (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0))
+		 == STRING_CST))
+	    || (t2 && TREE_CODE (t2) == MEM_REF
+		&& TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
+		&& (TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0))
+		    == STRING_CST))))
+	return false;
+    }
+  else
+    {
+      /* Expand memcmp to "repz cmpsb" only for -minline-all-stringops
+	 since "repz cmpsb" can be much slower than memcmp function
+	 implemented with vector instructions, see
+
+	 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
+       */
+      if (!TARGET_INLINE_ALL_STRINGOPS)
+	return false;
+    }
+
+  rtx addr1 = copy_addr_to_reg (XEXP (src1, 0));
+  rtx addr2 = copy_addr_to_reg (XEXP (src2, 0));
+  if (addr1 != XEXP (src1, 0))
+    src1 = replace_equiv_address_nv (src1, addr1);
+  if (addr2 != XEXP (src2, 0))
+    src2 = replace_equiv_address_nv (src2, addr2);
+
+  rtx lengthreg = ix86_zero_extend_to_Pmode (length);
+
+  /* If we are testing strict equality, we can use known alignment to
+     good advantage.  This may be possible with combine, particularly
+     once cc0 is dead.  */
+  if (CONST_INT_P (length))
+    {
+      if (length == const0_rtx)
+	{
+	  emit_move_insn (result, const0_rtx);
+	  return true;
+	}
+      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, lengthreg, align,
+				     src1, src2));
+    }
+  else
+    {
+      emit_insn (gen_cmp_1 (Pmode, lengthreg, lengthreg));
+      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, lengthreg, align,
+				  src1, src2));
+    }
+
+  rtx out = gen_lowpart (QImode, result);
+  emit_insn (gen_cmpintqi (out));
+  emit_move_insn (result, gen_rtx_SIGN_EXTEND (SImode, out));
+
+  return true;
+}
 
 /* Expand the appropriate insns for doing strlen if not just doing
    repnz; scasb
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 39fcaa0ad5f..238aa650b61 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -71,6 +71,7 @@ extern int avx_vperm2f128_parallel (rtx par, machine_mode mode);
 extern bool ix86_expand_strlen (rtx, rtx, rtx, rtx);
 extern bool ix86_expand_set_or_cpymem (rtx, rtx, rtx, rtx, rtx, rtx,
 				       rtx, rtx, rtx, rtx, bool);
+extern bool ix86_expand_cmpstrn_or_cmpmem (rtx, rtx, rtx, rtx, rtx, bool);
 
 extern bool constant_address_p (rtx);
 extern bool legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index aa4f25b7065..9ab41b8ad01 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -17761,6 +17761,22 @@
 	  (const_string "*")))
    (set_attr "mode" "QI")])
 
+(define_expand "cmpmemsi"
+  [(set (match_operand:SI 0 "register_operand" "")
+        (compare:SI (match_operand:BLK 1 "memory_operand" "")
+                    (match_operand:BLK 2 "memory_operand" "") ) )
+   (use (match_operand 3 "general_operand"))
+   (use (match_operand 4 "immediate_operand"))]
+  ""
+{
+  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
+				     operands[2], operands[3],
+				     operands[4], false))
+    DONE;
+  else
+    FAIL;
+})
+
 (define_expand "cmpstrnsi"
   [(set (match_operand:SI 0 "register_operand")
 	(compare:SI (match_operand:BLK 1 "general_operand")
@@ -17769,66 +17785,12 @@
    (use (match_operand 4 "immediate_operand"))]
   ""
 {
-  rtx addr1, addr2, countreg, align, out;
-
-  if (optimize_insn_for_size_p () && !TARGET_INLINE_ALL_STRINGOPS)
-    FAIL;
-
-  /* Can't use this if the user has appropriated ecx, esi or edi.  */
-  if (fixed_regs[CX_REG] || fixed_regs[SI_REG] || fixed_regs[DI_REG])
-    FAIL;
-
-  /* One of the strings must be a constant.  If so, expand_builtin_strncmp()
-     will have rewritten the length arg to be the minimum of the const string
-     length and the actual length arg.  If both strings are the same and
-     shorter than the length arg, repz cmpsb will not stop at the 0 byte and
-     will incorrectly base the results on chars past the 0 byte.  */
-  tree t1 = MEM_EXPR (operands[1]);
-  tree t2 = MEM_EXPR (operands[2]);
-  if (!((t1 && TREE_CODE (t1) == MEM_REF
-         && TREE_CODE (TREE_OPERAND (t1, 0)) == ADDR_EXPR
-         && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t1, 0), 0)) == STRING_CST)
-      || (t2 && TREE_CODE (t2) == MEM_REF
-          && TREE_CODE (TREE_OPERAND (t2, 0)) == ADDR_EXPR
-          && TREE_CODE (TREE_OPERAND (TREE_OPERAND (t2, 0), 0)) == STRING_CST)))
-    FAIL;
-
-  addr1 = copy_addr_to_reg (XEXP (operands[1], 0));
-  addr2 = copy_addr_to_reg (XEXP (operands[2], 0));
-  if (addr1 != XEXP (operands[1], 0))
-    operands[1] = replace_equiv_address_nv (operands[1], addr1);
-  if (addr2 != XEXP (operands[2], 0))
-    operands[2] = replace_equiv_address_nv (operands[2], addr2);
-
-  countreg = ix86_zero_extend_to_Pmode (operands[3]);
-
-  /* %%% Iff we are testing strict equality, we can use known alignment
-     to good advantage.  This may be possible with combine, particularly
-     once cc0 is dead.  */
-  align = operands[4];
-
-  if (CONST_INT_P (operands[3]))
-    {
-      if (operands[3] == const0_rtx)
-	{
-	  emit_move_insn (operands[0], const0_rtx);
-	  DONE;
-	}
-      emit_insn (gen_cmpstrnqi_nz_1 (addr1, addr2, countreg, align,
-				     operands[1], operands[2]));
-    }
+  if (ix86_expand_cmpstrn_or_cmpmem (operands[0], operands[1],
+				     operands[2], operands[3],
+				     operands[4], true))
+    DONE;
   else
-    {
-      emit_insn (gen_cmp_1 (Pmode, countreg, countreg));
-      emit_insn (gen_cmpstrnqi_1 (addr1, addr2, countreg, align,
-				  operands[1], operands[2]));
-    }
-
-  out = gen_lowpart (QImode, operands[0]);
-  emit_insn (gen_cmpintqi (out));
-  emit_move_insn (operands[0], gen_rtx_SIGN_EXTEND (SImode, out));
-
-  DONE;
+    FAIL;
 })
 
 ;; Produce a tri-state integer (-1, 0, 1) from condition codes.
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-1.c b/gcc/testsuite/gcc.target/i386/pr95151-1.c
new file mode 100644
index 00000000000..54a7510042a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-1.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+struct foo
+{
+  char array[257];
+};
+
+extern struct foo x;
+
+int
+func (struct foo i)
+{
+  return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-2.c b/gcc/testsuite/gcc.target/i386/pr95151-2.c
new file mode 100644
index 00000000000..8f9d8ee1bf4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-2.c
@@ -0,0 +1,10 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -minline-all-stringops" } */
+
+int
+func (void *d, void *s, unsigned int l)
+{
+  return __builtin_memcmp (d, s, l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler-not "call\[\\t \]*_?memcmp" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-3.c b/gcc/testsuite/gcc.target/i386/pr95151-3.c
new file mode 100644
index 00000000000..14cbdec4c93
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-3.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-inline-all-stringops" } */
+
+struct foo
+{
+  char array[257];
+};
+
+extern struct foo x;
+
+int
+func (struct foo i)
+{
+  return __builtin_memcmp (&x, &i, sizeof (x)) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler-not "cmpsb" } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr95151-4.c b/gcc/testsuite/gcc.target/i386/pr95151-4.c
new file mode 100644
index 00000000000..c93b2b60c5b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr95151-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -msse2 -mno-inline-all-stringops" } */
+
+int
+func (void *d, void *s, unsigned int l)
+{
+  return __builtin_memcmp (d, s, l) ? 1 : 2;
+}
+
+/* { dg-final { scan-assembler "call\[\\t \]*_?memcmp" } } */
+/* { dg-final { scan-assembler-not "cmpsb" } } */
-- 
2.26.2


^ permalink raw reply	[flat|nested] 10+ messages in thread

* PING [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-05-19 12:14   ` [PATCH] x86: Add cmpmemsi for -minline-all-stringops H.J. Lu
@ 2020-08-19 13:09     ` H.J. Lu
  2020-09-17  5:07       ` PING^2 " H.J. Lu
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2020-08-19 13:09 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek, Jeffrey Law; +Cc: gcc-patches, Jan Hubicka

On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> >
> > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > memcmp function implemented with vector instruction, see
> > >
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > >
> > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> >
> > If there is no benefit compared to the library implementation, then
> > enable these patterns only when -minline-all-stringops is used.
>
> Fixed.
>
> > Eventually these should be reimplemented with SSE4 string instructions.
> >
> > Honza is the author of the block handling x86 system, I'll leave the
> > review to him.
>
> We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> by
>
> commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> Author: Nick Clifton <nickc@redhat.com>
> Date:   Fri Aug 12 16:26:11 2011 +0000
>
>     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
>
>             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
>             pattern.
>             * doc/md.texi (cmpstrn): Note that the comparison stops if both
>             fetched bytes are zero.
>             (cmpstr): Likewise.
>             (cmpmem): Note that the comparison does not stop if both of the
>             fetched bytes are zero.
>
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
>
> is a regression.
>
> Honza, can you take a look at this?
>

PING:

https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html

-- 
H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* PING^2 [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-08-19 13:09     ` PING " H.J. Lu
@ 2020-09-17  5:07       ` H.J. Lu
  2020-10-02 13:21         ` PING^3 " H.J. Lu
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2020-09-17  5:07 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek, Jeffrey Law; +Cc: gcc-patches, Jan Hubicka

On Wed, Aug 19, 2020 at 6:09 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > >
> > > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > > memcmp function implemented with vector instruction, see
> > > >
> > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > > >
> > > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> > >
> > > If there is no benefit compared to the library implementation, then
> > > enable these patterns only when -minline-all-stringops is used.
> >
> > Fixed.
> >
> > > Eventually these should be reimplemented with SSE4 string instructions.
> > >
> > > Honza is the author of the block handling x86 system, I'll leave the
> > > review to him.
> >
> > We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> > by
> >
> > commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> > Author: Nick Clifton <nickc@redhat.com>
> > Date:   Fri Aug 12 16:26:11 2011 +0000
> >
> >     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
> >
> >             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
> >             pattern.
> >             * doc/md.texi (cmpstrn): Note that the comparison stops if both
> >             fetched bytes are zero.
> >             (cmpstr): Likewise.
> >             (cmpmem): Note that the comparison does not stop if both of the
> >             fetched bytes are zero.
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
> >
> > is a regression.
> >
> > Honza, can you take a look at this?
> >
>
> PING:
>
> https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html
>

PING.

-- 
H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* PING^3 [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-09-17  5:07       ` PING^2 " H.J. Lu
@ 2020-10-02 13:21         ` H.J. Lu
  2020-10-17 21:18           ` H.J. Lu
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2020-10-02 13:21 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek, Jeffrey Law; +Cc: gcc-patches, Jan Hubicka

On Wed, Sep 16, 2020 at 10:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Aug 19, 2020 at 6:09 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > >
> > > > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > > > memcmp function implemented with vector instruction, see
> > > > >
> > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > > > >
> > > > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> > > >
> > > > If there is no benefit compared to the library implementation, then
> > > > enable these patterns only when -minline-all-stringops is used.
> > >
> > > Fixed.
> > >
> > > > Eventually these should be reimplemented with SSE4 string instructions.
> > > >
> > > > Honza is the author of the block handling x86 system, I'll leave the
> > > > review to him.
> > >
> > > We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> > > by
> > >
> > > commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> > > Author: Nick Clifton <nickc@redhat.com>
> > > Date:   Fri Aug 12 16:26:11 2011 +0000
> > >
> > >     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
> > >
> > >             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
> > >             pattern.
> > >             * doc/md.texi (cmpstrn): Note that the comparison stops if both
> > >             fetched bytes are zero.
> > >             (cmpstr): Likewise.
> > >             (cmpmem): Note that the comparison does not stop if both of the
> > >             fetched bytes are zero.
> > >
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
> > >
> > > is a regression.
> > >
> > > Honza, can you take a look at this?
> > >
> >
> > PING:
> >
> > https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html
> >
>
> PING.
>

PING.

-- 
H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: PING^3 [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-10-02 13:21         ` PING^3 " H.J. Lu
@ 2020-10-17 21:18           ` H.J. Lu
  2020-10-18 15:16             ` Jan Hubicka
  0 siblings, 1 reply; 10+ messages in thread
From: H.J. Lu @ 2020-10-17 21:18 UTC (permalink / raw)
  To: Uros Bizjak, Jakub Jelinek, Jeffrey Law; +Cc: gcc-patches, Jan Hubicka

On Fri, Oct 2, 2020 at 6:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Sep 16, 2020 at 10:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, Aug 19, 2020 at 6:09 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > > >
> > > > > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > > > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > > > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > > > > memcmp function implemented with vector instruction, see
> > > > > >
> > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > > > > >
> > > > > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> > > > >
> > > > > If there is no benefit compared to the library implementation, then
> > > > > enable these patterns only when -minline-all-stringops is used.
> > > >
> > > > Fixed.
> > > >
> > > > > Eventually these should be reimplemented with SSE4 string instructions.
> > > > >
> > > > > Honza is the author of the block handling x86 system, I'll leave the
> > > > > review to him.
> > > >
> > > > We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> > > > by
> > > >
> > > > commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> > > > Author: Nick Clifton <nickc@redhat.com>
> > > > Date:   Fri Aug 12 16:26:11 2011 +0000
> > > >
> > > >     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
> > > >
> > > >             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
> > > >             pattern.
> > > >             * doc/md.texi (cmpstrn): Note that the comparison stops if both
> > > >             fetched bytes are zero.
> > > >             (cmpstr): Likewise.
> > > >             (cmpmem): Note that the comparison does not stop if both of the
> > > >             fetched bytes are zero.
> > > >
> > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
> > > >
> > > > is a regression.
> > > >
> > > > Honza, can you take a look at this?
> > > >
> > >
> > > PING:
> > >
> > > https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html
> > >
> >
> > PING.
> >
>
> PING.
>

I'd like to check it in next Tuesday if there are no comments.

-- 
H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: PING^3 [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-10-17 21:18           ` H.J. Lu
@ 2020-10-18 15:16             ` Jan Hubicka
  2020-10-18 15:27               ` H.J. Lu
  2020-10-23 12:45               ` H.J. Lu
  0 siblings, 2 replies; 10+ messages in thread
From: Jan Hubicka @ 2020-10-18 15:16 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, Jakub Jelinek, Jeffrey Law, gcc-patches

> On Fri, Oct 2, 2020 at 6:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Wed, Sep 16, 2020 at 10:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Wed, Aug 19, 2020 at 6:09 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > > > >
> > > > > > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > > >
> > > > > > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > > > > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > > > > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > > > > > memcmp function implemented with vector instruction, see
> > > > > > >
> > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > > > > > >
> > > > > > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> > > > > >
> > > > > > If there is no benefit compared to the library implementation, then
> > > > > > enable these patterns only when -minline-all-stringops is used.
> > > > >
> > > > > Fixed.
> > > > >
> > > > > > Eventually these should be reimplemented with SSE4 string instructions.
> > > > > >
> > > > > > Honza is the author of the block handling x86 system, I'll leave the
> > > > > > review to him.
> > > > >
> > > > > We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> > > > > by
> > > > >
> > > > > commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> > > > > Author: Nick Clifton <nickc@redhat.com>
> > > > > Date:   Fri Aug 12 16:26:11 2011 +0000
> > > > >
> > > > >     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
> > > > >
> > > > >             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
> > > > >             pattern.
> > > > >             * doc/md.texi (cmpstrn): Note that the comparison stops if both
> > > > >             fetched bytes are zero.
> > > > >             (cmpstr): Likewise.
> > > > >             (cmpmem): Note that the comparison does not stop if both of the
> > > > >             fetched bytes are zero.
> > > > >
> > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
> > > > >
> > > > > is a regression.
> > > > >
> > > > > Honza, can you take a look at this?
> > > > >
> > > >
> > > > PING:
> > > >
> > > > https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html
> > > >
> > >
> > > PING.
> > >
> >
> > PING.
> >
> 
> I'd like to check it in next Tuesday if there are no comments.

I still plan to intorduce the two-level optimize_for_size predicates.
Will try to do that by tuesday.

Honza
> 
> -- 
> H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: PING^3 [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-10-18 15:16             ` Jan Hubicka
@ 2020-10-18 15:27               ` H.J. Lu
  2020-10-23 12:45               ` H.J. Lu
  1 sibling, 0 replies; 10+ messages in thread
From: H.J. Lu @ 2020-10-18 15:27 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Uros Bizjak, Jakub Jelinek, Jeffrey Law, gcc-patches

On Sun, Oct 18, 2020 at 8:16 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> > On Fri, Oct 2, 2020 at 6:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Wed, Sep 16, 2020 at 10:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Wed, Aug 19, 2020 at 6:09 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > > > > >
> > > > > > > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > > > >
> > > > > > > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > > > > > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > > > > > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > > > > > > memcmp function implemented with vector instruction, see
> > > > > > > >
> > > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > > > > > > >
> > > > > > > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> > > > > > >
> > > > > > > If there is no benefit compared to the library implementation, then
> > > > > > > enable these patterns only when -minline-all-stringops is used.
> > > > > >
> > > > > > Fixed.
> > > > > >
> > > > > > > Eventually these should be reimplemented with SSE4 string instructions.
> > > > > > >
> > > > > > > Honza is the author of the block handling x86 system, I'll leave the
> > > > > > > review to him.
> > > > > >
> > > > > > We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> > > > > > by
> > > > > >
> > > > > > commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> > > > > > Author: Nick Clifton <nickc@redhat.com>
> > > > > > Date:   Fri Aug 12 16:26:11 2011 +0000
> > > > > >
> > > > > >     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
> > > > > >
> > > > > >             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
> > > > > >             pattern.
> > > > > >             * doc/md.texi (cmpstrn): Note that the comparison stops if both
> > > > > >             fetched bytes are zero.
> > > > > >             (cmpstr): Likewise.
> > > > > >             (cmpmem): Note that the comparison does not stop if both of the
> > > > > >             fetched bytes are zero.
> > > > > >
> > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
> > > > > >
> > > > > > is a regression.
> > > > > >
> > > > > > Honza, can you take a look at this?
> > > > > >
> > > > >
> > > > > PING:
> > > > >
> > > > > https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html
> > > > >
> > > >
> > > > PING.
> > > >
> > >
> > > PING.
> > >
> >
> > I'd like to check it in next Tuesday if there are no comments.
>
> I still plan to intorduce the two-level optimize_for_size predicates.
> Will try to do that by tuesday.
>

Thanks.

BTW, this patch is about inlining memcmp with -minline-all-stringops.
It is very important for user interrupt codes (UINTR) not to call memcmp
since memcmp in glibc uses vector registers which shouldn't be used in
user interrupt codes.

-- 
H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

* Re: PING^3 [PATCH] x86: Add cmpmemsi for -minline-all-stringops
  2020-10-18 15:16             ` Jan Hubicka
  2020-10-18 15:27               ` H.J. Lu
@ 2020-10-23 12:45               ` H.J. Lu
  1 sibling, 0 replies; 10+ messages in thread
From: H.J. Lu @ 2020-10-23 12:45 UTC (permalink / raw)
  To: Jan Hubicka; +Cc: Uros Bizjak, Jakub Jelinek, Jeffrey Law, gcc-patches

On Sun, Oct 18, 2020 at 8:16 AM Jan Hubicka <hubicka@ucw.cz> wrote:
>
> > On Fri, Oct 2, 2020 at 6:21 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > >
> > > On Wed, Sep 16, 2020 at 10:07 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > >
> > > > On Wed, Aug 19, 2020 at 6:09 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > >
> > > > > On Tue, May 19, 2020 at 5:14 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > >
> > > > > > On Tue, May 19, 2020 at 1:48 AM Uros Bizjak <ubizjak@gmail.com> wrote:
> > > > > > >
> > > > > > > On Sun, May 17, 2020 at 7:06 PM H.J. Lu <hjl.tools@gmail.com> wrote:
> > > > > > > >
> > > > > > > > Duplicate the cmpstrn pattern for cmpmem.  The only difference is that
> > > > > > > > the length argument of cmpmem is guaranteed to be less than or equal to
> > > > > > > > lengths of 2 memory areas.  Since "repz cmpsb" can be much slower than
> > > > > > > > memcmp function implemented with vector instruction, see
> > > > > > > >
> > > > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=43052
> > > > > > > >
> > > > > > > > expand cmpmem to "repz cmpsb" only with -mgeneral-regs-only.
> > > > > > >
> > > > > > > If there is no benefit compared to the library implementation, then
> > > > > > > enable these patterns only when -minline-all-stringops is used.
> > > > > >
> > > > > > Fixed.
> > > > > >
> > > > > > > Eventually these should be reimplemented with SSE4 string instructions.
> > > > > > >
> > > > > > > Honza is the author of the block handling x86 system, I'll leave the
> > > > > > > review to him.
> > > > > >
> > > > > > We used to expand memcmp to "repz cmpsb" via cmpstrnsi.  It was changed
> > > > > > by
> > > > > >
> > > > > > commit 9b0f6f5e511ca512e4faeabc81d2fd3abad9b02f
> > > > > > Author: Nick Clifton <nickc@redhat.com>
> > > > > > Date:   Fri Aug 12 16:26:11 2011 +0000
> > > > > >
> > > > > >     builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi pattern.
> > > > > >
> > > > > >             * builtins.c (expand_builtin_memcmp): Do not use cmpstrnsi
> > > > > >             pattern.
> > > > > >             * doc/md.texi (cmpstrn): Note that the comparison stops if both
> > > > > >             fetched bytes are zero.
> > > > > >             (cmpstr): Likewise.
> > > > > >             (cmpmem): Note that the comparison does not stop if both of the
> > > > > >             fetched bytes are zero.
> > > > > >
> > > > > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=95151
> > > > > >
> > > > > > is a regression.
> > > > > >
> > > > > > Honza, can you take a look at this?
> > > > > >
> > > > >
> > > > > PING:
> > > > >
> > > > > https://gcc.gnu.org/pipermail/gcc-patches/2020-May/546921.html
> > > > >
> > > >
> > > > PING.
> > > >
> > >
> > > PING.
> > >
> >
> > I'd like to check it in next Tuesday if there are no comments.
>
> I still plan to intorduce the two-level optimize_for_size predicates.
> Will try to do that by tuesday.

Any updates on this?  My patch is still needed to generate cmpmemsi
with -minline-all-stringops.  If there are comments, I will check it in next
Monday.

--
H.J.

^ permalink raw reply	[flat|nested] 10+ messages in thread

end of thread, other threads:[~2020-10-23 12:45 UTC | newest]

Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-17 17:06 [PATCH] x86: Add cmpmemsi for -mgeneral-regs-only H.J. Lu
2020-05-19  8:48 ` Uros Bizjak
2020-05-19 12:14   ` [PATCH] x86: Add cmpmemsi for -minline-all-stringops H.J. Lu
2020-08-19 13:09     ` PING " H.J. Lu
2020-09-17  5:07       ` PING^2 " H.J. Lu
2020-10-02 13:21         ` PING^3 " H.J. Lu
2020-10-17 21:18           ` H.J. Lu
2020-10-18 15:16             ` Jan Hubicka
2020-10-18 15:27               ` H.J. Lu
2020-10-23 12:45               ` H.J. Lu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).