public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: HAO CHEN GUI <guihaoc@linux.ibm.com>
To: gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Segher Boessenkool <segher@kernel.crashing.org>,
	David <dje.gcc@gmail.com>, "Kewen.Lin" <linkw@linux.ibm.com>,
	Peter Bergner <bergner@linux.ibm.com>
Subject: [Patchv3, rs6000] Clean up pre-checkings of expand_block_compare
Date: Thu, 21 Dec 2023 09:37:52 +0800	[thread overview]
Message-ID: <138fd05a-75c8-4a4b-b358-9633e087da20@linux.ibm.com> (raw)

Hi,
  This patch cleans up pre-checkings of expand_block_compare. It does
1. Assert only P7 above can enter this function as it's already guard
by the expand.
2. Remove P7 processor test as only P7 above can enter this function and
P7 LE is excluded by targetm.slow_unaligned_access. On P7 BE, the
performance of expand is better than the performance of library when
the length is long.

  Compared to last version,
https://gcc.gnu.org/pipermail/gcc-patches/2023-December/640833.html
the main change is to split optimization for size to a separate patch
and add a testcase for P7 BE.

  Bootstrapped and tested on x86 and powerpc64-linux BE and LE with no
regressions. Is this OK for trunk?

Thanks
Gui Haochen

ChangeLog
rs6000: Clean up the pre-checkings of expand_block_compare

Remove P7 CPU test as only P7 above can enter this function and P7 LE is
excluded by the checking of targetm.slow_unaligned_access on word_mode.
Also performance test shows the expand of block compare is better than
library on P7 BE when the length is from 16 bytes to 64 bytes.

gcc/
	* gcc/config/rs6000/rs6000-string.cc (expand_block_compare): Assert
	only P7 above can enter this function.  Remove P7 CPU test and let
	P7 BE do the expand.

gcc/testsuite/
	* gcc.target/powerpc/block-cmp-4.c: New.

patch.diff
diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc
index 5149273b80e..09db57255fa 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1947,15 +1947,12 @@ expand_block_compare_gpr(unsigned HOST_WIDE_INT bytes, unsigned int base_align,
 bool
 expand_block_compare (rtx operands[])
 {
+  /* TARGET_POPCNTD is already guarded at expand cmpmemsi.  */
+  gcc_assert (TARGET_POPCNTD);
+
   if (optimize_insn_for_size_p ())
     return false;

-  rtx target = operands[0];
-  rtx orig_src1 = operands[1];
-  rtx orig_src2 = operands[2];
-  rtx bytes_rtx = operands[3];
-  rtx align_rtx = operands[4];
-
   /* This case is complicated to handle because the subtract
      with carry instructions do not generate the 64-bit
      carry and so we must emit code to calculate it ourselves.
@@ -1963,23 +1960,19 @@ expand_block_compare (rtx operands[])
   if (TARGET_32BIT && TARGET_POWERPC64)
     return false;

-  bool isP7 = (rs6000_tune == PROCESSOR_POWER7);
-
   /* Allow this param to shut off all expansion.  */
   if (rs6000_block_compare_inline_limit == 0)
     return false;

-  /* targetm.slow_unaligned_access -- don't do unaligned stuff.
-     However slow_unaligned_access returns true on P7 even though the
-     performance of this code is good there.  */
-  if (!isP7
-      && (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
-	  || targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2))))
-    return false;
+  rtx target = operands[0];
+  rtx orig_src1 = operands[1];
+  rtx orig_src2 = operands[2];
+  rtx bytes_rtx = operands[3];
+  rtx align_rtx = operands[4];

-  /* Unaligned l*brx traps on P7 so don't do this.  However this should
-     not affect much because LE isn't really supported on P7 anyway.  */
-  if (isP7 && !BYTES_BIG_ENDIAN)
+  /* targetm.slow_unaligned_access -- don't do unaligned stuff.  */
+  if (targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src1))
+      || targetm.slow_unaligned_access (word_mode, MEM_ALIGN (orig_src2)))
     return false;

   /* If this is not a fixed size compare, try generating loop code and
@@ -2027,14 +2020,6 @@ expand_block_compare (rtx operands[])
   if (!IN_RANGE (bytes, 1, max_bytes))
     return expand_compare_loop (operands);

-  /* The code generated for p7 and older is not faster than glibc
-     memcmp if alignment is small and length is not short, so bail
-     out to avoid those conditions.  */
-  if (targetm.slow_unaligned_access (word_mode, base_align * BITS_PER_UNIT)
-      && ((base_align == 1 && bytes > 16)
-	  || (base_align == 2 && bytes > 32)))
-    return false;
-
   rtx final_label = NULL;

   if (use_vec)
diff --git a/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c b/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
new file mode 100644
index 00000000000..c86febae68a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/block-cmp-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target be } } */
+/* { dg-options "-O2 -mdejagnu-cpu=power7" } */
+/* { dg-final { scan-assembler-not {\mb[l]? memcmp\M} } }  */
+
+/* Test that it does expand for memcmpsi instead of calling library on
+   P7 BE when length is less than 32 bytes.  */
+
+int foo (const char* s1, const char* s2)
+{
+  return __builtin_memcmp (s1, s2, 31);
+}

             reply	other threads:[~2023-12-21  1:38 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-12-21  1:37 HAO CHEN GUI [this message]
2023-12-21  6:28 ` Kewen.Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=138fd05a-75c8-4a4b-b358-9633e087da20@linux.ibm.com \
    --to=guihaoc@linux.ibm.com \
    --cc=bergner@linux.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=linkw@linux.ibm.com \
    --cc=segher@kernel.crashing.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).