public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] PR target/97312: Tweak gcc.target/aarch64/pr90838.c
@ 2020-10-08  9:58 Aldy Hernandez
  2020-10-08 10:22 ` Jakub Jelinek
  0 siblings, 1 reply; 9+ messages in thread
From: Aldy Hernandez @ 2020-10-08  9:58 UTC (permalink / raw)
  To: gcc-patches

I am quoting my analysis from the PR.  Could an aarch64 expert 
pontificate here?

This test is checking the final assembly for a specific sequence.  I 
don't speak aarch64 assembly, but the IL is different coming out of evrp.

The first culprit is this difference in the mergephi1 dump:

    _9 = .CTZ (x_6(D));
-  _10 = _9 & 31;
+  _10 = _9;

The operands are unsigned ints, so, assuming they are 32 bits wide on
aarch64, the result of __builtin_ctz is always less than 32.  This is
because a CTZ of 0 is undefined according to the GCC manual:

[[
Built-in Function: int __builtin_ctz (unsigned int x)

     Returns the number of trailing 0-bits in x, starting at the least 
significant bit position. If x is 0, the result is undefined.
]]

So a bitwise AND of any such value (0..31) with 0x1f (31) is a no-op.

Here are the full IL differences:

--- legacy-evrp/pr90838.c.038t.mergephi1        2020-10-07 
08:44:12.152358885 -0400
+++ ranger/pr90838.c.038t.mergephi1     2020-10-07 08:39:12.339296502 -0400
@@ -1,41 +1,41 @@

  ;; Function ctz1 (ctz1, funcdef_no=0, decl_uid=3587, cgraph_uid=1, 
symbol_order=0)

  ctz1 (unsigned int x)
  {
    static const char table[32] = 
"\x00\x01\x1c\x02\x1d\x0e\x18\x03\x1e\x16\x14\x0f\x19\x11\x04\b\x1f\x1b\r\x17\x15\x13\x10\x07\x1a\f\x12\x06\v\x05\n\t";
    unsigned int _1;
    unsigned int _2;
    unsigned int _3;
    unsigned int _4;
    char _5;
    int _9;
    int _10;

    <bb 2> :
    _1 = -x_6(D);
    _2 = _1 & x_6(D);
    _3 = _2 * 125613361;
    _4 = _3 >> 27;
    _9 = .CTZ (x_6(D));
-  _10 = _9 & 31;
+  _10 = _9;
    _5 = (char) _10;
    return _10;

  }



  ;; Function ctz2 (ctz2, funcdef_no=1, decl_uid=3591, cgraph_uid=2, 
symbol_order=1)

  ctz2 (unsigned int x)
  {
    static short int table[64] = {32, 0, 1, 12, 2, 6, 0, 13, 3, 0, 7, 0, 
0, 0, 0, 14, 10, 4, 0, 0, 8, 0, 0, 25, 0, 0, 0, 0, 0, 21, 27, 15, 31, 
11, 5, 0, 0, 0, 0, 0, 9, 0, 0,
24, 0, 0, 20, 26, 30, 0, 0, 0, 0, 23, 0, 19, 29, 0, 22, 18, 28, 17, 16, 0};
    unsigned int _1;
    unsigned int _2;
    unsigned int _3;
    short int _4;
    int _8;

    <bb 2> :
    _1 = -x_5(D);
@@ -87,27 +87,27 @@


  ;; Function ctz4 (ctz4, funcdef_no=3, decl_uid=3601, cgraph_uid=4, 
symbol_order=5)

  ctz4 (long unsigned int x)
  {
    long unsigned int lsb;
    long unsigned int _1;
    long long unsigned int _2;
    long long unsigned int _3;
    char _4;
    int _9;
    int _10;

    <bb 2> :
    _1 = -x_5(D);
    lsb_6 = _1 & x_5(D);
    _2 = lsb_6 * 283881067100198605;
    _3 = _2 >> 58;
    _9 = .CTZ (x_5(D));
-  _10 = _9 & 63;
+  _10 = _9;
    _4 = (char) _10;
    return _10;

  }

The difference in the assembly matches: we have two fewer ANDs in the
final output:

$ diff -u legacy.s ranger.s
--- legacy.s    2020-10-07 09:06:13.420446783 -0400
+++ ranger.s    2020-10-07 09:06:42.646646949 -0400
@@ -8,7 +8,6 @@
  ctz1:
         rbit    w0, w0
         clz     w0, w0
-       and     w0, w0, 31
         ret
         .size   ctz1, .-ctz1
         .align  2
@@ -36,7 +35,6 @@
  ctz4:
         rbit    x0, x0
         clz     x0, x0
-       and     w0, w0, 63
         ret
         .size   ctz4, .-ctz4

If my analysis is correct, we could just remove the line checking for
"and", or perhaps check that there are no "and" instructions at all.

OK for trunk?
Aldy

     gcc/testsuite/ChangeLog:

             PR target/97312
             * gcc.target/aarch64/pr90838.c: Remove scan for AND.

diff --git a/gcc/testsuite/gcc.target/aarch64/pr90838.c 
b/gcc/testsuite/gcc.target/aarch64/pr90838.c
index e1e19ac6a61..76cd5e18d2e 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr90838.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr90838.c
@@ -60,5 +60,4 @@ int ctz4 (unsigned long x)
  }

  /* { dg-final { scan-assembler-times "clz\t" 4 } } */
-/* { dg-final { scan-assembler-times "and\t" 2 } } */
  /* { dg-final { scan-assembler-not "cmp\t.*0" } } */


^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-10-08 15:09 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-10-08  9:58 [PATCH] PR target/97312: Tweak gcc.target/aarch64/pr90838.c Aldy Hernandez
2020-10-08 10:22 ` Jakub Jelinek
2020-10-08 10:27   ` Jakub Jelinek
2020-10-08 13:54   ` [PATCH] vrp: Fix up gcc.target/aarch64/pr90838.c [PR97312, PR94801] Jakub Jelinek
2020-10-08 14:28     ` Aldy Hernandez
2020-10-08 14:39       ` Jakub Jelinek
2020-10-08 14:55         ` Aldy Hernandez
2020-10-08 15:08           ` Jakub Jelinek
2020-10-08 15:09             ` Aldy Hernandez

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).