public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/101301] New: Improving sparse switch statement
@ 2021-07-02 15:32 tkoenig at gcc dot gnu.org
  2021-07-03 10:41 ` [Bug tree-optimization/101301] " tkoenig at gcc dot gnu.org
                   ` (11 more replies)
  0 siblings, 12 replies; 13+ messages in thread
From: tkoenig at gcc dot gnu.org @ 2021-07-02 15:32 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

            Bug ID: 101301
           Summary: Improving sparse switch statement
           Product: gcc
           Version: 12.0
            Status: UNCONFIRMED
          Severity: enhancement
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: tkoenig at gcc dot gnu.org
  Target Milestone: ---

The following two functions do the same thing:

int foo(int x)
{
     switch (x) {
      case 11111: return 1;
      case 22222: return 2;
      case 33333: return 3;
      case 44444: return 4;
      case 55555: return 5;
      case 66666: return 6;
      case 77777: return 7;
      case 88888: return 8;
      case 99999: return 9;
      default: return 0;
     }
}

int foo2(int n)
{
  if (n >= 55555)
    {
      if (n >= 77777)
        {
          if (n == 77777)
            return 7;
          if (n == 88888)
            return 8;
          if (n == 99999)
            return 9;

          return 0;
        }
      else
        {
          if (n == 55555)
            return 5;
          if (n == 66666)
            return 6;

          return 0;
        }
    }
  else
    {
      if (n >= 33333)
        {
          if (n == 33333)
            return 3;
          if (n == 44444)
            return 4;

          return 0;
        }
      else
        {
          if (n == 11111)
            return 1;
          if (n == 22222)
            return 2;

          return 0;
        }
    }
}

but foo2 is translated into code with fewer conditional branches
on average.  Considering how expensive a mispredicted branch
can be, translating foo like foo2 could be an improvement.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
@ 2021-07-03 10:41 ` tkoenig at gcc dot gnu.org
  2021-07-03 18:42 ` tkoenig at gcc dot gnu.org
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: tkoenig at gcc dot gnu.org @ 2021-07-03 10:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #1 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Here's the assembly for the switch statement with -O3:

        cmpl    $55555, %edi
        je      .L6
        jle     .L18
        movl    $8, %eax
        cmpl    $88888, %edi
        je      .L1
        jle     .L19
        xorl    %eax, %eax
        cmpl    $99999, %edi
        sete    %al
        leal    (%rax,%rax,8), %eax
        ret
        .p2align 4,,10
        .p2align 3
.L18:
        movl    $3, %eax
        cmpl    $33333, %edi
        je      .L1
        jle     .L20
        xorl    %eax, %eax
        cmpl    $44444, %edi
        sete    %al
        sall    $2, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L19:
        movl    $6, %eax
        cmpl    $66666, %edi
        je      .L1
        xorl    %eax, %eax
        movl    $7, %edx
        cmpl    $77777, %edi
        cmove   %edx, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L6:
        movl    $5, %eax
.L1:
        ret
        .p2align 4,,10
        .p2align 3
.L20:
        movl    $1, %eax
        cmpl    $11111, %edi
        je      .L1
        xorl    %eax, %eax
        cmpl    $22222, %edi
        sete    %al
        addl    %eax, %eax
        ret

and here for the if statement version:

        cmpl    $55554, %edi
        jle     .L2
        cmpl    $77776, %edi
        jle     .L3
        cmpl    $77777, %edi
        je      .L6
        cmpl    $88888, %edi
        je      .L7
        xorl    %eax, %eax
        cmpl    $99999, %edi
        sete    %al
        leal    (%rax,%rax,8), %eax
        ret
        .p2align 4,,10
        .p2align 3
.L3:
        cmpl    $55555, %edi
        je      .L9
        xorl    %eax, %eax
        movl    $6, %edx
        cmpl    $66666, %edi
        cmove   %edx, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L2:
        cmpl    $33332, %edi
        jle     .L5
        cmpl    $33333, %edi
        je      .L11
        xorl    %eax, %eax
        cmpl    $44444, %edi
        sete    %al
        sall    $2, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L5:
        movl    $1, %eax
        cmpl    $11111, %edi
        je      .L1
        xorl    %eax, %eax
        cmpl    $22222, %edi
        sete    %al
        addl    %eax, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L11:
        movl    $3, %eax
.L1:
        ret
        .p2align 4,,10
        .p2align 3
.L7:
        movl    $8, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L9:
        movl    $5, %eax
        ret
        .p2align 4,,10
        .p2align 3
.L6:
        movl    $7, %eax
        ret

Stepping through the code with a debugger, here is a table on the number
of conditional jumps for all special values and in all relevant intervals:

 5000  5  3
 11111 5  3
 15000 5  3
 22222 5  3
 25000 5  3
 33333 3  3
 35000 4  3
 44444 4  3
 50000 4  3
 55555 1  3
 60000 5  3
 66666 5  3
 70000 5  3
 77777 5  3
 80000 5  4
 88888 3  4
 90000 4  4
 99999 4  4
100000 4  4

If one assumes each case as equally likely, the number of conditional jumps
goes down from 81 to 62.

Assuming each case statement is reached once, the total number of
conditional jumps executed goes from 35 to 29.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
  2021-07-03 10:41 ` [Bug tree-optimization/101301] " tkoenig at gcc dot gnu.org
@ 2021-07-03 18:42 ` tkoenig at gcc dot gnu.org
  2021-07-03 19:34 ` segher at gcc dot gnu.org
                   ` (9 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: tkoenig at gcc dot gnu.org @ 2021-07-03 18:42 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

Thomas Koenig <tkoenig at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |segher at gcc dot gnu.org

--- Comment #2 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
Some benchmarks are interesting, and it seems that things are a lot less clear
than I thought.

a.c contains the code in the original test. bench.c has


#include <stdio.h>
#include <sys/time.h>
#include <stdlib.h>

#define N 1024
#define TIMES (1024*1024)

int foo(int);
int foo2(int);

const int nums[] =
#ifdef FULL
  {  5000, 11111, 15000, 22222, 25000, 33333, 35000, 44444, 50000,
    55555, 60000, 66666, 70000, 77777, 80000, 88888, 90000, 99999,
     100000};
#else
  {  11111, 22222, 33333, 44444, 55555, 66666, 77777, 88888, 99999 };
#endif

double this_time ()
{
  struct timeval tv;
  gettimeofday (&tv, NULL);
  return tv.tv_sec + tv.tv_usec * 1e-6;
}

int main()
{
  double t1, t2;
  int *x;
  int r;
  int s;
  x = malloc(N * sizeof(*x));
  for (int i=0; i<N; i++)
    {
      r = drand48() * sizeof(nums)/sizeof(int);
      x[i] = nums[r];
    }
  t1 = this_time();
  for (int j=0; j<TIMES; j++)
    for (int i=0; i<N; i++)
      foo (x[i]);
  t2 = this_time();
  printf ("foo: %f\n", t2-t1);
  t1 = this_time();
  for (int j=0; j<TIMES; j++)
    for (int i=0; i<N; i++)
      foo2 (x[i]);
  t2 = this_time();
  printf ("foo2: %f\n", t2-t1);
  return 0;
}

On my home box, a Ryzen 1700, I get
$ gcc -O3 bench.c a.c && ./a.out
foo: 4.324224
foo2: 4.218115
$ gcc -DFULL -O3 bench.c a.c && ./a.out
foo: 4.132713
foo2: 3.924765

so there is an advantage for the hand-crafted binary search, but it
is rather small (up to 5%).

On POWER, the results are

[tkoenig@gcc135 ~]$ gcc -O3 bench.c a.c && ./a.out
foo: 6.489096
foo2: 8.457161
[tkoenig@gcc135 ~]$ gcc -O3 -DFULL bench.c a.c && ./a.out
foo: 6.481499
foo2: 6.976012

so things are actually slower with the manual version of the switch.

Things get very interesting when checking against clang 12:
[tkoenig@gcc135 ~]$ clang -O3 bench.c a.c && ./a.out
foo: 7.489085
foo2: 4.175748
[tkoenig@gcc135 ~]$ clang -O3 -DFULL bench.c a.c && ./a.out
foo: 8.142711
foo2: 3.983875

clang's own switch is slower, but the handcrafted version is very fast.
clang apparently decides to make heavy use of POWER's condition
registers and isel:

00000000000000e0 <foo2>:
  e0:   ce 56 83 2c     cmpwi   cr1,r3,22222
  e4:   00 00 80 3c     lis     r4,0
  e8:   00 00 a0 38     li      r5,0
  ec:   02 00 c0 38     li      r6,2
  f0:   01 00 e0 38     li      r7,1
  f4:   01 00 00 3d     lis     r8,1
  f8:   67 2b 83 2a     cmplwi  cr5,r3,11111
  fc:   9c ad 03 28     cmplwi  r3,44444
 100:   35 82 89 60     ori     r9,r4,33333
 104:   04 00 40 39     li      r10,4
 108:   6a 04 08 61     ori     r8,r8,1130
 10c:   03 d9 84 60     ori     r4,r4,55555
 110:   9e 29 c6 7c     isel    r6,r6,r5,6
 114:   35 82 03 2b     cmplwi  cr6,r3,33333
 118:   00 48 83 7c     cmpw    cr1,r3,r9
 11c:   9e 35 c7 7c     isel    r6,r7,r6,22
 120:   03 00 e0 38     li      r7,3
 124:   01 00 69 6c     xoris   r9,r3,1
 128:   9e 28 4a 7d     iseleq  r10,r10,r5
 12c:   40 40 03 7c     cmplw   r3,r8
 130:   66 2b 08 39     addi    r8,r8,11110
 134:   d1 2f 89 2a     cmplwi  cr5,r9,12241
 138:   9e 56 e7 7c     isel    r7,r7,r10,26
 13c:   06 00 40 39     li      r10,6
 140:   9f 86 09 2b     cmplwi  cr6,r9,34463
 144:   38 5b 89 2b     cmplwi  cr7,r9,23352
 148:   08 00 20 39     li      r9,8
 14c:   9e 28 4a 7d     iseleq  r10,r10,r5
 150:   00 40 03 7c     cmpw    r3,r8
 154:   09 00 00 39     li      r8,9
 158:   9e 2f a9 7c     isel    r5,r9,r5,30
 15c:   1e 39 c6 7c     isel    r6,r6,r7,4
 160:   05 00 e0 38     li      r7,5
 164:   03 d9 83 28     cmplwi  cr1,r3,55555
 168:   9e 2e a8 7c     isel    r5,r8,r5,26
 16c:   07 00 00 39     li      r8,7
 170:   9e 51 e7 7c     isel    r7,r7,r10,6
 174:   00 20 83 7c     cmpw    cr1,r3,r4
 178:   9e 2d a8 7c     isel    r5,r8,r5,22
 17c:   5e 38 65 7c     iselgt  r3,r5,r7
 180:   1e 19 66 7c     isel    r3,r6,r3,4
 184:   20 00 63 78     clrldi  r3,r3,32
 188:   20 00 80 4e     blr

I don't have any recent Intel hardware to test.

So, things are less clear then when just looking at the assembler code,
and the behavior of branch predictors is not easy to predict.

A small (up to 5% advantage) x86_64 could be a big disadvantage or
advantage on POWER, depending on how it is compiled...

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
  2021-07-03 10:41 ` [Bug tree-optimization/101301] " tkoenig at gcc dot gnu.org
  2021-07-03 18:42 ` tkoenig at gcc dot gnu.org
@ 2021-07-03 19:34 ` segher at gcc dot gnu.org
  2021-07-03 21:57 ` tkoenig at gcc dot gnu.org
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: segher at gcc dot gnu.org @ 2021-07-03 19:34 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #3 from Segher Boessenkool <segher at gcc dot gnu.org> ---
What CPU is in your Power system, and what -mcpu= did you compile with?
What is the ABI you are using?  (That last one doesn't seem to matter in
this particular case, but :-) )

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (2 preceding siblings ...)
  2021-07-03 19:34 ` segher at gcc dot gnu.org
@ 2021-07-03 21:57 ` tkoenig at gcc dot gnu.org
  2021-07-04 17:37 ` segher at gcc dot gnu.org
                   ` (7 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: tkoenig at gcc dot gnu.org @ 2021-07-03 21:57 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #4 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
(In reply to Segher Boessenkool from comment #3)
> What CPU is in your Power system, and what -mcpu= did you compile with?
> What is the ABI you are using?  (That last one doesn't seem to matter in
> this particular case, but :-) )

The machine is gcc135 from the gcc compile farm. cat /proc/cpuinfo says

processor       : 0
cpu             : POWER9, altivec supported
clock           : 3800.000000MHz
revision        : 2.2 (pvr 004e 1202)

...

I compiled without -mcpu.

With -mcpu=native, the times are

[tkoenig@gcc135 ~]$ gcc -mcpu=native -O3  bench.c a.c && ./a.out
foo: 5.679413
foo2: 6.799938

[tkoenig@gcc135 ~]$ gcc -mcpu=native -O3 -DFULL bench.c a.c && ./a.out
foo: 5.713161
foo2: 6.137598

so -mcpu=native makes a difference there.  The version of the compiler is

gcc-Version 12.0.0 20210701 (experimental) (GCC) 

HTH.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (3 preceding siblings ...)
  2021-07-03 21:57 ` tkoenig at gcc dot gnu.org
@ 2021-07-04 17:37 ` segher at gcc dot gnu.org
  2021-07-05  5:33 ` tkoenig at gcc dot gnu.org
                   ` (6 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: segher at gcc dot gnu.org @ 2021-07-04 17:37 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #5 from Segher Boessenkool <segher at gcc dot gnu.org> ---
Looking at -fdump-tree-all-all, things are fine with "foo" until the
switchlower pass.  Before that all nine switch cases and the default
case all had probability 0.10; after the switch conversion there are
many basic blocks with "Invalid sum of incoming counts", and not
everything has the same probability any more.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (4 preceding siblings ...)
  2021-07-04 17:37 ` segher at gcc dot gnu.org
@ 2021-07-05  5:33 ` tkoenig at gcc dot gnu.org
  2021-07-05 17:33 ` segher at gcc dot gnu.org
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: tkoenig at gcc dot gnu.org @ 2021-07-05  5:33 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #6 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
If I create a foo3 function with

int foo3 (int n)
{
  if (__builtin_expect_with_probability (n >= 55555, 1, 0.55))
    {
      if (__builtin_expect_with_probability (n >= 77777, 1, 0.33/0.55))
        {
          if (__builtin_expect_with_probability (n == 77777, 1, 0.1/0.33))
            return 7;
          if (__builtin_expect_with_probability (n == 88888, 1, 0.1/0.23))
            return 8;
          if (__builtin_expect_with_probability (n == 99999, 1, 0.1/0.11))
            return 9;

          return 0;
        }
      else
        {
          if (__builtin_expect_with_probability (n == 55555, 1, 0.1/0.22))
            return 5;
          if (__builtin_expect_with_probability (n == 66666, 1, 0.1/0.11))
            return 6;

          return 0;
        }
    }
  else
    {
      if (__builtin_expect_with_probability (n >= 33333, 1, 0.22/0.45))
        {
          if (__builtin_expect_with_probability (n == 33333, 1, 0.1/0.22))
            return 3;
          if (__builtin_expect_with_probability (n == 44444, 1, 0.1/0.11))
            return 4;

          return 0;
        }
      else
        {
          if (__builtin_expect_with_probability (n == 11111, 1, 0.1/0.23))
            return 1;
          if (__builtin_expect_with_probability (n == 22222, 1, 0.1/0.13))
            return 2;

          return 0;
        }
    }
}

the numbers on POWER9 become

[tkoenig@gcc135 ~]$ gcc -O3 bench.c a.c
[tkoenig@gcc135 ~]$ ./a.out
foo: 7.134855
foo2: 7.842507
foo3: 6.624406
[tkoenig@gcc135 ~]$ gcc -mcpu=native -O3 bench.c a.c
[tkoenig@gcc135 ~]$ ./a.out
foo: 6.458520
foo2: 7.696735
foo3: 6.196469

where, on a few runs, the differene betweeh foo and foo3 with -mcpu=native
sometimes disappears and sometimes is larger (gcc135 is not a benchmark
machine).

So, I'd say there some advantage in the compiler not lying to itself :-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (5 preceding siblings ...)
  2021-07-05  5:33 ` tkoenig at gcc dot gnu.org
@ 2021-07-05 17:33 ` segher at gcc dot gnu.org
  2021-07-06 13:39 ` tkoenig at gcc dot gnu.org
                   ` (4 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: segher at gcc dot gnu.org @ 2021-07-05 17:33 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #7 from Segher Boessenkool <segher at gcc dot gnu.org> ---
Assuming that you divide the "default" case into ten pieces of 0.01 each,
some slight corrections:

(In reply to Thomas Koenig from comment #6)
> int foo3 (int n)
> {
>   if (__builtin_expect_with_probability (n >= 55555, 1, 0.55))
>     {
>       if (__builtin_expect_with_probability (n >= 77777, 1, 0.33/0.55))
>         {
>           if (__builtin_expect_with_probability (n == 77777, 1, 0.1/0.33))
>             return 7;
>           if (__builtin_expect_with_probability (n == 88888, 1, 0.1/0.23))
>             return 8;
>           if (__builtin_expect_with_probability (n == 99999, 1, 0.1/0.11))
>             return 9;

0.1/0.13 for that last one.

> 
>           return 0;
>         }
>       else
>         {
>           if (__builtin_expect_with_probability (n == 55555, 1, 0.1/0.22))
>             return 5;
>           if (__builtin_expect_with_probability (n == 66666, 1, 0.1/0.11))
>             return 6;

0.1/0.12

> 
>           return 0;
>         }
>     }
>   else
>     {
>       if (__builtin_expect_with_probability (n >= 33333, 1, 0.22/0.45))
>         {
>           if (__builtin_expect_with_probability (n == 33333, 1, 0.1/0.22))
>             return 3;
>           if (__builtin_expect_with_probability (n == 44444, 1, 0.1/0.11))
>             return 4;

0.1/0.12

> 
>           return 0;
>         }
>       else
>         {
>           if (__builtin_expect_with_probability (n == 11111, 1, 0.1/0.23))
>             return 1;
>           if (__builtin_expect_with_probability (n == 22222, 1, 0.1/0.13))
>             return 2;
> 
>           return 0;
>         }
>     }
> }
> 
> the numbers on POWER9 become
> 
> [tkoenig@gcc135 ~]$ gcc -O3 bench.c a.c
> [tkoenig@gcc135 ~]$ ./a.out
> foo: 7.134855
> foo2: 7.842507
> foo3: 6.624406
> [tkoenig@gcc135 ~]$ gcc -mcpu=native -O3 bench.c a.c
> [tkoenig@gcc135 ~]$ ./a.out
> foo: 6.458520
> foo2: 7.696735
> foo3: 6.196469
> 
> where, on a few runs, the differene betweeh foo and foo3 with -mcpu=native
> sometimes disappears and sometimes is larger (gcc135 is not a benchmark
> machine).
> 
> So, I'd say there some advantage in the compiler not lying to itself :-)

Yeah :-)  Of course in your testing the nine named cases have the same
probability,
which is not very true in practice (but is there any better guess possible),
and
the "default" case has that same probability for GCC (is there a better
estimate
for that, maybe?)

(I expect there just is some typo or thinko somewhere in the pass, fwiw :-) )

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (6 preceding siblings ...)
  2021-07-05 17:33 ` segher at gcc dot gnu.org
@ 2021-07-06 13:39 ` tkoenig at gcc dot gnu.org
  2021-07-06 16:35 ` segher at gcc dot gnu.org
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: tkoenig at gcc dot gnu.org @ 2021-07-06 13:39 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #8 from Thomas Koenig <tkoenig at gcc dot gnu.org> ---
(In reply to Segher Boessenkool from comment #7)

> Yeah :-)  Of course in your testing the nine named cases have the same
> probability,
> which is not very true in practice (but is there any better guess possible),
> and
> the "default" case has that same probability for GCC (is there a better
> estimate
> for that, maybe?)

I think that we should leave something to do for the hardware branch
predictors.  Any pattern should lead to better predictions. The test
case is rather brutal because it is random.

> (I expect there just is some typo or thinko somewhere in the pass, fwiw :-) )

As I have demonstrated above, such a thinko is rather easy to make :-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (7 preceding siblings ...)
  2021-07-06 13:39 ` tkoenig at gcc dot gnu.org
@ 2021-07-06 16:35 ` segher at gcc dot gnu.org
  2021-08-12 10:31 ` marxin at gcc dot gnu.org
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 13+ messages in thread
From: segher at gcc dot gnu.org @ 2021-07-06 16:35 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #9 from Segher Boessenkool <segher at gcc dot gnu.org> ---
(In reply to Thomas Koenig from comment #8)
> (In reply to Segher Boessenkool from comment #7)
>  
> > Yeah :-)  Of course in your testing the nine named cases have the same
> > probability,
> > which is not very true in practice (but is there any better guess possible),
> > and
> > the "default" case has that same probability for GCC (is there a better
> > estimate
> > for that, maybe?)
> 
> I think that we should leave something to do for the hardware branch
> predictors.  Any pattern should lead to better predictions. The test
> case is rather brutal because it is random.

Heh.  My question is if we would get better code if we assumed the default case
is more likely than the other cases, in general, on actual code "in the wild".
There is bound to be some literature about that, hrm...

> > (I expect there just is some typo or thinko somewhere in the pass, fwiw :-) )
> 
> As I have demonstrated above, such a thinko is rather easy to make :-)

Ha :-)  Too bad we can only warn "invalid sum of incoming frequencies", it
still
happens too often (read: at all) to actually error on it...  that would perhaps
grab people's attention :-)

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (8 preceding siblings ...)
  2021-07-06 16:35 ` segher at gcc dot gnu.org
@ 2021-08-12 10:31 ` marxin at gcc dot gnu.org
  2022-11-30 13:04 ` cvs-commit at gcc dot gnu.org
  2023-04-03  9:04 ` marxin at gcc dot gnu.org
  11 siblings, 0 replies; 13+ messages in thread
From: marxin at gcc dot gnu.org @ 2021-08-12 10:31 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

Martin Liška <marxin at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Assignee|unassigned at gcc dot gnu.org      |marxin at gcc dot gnu.org
   Last reconfirmed|                            |2021-08-12
             Status|UNCONFIRMED                 |ASSIGNED
     Ever confirmed|0                           |1

--- Comment #10 from Martin Liška <marxin at gcc dot gnu.org> ---
> Ha :-)  Too bad we can only warn "invalid sum of incoming frequencies", it
> still
> happens too often (read: at all) to actually error on it...  that would
> perhaps
> grab people's attention :-)

I can confirm switch lowering is not updating BB counts and I see some other
issues related to edge probabilities. I have a partial fix, but I'm going to
return to it once stage1 is gone.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (9 preceding siblings ...)
  2021-08-12 10:31 ` marxin at gcc dot gnu.org
@ 2022-11-30 13:04 ` cvs-commit at gcc dot gnu.org
  2023-04-03  9:04 ` marxin at gcc dot gnu.org
  11 siblings, 0 replies; 13+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2022-11-30 13:04 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

--- Comment #11 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Martin Liska <marxin@gcc.gnu.org>:

https://gcc.gnu.org/g:4fa25a7eb322f0a003c1eb15680c71ece345e01e

commit r13-4409-g4fa25a7eb322f0a003c1eb15680c71ece345e01e
Author: Martin Liska <mliska@suse.cz>
Date:   Mon Jan 24 15:45:38 2022 +0100

    Improve profile handling in switch lowering.

            PR tree-optimization/101301
            PR tree-optimization/103680

    gcc/ChangeLog:

            * tree-switch-conversion.cc (bit_test_cluster::emit):
            Handle correctly remaining probability.
            (switch_decision_tree::try_switch_expansion): Fix BB's count
            where a cluster expansion happens.
            (switch_decision_tree::emit_cmp_and_jump_insns): Fill up also
            BB count.
            (switch_decision_tree::do_jump_if_equal): Likewise.
            (switch_decision_tree::emit_case_nodes): Handle special case
            for BT expansion which can also fallback to a default BB.
            * tree-switch-conversion.h (cluster::cluster): Add
            m_default_prob probability.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [Bug tree-optimization/101301] Improving sparse switch statement
  2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
                   ` (10 preceding siblings ...)
  2022-11-30 13:04 ` cvs-commit at gcc dot gnu.org
@ 2023-04-03  9:04 ` marxin at gcc dot gnu.org
  11 siblings, 0 replies; 13+ messages in thread
From: marxin at gcc dot gnu.org @ 2023-04-03  9:04 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=101301

Martin Liška <marxin at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|ASSIGNED                    |NEW
           Assignee|marxin at gcc dot gnu.org          |unassigned at gcc dot gnu.org

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2023-04-03  9:04 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-02 15:32 [Bug tree-optimization/101301] New: Improving sparse switch statement tkoenig at gcc dot gnu.org
2021-07-03 10:41 ` [Bug tree-optimization/101301] " tkoenig at gcc dot gnu.org
2021-07-03 18:42 ` tkoenig at gcc dot gnu.org
2021-07-03 19:34 ` segher at gcc dot gnu.org
2021-07-03 21:57 ` tkoenig at gcc dot gnu.org
2021-07-04 17:37 ` segher at gcc dot gnu.org
2021-07-05  5:33 ` tkoenig at gcc dot gnu.org
2021-07-05 17:33 ` segher at gcc dot gnu.org
2021-07-06 13:39 ` tkoenig at gcc dot gnu.org
2021-07-06 16:35 ` segher at gcc dot gnu.org
2021-08-12 10:31 ` marxin at gcc dot gnu.org
2022-11-30 13:04 ` cvs-commit at gcc dot gnu.org
2023-04-03  9:04 ` marxin at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).