public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/98138] New: BB vect fail to SLP one case
@ 2020-12-04 10:46 linkw at gcc dot gnu.org
  2020-12-04 10:52 ` [Bug tree-optimization/98138] " linkw at gcc dot gnu.org
                   ` (13 more replies)
  0 siblings, 14 replies; 15+ messages in thread
From: linkw at gcc dot gnu.org @ 2020-12-04 10:46 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98138

            Bug ID: 98138
           Summary: BB vect fail to SLP one case
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: linkw at gcc dot gnu.org
  Target Milestone: ---

Test case:

  extern void test(unsigned int t[4][4]);

  /*
   * Reproducer body: over four iterations, combine byte differences between
   * p1 and p2 into four 32-bit values, apply two rounds of add/subtract to
   * them, store the four results into row i of a local 4x4 array, and
   * finally pass that array to test().
   *
   * p1, p2: byte pointers; offsets 0..7 of each are read on every iteration.
   * i1, i2: amounts added to p1 and p2 respectively after each iteration.
   */
  void foo(unsigned char *p1, int i1, unsigned char *p2, int i2)
  {
    unsigned int tmp[4][4];
    unsigned int a0, a1, a2, a3;

    for (int i = 0; i < 4; i++, p1 += i1, p2 += i2) {
      // aN = (difference at offset N) + ((difference at offset N + 4) << 16).
      // NOTE(review): the unsigned char operands promote to int, so each
      // difference may be negative; left-shifting a negative int is undefined
      // in ISO C. Kept as-is because this is the reported reproducer -- confirm.
      a0 = (p1[0] - p2[0]) + ((p1[4] - p2[4]) << 16);
      a1 = (p1[1] - p2[1]) + ((p1[5] - p2[5]) << 16);
      a2 = (p1[2] - p2[2]) + ((p1[6] - p2[6]) << 16);
      a3 = (p1[3] - p2[3]) + ((p1[7] - p2[7]) << 16);

      // First add/subtract round on the pairs (a0, a1) and (a2, a3); the
      // unsigned results are stored into int temporaries.
      int t0 = a0 + a1;
      int t1 = a0 - a1;
      int t2 = a2 + a3;
      int t3 = a2 - a3;

      // Second round; note the store order is columns 0, 2, 1, 3.
      tmp[i][0] = t0 + t2;
      tmp[i][2] = t0 - t2;
      tmp[i][1] = t1 + t3;
      tmp[i][3] = t1 - t3;
    }
    // Hand the array to an external function (declared extern above).
    test(tmp);
  }

The expected code on ppc64le can look like:

  // p1 byte 0 to byte 7 
  d1_0_7 = load_dword(p1)
  // p1+i1 b0 to b7, rename it as 8 to 15       
  d1_8_15 = load_dword(p1 + i1)
  d1_16_23 = load_dword(p1 + 2*i1) 
  d1_24_31 = load_dword(p1 + 3*i1)

  V_d1_0_15 = construct_vec(d1_0_7,d1_8_15) // vector char
  V_d1_16_31 = construct_vec(d1_16_23,d1_24_31)
  V_d1_0_3_all = vperm(V_d1_0_15, V_d1_16_31,
                      {0 8 16 24 1 9 17 25 2 10 18 26 3 11 19 27})
  V_d1_4_7_all = vperm(V_d1_0_15, V_d1_16_31,
                      {4 12 20 28 5 13 21 29 6 14 22 30 7 15 23 31})

  // Do the same for p2 with i2 to get V_d2_0_3_all and V_d2_4_7_all

  // Do the subtraction together (all 4x4 bytes)
  V_sub1 = V_d1_0_3_all - V_d2_0_3_all
  V_sub2 = V_d1_4_7_all - V_d2_4_7_all

  // Do some unpack and get the promoted vector int
  V_a0_tmp = vec_promote(V_sub2, {0 1 2 3}) // vector int {b4 b12 b20 b28}
  V_a0_1 = V_a0_tmp << 16
  V_a0_0 = vec_promote(V_sub1, {0 1 2 3})  // vector int {b0 b8 b16 b24}
  // vector int {a0_iter0, a0_iter1, a0_iter2, a0_iter3}
  V_a0 = V_a0_0 + V_a0_1

  // Compute V_a1, V_a2 and V_a3 in the same way

  // Compute t0/t1/t2/t3
  // vector int {t0_iter0, t0_iter1, t0_iter2, t0_iter3}
  V_t0 = V_a0 + V_a1  
  V_t1 = V_a0 - V_a1
  V_t2 = V_a2 + V_a3
  V_t3 = V_a2 - V_a3

  // Compute tmps
  // vector int {tmp[0][0], tmp[1][0], tmp[2][0], tmp[3][0]}
  V_tmp0 = V_t0 + V_t2
  V_tmp2 = V_t0 - V_t2
  V_tmp1 = V_t1 + V_t3
  V_tmp3 = V_t1 - V_t3

  // Finally, construct {tmp[0][0], tmp[0][1], tmp[0][2], tmp[0][3]} ...
  // with six further permutations on V_tmp0/V_tmp1/V_tmp2/V_tmp3

^ permalink raw reply	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-10-09  7:38 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-04 10:46 [Bug tree-optimization/98138] New: BB vect fail to SLP one case linkw at gcc dot gnu.org
2020-12-04 10:52 ` [Bug tree-optimization/98138] " linkw at gcc dot gnu.org
2020-12-04 12:19 ` rguenth at gcc dot gnu.org
2020-12-07  3:10 ` linkw at gcc dot gnu.org
2021-01-05  8:42 ` linkw at gcc dot gnu.org
2021-01-06  3:29 ` linkw at gcc dot gnu.org
2021-01-06  9:48 ` rguenth at gcc dot gnu.org
2021-01-12  7:23 ` linkw at gcc dot gnu.org
2021-01-12  7:25 ` linkw at gcc dot gnu.org
2021-08-04 10:31 ` rguenth at gcc dot gnu.org
2022-07-06 10:39 ` ktkachov at gcc dot gnu.org
2023-02-01  8:19 ` manolis.tsamis at vrull dot eu
2023-10-04 22:37 ` jiangning.liu at amperecomputing dot com
2023-10-05  6:58 ` rguenth at gcc dot gnu.org
2023-10-09  7:38 ` rguenth at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).