public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug tree-optimization/98113] New: [11 Regression] popcnt is not vectorized on s390 since f5e18dd9c7da
@ 2020-12-03  1:28 iii at linux dot ibm.com
  2020-12-03  2:44 ` [Bug tree-optimization/98113] " linkw at gcc dot gnu.org
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: iii at linux dot ibm.com @ 2020-12-03  1:28 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=98113

            Bug ID: 98113
           Summary: [11 Regression] popcnt is not vectorized on s390 since
                    f5e18dd9c7da
           Product: gcc
           Version: 11.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: iii at linux dot ibm.com
  Target Milestone: ---

The s390 test vxe/popcount-1.c began to fail after the PR96789 fix.

The reason is that for the following source code

uv4si __attribute__((noinline))
vpopctf (uv4si a)
{
  uv4si r;
  int i;

  for (i = 0; i < 4; i++)
    r[i] = __builtin_popcount (a[i]);

  return r;
}

FRE turned

  _4 = BIT_FIELD_REF <aD.2283, 32, 0>;
  _11 = __builtin_popcountD.1211 (_4);
  _18 = (unsigned intD.9) _11;
  BIT_FIELD_REF <rD.2286, 32, 0> = _18;
  i_20 = 1;
  ivtmp_21 = 3;
  _25 = VIEW_CONVERT_EXPR<unsigned intD.9[4]>(aD.2283)[i_20];
  _26 = __builtin_popcountD.1211 (_25);
  _27 = (unsigned intD.9) _26;
  VIEW_CONVERT_EXPR<unsigned intD.9[4]>(rD.2286)[i_20] = _27;
  i_29 = i_20 + 1;
  ivtmp_30 = ivtmp_21 + 4294967295;
  _34 = VIEW_CONVERT_EXPR<unsigned intD.9[4]>(aD.2283)[i_29];
  _35 = __builtin_popcountD.1211 (_34);
  _36 = (unsigned intD.9) _35;
  VIEW_CONVERT_EXPR<unsigned intD.9[4]>(rD.2286)[i_29] = _36;
  i_38 = i_29 + 1;
  ivtmp_39 = ivtmp_30 + 4294967295;
  _1 = VIEW_CONVERT_EXPR<unsigned intD.9[4]>(aD.2283)[i_38];
  _2 = __builtin_popcountD.1211 (_1);
  _3 = (unsigned intD.9) _2;
  VIEW_CONVERT_EXPR<unsigned intD.9[4]>(rD.2286)[i_38] = _3;
  i_10 = i_38 + 1;
  ivtmp_16 = ivtmp_39 + 4294967295;
  _7 = rD.2286;
  rD.2286 ={v} {CLOBBER};
  return _7;

into

  _4 = BIT_FIELD_REF <a_17(D), 32, 0>;
  _11 = __builtin_popcountD.1211 (_4);
  _18 = (unsigned intD.9) _11;
  r_14 = BIT_INSERT_EXPR <r_15(D), _18, 0 (32 bits)>;
  _25 = BIT_FIELD_REF <a_17(D), 32, 32>;
  _26 = __builtin_popcountD.1211 (_25);
  _27 = (unsigned intD.9) _26;
  r_33 = BIT_INSERT_EXPR <r_14, _27, 32 (32 bits)>;
  _34 = BIT_FIELD_REF <a_17(D), 32, 64>;
  _35 = __builtin_popcountD.1211 (_34);
  _36 = (unsigned intD.9) _35;
  r_32 = BIT_INSERT_EXPR <r_33, _36, 64 (32 bits)>;
  _1 = BIT_FIELD_REF <a_17(D), 32, 96>;
  _2 = __builtin_popcountD.1211 (_1);
  _3 = (unsigned intD.9) _2;
  r_31 = BIT_INSERT_EXPR <r_32, _3, 96 (32 bits)>;
  _7 = r_31;
  return _7;

that is, replaced a sequence of stores with a sequence of
BIT_INSERT_EXPRs.

slp1 now says: "missed:  not vectorized: no grouped stores in basic
block", presumably because it doesn't understand BIT_INSERT_EXPRs.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2020-12-07 11:54 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-03  1:28 [Bug tree-optimization/98113] New: [11 Regression] popcnt is not vectorized on s390 since f5e18dd9c7da iii at linux dot ibm.com
2020-12-03  2:44 ` [Bug tree-optimization/98113] " linkw at gcc dot gnu.org
2020-12-03  3:20 ` linkw at gcc dot gnu.org
2020-12-03  7:52 ` rguenth at gcc dot gnu.org
2020-12-03  9:28 ` rguenth at gcc dot gnu.org
2020-12-03 10:59 ` rguenth at gcc dot gnu.org
2020-12-03 13:53 ` iii at linux dot ibm.com
2020-12-07 11:54 ` cvs-commit at gcc dot gnu.org
2020-12-07 11:54 ` rguenth at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).