[Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned
@ 2023-09-12  3:36 lehua.ding at rivai dot ai
  2023-09-12 12:31 ` [Bug target/111381] " rguenth at gcc dot gnu.org
  2023-09-15  1:44 ` cvs-commit at gcc dot gnu.org
  0 siblings, 2 replies; 3+ messages in thread
From: lehua.ding at rivai dot ai @ 2023-09-12  3:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381

            Bug ID: 111381
           Summary: RISC-V: missed autovec MULH for signed * unsigned
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: lehua.ding at rivai dot ai
  Target Milestone: ---

For signed * signed or unsigned * unsigned, they can be converted to .MULH, but
for signed * unsigned, the conversion fails. If the target supports signed *
unsigned, I think it can be converted to .MULH and expanded to a
sumul<mode>3_highpart pattern.

https://godbolt.org/z/exrfYWdW9

C Code:
#include <riscv_vector.h>

void foo6 (uint32_t* restrict a, uint32_t* restrict b, int* restrict pred, int
n)
{
    for (int i = 0; i < n; i += 1)
      a[i] = pred[i] ? (uint32_t)(((uint64_t)a[i] * (uint64_t)b[i]) >> 32) :
a[i];
}

void foo7 (int* restrict a, uint32_t* restrict b, int* restrict pred, int n)
{
    for (int i = 0; i < n; i += 1)
      a[i] = pred[i] ? (int32_t)(((int64_t)a[i] * (uint64_t)b[i]) >> 32) :
a[i];
}

Optimized dump:

;; Function foo6 (foo6, funcdef_no=0, decl_uid=56325, cgraph_uid=1,
symbol_order=0)

Removing basic block 6
Removing basic block 7
Removing basic block 8
void foo6 (uint32_t * restrict a, uint32_t * restrict b, int * restrict pred,
int n)
{
  vector([4,4]) unsigned int * vectp_a.23;
  vector([4,4]) unsigned int vect_iftmp.22;
  vector([4,4]) unsigned int vect_patt_37.20;
  vector([4,4]) unsigned int vect__9.19;
  vector([4,4]) unsigned int * vectp_b.17;
  vector([4,4]) <signed-boolean:1> mask__38.16;
  vector([4,4]) unsigned int vect_pretmp_41.15;
  vector([4,4]) unsigned int * vectp_a.13;
  vector([4,4]) int vect__4.12;
  vector([4,4]) int * vectp_pred.10;
  unsigned long ivtmp_62;
  unsigned long _83;
  unsigned long ivtmp_84;
  unsigned long ivtmp_85;
  unsigned long _86;

  <bb 2> [local count: 118111600]:
  if (n_19(D) > 0)
    goto <bb 4>; [89.00%]
  else
    goto <bb 3>; [11.00%]

  <bb 3> [local count: 118111600]:
  return;

  <bb 4> [local count: 105119324]:
  _83 = (unsigned long) n_19(D);

  <bb 5> [local count: 955630224]:
  # vectp_pred.10_63 = PHI <vectp_pred.10_64(5), pred_20(D)(4)>
  # vectp_a.13_67 = PHI <vectp_a.13_68(5), a_21(D)(4)>
  # vectp_b.17_73 = PHI <vectp_b.17_74(5), b_23(D)(4)>
  # vectp_a.23_80 = PHI <vectp_a.23_81(5), a_21(D)(4)>
  # ivtmp_84 = PHI <ivtmp_85(5), _83(4)>
  _86 = .SELECT_VL (ivtmp_84, POLY_INT_CST [4, 4]);
  ivtmp_62 = _86 * 4;
  vect__4.12_65 = .MASK_LEN_LOAD (vectp_pred.10_63, 32B, { -1, ... }, _86, 0);
  vect_pretmp_41.15_69 = .MASK_LEN_LOAD (vectp_a.13_67, 32B, { -1, ... }, _86,
0);
  mask__38.16_71 = vect__4.12_65 != { 0, ... };
  vect__9.19_75 = .MASK_LEN_LOAD (vectp_b.17_73, 32B, mask__38.16_71, _86, 0);
  vect_patt_37.20_76 = .MULH (vect_pretmp_41.15_69, vect__9.19_75);
  vect_iftmp.22_78 = .VCOND_MASK (mask__38.16_71, vect_patt_37.20_76,
vect_pretmp_41.15_69);
  .MASK_LEN_STORE (vectp_a.23_80, 32B, { -1, ... }, _86, 0, vect_iftmp.22_78);
  vectp_pred.10_64 = vectp_pred.10_63 + ivtmp_62;
  vectp_a.13_68 = vectp_a.13_67 + ivtmp_62;
  vectp_b.17_74 = vectp_b.17_73 + ivtmp_62;
  vectp_a.23_81 = vectp_a.23_80 + ivtmp_62;
  ivtmp_85 = ivtmp_84 - _86;
  if (ivtmp_85 != 0)
    goto <bb 5>; [89.00%]
  else
    goto <bb 3>; [11.00%]

}



;; Function foo7 (foo7, funcdef_no=1, decl_uid=56336, cgraph_uid=2,
symbol_order=1)

Removing basic block 6
Removing basic block 7
Removing basic block 8
void foo7 (int * restrict a, uint32_t * restrict b, int * restrict pred, int n)
{
  vector([2,2]) int * vectp_a.49;
  vector([2,2]) int vect_iftmp.48;
  vector([2,2]) int vect_iftmp.47;
  vector([2,2]) long unsigned int vect__12.46;
  vector([2,2]) long unsigned int vect__11.45;
  vector([2,2]) long unsigned int vect__10.44;
  vector([2,2]) unsigned int vect__9.43;
  vector([2,2]) unsigned int * vectp_b.41;
  vector([2,2]) long unsigned int vect__7.40;
  vector([2,2]) <signed-boolean:1> mask__38.39;
  vector([2,2]) int vect_pretmp_41.38;
  vector([2,2]) int * vectp_a.36;
  vector([2,2]) int vect__4.35;
  vector([2,2]) int * vectp_pred.33;
  unsigned long ivtmp_56;
  unsigned long _80;
  unsigned long ivtmp_81;
  unsigned long ivtmp_82;
  unsigned long _83;

  <bb 2> [local count: 118111600]:
  if (n_19(D) > 0)
    goto <bb 4>; [89.00%]
  else
    goto <bb 3>; [11.00%]

  <bb 3> [local count: 118111600]:
  return;

  <bb 4> [local count: 105119324]:
  _80 = (unsigned long) n_19(D);

  <bb 5> [local count: 955630224]:
  # vectp_pred.33_57 = PHI <vectp_pred.33_58(5), pred_20(D)(4)>
  # vectp_a.36_61 = PHI <vectp_a.36_62(5), a_21(D)(4)>
  # vectp_b.41_68 = PHI <vectp_b.41_69(5), b_23(D)(4)>
  # vectp_a.49_77 = PHI <vectp_a.49_78(5), a_21(D)(4)>
  # ivtmp_81 = PHI <ivtmp_82(5), _80(4)>
  _83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [2, 2]);
  ivtmp_56 = _83 * 4;
  vect__4.35_59 = .MASK_LEN_LOAD (vectp_pred.33_57, 32B, { -1, ... }, _83, 0);
  vect_pretmp_41.38_63 = .MASK_LEN_LOAD (vectp_a.36_61, 32B, { -1, ... }, _83,
0);
  mask__38.39_65 = vect__4.35_59 != { 0, ... };
  vect__7.40_66 = (vector([2,2]) long unsigned int) vect_pretmp_41.38_63;
  vect__9.43_70 = .MASK_LEN_LOAD (vectp_b.41_68, 32B, mask__38.39_65, _83, 0);
  vect__10.44_71 = (vector([2,2]) long unsigned int) vect__9.43_70;
  vect__11.45_72 = vect__7.40_66 * vect__10.44_71;
  vect__12.46_73 = vect__11.45_72 >> 32;
  vect_iftmp.47_74 = (vector([2,2]) int) vect__12.46_73;
  vect_iftmp.48_75 = .VCOND_MASK (mask__38.39_65, vect_iftmp.47_74,
vect_pretmp_41.38_63);
  .MASK_LEN_STORE (vectp_a.49_77, 32B, { -1, ... }, _83, 0, vect_iftmp.48_75);
  vectp_pred.33_58 = vectp_pred.33_57 + ivtmp_56;
  vectp_a.36_62 = vectp_a.36_61 + ivtmp_56;
  vectp_b.41_69 = vectp_b.41_68 + ivtmp_56;
  vectp_a.49_78 = vectp_a.49_77 + ivtmp_56;
  ivtmp_82 = ivtmp_81 - _83;
  if (ivtmp_82 != 0)
    goto <bb 5>; [89.00%]
  else
    goto <bb 3>; [11.00%]

}

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug target/111381] RISC-V: missed autovec MULH for signed * unsigned
  2023-09-12  3:36 [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned lehua.ding at rivai dot ai
@ 2023-09-12 12:31 ` rguenth at gcc dot gnu.org
  2023-09-15  1:44 ` cvs-commit at gcc dot gnu.org
  1 sibling, 0 replies; 3+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-09-12 12:31 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Blocks|                            |53947

--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
I think the only relevant difference is the result type difference, uint32_t vs
int32_t since (int64_t)a[i] * (uint64_t)b[i] should get promoted to a uint64_t
multiplication.


Referenced Bugs:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53947
[Bug 53947] [meta-bug] vectorizer missed-optimizations

^ permalink raw reply	[flat|nested] 3+ messages in thread

* [Bug target/111381] RISC-V: missed autovec MULH for signed * unsigned
  2023-09-12  3:36 [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned lehua.ding at rivai dot ai
  2023-09-12 12:31 ` [Bug target/111381] " rguenth at gcc dot gnu.org
@ 2023-09-15  1:44 ` cvs-commit at gcc dot gnu.org
  1 sibling, 0 replies; 3+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-09-15  1:44 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381

--- Comment #2 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The trunk branch has been updated by Lehua Ding <lhtin@gcc.gnu.org>:

https://gcc.gnu.org/g:68cb873fd360dbb64f2a6dfb28e79399ff99d07d

commit r14-4008-g68cb873fd360dbb64f2a6dfb28e79399ff99d07d
Author: Lehua Ding <lehua.ding@rivai.ai>
Date:   Thu Sep 14 23:35:42 2023 +0800

    RISC-V: Support combine extend and reduce sum to widen reduce sum

    This patch adds combine patterns to combine extend and reduce sum
    into widen reduce sum. The patterns in autovec.md were adjusted as
    needed. Note that the current vectorization cannot generate a reduce
    operand which is LMUL=M8, because this means that we need an LMUL=M16
    for the extended operand, which is currently not possible. So I've
    added the VI_QHS_NO_M8 and VF_HS_NO_M8 mode iterators, which exclude
    modes which are LMUL=M8.

            PR target/111381

    gcc/ChangeLog:

            * config/riscv/autovec-opt.md (*reduc_plus_scal_<mode>):
            New combine pattern.
            (*fold_left_widen_plus_<mode>): Ditto.
            (*mask_len_fold_left_widen_plus_<mode>): Ditto.
            * config/riscv/autovec.md (reduc_plus_scal_<mode>):
            Change from define_expand to define_insn_and_split.
            (fold_left_plus_<mode>): Ditto.
            (mask_len_fold_left_plus_<mode>): Ditto.
            * config/riscv/riscv-v.cc (expand_reduction):
            Support widen reduction.
            * config/riscv/vector-iterators.md (UNSPEC_WREDUC_SUM):
            Add new iterators and attrs.

    gcc/testsuite/ChangeLog:

            * gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c: New test.
            * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c: New
test.
            * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c: New
test.
            * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c: New
test.
            * gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c: New
test.
            * gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c: New test.

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2023-09-15  1:45 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-12  3:36 [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned lehua.ding at rivai dot ai
2023-09-12 12:31 ` [Bug target/111381] " rguenth at gcc dot gnu.org
2023-09-15  1:44 ` cvs-commit at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).