public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned
@ 2023-09-12 3:36 lehua.ding at rivai dot ai
2023-09-12 12:31 ` [Bug target/111381] " rguenth at gcc dot gnu.org
2023-09-15 1:44 ` cvs-commit at gcc dot gnu.org
0 siblings, 2 replies; 3+ messages in thread
From: lehua.ding at rivai dot ai @ 2023-09-12 3:36 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381
Bug ID: 111381
Summary: RISC-V: missed autovec MULH for signed * unsigned
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: lehua.ding at rivai dot ai
Target Milestone: ---
For signed * signed or unsigned * unsigned, the multiply can be converted to
.MULH, but for signed * unsigned the conversion fails. If the target supports
signed * unsigned, I think it can be converted to .MULH and expanded to a
sumul<mode>3_highpart pattern.
https://godbolt.org/z/exrfYWdW9
C code:
#include <riscv_vector.h>
/* Unsigned * unsigned high-part multiply.  For each i in [0, n), when pred[i]
   is non-zero, a[i] is replaced by the upper 32 bits of the 64-bit product of
   a[i] and b[i] (both zero-extended to uint64_t); otherwise a[i] is kept.
   The optimized dump for foo6 below shows this loop vectorized using .MULH.  */
void foo6 (uint32_t* restrict a, uint32_t* restrict b, int* restrict pred, int
n)
{
for (int i = 0; i < n; i += 1)
a[i] = pred[i] ? (uint32_t)(((uint64_t)a[i] * (uint64_t)b[i]) >> 32) :
a[i];
}
/* Signed * unsigned high-part multiply (the case this report is about).  For
   each i in [0, n), when pred[i] is non-zero, a[i] (sign-extended to int64_t)
   is multiplied by b[i] (zero-extended to uint64_t), the product is shifted
   right by 32 and the result is stored back as int32_t; otherwise a[i] is
   kept.  The optimized dump for foo7 below shows this loop vectorized as a
   widened 64-bit multiply followed by ">> 32" and a narrowing conversion,
   not as .MULH.  */
void foo7 (int* restrict a, uint32_t* restrict b, int* restrict pred, int n)
{
for (int i = 0; i < n; i += 1)
a[i] = pred[i] ? (int32_t)(((int64_t)a[i] * (uint64_t)b[i]) >> 32) :
a[i];
}
Optimized dump:
;; Function foo6 (foo6, funcdef_no=0, decl_uid=56325, cgraph_uid=1,
symbol_order=0)
Removing basic block 6
Removing basic block 7
Removing basic block 8
void foo6 (uint32_t * restrict a, uint32_t * restrict b, int * restrict pred,
int n)
{
vector([4,4]) unsigned int * vectp_a.23;
vector([4,4]) unsigned int vect_iftmp.22;
vector([4,4]) unsigned int vect_patt_37.20;
vector([4,4]) unsigned int vect__9.19;
vector([4,4]) unsigned int * vectp_b.17;
vector([4,4]) <signed-boolean:1> mask__38.16;
vector([4,4]) unsigned int vect_pretmp_41.15;
vector([4,4]) unsigned int * vectp_a.13;
vector([4,4]) int vect__4.12;
vector([4,4]) int * vectp_pred.10;
unsigned long ivtmp_62;
unsigned long _83;
unsigned long ivtmp_84;
unsigned long ivtmp_85;
unsigned long _86;
<bb 2> [local count: 118111600]:
if (n_19(D) > 0)
goto <bb 4>; [89.00%]
else
goto <bb 3>; [11.00%]
<bb 3> [local count: 118111600]:
return;
<bb 4> [local count: 105119324]:
_83 = (unsigned long) n_19(D);
<bb 5> [local count: 955630224]:
# vectp_pred.10_63 = PHI <vectp_pred.10_64(5), pred_20(D)(4)>
# vectp_a.13_67 = PHI <vectp_a.13_68(5), a_21(D)(4)>
# vectp_b.17_73 = PHI <vectp_b.17_74(5), b_23(D)(4)>
# vectp_a.23_80 = PHI <vectp_a.23_81(5), a_21(D)(4)>
# ivtmp_84 = PHI <ivtmp_85(5), _83(4)>
_86 = .SELECT_VL (ivtmp_84, POLY_INT_CST [4, 4]);
ivtmp_62 = _86 * 4;
vect__4.12_65 = .MASK_LEN_LOAD (vectp_pred.10_63, 32B, { -1, ... }, _86, 0);
vect_pretmp_41.15_69 = .MASK_LEN_LOAD (vectp_a.13_67, 32B, { -1, ... }, _86,
0);
mask__38.16_71 = vect__4.12_65 != { 0, ... };
vect__9.19_75 = .MASK_LEN_LOAD (vectp_b.17_73, 32B, mask__38.16_71, _86, 0);
vect_patt_37.20_76 = .MULH (vect_pretmp_41.15_69, vect__9.19_75);
vect_iftmp.22_78 = .VCOND_MASK (mask__38.16_71, vect_patt_37.20_76,
vect_pretmp_41.15_69);
.MASK_LEN_STORE (vectp_a.23_80, 32B, { -1, ... }, _86, 0, vect_iftmp.22_78);
vectp_pred.10_64 = vectp_pred.10_63 + ivtmp_62;
vectp_a.13_68 = vectp_a.13_67 + ivtmp_62;
vectp_b.17_74 = vectp_b.17_73 + ivtmp_62;
vectp_a.23_81 = vectp_a.23_80 + ivtmp_62;
ivtmp_85 = ivtmp_84 - _86;
if (ivtmp_85 != 0)
goto <bb 5>; [89.00%]
else
goto <bb 3>; [11.00%]
}
;; Function foo7 (foo7, funcdef_no=1, decl_uid=56336, cgraph_uid=2,
symbol_order=1)
Removing basic block 6
Removing basic block 7
Removing basic block 8
void foo7 (int * restrict a, uint32_t * restrict b, int * restrict pred, int n)
{
vector([2,2]) int * vectp_a.49;
vector([2,2]) int vect_iftmp.48;
vector([2,2]) int vect_iftmp.47;
vector([2,2]) long unsigned int vect__12.46;
vector([2,2]) long unsigned int vect__11.45;
vector([2,2]) long unsigned int vect__10.44;
vector([2,2]) unsigned int vect__9.43;
vector([2,2]) unsigned int * vectp_b.41;
vector([2,2]) long unsigned int vect__7.40;
vector([2,2]) <signed-boolean:1> mask__38.39;
vector([2,2]) int vect_pretmp_41.38;
vector([2,2]) int * vectp_a.36;
vector([2,2]) int vect__4.35;
vector([2,2]) int * vectp_pred.33;
unsigned long ivtmp_56;
unsigned long _80;
unsigned long ivtmp_81;
unsigned long ivtmp_82;
unsigned long _83;
<bb 2> [local count: 118111600]:
if (n_19(D) > 0)
goto <bb 4>; [89.00%]
else
goto <bb 3>; [11.00%]
<bb 3> [local count: 118111600]:
return;
<bb 4> [local count: 105119324]:
_80 = (unsigned long) n_19(D);
<bb 5> [local count: 955630224]:
# vectp_pred.33_57 = PHI <vectp_pred.33_58(5), pred_20(D)(4)>
# vectp_a.36_61 = PHI <vectp_a.36_62(5), a_21(D)(4)>
# vectp_b.41_68 = PHI <vectp_b.41_69(5), b_23(D)(4)>
# vectp_a.49_77 = PHI <vectp_a.49_78(5), a_21(D)(4)>
# ivtmp_81 = PHI <ivtmp_82(5), _80(4)>
_83 = .SELECT_VL (ivtmp_81, POLY_INT_CST [2, 2]);
ivtmp_56 = _83 * 4;
vect__4.35_59 = .MASK_LEN_LOAD (vectp_pred.33_57, 32B, { -1, ... }, _83, 0);
vect_pretmp_41.38_63 = .MASK_LEN_LOAD (vectp_a.36_61, 32B, { -1, ... }, _83,
0);
mask__38.39_65 = vect__4.35_59 != { 0, ... };
vect__7.40_66 = (vector([2,2]) long unsigned int) vect_pretmp_41.38_63;
vect__9.43_70 = .MASK_LEN_LOAD (vectp_b.41_68, 32B, mask__38.39_65, _83, 0);
vect__10.44_71 = (vector([2,2]) long unsigned int) vect__9.43_70;
vect__11.45_72 = vect__7.40_66 * vect__10.44_71;
vect__12.46_73 = vect__11.45_72 >> 32;
vect_iftmp.47_74 = (vector([2,2]) int) vect__12.46_73;
vect_iftmp.48_75 = .VCOND_MASK (mask__38.39_65, vect_iftmp.47_74,
vect_pretmp_41.38_63);
.MASK_LEN_STORE (vectp_a.49_77, 32B, { -1, ... }, _83, 0, vect_iftmp.48_75);
vectp_pred.33_58 = vectp_pred.33_57 + ivtmp_56;
vectp_a.36_62 = vectp_a.36_61 + ivtmp_56;
vectp_b.41_69 = vectp_b.41_68 + ivtmp_56;
vectp_a.49_78 = vectp_a.49_77 + ivtmp_56;
ivtmp_82 = ivtmp_81 - _83;
if (ivtmp_82 != 0)
goto <bb 5>; [89.00%]
else
goto <bb 3>; [11.00%]
}
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Bug target/111381] RISC-V: missed autovec MULH for signed * unsigned
2023-09-12 3:36 [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned lehua.ding at rivai dot ai
@ 2023-09-12 12:31 ` rguenth at gcc dot gnu.org
2023-09-15 1:44 ` cvs-commit at gcc dot gnu.org
1 sibling, 0 replies; 3+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-09-12 12:31 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Blocks| |53947
--- Comment #1 from Richard Biener <rguenth at gcc dot gnu.org> ---
I think the only relevant difference is the result type difference, uint32_t vs
int32_t since (int64_t)a[i] * (uint64_t)b[i] should get promoted to a uint64_t
multiplication.
Referenced Bugs:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=53947
[Bug 53947] [meta-bug] vectorizer missed-optimizations
^ permalink raw reply [flat|nested] 3+ messages in thread
* [Bug target/111381] RISC-V: missed autovec MULH for signed * unsigned
2023-09-12 3:36 [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned lehua.ding at rivai dot ai
2023-09-12 12:31 ` [Bug target/111381] " rguenth at gcc dot gnu.org
@ 2023-09-15 1:44 ` cvs-commit at gcc dot gnu.org
1 sibling, 0 replies; 3+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-09-15 1:44 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111381
--- Comment #2 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The trunk branch has been updated by Lehua Ding <lhtin@gcc.gnu.org>:
https://gcc.gnu.org/g:68cb873fd360dbb64f2a6dfb28e79399ff99d07d
commit r14-4008-g68cb873fd360dbb64f2a6dfb28e79399ff99d07d
Author: Lehua Ding <lehua.ding@rivai.ai>
Date: Thu Sep 14 23:35:42 2023 +0800
RISC-V: Support combine extend and reduce sum to widen reduce sum
This patch adds a combine pattern to combine extend and reduce sum
into widen reduce sum. The pattern in autovec.md was adjusted as
needed. Note that the current vectorization cannot generate a reduce
operand which is LMUL=M8, because this means that we would need LMUL=M16
for the extended operand, which is currently not possible. So I've
added the VI_QHS_NO_M8 and VF_HS_NO_M8 mode iterators, which exclude
modes with LMUL=M8.
PR target/111381
gcc/ChangeLog:
* config/riscv/autovec-opt.md (*reduc_plus_scal_<mode>):
New combine pattern.
(*fold_left_widen_plus_<mode>): Ditto.
(*mask_len_fold_left_widen_plus_<mode>): Ditto.
* config/riscv/autovec.md (reduc_plus_scal_<mode>):
Change from define_expand to define_insn_and_split.
(fold_left_plus_<mode>): Ditto.
(mask_len_fold_left_plus_<mode>): Ditto.
* config/riscv/riscv-v.cc (expand_reduction):
Support widen reduction.
* config/riscv/vector-iterators.md (UNSPEC_WREDUC_SUM):
Add new iterators and attrs.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/widen/widen_reduc-1.c: New test.
* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-1.c: New
test.
* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order-2.c: New
test.
* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-1.c: New
test.
* gcc.target/riscv/rvv/autovec/widen/widen_reduc_order_run-2.c: New
test.
* gcc.target/riscv/rvv/autovec/widen/widen_reduc_run-1.c: New test.
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2023-09-15 1:45 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-09-12 3:36 [Bug target/111381] New: RISC-V: missed autovec MULH for signed * unsigned lehua.ding at rivai dot ai
2023-09-12 12:31 ` [Bug target/111381] " rguenth at gcc dot gnu.org
2023-09-15 1:44 ` cvs-commit at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).