public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] amdgcn: multi-size vector reductions
@ 2022-11-01 11:54 Andrew Stubbs
0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2022-11-01 11:54 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:7ac1717e1b286f7f6348d9af711dfa1a723a6252
commit 7ac1717e1b286f7f6348d9af711dfa1a723a6252
Author: Andrew Stubbs <ams@codesourcery.com>
Date: Fri Oct 28 12:38:43 2022 +0100
amdgcn: multi-size vector reductions
Add support for vector reductions for any vector width by switching iterators
and generalising the code slightly. There's no one-instruction way to move an
item from lane 31 to lane 0 (lanes 63, 15, 7, 3, and 1 are all fine though), and
vec_extract is probably fewer cycles anyway, so now we always reduce to an
SGPR.
gcc/ChangeLog:
* config/gcn/gcn-valu.md (V64_SI): Delete iterator.
(V64_DI): Likewise.
(V64_1REG): Likewise.
(V64_INT_1REG): Likewise.
(V64_2REG): Likewise.
(V64_ALL): Likewise.
(V64_FP): Likewise.
(reduc_<reduc_op>_scal_<mode>): Use V_ALL. Use gen_vec_extract.
(fold_left_plus_<mode>): Use V_FP.
(*<reduc_op>_dpp_shr_<mode>): Use V_1REG.
(*<reduc_op>_dpp_shr_<mode>): Use V_DI.
(*plus_carry_dpp_shr_<mode>): Use V_INT_1REG.
(*plus_carry_in_dpp_shr_<mode>): Use V_SI.
(*plus_carry_dpp_shr_<mode>): Use V_DI.
(mov_from_lane63_<mode>): Delete.
(mov_from_lane63_<mode>): Delete.
* config/gcn/gcn.cc (gcn_expand_reduc_scalar): Support partial vectors.
* config/gcn/gcn.md (unspec): Remove UNSPEC_MOV_FROM_LANE63.
(cherry picked from commit f539029c1ce6fb9163422d1a8b6ac12a2554eaa2)
Diff:
---
gcc/ChangeLog.omp | 24 ++++++++++
gcc/config/gcn/gcn-valu.md | 111 ++++++++++++---------------------------------
gcc/config/gcn/gcn.cc | 27 ++++++-----
gcc/config/gcn/gcn.md | 1 -
4 files changed, 69 insertions(+), 94 deletions(-)
diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 5859c22ccd7..6c7cf8a4dae 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,27 @@
+2022-11-01 Andrew Stubbs <ams@codesourcery.com>
+
+ Backport from mainline:
+ 2022-10-31 Andrew Stubbs <ams@codesourcery.com>
+
+ * config/gcn/gcn-valu.md (V64_SI): Delete iterator.
+ (V64_DI): Likewise.
+ (V64_1REG): Likewise.
+ (V64_INT_1REG): Likewise.
+ (V64_2REG): Likewise.
+ (V64_ALL): Likewise.
+ (V64_FP): Likewise.
+ (reduc_<reduc_op>_scal_<mode>): Use V_ALL. Use gen_vec_extract.
+ (fold_left_plus_<mode>): Use V_FP.
+ (*<reduc_op>_dpp_shr_<mode>): Use V_1REG.
+ (*<reduc_op>_dpp_shr_<mode>): Use V_DI.
+ (*plus_carry_dpp_shr_<mode>): Use V_INT_1REG.
+ (*plus_carry_in_dpp_shr_<mode>): Use V_SI.
+ (*plus_carry_dpp_shr_<mode>): Use V_DI.
+ (mov_from_lane63_<mode>): Delete.
+ (mov_from_lane63_<mode>): Delete.
+ * config/gcn/gcn.cc (gcn_expand_reduc_scalar): Support partial vectors.
+ * config/gcn/gcn.md (unspec): Remove UNSPEC_MOV_FROM_LANE63.
+
2022-10-28 Thomas Schwinge <thomas@codesourcery.com>
* omp-low.cc (oacc_privatization_candidate_p) <DECL_ARTIFICIAL>:
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 9691ff480ce..8b113ca7f91 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -32,11 +32,6 @@
(define_mode_iterator V_DF
[V2DF V4DF V8DF V16DF V32DF V64DF])
-(define_mode_iterator V64_SI
- [V64SI])
-(define_mode_iterator V64_DI
- [V64DI])
-
; Vector modes for sub-dword modes
(define_mode_iterator V_QIHI
[V2QI V2HI
@@ -77,13 +72,6 @@
V32HF V32SF
V64HF V64SF])
-; V64_* modes are for where more general support is unimplemented
-; (e.g. reductions)
-(define_mode_iterator V64_1REG
- [V64QI V64HI V64SI V64HF V64SF])
-(define_mode_iterator V64_INT_1REG
- [V64QI V64HI V64SI])
-
; Vector modes for two vector registers
(define_mode_iterator V_2REG
[V2DI V2DF
@@ -93,9 +81,6 @@
V32DI V32DF
V64DI V64DF])
-(define_mode_iterator V64_2REG
- [V64DI V64DF])
-
; Vector modes with native support
(define_mode_iterator V_noQI
[V2HI V2HF V2SI V2SF V2DI V2DF
@@ -158,11 +143,6 @@
V32HF V32SF V32DF
V64HF V64SF V64DF])
-(define_mode_iterator V64_ALL
- [V64QI V64HI V64HF V64SI V64SF V64DI V64DF])
-(define_mode_iterator V64_FP
- [V64HF V64SF V64DF])
-
(define_mode_attr scalar_mode
[(V2QI "qi") (V2HI "hi") (V2SI "si")
(V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
@@ -3539,15 +3519,16 @@
(define_expand "reduc_<reduc_op>_scal_<mode>"
[(set (match_operand:<SCALAR_MODE> 0 "register_operand")
(unspec:<SCALAR_MODE>
- [(match_operand:V64_ALL 1 "register_operand")]
+ [(match_operand:V_ALL 1 "register_operand")]
REDUC_UNSPEC))]
""
{
rtx tmp = gcn_expand_reduc_scalar (<MODE>mode, operands[1],
<reduc_unspec>);
- /* The result of the reduction is in lane 63 of tmp. */
- emit_insn (gen_mov_from_lane63_<mode> (operands[0], tmp));
+ rtx last_lane = GEN_INT (GET_MODE_NUNITS (<MODE>mode) - 1);
+ emit_insn (gen_vec_extract<mode><scalar_mode> (operands[0], tmp,
+ last_lane));
DONE;
})
@@ -3558,7 +3539,7 @@
(define_expand "fold_left_plus_<mode>"
[(match_operand:<SCALAR_MODE> 0 "register_operand")
(match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
- (match_operand:V64_FP 2 "gcn_alu_operand")]
+ (match_operand:V_FP 2 "gcn_alu_operand")]
"can_create_pseudo_p ()
&& (flag_openacc || flag_openmp
|| flag_associative_math)"
@@ -3574,11 +3555,11 @@
})
(define_insn "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V64_1REG 0 "register_operand" "=v")
- (unspec:V64_1REG
- [(match_operand:V64_1REG 1 "register_operand" "v")
- (match_operand:V64_1REG 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V_1REG 0 "register_operand" "=v")
+ (unspec:V_1REG
+ [(match_operand:V_1REG 1 "register_operand" "v")
+ (match_operand:V_1REG 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_UNSPEC))]
; GCN3 requires a carry out, GCN5 not
"!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
@@ -3591,11 +3572,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*<reduc_op>_dpp_shr_<mode>"
- [(set (match_operand:V64_DI 0 "register_operand" "=v")
- (unspec:V64_DI
- [(match_operand:V64_DI 1 "register_operand" "v")
- (match_operand:V64_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V_DI 0 "register_operand" "=v")
+ (unspec:V_DI
+ [(match_operand:V_DI 1 "register_operand" "v")
+ (match_operand:V_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
REDUC_2REG_UNSPEC))]
""
"#"
@@ -3620,10 +3601,10 @@
; Special cases for addition.
(define_insn "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v")
- (unspec:V64_INT_1REG
- [(match_operand:V64_INT_1REG 1 "register_operand" "v")
- (match_operand:V64_INT_1REG 2 "register_operand" "v")
+ [(set (match_operand:V_INT_1REG 0 "register_operand" "=v")
+ (unspec:V_INT_1REG
+ [(match_operand:V_INT_1REG 1 "register_operand" "v")
+ (match_operand:V_INT_1REG 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
@@ -3637,12 +3618,12 @@
(set_attr "length" "8")])
(define_insn "*plus_carry_in_dpp_shr_<mode>"
- [(set (match_operand:V64_SI 0 "register_operand" "=v")
- (unspec:V64_SI
- [(match_operand:V64_SI 1 "register_operand" "v")
- (match_operand:V64_SI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")
- (match_operand:DI 4 "register_operand" "cV")]
+ [(set (match_operand:V_SI 0 "register_operand" "=v")
+ (unspec:V_SI
+ [(match_operand:V_SI 1 "register_operand" "v")
+ (match_operand:V_SI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (match_operand:DI 4 "register_operand" "cV")]
UNSPEC_PLUS_CARRY_IN_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3655,11 +3636,11 @@
(set_attr "length" "8")])
(define_insn_and_split "*plus_carry_dpp_shr_<mode>"
- [(set (match_operand:V64_DI 0 "register_operand" "=v")
- (unspec:V64_DI
- [(match_operand:V64_DI 1 "register_operand" "v")
- (match_operand:V64_DI 2 "register_operand" "v")
- (match_operand:SI 3 "const_int_operand" "n")]
+ [(set (match_operand:V_DI 0 "register_operand" "=v")
+ (unspec:V_DI
+ [(match_operand:V_DI 1 "register_operand" "v")
+ (match_operand:V_DI 2 "register_operand" "v")
+ (match_operand:SI 3 "const_int_operand" "n")]
UNSPEC_PLUS_CARRY_DPP_SHR))
(clobber (reg:DI VCC_REG))]
""
@@ -3686,38 +3667,6 @@
[(set_attr "type" "vmult")
(set_attr "length" "16")])
-; Instructions to move a scalar value from lane 63 of a vector register.
-(define_insn "mov_from_lane63_<mode>"
- [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
- (unspec:<SCALAR_MODE>
- [(match_operand:V64_1REG 1 "register_operand" " v,v")]
- UNSPEC_MOV_FROM_LANE63))]
- ""
- "@
- v_readlane_b32\t%0, %1, 63
- v_mov_b32\t%0, %1 wave_ror:1"
- [(set_attr "type" "vop3a,vop_dpp")
- (set_attr "exec" "none,*")
- (set_attr "length" "8")])
-
-(define_insn "mov_from_lane63_<mode>"
- [(set (match_operand:<SCALAR_MODE> 0 "register_operand" "=Sg,v")
- (unspec:<SCALAR_MODE>
- [(match_operand:V64_2REG 1 "register_operand" " v,v")]
- UNSPEC_MOV_FROM_LANE63))]
- ""
- "@
- v_readlane_b32\t%L0, %L1, 63\;v_readlane_b32\t%H0, %H1, 63
- * if (REGNO (operands[0]) <= REGNO (operands[1])) \
- return \"v_mov_b32\t%L0, %L1 wave_ror:1\;\" \
- \"v_mov_b32\t%H0, %H1 wave_ror:1\"; \
- else \
- return \"v_mov_b32\t%H0, %H1 wave_ror:1\;\" \
- \"v_mov_b32\t%L0, %L1 wave_ror:1\";"
- [(set_attr "type" "vop3a,vop_dpp")
- (set_attr "exec" "none,*")
- (set_attr "length" "8")])
-
;; }}}
;; {{{ Miscellaneous
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 720c0a08a13..d24e7aec9ef 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -4960,23 +4960,25 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn,
The vector register SRC of mode MODE is reduced using the operation given
by UNSPEC, and the scalar result is returned in lane 63 of a vector
- register. */
-/* FIXME: Implement reductions for sizes other than V64.
- (They're currently disabled in the machine description.) */
+ register (or lane 31, 15, 7, 3, 1 for partial vectors). */
rtx
gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
{
machine_mode orig_mode = mode;
+ machine_mode scalar_mode = GET_MODE_INNER (mode);
+ int vf = GET_MODE_NUNITS (mode);
bool use_moves = (((unspec == UNSPEC_SMIN_DPP_SHR
+ || unspec == UNSPEC_SMIN_DPP_SHR
|| unspec == UNSPEC_SMAX_DPP_SHR
|| unspec == UNSPEC_UMIN_DPP_SHR
|| unspec == UNSPEC_UMAX_DPP_SHR)
- && (mode == V64DImode
- || mode == V64DFmode))
+ && (scalar_mode == DImode
+ || scalar_mode == DFmode))
|| (unspec == UNSPEC_PLUS_DPP_SHR
- && mode == V64DFmode));
+ && scalar_mode == DFmode));
rtx_code code = (unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
+ : unspec == UNSPEC_SMIN_DPP_SHR ? SMIN
: unspec == UNSPEC_SMAX_DPP_SHR ? SMAX
: unspec == UNSPEC_UMIN_DPP_SHR ? UMIN
: unspec == UNSPEC_UMAX_DPP_SHR ? UMAX
@@ -4986,23 +4988,23 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
|| unspec == UNSPEC_SMAX_DPP_SHR
|| unspec == UNSPEC_UMIN_DPP_SHR
|| unspec == UNSPEC_UMAX_DPP_SHR)
- && (mode == V64QImode
- || mode == V64HImode));
+ && (scalar_mode == QImode
+ || scalar_mode == HImode));
bool unsignedp = (unspec == UNSPEC_UMIN_DPP_SHR
|| unspec == UNSPEC_UMAX_DPP_SHR);
bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
- && (TARGET_GCN3 || mode == V64DImode);
+ && (TARGET_GCN3 || scalar_mode == DImode);
if (use_plus_carry)
unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
if (use_extends)
{
- rtx tmp = gen_reg_rtx (V64SImode);
+ mode = VnMODE (vf, SImode);
+ rtx tmp = gen_reg_rtx (mode);
convert_move (tmp, src, unsignedp);
src = tmp;
- mode = V64SImode;
}
/* Perform reduction by first performing the reduction operation on every
@@ -5010,7 +5012,8 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
iteration (thereby effectively reducing every 4 lanes) and so on until
all lanes are reduced. */
rtx in, out = force_reg (mode, src);
- for (int i = 0, shift = 1; i < 6; i++, shift <<= 1)
+ int iterations = exact_log2 (vf);
+ for (int i = 0, shift = 1; i < iterations; i++, shift <<= 1)
{
rtx shift_val = gen_rtx_CONST_INT (VOIDmode, shift);
in = out;
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index d13e7d2678d..1ac0ad0d818 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -78,7 +78,6 @@
UNSPEC_PLUS_CARRY_DPP_SHR UNSPEC_PLUS_CARRY_IN_DPP_SHR
UNSPEC_AND_DPP_SHR UNSPEC_IOR_DPP_SHR UNSPEC_XOR_DPP_SHR
UNSPEC_MOV_DPP_SHR
- UNSPEC_MOV_FROM_LANE63
UNSPEC_GATHER
UNSPEC_SCATTER
UNSPEC_RCP
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2022-11-01 11:54 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-01 11:54 [gcc/devel/omp/gcc-12] amdgcn: multi-size vector reductions Andrew Stubbs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).