public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-13] amdgcn: minimal V64TImode vector support
@ 2023-06-20  9:51 Andrew Stubbs
  0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2023-06-20  9:51 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:1abff151d24dd7eb064b8b9a5c6c0878f613f35f

commit 1abff151d24dd7eb064b8b9a5c6c0878f613f35f
Author: Andrew Stubbs <ams@codesourcery.com>
Date:   Thu May 25 15:53:15 2023 +0100

    amdgcn: minimal V64TImode vector support
    
    Just enough support for TImode vectors to exist, load, store, move,
    without any real instructions available.
    
    This is primarily for the use of divmodv64di4, which uses TImode to
    return a pair of DImode values.
    
    gcc/ChangeLog:
    
            * config/gcn/gcn-protos.h (vgpr_4reg_mode_p): New function.
            * config/gcn/gcn-valu.md (V_4REG, V_4REG_ALT): New iterators.
            (V_MOV, V_MOV_ALT): Likewise.
            (scalar_mode, SCALAR_MODE): Add TImode.
            (vnsi, VnSI, vndi, VnDI): Likewise.
            (vec_merge, vec_merge_with_clobber, vec_merge_with_vcc): Use V_MOV.
            (mov<mode>, mov<mode>_unspec): Use V_MOV.
            (*mov<mode>_4reg): New insn.
            (mov<mode>_exec): New 4reg variant.
            (mov<mode>_sgprbase): Likewise.
            (reload_in<mode>, reload_out<mode>): Use V_MOV.
            (vec_set<mode>): Likewise.
            (vec_duplicate<mode><exec>): New 4reg variant.
            (vec_extract<mode><scalar_mode>): Likewise.
            (vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Rename to ...
            (vec_extract<V_MOV:mode><V_MOV_ALT:mode>): ... this, and use V_MOV.
            (vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop): New 4reg variant.
            (fold_extract_last_<mode>): Use V_MOV.
            (vec_init<V_ALL:mode><V_ALL_ALT:mode>): Rename to ...
            (vec_init<V_MOV:mode><V_MOV_ALT:mode>): ... this, and use V_MOV.
            (gather_load<mode><vnsi>, gather<mode>_expr<exec>,
            gather<mode>_insn_1offset<exec>, gather<mode>_insn_1offset_ds<exec>,
            gather<mode>_insn_2offsets<exec>): Use V_MOV.
            (scatter_store<mode><vnsi>, scatter<mode>_expr<exec_scatter>,
            scatter<mode>_insn_1offset<exec_scatter>,
            scatter<mode>_insn_1offset_ds<exec_scatter>,
            scatter<mode>_insn_2offsets<exec_scatter>): Likewise.
            (maskload<mode>di, maskstore<mode>di, mask_gather_load<mode><vnsi>,
            mask_scatter_store<mode><vnsi>): Likewise.
            * config/gcn/gcn.cc (gcn_class_max_nregs): Use vgpr_4reg_mode_p.
            (gcn_hard_regno_mode_ok): Likewise.
            (GEN_VNM): Add TImode support.
            (USE_TI): New macro. Separate TImode operations from non-TImode ones.
            (gcn_vector_mode_supported_p): Add V64TImode, V32TImode, V16TImode,
            V8TImode, and V2TImode.
            (print_operand): Add 'J' and 'K' print codes.
    
    (cherry picked from commit 8aeabd9f63d8a54a5fa0b038ad4425a999e1cc75)

Diff:
---
 gcc/ChangeLog.omp           |  43 +++++
 gcc/config/gcn/gcn-protos.h |  11 ++
 gcc/config/gcn/gcn-valu.md  | 384 +++++++++++++++++++++++++++++---------------
 gcc/config/gcn/gcn.cc       |  42 ++++-
 4 files changed, 346 insertions(+), 134 deletions(-)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index cbc29c12ca2..486ba9e733f 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,46 @@
+2023-06-20  Andrew Stubbs  <ams@codesourcery.com>
+
+	Backport from mainline:
+	2023-06-19  Andrew Stubbs  <ams@codesourcery.com>
+
+	* config/gcn/gcn-protos.h (vgpr_4reg_mode_p): New function.
+	* config/gcn/gcn-valu.md (V_4REG, V_4REG_ALT): New iterators.
+	(V_MOV, V_MOV_ALT): Likewise.
+	(scalar_mode, SCALAR_MODE): Add TImode.
+	(vnsi, VnSI, vndi, VnDI): Likewise.
+	(vec_merge, vec_merge_with_clobber, vec_merge_with_vcc): Use V_MOV.
+	(mov<mode>, mov<mode>_unspec): Use V_MOV.
+	(*mov<mode>_4reg): New insn.
+	(mov<mode>_exec): New 4reg variant.
+	(mov<mode>_sgprbase): Likewise.
+	(reload_in<mode>, reload_out<mode>): Use V_MOV.
+	(vec_set<mode>): Likewise.
+	(vec_duplicate<mode><exec>): New 4reg variant.
+	(vec_extract<mode><scalar_mode>): Likewise.
+	(vec_extract<V_ALL:mode><V_ALL_ALT:mode>): Rename to ...
+	(vec_extract<V_MOV:mode><V_MOV_ALT:mode>): ... this, and use V_MOV.
+	(vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop): New 4reg variant.
+	(fold_extract_last_<mode>): Use V_MOV.
+	(vec_init<V_ALL:mode><V_ALL_ALT:mode>): Rename to ...
+	(vec_init<V_MOV:mode><V_MOV_ALT:mode>): ... this, and use V_MOV.
+	(gather_load<mode><vnsi>, gather<mode>_expr<exec>,
+	gather<mode>_insn_1offset<exec>, gather<mode>_insn_1offset_ds<exec>,
+	gather<mode>_insn_2offsets<exec>): Use V_MOV.
+	(scatter_store<mode><vnsi>, scatter<mode>_expr<exec_scatter>,
+	scatter<mode>_insn_1offset<exec_scatter>,
+	scatter<mode>_insn_1offset_ds<exec_scatter>,
+	scatter<mode>_insn_2offsets<exec_scatter>): Likewise.
+	(maskload<mode>di, maskstore<mode>di, mask_gather_load<mode><vnsi>,
+	mask_scatter_store<mode><vnsi>): Likewise.
+	* config/gcn/gcn.cc (gcn_class_max_nregs): Use vgpr_4reg_mode_p.
+	(gcn_hard_regno_mode_ok): Likewise.
+	(GEN_VNM): Add TImode support.
+	(USE_TI): New macro. Separate TImode operations from non-TImode ones.
+	(gcn_vector_mode_supported_p): Add V64TImode, V32TImode, V16TImode,
+	V8TImode, and V2TImode.
+	(print_operand):  Add 'J' and 'K' print codes.
+
+
 2023-06-19  Julian Brown  <julian@codesourcery.com>
 
 	* gimplify.cc (gimplify_adjust_omp_clauses_1): Set
diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h
index 287ce17d422..3befb2b7caa 100644
--- a/gcc/config/gcn/gcn-protos.h
+++ b/gcc/config/gcn/gcn-protos.h
@@ -136,6 +136,17 @@ vgpr_2reg_mode_p (machine_mode mode)
   return (mode == DImode || mode == DFmode);
 }
 
+/* Return true if MODE is valid for four VGPR registers.  */
+
+inline bool
+vgpr_4reg_mode_p (machine_mode mode)
+{
+  if (VECTOR_MODE_P (mode))
+    mode = GET_MODE_INNER (mode);
+
+  return (mode == TImode);
+}
+
 /* Return true if MODE can be handled directly by VGPR operations.  */
 
 inline bool
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 1087899077b..109a28932c4 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -96,6 +96,10 @@
 		       V32DI V32DF
 		       V64DI V64DF])
 
+; Vector modes for four vector registers
+(define_mode_iterator V_4REG [V2TI V4TI V8TI V16TI V32TI V64TI])
+(define_mode_iterator V_4REG_ALT [V2TI V4TI V8TI V16TI V32TI V64TI])
+
 ; Vector modes with native support
 (define_mode_iterator V_noQI
 		      [V2HI V2HF V2SI V2SF V2DI V2DF
@@ -136,7 +140,7 @@
 		       V32SF V32DF
 		       V64SF V64DF])
 
-; All of above
+; All modes in which we want to do more than just moves.
 (define_mode_iterator V_ALL
 		      [V2QI V2HI V2HF V2SI V2SF V2DI V2DF
 		       V4QI V4HI V4HF V4SI V4SF V4DI V4DF
@@ -175,97 +179,113 @@
 		       V32HF V32SF V32DF
 		       V64HF V64SF V64DF])
 
+; All modes that need moves, including those without many insns.
+(define_mode_iterator V_MOV
+		      [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
+		       V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
+		       V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
+		       V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
+		       V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
+		       V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
+(define_mode_iterator V_MOV_ALT
+		      [V2QI V2HI V2HF V2SI V2SF V2DI V2DF V2TI
+		       V4QI V4HI V4HF V4SI V4SF V4DI V4DF V4TI
+		       V8QI V8HI V8HF V8SI V8SF V8DI V8DF V8TI
+		       V16QI V16HI V16HF V16SI V16SF V16DI V16DF V16TI
+		       V32QI V32HI V32HF V32SI V32SF V32DI V32DF V32TI
+		       V64QI V64HI V64HF V64SI V64SF V64DI V64DF V64TI])
+
 (define_mode_attr scalar_mode
-  [(QI "qi") (HI "hi") (SI "si")
+  [(QI "qi") (HI "hi") (SI "si") (TI "ti")
    (HF "hf") (SF "sf") (DI "di") (DF "df")
-   (V2QI "qi") (V2HI "hi") (V2SI "si")
+   (V2QI "qi") (V2HI "hi") (V2SI "si") (V2TI "ti")
    (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df")
-   (V4QI "qi") (V4HI "hi") (V4SI "si")
+   (V4QI "qi") (V4HI "hi") (V4SI "si") (V4TI "ti")
    (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df")
-   (V8QI "qi") (V8HI "hi") (V8SI "si")
+   (V8QI "qi") (V8HI "hi") (V8SI "si") (V8TI "ti")
    (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df")
-   (V16QI "qi") (V16HI "hi") (V16SI "si")
+   (V16QI "qi") (V16HI "hi") (V16SI "si") (V16TI "ti")
    (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df")
-   (V32QI "qi") (V32HI "hi") (V32SI "si")
+   (V32QI "qi") (V32HI "hi") (V32SI "si") (V32TI "ti")
    (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df")
-   (V64QI "qi") (V64HI "hi") (V64SI "si")
+   (V64QI "qi") (V64HI "hi") (V64SI "si") (V64TI "ti")
    (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
 
 (define_mode_attr SCALAR_MODE
-  [(QI "QI") (HI "HI") (SI "SI")
+  [(QI "QI") (HI "HI") (SI "SI") (TI "TI")
    (HF "HF") (SF "SF") (DI "DI") (DF "DF")
-   (V2QI "QI") (V2HI "HI") (V2SI "SI")
+   (V2QI "QI") (V2HI "HI") (V2SI "SI") (V2TI "TI")
    (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF")
-   (V4QI "QI") (V4HI "HI") (V4SI "SI")
+   (V4QI "QI") (V4HI "HI") (V4SI "SI") (V4TI "TI")
    (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF")
-   (V8QI "QI") (V8HI "HI") (V8SI "SI")
+   (V8QI "QI") (V8HI "HI") (V8SI "SI") (V8TI "TI")
    (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF")
-   (V16QI "QI") (V16HI "HI") (V16SI "SI")
+   (V16QI "QI") (V16HI "HI") (V16SI "SI") (V16TI "TI")
    (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF")
-   (V32QI "QI") (V32HI "HI") (V32SI "SI")
+   (V32QI "QI") (V32HI "HI") (V32SI "SI") (V32TI "TI")
    (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF")
-   (V64QI "QI") (V64HI "HI") (V64SI "SI")
+   (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64TI "TI")
    (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
 
 (define_mode_attr vnsi
-  [(QI "si") (HI "si") (SI "si")
+  [(QI "si") (HI "si") (SI "si") (TI "si")
    (HF "si") (SF "si") (DI "si") (DF "si")
    (V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si")
-   (V2SF "v2si") (V2DI "v2si") (V2DF "v2si")
+   (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") (V2TI "v2si")
    (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si")
-   (V4SF "v4si") (V4DI "v4si") (V4DF "v4si")
+   (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") (V4TI "v4si")
    (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si")
-   (V8SF "v8si") (V8DI "v8si") (V8DF "v8si")
+   (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") (V8TI "v8si")
    (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si")
-   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si")
+   (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") (V16TI "v16si")
    (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si")
-   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si")
+   (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") (V32TI "v32si")
    (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si")
-   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")])
+   (V64SF "v64si") (V64DI "v64si") (V64DF "v64si") (V64TI "v64si")])
 
 (define_mode_attr VnSI
-  [(QI "SI") (HI "SI") (SI "SI")
+  [(QI "SI") (HI "SI") (SI "SI") (TI "SI")
    (HF "SI") (SF "SI") (DI "SI") (DF "SI")
    (V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI")
-   (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI")
+   (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") (V2TI "V2SI")
    (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI")
-   (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI")
+   (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") (V4TI "V4SI")
    (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI")
-   (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI")
+   (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") (V8TI "V8SI")
    (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI")
-   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI")
+   (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") (V16TI "V16SI")
    (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI")
-   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI")
+   (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") (V32TI "V32SI")
    (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI")
-   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")])
+   (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI") (V64TI "V64SI")])
 
 (define_mode_attr vndi
   [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di")
-   (V2SF "v2di") (V2DI "v2di") (V2DF "v2di")
+   (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") (V2TI "v2di")
    (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di")
-   (V4SF "v4di") (V4DI "v4di") (V4DF "v4di")
+   (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") (V4TI "v4di")
    (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di")
-   (V8SF "v8di") (V8DI "v8di") (V8DF "v8di")
+   (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") (V8TI "v8di")
    (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di")
-   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di")
+   (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") (V16TI "v16di")
    (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di")
-   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di")
+   (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") (V32TI "v32di")
    (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di")
-   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")])
+   (V64SF "v64di") (V64DI "v64di") (V64DF "v64di") (V64TI "v64di")])
 
 (define_mode_attr VnDI
   [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI")
-   (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI")
+   (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") (V2TI "V2DI")
    (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI")
-   (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI")
+   (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") (V4TI "V4DI")
    (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI")
-   (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI")
+   (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") (V8TI "V8DI")
    (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI")
-   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI")
+   (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") (V16TI "V16DI")
    (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI")
-   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI")
+   (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") (V32TI "V32DI")
    (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI")
-   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")])
+   (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI") (V64TI "V64DI")])
 
 (define_mode_attr sdwa
   [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD")
@@ -288,38 +308,38 @@
 		   "" "_exec")
 
 (define_subst "vec_merge"
-  [(set (match_operand:V_ALL 0)
-	(match_operand:V_ALL 1))]
+  [(set (match_operand:V_MOV 0)
+	(match_operand:V_MOV 1))]
   ""
   [(set (match_dup 0)
-	(vec_merge:V_ALL
+	(vec_merge:V_MOV
 	  (match_dup 1)
-	  (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
+	  (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
 	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
 
 (define_subst "vec_merge_with_clobber"
-  [(set (match_operand:V_ALL 0)
-	(match_operand:V_ALL 1))
+  [(set (match_operand:V_MOV 0)
+	(match_operand:V_MOV 1))
    (clobber (match_operand 2))]
   ""
   [(set (match_dup 0)
-	(vec_merge:V_ALL
+	(vec_merge:V_MOV
 	  (match_dup 1)
-	  (match_operand:V_ALL 3 "gcn_register_or_unspec_operand" "U0")
+	  (match_operand:V_MOV 3 "gcn_register_or_unspec_operand" "U0")
 	  (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
    (clobber (match_dup 2))])
 
 (define_subst "vec_merge_with_vcc"
-  [(set (match_operand:V_ALL 0)
-	(match_operand:V_ALL 1))
+  [(set (match_operand:V_MOV 0)
+	(match_operand:V_MOV 1))
    (set (match_operand:DI 2)
 	(match_operand:DI 3))]
   ""
   [(parallel
      [(set (match_dup 0)
-	   (vec_merge:V_ALL
+	   (vec_merge:V_MOV
 	     (match_dup 1)
-	     (match_operand:V_ALL 4 "gcn_register_or_unspec_operand" "U0")
+	     (match_operand:V_MOV 4 "gcn_register_or_unspec_operand" "U0")
 	     (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
       (set (match_dup 2)
 	   (and:DI (match_dup 3)
@@ -351,8 +371,8 @@
 ; gather/scatter, maskload/store, etc.
 
 (define_expand "mov<mode>"
-  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
-	(match_operand:V_ALL 1 "general_operand"))]
+  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
+	(match_operand:V_MOV 1 "general_operand"))]
   ""
   {
     /* Bitwise reinterpret casts via SUBREG don't work with GCN vector
@@ -421,8 +441,8 @@
 ; A pseudo instruction that helps LRA use the "U0" constraint.
 
 (define_insn "mov<mode>_unspec"
-  [(set (match_operand:V_ALL 0 "nonimmediate_operand" "=v")
-	(match_operand:V_ALL 1 "gcn_unspec_operand"   " U"))]
+  [(set (match_operand:V_MOV 0 "nonimmediate_operand" "=v")
+	(match_operand:V_MOV 1 "gcn_unspec_operand"   " U"))]
   ""
   ""
   [(set_attr "type" "unknown")
@@ -527,6 +547,69 @@
   [(set_attr "type" "vmult,vmult,vmult,*,*")
    (set_attr "length" "16,16,16,16,16")])
 
+(define_insn "*mov<mode>_4reg"
+  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "=v")
+	(match_operand:V_4REG 1 "general_operand"      "vDB"))]
+  ""
+  {
+    return "v_mov_b32\t%L0, %L1\;"
+           "v_mov_b32\t%H0, %H1\;"
+           "v_mov_b32\t%J0, %J1\;"
+           "v_mov_b32\t%K0, %K1\;";
+  }
+  [(set_attr "type" "vmult")
+   (set_attr "length" "16")])
+
+(define_insn "mov<mode>_exec"
+  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v,   v,   v, v, m")
+	(vec_merge:V_4REG
+	  (match_operand:V_4REG 1 "general_operand"    "vDB,  v0,  v0, m, v")
+	  (match_operand:V_4REG 2 "gcn_alu_or_unspec_operand"
+						       " U0,vDA0,vDA0,U0,U0")
+	  (match_operand:DI 3 "register_operand"       "  e,  cV,  Sv, e, e")))
+   (clobber (match_scratch:<VnDI> 4		       "= X,   X,   X,&v,&v"))]
+  "!MEM_P (operands[0]) || REG_P (operands[1])"
+  {
+    if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
+      switch (which_alternative)
+	{
+	case 0:
+	  return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;"
+                 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
+	case 1:
+	  return "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
+		 "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
+		 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
+		 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
+	case 2:
+	  return "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
+		 "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
+		 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
+		 "v_cndmask_b32\t%K0, %K2, %K1, %3";
+	}
+    else
+      switch (which_alternative)
+	{
+	case 0:
+	  return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\;"
+                 "v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1";
+	case 1:
+	  return "v_cndmask_b32\t%H0, %H2, %H1, vcc\;"
+		 "v_cndmask_b32\t%L0, %L2, %L1, vcc\;"
+		 "v_cndmask_b32\t%J0, %J2, %J1, vcc\;"
+		 "v_cndmask_b32\t%K0, %K2, %K1, vcc";
+	case 2:
+	  return "v_cndmask_b32\t%H0, %H2, %H1, %3\;"
+		 "v_cndmask_b32\t%L0, %L2, %L1, %3\;"
+		 "v_cndmask_b32\t%J0, %J2, %J1, %3\;"
+		 "v_cndmask_b32\t%K0, %K2, %K1, %3";
+	}
+
+    return "#";
+  }
+  [(set_attr "type" "vmult,vmult,vmult,*,*")
+   (set_attr "length" "32")])
+
 ; This variant does not accept an unspec, but does permit MEM
 ; read/modify/write which is necessary for maskstore.
 
@@ -592,12 +675,25 @@
   [(set_attr "type" "vmult,*,*")
    (set_attr "length" "8,12,12")])
 
+(define_insn "mov<mode>_sgprbase"
+  [(set (match_operand:V_4REG 0 "nonimmediate_operand" "= v, v, m")
+	(unspec:V_4REG
+	  [(match_operand:V_4REG 1 "general_operand"   "vDB, m, v")]
+	  UNSPEC_SGPRBASE))
+   (clobber (match_operand:<VnDI> 2 "register_operand"  "=&v,&v,&v"))]
+  "lra_in_progress || reload_completed"
+  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1
+   #
+   #"
+  [(set_attr "type" "vmult,*,*")
+   (set_attr "length" "8,12,12")])
+
 ; reload_in was once a standard name, but here it's only referenced by
 ; gcn_secondary_reload.  It allows a reload with a scratch register.
 
 (define_expand "reload_in<mode>"
-  [(set (match_operand:V_ALL 0 "register_operand"     "= v")
-	(match_operand:V_ALL 1 "memory_operand"	      "  m"))
+  [(set (match_operand:V_MOV 0 "register_operand"     "= v")
+	(match_operand:V_MOV 1 "memory_operand"	      "  m"))
    (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
   ""
   {
@@ -608,8 +704,8 @@
 ; reload_out is similar to reload_in, above.
 
 (define_expand "reload_out<mode>"
-  [(set (match_operand:V_ALL 0 "memory_operand"	      "= m")
-	(match_operand:V_ALL 1 "register_operand"     "  v"))
+  [(set (match_operand:V_MOV 0 "memory_operand"	      "= m")
+	(match_operand:V_MOV 1 "register_operand"     "  v"))
    (clobber (match_operand:<VnDI> 2 "register_operand" "=&v"))]
   ""
   {
@@ -620,9 +716,9 @@
 ; Expand scalar addresses into gather/scatter patterns
 
 (define_split
-  [(set (match_operand:V_ALL 0 "memory_operand")
-	(unspec:V_ALL
-	  [(match_operand:V_ALL 1 "general_operand")]
+  [(set (match_operand:V_MOV 0 "memory_operand")
+	(unspec:V_MOV
+	  [(match_operand:V_MOV 1 "general_operand")]
 	  UNSPEC_SGPRBASE))
    (clobber (match_scratch:<VnDI> 2))]
   ""
@@ -638,10 +734,10 @@
   })
 
 (define_split
-  [(set (match_operand:V_ALL 0 "memory_operand")
-	(vec_merge:V_ALL
-	  (match_operand:V_ALL 1 "general_operand")
-	  (match_operand:V_ALL 2 "")
+  [(set (match_operand:V_MOV 0 "memory_operand")
+	(vec_merge:V_MOV
+	  (match_operand:V_MOV 1 "general_operand")
+	  (match_operand:V_MOV 2 "")
 	  (match_operand:DI 3 "gcn_exec_reg_operand")))
    (clobber (match_scratch:<VnDI> 4))]
   ""
@@ -659,14 +755,14 @@
   })
 
 (define_split
-  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
-	(unspec:V_ALL
-	  [(match_operand:V_ALL 1 "memory_operand")]
+  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
+	(unspec:V_MOV
+	  [(match_operand:V_MOV 1 "memory_operand")]
 	  UNSPEC_SGPRBASE))
    (clobber (match_scratch:<VnDI> 2))]
   ""
   [(set (match_dup 0)
-	(unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
+	(unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
 		       (mem:BLK (scratch))]
 		      UNSPEC_GATHER))]
   {
@@ -678,16 +774,16 @@
   })
 
 (define_split
-  [(set (match_operand:V_ALL 0 "nonimmediate_operand")
-	(vec_merge:V_ALL
-	  (match_operand:V_ALL 1 "memory_operand")
-	  (match_operand:V_ALL 2 "")
+  [(set (match_operand:V_MOV 0 "nonimmediate_operand")
+	(vec_merge:V_MOV
+	  (match_operand:V_MOV 1 "memory_operand")
+	  (match_operand:V_MOV 2 "")
 	  (match_operand:DI 3 "gcn_exec_reg_operand")))
    (clobber (match_scratch:<VnDI> 4))]
   ""
   [(set (match_dup 0)
-	(vec_merge:V_ALL
-	  (unspec:V_ALL [(match_dup 5) (match_dup 6) (match_dup 7)
+	(vec_merge:V_MOV
+	  (unspec:V_MOV [(match_dup 5) (match_dup 6) (match_dup 7)
 			 (mem:BLK (scratch))]
 			 UNSPEC_GATHER)
 	  (match_dup 2)
@@ -744,9 +840,9 @@
    (set_attr "laneselect" "yes")])
 
 (define_expand "vec_set<mode>"
-  [(set (match_operand:V_ALL 0 "register_operand")
-	(vec_merge:V_ALL
-	  (vec_duplicate:V_ALL
+  [(set (match_operand:V_MOV 0 "register_operand")
+	(vec_merge:V_MOV
+	  (vec_duplicate:V_MOV
 	    (match_operand:<SCALAR_MODE> 1 "register_operand"))
 	  (match_dup 0)
 	  (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
@@ -804,6 +900,15 @@
   [(set_attr "type" "vop3a")
    (set_attr "length" "16")])
 
+(define_insn "vec_duplicate<mode><exec>"
+  [(set (match_operand:V_4REG 0 "register_operand"	   "=  v")
+	(vec_duplicate:V_4REG
+	  (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand" "SvDB")))]
+  ""
+  "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
+  [(set_attr "type" "mult")
+   (set_attr "length" "32")])
+
 (define_insn "vec_extract<mode><scalar_mode>"
   [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=Sg")
 	(vec_select:<SCALAR_MODE>
@@ -828,6 +933,18 @@
    (set_attr "exec" "none")
    (set_attr "laneselect" "yes")])
 
+(define_insn "vec_extract<mode><scalar_mode>"
+  [(set (match_operand:<SCALAR_MODE> 0 "register_operand"  "=&Sg")
+	(vec_select:<SCALAR_MODE>
+	  (match_operand:V_4REG 1 "register_operand"	   "   v")
+	  (parallel [(match_operand:SI 2 "gcn_alu_operand" " SvB")])))]
+  ""
+  "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2\;v_readlane_b32 %J0, %J1, %2\;v_readlane_b32 %K0, %K1, %2"
+  [(set_attr "type" "vmult")
+   (set_attr "length" "32")
+   (set_attr "exec" "none")
+   (set_attr "laneselect" "yes")])
+
 (define_insn "vec_extract<V_1REG:mode><V_1REG_ALT:mode>_nop"
   [(set (match_operand:V_1REG_ALT 0 "register_operand" "=v,v")
 	(vec_select:V_1REG_ALT
@@ -854,39 +971,52 @@
   [(set_attr "type" "vmult")
    (set_attr "length" "0,8")])
   
-(define_expand "vec_extract<V_ALL:mode><V_ALL_ALT:mode>"
-  [(match_operand:V_ALL_ALT 0 "register_operand")
-   (match_operand:V_ALL 1 "register_operand")
+(define_insn "vec_extract<V_4REG:mode><V_4REG_ALT:mode>_nop"
+  [(set (match_operand:V_4REG_ALT 0 "register_operand" "=v,v")
+	(vec_select:V_4REG_ALT
+	  (match_operand:V_4REG 1 "register_operand"   " 0,v")
+	  (match_operand 2 "ascending_zero_int_parallel" "")))]
+  "MODE_VF (<V_4REG_ALT:MODE>mode) < MODE_VF (<V_4REG:MODE>mode)
+   && <V_4REG_ALT:SCALAR_MODE>mode == <V_4REG:SCALAR_MODE>mode"
+  "@
+  ; in-place extract %0
+  v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\;v_mov_b32\t%J0, %J1\;v_mov_b32\t%K0, %K1"
+  [(set_attr "type" "vmult")
+   (set_attr "length" "0,16")])
+  
+(define_expand "vec_extract<V_MOV:mode><V_MOV_ALT:mode>"
+  [(match_operand:V_MOV_ALT 0 "register_operand")
+   (match_operand:V_MOV 1 "register_operand")
    (match_operand 2 "immediate_operand")]
-  "MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)
-   && <V_ALL_ALT:SCALAR_MODE>mode == <V_ALL:SCALAR_MODE>mode"
+  "MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)
+   && <V_MOV_ALT:SCALAR_MODE>mode == <V_MOV:SCALAR_MODE>mode"
   {
-    int numlanes = GET_MODE_NUNITS (<V_ALL_ALT:MODE>mode);
+    int numlanes = GET_MODE_NUNITS (<V_MOV_ALT:MODE>mode);
     int firstlane = INTVAL (operands[2]) * numlanes;
     rtx tmp;
 
     if (firstlane == 0)
       {
-	rtx parallel = gen_rtx_PARALLEL (<V_ALL:MODE>mode,
+	rtx parallel = gen_rtx_PARALLEL (<V_MOV:MODE>mode,
 					  rtvec_alloc (numlanes));
 	for (int i = 0; i < numlanes; i++)
 	  XVECEXP (parallel, 0, i) = GEN_INT (i);
-	emit_insn (gen_vec_extract<V_ALL:mode><V_ALL_ALT:mode>_nop
+	emit_insn (gen_vec_extract<V_MOV:mode><V_MOV_ALT:mode>_nop
 		   (operands[0], operands[1], parallel));
       } else {
         /* FIXME: optimize this by using DPP where available.  */
 
-        rtx permutation = gen_reg_rtx (<V_ALL:VnSI>mode);
-	emit_insn (gen_vec_series<V_ALL:vnsi> (permutation,
+        rtx permutation = gen_reg_rtx (<V_MOV:VnSI>mode);
+	emit_insn (gen_vec_series<V_MOV:vnsi> (permutation,
 					       GEN_INT (firstlane*4),
 					       GEN_INT (4)));
 
-	tmp = gen_reg_rtx (<V_ALL:MODE>mode);
-	emit_insn (gen_ds_bpermute<V_ALL:mode> (tmp, permutation, operands[1],
-						get_exec (<V_ALL:MODE>mode)));
+	tmp = gen_reg_rtx (<V_MOV:MODE>mode);
+	emit_insn (gen_ds_bpermute<V_MOV:mode> (tmp, permutation, operands[1],
+						get_exec (<V_MOV:MODE>mode)));
 
 	emit_move_insn (operands[0],
-			gen_rtx_SUBREG (<V_ALL_ALT:MODE>mode, tmp, 0));
+			gen_rtx_SUBREG (<V_MOV_ALT:MODE>mode, tmp, 0));
       }
     DONE;
   })
@@ -894,7 +1024,7 @@
 (define_expand "extract_last_<mode>"
   [(match_operand:<SCALAR_MODE> 0 "register_operand")
    (match_operand:DI 1 "gcn_alu_operand")
-   (match_operand:V_ALL 2 "register_operand")]
+   (match_operand:V_MOV 2 "register_operand")]
   "can_create_pseudo_p ()"
   {
     rtx dst = operands[0];
@@ -912,7 +1042,7 @@
   [(match_operand:<SCALAR_MODE> 0 "register_operand")
    (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand")
    (match_operand:DI 2 "gcn_alu_operand")
-   (match_operand:V_ALL 3 "register_operand")]
+   (match_operand:V_MOV 3 "register_operand")]
   "can_create_pseudo_p ()"
   {
     rtx dst = operands[0];
@@ -934,7 +1064,7 @@
   })
 
 (define_expand "vec_init<mode><scalar_mode>"
-  [(match_operand:V_ALL 0 "register_operand")
+  [(match_operand:V_MOV 0 "register_operand")
    (match_operand 1)]
   ""
   {
@@ -942,11 +1072,11 @@
     DONE;
   })
 
-(define_expand "vec_init<V_ALL:mode><V_ALL_ALT:mode>"
-  [(match_operand:V_ALL 0 "register_operand")
-   (match_operand:V_ALL_ALT 1)]
-  "<V_ALL:SCALAR_MODE>mode == <V_ALL_ALT:SCALAR_MODE>mode
-   && MODE_VF (<V_ALL_ALT:MODE>mode) < MODE_VF (<V_ALL:MODE>mode)"
+(define_expand "vec_init<V_MOV:mode><V_MOV_ALT:mode>"
+  [(match_operand:V_MOV 0 "register_operand")
+   (match_operand:V_MOV_ALT 1)]
+  "<V_MOV:SCALAR_MODE>mode == <V_MOV_ALT:SCALAR_MODE>mode
+   && MODE_VF (<V_MOV_ALT:MODE>mode) < MODE_VF (<V_MOV:MODE>mode)"
   {
     gcn_expand_vector_init (operands[0], operands[1]);
     DONE;
@@ -988,7 +1118,7 @@
 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
 
 (define_expand "gather_load<mode><vndi>"
-  [(match_operand:V_ALL 0 "register_operand")
+  [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand:<VnDI> 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -1016,7 +1146,7 @@
   })
 
 (define_expand "gather_load<mode><vnsi>"
-  [(match_operand:V_ALL 0 "register_operand")
+  [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand:<VnSI> 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -1039,8 +1169,8 @@
 
 ; Allow any address expression
 (define_expand "gather<mode>_expr<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand")
-	(unspec:V_ALL
+  [(set (match_operand:V_MOV 0 "register_operand")
+	(unspec:V_MOV
 	  [(match_operand 1 "")
 	   (match_operand 2 "immediate_operand")
 	   (match_operand 3 "immediate_operand")
@@ -1050,8 +1180,8 @@
     {})
 
 (define_insn "gather<mode>_insn_1offset<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"		   "=v,&v")
-	(unspec:V_ALL
+  [(set (match_operand:V_MOV 0 "register_operand"		   "=v,&v")
+	(unspec:V_MOV
 	  [(plus:<VnDI> (match_operand:<VnDI> 1 "register_operand" " v, v")
 			(vec_duplicate:<VnDI>
 			  (match_operand 2 "immediate_operand"	   " n, n")))
@@ -1090,8 +1220,8 @@
    (set_attr "xnack" "off,on")])
 
 (define_insn "gather<mode>_insn_1offset_ds<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"		   "=v")
-	(unspec:V_ALL
+  [(set (match_operand:V_MOV 0 "register_operand"		   "=v")
+	(unspec:V_MOV
 	  [(plus:<VnSI> (match_operand:<VnSI> 1 "register_operand" " v")
 			(vec_duplicate:<VnSI>
 			  (match_operand 2 "immediate_operand"	   " n")))
@@ -1112,8 +1242,8 @@
    (set_attr "length" "12")])
 
 (define_insn "gather<mode>_insn_2offsets<exec>"
-  [(set (match_operand:V_ALL 0 "register_operand"		     "=v,&v")
-	(unspec:V_ALL
+  [(set (match_operand:V_MOV 0 "register_operand"		     "=v,&v")
+	(unspec:V_MOV
 	  [(plus:<VnDI>
 	     (plus:<VnDI>
 	       (vec_duplicate:<VnDI>
@@ -1150,7 +1280,7 @@
    (match_operand:<VnDI> 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:SI 3 "gcn_alu_operand")
-   (match_operand:V_ALL 4 "register_operand")]
+   (match_operand:V_MOV 4 "register_operand")]
   ""
   {
     rtx vec_base = gen_reg_rtx (<VnDI>mode);
@@ -1178,7 +1308,7 @@
    (match_operand:<VnSI> 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:SI 3 "gcn_alu_operand")
-   (match_operand:V_ALL 4 "register_operand")]
+   (match_operand:V_MOV 4 "register_operand")]
   ""
   {
     rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
@@ -1200,7 +1330,7 @@
   [(set (mem:BLK (scratch))
 	(unspec:BLK
 	  [(match_operand:<VnDI> 0 "")
-	   (match_operand:V_ALL 1 "register_operand")
+	   (match_operand:V_MOV 1 "register_operand")
 	   (match_operand 2 "immediate_operand")
 	   (match_operand 3 "immediate_operand")]
 	  UNSPEC_SCATTER))]
@@ -1213,7 +1343,7 @@
 	  [(plus:<VnDI> (match_operand:<VnDI> 0 "register_operand" "v")
 			(vec_duplicate:<VnDI>
 			  (match_operand 1 "immediate_operand"	   "n")))
-	   (match_operand:V_ALL 2 "register_operand"		   "v")
+	   (match_operand:V_MOV 2 "register_operand"		   "v")
 	   (match_operand 3 "immediate_operand"			   "n")
 	   (match_operand 4 "immediate_operand"			   "n")]
 	  UNSPEC_SCATTER))]
@@ -1251,7 +1381,7 @@
 	  [(plus:<VnSI> (match_operand:<VnSI> 0 "register_operand" "v")
 			(vec_duplicate:<VnSI>
 			  (match_operand 1 "immediate_operand"	   "n")))
-	   (match_operand:V_ALL 2 "register_operand"		   "v")
+	   (match_operand:V_MOV 2 "register_operand"		   "v")
 	   (match_operand 3 "immediate_operand"			   "n")
 	   (match_operand 4 "immediate_operand"			   "n")]
 	  UNSPEC_SCATTER))]
@@ -1277,7 +1407,7 @@
 	       (sign_extend:<VnDI>
 		 (match_operand:<VnSI> 1 "register_operand"		" v")))
 	     (vec_duplicate:<VnDI> (match_operand 2 "immediate_operand" " n")))
-	   (match_operand:V_ALL 3 "register_operand"			" v")
+	   (match_operand:V_MOV 3 "register_operand"			" v")
 	   (match_operand 4 "immediate_operand"				" n")
 	   (match_operand 5 "immediate_operand"				" n")]
 	  UNSPEC_SCATTER))]
@@ -3863,8 +3993,8 @@
   })
 
 (define_expand "maskload<mode>di"
-  [(match_operand:V_ALL 0 "register_operand")
-   (match_operand:V_ALL 1 "memory_operand")
+  [(match_operand:V_MOV 0 "register_operand")
+   (match_operand:V_MOV 1 "memory_operand")
    (match_operand 2 "")]
   ""
   {
@@ -3883,8 +4013,8 @@
   })
 
 (define_expand "maskstore<mode>di"
-  [(match_operand:V_ALL 0 "memory_operand")
-   (match_operand:V_ALL 1 "register_operand")
+  [(match_operand:V_MOV 0 "memory_operand")
+   (match_operand:V_MOV 1 "register_operand")
    (match_operand 2 "")]
   ""
   {
@@ -3898,7 +4028,7 @@
   })
 
 (define_expand "mask_gather_load<mode><vndi>"
-  [(match_operand:V_ALL 0 "register_operand")
+  [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand:<VnDI> 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -3933,7 +4063,7 @@
   })
 
 (define_expand "mask_gather_load<mode><vnsi>"
-  [(match_operand:V_ALL 0 "register_operand")
+  [(match_operand:V_MOV 0 "register_operand")
    (match_operand:DI 1 "register_operand")
    (match_operand:<VnSI> 2 "register_operand")
    (match_operand 3 "immediate_operand")
@@ -3968,7 +4098,7 @@
    (match_operand:<VnDI> 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:DI 3 "gcn_alu_operand")
-   (match_operand:V_ALL 4 "register_operand")
+   (match_operand:V_MOV 4 "register_operand")
    (match_operand:DI 5 "")]
   ""
   {
@@ -4000,7 +4130,7 @@
    (match_operand:<VnSI> 1 "register_operand")
    (match_operand 2 "immediate_operand")
    (match_operand:SI 3 "gcn_alu_operand")
-   (match_operand:V_ALL 4 "register_operand")
+   (match_operand:V_MOV 4 "register_operand")
    (match_operand:DI 5 "")]
   ""
   {
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index e879691dfde..f019051c010 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -498,7 +498,7 @@ gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
       if (vgpr_2reg_mode_p (mode))
 	return 2;
       /* TImode is used by DImode compare_and_swap.  */
-      if (mode == TImode)
+      if (vgpr_4reg_mode_p (mode))
 	return 4;
     }
   else if (rclass == VCC_CONDITIONAL_REG && mode == BImode)
@@ -601,9 +601,9 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
        Therefore, we restrict ourselved to aligned registers.  */
     return (vgpr_1reg_mode_p (mode)
 	    || (!((regno - FIRST_VGPR_REG) & 1) && vgpr_2reg_mode_p (mode))
-	    /* TImode is used by DImode compare_and_swap.  */
-	    || (mode == TImode
-		&& !((regno - FIRST_VGPR_REG) & 3)));
+	    /* TImode is used by DImode compare_and_swap,
+	       and by DIVMOD V64DImode libfuncs.  */
+	    || (!((regno - FIRST_VGPR_REG) & 3) && vgpr_4reg_mode_p (mode)));
   return false;
 }
 
@@ -1335,6 +1335,7 @@ GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \
 GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \
 GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \
 GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \
+USE_TI (GEN_VN (PREFIX, ti##SUFFIX, A(PARAMS), A(ARGS))) \
 static rtx \
 gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
 { \
@@ -1349,6 +1350,8 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
     case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \
     case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \
     case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \
+    case E_TImode: \
+	USE_TI (return gen_##PREFIX##vNti##SUFFIX (ARGS, merge_src, exec);) \
     default: \
       break; \
     } \
@@ -1357,6 +1360,14 @@ gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \
   return NULL_RTX; \
 }
 
+/* These have TImode support.  */
+#define USE_TI(ARGS) ARGS
+GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
+GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
+
+/* These do not have TImode support.  */
+#undef USE_TI
+#define USE_TI(ARGS)
 GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
 GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
 GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc),
@@ -1375,12 +1386,11 @@ GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec),
 		A(dest, addr, src, exec))
 GEN_VNM (gather,_expr, A(rtx dest, rtx addr, rtx as, rtx vol),
 	 A(dest, addr, as, vol))
-GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src))
 GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
 GEN_VN (sub,si3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2))
-GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src))
 GEN_VN_NOEXEC (vec_series,si, A(rtx dest, rtx x, rtx c), A(dest, x, c))
 
+#undef USE_TI
 #undef GEN_VNM
 #undef GEN_VN
 #undef GET_VN_FN
@@ -1414,6 +1424,7 @@ get_code_for_##PREFIX##vN##SUFFIX (int nunits) \
 	CODE_FOR (PREFIX, sf) \
 	CODE_FOR (PREFIX, di) \
 	CODE_FOR (PREFIX, df) \
+	CODE_FOR (PREFIX, ti) \
 static int \
 get_code_for_##PREFIX (machine_mode mode) \
 { \
@@ -1429,6 +1440,7 @@ get_code_for_##PREFIX (machine_mode mode) \
     case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \
     case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \
     case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \
+    case E_TImode: return get_code_for_##PREFIX##vNti (vf); \
     default: break; \
     } \
   \
@@ -4939,7 +4951,13 @@ gcn_vector_mode_supported_p (machine_mode mode)
 	  || mode == V4SFmode || mode == V4DFmode
 	  || mode == V2QImode || mode == V2HImode
 	  || mode == V2SImode || mode == V2DImode
-	  || mode == V2SFmode || mode == V2DFmode);
+	  || mode == V2SFmode || mode == V2DFmode
+	  /* TImode vectors are allowed to exist for divmod, but there
+	     are almost no instructions defined for them, and the
+	     autovectorizer does not use them.  */
+	  || mode == V64TImode || mode == V32TImode
+	  || mode == V16TImode || mode == V8TImode
+	  || mode == V4TImode || mode == V2TImode);
 }
 
 /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
@@ -6768,6 +6786,10 @@ print_operand_address (FILE *file, rtx mem)
    O - print offset:n for data share operations.
    ^ - print "_co" suffix for GCN5 mnemonics
    g - print "glc", if appropriate for given MEM
+   L - print low-part of a multi-reg value
+   H - print second part of a multi-reg value (high-part of 2-reg value)
+   J - print third part of a multi-reg value
+   K - print fourth part of a multi-reg value
  */
 
 void
@@ -7307,6 +7329,12 @@ print_operand (FILE *file, rtx x, int code)
     case 'H':
       print_operand (file, gcn_operand_part (GET_MODE (x), x, 1), 0);
       return;
+    case 'J':
+      print_operand (file, gcn_operand_part (GET_MODE (x), x, 2), 0);
+      return;
+    case 'K':
+      print_operand (file, gcn_operand_part (GET_MODE (x), x, 3), 0);
+      return;
     case 'R':
       /* Print a scalar register number as an integer.  Temporary hack.  */
       gcc_assert (REG_P (x));

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-06-20  9:51 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-06-20  9:51 [gcc/devel/omp/gcc-13] amdgcn: minimal V64TImode vector support Andrew Stubbs

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).