diff --git a/gcc/config/gcn/gcn-modes.def b/gcc/config/gcn/gcn-modes.def index 82585de798b..1b8a3203463 100644 --- a/gcc/config/gcn/gcn-modes.def +++ b/gcc/config/gcn/gcn-modes.def @@ -29,6 +29,48 @@ VECTOR_MODE (FLOAT, HF, 64); /* V64HF */ VECTOR_MODE (FLOAT, SF, 64); /* V64SF */ VECTOR_MODE (FLOAT, DF, 64); /* V64DF */ +/* Artificial vector modes, for when vector masking doesn't work (yet). */ +VECTOR_MODE (INT, QI, 32); /* V32QI */ +VECTOR_MODE (INT, HI, 32); /* V32HI */ +VECTOR_MODE (INT, SI, 32); /* V32SI */ +VECTOR_MODE (INT, DI, 32); /* V32DI */ +VECTOR_MODE (INT, TI, 32); /* V32TI */ +VECTOR_MODE (FLOAT, HF, 32); /* V32HF */ +VECTOR_MODE (FLOAT, SF, 32); /* V32SF */ +VECTOR_MODE (FLOAT, DF, 32); /* V32DF */ +VECTOR_MODE (INT, QI, 16); /* V16QI */ +VECTOR_MODE (INT, HI, 16); /* V16HI */ +VECTOR_MODE (INT, SI, 16); /* V16SI */ +VECTOR_MODE (INT, DI, 16); /* V16DI */ +VECTOR_MODE (INT, TI, 16); /* V16TI */ +VECTOR_MODE (FLOAT, HF, 16); /* V16HF */ +VECTOR_MODE (FLOAT, SF, 16); /* V16SF */ +VECTOR_MODE (FLOAT, DF, 16); /* V16DF */ +VECTOR_MODE (INT, QI, 8); /* V8QI */ +VECTOR_MODE (INT, HI, 8); /* V8HI */ +VECTOR_MODE (INT, SI, 8); /* V8SI */ +VECTOR_MODE (INT, DI, 8); /* V8DI */ +VECTOR_MODE (INT, TI, 8); /* V8TI */ +VECTOR_MODE (FLOAT, HF, 8); /* V8HF */ +VECTOR_MODE (FLOAT, SF, 8); /* V8SF */ +VECTOR_MODE (FLOAT, DF, 8); /* V8DF */ +VECTOR_MODE (INT, QI, 4); /* V4QI */ +VECTOR_MODE (INT, HI, 4); /* V4HI */ +VECTOR_MODE (INT, SI, 4); /* V4SI */ +VECTOR_MODE (INT, DI, 4); /* V4DI */ +VECTOR_MODE (INT, TI, 4); /* V4TI */ +VECTOR_MODE (FLOAT, HF, 4); /* V4HF */ +VECTOR_MODE (FLOAT, SF, 4); /* V4SF */ +VECTOR_MODE (FLOAT, DF, 4); /* V4DF */ +VECTOR_MODE (INT, QI, 2); /* V2QI */ +VECTOR_MODE (INT, HI, 2); /* V2HI */ +VECTOR_MODE (INT, SI, 2); /* V2SI */ +VECTOR_MODE (INT, DI, 2); /* V2DI */ +VECTOR_MODE (INT, TI, 2); /* V2TI */ +VECTOR_MODE (FLOAT, HF, 2); /* V2HF */ +VECTOR_MODE (FLOAT, SF, 2); /* V2SF */ +VECTOR_MODE (FLOAT, DF, 2); /* V2DF */ + /* Vector units handle reads independently and thus no large alignment needed. 
*/ ADJUST_ALIGNMENT (V64QI, 1); @@ -39,3 +81,43 @@ ADJUST_ALIGNMENT (V64TI, 16); ADJUST_ALIGNMENT (V64HF, 2); ADJUST_ALIGNMENT (V64SF, 4); ADJUST_ALIGNMENT (V64DF, 8); +ADJUST_ALIGNMENT (V32QI, 1); +ADJUST_ALIGNMENT (V32HI, 2); +ADJUST_ALIGNMENT (V32SI, 4); +ADJUST_ALIGNMENT (V32DI, 8); +ADJUST_ALIGNMENT (V32TI, 16); +ADJUST_ALIGNMENT (V32HF, 2); +ADJUST_ALIGNMENT (V32SF, 4); +ADJUST_ALIGNMENT (V32DF, 8); +ADJUST_ALIGNMENT (V16QI, 1); +ADJUST_ALIGNMENT (V16HI, 2); +ADJUST_ALIGNMENT (V16SI, 4); +ADJUST_ALIGNMENT (V16DI, 8); +ADJUST_ALIGNMENT (V16TI, 16); +ADJUST_ALIGNMENT (V16HF, 2); +ADJUST_ALIGNMENT (V16SF, 4); +ADJUST_ALIGNMENT (V16DF, 8); +ADJUST_ALIGNMENT (V8QI, 1); +ADJUST_ALIGNMENT (V8HI, 2); +ADJUST_ALIGNMENT (V8SI, 4); +ADJUST_ALIGNMENT (V8DI, 8); +ADJUST_ALIGNMENT (V8TI, 16); +ADJUST_ALIGNMENT (V8HF, 2); +ADJUST_ALIGNMENT (V8SF, 4); +ADJUST_ALIGNMENT (V8DF, 8); +ADJUST_ALIGNMENT (V4QI, 1); +ADJUST_ALIGNMENT (V4HI, 2); +ADJUST_ALIGNMENT (V4SI, 4); +ADJUST_ALIGNMENT (V4DI, 8); +ADJUST_ALIGNMENT (V4TI, 16); +ADJUST_ALIGNMENT (V4HF, 2); +ADJUST_ALIGNMENT (V4SF, 4); +ADJUST_ALIGNMENT (V4DF, 8); +ADJUST_ALIGNMENT (V2QI, 1); +ADJUST_ALIGNMENT (V2HI, 2); +ADJUST_ALIGNMENT (V2SI, 4); +ADJUST_ALIGNMENT (V2DI, 8); +ADJUST_ALIGNMENT (V2TI, 16); +ADJUST_ALIGNMENT (V2HF, 2); +ADJUST_ALIGNMENT (V2SF, 4); +ADJUST_ALIGNMENT (V2DF, 8); diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index ca804609c09..6300c1cbd36 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -34,8 +34,6 @@ extern rtx gcn_expand_scalar_to_vector_address (machine_mode, rtx, rtx, rtx); extern void gcn_expand_vector_init (rtx, rtx); extern bool gcn_flat_address_p (rtx, machine_mode); extern bool gcn_fp_constant_p (rtx, bool); -extern rtx gcn_full_exec (); -extern rtx gcn_full_exec_reg (); extern rtx gcn_gen_undef (machine_mode); extern bool gcn_global_address_p (rtx); extern tree gcn_goacc_adjust_private_decl (location_t, tree var, int level); @@ -67,8 +65,6 @@ extern rtx gcn_operand_part (machine_mode, rtx, int); extern bool gcn_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, int, int); extern reg_class gcn_regno_reg_class (int regno); -extern rtx gcn_scalar_exec (); -extern rtx gcn_scalar_exec_reg (); extern bool gcn_scalar_flat_address_p (rtx); extern bool gcn_scalar_flat_mem_p (rtx); extern bool gcn_sgpr_move_p (rtx, rtx); @@ -105,9 +101,11 @@ extern gimple_opt_pass *make_pass_omp_gcn (gcc::context *ctxt); inline bool vgpr_1reg_mode_p (machine_mode mode) { - return (mode == SImode || mode == SFmode || mode == HImode || mode == QImode - || mode == V64QImode || mode == V64HImode || mode == V64SImode - || mode == V64HFmode || mode == V64SFmode || mode == BImode); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + return (mode == SImode || mode == SFmode || mode == HImode || mode == HFmode + || mode == QImode || mode == BImode); } /* Return true if MODE is valid for 1 SGPR register. */ @@ -124,8 +122,10 @@ sgpr_1reg_mode_p (machine_mode mode) inline bool vgpr_2reg_mode_p (machine_mode mode) { - return (mode == DImode || mode == DFmode - || mode == V64DImode || mode == V64DFmode); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + return (mode == DImode || mode == DFmode); } /* Return true if MODE can be handled directly by VGPR operations. 
*/ @@ -133,9 +133,7 @@ vgpr_2reg_mode_p (machine_mode mode) inline bool vgpr_vector_mode_p (machine_mode mode) { - return (mode == V64QImode || mode == V64HImode - || mode == V64SImode || mode == V64DImode - || mode == V64HFmode || mode == V64SFmode || mode == V64DFmode); + return VECTOR_MODE_P (mode); } diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index dec81e863f7..52d2fcb880a 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -17,88 +17,243 @@ ;; {{{ Vector iterators ; Vector modes for specific types -; (This will make more sense when there are multiple vector sizes) (define_mode_iterator V_QI - [V64QI]) + [V2QI V4QI V8QI V16QI V32QI V64QI]) (define_mode_iterator V_HI - [V64HI]) + [V2HI V4HI V8HI V16HI V32HI V64HI]) (define_mode_iterator V_HF - [V64HF]) + [V2HF V4HF V8HF V16HF V32HF V64HF]) (define_mode_iterator V_SI - [V64SI]) + [V2SI V4SI V8SI V16SI V32SI V64SI]) (define_mode_iterator V_SF - [V64SF]) + [V2SF V4SF V8SF V16SF V32SF V64SF]) (define_mode_iterator V_DI - [V64DI]) + [V2DI V4DI V8DI V16DI V32DI V64DI]) (define_mode_iterator V_DF - [V64DF]) + [V2DF V4DF V8DF V16DF V32DF V64DF]) + +(define_mode_iterator V64_SI + [V64SI]) +(define_mode_iterator V64_DI + [V64DI]) ; Vector modes for sub-dword modes (define_mode_iterator V_QIHI - [V64QI V64HI]) + [V2QI V2HI + V4QI V4HI + V8QI V8HI + V16QI V16HI + V32QI V32HI + V64QI V64HI]) ; Vector modes for one vector register (define_mode_iterator V_1REG - [V64QI V64HI V64SI V64HF V64SF]) + [V2QI V2HI V2SI V2HF V2SF + V4QI V4HI V4SI V4HF V4SF + V8QI V8HI V8SI V8HF V8SF + V16QI V16HI V16SI V16HF V16SF + V32QI V32HI V32SI V32HF V32SF + V64QI V64HI V64SI V64HF V64SF]) (define_mode_iterator V_INT_1REG - [V64QI V64HI V64SI]) + [V2QI V2HI V2SI + V4QI V4HI V4SI + V8QI V8HI V8SI + V16QI V16HI V16SI + V32QI V32HI V32SI + V64QI V64HI V64SI]) (define_mode_iterator V_INT_1REG_ALT - [V64QI V64HI V64SI]) + [V2QI V2HI V2SI + V4QI V4HI V4SI + V8QI V8HI V8SI + V16QI V16HI V16SI + V32QI V32HI V32SI + V64QI V64HI V64SI]) (define_mode_iterator V_FP_1REG - [V64HF V64SF]) + [V2HF V2SF + V4HF V4SF + V8HF V8SF + V16HF V16SF + V32HF V32SF + V64HF V64SF]) + +; V64_* modes are for where more general support is unimplemented +; (e.g. 
reductions) +(define_mode_iterator V64_1REG + [V64QI V64HI V64SI V64HF V64SF]) +(define_mode_iterator V64_INT_1REG + [V64QI V64HI V64SI]) ; Vector modes for two vector registers (define_mode_iterator V_2REG + [V2DI V2DF + V4DI V4DF + V8DI V8DF + V16DI V16DF + V32DI V32DF + V64DI V64DF]) + +(define_mode_iterator V64_2REG [V64DI V64DF]) ; Vector modes with native support (define_mode_iterator V_noQI - [V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2HI V2HF V2SI V2SF V2DI V2DF + V4HI V4HF V4SI V4SF V4DI V4DF + V8HI V8HF V8SI V8SF V8DI V8DF + V16HI V16HF V16SI V16SF V16DI V16DF + V32HI V32HF V32SI V32SF V32DI V32DF + V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_noHI - [V64HF V64SI V64SF V64DI V64DF]) + [V2HF V2SI V2SF V2DI V2DF + V4HF V4SI V4SF V4DI V4DF + V8HF V8SI V8SF V8DI V8DF + V16HF V16SI V16SF V16DI V16DF + V32HF V32SI V32SF V32DI V32DF + V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_INT_noQI - [V64HI V64SI V64DI]) + [V2HI V2SI V2DI + V4HI V4SI V4DI + V8HI V8SI V8DI + V16HI V16SI V16DI + V32HI V32SI V32DI + V64HI V64SI V64DI]) (define_mode_iterator V_INT_noHI - [V64SI V64DI]) + [V2SI V2DI + V4SI V4DI + V8SI V8DI + V16SI V16DI + V32SI V32DI + V64SI V64DI]) ; All of above (define_mode_iterator V_ALL - [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2QI V2HI V2HF V2SI V2SF V2DI V2DF + V4QI V4HI V4HF V4SI V4SF V4DI V4DF + V8QI V8HI V8HF V8SI V8SF V8DI V8DF + V16QI V16HI V16HF V16SI V16SF V16DI V16DF + V32QI V32HI V32HF V32SI V32SF V32DI V32DF + V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_ALL_ALT - [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) + [V2QI V2HI V2HF V2SI V2SF V2DI V2DF + V4QI V4HI V4HF V4SI V4SF V4DI V4DF + V8QI V8HI V8HF V8SI V8SF V8DI V8DF + V16QI V16HI V16HF V16SI V16SF V16DI V16DF + V32QI V32HI V32HF V32SI V32SF V32DI V32DF + V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) (define_mode_iterator V_INT - [V64QI V64HI V64SI V64DI]) + [V2QI V2HI V2SI V2DI + V4QI V4HI V4SI V4DI + V8QI V8HI V8SI V8DI + V16QI V16HI V16SI V16DI + V32QI V32HI V32SI V32DI + V64QI V64HI V64SI V64DI]) (define_mode_iterator V_FP + [V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) + +(define_mode_iterator V64_ALL + [V64QI V64HI V64HF V64SI V64SF V64DI V64DF]) +(define_mode_iterator V64_FP [V64HF V64SF V64DF]) (define_mode_attr scalar_mode - [(V64QI "qi") (V64HI "hi") (V64SI "si") + [(V2QI "qi") (V2HI "hi") (V2SI "si") + (V2HF "hf") (V2SF "sf") (V2DI "di") (V2DF "df") + (V4QI "qi") (V4HI "hi") (V4SI "si") + (V4HF "hf") (V4SF "sf") (V4DI "di") (V4DF "df") + (V8QI "qi") (V8HI "hi") (V8SI "si") + (V8HF "hf") (V8SF "sf") (V8DI "di") (V8DF "df") + (V16QI "qi") (V16HI "hi") (V16SI "si") + (V16HF "hf") (V16SF "sf") (V16DI "di") (V16DF "df") + (V32QI "qi") (V32HI "hi") (V32SI "si") + (V32HF "hf") (V32SF "sf") (V32DI "di") (V32DF "df") + (V64QI "qi") (V64HI "hi") (V64SI "si") (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")]) (define_mode_attr SCALAR_MODE - [(V64QI "QI") (V64HI "HI") (V64SI "SI") + [(V2QI "QI") (V2HI "HI") (V2SI "SI") + (V2HF "HF") (V2SF "SF") (V2DI "DI") (V2DF "DF") + (V4QI "QI") (V4HI "HI") (V4SI "SI") + (V4HF "HF") (V4SF "SF") (V4DI "DI") (V4DF "DF") + (V8QI "QI") (V8HI "HI") (V8SI "SI") + (V8HF "HF") (V8SF "SF") (V8DI "DI") (V8DF "DF") + (V16QI "QI") (V16HI "HI") (V16SI "SI") + (V16HF "HF") (V16SF "SF") (V16DI "DI") (V16DF "DF") + (V32QI "QI") (V32HI "HI") (V32SI "SI") + (V32HF "HF") (V32SF "SF") (V32DI "DI") (V32DF "DF") + (V64QI "QI") (V64HI "HI") (V64SI "SI") (V64HF "HF") (V64SF "SF") 
(V64DI "DI") (V64DF "DF")]) (define_mode_attr vnsi - [(V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") + [(V2QI "v2si") (V2HI "v2si") (V2HF "v2si") (V2SI "v2si") + (V2SF "v2si") (V2DI "v2si") (V2DF "v2si") + (V4QI "v4si") (V4HI "v4si") (V4HF "v4si") (V4SI "v4si") + (V4SF "v4si") (V4DI "v4si") (V4DF "v4si") + (V8QI "v8si") (V8HI "v8si") (V8HF "v8si") (V8SI "v8si") + (V8SF "v8si") (V8DI "v8si") (V8DF "v8si") + (V16QI "v16si") (V16HI "v16si") (V16HF "v16si") (V16SI "v16si") + (V16SF "v16si") (V16DI "v16si") (V16DF "v16si") + (V32QI "v32si") (V32HI "v32si") (V32HF "v32si") (V32SI "v32si") + (V32SF "v32si") (V32DI "v32si") (V32DF "v32si") + (V64QI "v64si") (V64HI "v64si") (V64HF "v64si") (V64SI "v64si") (V64SF "v64si") (V64DI "v64si") (V64DF "v64si")]) (define_mode_attr VnSI - [(V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") + [(V2QI "V2SI") (V2HI "V2SI") (V2HF "V2SI") (V2SI "V2SI") + (V2SF "V2SI") (V2DI "V2SI") (V2DF "V2SI") + (V4QI "V4SI") (V4HI "V4SI") (V4HF "V4SI") (V4SI "V4SI") + (V4SF "V4SI") (V4DI "V4SI") (V4DF "V4SI") + (V8QI "V8SI") (V8HI "V8SI") (V8HF "V8SI") (V8SI "V8SI") + (V8SF "V8SI") (V8DI "V8SI") (V8DF "V8SI") + (V16QI "V16SI") (V16HI "V16SI") (V16HF "V16SI") (V16SI "V16SI") + (V16SF "V16SI") (V16DI "V16SI") (V16DF "V16SI") + (V32QI "V32SI") (V32HI "V32SI") (V32HF "V32SI") (V32SI "V32SI") + (V32SF "V32SI") (V32DI "V32SI") (V32DF "V32SI") + (V64QI "V64SI") (V64HI "V64SI") (V64HF "V64SI") (V64SI "V64SI") (V64SF "V64SI") (V64DI "V64SI") (V64DF "V64SI")]) (define_mode_attr vndi - [(V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di") + [(V2QI "v2di") (V2HI "v2di") (V2HF "v2di") (V2SI "v2di") + (V2SF "v2di") (V2DI "v2di") (V2DF "v2di") + (V4QI "v4di") (V4HI "v4di") (V4HF "v4di") (V4SI "v4di") + (V4SF "v4di") (V4DI "v4di") (V4DF "v4di") + (V8QI "v8di") (V8HI "v8di") (V8HF "v8di") (V8SI "v8di") + (V8SF "v8di") (V8DI "v8di") (V8DF "v8di") + (V16QI "v16di") (V16HI "v16di") (V16HF "v16di") (V16SI "v16di") + (V16SF "v16di") (V16DI "v16di") (V16DF "v16di") + (V32QI "v32di") (V32HI "v32di") (V32HF "v32di") (V32SI "v32di") + (V32SF "v32di") (V32DI "v32di") (V32DF "v32di") + (V64QI "v64di") (V64HI "v64di") (V64HF "v64di") (V64SI "v64di") (V64SF "v64di") (V64DI "v64di") (V64DF "v64di")]) (define_mode_attr VnDI - [(V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") + [(V2QI "V2DI") (V2HI "V2DI") (V2HF "V2DI") (V2SI "V2DI") + (V2SF "V2DI") (V2DI "V2DI") (V2DF "V2DI") + (V4QI "V4DI") (V4HI "V4DI") (V4HF "V4DI") (V4SI "V4DI") + (V4SF "V4DI") (V4DI "V4DI") (V4DF "V4DI") + (V8QI "V8DI") (V8HI "V8DI") (V8HF "V8DI") (V8SI "V8DI") + (V8SF "V8DI") (V8DI "V8DI") (V8DF "V8DI") + (V16QI "V16DI") (V16HI "V16DI") (V16HF "V16DI") (V16SI "V16DI") + (V16SF "V16DI") (V16DI "V16DI") (V16DF "V16DI") + (V32QI "V32DI") (V32HI "V32DI") (V32HF "V32DI") (V32SI "V32DI") + (V32SF "V32DI") (V32DI "V32DI") (V32DF "V32DI") + (V64QI "V64DI") (V64HI "V64DI") (V64HF "V64DI") (V64SI "V64DI") (V64SF "V64DI") (V64DI "V64DI") (V64DF "V64DI")]) -(define_mode_attr sdwa [(V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) +(define_mode_attr sdwa + [(V2QI "BYTE_0") (V2HI "WORD_0") (V2SI "DWORD") + (V4QI "BYTE_0") (V4HI "WORD_0") (V4SI "DWORD") + (V8QI "BYTE_0") (V8HI "WORD_0") (V8SI "DWORD") + (V16QI "BYTE_0") (V16HI "WORD_0") (V16SI "DWORD") + (V32QI "BYTE_0") (V32HI "WORD_0") (V32SI "DWORD") + (V64QI "BYTE_0") (V64HI "WORD_0") (V64SI "DWORD")]) ;; }}} ;; {{{ Substitutions @@ -180,6 +335,37 @@ (define_expand "mov" (match_operand:V_ALL 1 "general_operand"))] "" { + /* Bitwise 
reinterpret casts via SUBREG don't work with GCN vector + registers, but we can convert the MEM to a mode that does work. */ + if (MEM_P (operands[0]) && !SUBREG_P (operands[0]) + && SUBREG_P (operands[1]) + && GET_MODE_SIZE (GET_MODE (operands[1])) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[1])))) + { + rtx src = SUBREG_REG (operands[1]); + rtx mem = copy_rtx (operands[0]); + PUT_MODE_RAW (mem, GET_MODE (src)); + emit_move_insn (mem, src); + DONE; + } + if (MEM_P (operands[1]) && !SUBREG_P (operands[1]) + && SUBREG_P (operands[0]) + && GET_MODE_SIZE (GET_MODE (operands[0])) + == GET_MODE_SIZE (GET_MODE (SUBREG_REG (operands[0])))) + { + rtx dest = SUBREG_REG (operands[0]); + rtx mem = copy_rtx (operands[1]); + PUT_MODE_RAW (mem, GET_MODE (dest)); + emit_move_insn (dest, mem); + DONE; + } + + /* SUBREG of MEM is not supported. */ + gcc_assert ((!SUBREG_P (operands[0]) + || !MEM_P (SUBREG_REG (operands[0]))) + && (!SUBREG_P (operands[1]) + || !MEM_P (SUBREG_REG (operands[1])))); + if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed) { operands[1] = force_reg (<MODE>mode, operands[1]); @@ -2419,10 +2605,10 @@ (define_insn "ldexp<mode>3" (set_attr "length" "8")]) (define_insn "ldexp<mode>3" - [(set (match_operand:V_FP 0 "register_operand" "=v") + [(set (match_operand:V_FP 0 "register_operand" "= v") (unspec:V_FP - [(match_operand:V_FP 1 "gcn_alu_operand" "vB") - (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")] + [(match_operand:V_FP 1 "gcn_alu_operand" " vB") + (match_operand:<VnSI> 2 "gcn_alu_operand" "vSvA")] UNSPEC_LDEXP))] "" "v_ldexp%i0\t%0, %1, %2" @@ -2452,8 +2638,8 @@ (define_insn "frexp<mode>_mant2" (set_attr "length" "8")]) (define_insn "frexp<mode>_exp2" - [(set (match_operand:V64SI 0 "register_operand" "=v") - (unspec:V64SI + [(set (match_operand:<VnSI> 0 "register_operand" "=v") + (unspec:<VnSI> [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] UNSPEC_FREXP_EXP))] "" @@ -2640,9 +2826,27 @@ (define_expand "div<mode>3" (define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF]) (define_mode_iterator CVT_TO_MODE [HI SI HF SF DF]) -(define_mode_iterator VCVT_MODE [V64HI V64SI V64HF V64SF V64DF]) -(define_mode_iterator VCVT_FMODE [V64HF V64SF V64DF]) -(define_mode_iterator VCVT_IMODE [V64HI V64SI]) +(define_mode_iterator VCVT_MODE + [V2HI V2SI V2HF V2SF V2DF + V4HI V4SI V4HF V4SF V4DF + V8HI V8SI V8HF V8SF V8DF + V16HI V16SI V16HF V16SF V16DF + V32HI V32SI V32HF V32SF V32DF + V64HI V64SI V64HF V64SF V64DF]) +(define_mode_iterator VCVT_FMODE + [V2HF V2SF V2DF + V4HF V4SF V4DF + V8HF V8SF V8DF + V16HF V16SF V16DF + V32HF V32SF V32DF + V64HF V64SF V64DF]) +(define_mode_iterator VCVT_IMODE + [V2HI V2SI + V4HI V4SI + V8HI V8SI + V16HI V16SI + V32HI V32SI + V64HI V64SI]) (define_code_iterator cvt_op [fix unsigned_fix float unsigned_float @@ -3265,7 +3469,7 @@ (define_int_attr reduc_insn [(UNSPEC_SMIN_DPP_SHR "v_min%i0") (define_expand "reduc_<reduc_op>_scal_<mode>" [(set (match_operand:<SCALAR_MODE> 0 "register_operand") (unspec:<SCALAR_MODE> - [(match_operand:V_ALL 1 "register_operand")] + [(match_operand:V64_ALL 1 "register_operand")] REDUC_UNSPEC))] "" { @@ -3284,7 +3488,7 @@ (define_expand "reduc_<reduc_op>_scal_<mode>" (define_expand "fold_left_plus_<mode>" [(match_operand:<SCALAR_MODE> 0 "register_operand") (match_operand:<SCALAR_MODE> 1 "gcn_alu_operand") - (match_operand:V_FP 2 "gcn_alu_operand")] + (match_operand:V64_FP 2 "gcn_alu_operand")] "can_create_pseudo_p () && (flag_openacc || flag_openmp || flag_associative_math)" @@ -3300,11 +3504,11 @@ (define_expand "fold_left_plus_<mode>" }) (define_insn "*<reduc_op>_dpp_shr_<mode>" - [(set (match_operand:V_1REG 0 "register_operand" "=v") - (unspec:V_1REG - [(match_operand:V_1REG 1
"register_operand" "v") - (match_operand:V_1REG 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_1REG 0 "register_operand" "=v") + (unspec:V64_1REG + [(match_operand:V64_1REG 1 "register_operand" "v") + (match_operand:V64_1REG 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_UNSPEC))] ; GCN3 requires a carry out, GCN5 not "!(TARGET_GCN3 && SCALAR_INT_MODE_P (mode) @@ -3317,11 +3521,11 @@ (define_insn "*_dpp_shr_" (set_attr "length" "8")]) (define_insn_and_split "*_dpp_shr_" - [(set (match_operand:V_DI 0 "register_operand" "=v") - (unspec:V_DI - [(match_operand:V_DI 1 "register_operand" "v") - (match_operand:V_DI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_DI 0 "register_operand" "=v") + (unspec:V64_DI + [(match_operand:V64_DI 1 "register_operand" "v") + (match_operand:V64_DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] REDUC_2REG_UNSPEC))] "" "#" @@ -3346,10 +3550,10 @@ (define_insn_and_split "*_dpp_shr_" ; Special cases for addition. (define_insn "*plus_carry_dpp_shr_" - [(set (match_operand:V_INT_1REG 0 "register_operand" "=v") - (unspec:V_INT_1REG - [(match_operand:V_INT_1REG 1 "register_operand" "v") - (match_operand:V_INT_1REG 2 "register_operand" "v") + [(set (match_operand:V64_INT_1REG 0 "register_operand" "=v") + (unspec:V64_INT_1REG + [(match_operand:V64_INT_1REG 1 "register_operand" "v") + (match_operand:V64_INT_1REG 2 "register_operand" "v") (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] @@ -3363,12 +3567,12 @@ (define_insn "*plus_carry_dpp_shr_" (set_attr "length" "8")]) (define_insn "*plus_carry_in_dpp_shr_" - [(set (match_operand:V_SI 0 "register_operand" "=v") - (unspec:V_SI - [(match_operand:V_SI 1 "register_operand" "v") - (match_operand:V_SI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n") - (match_operand:DI 4 "register_operand" "cV")] + [(set (match_operand:V64_SI 0 "register_operand" "=v") + (unspec:V64_SI + [(match_operand:V64_SI 1 "register_operand" "v") + (match_operand:V64_SI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n") + (match_operand:DI 4 "register_operand" "cV")] UNSPEC_PLUS_CARRY_IN_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" @@ -3381,11 +3585,11 @@ (define_insn "*plus_carry_in_dpp_shr_" (set_attr "length" "8")]) (define_insn_and_split "*plus_carry_dpp_shr_" - [(set (match_operand:V_DI 0 "register_operand" "=v") - (unspec:V_DI - [(match_operand:V_DI 1 "register_operand" "v") - (match_operand:V_DI 2 "register_operand" "v") - (match_operand:SI 3 "const_int_operand" "n")] + [(set (match_operand:V64_DI 0 "register_operand" "=v") + (unspec:V64_DI + [(match_operand:V64_DI 1 "register_operand" "v") + (match_operand:V64_DI 2 "register_operand" "v") + (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] "" @@ -3416,7 +3620,7 @@ (define_insn_and_split "*plus_carry_dpp_shr_" (define_insn "mov_from_lane63_" [(set (match_operand: 0 "register_operand" "=Sg,v") (unspec: - [(match_operand:V_1REG 1 "register_operand" " v,v")] + [(match_operand:V64_1REG 1 "register_operand" " v,v")] UNSPEC_MOV_FROM_LANE63))] "" "@ @@ -3429,7 +3633,7 @@ (define_insn "mov_from_lane63_" (define_insn "mov_from_lane63_" [(set (match_operand: 0 "register_operand" "=Sg,v") (unspec: - [(match_operand:V_2REG 1 "register_operand" " v,v")] + [(match_operand:V64_2REG 1 "register_operand" " 
v,v")] UNSPEC_MOV_FROM_LANE63))] "" "@ diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index c27ee91210e..e1636f6ddd6 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -395,6 +395,97 @@ gcn_scalar_mode_supported_p (scalar_mode mode) || mode == TImode); } +/* Return a vector mode with N lanes of MODE. */ + +static machine_mode +VnMODE (int n, machine_mode mode) +{ + switch (mode) + { + case QImode: + switch (n) + { + case 2: return V2QImode; + case 4: return V4QImode; + case 8: return V8QImode; + case 16: return V16QImode; + case 32: return V32QImode; + case 64: return V64QImode; + } + break; + case HImode: + switch (n) + { + case 2: return V2HImode; + case 4: return V4HImode; + case 8: return V8HImode; + case 16: return V16HImode; + case 32: return V32HImode; + case 64: return V64HImode; + } + break; + case HFmode: + switch (n) + { + case 2: return V2HFmode; + case 4: return V4HFmode; + case 8: return V8HFmode; + case 16: return V16HFmode; + case 32: return V32HFmode; + case 64: return V64HFmode; + } + break; + case SImode: + switch (n) + { + case 2: return V2SImode; + case 4: return V4SImode; + case 8: return V8SImode; + case 16: return V16SImode; + case 32: return V32SImode; + case 64: return V64SImode; + } + break; + case SFmode: + switch (n) + { + case 2: return V2SFmode; + case 4: return V4SFmode; + case 8: return V8SFmode; + case 16: return V16SFmode; + case 32: return V32SFmode; + case 64: return V64SFmode; + } + break; + case DImode: + switch (n) + { + case 2: return V2DImode; + case 4: return V4DImode; + case 8: return V8DImode; + case 16: return V16DImode; + case 32: return V32DImode; + case 64: return V64DImode; + } + break; + case DFmode: + switch (n) + { + case 2: return V2DFmode; + case 4: return V4DFmode; + case 8: return V8DFmode; + case 16: return V16DFmode; + case 32: return V32DFmode; + case 64: return V64DFmode; + } + break; + default: + break; + } + + return VOIDmode; +} + /* Implement TARGET_CLASS_MAX_NREGS. Return the number of hard registers needed to hold a value of MODE in @@ -556,6 +647,23 @@ gcn_can_change_mode_class (machine_mode from, machine_mode to, { if (!vgpr_vector_mode_p (from) && !vgpr_vector_mode_p (to)) return true; + + /* Vector conversions are only valid when changing mode with a fixed number + of lanes, or changing number of lanes with a fixed mode. Anything else + would require actual data movement. */ + if (VECTOR_MODE_P (from) && VECTOR_MODE_P (to) + && GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to) + && GET_MODE_INNER (from) != GET_MODE_INNER (to)) + return false; + + /* Vector/scalar conversions are only permitted when the scalar mode + is the same or smaller than the inner vector mode. */ + if ((VECTOR_MODE_P (from) && !VECTOR_MODE_P (to) + && GET_MODE_SIZE (to) >= GET_MODE_SIZE (GET_MODE_INNER (from))) + || (VECTOR_MODE_P (to) && !VECTOR_MODE_P (from) + && GET_MODE_SIZE (from) >= GET_MODE_SIZE (GET_MODE_INNER (to)))) + return false; + return (gcn_class_max_nregs (regclass, from) == gcn_class_max_nregs (regclass, to)); } @@ -595,6 +703,16 @@ gcn_class_likely_spilled_p (reg_class_t rclass) bool gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2) { + if (VECTOR_MODE_P (mode1) || VECTOR_MODE_P (mode2)) + { + int vf1 = (VECTOR_MODE_P (mode1) ? GET_MODE_NUNITS (mode1) : 1); + int vf2 = (VECTOR_MODE_P (mode2) ? GET_MODE_NUNITS (mode2) : 1); + machine_mode inner1 = (vf1 > 1 ? GET_MODE_INNER (mode1) : mode1); + machine_mode inner2 = (vf2 > 1 ? 
GET_MODE_INNER (mode2) : mode2); + + return (vf1 == vf2 || (inner1 == inner2 && vf2 <= vf1)); + } + return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE); } @@ -616,14 +734,16 @@ gcn_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec) rtx gcn_operand_part (machine_mode mode, rtx op, int n) { - if (GET_MODE_SIZE (mode) >= 256) + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; + + if (vf > 1) { - /*gcc_assert (GET_MODE_SIZE (mode) == 256 || n == 0); */ + machine_mode vsimode = VnMODE (vf, SImode); if (REG_P (op)) { gcc_assert (REGNO (op) + n < FIRST_PSEUDO_REGISTER); - return gen_rtx_REG (V64SImode, REGNO (op) + n); + return gen_rtx_REG (vsimode, REGNO (op) + n); } if (GET_CODE (op) == CONST_VECTOR) { @@ -634,10 +754,10 @@ gcn_operand_part (machine_mode mode, rtx op, int n) RTVEC_ELT (v, i) = gcn_operand_part (GET_MODE_INNER (mode), CONST_VECTOR_ELT (op, i), n); - return gen_rtx_CONST_VECTOR (V64SImode, v); + return gen_rtx_CONST_VECTOR (vsimode, v); } if (GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_VECTOR) - return gcn_gen_undef (V64SImode); + return gcn_gen_undef (vsimode); gcc_unreachable (); } else if (GET_MODE_SIZE (mode) == 8 && REG_P (op)) @@ -734,38 +854,6 @@ get_exec (int64_t val) return reg; } -/* Return value of scalar exec register. */ - -rtx -gcn_scalar_exec () -{ - return const1_rtx; -} - -/* Return pseudo holding scalar exec register. */ - -rtx -gcn_scalar_exec_reg () -{ - return get_exec (1); -} - -/* Return value of full exec register. */ - -rtx -gcn_full_exec () -{ - return constm1_rtx; -} - -/* Return pseudo holding full exec register. */ - -rtx -gcn_full_exec_reg () -{ - return get_exec (-1); -} - /* }}} */ /* {{{ Immediate constants. */ @@ -802,8 +890,13 @@ int gcn_inline_fp_constant_p (rtx x, bool allow_vector) { machine_mode mode = GET_MODE (x); + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; - if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + if (vf > 1) + mode = GET_MODE_INNER (mode); + + if (vf > 1 + && (mode == HFmode || mode == SFmode || mode == DFmode) && allow_vector) { int n; @@ -812,7 +905,7 @@ gcn_inline_fp_constant_p (rtx x, bool allow_vector) n = gcn_inline_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); if (!n) return 0; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) return 0; return 1; @@ -867,8 +960,13 @@ bool gcn_fp_constant_p (rtx x, bool allow_vector) { machine_mode mode = GET_MODE (x); + int vf = VECTOR_MODE_P (mode) ? GET_MODE_NUNITS (mode) : 1; - if ((mode == V64HFmode || mode == V64SFmode || mode == V64DFmode) + if (vf > 1) + mode = GET_MODE_INNER (mode); + + if (vf > 1 + && (mode == HFmode || mode == SFmode || mode == DFmode) && allow_vector) { int n; @@ -877,7 +975,7 @@ gcn_fp_constant_p (rtx x, bool allow_vector) n = gcn_fp_constant_p (CONST_VECTOR_ELT (x, 0), false); if (!n) return false; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (CONST_VECTOR_ELT (x, i) != CONST_VECTOR_ELT (x, 0)) return false; return true; @@ -1090,6 +1188,244 @@ gcn_gen_undef (machine_mode mode) return gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), UNSPEC_VECTOR); } +/* }}} */ +/* {{{ Utility functions. */ + +/* Generalised accessor functions for instruction patterns. + The machine desription '@' prefix does something similar, but as of + GCC 10 is incompatible with define_subst, and anyway it doesn't + auto-handle the exec feature. 
+ + Four macros are provided; each function only needs one: + + GEN_VN - create accessor functions for all sizes of one mode + GEN_VNM - create accessor functions for all sizes of all modes + GEN_VN_NOEXEC - for insns without "_exec" variants + GEN_VNM_NOEXEC - likewise + + E.g. add3 + GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2) + + gen_addvNsi3 (dst, a, b) + -> calls gen_addv64si3, or gen_addv32si3, etc. + + gen_addvNm3 (dst, a, b) + -> calls gen_addv64qi3, or gen_addv2di3, etc. + + The mode is determined from the first parameter, which must be called + "dest" (or else the macro doesn't work). + + Each function has two optional parameters at the end: merge_src and exec. + If exec is non-null, the function will call the "_exec" variant of the + insn. If exec is non-null but merge_src is null then an undef unspec + will be created. + + E.g. cont. + gen_addvNsi3 (v64sidst, a, b, oldval, exec) + -> calls gen_addv64si3_exec (v64sidst, a, b, oldval, exec) + + gen_addvNm3 (v2qidst, a, b, NULL, exec) + -> calls gen_addv2qi3_exec (v2qidst, a, b, + gcn_gen_undef (V2QImode), exec) + */ + +#define A(...) __VA_ARGS__ +#define GEN_VN_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ +static rtx \ +gen_##PREFIX##vN##SUFFIX (PARAMS) \ +{ \ + machine_mode mode = GET_MODE (dest); \ + int n = GET_MODE_NUNITS (mode); \ + \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \ + case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \ + case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \ + case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \ + case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \ + case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VNM_NOEXEC(PREFIX, SUFFIX, PARAMS, ARGS) \ +GEN_VN_NOEXEC (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN_NOEXEC (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +static rtx \ +gen_##PREFIX##vNm##SUFFIX (PARAMS) \ +{ \ + machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \ + \ + switch (mode) \ + { \ + case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS); \ + case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS); \ + case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS); \ + case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS); \ + case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS); \ + case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS); \ + case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS); \ + default: \ + break; \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VN(PREFIX, SUFFIX, PARAMS, ARGS) \ +static rtx \ +gen_##PREFIX##vN##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ +{ \ + machine_mode mode = GET_MODE (dest); \ + int n = GET_MODE_NUNITS (mode); \ + \ + if (exec && !merge_src) \ + merge_src = gcn_gen_undef (mode); \ + \ + if (exec) \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 4: return gen_##PREFIX##v4##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 8: return gen_##PREFIX##v8##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 16: return gen_##PREFIX##v16##SUFFIX##_exec (ARGS, merge_src, exec); \ + case 32: return gen_##PREFIX##v32##SUFFIX##_exec (ARGS, merge_src, exec); 
\ + case 64: return gen_##PREFIX##v64##SUFFIX##_exec (ARGS, merge_src, exec); \ + } \ + else \ + switch (n) \ + { \ + case 2: return gen_##PREFIX##v2##SUFFIX (ARGS); \ + case 4: return gen_##PREFIX##v4##SUFFIX (ARGS); \ + case 8: return gen_##PREFIX##v8##SUFFIX (ARGS); \ + case 16: return gen_##PREFIX##v16##SUFFIX (ARGS); \ + case 32: return gen_##PREFIX##v32##SUFFIX (ARGS); \ + case 64: return gen_##PREFIX##v64##SUFFIX (ARGS); \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +#define GEN_VNM(PREFIX, SUFFIX, PARAMS, ARGS) \ +GEN_VN (PREFIX, qi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, hi##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, hf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, si##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, sf##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, di##SUFFIX, A(PARAMS), A(ARGS)) \ +GEN_VN (PREFIX, df##SUFFIX, A(PARAMS), A(ARGS)) \ +static rtx \ +gen_##PREFIX##vNm##SUFFIX (PARAMS, rtx merge_src=NULL, rtx exec=NULL) \ +{ \ + machine_mode mode = GET_MODE_INNER (GET_MODE (dest)); \ + \ + switch (mode) \ + { \ + case E_QImode: return gen_##PREFIX##vNqi##SUFFIX (ARGS, merge_src, exec); \ + case E_HImode: return gen_##PREFIX##vNhi##SUFFIX (ARGS, merge_src, exec); \ + case E_HFmode: return gen_##PREFIX##vNhf##SUFFIX (ARGS, merge_src, exec); \ + case E_SImode: return gen_##PREFIX##vNsi##SUFFIX (ARGS, merge_src, exec); \ + case E_SFmode: return gen_##PREFIX##vNsf##SUFFIX (ARGS, merge_src, exec); \ + case E_DImode: return gen_##PREFIX##vNdi##SUFFIX (ARGS, merge_src, exec); \ + case E_DFmode: return gen_##PREFIX##vNdf##SUFFIX (ARGS, merge_src, exec); \ + default: \ + break; \ + } \ + \ + gcc_unreachable (); \ + return NULL_RTX; \ +} + +GEN_VNM (add,3, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,si3_vcc_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (add,di3_sext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_vcc_zext_dup, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (add,di3_zext_dup2, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VN (add,di3_vcc_zext_dup2, A(rtx dest, rtx src1, rtx src2, rtx vcc), + A(dest, src1, src2, vcc)) +GEN_VN (addc,si3, A(rtx dest, rtx src1, rtx src2, rtx vccout, rtx vccin), + A(dest, src1, src2, vccout, vccin)) +GEN_VN (ashl,si3, A(rtx dest, rtx src, rtx shift), A(dest, src, shift)) +GEN_VNM_NOEXEC (ds_bpermute,, A(rtx dest, rtx addr, rtx src, rtx exec), + A(dest, addr, src, exec)) +GEN_VNM (mov,, A(rtx dest, rtx src), A(dest, src)) +GEN_VN (mul,si3_dup, A(rtx dest, rtx src1, rtx src2), A(dest, src1, src2)) +GEN_VNM (vec_duplicate,, A(rtx dest, rtx src), A(dest, src)) + +#undef GEN_VNM +#undef GEN_VN +#undef GET_VN_FN +#undef A + +/* Get icode for vector instructions without an optab. 
*/ + +#define CODE_FOR(PREFIX, SUFFIX) \ +static int \ +get_code_for_##PREFIX##vN##SUFFIX (int nunits) \ +{ \ + switch (nunits) \ + { \ + case 2: return CODE_FOR_##PREFIX##v2##SUFFIX; \ + case 4: return CODE_FOR_##PREFIX##v4##SUFFIX; \ + case 8: return CODE_FOR_##PREFIX##v8##SUFFIX; \ + case 16: return CODE_FOR_##PREFIX##v16##SUFFIX; \ + case 32: return CODE_FOR_##PREFIX##v32##SUFFIX; \ + case 64: return CODE_FOR_##PREFIX##v64##SUFFIX; \ + } \ + \ + gcc_unreachable (); \ + return CODE_FOR_nothing; \ +} + +#define CODE_FOR_OP(PREFIX) \ + CODE_FOR (PREFIX, qi) \ + CODE_FOR (PREFIX, hi) \ + CODE_FOR (PREFIX, hf) \ + CODE_FOR (PREFIX, si) \ + CODE_FOR (PREFIX, sf) \ + CODE_FOR (PREFIX, di) \ + CODE_FOR (PREFIX, df) \ +static int \ +get_code_for_##PREFIX (machine_mode mode) \ +{ \ + int vf = GET_MODE_NUNITS (mode); \ + machine_mode smode = GET_MODE_INNER (mode); \ + \ + switch (smode) \ + { \ + case E_QImode: return get_code_for_##PREFIX##vNqi (vf); \ + case E_HImode: return get_code_for_##PREFIX##vNhi (vf); \ + case E_HFmode: return get_code_for_##PREFIX##vNhf (vf); \ + case E_SImode: return get_code_for_##PREFIX##vNsi (vf); \ + case E_SFmode: return get_code_for_##PREFIX##vNsf (vf); \ + case E_DImode: return get_code_for_##PREFIX##vNdi (vf); \ + case E_DFmode: return get_code_for_##PREFIX##vNdf (vf); \ + default: break; \ + } \ + \ + gcc_unreachable (); \ + return CODE_FOR_nothing; \ +} + +CODE_FOR_OP (reload_in) +CODE_FOR_OP (reload_out) + +#undef CODE_FOR_OP +#undef CODE_FOR + /* }}} */ /* {{{ Addresses, pointers and moves. */ @@ -1644,60 +1980,6 @@ regno_ok_for_index_p (int regno) return regno == M0_REG || VGPR_REGNO_P (regno); } -/* Generate move which uses the exec flags. If EXEC is NULL, then it is - assumed that all lanes normally relevant to the mode of the move are - affected. If PREV is NULL, then a sensible default is supplied for - the inactive lanes. */ - -static rtx -gen_mov_with_exec (rtx op0, rtx op1, rtx exec = NULL, rtx prev = NULL) -{ - machine_mode mode = GET_MODE (op0); - - if (vgpr_vector_mode_p (mode)) - { - if (exec && exec != CONSTM1_RTX (DImode)) - { - if (!prev) - prev = op0; - } - else - { - if (!prev) - prev = gcn_gen_undef (mode); - exec = gcn_full_exec_reg (); - } - - rtx set = gen_rtx_SET (op0, gen_rtx_VEC_MERGE (mode, op1, prev, exec)); - - return gen_rtx_PARALLEL (VOIDmode, - gen_rtvec (2, set, - gen_rtx_CLOBBER (VOIDmode, - gen_rtx_SCRATCH (V64DImode)))); - } - - return (gen_rtx_PARALLEL - (VOIDmode, - gen_rtvec (2, gen_rtx_SET (op0, op1), - gen_rtx_USE (VOIDmode, - exec ? exec : gcn_scalar_exec ())))); -} - -/* Generate masked move. */ - -static rtx -gen_duplicate_load (rtx op0, rtx op1, rtx op2 = NULL, rtx exec = NULL) -{ - if (exec) - return (gen_rtx_SET (op0, - gen_rtx_VEC_MERGE (GET_MODE (op0), - gen_rtx_VEC_DUPLICATE (GET_MODE - (op0), op1), - op2, exec))); - else - return (gen_rtx_SET (op0, gen_rtx_VEC_DUPLICATE (GET_MODE (op0), op1))); -} - /* Expand vector init of OP0 by VEC. Implements vec_init instruction pattern. 
*/ @@ -1707,10 +1989,11 @@ gcn_expand_vector_init (rtx op0, rtx vec) int64_t initialized_mask = 0; int64_t curr_mask = 1; machine_mode mode = GET_MODE (op0); + int vf = GET_MODE_NUNITS (mode); rtx val = XVECEXP (vec, 0, 0); - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (rtx_equal_p (val, XVECEXP (vec, 0, i))) curr_mask |= (int64_t) 1 << i; @@ -1719,26 +2002,26 @@ gcn_expand_vector_init (rtx op0, rtx vec) else { val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_duplicate_load (op0, val)); + emit_insn (gen_vec_duplicatevNm (op0, val)); } initialized_mask |= curr_mask; - for (int i = 1; i < 64; i++) + for (int i = 1; i < vf; i++) if (!(initialized_mask & ((int64_t) 1 << i))) { curr_mask = (int64_t) 1 << i; rtx val = XVECEXP (vec, 0, i); - for (int j = i + 1; j < 64; j++) + for (int j = i + 1; j < vf; j++) if (rtx_equal_p (val, XVECEXP (vec, 0, j))) curr_mask |= (int64_t) 1 << j; if (gcn_constant_p (val)) - emit_insn (gen_mov_with_exec (op0, gcn_vec_constant (mode, val), - get_exec (curr_mask))); + emit_insn (gen_movvNm (op0, gcn_vec_constant (mode, val), op0, + get_exec (curr_mask))); else { val = force_reg (GET_MODE_INNER (mode), val); - emit_insn (gen_duplicate_load (op0, val, op0, - get_exec (curr_mask))); + emit_insn (gen_vec_duplicatevNm (op0, val, op0, + get_exec (curr_mask))); } initialized_mask |= curr_mask; } @@ -1751,18 +2034,18 @@ strided_constant (machine_mode mode, int base, int val) { rtx x = gen_reg_rtx (mode); emit_move_insn (x, gcn_vec_constant (mode, base)); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 32), - x, get_exec (0xffffffff00000000))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 16), - x, get_exec (0xffff0000ffff0000))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 8), - x, get_exec (0xff00ff00ff00ff00))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 4), - x, get_exec (0xf0f0f0f0f0f0f0f0))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 2), - x, get_exec (0xcccccccccccccccc))); - emit_insn (gen_addv64si3_exec (x, x, gcn_vec_constant (mode, val * 1), - x, get_exec (0xaaaaaaaaaaaaaaaa))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 32), + x, get_exec (0xffffffff00000000))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 16), + x, get_exec (0xffff0000ffff0000))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 8), + x, get_exec (0xff00ff00ff00ff00))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 4), + x, get_exec (0xf0f0f0f0f0f0f0f0))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 2), + x, get_exec (0xcccccccccccccccc))); + emit_insn (gen_addvNm3 (x, x, gcn_vec_constant (mode, val * 1), + x, get_exec (0xaaaaaaaaaaaaaaaa))); return x; } @@ -1792,15 +2075,17 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode, case ADDR_SPACE_LDS: case ADDR_SPACE_GDS: /* FIXME: LDS support offsets, handle them!. 
*/ - if (vgpr_vector_mode_p (mode) && GET_MODE (x) != V64SImode) + if (vgpr_vector_mode_p (mode) + && GET_MODE_INNER (GET_MODE (x)) != SImode) { - rtx addrs = gen_reg_rtx (V64SImode); + machine_mode simode = VnMODE (GET_MODE_NUNITS (mode), SImode); + rtx addrs = gen_reg_rtx (simode); rtx base = force_reg (SImode, x); - rtx offsets = strided_constant (V64SImode, 0, + rtx offsets = strided_constant (simode, 0, GET_MODE_UNIT_SIZE (mode)); - emit_insn (gen_vec_duplicatev64si (addrs, base)); - emit_insn (gen_addv64si3 (addrs, offsets, addrs)); + emit_insn (gen_vec_duplicatevNsi (addrs, base)); + emit_insn (gen_addvNsi3 (addrs, offsets, addrs)); return addrs; } return x; @@ -1808,16 +2093,18 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode, gcc_unreachable (); } -/* Convert a (mem: (reg:DI)) to (mem: (reg:V64DI)) with the +/* Convert a (mem: (reg:DI)) to (mem: (reg:VnDI)) with the proper vector of stepped addresses. MEM will be a DImode address of a vector in an SGPR. - TMP will be a V64DImode VGPR pair or (scratch:V64DI). */ + TMP will be a VnDImode VGPR pair or (scratch:VnDI). */ rtx gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, rtx tmp) { + machine_mode pmode = VnMODE (GET_MODE_NUNITS (mode), DImode); + machine_mode offmode = VnMODE (GET_MODE_NUNITS (mode), SImode); gcc_assert (MEM_P (mem)); rtx mem_base = XEXP (mem, 0); rtx mem_index = NULL_RTX; @@ -1841,22 +2128,18 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, machine_mode inner = GET_MODE_INNER (mode); int shift = exact_log2 (GET_MODE_SIZE (inner)); - rtx ramp = gen_rtx_REG (V64SImode, VGPR_REGNO (1)); - rtx undef_v64si = gcn_gen_undef (V64SImode); + rtx ramp = gen_rtx_REG (offmode, VGPR_REGNO (1)); rtx new_base = NULL_RTX; addr_space_t as = MEM_ADDR_SPACE (mem); rtx tmplo = (REG_P (tmp) - ? gcn_operand_part (V64DImode, tmp, 0) - : gen_reg_rtx (V64SImode)); + ? 
gcn_operand_part (pmode, tmp, 0) + : gen_reg_rtx (offmode)); /* tmplo[:] = ramp[:] << shift */ - if (exec) - emit_insn (gen_ashlv64si3_exec (tmplo, ramp, - gen_int_mode (shift, SImode), - undef_v64si, exec)); - else - emit_insn (gen_ashlv64si3 (tmplo, ramp, gen_int_mode (shift, SImode))); + emit_insn (gen_ashlvNsi3 (tmplo, ramp, + gen_int_mode (shift, SImode), + NULL, exec)); if (AS_FLAT_P (as)) { @@ -1866,53 +2149,41 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, { rtx mem_base_lo = gcn_operand_part (DImode, mem_base, 0); rtx mem_base_hi = gcn_operand_part (DImode, mem_base, 1); - rtx tmphi = gcn_operand_part (V64DImode, tmp, 1); + rtx tmphi = gcn_operand_part (pmode, tmp, 1); /* tmphi[:] = mem_base_hi */ - if (exec) - emit_insn (gen_vec_duplicatev64si_exec (tmphi, mem_base_hi, - undef_v64si, exec)); - else - emit_insn (gen_vec_duplicatev64si (tmphi, mem_base_hi)); + emit_insn (gen_vec_duplicatevNsi (tmphi, mem_base_hi, NULL, exec)); /* tmp[:] += zext (mem_base) */ if (exec) { - emit_insn (gen_addv64si3_vcc_dup_exec (tmplo, mem_base_lo, tmplo, - vcc, undef_v64si, exec)); - emit_insn (gen_addcv64si3_exec (tmphi, tmphi, const0_rtx, - vcc, vcc, undef_v64si, exec)); + emit_insn (gen_addvNsi3_vcc_dup (tmplo, mem_base_lo, tmplo, + vcc, NULL, exec)); + emit_insn (gen_addcvNsi3 (tmphi, tmphi, const0_rtx, + vcc, vcc, NULL, exec)); } else - emit_insn (gen_addv64di3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc)); + emit_insn (gen_addvNdi3_vcc_zext_dup (tmp, mem_base_lo, tmp, vcc)); } else { - tmp = gen_reg_rtx (V64DImode); - if (exec) - emit_insn (gen_addv64di3_vcc_zext_dup2_exec - (tmp, tmplo, mem_base, vcc, gcn_gen_undef (V64DImode), - exec)); - else - emit_insn (gen_addv64di3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc)); + tmp = gen_reg_rtx (pmode); + emit_insn (gen_addvNdi3_vcc_zext_dup2 (tmp, tmplo, mem_base, vcc, + NULL, exec)); } new_base = tmp; } else if (AS_ANY_DS_P (as)) { - if (!exec) - emit_insn (gen_addv64si3_dup (tmplo, tmplo, mem_base)); - else - emit_insn (gen_addv64si3_dup_exec (tmplo, tmplo, mem_base, - gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_addvNsi3_dup (tmplo, tmplo, mem_base, NULL, exec)); new_base = tmplo; } else { - mem_base = gen_rtx_VEC_DUPLICATE (V64DImode, mem_base); - new_base = gen_rtx_PLUS (V64DImode, mem_base, - gen_rtx_SIGN_EXTEND (V64DImode, tmplo)); + mem_base = gen_rtx_VEC_DUPLICATE (pmode, mem_base); + new_base = gen_rtx_PLUS (pmode, mem_base, + gen_rtx_SIGN_EXTEND (pmode, tmplo)); } return gen_rtx_PLUS (GET_MODE (new_base), new_base, @@ -1929,42 +2200,33 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem, If EXEC is set then _exec patterns will be used, otherwise plain. Return values. - ADDR_SPACE_FLAT - return V64DImode vector of absolute addresses. - ADDR_SPACE_GLOBAL - return V64SImode vector of offsets. */ + ADDR_SPACE_FLAT - return VnDImode vector of absolute addresses. + ADDR_SPACE_GLOBAL - return VnSImode vector of offsets. */ rtx gcn_expand_scaled_offsets (addr_space_t as, rtx base, rtx offsets, rtx scale, bool unsigned_p, rtx exec) { - rtx tmpsi = gen_reg_rtx (V64SImode); - rtx tmpdi = gen_reg_rtx (V64DImode); - rtx undefsi = exec ? gcn_gen_undef (V64SImode) : NULL; - rtx undefdi = exec ? 
gcn_gen_undef (V64DImode) : NULL; + int vf = GET_MODE_NUNITS (GET_MODE (offsets)); + rtx tmpsi = gen_reg_rtx (VnMODE (vf, SImode)); + rtx tmpdi = gen_reg_rtx (VnMODE (vf, DImode)); if (CONST_INT_P (scale) && INTVAL (scale) > 0 && exact_log2 (INTVAL (scale)) >= 0) - emit_insn (gen_ashlv64si3 (tmpsi, offsets, - GEN_INT (exact_log2 (INTVAL (scale))))); + emit_insn (gen_ashlvNsi3 (tmpsi, offsets, + GEN_INT (exact_log2 (INTVAL (scale))), + NULL, exec)); else - (exec - ? emit_insn (gen_mulv64si3_dup_exec (tmpsi, offsets, scale, undefsi, - exec)) - : emit_insn (gen_mulv64si3_dup (tmpsi, offsets, scale))); + emit_insn (gen_mulvNsi3_dup (tmpsi, offsets, scale, NULL, exec)); /* "Global" instructions do not support negative register offsets. */ if (as == ADDR_SPACE_FLAT || !unsigned_p) { if (unsigned_p) - (exec - ? emit_insn (gen_addv64di3_zext_dup2_exec (tmpdi, tmpsi, base, - undefdi, exec)) - : emit_insn (gen_addv64di3_zext_dup2 (tmpdi, tmpsi, base))); + emit_insn (gen_addvNdi3_zext_dup2 (tmpdi, tmpsi, base, NULL, exec)); else - (exec - ? emit_insn (gen_addv64di3_sext_dup2_exec (tmpdi, tmpsi, base, - undefdi, exec)) - : emit_insn (gen_addv64di3_sext_dup2 (tmpdi, tmpsi, base))); + emit_insn (gen_addvNdi3_sext_dup2 (tmpdi, tmpsi, base, NULL, exec)); return tmpdi; } else if (as == ADDR_SPACE_GLOBAL) @@ -2065,59 +2327,9 @@ gcn_secondary_reload (bool in_p, rtx x, reg_class_t rclass, || GET_MODE_CLASS (reload_mode) == MODE_VECTOR_FLOAT) { if (in_p) - switch (reload_mode) - { - case E_V64SImode: - sri->icode = CODE_FOR_reload_inv64si; - break; - case E_V64SFmode: - sri->icode = CODE_FOR_reload_inv64sf; - break; - case E_V64HImode: - sri->icode = CODE_FOR_reload_inv64hi; - break; - case E_V64HFmode: - sri->icode = CODE_FOR_reload_inv64hf; - break; - case E_V64QImode: - sri->icode = CODE_FOR_reload_inv64qi; - break; - case E_V64DImode: - sri->icode = CODE_FOR_reload_inv64di; - break; - case E_V64DFmode: - sri->icode = CODE_FOR_reload_inv64df; - break; - default: - gcc_unreachable (); - } + sri->icode = get_code_for_reload_in (reload_mode); else - switch (reload_mode) - { - case E_V64SImode: - sri->icode = CODE_FOR_reload_outv64si; - break; - case E_V64SFmode: - sri->icode = CODE_FOR_reload_outv64sf; - break; - case E_V64HImode: - sri->icode = CODE_FOR_reload_outv64hi; - break; - case E_V64HFmode: - sri->icode = CODE_FOR_reload_outv64hf; - break; - case E_V64QImode: - sri->icode = CODE_FOR_reload_outv64qi; - break; - case E_V64DImode: - sri->icode = CODE_FOR_reload_outv64di; - break; - case E_V64DFmode: - sri->icode = CODE_FOR_reload_outv64df; - break; - default: - gcc_unreachable (); - } + sri->icode = get_code_for_reload_out (reload_mode); break; } /* Fallthrough. */ @@ -3428,6 +3640,9 @@ gcn_valid_cvt_p (machine_mode from, machine_mode to, enum gcn_cvt_t op) if (VECTOR_MODE_P (from)) { + if (GET_MODE_NUNITS (from) != GET_MODE_NUNITS (to)) + return false; + from = GET_MODE_INNER (from); to = GET_MODE_INNER (to); } @@ -3926,7 +4141,7 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , rtx mem = gen_rtx_MEM (GET_MODE (target), addrs); /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */ /* FIXME: set attributes. */ - emit_insn (gen_mov_with_exec (target, mem, exec)); + emit_insn (gen_movvNm (target, mem, NULL, exec)); return target; } case GCN_BUILTIN_FLAT_STORE_PTR_INT32: @@ -3961,20 +4176,18 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , rtx mem = gen_rtx_MEM (vmode, addrs); /*set_mem_addr_space (mem, ADDR_SPACE_FLAT); */ /* FIXME: set attributes. 
*/ - emit_insn (gen_mov_with_exec (mem, val, exec)); + emit_insn (gen_movvNm (mem, val, NULL, exec)); return target; } case GCN_BUILTIN_SQRTVF: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_sqrtv64sf2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_sqrtv64sf2 (target, arg)); return target; } case GCN_BUILTIN_SQRTF: @@ -3992,20 +4205,17 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_absv64sf2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_absv64sf2 (target, arg)); return target; } case GCN_BUILTIN_LDEXPVF: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg1 = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, @@ -4014,15 +4224,13 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64SImode, EXPAND_NORMAL)); - emit_insn (gen_ldexpv64sf3_exec - (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_ldexpv64sf3 (target, arg1, arg2)); return target; } case GCN_BUILTIN_LDEXPV: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg1 = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, @@ -4031,60 +4239,51 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, V64SImode, EXPAND_NORMAL)); - emit_insn (gen_ldexpv64df3_exec - (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec)); + emit_insn (gen_ldexpv64df3 (target, arg1, arg2)); return target; } case GCN_BUILTIN_FREXPVF_EXP: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64sf_exp2_exec - (target, arg, gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_frexpv64sf_exp2 (target, arg)); return target; } case GCN_BUILTIN_FREXPVF_MANT: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64SFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64SFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64sf_mant2_exec - (target, arg, gcn_gen_undef (V64SFmode), exec)); + emit_insn (gen_frexpv64sf_mant2 (target, arg)); return target; } case GCN_BUILTIN_FREXPV_EXP: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64DFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64df_exp2_exec - (target, arg, gcn_gen_undef (V64SImode), exec)); + emit_insn (gen_frexpv64df_exp2 (target, arg)); return target; } case GCN_BUILTIN_FREXPV_MANT: { if (ignore) return target; - rtx exec = gcn_full_exec_reg (); rtx arg = force_reg (V64DFmode, expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, V64DFmode, EXPAND_NORMAL)); - emit_insn (gen_frexpv64df_mant2_exec - (target, arg, gcn_gen_undef (V64DFmode), exec)); + emit_insn (gen_frexpv64df_mant2 (target, arg)); return target; } case GCN_BUILTIN_OMP_DIM_SIZE: @@ -4239,10 +4438,11 @@ gcn_vectorize_get_mask_mode (machine_mode) Helper function for gcn_vectorize_vec_perm_const. 
static rtx -gcn_make_vec_perm_address (unsigned int *perm) +gcn_make_vec_perm_address (unsigned int *perm, int nelt) { - rtx x = gen_reg_rtx (V64SImode); - emit_move_insn (x, gcn_vec_constant (V64SImode, 0)); + machine_mode mode = VnMODE (nelt, SImode); + rtx x = gen_reg_rtx (mode); + emit_move_insn (x, gcn_vec_constant (mode, 0)); /* Permutation addresses use byte addressing. With each vector lane being 4 bytes wide, and with 64 lanes in total, only bits 2..7 are significant, @@ -4258,15 +4458,13 @@ gcn_make_vec_perm_address (unsigned int *perm) { uint64_t exec_mask = 0; uint64_t lane_mask = 1; - for (int j = 0; j < 64; j++, lane_mask <<= 1) - if ((perm[j] * 4) & bit_mask) + for (int j = 0; j < nelt; j++, lane_mask <<= 1) + if (((perm[j] % nelt) * 4) & bit_mask) exec_mask |= lane_mask; if (exec_mask) - emit_insn (gen_addv64si3_exec (x, x, - gcn_vec_constant (V64SImode, - bit_mask), - x, get_exec (exec_mask))); + emit_insn (gen_addvNsi3 (x, x, gcn_vec_constant (mode, bit_mask), + x, get_exec (exec_mask))); } return x; @@ -4336,39 +4534,11 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, src1_lanes |= lane_bit; } - rtx addr = gcn_make_vec_perm_address (perm); - rtx (*ds_bpermute) (rtx, rtx, rtx, rtx); - - switch (vmode) - { - case E_V64QImode: - ds_bpermute = gen_ds_bpermutev64qi; - break; - case E_V64HImode: - ds_bpermute = gen_ds_bpermutev64hi; - break; - case E_V64SImode: - ds_bpermute = gen_ds_bpermutev64si; - break; - case E_V64HFmode: - ds_bpermute = gen_ds_bpermutev64hf; - break; - case E_V64SFmode: - ds_bpermute = gen_ds_bpermutev64sf; - break; - case E_V64DImode: - ds_bpermute = gen_ds_bpermutev64di; - break; - case E_V64DFmode: - ds_bpermute = gen_ds_bpermutev64df; - break; - default: - gcc_assert (false); - } + rtx addr = gcn_make_vec_perm_address (perm, nelt); /* Load elements from src0 to dst. */ - gcc_assert (~src1_lanes); - emit_insn (ds_bpermute (dst, addr, src0, gcn_full_exec_reg ())); + gcc_assert ((~src1_lanes) & (0xffffffffffffffffUL >> (64-nelt))); + emit_insn (gen_ds_bpermutevNm (dst, addr, src0, get_exec (vmode))); /* Load elements from src1 to dst. */ if (src1_lanes) @@ -4379,8 +4549,8 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode, the two source vectors together. */ rtx tmp = gen_reg_rtx (vmode); - emit_insn (ds_bpermute (tmp, addr, src1, gcn_full_exec_reg ())); - emit_insn (gen_mov_with_exec (dst, tmp, get_exec (src1_lanes))); + emit_insn (gen_ds_bpermutevNm (tmp, addr, src1, get_exec (vmode))); + emit_insn (gen_movvNm (dst, tmp, dst, get_exec (src1_lanes))); } return true; @@ -4396,7 +4566,22 @@ gcn_vector_mode_supported_p (machine_mode mode) { return (mode == V64QImode || mode == V64HImode || mode == V64SImode || mode == V64DImode - || mode == V64SFmode || mode == V64DFmode); + || mode == V64SFmode || mode == V64DFmode + || mode == V32QImode || mode == V32HImode + || mode == V32SImode || mode == V32DImode + || mode == V32SFmode || mode == V32DFmode + || mode == V16QImode || mode == V16HImode + || mode == V16SImode || mode == V16DImode + || mode == V16SFmode || mode == V16DFmode + || mode == V8QImode || mode == V8HImode + || mode == V8SImode || mode == V8DImode + || mode == V8SFmode || mode == V8DFmode + || mode == V4QImode || mode == V4HImode + || mode == V4SImode || mode == V4DImode + || mode == V4SFmode || mode == V4DFmode + || mode == V2QImode || mode == V2HImode + || mode == V2SImode || mode == V2DImode + || mode == V2SFmode || mode == V2DFmode); } /* Implement TARGET_VECTORIZE_PREFERRED_SIMD_MODE.
@@ -4425,23 +4610,74 @@ gcn_vectorize_preferred_simd_mode (scalar_mode mode) } } +/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. + + Try all the vector modes. */ + +unsigned int gcn_autovectorize_vector_modes (vector_modes *modes, + bool ARG_UNUSED (all)) +{ + modes->safe_push (V64QImode); + modes->safe_push (V64HImode); + modes->safe_push (V64SImode); + modes->safe_push (V64SFmode); + modes->safe_push (V64DImode); + modes->safe_push (V64DFmode); + + modes->safe_push (V32QImode); + modes->safe_push (V32HImode); + modes->safe_push (V32SImode); + modes->safe_push (V32SFmode); + modes->safe_push (V32DImode); + modes->safe_push (V32DFmode); + + modes->safe_push (V16QImode); + modes->safe_push (V16HImode); + modes->safe_push (V16SImode); + modes->safe_push (V16SFmode); + modes->safe_push (V16DImode); + modes->safe_push (V16DFmode); + + modes->safe_push (V8QImode); + modes->safe_push (V8HImode); + modes->safe_push (V8SImode); + modes->safe_push (V8SFmode); + modes->safe_push (V8DImode); + modes->safe_push (V8DFmode); + + modes->safe_push (V4QImode); + modes->safe_push (V4HImode); + modes->safe_push (V4SImode); + modes->safe_push (V4SFmode); + modes->safe_push (V4DImode); + modes->safe_push (V4DFmode); + + modes->safe_push (V2QImode); + modes->safe_push (V2HImode); + modes->safe_push (V2SImode); + modes->safe_push (V2SFmode); + modes->safe_push (V2DImode); + modes->safe_push (V2DFmode); + + /* We shouldn't need VECT_COMPARE_COSTS as they should all cost the same. */ + return 0; +} + /* Implement TARGET_VECTORIZE_RELATED_MODE. All GCN vectors are 64-lane, so this is simpler than other architectures. In particular, we do *not* want to match vector bit-size. */ static opt_machine_mode -gcn_related_vector_mode (machine_mode ARG_UNUSED (vector_mode), +gcn_related_vector_mode (machine_mode vector_mode, scalar_mode element_mode, poly_uint64 nunits) { - if (known_ne (nunits, 0U) && known_ne (nunits, 64U)) - return VOIDmode; + int n = nunits.to_constant (); - machine_mode pref_mode = gcn_vectorize_preferred_simd_mode (element_mode); - if (!VECTOR_MODE_P (pref_mode)) - return VOIDmode; + if (n == 0) + n = GET_MODE_NUNITS (vector_mode); - return pref_mode; + return VnMODE (n, element_mode); } /* Implement TARGET_VECTORIZE_PREFERRED_VECTOR_ALIGNMENT. @@ -4566,6 +4802,8 @@ gcn_expand_dpp_shr_insn (machine_mode mode, const char *insn, The vector register SRC of mode MODE is reduced using the operation given by UNSPEC, and the scalar result is returned in lane 63 of a vector register. */ +/* FIXME: Implement reductions for sizes other than V64. + (They're currently disabled in the machine description.) 
*/ rtx gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec) @@ -4975,10 +5213,11 @@ gcn_md_reorg (void) { if (VECTOR_MODE_P (GET_MODE (x))) { - new_exec = -1; - break; + int vf = GET_MODE_NUNITS (GET_MODE (x)); + new_exec = MAX ((uint64_t)new_exec, + 0xffffffffffffffffUL >> (64-vf)); } - else + else if (new_exec == 0) new_exec = 1; } } @@ -5693,13 +5932,12 @@ static void print_reg (FILE *file, rtx x) { machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); if (mode == BImode || mode == QImode || mode == HImode || mode == SImode - || mode == HFmode || mode == SFmode - || mode == V64SFmode || mode == V64SImode - || mode == V64QImode || mode == V64HImode) + || mode == HFmode || mode == SFmode) fprintf (file, "%s", reg_names[REGNO (x)]); - else if (mode == DImode || mode == V64DImode - || mode == DFmode || mode == V64DFmode) + else if (mode == DImode || mode == DFmode) { if (SGPR_REGNO_P (REGNO (x))) fprintf (file, "s[%i:%i]", REGNO (x) - FIRST_SGPR_REG, @@ -6146,20 +6384,20 @@ print_operand (FILE *file, rtx x, int code) case 'o': { const char *s = 0; - switch (GET_MODE_SIZE (GET_MODE (x))) + machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + switch (mode) { - case 1: + case E_QImode: s = "_ubyte"; break; - case 2: + case E_HImode: + case E_HFmode: s = "_ushort"; break; - /* The following are full-vector variants. */ - case 64: - s = "_ubyte"; - break; - case 128: - s = "_ushort"; + default: break; } @@ -6174,43 +6412,31 @@ print_operand (FILE *file, rtx x, int code) } case 's': { - const char *s = ""; - switch (GET_MODE_SIZE (GET_MODE (x))) + const char *s; + machine_mode mode = GET_MODE (x); + if (VECTOR_MODE_P (mode)) + mode = GET_MODE_INNER (mode); + + switch (mode) { - case 1: + case E_QImode: s = "_byte"; break; - case 2: + case E_HImode: + case E_HFmode: s = "_short"; break; - case 4: + case E_SImode: + case E_SFmode: s = "_dword"; break; - case 8: + case E_DImode: + case E_DFmode: s = "_dwordx2"; break; - case 12: - s = "_dwordx3"; - break; - case 16: + case E_TImode: s = "_dwordx4"; break; - case 32: - s = "_dwordx8"; - break; - case 64: - s = VECTOR_MODE_P (GET_MODE (x)) ? "_byte" : "_dwordx16"; - break; - /* The following are full-vector variants. */ - case 128: - s = "_short"; - break; - case 256: - s = "_dword"; - break; - case 512: - s = "_dwordx2"; - break; default: output_operand_lossage ("invalid operand %%xn code"); return; @@ -6714,6 +6940,9 @@ gcn_dwarf_register_span (rtx rtl) #define TARGET_ASM_TRAMPOLINE_TEMPLATE gcn_asm_trampoline_template #undef TARGET_ATTRIBUTE_TABLE #define TARGET_ATTRIBUTE_TABLE gcn_attribute_table +#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES +#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ + gcn_autovectorize_vector_modes #undef TARGET_BUILTIN_DECL #define TARGET_BUILTIN_DECL gcn_builtin_decl #undef TARGET_CAN_CHANGE_MODE_CLASS
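As a side note on the exec-mask arithmetic that recurs in the hunks above (gcn_md_reorg, the vec_perm assertion, and the get_exec calls): the idiom "0xffffffffffffffffUL >> (64-vf)" builds a lane mask covering the first VF lanes of the 64-lane EXEC register. A minimal standalone C sketch of that arithmetic, with illustrative names only, not taken from the patch:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Lane mask enabling the first VF lanes of a 64-lane EXEC register,
   i.e. the "0xffffffffffffffffUL >> (64-vf)" idiom used in the patch.  */
static uint64_t
lane_mask (int vf)
{
  return 0xffffffffffffffffUL >> (64 - vf);
}

int
main (void)
{
  /* The patch adds vector modes with 2, 4, 8, 16, 32 and 64 lanes.  */
  for (int vf = 2; vf <= 64; vf *= 2)
    printf ("vf=%2d  exec=0x%016" PRIx64 "\n", vf, lane_mask (vf));
  return 0;
}

For vf=64 the shift count is zero and the full mask 0xffffffffffffffff results, matching the old constm1_rtx behaviour; smaller vector factors enable only the low-numbered lanes.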