diff --git a/gcc/config/gcn/constraints.md b/gcc/config/gcn/constraints.md
new file mode 100644
index 0000000..326bbc2
--- /dev/null
+++ b/gcc/config/gcn/constraints.md
@@ -0,0 +1,139 @@
+;; Constraint definitions for GCN.
+;; Copyright (C) 2016-2018 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_constraint "I"
+ "Inline integer constant"
+ (and (match_code "const_int")
+ (match_test "ival >= -16 && ival <= 64")))
+
+(define_constraint "J"
+ "Signed integer 16-bit inline constant"
+ (and (match_code "const_int")
+ (match_test "((unsigned HOST_WIDE_INT) ival + 0x8000) < 0x10000")))
+
+(define_constraint "Kf"
+ "Immeditate constant -1"
+ (and (match_code "const_int")
+ (match_test "ival == -1")))
+
+(define_constraint "L"
+ "Unsigned integer 15-bit constant"
+ (and (match_code "const_int")
+ (match_test "((unsigned HOST_WIDE_INT) ival) < 0x8000")))
+
+(define_constraint "A"
+ "Inline immediate parameter"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "gcn_inline_constant_p (op)")))
+
+(define_constraint "B"
+ "Immediate 32-bit parameter"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "gcn_constant_p (op)")))
+
+(define_constraint "C"
+ "Immediate 32-bit parameter zero-extended to 64-bits"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "gcn_constant64_p (op)")))
+
+(define_constraint "DA"
+ "Splittable inline immediate 64-bit parameter"
+ (and (match_code "const_int,const_double,const_vector")
+ (match_test "gcn_inline_constant64_p (op)")))
+
+(define_constraint "DB"
+ "Splittable immediate 64-bit parameter"
+ (match_code "const_int,const_double,const_vector"))
+
+(define_constraint "U"
+ "unspecified value"
+ (match_code "unspec"))
+
+(define_constraint "Y"
+ "Symbol or label for relative calls"
+ (match_code "symbol_ref,label_ref"))
+
+(define_register_constraint "v" "VGPR_REGS"
+ "VGPR registers")
+
+(define_register_constraint "Sg" "SGPR_REGS"
+ "SGPR registers")
+
+(define_register_constraint "SD" "SGPR_DST_REGS"
+ "registers useable as a destination of scalar operation")
+
+(define_register_constraint "SS" "SGPR_SRC_REGS"
+ "registers useable as a source of scalar operation")
+
+(define_register_constraint "Sm" "SGPR_MEM_SRC_REGS"
+ "registers useable as a source of scalar memory operation")
+
+(define_register_constraint "Sv" "SGPR_VOP_SRC_REGS"
+ "registers useable as a source of VOP3A instruction")
+
+(define_register_constraint "ca" "ALL_CONDITIONAL_REGS"
+ "SCC VCCZ or EXECZ")
+
+(define_register_constraint "cs" "SCC_CONDITIONAL_REG"
+ "SCC")
+
+(define_register_constraint "cV" "VCC_CONDITIONAL_REG"
+ "VCC")
+
+(define_register_constraint "e" "EXEC_MASK_REG"
+ "EXEC")
+
+(define_special_memory_constraint "RB"
+ "Buffer memory address to scratch memory."
+ (and (match_code "mem")
+ (match_test "AS_SCRATCH_P (MEM_ADDR_SPACE (op))")))
+
+(define_special_memory_constraint "RF"
+ "Buffer memory address to flat memory."
+ (and (match_code "mem")
+ (match_test "AS_FLAT_P (MEM_ADDR_SPACE (op))
+ && gcn_flat_address_p (XEXP (op, 0), mode)")))
+
+(define_special_memory_constraint "RS"
+ "Buffer memory address to scalar flat memory."
+ (and (match_code "mem")
+ (match_test "AS_SCALAR_FLAT_P (MEM_ADDR_SPACE (op))
+ && gcn_scalar_flat_mem_p (op)")))
+
+(define_special_memory_constraint "RL"
+ "Buffer memory address to LDS memory."
+ (and (match_code "mem")
+ (match_test "AS_LDS_P (MEM_ADDR_SPACE (op))")))
+
+(define_special_memory_constraint "RG"
+ "Buffer memory address to GDS memory."
+ (and (match_code "mem")
+ (match_test "AS_GDS_P (MEM_ADDR_SPACE (op))")))
+
+(define_special_memory_constraint "RD"
+ "Buffer memory address to GDS or LDS memory."
+ (and (match_code "mem")
+ (ior (match_test "AS_GDS_P (MEM_ADDR_SPACE (op))")
+ (match_test "AS_LDS_P (MEM_ADDR_SPACE (op))"))))
+
+(define_special_memory_constraint "RM"
+ "Memory address to global (main) memory."
+ (and (match_code "mem")
+ (match_test "AS_GLOBAL_P (MEM_ADDR_SPACE (op))
+ && gcn_global_address_p (XEXP (op, 0))")))
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
new file mode 100644
index 0000000..18907b6
--- /dev/null
+++ b/gcc/config/gcn/gcn-valu.md
@@ -0,0 +1,3048 @@
+;; Copyright (C) 2016-2018 Free Software Foundation, Inc.
+
+;; This file is free software; you can redistribute it and/or modify it under
+;; the terms of the GNU General Public License as published by the Free
+;; Software Foundation; either version 3 of the License, or (at your option)
+;; any later version.
+
+;; This file is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+;; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+;; for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+;; {{{ Vector iterators
+
+; Vector modes for one vector register
+(define_mode_iterator VEC_1REG_MODE
+ [V64QI V64HI V64SI V64HF V64SF])
+(define_mode_iterator VEC_1REG_ALT
+ [V64QI V64HI V64SI V64HF V64SF])
+
+(define_mode_iterator VEC_1REG_INT_MODE
+ [V64QI V64HI V64SI])
+(define_mode_iterator VEC_1REG_INT_ALT
+ [V64QI V64HI V64SI])
+
+; Vector modes for two vector registers
+(define_mode_iterator VEC_2REG_MODE
+ [V64DI V64DF])
+
+; All of the above
+(define_mode_iterator VEC_REG_MODE
+ [V64QI V64HI V64SI V64HF V64SF ; Single reg
+ V64DI V64DF]) ; Double reg
+
+(define_mode_attr scalar_mode
+ [(V64QI "qi") (V64HI "hi") (V64SI "si")
+ (V64HF "hf") (V64SF "sf") (V64DI "di") (V64DF "df")])
+
+(define_mode_attr SCALAR_MODE
+ [(V64QI "QI") (V64HI "HI") (V64SI "SI")
+ (V64HF "HF") (V64SF "SF") (V64DI "DI") (V64DF "DF")])
+
+;; }}}
+;; {{{ Substitutions
+
+(define_subst_attr "exec" "vec_merge"
+ "" "_exec")
+(define_subst_attr "exec_clobber" "vec_merge_with_clobber"
+ "" "_exec")
+(define_subst_attr "exec_vcc" "vec_merge_with_vcc"
+ "" "_exec")
+(define_subst_attr "exec_scatter" "scatter_store"
+ "" "_exec")
+
+(define_subst "vec_merge"
+ [(set (match_operand:VEC_REG_MODE 0)
+ (match_operand:VEC_REG_MODE 1))]
+ ""
+ [(set (match_dup 0)
+ (vec_merge:VEC_REG_MODE
+ (match_dup 1)
+ (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" "e")))])
+
+(define_subst "vec_merge_with_clobber"
+ [(set (match_operand:VEC_REG_MODE 0)
+ (match_operand:VEC_REG_MODE 1))
+ (clobber (match_operand 2))]
+ ""
+ [(set (match_dup 0)
+ (vec_merge:VEC_REG_MODE
+ (match_dup 1)
+ (match_operand:VEC_REG_MODE 3 "gcn_register_or_unspec_operand" "U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" "e")))
+ (clobber (match_dup 2))])
+
+(define_subst "vec_merge_with_vcc"
+ [(set (match_operand:VEC_REG_MODE 0)
+ (match_operand:VEC_REG_MODE 1))
+ (set (match_operand:DI 2)
+ (match_operand:DI 3))]
+ ""
+ [(parallel
+ [(set (match_dup 0)
+ (vec_merge:VEC_REG_MODE
+ (match_dup 1)
+ (match_operand:VEC_REG_MODE 4
+ "gcn_register_or_unspec_operand" "U0")
+ (match_operand:DI 5 "gcn_exec_reg_operand" "e")))
+ (set (match_dup 2)
+ (and:DI (match_dup 3)
+ (reg:DI EXEC_REG)))])])
+
+(define_subst "scatter_store"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand 0)
+ (match_operand 1)
+ (match_operand 2)
+ (match_operand 3)]
+ UNSPEC_SCATTER))]
+ ""
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_dup 0)
+ (match_dup 1)
+ (match_dup 2)
+ (match_dup 3)
+ (match_operand:DI 4 "gcn_exec_reg_operand" "e")]
+ UNSPEC_SCATTER))])
+
+;; }}}
+;; {{{ Vector moves
+
+; This is the entry point for all vector register moves.  Memory accesses
+; can also come this way, but they will more usually use the reload_in/out,
+; gather/scatter, maskload/store, etc.
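+;
+; For example (an illustrative sketch): a load such as
+;   (set (reg:V64SI v) (mem:V64SI (reg:DI s)))
+; has no direct hardware equivalent; the code below rewrites the scalar
+; base address into a vector of per-lane addresses and emits a gather.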
+
+(define_expand "mov"
+ [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
+ (match_operand:VEC_REG_MODE 1 "general_operand"))]
+ ""
+ {
+ if (MEM_P (operands[0]) && !lra_in_progress && !reload_completed)
+ {
+ operands[1] = force_reg (<MODE>mode, operands[1]);
+ rtx scratch = gen_rtx_SCRATCH (V64DImode);
+ rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+ rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+ rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
+ operands[0],
+ scratch);
+ emit_insn (gen_scatter<mode>_expr (expr, operands[1], a, v));
+ DONE;
+ }
+ else if (MEM_P (operands[1]) && !lra_in_progress && !reload_completed)
+ {
+ rtx scratch = gen_rtx_SCRATCH (V64DImode);
+ rtx a = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+ rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+ rtx expr = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
+ operands[1],
+ scratch);
+ emit_insn (gen_gather<mode>_expr (operands[0], expr, a, v));
+ DONE;
+ }
+ else if (MEM_P (operands[0]) || MEM_P (operands[1]))
+ {
+ gcc_assert (!reload_completed);
+ rtx scratch = gen_reg_rtx (V64DImode);
+ emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], scratch));
+ DONE;
+ }
+ })
+
+; A pseudo instruction that helps LRA use the "U0" constraint.
+
+(define_insn "mov_unspec"
+ [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand" "=v")
+ (match_operand:VEC_REG_MODE 1 "gcn_unspec_operand" " U"))]
+ ""
+ ""
+ [(set_attr "type" "unknown")
+ (set_attr "length" "0")])
+
+(define_insn "*mov"
+ [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v")
+ (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B"))]
+ ""
+ "v_mov_b32\t%0, %1"
+ [(set_attr "type" "vop1,vop1")
+ (set_attr "length" "4,8")])
+
+(define_insn "mov_exec"
+ [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand"
+ "=v, v, v, v, v, m")
+ (vec_merge:VEC_1REG_MODE
+ (match_operand:VEC_1REG_MODE 1 "general_operand"
+ "vA, B, v,vA, m, v")
+ (match_operand:VEC_1REG_MODE 3 "gcn_alu_or_unspec_operand"
+ "U0,U0,vA,vA,U0,U0")
+ (match_operand:DI 2 "register_operand" " e, e,cV,Sv, e, e")))
+ (clobber (match_scratch:V64DI 4 "=X, X, X, X,&v,&v"))]
+ "!MEM_P (operands[0]) || REG_P (operands[1])"
+ "@
+ v_mov_b32\t%0, %1
+ v_mov_b32\t%0, %1
+ v_cndmask_b32\t%0, %3, %1, vcc
+ v_cndmask_b32\t%0, %3, %1, %2
+ #
+ #"
+ [(set_attr "type" "vop1,vop1,vop2,vop3a,*,*")
+ (set_attr "length" "4,8,4,8,16,16")])
+
+; This variant does not accept an unspec, but does permit MEM
+; read/modify/write which is necessary for maskstore.
+
+;(define_insn "*mov_exec_match"
+; [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "=v,v, v, m")
+; (vec_merge:VEC_1REG_MODE
+; (match_operand:VEC_1REG_MODE 1 "general_operand" "vA,B, m, v")
+; (match_dup 0)
+; (match_operand:DI 2 "gcn_exec_reg_operand" " e,e, e, e")))
+; (clobber (match_scratch:V64DI 3 "=X,X,&v,&v"))]
+; "!MEM_P (operands[0]) || REG_P (operands[1])"
+; "@
+; v_mov_b32\t%0, %1
+; v_mov_b32\t%0, %1
+; #
+; #"
+; [(set_attr "type" "vop1,vop1,*,*")
+; (set_attr "length" "4,8,16,16")])
+
+(define_insn "*mov"
+ [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v")
+ (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB"))]
+ ""
+ {
+ if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
+ return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
+ else
+ return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "16")])
+
+(define_insn "mov_exec"
+ [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand"
+ "= v, v, v, v, m")
+ (vec_merge:VEC_2REG_MODE
+ (match_operand:VEC_2REG_MODE 1 "general_operand"
+ "vDB, v0, v0, m, v")
+ (match_operand:VEC_2REG_MODE 3 "gcn_alu_or_unspec_operand"
+ " U0,vDA0,vDA0,U0,U0")
+ (match_operand:DI 2 "register_operand" " e, cV, Sv, e, e")))
+ (clobber (match_scratch:V64DI 4 "= X, X, X,&v,&v"))]
+ "!MEM_P (operands[0]) || REG_P (operands[1])"
+ {
+ if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1]))
+ switch (which_alternative)
+ {
+ case 0:
+ return "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1";
+ case 1:
+ return "v_cndmask_b32\t%L0, %L3, %L1, vcc\;"
+ "v_cndmask_b32\t%H0, %H3, %H1, vcc";
+ case 2:
+ return "v_cndmask_b32\t%L0, %L3, %L1, %2\;"
+ "v_cndmask_b32\t%H0, %H3, %H1, %2";
+ }
+ else
+ switch (which_alternative)
+ {
+ case 0:
+ return "v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1";
+ case 1:
+ return "v_cndmask_b32\t%H0, %H3, %H1, vcc\;"
+ "v_cndmask_b32\t%L0, %L3, %L1, vcc";
+ case 2:
+ return "v_cndmask_b32\t%H0, %H3, %H1, %2\;"
+ "v_cndmask_b32\t%L0, %L3, %L1, %2";
+ }
+
+ return "#";
+ }
+ [(set_attr "type" "vmult,vmult,vmult,*,*")
+ (set_attr "length" "16,16,16,16,16")])
+
+; This variant does not accept an unspec, but does permit MEM
+; read/modify/write which is necessary for maskstore.
+
+;(define_insn "*mov_exec_match"
+; [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "=v, v, m")
+; (vec_merge:VEC_2REG_MODE
+; (match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")
+; (match_dup 0)
+; (match_operand:DI 2 "gcn_exec_reg_operand" " e, e, e")))
+; (clobber (match_scratch:V64DI 3 "=X,&v,&v"))]
+; "!MEM_P (operands[0]) || REG_P (operands[1])"
+; "@
+; * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
+; return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
+; else \
+; return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
+; #
+; #"
+; [(set_attr "type" "vmult,*,*")
+; (set_attr "length" "16,16,16")])
+
+; An SGPR-base load looks like:
+;   <load> v, Sv
+;
+; There's no hardware instruction that corresponds to this, but vector base
+; addresses are placed in an SGPR because it is easier to add to a vector.
+; We also have a temporary vT, and the vector v1 holding numbered lanes.
+;
+; Rewrite as:
+; vT = v1 << log2(element-size)
+; vT += Sv
+; flat_load v, vT
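+;
+; For example (our numbers): with V64SImode the element size is 4 bytes,
+; so vT = v1 << 2 holds the per-lane byte offsets 0, 4, ..., 252; adding
+; the base address from Sv then yields one address per lane for the
+; flat_load.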
+
+(define_insn "mov_sgprbase"
+ [(set (match_operand:VEC_1REG_MODE 0 "nonimmediate_operand" "= v, v, v, m")
+ (unspec:VEC_1REG_MODE
+ [(match_operand:VEC_1REG_MODE 1 "general_operand" " vA,vB, m, v")]
+ UNSPEC_SGPRBASE))
+ (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v,&v"))]
+ "lra_in_progress || reload_completed"
+ "@
+ v_mov_b32\t%0, %1
+ v_mov_b32\t%0, %1
+ #
+ #"
+ [(set_attr "type" "vop1,vop1,*,*")
+ (set_attr "length" "4,8,12,12")])
+
+(define_insn "mov_sgprbase"
+ [(set (match_operand:VEC_2REG_MODE 0 "nonimmediate_operand" "= v, v, m")
+ (unspec:VEC_2REG_MODE
+ [(match_operand:VEC_2REG_MODE 1 "general_operand" "vDB, m, v")]
+ UNSPEC_SGPRBASE))
+ (clobber (match_operand:V64DI 2 "register_operand" "=&v,&v,&v"))]
+ "lra_in_progress || reload_completed"
+ "@
+ * if (!REG_P (operands[1]) || REGNO (operands[0]) <= REGNO (operands[1])) \
+ return \"v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1\"; \
+ else \
+ return \"v_mov_b32\t%H0, %H1\;v_mov_b32\t%L0, %L1\";
+ #
+ #"
+ [(set_attr "type" "vmult,*,*")
+ (set_attr "length" "8,12,12")])
+
+; reload_in was once a standard name, but here it's only referenced by
+; gcn_secondary_reload. It allows a reload with a scratch register.
+
+(define_expand "reload_in"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand" "= v")
+ (match_operand:VEC_REG_MODE 1 "memory_operand" " m"))
+ (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
+ ""
+ {
+ emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
+ DONE;
+ })
+
+; reload_out is similar to reload_in, above.
+
+(define_expand "reload_out"
+ [(set (match_operand:VEC_REG_MODE 0 "memory_operand" "= m")
+ (match_operand:VEC_REG_MODE 1 "register_operand" " v"))
+ (clobber (match_operand:V64DI 2 "register_operand" "=&v"))]
+ ""
+ {
+ emit_insn (gen_mov<mode>_sgprbase (operands[0], operands[1], operands[2]));
+ DONE;
+ })
+
+; Expand scalar addresses into gather/scatter patterns
+
+(define_split
+ [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
+ (unspec:VEC_REG_MODE
+ [(match_operand:VEC_REG_MODE 1 "general_operand")]
+ UNSPEC_SGPRBASE))
+ (clobber (match_scratch:V64DI 2))]
+ ""
+ [(set (mem:BLK (scratch))
+ (unspec:BLK [(match_dup 5) (match_dup 1) (match_dup 6) (match_dup 7)]
+ UNSPEC_SCATTER))]
+ {
+ operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
+ operands[0],
+ operands[2]);
+ operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+ operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:VEC_REG_MODE 0 "memory_operand")
+ (vec_merge:VEC_REG_MODE
+ (match_operand:VEC_REG_MODE 1 "general_operand")
+ (match_operand:VEC_REG_MODE 2 "")
+ (match_operand:DI 3 "gcn_exec_reg_operand")))
+ (clobber (match_scratch:V64DI 4))]
+ ""
+ [(set (mem:BLK (scratch))
+ (unspec:BLK [(match_dup 5) (match_dup 1)
+ (match_dup 6) (match_dup 7) (match_dup 3)]
+ UNSPEC_SCATTER))]
+ {
+ operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
+ operands[3],
+ operands[0],
+ operands[4]);
+ operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+ operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+ })
+
+(define_split
+ [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
+ (unspec:VEC_REG_MODE
+ [(match_operand:VEC_REG_MODE 1 "memory_operand")]
+ UNSPEC_SGPRBASE))
+ (clobber (match_scratch:V64DI 2))]
+ ""
+ [(set (match_dup 0)
+ (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
+ (mem:BLK (scratch))]
+ UNSPEC_GATHER))]
+ {
+ operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode, NULL,
+ operands[1],
+ operands[2]);
+ operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+ operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+ })
+
+(define_split
+ [(set (match_operand:VEC_REG_MODE 0 "nonimmediate_operand")
+ (vec_merge:VEC_REG_MODE
+ (match_operand:VEC_REG_MODE 1 "memory_operand")
+ (match_operand:VEC_REG_MODE 2 "")
+ (match_operand:DI 3 "gcn_exec_reg_operand")))
+ (clobber (match_scratch:V64DI 4))]
+ ""
+ [(set (match_dup 0)
+ (vec_merge:VEC_REG_MODE
+ (unspec:VEC_REG_MODE [(match_dup 5) (match_dup 6) (match_dup 7)
+ (mem:BLK (scratch))]
+ UNSPEC_GATHER)
+ (match_dup 2)
+ (match_dup 3)))]
+ {
+ operands[5] = gcn_expand_scalar_to_vector_address (<MODE>mode,
+ operands[3],
+ operands[1],
+ operands[4]);
+ operands[6] = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+ operands[7] = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+ })
+
+; TODO: Add zero/sign extending variants.
+
+;; }}}
+;; {{{ Lane moves
+
+; v_writelane and v_readlane work regardless of exec flags.
+; We allow the source to be scratch.
+;
+; FIXME: these should take A immediates.
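+;
+; For instance (illustrative only), "v_writelane_b32 v0, s4, 5" copies
+; scalar register s4 into lane 5 of v0 even when EXEC bit 5 is clear,
+; and "v_readlane_b32 s4, v0, 5" performs the reverse copy.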
+
+(define_insn "*vec_set"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "= v")
+ (vec_merge:VEC_1REG_MODE
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand: 1 "register_operand" " Sv"))
+ (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
+ " U0")
+ (ashift (const_int 1)
+ (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
+ ""
+ "v_writelane_b32 %0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
+; FIXME: 64-bit operations really should be splitters, but I am not sure how
+; to represent vertical subregs.
+(define_insn "*vec_set"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
+ (vec_merge:VEC_2REG_MODE
+ (vec_duplicate:VEC_2REG_MODE
+ (match_operand: 1 "register_operand" " Sv"))
+ (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
+ " U0")
+ (ashift (const_int 1)
+ (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
+ ""
+ "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2"
+ [(set_attr "type" "vmult")
+ (set_attr "length" "16")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
+(define_expand "vec_set"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand")
+ (vec_merge:VEC_REG_MODE
+ (vec_duplicate:VEC_REG_MODE
+ (match_operand: 1 "register_operand"))
+ (match_dup 0)
+ (ashift (const_int 1) (match_operand:SI 2 "gcn_alu_operand"))))]
+ "")
+
+(define_insn "*vec_set_1"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_1REG_MODE
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand: 1 "register_operand" "Sv"))
+ (match_operand:VEC_1REG_MODE 3 "gcn_register_or_unspec_operand"
+ "U0")
+ (match_operand:SI 2 "const_int_operand" " i")))]
+ "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
+ {
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
+ return "v_writelane_b32 %0, %1, %2";
+ }
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
+(define_insn "*vec_set_1"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=v")
+ (vec_merge:VEC_2REG_MODE
+ (vec_duplicate:VEC_2REG_MODE
+ (match_operand: 1 "register_operand" "Sv"))
+ (match_operand:VEC_2REG_MODE 3 "gcn_register_or_unspec_operand"
+ "U0")
+ (match_operand:SI 2 "const_int_operand" " i")))]
+ "((unsigned) exact_log2 (INTVAL (operands[2])) < 64)"
+ {
+ operands[2] = GEN_INT (exact_log2 (INTVAL (operands[2])));
+ return "v_writelane_b32 %L0, %L1, %2\;v_writelane_b32 %H0, %H1, %2";
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "16")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
+(define_insn "vec_duplicate"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
+ (vec_duplicate:VEC_1REG_MODE
+ (match_operand: 1 "gcn_alu_operand" "SvB")))]
+ ""
+ "v_mov_b32\t%0, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "vec_duplicate"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "= v")
+ (vec_duplicate:VEC_2REG_MODE
+ (match_operand: 1 "gcn_alu_operand" "SvDB")))]
+ ""
+ "v_mov_b32\t%L0, %L1\;v_mov_b32\t%H0, %H1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "16")])
+
+(define_insn "vec_extract"
+ [(set (match_operand: 0 "register_operand" "=Sg")
+ (vec_select:
+ (match_operand:VEC_1REG_MODE 1 "register_operand" " v")
+ (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
+ ""
+ "v_readlane_b32 %0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
+(define_insn "vec_extract"
+ [(set (match_operand: 0 "register_operand" "=Sg")
+ (vec_select:
+ (match_operand:VEC_2REG_MODE 1 "register_operand" " v")
+ (parallel [(match_operand:SI 2 "gcn_alu_operand" "SvB")])))]
+ ""
+ "v_readlane_b32 %L0, %L1, %2\;v_readlane_b32 %H0, %H1, %2"
+ [(set_attr "type" "vmult")
+ (set_attr "length" "16")
+ (set_attr "exec" "none")
+ (set_attr "laneselect" "yes")])
+
+(define_expand "vec_init"
+ [(match_operand:VEC_REG_MODE 0 "register_operand")
+ (match_operand 1)]
+ ""
+ {
+ gcn_expand_vector_init (operands[0], operands[1]);
+ DONE;
+ })
+
+;; }}}
+;; {{{ Scatter / Gather
+
+;; GCN does not have an instruction for loading a vector from contiguous
+;; memory, so *all* loads and stores are eventually converted to scatter
+;; or gather.
+;;
+;; GCC does not permit MEM to hold vectors of addresses, so we must use an
+;; unspec. The unspec formats are as follows:
+;;
+;; (unspec:V64??
+;; [(<address expression>)
+;; (<addr_space_t>)
+;; (<glc>)
+;; (mem:BLK (scratch))]
+;; UNSPEC_GATHER)
+;;
+;; (unspec:BLK
+;; [(<address expression>)
+;; (<source register>)
+;; (<addr_space_t>)
+;; (<glc>)
+;; (<exec>)]
+;; UNSPEC_SCATTER)
+;;
+;; - Loads are expected to be wrapped in a vec_merge, so do not need <exec>.
+;; - The mem:BLK does not contain any real information, but indicates that an
+;; unknown memory read is taking place. Stores are expected to use a similar
+;; mem:BLK outside the unspec.
+;; - The address space and glc (volatile) fields are there to replace the
+;; fields normally found in a MEM.
+;; - Multiple forms of address expression are supported, below.
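+;;
+;; For example, a gather of 64 SImode values might look like this (a
+;; sketch; AS and GLC stand for the integer constants encoding the
+;; address space and the volatile flag):
+;;
+;; (unspec:V64SI
+;;   [(reg:V64DI v[8:9])        ; one 64-bit address per lane
+;;    (const_int AS)
+;;    (const_int GLC)
+;;    (mem:BLK (scratch))]
+;;   UNSPEC_GATHER)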
+
+(define_expand "gather_load"
+ [(match_operand:VEC_REG_MODE 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), NULL);
+
+ if (GET_MODE (addr) == V64DImode)
+ emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+ const0_rtx, const0_rtx));
+ else
+ emit_insn (gen_gather<mode>_insn_2offsets (operands[0], operands[1],
+ addr, const0_rtx, const0_rtx,
+ const0_rtx));
+ DONE;
+ })
+
+(define_expand "gather_exec"
+ [(match_operand:VEC_REG_MODE 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand:V64SI 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")
+ (match_operand:DI 5 "gcn_exec_reg_operand")]
+ ""
+ {
+ rtx undefmode = gcn_gen_undef (<MODE>mode);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[1],
+ operands[2], operands[4],
+ INTVAL (operands[3]), operands[5]);
+
+ if (GET_MODE (addr) == V64DImode)
+ emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+ const0_rtx, const0_rtx,
+ const0_rtx, undefmode,
+ operands[5]));
+ else
+ emit_insn (gen_gather<mode>_insn_2offsets_exec (operands[0], operands[1],
+ addr, const0_rtx,
+ const0_rtx, const0_rtx,
+ undefmode, operands[5]));
+ DONE;
+ })
+
+; Allow any address expression
+(define_expand "gather_expr"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand")
+ (unspec:VEC_REG_MODE
+ [(match_operand 1 "")
+ (match_operand 2 "immediate_operand")
+ (match_operand 3 "immediate_operand")
+ (mem:BLK (scratch))]
+ UNSPEC_GATHER))]
+ ""
+ {})
+
+(define_insn "gather_insn_1offset"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
+ (unspec:VEC_REG_MODE
+ [(plus:V64DI (match_operand:V64DI 1 "register_operand" " v")
+ (vec_duplicate:V64DI
+ (match_operand 2 "immediate_operand" " n")))
+ (match_operand 3 "immediate_operand" " n")
+ (match_operand 4 "immediate_operand" " n")
+ (mem:BLK (scratch))]
+ UNSPEC_GATHER))]
+ "(AS_FLAT_P (INTVAL (operands[3]))
+ && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
+ || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
+ || (AS_GLOBAL_P (INTVAL (operands[3]))
+ && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
+ {
+ addr_space_t as = INTVAL (operands[3]);
+ const char *glc = INTVAL (operands[4]) ? " glc" : "";
+
+ static char buf[200];
+ if (AS_FLAT_P (as))
+ {
+ if (TARGET_GCN5_PLUS)
+ sprintf (buf, "flat_load%%s0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
+ glc);
+ else
+ sprintf (buf, "flat_load%%s0\t%%0, %%1%s\;s_waitcnt\t0", glc);
+ }
+ else if (AS_GLOBAL_P (as))
+ sprintf (buf, "global_load%%s0\t%%0, %%1, off offset:%%2%s\;"
+ "s_waitcnt\tvmcnt(0)", glc);
+ else
+ gcc_unreachable ();
+
+ return buf;
+ }
+ [(set_attr "type" "flat")
+ (set_attr "length" "12")])
+
+(define_insn "gather_insn_1offset_ds"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
+ (unspec:VEC_REG_MODE
+ [(plus:V64SI (match_operand:V64SI 1 "register_operand" " v")
+ (vec_duplicate:V64SI
+ (match_operand 2 "immediate_operand" " n")))
+ (match_operand 3 "immediate_operand" " n")
+ (match_operand 4 "immediate_operand" " n")
+ (mem:BLK (scratch))]
+ UNSPEC_GATHER))]
+ "(AS_ANY_DS_P (INTVAL (operands[3]))
+ && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x10000))"
+ {
+ addr_space_t as = INTVAL (operands[3]);
+ static char buf[200];
+ sprintf (buf, "ds_read%%b0\t%%0, %%1 offset:%%2%s\;s_waitcnt\tlgkmcnt(0)",
+ (AS_GDS_P (as) ? " gds" : ""));
+ return buf;
+ }
+ [(set_attr "type" "ds")
+ (set_attr "length" "12")])
+
+(define_insn "gather_insn_2offsets"
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand" "=v")
+ (unspec:VEC_REG_MODE
+ [(plus:V64DI
+ (plus:V64DI
+ (vec_duplicate:V64DI
+ (match_operand:DI 1 "register_operand" "Sv"))
+ (sign_extend:V64DI
+ (match_operand:V64SI 2 "register_operand" " v")))
+ (vec_duplicate:V64DI (match_operand 3 "immediate_operand" " n")))
+ (match_operand 4 "immediate_operand" " n")
+ (match_operand 5 "immediate_operand" " n")
+ (mem:BLK (scratch))]
+ UNSPEC_GATHER))]
+ "(AS_GLOBAL_P (INTVAL (operands[4]))
+ && (((unsigned HOST_WIDE_INT)INTVAL(operands[3]) + 0x1000) < 0x2000))"
+ {
+ addr_space_t as = INTVAL (operands[4]);
+ const char *glc = INTVAL (operands[5]) ? " glc" : "";
+
+ static char buf[200];
+ if (AS_GLOBAL_P (as))
+ {
+ /* Work around assembler bug in which a 64-bit register is expected,
+ but a 32-bit value would be correct. */
+ int reg = REGNO (operands[2]) - FIRST_VGPR_REG;
+ sprintf (buf, "global_load%%s0\t%%0, v[%d:%d], %%1 offset:%%3%s\;"
+ "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
+ }
+ else
+ gcc_unreachable ();
+
+ return buf;
+ }
+ [(set_attr "type" "flat")
+ (set_attr "length" "12")])
+
+(define_expand "scatter_store"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:VEC_REG_MODE 4 "register_operand")]
+ ""
+ {
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), NULL);
+
+ if (GET_MODE (addr) == V64DImode)
+ emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+ const0_rtx, const0_rtx));
+ else
+ emit_insn (gen_scatter<mode>_insn_2offsets (operands[0], addr,
+ const0_rtx, operands[4],
+ const0_rtx, const0_rtx));
+ DONE;
+ })
+
+(define_expand "scatter_exec"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:VEC_REG_MODE 4 "register_operand")
+ (match_operand:DI 5 "gcn_exec_reg_operand")]
+ ""
+ {
+ operands[5] = force_reg (DImode, operands[5]);
+
+ rtx addr = gcn_expand_scaled_offsets (DEFAULT_ADDR_SPACE, operands[0],
+ operands[1], operands[3],
+ INTVAL (operands[2]), operands[5]);
+
+ if (GET_MODE (addr) == V64DImode)
+ emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+ operands[4], const0_rtx,
+ const0_rtx,
+ operands[5]));
+ else
+ emit_insn (gen_scatter<mode>_insn_2offsets_exec (operands[0], addr,
+ const0_rtx, operands[4],
+ const0_rtx, const0_rtx,
+ operands[5]));
+ DONE;
+ })
+
+; Allow any address expression
+(define_expand "scatter_expr"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(match_operand:V64DI 0 "")
+ (match_operand:VEC_REG_MODE 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand 3 "immediate_operand")]
+ UNSPEC_SCATTER))]
+ ""
+ {})
+
+(define_insn "scatter_insn_1offset"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(plus:V64DI (match_operand:V64DI 0 "register_operand" "v")
+ (vec_duplicate:V64DI
+ (match_operand 1 "immediate_operand" "n")))
+ (match_operand:VEC_REG_MODE 2 "register_operand" "v")
+ (match_operand 3 "immediate_operand" "n")
+ (match_operand 4 "immediate_operand" "n")]
+ UNSPEC_SCATTER))]
+ "(AS_FLAT_P (INTVAL (operands[3]))
+ && (INTVAL(operands[1]) == 0
+ || (TARGET_GCN5_PLUS
+ && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
+ || (AS_GLOBAL_P (INTVAL (operands[3]))
+ && (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
+ {
+ addr_space_t as = INTVAL (operands[3]);
+ const char *glc = INTVAL (operands[4]) ? " glc" : "";
+
+ static char buf[200];
+ if (AS_FLAT_P (as))
+ {
+ if (TARGET_GCN5_PLUS)
+ sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s\;s_waitcnt\t0",
+ glc);
+ else
+ sprintf (buf, "flat_store%%s2\t%%0, %%2%s\;s_waitcnt\t0", glc);
+ }
+ else if (AS_GLOBAL_P (as))
+ sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s\;"
+ "s_waitcnt\tvmcnt(0)", glc);
+ else
+ gcc_unreachable ();
+
+ return buf;
+ }
+ [(set_attr "type" "flat")
+ (set_attr "length" "12")])
+
+(define_insn "scatter_insn_1offset_ds"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(plus:V64SI (match_operand:V64SI 0 "register_operand" "v")
+ (vec_duplicate:V64SI
+ (match_operand 1 "immediate_operand" "n")))
+ (match_operand:VEC_REG_MODE 2 "register_operand" "v")
+ (match_operand 3 "immediate_operand" "n")
+ (match_operand 4 "immediate_operand" "n")]
+ UNSPEC_SCATTER))]
+ "(AS_ANY_DS_P (INTVAL (operands[3]))
+ && ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x10000))"
+ {
+ addr_space_t as = INTVAL (operands[3]);
+ static char buf[200];
+ sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)",
+ (AS_GDS_P (as) ? " gds" : ""));
+ return buf;
+ }
+ [(set_attr "type" "ds")
+ (set_attr "length" "12")])
+
+(define_insn "scatter_insn_2offsets"
+ [(set (mem:BLK (scratch))
+ (unspec:BLK
+ [(plus:V64DI
+ (plus:V64DI
+ (vec_duplicate:V64DI
+ (match_operand:DI 0 "register_operand" "Sv"))
+ (sign_extend:V64DI
+ (match_operand:V64SI 1 "register_operand" " v")))
+ (vec_duplicate:V64DI (match_operand 2 "immediate_operand"
+ " n")))
+ (match_operand:VEC_REG_MODE 3 "register_operand" " v")
+ (match_operand 4 "immediate_operand" " n")
+ (match_operand 5 "immediate_operand" " n")]
+ UNSPEC_SCATTER))]
+ "(AS_GLOBAL_P (INTVAL (operands[4]))
+ && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
+ {
+ addr_space_t as = INTVAL (operands[4]);
+ const char *glc = INTVAL (operands[5]) ? " glc" : "";
+
+ static char buf[200];
+ if (AS_GLOBAL_P (as))
+ {
+ /* Work around assembler bug in which a 64-bit register is expected,
+ but a 32-bit value would be correct. */
+ int reg = REGNO (operands[1]) - FIRST_VGPR_REG;
+ sprintf (buf, "global_store%%s3\tv[%d:%d], %%3, %%0 offset:%%2%s\;"
+ "s_waitcnt\tvmcnt(0)", reg, reg + 1, glc);
+ }
+ else
+ gcc_unreachable ();
+
+ return buf;
+ }
+ [(set_attr "type" "flat")
+ (set_attr "length" "12")])
+
+;; }}}
+;; {{{ Permutations
+
+(define_insn "ds_bpermute"
+ [(set (match_operand:VEC_1REG_MODE 0 "register_operand" "=v")
+ (unspec:VEC_1REG_MODE
+ [(match_operand:VEC_1REG_MODE 2 "register_operand" " v")
+ (match_operand:V64SI 1 "register_operand" " v")
+ (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
+ UNSPEC_BPERMUTE))]
+ ""
+ "ds_bpermute_b32\t%0, %1, %2\;s_waitcnt\tlgkmcnt(0)"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "12")])
+
+(define_insn_and_split "ds_bpermute"
+ [(set (match_operand:VEC_2REG_MODE 0 "register_operand" "=&v")
+ (unspec:VEC_2REG_MODE
+ [(match_operand:VEC_2REG_MODE 2 "register_operand" " v0")
+ (match_operand:V64SI 1 "register_operand" " v")
+ (match_operand:DI 3 "gcn_exec_reg_operand" " e")]
+ UNSPEC_BPERMUTE))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 4) (unspec:V64SI [(match_dup 6) (match_dup 1) (match_dup 3)]
+ UNSPEC_BPERMUTE))
+ (set (match_dup 5) (unspec:V64SI [(match_dup 7) (match_dup 1) (match_dup 3)]
+ UNSPEC_BPERMUTE))]
+ {
+ operands[4] = gcn_operand_part (<MODE>mode, operands[0], 0);
+ operands[5] = gcn_operand_part (<MODE>mode, operands[0], 1);
+ operands[6] = gcn_operand_part (<MODE>mode, operands[2], 0);
+ operands[7] = gcn_operand_part (<MODE>mode, operands[2], 1);
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "24")])
+
+;; }}}
+;; {{{ ALU special case: add/sub
+
+(define_insn "addv64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "= v")
+ (plus:V64SI
+ (match_operand:V64SI 1 "register_operand" "% v")
+ (match_operand:V64SI 2 "gcn_alu_operand" "vSvB")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "v_add%^_u32\t%0, vcc, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8")])
+
+(define_insn "addv64si3_dup"
+ [(set (match_operand:V64SI 0 "register_operand" "= v")
+ (plus:V64SI
+ (vec_duplicate:V64SI
+ (match_operand:SI 2 "gcn_alu_operand" "SvB"))
+ (match_operand:V64SI 1 "register_operand" " v")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "v_add%^_u32\t%0, vcc, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8")])
+
+(define_insn "addv64si3_vcc"
+ [(set (match_operand:V64SI 0 "register_operand" "= v, v")
+ (plus:V64SI
+ (match_operand:V64SI 1 "register_operand" "% v, v")
+ (match_operand:V64SI 2 "gcn_alu_operand" "vSvB,vSvB")))
+ (set (match_operand:DI 3 "register_operand" "= cV, Sg")
+ (ltu:DI (plus:V64SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ ""
+ "v_add%^_u32\t%0, %3, %2, %1"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "length" "8")])
+
+; This pattern only changes the VCC bits when the corresponding lane is
+; enabled, so the set must be described as an ior.
+
+(define_insn "addv64si3_vcc_dup"
+ [(set (match_operand:V64SI 0 "register_operand" "= v, v")
+ (plus:V64SI
+ (vec_duplicate:V64SI
+ (match_operand:SI 1 "gcn_alu_operand" "SvB,SvB"))
+ (match_operand:V64SI 2 "register_operand" " v, v")))
+ (set (match_operand:DI 3 "register_operand" "=cV, Sg")
+ (ltu:DI (plus:V64SI (vec_duplicate:V64SI (match_dup 1))
+ (match_dup 2))
+ (vec_duplicate:V64SI (match_dup 1))))]
+ ""
+ "v_add%^_u32\t%0, %3, %2, %1"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "length" "8,8")])
+
+; This pattern does not accept SGPR because the VCC read already counts as an
+; SGPR use and the number of SGPR operands is limited to 1.
+
+(define_insn "addcv64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+ (plus:V64SI
+ (plus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_operand:DI 3 "register_operand" " cV,Sv"))
+ (match_operand:V64SI 1 "gcn_alu_operand" "%vA,vA"))
+ (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB")))
+ (set (match_operand:DI 4 "register_operand" "=cV,Sg")
+ (ior:DI (ltu:DI (plus:V64SI
+ (plus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_dup 3))
+ (match_dup 1))
+ (match_dup 2))
+ (match_dup 2))
+ (ltu:DI (plus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_dup 3))
+ (match_dup 1))
+ (match_dup 1))))]
+ ""
+ "v_addc%^_u32\t%0, %4, %1, %2, %3"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "length" "4,8")])
+
+(define_insn "addcv64si3_dup"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,v")
+ (plus:V64SI
+ (plus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_operand:DI 3 "register_operand" " cV, Sv"))
+ (match_operand:V64SI 1 "gcn_alu_operand" "%vA, vA"))
+ (vec_duplicate:V64SI
+ (match_operand:SI 2 "gcn_alu_operand" "SvB,SvB"))))
+ (set (match_operand:DI 4 "register_operand" "=cV, Sg")
+ (ior:DI (ltu:DI (plus:V64SI (plus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_dup 3))
+ (match_dup 1))
+ (vec_duplicate:V64SI
+ (match_dup 2)))
+ (vec_duplicate:V64SI
+ (match_dup 2)))
+ (ltu:DI (plus:V64SI (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_dup 3))
+ (match_dup 1))
+ (match_dup 1))))]
+ ""
+ "v_addc%^_u32\t%0, %4, %1, %2, %3"
+ [(set_attr "type" "vop2,vop3b")
+ (set_attr "length" "4,8")])
+
+(define_insn "subv64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "= v, v")
+ (minus:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" "vSvB, v")
+ (match_operand:V64SI 2 "gcn_alu_operand" " v,vSvB")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "@
+ v_sub%^_u32\t%0, vcc, %1, %2
+ v_subrev%^_u32\t%0, vcc, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8,8")])
+
+(define_insn "subv64si3_vcc"
+ [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
+ (minus:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" "vSvB,vSvB, v, v")
+ (match_operand:V64SI 2 "gcn_alu_operand" " v, v,vSvB,vSvB")))
+ (set (match_operand:DI 3 "register_operand" "= cV, Sg, cV, Sg")
+ (gtu:DI (minus:V64SI (match_dup 1) (match_dup 2))
+ (match_dup 1)))]
+ ""
+ "@
+ v_sub%^_u32\t%0, %3, %1, %2
+ v_sub%^_u32\t%0, %3, %1, %2
+ v_subrev%^_u32\t%0, %3, %2, %1
+ v_subrev%^_u32\t%0, %3, %2, %1"
+ [(set_attr "type" "vop2,vop3b,vop2,vop3b")
+ (set_attr "length" "8")])
+
+; This pattern does not accept SGPR because the VCC read already counts
+; as an SGPR use and the number of SGPR operands is limited to 1.
+
+(define_insn "subcv64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "= v, v, v, v")
+ (minus:V64SI
+ (minus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_operand:DI 3 "gcn_alu_operand" " cV,Sv,cV,Sv"))
+ (match_operand:V64SI 1 "gcn_alu_operand" " vA,vA,vB,vB"))
+ (match_operand:V64SI 2 "gcn_alu_operand" " vB,vB,vA,vA")))
+ (set (match_operand:DI 4 "register_operand" "=cV,Sg,cV,Sg")
+ (ior:DI (gtu:DI (minus:V64SI (minus:V64SI
+ (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_dup 3))
+ (match_dup 1))
+ (match_dup 2))
+ (match_dup 2))
+ (ltu:DI (minus:V64SI (vec_merge:V64SI
+ (vec_duplicate:V64SI (const_int 1))
+ (vec_duplicate:V64SI (const_int 0))
+ (match_dup 3))
+ (match_dup 1))
+ (match_dup 1))))]
+ ""
+ "@
+ v_subb%^_u32\t%0, %4, %1, %2, %3
+ v_subb%^_u32\t%0, %4, %1, %2, %3
+ v_subbrev%^_u32\t%0, %4, %2, %1, %3
+ v_subbrev%^_u32\t%0, %4, %2, %1, %3"
+ [(set_attr "type" "vop2,vop3b,vop2,vop3b")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ (plus:V64DI
+ (match_operand:V64DI 1 "register_operand" "% v0")
+ (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc));
+ emit_insn (gen_addcv64si3
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (match_operand:V64DI 1 "register_operand" "% v0")
+ (match_operand:V64DI 2 "gcn_alu_operand" "vSvB0"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[4])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_addcv64si3_exec
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "subv64di3"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
+ (minus:V64DI
+ (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
+ (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_subv64si3_vcc
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc));
+ emit_insn (gen_subcv64si3
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8,8")])
+
+(define_insn_and_split "subv64di3_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v, &v")
+ (vec_merge:V64DI
+ (minus:V64DI
+ (match_operand:V64DI 1 "gcn_alu_operand" "vSvB0, v0")
+ (match_operand:V64DI 2 "gcn_alu_operand" " v0,vSvB0"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand"
+ " U0, U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
+ (clobber (reg:DI VCC_REG))]
+ "register_operand (operands[1], VOIDmode)
+ || register_operand (operands[2], VOIDmode)"
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[3])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_subv64si3_vcc_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_subcv64si3_exec
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8,8")])
+
+(define_insn_and_split "addv64di3_dup"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ (plus:V64DI
+ (match_operand:V64DI 1 "register_operand" " v0")
+ (vec_duplicate:V64DI
+ (match_operand:DI 2 "gcn_alu_operand" "SvDB"))))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_dup
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[2], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ vcc));
+ emit_insn (gen_addcv64si3_dup
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (DImode, operands[2], 1),
+ vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_dup_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (match_operand:V64DI 1 "register_operand" " v0")
+ (vec_duplicate:V64DI
+ (match_operand:DI 2 "gcn_alu_operand" "SvDB")))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[1])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[3])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_dup_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[2], 0),
+ gcn_operand_part (V64DImode, operands[1], 0),
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_addcv64si3_dup_exec
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[1], 1),
+ gcn_operand_part (DImode, operands[2], 1),
+ vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_zext"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
+ (plus:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ operands[1],
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc));
+ emit_insn (gen_addcv64si3
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ const0_rtx, vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8,8")])
+
+(define_insn_and_split "addv64di3_zext_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v,&v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" "0vA,0vB"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "0vB,0vA"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0, U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[3])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ operands[1],
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_addcv64si3_exec
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ const0_rtx, vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8,8")])
+
+(define_insn_and_split "addv64di3_zext_dup"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (plus:V64DI
+ (zero_extend:V64DI
+ (vec_duplicate:V64SI
+ (match_operand:SI 1 "gcn_alu_operand" "BSv")))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vA0")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[2])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_dup
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc));
+ emit_insn (gen_addcv64si3
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ const0_rtx, vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_zext_dup_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (zero_extend:V64DI
+ (vec_duplicate:V64SI
+ (match_operand:SI 1 "gcn_alu_operand" "BSv")))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vA0"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[2])
+ && gcn_can_split_p (V64DImode, operands[3])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_dup_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[1], 0),
+ gcn_operand_part (V64DImode, operands[2], 0),
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ emit_insn (gen_addcv64si3_exec
+ (gcn_operand_part (V64DImode, operands[0], 1),
+ gcn_operand_part (V64DImode, operands[2], 1),
+ const0_rtx, vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_zext_dup2"
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
+ (plus:V64DI
+ (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
+ (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_dup
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[2], 0),
+ operands[1],
+ vcc));
+ rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
+ emit_insn (gen_vec_duplicatev64si
+ (dsthi, gcn_operand_part (DImode, operands[2], 1)));
+ emit_insn (gen_addcv64si3 (dsthi, dsthi, const0_rtx, vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_zext_dup2_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (zero_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
+ " vA"))
+ (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[3])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_addv64si3_vcc_dup_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[2], 0),
+ operands[1],
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
+ emit_insn (gen_vec_duplicatev64si_exec
+ (dsthi, gcn_operand_part (DImode, operands[2], 1),
+ gcn_gen_undef (V64SImode), operands[4]));
+ emit_insn (gen_addcv64si3_exec
+ (dsthi, dsthi, const0_rtx, vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_sext_dup2"
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
+ (plus:V64DI
+ (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand" " vA"))
+ (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv"))))
+ (clobber (match_scratch:V64SI 3 "=&v"))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_ashrv64si3 (operands[3], operands[1], GEN_INT (31)));
+ emit_insn (gen_addv64si3_vcc_dup
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[2], 0),
+ operands[1],
+ vcc));
+ rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
+ emit_insn (gen_vec_duplicatev64si
+ (dsthi, gcn_operand_part (DImode, operands[2], 1)));
+ emit_insn (gen_addcv64si3 (dsthi, dsthi, operands[3], vcc, vcc));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+(define_insn_and_split "addv64di3_sext_dup2_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "= v")
+ (vec_merge:V64DI
+ (plus:V64DI
+ (sign_extend:V64DI (match_operand:V64SI 1 "gcn_alu_operand"
+ " vA"))
+ (vec_duplicate:V64DI (match_operand:DI 2 "gcn_alu_operand" "BSv")))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:V64SI 5 "=&v"))
+ (clobber (reg:DI VCC_REG))]
+ ""
+ "#"
+ "gcn_can_split_p (V64DImode, operands[0])
+ && gcn_can_split_p (V64DImode, operands[3])"
+ [(const_int 0)]
+ {
+ rtx vcc = gen_rtx_REG (DImode, VCC_REG);
+ emit_insn (gen_ashrv64si3_exec (operands[5], operands[1], GEN_INT (31),
+ gcn_gen_undef (V64SImode), operands[4]));
+ emit_insn (gen_addv64si3_vcc_dup_exec
+ (gcn_operand_part (V64DImode, operands[0], 0),
+ gcn_operand_part (DImode, operands[2], 0),
+ operands[1],
+ vcc,
+ gcn_operand_part (V64DImode, operands[3], 0),
+ operands[4]));
+ rtx dsthi = gcn_operand_part (V64DImode, operands[0], 1);
+ emit_insn (gen_vec_duplicatev64si_exec
+ (dsthi, gcn_operand_part (DImode, operands[2], 1),
+ gcn_gen_undef (V64SImode), operands[4]));
+ emit_insn (gen_addcv64si3_exec
+ (dsthi, dsthi, operands[5], vcc, vcc,
+ gcn_operand_part (V64DImode, operands[3], 1),
+ operands[4]));
+ DONE;
+ }
+ [(set_attr "type" "vmult")
+ (set_attr "length" "8")])
+
+;; }}}
+;; {{{ DS memory ALU: add/sub
+
+(define_mode_iterator DS_ARITH_MODE [V64SI V64SF V64DI])
+(define_mode_iterator DS_ARITH_SCALAR_MODE [SI SF DI])
+
+;; FIXME: the vector patterns probably need RD expanded to a vector of
+;; addresses. For now, the only way a vector can get into LDS is
+;; if the user puts it there manually.
+;;
+;; FIXME: the scalar patterns are probably fine in themselves, but need to be
+;; checked to see if anything can ever use them.
+
+(define_insn "add3_ds"
+ [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (plus:DS_ARITH_MODE
+ (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" "%RD")
+ (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_add%u0\t%A0, %2%O0"
+ [(set_attr "type" "ds")
+ (set_attr "length" "8")])
+
+(define_insn "add3_ds_scalar"
+ [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (plus:DS_ARITH_SCALAR_MODE
+ (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
+ "%RD")
+ (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_add%u0\t%A0, %2%O0"
+ [(set_attr "type" "ds")
+ (set_attr "length" "8")])
+
+(define_insn "sub3_ds"
+ [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (minus:DS_ARITH_MODE
+ (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")
+ (match_operand:DS_ARITH_MODE 2 "register_operand" " v")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_sub%u0\t%A0, %2%O0"
+ [(set_attr "type" "ds")
+ (set_attr "length" "8")])
+
+(define_insn "sub3_ds_scalar"
+ [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (minus:DS_ARITH_SCALAR_MODE
+ (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
+ " RD")
+ (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_sub%u0\t%A0, %2%O0"
+ [(set_attr "type" "ds")
+ (set_attr "length" "8")])
+
+(define_insn "subr3_ds"
+ [(set (match_operand:DS_ARITH_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (minus:DS_ARITH_MODE
+ (match_operand:DS_ARITH_MODE 2 "register_operand" " v")
+ (match_operand:DS_ARITH_MODE 1 "gcn_ds_memory_operand" " RD")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_rsub%u0\t%A0, %2%O0"
+ [(set_attr "type" "ds")
+ (set_attr "length" "8")])
+
+(define_insn "subr3_ds_scalar"
+ [(set (match_operand:DS_ARITH_SCALAR_MODE 0 "gcn_ds_memory_operand" "=RD")
+ (minus:DS_ARITH_SCALAR_MODE
+ (match_operand:DS_ARITH_SCALAR_MODE 2 "register_operand" " v")
+ (match_operand:DS_ARITH_SCALAR_MODE 1 "gcn_ds_memory_operand"
+ " RD")))]
+ "rtx_equal_p (operands[0], operands[1])"
+ "ds_rsub%u0\t%A0, %2%O0"
+ [(set_attr "type" "ds")
+ (set_attr "length" "8")])
+
+;; }}}
+;; {{{ ALU special case: mult
+
+(define_insn "mulv64si3_highpart"
+ [(set (match_operand:V64SI 0 "register_operand" "= v")
+ (truncate:V64SI
+ (lshiftrt:V64DI
+ (mult:V64DI
+ (any_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" " %v"))
+ (any_extend:V64DI
+ (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")))
+ (const_int 32))))]
+ ""
+ "v_mul_hi0\t%0, %2, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "mulv64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "= v")
+ (mult:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
+ (match_operand:V64SI 2 "gcn_alu_operand" " vSvA")))]
+ ""
+ "v_mul_lo_u32\t%0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "mulv64si3_dup"
+ [(set (match_operand:V64SI 0 "register_operand" "= v")
+ (mult:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" "%vSvA")
+ (vec_duplicate:V64SI
+ (match_operand:SI 2 "gcn_alu_operand" " SvA"))))]
+ ""
+ "v_mul_lo_u32\t%0, %1, %2"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
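+;; Vector 64-bit multiplies are decomposed into 32-bit partial products
+;; after reload.  Writing A = a1*2^32 + a0 and B = b1*2^32 + b0, then
+;; modulo 2^64:
+;;   lo32 (A*B) = lo32 (a0*b0)
+;;   hi32 (A*B) = hi32 (a0*b0) + lo32 (a1*b0) + lo32 (a0*b1)
+;; The a1*b1 product only affects bits 64 and above, so it is not needed
+;; for the truncated result.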
+(define_insn_and_split "mulv64di3"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (mult:V64DI
+ (match_operand:V64DI 1 "gcn_alu_operand" "% v")
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
+ (clobber (match_scratch:V64SI 3 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+ rtx tmp = operands[3];
+
+ emit_insn (gen_mulv64si3 (out_lo, left_lo, right_lo));
+ emit_insn (gen_umulv64si3_highpart (out_hi, left_lo, right_lo));
+ emit_insn (gen_mulv64si3 (tmp, left_hi, right_lo));
+ emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
+ emit_insn (gen_mulv64si3 (tmp, left_lo, right_hi));
+ emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
+ /* The left_hi * right_hi partial product lands entirely in bits 64 and
+    above, so it must not be added to the truncated 64-bit result.  */
+ DONE;
+ })
+
+(define_insn_and_split "mulv64di3_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (vec_merge:V64DI
+ (mult:V64DI
+ (match_operand:V64DI 1 "gcn_alu_operand" "% v")
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:V64SI 5 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+ rtx left_lo = gcn_operand_part (V64DImode, operands[1], 0);
+ rtx left_hi = gcn_operand_part (V64DImode, operands[1], 1);
+ rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+ rtx exec = operands[4];
+ rtx tmp = operands[5];
+
+ rtx old_lo, old_hi;
+ if (GET_CODE (operands[3]) == UNSPEC)
+ {
+ old_lo = old_hi = gcn_gen_undef (V64SImode);
+ }
+ else
+ {
+ old_lo = gcn_operand_part (V64DImode, operands[3], 0);
+ old_hi = gcn_operand_part (V64DImode, operands[3], 1);
+ }
+
+ rtx undef = gcn_gen_undef (V64SImode);
+
+ emit_insn (gen_mulv64si3_exec (out_lo, left_lo, right_lo, old_lo, exec));
+ emit_insn (gen_umulv64si3_highpart_exec (out_hi, left_lo, right_lo,
+ old_hi, exec));
+ emit_insn (gen_mulv64si3_exec (tmp, left_hi, right_lo, undef, exec));
+ emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ emit_insn (gen_mulv64si3_exec (tmp, left_lo, right_hi, undef, exec));
+ emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ /* As above, the left_hi * right_hi partial product cannot affect the
+    low 64 bits of the result, so it is omitted.  */
+ DONE;
+ })
+
+(define_insn_and_split "mulv64di3_zext"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (mult:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" " v"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDA")))
+ (clobber (match_scratch:V64SI 3 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+ rtx left = operands[1];
+ rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+ rtx tmp = operands[3];
+
+ emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
+ emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
+ emit_insn (gen_mulv64si3 (tmp, left, right_hi));
+ emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
+ DONE;
+ })
+
+(define_insn_and_split "mulv64di3_zext_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "=&v")
+ (vec_merge:V64DI
+ (mult:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" " v"))
+ (match_operand:V64DI 2 "gcn_alu_operand" "vDA"))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:V64SI 5 "=&v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+ rtx left = operands[1];
+ rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+ rtx exec = operands[4];
+ rtx tmp = operands[5];
+
+ rtx old_lo, old_hi;
+ if (GET_CODE (operands[3]) == UNSPEC)
+ {
+ old_lo = old_hi = gcn_gen_undef (V64SImode);
+ }
+ else
+ {
+ old_lo = gcn_operand_part (V64DImode, operands[3], 0);
+ old_hi = gcn_operand_part (V64DImode, operands[3], 1);
+ }
+
+ rtx undef = gcn_gen_undef (V64SImode);
+
+ emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
+ emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
+ old_hi, exec));
+ emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
+ emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ DONE;
+ })
+
+(define_insn_and_split "mulv64di3_zext_dup2"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ (mult:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" " v"))
+ (vec_duplicate:V64DI
+ (match_operand:DI 2 "gcn_alu_operand" "SvDA"))))
+ (clobber (match_scratch:V64SI 3 "= &v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+ rtx left = operands[1];
+ rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+ rtx tmp = operands[3];
+
+ emit_insn (gen_mulv64si3 (out_lo, left, right_lo));
+ emit_insn (gen_umulv64si3_highpart (out_hi, left, right_lo));
+ emit_insn (gen_mulv64si3 (tmp, left, right_hi));
+ emit_insn (gen_addv64si3 (out_hi, out_hi, tmp));
+ DONE;
+ })
+
+(define_insn_and_split "mulv64di3_zext_dup2_exec"
+ [(set (match_operand:V64DI 0 "register_operand" "= &v")
+ (vec_merge:V64DI
+ (mult:V64DI
+ (zero_extend:V64DI
+ (match_operand:V64SI 1 "gcn_alu_operand" " v"))
+ (vec_duplicate:V64DI
+ (match_operand:DI 2 "gcn_alu_operand" "SvDA")))
+ (match_operand:V64DI 3 "gcn_register_or_unspec_operand" " U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e")))
+ (clobber (match_scratch:V64SI 5 "= &v"))]
+ ""
+ "#"
+ "reload_completed"
+ [(const_int 0)]
+ {
+ rtx out_lo = gcn_operand_part (V64DImode, operands[0], 0);
+ rtx out_hi = gcn_operand_part (V64DImode, operands[0], 1);
+ rtx left = operands[1];
+ rtx right_lo = gcn_operand_part (V64DImode, operands[2], 0);
+ rtx right_hi = gcn_operand_part (V64DImode, operands[2], 1);
+ rtx exec = operands[4];
+ rtx tmp = operands[5];
+
+ rtx old_lo, old_hi;
+ if (GET_CODE (operands[3]) == UNSPEC)
+ {
+ old_lo = old_hi = gcn_gen_undef (V64SImode);
+ }
+ else
+ {
+ old_lo = gcn_operand_part (V64DImode, operands[3], 0);
+ old_hi = gcn_operand_part (V64DImode, operands[3], 1);
+ }
+
+ rtx undef = gcn_gen_undef (V64SImode);
+
+ emit_insn (gen_mulv64si3_exec (out_lo, left, right_lo, old_lo, exec));
+ emit_insn (gen_umulv64si3_highpart_exec (out_hi, left, right_lo,
+ old_hi, exec));
+ emit_insn (gen_mulv64si3_exec (tmp, left, right_hi, undef, exec));
+ emit_insn (gen_addv64si3_exec (out_hi, out_hi, tmp, out_hi, exec));
+ DONE;
+ })
+
+;; }}}
+;; {{{ ALU generic case
+
+(define_mode_iterator VEC_INT_MODE [V64QI V64HI V64SI V64DI])
+
+(define_code_iterator bitop [and ior xor])
+(define_code_iterator shiftop [ashift lshiftrt ashiftrt])
+(define_code_iterator minmaxop [smin smax umin umax])
+
+(define_insn "2"
+ [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v")
+ (bitunop:VEC_1REG_INT_MODE
+ (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand" "vSvB")))]
+ ""
+ "v_0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+(define_insn "3"
+ [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
+ (bitop:VEC_1REG_INT_MODE
+ (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
+ "% v, 0")
+ (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
+ "vSvB, v")))]
+ ""
+ "@
+ v_<mnemonic>0\t%0, %2, %1
+ ds_<mnemonic>0\t%A0, %2%O0"
+ [(set_attr "type" "vop2,ds")
+ (set_attr "length" "8,8")])
+
+(define_insn_and_split "v64di3"
+ [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+ (bitop:V64DI
+ (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
+ (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v")))]
+ ""
+ "@
+ #
+ ds_0\t%A0, %2%O0"
+ "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
+ [(set (match_dup 3)
+ (bitop:V64SI (match_dup 5) (match_dup 7)))
+ (set (match_dup 4)
+ (bitop:V64SI (match_dup 6) (match_dup 8)))]
+ {
+ operands[3] = gcn_operand_part (V64DImode, operands[0], 0);
+ operands[4] = gcn_operand_part (V64DImode, operands[0], 1);
+ operands[5] = gcn_operand_part (V64DImode, operands[1], 0);
+ operands[6] = gcn_operand_part (V64DImode, operands[1], 1);
+ operands[7] = gcn_operand_part (V64DImode, operands[2], 0);
+ operands[8] = gcn_operand_part (V64DImode, operands[2], 1);
+ }
+ [(set_attr "type" "vmult,ds")
+ (set_attr "length" "16,8")])
+
+(define_insn_and_split "v64di3_exec"
+ [(set (match_operand:V64DI 0 "gcn_valu_dst_operand" "=&v,RD")
+ (vec_merge:V64DI
+ (bitop:V64DI
+ (match_operand:V64DI 1 "gcn_valu_src0_operand" "% v,RD")
+ (match_operand:V64DI 2 "gcn_valu_src1com_operand" "vSvB, v"))
+ (match_operand:V64DI 3 "gcn_register_ds_or_unspec_operand"
+ " U0,U0")
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e")))]
+ "!memory_operand (operands[0], VOIDmode)
+ || (rtx_equal_p (operands[0], operands[1])
+ && register_operand (operands[2], VOIDmode))"
+ "@
+ #
+ ds_0\t%A0, %2%O0"
+ "(reload_completed && !gcn_ds_memory_operand (operands[0], V64DImode))"
+ [(set (match_dup 5)
+ (vec_merge:V64SI
+ (bitop:V64SI (match_dup 7) (match_dup 9))
+ (match_dup 11)
+ (match_dup 4)))
+ (set (match_dup 6)
+ (vec_merge:V64SI
+ (bitop:V64SI (match_dup 8) (match_dup 10))
+ (match_dup 12)
+ (match_dup 4)))]
+ {
+ operands[5] = gcn_operand_part (V64DImode, operands[0], 0);
+ operands[6] = gcn_operand_part (V64DImode, operands[0], 1);
+ operands[7] = gcn_operand_part (V64DImode, operands[1], 0);
+ operands[8] = gcn_operand_part (V64DImode, operands[1], 1);
+ operands[9] = gcn_operand_part (V64DImode, operands[2], 0);
+ operands[10] = gcn_operand_part (V64DImode, operands[2], 1);
+ operands[11] = gcn_operand_part (V64DImode, operands[3], 0);
+ operands[12] = gcn_operand_part (V64DImode, operands[3], 1);
+ }
+ [(set_attr "type" "vmult,ds")
+ (set_attr "length" "16,8")])
+
+(define_insn "v64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "= v")
+ (shiftop:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" " v")
+ (vec_duplicate:V64SI
+ (match_operand:SI 2 "gcn_alu_operand" "SvB"))))]
+ ""
+ "v_0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8")])
+
+(define_insn "vv64si3"
+ [(set (match_operand:V64SI 0 "register_operand" "=v")
+ (shiftop:V64SI
+ (match_operand:V64SI 1 "gcn_alu_operand" " v")
+ (match_operand:V64SI 2 "gcn_alu_operand" "vB")))]
+ ""
+ "v_0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8")])
+
+(define_insn "3"
+ [(set (match_operand:VEC_1REG_INT_MODE 0 "gcn_valu_dst_operand" "= v,RD")
+ (minmaxop:VEC_1REG_INT_MODE
+ (match_operand:VEC_1REG_INT_MODE 1 "gcn_valu_src0_operand"
+ "% v, 0")
+ (match_operand:VEC_1REG_INT_MODE 2 "gcn_valu_src1com_operand"
+ "vSvB, v")))]
+ ""
+ "@
+ v_<mnemonic>0\t%0, %2, %1
+ ds_<mnemonic>0\t%A0, %2%O0"
+ [(set_attr "type" "vop2,ds")
+ (set_attr "length" "8,8")])
+
+;; }}}
+;; {{{ FP binops - special cases
+
+; GCN does not directly provide a DFmode subtract instruction, so we do it by
+; adding the negated second operand to the first.
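+; For example, c = a - b is emitted as "v_add_f64 c, a, -b", using the
+; negation source modifier on the second input.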
+
+(define_insn "subv64df3"
+ [(set (match_operand:V64DF 0 "register_operand" "= v, v")
+ (minus:V64DF
+ (match_operand:V64DF 1 "gcn_alu_operand" "vSvB, v")
+ (match_operand:V64DF 2 "gcn_alu_operand" " v,vSvB")))]
+ ""
+ "@
+ v_add_f64\t%0, %1, -%2
+ v_add_f64\t%0, -%2, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8,8")])
+
+(define_insn "subdf"
+ [(set (match_operand:DF 0 "register_operand" "= v, v")
+ (minus:DF
+ (match_operand:DF 1 "gcn_alu_operand" "vSvB, v")
+ (match_operand:DF 2 "gcn_alu_operand" " v,vSvB")))]
+ ""
+ "@
+ v_add_f64\t%0, %1, -%2
+ v_add_f64\t%0, -%2, %1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8,8")])
+
+;; }}}
+;; {{{ FP binops - generic
+
+(define_mode_iterator VEC_FP_MODE [V64HF V64SF V64DF])
+(define_mode_iterator VEC_FP_1REG_MODE [V64HF V64SF])
+(define_mode_iterator FP_MODE [HF SF DF])
+(define_mode_iterator FP_1REG_MODE [HF SF])
+
+(define_code_iterator comm_fp [plus mult smin smax])
+(define_code_iterator nocomm_fp [minus])
+(define_code_iterator all_fp [plus mult minus smin smax])
+
+(define_insn "3"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
+ (comm_fp:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% v")
+ (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" "vSvB")))]
+ ""
+ "v_0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8")])
+
+(define_insn "3"
+ [(set (match_operand:FP_MODE 0 "gcn_valu_dst_operand" "= v, RL")
+ (comm_fp:FP_MODE
+ (match_operand:FP_MODE 1 "gcn_valu_src0_operand" "% v, 0")
+ (match_operand:FP_MODE 2 "gcn_valu_src1_operand" "vSvB,vSvB")))]
+ ""
+ "@
+ v_<mnemonic>0\t%0, %2, %1
+ v_<mnemonic>0\t%0, %1%O0"
+ [(set_attr "type" "vop2,ds")
+ (set_attr "length" "8")])
+
+(define_insn "3"
+ [(set (match_operand:VEC_FP_1REG_MODE 0 "register_operand" "= v, v")
+ (nocomm_fp:VEC_FP_1REG_MODE
+ (match_operand:VEC_FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
+ (match_operand:VEC_FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
+ ""
+ "@
+ v_<mnemonic>0\t%0, %1, %2
+ v_<revmnemonic>0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8,8")])
+
+(define_insn "3"
+ [(set (match_operand:FP_1REG_MODE 0 "register_operand" "= v, v")
+ (nocomm_fp:FP_1REG_MODE
+ (match_operand:FP_1REG_MODE 1 "gcn_alu_operand" "vSvB, v")
+ (match_operand:FP_1REG_MODE 2 "gcn_alu_operand" " v,vSvB")))]
+ ""
+ "@
+ v_<mnemonic>0\t%0, %1, %2
+ v_<revmnemonic>0\t%0, %2, %1"
+ [(set_attr "type" "vop2")
+ (set_attr "length" "8,8")])
+
+;; }}}
+;; {{{ FP unops
+
+(define_insn "abs2"
+ [(set (match_operand:FP_MODE 0 "register_operand" "=v")
+ (abs:FP_MODE (match_operand:FP_MODE 1 "register_operand" " v")))]
+ ""
+ "v_add%i0\t%0, 0, |%1|"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "abs2"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
+ (abs:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
+ ""
+ "v_add%i0\t%0, 0, |%1|"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "neg2"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "=v")
+ (neg:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "register_operand" " v")))]
+ ""
+ "v_add%i0\t%0, 0, -%1"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "sqrt2"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
+ (sqrt:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
+ "flag_unsafe_math_optimizations"
+ "v_sqrt%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+(define_insn "sqrt2"
+ [(set (match_operand:FP_MODE 0 "register_operand" "= v")
+ (sqrt:FP_MODE
+ (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
+ "flag_unsafe_math_optimizations"
+ "v_sqrt%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+;; }}}
+;; {{{ FP fused multiply and add
+
+(define_insn "fma4"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v")
+ (fma:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "% vA, vA")
+ (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
+ (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
+ ""
+ "v_fma%i0\t%0, %1, %2, %3"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "fma4_negop2"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v, v, v")
+ (fma:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
+ (neg:VEC_FP_MODE
+ (match_operand:VEC_FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
+ (match_operand:VEC_FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
+ ""
+ "v_fma%i0\t%0, %1, -%2, %3"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "fma4"
+ [(set (match_operand:FP_MODE 0 "register_operand" "= v, v")
+ (fma:FP_MODE
+ (match_operand:FP_MODE 1 "gcn_alu_operand" "% vA, vA")
+ (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA")
+ (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA")))]
+ ""
+ "v_fma%i0\t%0, %1, %2, %3"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+(define_insn "fma4_negop2"
+ [(set (match_operand:FP_MODE 0 "register_operand" "= v, v, v")
+ (fma:FP_MODE
+ (match_operand:FP_MODE 1 "gcn_alu_operand" " vA, vA,vSvA")
+ (neg:FP_MODE
+ (match_operand:FP_MODE 2 "gcn_alu_operand" " vA,vSvA, vA"))
+ (match_operand:FP_MODE 3 "gcn_alu_operand" "vSvA, vA, vA")))]
+ ""
+ "v_fma%i0\t%0, %1, -%2, %3"
+ [(set_attr "type" "vop3a")
+ (set_attr "length" "8")])
+
+;; }}}
+;; {{{ FP division
+
+(define_insn "recip2"
+ [(set (match_operand:VEC_FP_MODE 0 "register_operand" "= v")
+ (div:VEC_FP_MODE
+ (vec_duplicate:VEC_FP_MODE (float:<SCALAR_MODE> (const_int 1)))
+ (match_operand:VEC_FP_MODE 1 "gcn_alu_operand" "vSvB")))]
+ ""
+ "v_rcp%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+(define_insn "recip2"
+ [(set (match_operand:FP_MODE 0 "register_operand" "= v")
+ (div:FP_MODE
+ (float:FP_MODE (const_int 1))
+ (match_operand:FP_MODE 1 "gcn_alu_operand" "vSvB")))]
+ ""
+ "v_rcp%i0\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+;; Do division via a = b * 1/c
+;; The v_rcp_* instructions are not sufficiently accurate on their own,
+;; so we use 2 v_fma_* instructions to do one round of Newton-Raphson,
+;; which the ISA manual says is enough to improve the reciprocal accuracy.
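+;; Concretely, one step refines an estimate x0 = v_rcp (c) into
+;;   x1 = x0 * (2 - c*x0)
+;; so the expanders below use fma<mode>4_negop2 to form 2 - c*x0, one
+;; multiply to form x1, and a final multiply for a = b * x1.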
+;;
+;; FIXME: This does not handle denormals, NaNs, division-by-zero etc.
+
+(define_expand "div3"
+ [(match_operand:VEC_FP_MODE 0 "gcn_valu_dst_operand")
+ (match_operand:VEC_FP_MODE 1 "gcn_valu_src0_operand")
+ (match_operand:VEC_FP_MODE 2 "gcn_valu_src0_operand")]
+ "flag_reciprocal_math"
+ {
+ rtx two = gcn_vec_constant (<MODE>mode,
+ const_double_from_real_value (dconst2, <SCALAR_MODE>mode));
+ rtx initrcp = gen_reg_rtx (<MODE>mode);
+ rtx fma = gen_reg_rtx (<MODE>mode);
+ rtx rcp;
+
+ bool is_rcp = (GET_CODE (operands[1]) == CONST_VECTOR
+ && real_identical
+ (CONST_DOUBLE_REAL_VALUE
+ (CONST_VECTOR_ELT (operands[1], 0)), &dconstm1));
+
+ if (is_rcp)
+ rcp = operands[0];
+ else
+ rcp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
+ emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
+ emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
+
+ if (!is_rcp)
+ emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
+
+ DONE;
+ })
+
+(define_expand "div3"
+ [(match_operand:FP_MODE 0 "gcn_valu_dst_operand")
+ (match_operand:FP_MODE 1 "gcn_valu_src0_operand")
+ (match_operand:FP_MODE 2 "gcn_valu_src0_operand")]
+ "flag_reciprocal_math"
+ {
+ rtx two = const_double_from_real_value (dconst2, <MODE>mode);
+ rtx initrcp = gen_reg_rtx (<MODE>mode);
+ rtx fma = gen_reg_rtx (<MODE>mode);
+ rtx rcp;
+
+ bool is_rcp = (GET_CODE (operands[1]) == CONST_DOUBLE
+ && real_identical (CONST_DOUBLE_REAL_VALUE (operands[1]),
+ &dconstm1));
+
+ if (is_rcp)
+ rcp = operands[0];
+ else
+ rcp = gen_reg_rtx (<MODE>mode);
+
+ emit_insn (gen_recip<mode>2 (initrcp, operands[2]));
+ emit_insn (gen_fma<mode>4_negop2 (fma, initrcp, operands[2], two));
+ emit_insn (gen_mul<mode>3 (rcp, initrcp, fma));
+
+ if (!is_rcp)
+ emit_insn (gen_mul<mode>3 (operands[0], operands[1], rcp));
+
+ DONE;
+ })
+
+;; }}}
+;; {{{ Int/FP conversions
+
+(define_mode_iterator CVT_FROM_MODE [HI SI HF SF DF])
+(define_mode_iterator CVT_TO_MODE [HI SI HF SF DF])
+
+(define_mode_iterator VCVT_FROM_MODE [V64HI V64SI V64HF V64SF V64DF])
+(define_mode_iterator VCVT_TO_MODE [V64HI V64SI V64HF V64SF V64DF])
+
+(define_code_iterator cvt_op [fix unsigned_fix
+ float unsigned_float
+ float_extend float_truncate])
+(define_code_attr cvt_name [(fix "fix_trunc") (unsigned_fix "fixuns_trunc")
+ (float "float") (unsigned_float "floatuns")
+ (float_extend "extend") (float_truncate "trunc")])
+(define_code_attr cvt_operands [(fix "%i0%i1") (unsigned_fix "%u0%i1")
+ (float "%i0%i1") (unsigned_float "%i0%u1")
+ (float_extend "%i0%i1")
+ (float_truncate "%i0%i1")])
+
+(define_insn "2"
+ [(set (match_operand:CVT_TO_MODE 0 "register_operand" "= v")
+ (cvt_op:CVT_TO_MODE
+ (match_operand:CVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
+ "gcn_valid_cvt_p (mode, mode,
+ _cvt)"
+ "v_cvt\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+(define_insn "2"
+ [(set (match_operand:VCVT_TO_MODE 0 "register_operand" "= v")
+ (cvt_op:VCVT_TO_MODE
+ (match_operand:VCVT_FROM_MODE 1 "gcn_alu_operand" "vSvB")))]
+ "gcn_valid_cvt_p (mode, mode,
+ _cvt)"
+ "v_cvt\t%0, %1"
+ [(set_attr "type" "vop1")
+ (set_attr "length" "8")])
+
+;; }}}
+;; {{{ Int/int conversions
+
+;; GCC can already do these for scalar types, but not for vector types.
+;; Unfortunately you can't just do SUBREG on a vector to select the low part,
+;; so a few tricks are needed here.
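+;;
+;; The trick used below is that, after reload, the two V64SI halves of a
+;; V64DI register pair can be accessed directly, so truncation amounts to
+;; a move from (or an in-place reuse of) the low-part registers.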
+
+(define_insn_and_split "vec_truncatev64div64si"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
+ (truncate:V64SI
+ (match_operand:V64DI 1 "register_operand" " 0, v")))]
+ ""
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (match_dup 1))]
+ {
+ operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
+ }
+ [(set_attr "type" "vop2")
+ (set_attr "length" "0,4")])
+
+(define_insn_and_split "vec_truncatev64div64si_exec"
+ [(set (match_operand:V64SI 0 "register_operand" "=v,&v")
+ (vec_merge:V64SI
+ (truncate:V64SI
+ (match_operand:V64DI 1 "register_operand" " 0, v"))
+ (match_operand:V64SI 2 "gcn_alu_or_unspec_operand" "U0,U0")
+ (match_operand:DI 3 "gcn_exec_operand" " e, e")))]
+ ""
+ "#"
+ "reload_completed"
+ [(parallel [(set (match_dup 0)
+ (vec_merge:V64SI (match_dup 1) (match_dup 2) (match_dup 3)))
+ (clobber (scratch:V64DI))])]
+ {
+ operands[1] = gcn_operand_part (V64SImode, operands[1], 0);
+ }
+ [(set_attr "type" "vop2")
+ (set_attr "length" "0,4")])
+
+;; }}}
+;; {{{ Vector comparison/merge
+
+(define_insn "vec_cmpdi"
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
+ (match_operator 1 "comparison_operator"
+ [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
+ "vSv, B,vSv, B, v,vA")
+ (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
+ " v, v, v, v,vA, v")]))
+ (clobber (match_scratch:DI 4 "= X, X, cV,cV, X, X"))]
+ ""
+ "@
+ v_cmp%E1\tvcc, %2, %3
+ v_cmp%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmp%E1\t%0, %2, %3
+ v_cmp%E1\t%0, %2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
+ (set_attr "length" "4,8,4,8,8,8")])
+
+(define_expand "vec_cmpudi"
+ [(match_operand:DI 0 "register_operand")
+ (match_operator 1 "comparison_operator"
+ [(match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
+ (match_operand:VEC_1REG_INT_MODE 3 "gcn_vop3_operand")])]
+ ""
+ {
+ /* Unsigned comparisons use the same patterns as signed comparisons,
+ except that they use unsigned operators (e.g. LTU vs LT).
+ The '%E1' directive then does the Right Thing. */
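+ /* For example, an LTU operator should come out as the unsigned compare
+    (v_cmp_lt_u32) where LT would give the signed form (v_cmp_lt_i32).  */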
+ emit_insn (gen_vec_cmp<mode>di (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ })
+
+(define_insn "vec_cmpdi_exec"
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e, e,Sg,Sg")
+ (and:DI
+ (match_operator 1 "comparison_operator"
+ [(match_operand:VEC_1REG_MODE 2 "gcn_alu_operand"
+ "vSv, B,vSv, B, v,vA")
+ (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
+ " v, v, v, v,vA, v")])
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e, e, e, e")))
+ (clobber (match_scratch:DI 5 "= X, X, cV,cV, X, X"))]
+ ""
+ "@
+ v_cmp%E1\tvcc, %2, %3
+ v_cmp%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmp%E1\t%0, %2, %3
+ v_cmp%E1\t%0, %2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a,vop3a")
+ (set_attr "length" "4,8,4,8,8,8")])
+
+(define_insn "vec_cmpdi_dup"
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
+ (match_operator 1 "comparison_operator"
+ [(vec_duplicate:VEC_1REG_MODE
+ (match_operand: 2 "gcn_alu_operand"
+ " Sv, B,Sv,B, A"))
+ (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
+ " v, v, v,v, v")]))
+ (clobber (match_scratch:DI 4 "= X,X,cV,cV, X"))]
+ ""
+ "@
+ v_cmp%E1\tvcc, %2, %3
+ v_cmp%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmp%E1\t%0, %2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
+ (set_attr "length" "4,8,4,8,8")])
+
+(define_insn "vec_cmpdi_dup_exec"
+ [(set (match_operand:DI 0 "register_operand" "=cV,cV, e,e,Sg")
+ (and:DI
+ (match_operator 1 "comparison_operator"
+ [(vec_duplicate:VEC_1REG_MODE
+ (match_operand: 2 "gcn_alu_operand"
+ " Sv, B,Sv,B, A"))
+ (match_operand:VEC_1REG_MODE 3 "gcn_vop3_operand"
+ " v, v, v,v, v")])
+ (match_operand:DI 4 "gcn_exec_reg_operand" " e, e, e,e, e")))
+ (clobber (match_scratch:DI 5 "= X,X,cV,cV, X"))]
+ ""
+ "@
+ v_cmp%E1\tvcc, %2, %3
+ v_cmp%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmpx%E1\tvcc, %2, %3
+ v_cmp%E1\t%0, %2, %3"
+ [(set_attr "type" "vopc,vopc,vopc,vopc,vop3a")
+ (set_attr "length" "4,8,4,8,8")])
+
+(define_expand "vcond_mask_di"
+ [(parallel
+ [(set (match_operand:VEC_REG_MODE 0 "register_operand" "")
+ (vec_merge:VEC_REG_MODE
+ (match_operand:VEC_REG_MODE 1 "gcn_vop3_operand" "")
+ (match_operand:VEC_REG_MODE 2 "gcn_alu_operand" "")
+ (match_operand:DI 3 "register_operand" "")))
+ (clobber (scratch:V64DI))])]
+ ""
+ "")
+
+(define_expand "vcond"
+ [(match_operand:VEC_1REG_MODE 0 "register_operand")
+ (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
+ (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
+ (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])]
+ ""
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di (tmp, operands[3], operands[4],
+ operands[5]));
+ emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di (operands[0], operands[1], operands[2],
+ tmp));
+ DONE;
+ })
+
+(define_expand "vcond_exec"
+ [(match_operand:VEC_1REG_MODE 0 "register_operand")
+ (match_operand:VEC_1REG_MODE 1 "gcn_vop3_operand")
+ (match_operand:VEC_1REG_MODE 2 "gcn_alu_operand")
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_1REG_ALT 4 "gcn_alu_operand")
+ (match_operand:VEC_1REG_ALT 5 "gcn_vop3_operand")])
+ (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
+ ""
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_vec_cmp<VEC_1REG_ALT:mode>di_exec (tmp, operands[3], operands[4],
+ operands[5], operands[6]));
+ emit_insn (gen_vcond_mask_<VEC_1REG_MODE:mode>di (operands[0], operands[1], operands[2],
+ tmp));
+ DONE;
+ })
+
+(define_expand "vcondu"
+ [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
+ (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
+ (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
+ (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])]
+ ""
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di (tmp, operands[3], operands[4],
+ operands[5]));
+ emit_insn (gen_vcond_mask_<VEC_1REG_INT_MODE:mode>di (operands[0], operands[1], operands[2],
+ tmp));
+ DONE;
+ })
+
+(define_expand "vcondu_exec"
+ [(match_operand:VEC_1REG_INT_MODE 0 "register_operand")
+ (match_operand:VEC_1REG_INT_MODE 1 "gcn_vop3_operand")
+ (match_operand:VEC_1REG_INT_MODE 2 "gcn_alu_operand")
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VEC_1REG_INT_ALT 4 "gcn_alu_operand")
+ (match_operand:VEC_1REG_INT_ALT 5 "gcn_vop3_operand")])
+ (match_operand:DI 6 "gcn_exec_reg_operand" "e")]
+ ""
+ {
+ rtx tmp = gen_reg_rtx (DImode);
+ emit_insn (gen_vec_cmp<VEC_1REG_INT_ALT:mode>di_exec (tmp, operands[3], operands[4],
+ operands[5], operands[6]));
+ emit_insn (gen_vcond_mask_<VEC_1REG_INT_MODE:mode>di (operands[0], operands[1], operands[2],
+ tmp));
+ DONE;
+ })
+
+;; }}}
+;; {{{ Fully masked loop support
+
+(define_expand "while_ultsidi"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand:SI 1 "")
+ (match_operand:SI 2 "")]
+ ""
+ {
+ if (GET_CODE (operands[1]) != CONST_INT
+ || GET_CODE (operands[2]) != CONST_INT)
+ {
+ rtx _0_1_2_3 = gen_rtx_REG (V64SImode, VGPR_REGNO (1));
+ rtx tmp = _0_1_2_3;
+ if (GET_CODE (operands[1]) != CONST_INT
+ || INTVAL (operands[1]) != 0)
+ {
+ tmp = gen_reg_rtx (V64SImode);
+ emit_insn (gen_addv64si3_dup (tmp, _0_1_2_3, operands[1]));
+ }
+ emit_insn (gen_vec_cmpv64sidi_dup (operands[0],
+ gen_rtx_GT (VOIDmode, 0, 0),
+ operands[2], tmp));
+ }
+ else
+ {
+ HOST_WIDE_INT diff = INTVAL (operands[2]) - INTVAL (operands[1]);
+ HOST_WIDE_INT mask = (diff >= 64 ? -1
+ : ~((unsigned HOST_WIDE_INT)-1 << diff));
+ emit_move_insn (operands[0], gen_rtx_CONST_INT (VOIDmode, mask));
+ }
+ DONE;
+ })
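+
+;; For example, while_ult (0, 5) takes the constant path: diff is 5, so
+;; the mask becomes 0x1f and lanes 0..4 execute.  In the non-constant case
+;; the lane-number vector kept in v1 (hence the name _0_1_2_3) is biased
+;; by operands[1] and compared against the limit instead.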
+
+(define_expand "maskloaddi"
+ [(match_operand:VEC_REG_MODE 0 "register_operand")
+ (match_operand:VEC_REG_MODE 1 "memory_operand")
+ (match_operand 2 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[2]);
+ rtx addr = gcn_expand_scalar_to_vector_address
+ (<MODE>mode, exec, operands[1], gen_rtx_SCRATCH (V64DImode));
+ rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[1]));
+ rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[1]));
+ rtx undef = gcn_gen_undef (<MODE>mode);
+ emit_insn (gen_gather<mode>_expr_exec (operands[0], addr, as, v, undef,
+ exec));
+ DONE;
+ })
+
+(define_expand "maskstoredi"
+ [(match_operand:VEC_REG_MODE 0 "memory_operand")
+ (match_operand:VEC_REG_MODE 1 "register_operand")
+ (match_operand 2 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[2]);
+ rtx addr = gcn_expand_scalar_to_vector_address
+ (<MODE>mode, exec, operands[0], gen_rtx_SCRATCH (V64DImode));
+ rtx as = gen_rtx_CONST_INT (VOIDmode, MEM_ADDR_SPACE (operands[0]));
+ rtx v = gen_rtx_CONST_INT (VOIDmode, MEM_VOLATILE_P (operands[0]));
+ emit_insn (gen_scatter<mode>_expr_exec (addr, operands[1], as, v, exec));
+ DONE;
+ })
+
+(define_expand "mask_gather_load"
+ [(match_operand:VEC_REG_MODE 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (match_operand 2 "register_operand")
+ (match_operand 3 "immediate_operand")
+ (match_operand:SI 4 "gcn_alu_operand")
+ (match_operand:DI 5 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ /* TODO: more conversions will be needed when more types are vectorized. */
+ if (GET_MODE (operands[2]) == V64DImode)
+ {
+ rtx tmp = gen_reg_rtx (V64SImode);
+ emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[2],
+ gcn_gen_undef (V64SImode),
+ exec));
+ operands[2] = tmp;
+ }
+
+ emit_insn (gen_gather<mode>_exec (operands[0], operands[1], operands[2],
+ operands[3], operands[4], exec));
+ DONE;
+ })
+
+(define_expand "mask_scatter_store"
+ [(match_operand:DI 0 "register_operand")
+ (match_operand 1 "register_operand")
+ (match_operand 2 "immediate_operand")
+ (match_operand:SI 3 "gcn_alu_operand")
+ (match_operand:VEC_REG_MODE 4 "register_operand")
+ (match_operand:DI 5 "")]
+ ""
+ {
+ rtx exec = force_reg (DImode, operands[5]);
+
+ /* TODO: more conversions will be needed when more types are vectorized. */
+ if (GET_MODE (operands[1]) == V64DImode)
+ {
+ rtx tmp = gen_reg_rtx (V64SImode);
+ emit_insn (gen_vec_truncatev64div64si_exec (tmp, operands[1],
+ gcn_gen_undef (V64SImode),
+ exec));
+ operands[1] = tmp;
+ }
+
+ emit_insn (gen_scatter<mode>_exec (operands[0], operands[1], operands[2],
+ operands[3], operands[4], exec));
+ DONE;
+ })
+
+; FIXME this should be VEC_REG_MODE, but not all dependencies are implemented.
+(define_mode_iterator COND_MODE [V64SI V64DI V64SF V64DF])
+(define_mode_iterator COND_INT_MODE [V64SI V64DI])
+
+(define_code_iterator cond_op [plus minus])
+
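+;; These implement the cond_add/cond_sub optabs: lanes selected by the
+;; mask in operand 1 receive operand 2 <op> operand 3, while the remaining
+;; lanes take the fallback value in operand 4, via the EXEC-masked
+;; patterns above.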
+(define_expand "cond_"
+ [(match_operand:COND_MODE 0 "register_operand")
+ (match_operand:DI 1 "register_operand")
+ (cond_op:COND_MODE
+ (match_operand:COND_MODE 2 "gcn_alu_operand")
+ (match_operand:COND_MODE 3 "gcn_alu_operand"))
+ (match_operand:COND_MODE 4 "register_operand")]
+ ""
+ {
+ operands[1] = force_reg (DImode, operands[1]);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+
+ emit_insn (gen_<expander><mode>3_exec (operands[0], operands[2],
+ operands[3], operands[4],
+ operands[1]));
+ DONE;
+ })
+
+(define_code_iterator cond_bitop [and ior xor])
+
+(define_expand "cond_