From 67c4f47e3f3be277fd291c393b8d3bcec6770fa9 Mon Sep 17 00:00:00 2001 From: Sebastian Pop Date: Wed, 2 Dec 2009 15:39:15 -0600 Subject: [PATCH] Fix FMA4 and XOP insns. 2009-12-02 Sebastian Pop Richard Henderson * config/i386/i386-protos.h (ix86_fma4_valid_op_p): Removed. * config/i386/i386.c (ix86_fma4_valid_op_p): Removed. * config/i386/i386.md: Do not use ix86_fma4_valid_op_p. * config/i386/sse.md (fma4_*): Remove alternative with operand 1 matching a memory access. Do not use ix86_fma4_valid_op_p. (xop_*): Same. Do not use ix86_fma4_valid_op_p in FMA4 and XOP splitters. --- gcc/config/i386/i386-protos.h | 3 +- gcc/config/i386/i386.c | 200 +---------- gcc/config/i386/i386.md | 2 +- gcc/config/i386/sse.md | 794 ++++++++++++++++++----------------------- 4 files changed, 374 insertions(+), 625 deletions(-) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index bb55da1..cf29cc7 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -218,8 +218,7 @@ extern void ix86_expand_vector_set (bool, rtx, rtx, int); extern void ix86_expand_vector_extract (bool, rtx, rtx, int); extern void ix86_expand_reduc_v4sf (rtx (*)(rtx, rtx, rtx), rtx, rtx); -extern bool ix86_fma4_valid_op_p (rtx [], rtx, int, bool, int, bool); -extern void ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode); +extern bool ix86_expand_fma4_multiple_memory (rtx [], enum machine_mode); extern void ix86_expand_vec_extract_even_odd (rtx, rtx, rtx, unsigned); diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 436e935..ade3a7d 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -28807,197 +28807,35 @@ ix86_expand_round (rtx operand0, rtx operand1) emit_move_insn (operand0, res); } -/* Validate whether a FMA4 instruction is valid or not. - OPERANDS is the array of operands. - NUM is the number of operands. - USES_OC0 is true if the instruction uses OC0 and provides 4 variants. - NUM_MEMORY is the maximum number of memory operands to accept. - NUM_MEMORY less than zero is a special case to allow an operand - of an instruction to be memory operation. - when COMMUTATIVE is set, operand 1 and 2 can be swapped. */ -bool -ix86_fma4_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num, - bool uses_oc0, int num_memory, bool commutative) -{ - int mem_mask; - int mem_count; - int i; - - /* Count the number of memory arguments */ - mem_mask = 0; - mem_count = 0; - for (i = 0; i < num; i++) - { - enum machine_mode mode = GET_MODE (operands[i]); - if (register_operand (operands[i], mode)) - ; - - else if (memory_operand (operands[i], mode)) - { - mem_mask |= (1 << i); - mem_count++; - } - - else - { - rtx pattern = PATTERN (insn); - - /* allow 0 for pcmov */ - if (GET_CODE (pattern) != SET - || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE - || i < 2 - || operands[i] != CONST0_RTX (mode)) - return false; - } - } - - /* Special case pmacsdq{l,h} where we allow the 3rd argument to be - a memory operation. */ - if (num_memory < 0) - { - num_memory = -num_memory; - if ((mem_mask & (1 << (num-1))) != 0) - { - mem_mask &= ~(1 << (num-1)); - mem_count--; - } - } - - /* If there were no memory operations, allow the insn */ - if (mem_mask == 0) - return true; - - /* Do not allow the destination register to be a memory operand. */ - else if (mem_mask & (1 << 0)) - return false; - - /* If there are too many memory operations, disallow the instruction. While - the hardware only allows 1 memory reference, before register allocation - for some insns, we allow two memory operations sometimes in order to allow - code like the following to be optimized: +/* Fixup an FMA4 or XOP instruction that has 2 memory input references + into a form the hardware will allow by using the destination + register to load one of the memory operations. Presently this is + used by the multiply/add routines to allow 2 memory references. */ - float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; } - - or similar cases that are vectorized into using the vfmaddss - instruction. */ - else if (mem_count > num_memory) - return false; - - /* Don't allow more than one memory operation if not optimizing. */ - else if (mem_count > 1 && !optimize) - return false; - - else if (num == 4 && mem_count == 1) - { - /* formats (destination is the first argument), example vfmaddss: - xmm1, xmm1, xmm2, xmm3/mem - xmm1, xmm1, xmm2/mem, xmm3 - xmm1, xmm2, xmm3/mem, xmm1 - xmm1, xmm2/mem, xmm3, xmm1 */ - if (uses_oc0) - return ((mem_mask == (1 << 1)) - || (mem_mask == (1 << 2)) - || (mem_mask == (1 << 3))); - - /* format, example vpmacsdd: - xmm1, xmm2, xmm3/mem, xmm1 */ - if (commutative) - return (mem_mask == (1 << 2) || mem_mask == (1 << 1)); - else - return (mem_mask == (1 << 2)); - } - - else if (num == 4 && num_memory == 2) - { - /* If there are two memory operations, we can load one of the memory ops - into the destination register. This is for optimizing the - multiply/add ops, which the combiner has optimized both the multiply - and the add insns to have a memory operation. We have to be careful - that the destination doesn't overlap with the inputs. */ - rtx op0 = operands[0]; - - if (reg_mentioned_p (op0, operands[1]) - || reg_mentioned_p (op0, operands[2]) - || reg_mentioned_p (op0, operands[3])) - return false; - - /* formats (destination is the first argument), example vfmaddss: - xmm1, xmm1, xmm2, xmm3/mem - xmm1, xmm1, xmm2/mem, xmm3 - xmm1, xmm2, xmm3/mem, xmm1 - xmm1, xmm2/mem, xmm3, xmm1 - - For the oc0 case, we will load either operands[1] or operands[3] into - operands[0], so any combination of 2 memory operands is ok. */ - if (uses_oc0) - return true; - - /* format, example vpmacsdd: - xmm1, xmm2, xmm3/mem, xmm1 - - For the integer multiply/add instructions be more restrictive and - require operands[2] and operands[3] to be the memory operands. */ - if (commutative) - return (mem_mask == ((1 << 1) | (1 << 3)) || ((1 << 2) | (1 << 3))); - else - return (mem_mask == ((1 << 2) | (1 << 3))); - } - - else if (num == 3 && num_memory == 1) - { - /* formats, example vprotb: - xmm1, xmm2, xmm3/mem - xmm1, xmm2/mem, xmm3 */ - if (uses_oc0) - return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2))); - - /* format, example vpcomeq: - xmm1, xmm2, xmm3/mem */ - else - return (mem_mask == (1 << 2)); - } - - else - gcc_unreachable (); - - return false; -} - - -/* Fixup an FMA4 instruction that has 2 memory input references into a form the - hardware will allow by using the destination register to load one of the - memory operations. Presently this is used by the multiply/add routines to - allow 2 memory references. */ - -void +bool ix86_expand_fma4_multiple_memory (rtx operands[], enum machine_mode mode) { - rtx op0 = operands[0]; + rtx scratch = operands[0]; - if (memory_operand (op0, mode) - || reg_mentioned_p (op0, operands[1]) - || reg_mentioned_p (op0, operands[2]) - || reg_mentioned_p (op0, operands[3])) - gcc_unreachable (); + gcc_assert (register_operand (operands[0], mode)); + gcc_assert (register_operand (operands[1], mode)); + gcc_assert (MEM_P (operands[2]) && MEM_P (operands[3])); - /* For 2 memory operands, pick either operands[1] or operands[3] to move into - the destination register. */ - if (memory_operand (operands[1], mode)) + if (reg_mentioned_p (scratch, operands[1])) { - emit_move_insn (op0, operands[1]); - operands[1] = op0; - } - else if (memory_operand (operands[3], mode)) - { - emit_move_insn (op0, operands[3]); - operands[3] = op0; + if (!can_create_pseudo_p ()) + return false; + scratch = gen_reg_rtx (mode); } - else - gcc_unreachable (); - return; + emit_move_insn (scratch, operands[3]); + if (rtx_equal_p (operands[2], operands[3])) + operands[2] = operands[3] = scratch; + else + operands[3] = scratch; + return true; } /* Table of valid machine attributes. */ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 851061d..1ef3025 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -19248,7 +19248,7 @@ (match_operand:MODEF 1 "register_operand" "x") (match_operand:MODEF 2 "register_operand" "x") (match_operand:MODEF 3 "register_operand" "x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_XOP" "vpcmov\t{%1, %3, %2, %0|%0, %2, %3, %1}" [(set_attr "type" "sse4arg")]) diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 4899c0a..78e4b6a 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -1703,14 +1703,13 @@ ;; (set (reg3) (plus (reg2) (mem (addr3)))) (define_insn "fma4_fmadd4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x") + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") (plus:FMA4MODEF4 (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1720,34 +1719,29 @@ [(set (match_operand:FMA4MODEF4 0 "register_operand" "") (plus:FMA4MODEF4 (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:FMA4MODEF4 1 "register_operand" "") + (match_operand:FMA4MODEF4 2 "memory_operand" "")) + (match_operand:FMA4MODEF4 3 "memory_operand" "")))] + "TARGET_FMA4" + [(set (match_dup 0) + (plus:FMA4MODEF4 + (mult:FMA4MODEF4 (match_dup 1) (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fmadd4256 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; Floating multiply and subtract ;; Allow two memory operands the same as fmadd (define_insn "fma4_fmsub4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x") + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") (minus:FMA4MODEF4 (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x")))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1757,21 +1751,17 @@ [(set (match_operand:FMA4MODEF4 0 "register_operand" "") (minus:FMA4MODEF4 (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:FMA4MODEF4 1 "register_operand" "") + (match_operand:FMA4MODEF4 2 "memory_operand" "")) + (match_operand:FMA4MODEF4 3 "memory_operand" "")))] + "TARGET_FMA4" + [(set (match_dup 0) + (minus:FMA4MODEF4 + (mult:FMA4MODEF4 (match_dup 1) (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fmsub4256 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; Floating point negative multiply and add @@ -1779,14 +1769,13 @@ ;; Note operands are out of order to simplify call to ix86_fma4_valid_p ;; Allow two memory operands to help in optimizing. (define_insn "fma4_fnmadd4256" - [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x,x") + [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") (minus:FMA4MODEF4 - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x,x") + (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x,xm") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm,x"))))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))] + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1795,22 +1784,18 @@ (define_split [(set (match_operand:FMA4MODEF4 0 "register_operand" "") (minus:FMA4MODEF4 - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "") + (match_operand:FMA4MODEF4 3 "memory_operand" "") (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" ""))))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:FMA4MODEF4 1 "register_operand" "") + (match_operand:FMA4MODEF4 2 "memory_operand" ""))))] + "TARGET_FMA4" + [(set (match_dup 0) + (minus:FMA4MODEF4 + (match_dup 3) + (mult:FMA4MODEF4 (match_dup 1) (match_dup 2))))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fnmadd4256 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; Floating point negative multiply and subtract @@ -1821,11 +1806,10 @@ (minus:FMA4MODEF4 (mult:FMA4MODEF4 (neg:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")) - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")) + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")) + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1836,33 +1820,30 @@ (minus:FMA4MODEF4 (mult:FMA4MODEF4 (neg:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "")) - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "")) - (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "")))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:FMA4MODEF4 1 "register_operand" "")) + (match_operand:FMA4MODEF4 2 "memory_operand" "")) + (match_operand:FMA4MODEF4 3 "memory_operand" "")))] + "TARGET_FMA4" + [(set (match_dup 0) + (minus:FMA4MODEF4 + (mult:FMA4MODEF4 + (neg:FMA4MODEF4 (match_dup 1)) + (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fnmsub4256 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "fma4_fmadd4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x") + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (plus:SSEMODEF4 (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + (match_operand:SSEMODEF4 1 "register_operand" "%x,x") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1872,21 +1853,17 @@ [(set (match_operand:SSEMODEF4 0 "register_operand" "") (plus:SSEMODEF4 (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:SSEMODEF4 1 "register_operand" "") + (match_operand:SSEMODEF4 2 "memory_operand" "")) + (match_operand:SSEMODEF4 3 "memory_operand" "")))] + "TARGET_FMA4" + [(set (match_dup 0) + (plus:SSEMODEF4 + (mult:SSEMODEF4 (match_dup 1) (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fmadd4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; For the scalar operations, use operand1 for the upper words that aren't @@ -1897,13 +1874,12 @@ (vec_merge:SSEMODEF2P (plus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) (const_int 1)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1911,14 +1887,13 @@ ;; Floating multiply and subtract ;; Allow two memory operands the same as fmadd (define_insn "fma4_fmsub4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x") + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (minus:SSEMODEF4 (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x")))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + (match_operand:SSEMODEF4 1 "register_operand" "%x,x") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) + (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1928,21 +1903,17 @@ [(set (match_operand:SSEMODEF4 0 "register_operand" "") (minus:SSEMODEF4 (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:SSEMODEF4 1 "register_operand" "") + (match_operand:SSEMODEF4 2 "memory_operand" "")) + (match_operand:SSEMODEF4 3 "memory_operand" "")))] + "TARGET_FMA4" + [(set (match_dup 0) + (minus:SSEMODEF4 + (mult:SSEMODEF4 (match_dup 1) (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fmsub4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; For the scalar operations, use operand1 for the upper words that aren't @@ -1953,13 +1924,12 @@ (vec_merge:SSEMODEF2P (minus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) (const_int 1)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1969,14 +1939,13 @@ ;; Note operands are out of order to simplify call to ix86_fma4_valid_p ;; Allow two memory operands to help in optimizing. (define_insn "fma4_fnmadd4" - [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x") + [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,x") + (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x,xm") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,x"))))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + (match_operand:SSEMODEF4 1 "register_operand" "%x,x") + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))] + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -1985,22 +1954,18 @@ (define_split [(set (match_operand:SSEMODEF4 0 "register_operand" "") (minus:SSEMODEF4 - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "") + (match_operand:SSEMODEF4 3 "memory_operand" "") (mult:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") - (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:SSEMODEF4 1 "register_operand" "") + (match_operand:SSEMODEF4 2 "memory_operand" ""))))] + "TARGET_FMA4" + [(set (match_dup 0) + (minus:SSEMODEF4 + (match_dup 3) + (mult:SSEMODEF4 (match_dup 1) (match_dup 2))))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fnmadd4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; For the scalar operations, use operand1 for the upper words that aren't @@ -2012,12 +1977,11 @@ (minus:SSEMODEF2P (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) (match_dup 0) (const_int 1)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2030,11 +1994,10 @@ (minus:SSEMODEF4 (mult:SSEMODEF4 (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "x,x")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF4 1 "register_operand" "%x,x")) + (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2045,21 +2008,19 @@ (minus:SSEMODEF4 (mult:SSEMODEF4 (neg:SSEMODEF4 - (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) - (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] - "TARGET_FMA4 - && !ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false) - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (match_operand:SSEMODEF4 1 "register_operand" "")) + (match_operand:SSEMODEF4 2 "memory_operand" "")) + (match_operand:SSEMODEF4 3 "memory_operand" "")))] + "TARGET_FMA4" + [(set (match_dup 0) + (minus:SSEMODEF4 + (mult:SSEMODEF4 + (neg:SSEMODEF4 (match_dup 1)) + (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, mode); - emit_insn (gen_fma4_fnmsub4 (operands[0], operands[1], - operands[2], operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, mode)) + FAIL; }) ;; For the scalar operations, use operand1 for the upper words that aren't @@ -2071,13 +2032,12 @@ (minus:SSEMODEF2P (mult:SSEMODEF2P (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) (const_int 1)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2089,11 +2049,11 @@ (unspec:FMA4MODEF4 [(plus:FMA4MODEF4 (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")) + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2103,11 +2063,11 @@ (unspec:FMA4MODEF4 [(minus:FMA4MODEF4 (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")) + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2118,10 +2078,10 @@ [(minus:FMA4MODEF4 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") (mult:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x") - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")))] + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x") + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2132,11 +2092,11 @@ [(minus:FMA4MODEF4 (mult:FMA4MODEF4 (neg:FMA4MODEF4 - (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "x,x")) - (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,xm")) + (match_operand:FMA4MODEF4 1 "register_operand" "%x,x")) + (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2147,11 +2107,11 @@ (unspec:SSEMODEF2P [(plus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2161,11 +2121,11 @@ (unspec:SSEMODEF2P [(minus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2176,10 +2136,10 @@ [(minus:SSEMODEF2P (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")))] + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2190,11 +2150,11 @@ [(minus:SSEMODEF2P (mult:SSEMODEF2P (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2207,13 +2167,13 @@ [(vec_merge:SSEMODEF2P (plus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) (const_int 1))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2224,13 +2184,13 @@ [(vec_merge:SSEMODEF2P (minus:SSEMODEF2P (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) (const_int 1))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2242,12 +2202,12 @@ (minus:SSEMODEF2P (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") (mult:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "x,x") - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x") + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) (match_dup 0) (const_int 1))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmadd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2259,13 +2219,13 @@ (minus:SSEMODEF2P (mult:SSEMODEF2P (neg:SSEMODEF2P - (match_operand:SSEMODEF2P 1 "register_operand" "x,x")) - (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) + (match_operand:SSEMODEF2P 1 "register_operand" "%x,x")) + (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) (match_dup 0) (const_int 1))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfnmsub\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "")]) @@ -2281,8 +2241,8 @@ (vec_merge:V8SF (plus:V8SF (mult:V8SF - (match_operand:V8SF 1 "nonimmediate_operand" "x,x") - (match_operand:V8SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V8SF 1 "register_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) (minus:V8SF (mult:V8SF @@ -2290,8 +2250,7 @@ (match_dup 2)) (match_dup 3)) (const_int 170)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V8SF")]) @@ -2301,8 +2260,8 @@ (vec_merge:V4DF (plus:V4DF (mult:V4DF - (match_operand:V4DF 1 "nonimmediate_operand" "x,x") - (match_operand:V4DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4DF 1 "register_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) (minus:V4DF (mult:V4DF @@ -2310,8 +2269,7 @@ (match_dup 2)) (match_dup 3)) (const_int 10)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4DF")]) @@ -2321,8 +2279,8 @@ (vec_merge:V4SF (plus:V4SF (mult:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x") - (match_operand:V4SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4SF 1 "register_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) (minus:V4SF (mult:V4SF @@ -2330,8 +2288,7 @@ (match_dup 2)) (match_dup 3)) (const_int 10)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4SF")]) @@ -2341,8 +2298,8 @@ (vec_merge:V2DF (plus:V2DF (mult:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "x,x") - (match_operand:V2DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V2DF 1 "register_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) (minus:V2DF (mult:V2DF @@ -2350,8 +2307,7 @@ (match_dup 2)) (match_dup 3)) (const_int 2)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V2DF")]) @@ -2361,8 +2317,8 @@ (vec_merge:V8SF (plus:V8SF (mult:V8SF - (match_operand:V8SF 1 "nonimmediate_operand" "x,x") - (match_operand:V8SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V8SF 1 "register_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) (minus:V8SF (mult:V8SF @@ -2370,8 +2326,7 @@ (match_dup 2)) (match_dup 3)) (const_int 85)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V8SF")]) @@ -2381,8 +2336,8 @@ (vec_merge:V4DF (plus:V4DF (mult:V4DF - (match_operand:V4DF 1 "nonimmediate_operand" "x,x") - (match_operand:V4DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4DF 1 "register_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) (minus:V4DF (mult:V4DF @@ -2390,8 +2345,7 @@ (match_dup 2)) (match_dup 3)) (const_int 5)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4DF")]) @@ -2401,8 +2355,8 @@ (vec_merge:V4SF (plus:V4SF (mult:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x") - (match_operand:V4SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4SF 1 "register_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) (minus:V4SF (mult:V4SF @@ -2410,8 +2364,7 @@ (match_dup 2)) (match_dup 3)) (const_int 5)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4SF")]) @@ -2421,8 +2374,8 @@ (vec_merge:V2DF (plus:V2DF (mult:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "x,x") - (match_operand:V2DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V2DF 1 "register_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) (minus:V2DF (mult:V2DF @@ -2430,8 +2383,7 @@ (match_dup 2)) (match_dup 3)) (const_int 1)))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V2DF")]) @@ -2444,8 +2396,8 @@ [(vec_merge:V8SF (plus:V8SF (mult:V8SF - (match_operand:V8SF 1 "nonimmediate_operand" "x,x") - (match_operand:V8SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V8SF 1 "register_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) (minus:V8SF (mult:V8SF @@ -2454,8 +2406,7 @@ (match_dup 3)) (const_int 170))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V8SF")]) @@ -2466,8 +2417,8 @@ [(vec_merge:V4DF (plus:V4DF (mult:V4DF - (match_operand:V4DF 1 "nonimmediate_operand" "x,x") - (match_operand:V4DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4DF 1 "register_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) (minus:V4DF (mult:V4DF @@ -2476,8 +2427,7 @@ (match_dup 3)) (const_int 10))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4DF")]) @@ -2488,8 +2438,8 @@ [(vec_merge:V4SF (plus:V4SF (mult:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x") - (match_operand:V4SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4SF 1 "register_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) (minus:V4SF (mult:V4SF @@ -2498,8 +2448,7 @@ (match_dup 3)) (const_int 10))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4SF")]) @@ -2510,8 +2459,8 @@ [(vec_merge:V2DF (plus:V2DF (mult:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "x,x") - (match_operand:V2DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V2DF 1 "register_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) (minus:V2DF (mult:V2DF @@ -2520,8 +2469,7 @@ (match_dup 3)) (const_int 2))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V2DF")]) @@ -2532,8 +2480,8 @@ [(vec_merge:V8SF (plus:V8SF (mult:V8SF - (match_operand:V8SF 1 "nonimmediate_operand" "x,x") - (match_operand:V8SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V8SF 1 "register_operand" "%x,x") + (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) (minus:V8SF (mult:V8SF @@ -2542,8 +2490,7 @@ (match_dup 3)) (const_int 85))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V8SF")]) @@ -2554,8 +2501,8 @@ [(vec_merge:V4DF (plus:V4DF (mult:V4DF - (match_operand:V4DF 1 "nonimmediate_operand" "x,x") - (match_operand:V4DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4DF 1 "register_operand" "%x,x") + (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) (minus:V4DF (mult:V4DF @@ -2564,8 +2511,7 @@ (match_dup 3)) (const_int 5))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4DF")]) @@ -2576,8 +2522,8 @@ [(vec_merge:V4SF (plus:V4SF (mult:V4SF - (match_operand:V4SF 1 "nonimmediate_operand" "x,x") - (match_operand:V4SF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V4SF 1 "register_operand" "%x,x") + (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) (minus:V4SF (mult:V4SF @@ -2586,8 +2532,7 @@ (match_dup 3)) (const_int 5))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V4SF")]) @@ -2598,8 +2543,8 @@ [(vec_merge:V2DF (plus:V2DF (mult:V2DF - (match_operand:V2DF 1 "nonimmediate_operand" "x,x") - (match_operand:V2DF 2 "nonimmediate_operand" "x,xm")) + (match_operand:V2DF 1 "register_operand" "%x,x") + (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) (minus:V2DF (mult:V2DF @@ -2608,8 +2553,7 @@ (match_dup 3)) (const_int 1))] UNSPEC_FMA4_INTRINSIC))] - "TARGET_FMA4 - && ix86_fma4_valid_op_p (operands, insn, 4, true, 2, true)" + "TARGET_FMA4 && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "V2DF")]) @@ -10356,16 +10300,14 @@ ;; that it does and splitting it later allows the following to be recognized: ;; a[i] = b[i] * c[i] + d[i]; (define_insn "xop_pmacsww" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") + [(set (match_operand:V8HI 0 "register_operand" "=x") (plus:V8HI (mult:V8HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,m") - (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")) - (match_operand:V8HI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" - "@ - vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (match_operand:V8HI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) @@ -10373,33 +10315,27 @@ (define_split [(set (match_operand:V8HI 0 "register_operand" "") (plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") - (match_operand:V8HI 2 "nonimmediate_operand" "")) - (match_operand:V8HI 3 "nonimmediate_operand" "")))] - "TARGET_XOP - && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (mult:V8HI (match_operand:V8HI 1 "register_operand" "") + (match_operand:V8HI 2 "memory_operand" "")) + (match_operand:V8HI 3 "memory_operand" "")))] + "TARGET_XOP" + [(set (match_dup 0) + (plus:V8HI + (mult:V8HI (match_dup 1) (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, V8HImode); - emit_insn (gen_xop_pmacsww (operands[0], operands[1], operands[2], - operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, V8HImode)) + FAIL; }) (define_insn "xop_pmacssww" - [(set (match_operand:V8HI 0 "register_operand" "=x,x") + [(set (match_operand:V8HI 0 "register_operand" "=x") (ss_plus:V8HI - (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,m") - (match_operand:V8HI 2 "nonimmediate_operand" "xm,x")) - (match_operand:V8HI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (mult:V8HI (match_operand:V8HI 1 "register_operand" "%x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm")) + (match_operand:V8HI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) @@ -10408,16 +10344,14 @@ ;; that it does and splitting it later allows the following to be recognized: ;; a[i] = b[i] * c[i] + d[i]; (define_insn "xop_pmacsdd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (mult:V4SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")) - (match_operand:V4SI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true)" - "@ - vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (match_operand:V4SI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) @@ -10425,117 +10359,105 @@ (define_split [(set (match_operand:V4SI 0 "register_operand" "") (plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") - (match_operand:V4SI 2 "nonimmediate_operand" "")) - (match_operand:V4SI 3 "nonimmediate_operand" "")))] - "TARGET_XOP - && !ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true) - && ix86_fma4_valid_op_p (operands, insn, 4, false, 2, true) - && !reg_mentioned_p (operands[0], operands[1]) - && !reg_mentioned_p (operands[0], operands[2]) - && !reg_mentioned_p (operands[0], operands[3])" - [(const_int 0)] + (mult:V4SI (match_operand:V4SI 1 "register_operand" "") + (match_operand:V4SI 2 "memory_operand" "")) + (match_operand:V4SI 3 "memory_operand" "")))] + "TARGET_XOP" + [(set (match_dup 0) + (plus:V4SI + (mult:V4SI (match_dup 1) (match_dup 2)) + (match_dup 3)))] { - ix86_expand_fma4_multiple_memory (operands, V4SImode); - emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2], - operands[3])); - DONE; + if (!ix86_expand_fma4_multiple_memory (operands, V4SImode)) + FAIL; }) (define_insn "xop_pmacssdd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI - (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x")) - (match_operand:V4SI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm")) + (match_operand:V4SI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn "xop_pmacssdql" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x") (ss_plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)]))) - (match_operand:V2DI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V2DI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn "xop_pmacssdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x") (ss_plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V2DI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn "xop_pmacsdql" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)])))) - (match_operand:V2DI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V2DI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn_and_split "*xop_pmacsdql_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x,&x") + [(set (match_operand:V2DI 0 "register_operand" "=&x") (plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3)])))) - (match_operand:V2DI 3 "memory_operand" "m,m")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + (match_operand:V2DI 3 "memory_operand" "m")))] + "TARGET_XOP" "#" "&& reload_completed" [(set (match_dup 0) @@ -10564,7 +10486,7 @@ (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3)]))) (sign_extend:V2DI @@ -10598,43 +10520,41 @@ (set_attr "mode" "TI")]) (define_insn "xop_pmacsdqh" - [(set (match_operand:V2DI 0 "register_operand" "=x,x") + [(set (match_operand:V2DI 0 "register_operand" "=x") (plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V2DI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn_and_split "*xop_pmacsdqh_mem" - [(set (match_operand:V2DI 0 "register_operand" "=&x,&x") + [(set (match_operand:V2DI 0 "register_operand" "=&x") (plus:V2DI (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x,m") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 2 "nonimmediate_operand" "xm,x") + (match_operand:V4SI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2)])))) - (match_operand:V2DI 3 "memory_operand" "m,m")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, -1, true)" + (match_operand:V2DI 3 "memory_operand" "m")))] + "TARGET_XOP" "#" "&& reload_completed" [(set (match_dup 0) @@ -10663,7 +10583,7 @@ (mult:V2DI (sign_extend:V2DI (vec_select:V2SI - (match_operand:V4SI 1 "nonimmediate_operand" "%x") + (match_operand:V4SI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2)]))) (sign_extend:V2DI @@ -10698,72 +10618,68 @@ ;; XOP parallel integer multiply/add instructions for the intrinisics (define_insn "xop_pmacsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,m") + (match_operand:V8HI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn "xop_pmacswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,m") + (match_operand:V8HI 1 "register_operand" "%x") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 1) (const_int 3) (const_int 5) (const_int 7)])))) - (match_operand:V4SI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn "xop_pmadcsswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x") (ss_plus:V4SI (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,m") + (match_operand:V8HI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -10783,29 +10699,27 @@ (const_int 3) (const_int 5) (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) (define_insn "xop_pmadcswd" - [(set (match_operand:V4SI 0 "register_operand" "=x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x") (plus:V4SI (plus:V4SI (mult:V4SI (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 1 "nonimmediate_operand" "%x,m") + (match_operand:V8HI 1 "register_operand" "%x") (parallel [(const_int 0) (const_int 2) (const_int 4) (const_int 6)]))) (sign_extend:V4SI (vec_select:V4HI - (match_operand:V8HI 2 "nonimmediate_operand" "xm,x") + (match_operand:V8HI 2 "nonimmediate_operand" "xm") (parallel [(const_int 0) (const_int 2) (const_int 4) @@ -10825,32 +10739,30 @@ (const_int 3) (const_int 5) (const_int 7)]))))) - (match_operand:V4SI 3 "register_operand" "x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, false, 1, true)" - "@ - vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} - vpmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" + (match_operand:V4SI 3 "register_operand" "x")))] + "TARGET_XOP" + "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "mode" "TI")]) ;; XOP parallel XMM conditional moves (define_insn "xop_pcmov_" - [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x") + [(set (match_operand:SSEMODE 0 "register_operand" "=x,x") (if_then_else:SSEMODE - (match_operand:SSEMODE 3 "nonimmediate_operand" "x,x,m") - (match_operand:SSEMODE 1 "vector_move_operand" "x,m,x") - (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m") + (match_operand:SSEMODE 1 "vector_move_operand" "x,x") + (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))] + "TARGET_XOP" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) (define_insn "xop_pcmov_256" - [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") + [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x") (if_then_else:AVX256MODE - (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,x,m") - (match_operand:AVX256MODE 1 "vector_move_operand" "x,m,x") - (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x,x")))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m") + (match_operand:AVX256MODE 1 "vector_move_operand" "x,x") + (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))] + "TARGET_XOP" "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg")]) @@ -11296,53 +11208,53 @@ ;; XOP permute instructions (define_insn "xop_pperm" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + [(set (match_operand:V16QI 0 "register_operand" "=x,x") (unspec:V16QI - [(match_operand:V16QI 1 "nonimmediate_operand" "x,x,m") - (match_operand:V16QI 2 "nonimmediate_operand" "x,m,x") - (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x")] + [(match_operand:V16QI 1 "register_operand" "x,x") + (match_operand:V16QI 2 "nonimmediate_operand" "x,m") + (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] UNSPEC_XOP_PERMUTE))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) ;; XOP pack instructions that combine two vectors into a smaller vector (define_insn "xop_pperm_pack_v2di_v4si" - [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") + [(set (match_operand:V4SI 0 "register_operand" "=x,x") (vec_concat:V4SI (truncate:V2SI - (match_operand:V2DI 1 "nonimmediate_operand" "x,x,m")) + (match_operand:V2DI 1 "register_operand" "x,x")) (truncate:V2SI - (match_operand:V2DI 2 "nonimmediate_operand" "x,m,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) (define_insn "xop_pperm_pack_v4si_v8hi" - [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") + [(set (match_operand:V8HI 0 "register_operand" "=x,x") (vec_concat:V8HI (truncate:V4HI - (match_operand:V4SI 1 "nonimmediate_operand" "x,x,m")) + (match_operand:V4SI 1 "register_operand" "x,x")) (truncate:V4HI - (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) (define_insn "xop_pperm_pack_v8hi_v16qi" - [(set (match_operand:V16QI 0 "register_operand" "=x,x,x") + [(set (match_operand:V16QI 0 "register_operand" "=x,x") (vec_concat:V16QI (truncate:V8QI - (match_operand:V8HI 1 "nonimmediate_operand" "x,x,m")) + (match_operand:V8HI 1 "register_operand" "x,x")) (truncate:V8QI - (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")))) - (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,x"))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 4, true, 1, false)" + (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) + (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] + "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" [(set_attr "type" "sse4arg") (set_attr "mode" "TI")]) @@ -11471,7 +11383,7 @@ (rotatert:SSEMODE1248 (match_dup 1) (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vprot\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "0") @@ -11526,7 +11438,7 @@ (ashiftrt:SSEMODE1248 (match_dup 1) (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpsha\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "0") @@ -11545,7 +11457,7 @@ (lshiftrt:SSEMODE1248 (match_dup 1) (neg:SSEMODE1248 (match_dup 2)))))] - "TARGET_XOP && ix86_fma4_valid_op_p (operands, insn, 3, true, 1, false)" + "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" "vpshl\t{%2, %1, %0|%0, %1, %2}" [(set_attr "type" "sseishft") (set_attr "prefix_data16" "0") -- 1.6.0.4