From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 7980 invoked by alias); 4 Feb 2003 19:45:08 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 7747 invoked from network); 4 Feb 2003 19:45:05 -0000 Received: from unknown (HELO nikam.ms.mff.cuni.cz) (195.113.18.106) by 172.16.49.205 with SMTP; 4 Feb 2003 19:45:05 -0000 Received: from camelot.ms.mff.cuni.cz (kampanus.ms.mff.cuni.cz [195.113.18.107]) by nikam.ms.mff.cuni.cz (Postfix) with SMTP id B800C4DE6F; Tue, 4 Feb 2003 20:45:06 +0100 (CET) Received: by camelot.ms.mff.cuni.cz (sSMTP sendmail emulation); Tue, 4 Feb 2003 20:45:06 +0100 Date: Tue, 04 Feb 2003 19:45:00 -0000 From: Jan Hubicka To: gcc-patches@gcc.gnu.org, rth@cygnus.com Subject: SSE/MMX moves optimization Message-ID: <20030204194506.GG8259@kam.mff.cuni.cz> Mime-Version: 1.0 Content-Type: text/plain; charset=us-ascii Content-Disposition: inline User-Agent: Mutt/1.3.28i X-SW-Source: 2003-02/txt/msg00230.txt.bz2 Hi, Athlon and K8 do not like moves from SSE to integer and vice versa. It is faster to go through memory. This patch implements it and fixes some dead ends in the patterns I've noticed. Bootstrapped/regtested x86-64. OK? Tue Feb 4 20:27:24 CET 2003 Jan Hubicka * i386.c (x86_inter_unit_moves): New variable. (ix86_secondary_memory_needed): Fix 64bit case, honor TARGET_INTER_UNIT_MOVES. * i386.h (x86_inter_unit_moves): Declare. (TARGET_INTER_UNIT_MOVES): New macro. * i386.md (movsi_1): Cleanup constraints; disable when not doing inter-unit moves. (movsi_1_nointernunit): New. (movdi_1_rex64): Fix constraints; deal with SSE->GPR moves. (movdi_1_rex64_nointerunit): New. (movsf_1): Disable when not doing inter-unit moves. (movsf_1_nointerunit): New. 
Index: i386.c =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.c,v retrieving revision 1.524 diff -c -3 -p -r1.524 i386.c *** i386.c 3 Feb 2003 20:20:36 -0000 1.524 --- i386.c 4 Feb 2003 19:27:16 -0000 *************** const int x86_sse_typeless_stores = m_AT *** 521,526 **** --- 521,527 ---- const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4; const int x86_use_ffreep = m_ATHLON_K8; const int x86_rep_movl_optimal = m_386 | m_PENT | m_PPRO | m_K6; + const int x86_inter_unit_moves = ~(m_ATHLON_K8); /* In case the average insn count for single function invocation is lower than this constant, emit fast (but longer) prologue and *************** ix86_secondary_memory_needed (class1, cl *** 14385,14394 **** return 1; } return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2) ! || (SSE_CLASS_P (class1) != SSE_CLASS_P (class2) ! && (mode) != SImode) ! || (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) ! && (mode) != SImode)); } /* Return the cost of moving data from a register in class CLASS1 to one in class CLASS2. --- 14386,14395 ---- return 1; } return (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2) ! || ((SSE_CLASS_P (class1) != SSE_CLASS_P (class2) ! || MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) ! && ((mode != SImode && (mode != DImode || !TARGET_64BIT)) ! || (!TARGET_INTER_UNIT_MOVES && !optimize_size)))); } /* Return the cost of moving data from a register in class CLASS1 to one in class CLASS2. 
Index: i386.h =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.h,v retrieving revision 1.318 diff -c -3 -p -r1.318 i386.h *** i386.h 31 Jan 2003 23:34:15 -0000 1.318 --- i386.h 4 Feb 2003 19:27:16 -0000 *************** extern const int x86_arch_always_fancy_m *** 230,235 **** --- 230,236 ---- extern const int x86_sse_partial_reg_dependency, x86_sse_partial_regs; extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor; extern const int x86_use_ffreep, x86_sse_partial_regs_for_cvtsd2ss; + extern const int x86_inter_unit_moves; extern int x86_prefetch_sse; #define TARGET_USE_LEAVE (x86_use_leave & CPUMASK) *************** extern int x86_prefetch_sse; *** 282,287 **** --- 283,289 ---- #define TARGET_SHIFT1 (x86_shift1 & CPUMASK) #define TARGET_USE_FFREEP (x86_use_ffreep & CPUMASK) #define TARGET_REP_MOVL_OPTIMAL (x86_rep_movl_optimal & CPUMASK) + #define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & CPUMASK) #define TARGET_STACK_PROBE (target_flags & MASK_STACK_PROBE) Index: i386.md =================================================================== RCS file: /cvs/gcc/gcc/gcc/config/i386/i386.md,v retrieving revision 1.421 diff -c -3 -p -r1.421 i386.md *** i386.md 3 Feb 2003 20:20:36 -0000 1.421 --- i386.md 4 Feb 2003 19:27:17 -0000 *************** *** 1166,1173 **** (define_insn "*movsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") ! (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,rm,*y,*y,rm,*Y,*Y"))] ! "GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM" { switch (get_attr_type (insn)) { --- 1166,1214 ---- (define_insn "*movsi_1" [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!rm,!*y,!*Y,!rm,!*Y") ! (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,*y,*y,rm,*Y,*Y,rm"))] ! "(TARGET_INTER_UNIT_MOVES || optimize_size) ! && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" ! { ! 
switch (get_attr_type (insn)) ! { ! case TYPE_SSEMOV: ! if (get_attr_mode (insn) == TImode) ! return "movdqa\t{%1, %0|%0, %1}"; ! return "movd\t{%1, %0|%0, %1}"; ! ! case TYPE_MMXMOV: ! if (get_attr_mode (insn) == DImode) ! return "movq\t{%1, %0|%0, %1}"; ! return "movd\t{%1, %0|%0, %1}"; ! ! case TYPE_LEA: ! return "lea{l}\t{%1, %0|%0, %1}"; ! ! default: ! if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) ! abort(); ! return "mov{l}\t{%1, %0|%0, %1}"; ! } ! } ! [(set (attr "type") ! (cond [(eq_attr "alternative" "4,5,6") ! (const_string "mmxmov") ! (eq_attr "alternative" "7,8,9") ! (const_string "ssemov") ! (and (ne (symbol_ref "flag_pic") (const_int 0)) ! (match_operand:SI 1 "symbolic_operand" "")) ! (const_string "lea") ! ] ! (const_string "imov"))) ! (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") ! (set_attr "mode" "SI,SI,SI,SI,DI,SI,SI,TI,SI,SI")]) ! ! (define_insn "*movsi_1_nointernunit" ! [(set (match_operand:SI 0 "nonimmediate_operand" "=*?a,r,*?a,m,!*y,!m,!*y,!*Y,!m,!*Y") ! (match_operand:SI 1 "general_operand" "im,rinm,rinm,rin,*y,*y,m,*Y,*Y,m"))] ! "(!TARGET_INTER_UNIT_MOVES && !optimize_size) ! && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (get_attr_type (insn)) { *************** *** 1201,1207 **** ] (const_string "imov"))) (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") ! (set_attr "mode" "SI,SI,SI,SI,SI,SI,DI,TI,SI,SI")]) ;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address --- 1242,1248 ---- ] (const_string "imov"))) (set_attr "modrm" "0,*,0,*,*,*,*,*,*,*") ! (set_attr "mode" "SI,SI,SI,SI,DI,SI,SI,TI,SI,SI")]) ;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address *************** *** 1932,1947 **** "ix86_split_long_move (operands); DONE;") (define_insn "*movdi_1_rex64" ! [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!m*y,!*y,!*Y,!m,!*Y") ! 
(match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,m,*Y,*Y,*m"))] "TARGET_64BIT && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (get_attr_type (insn)) { case TYPE_SSEMOV: ! if (register_operand (operands[0], DImode) ! && register_operand (operands[1], DImode)) return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ case TYPE_MMXMOV: --- 1973,2038 ---- "ix86_split_long_move (operands); DONE;") (define_insn "*movdi_1_rex64" ! [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!rm,!*y,!*Y,!rm,!*Y") ! (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,rm,*Y,*Y,rm"))] ! "TARGET_64BIT ! && (TARGET_INTER_UNIT_MOVES || optimize_size) ! && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" ! { ! switch (get_attr_type (insn)) ! { ! case TYPE_SSEMOV: ! if (get_attr_mode (insn) == MODE_TI) ! return "movdqa\t{%1, %0|%0, %1}"; ! /* Moves from and into integer register is done using movd opcode with ! REX prefix. */ ! if (GENERAL_REG_P (operands[0]) || GENERAL_REG_P (operands[1])) ! return "movd\t{%1, %0|%0, %1}"; ! /* FALLTHRU */ ! case TYPE_MMXMOV: ! return "movq\t{%1, %0|%0, %1}"; ! case TYPE_MULTI: ! return "#"; ! case TYPE_LEA: ! return "lea{q}\t{%a1, %0|%0, %a1}"; ! default: ! if (flag_pic && !LEGITIMATE_PIC_OPERAND_P (operands[1])) ! abort (); ! if (get_attr_mode (insn) == MODE_SI) ! return "mov{l}\t{%k1, %k0|%k0, %k1}"; ! else if (which_alternative == 2) ! return "movabs{q}\t{%1, %0|%0, %1}"; ! else ! return "mov{q}\t{%1, %0|%0, %1}"; ! } ! } ! [(set (attr "type") ! (cond [(eq_attr "alternative" "5,6,7") ! (const_string "mmxmov") ! (eq_attr "alternative" "8,9,10") ! (const_string "ssemov") ! (eq_attr "alternative" "4") ! (const_string "multi") ! (and (ne (symbol_ref "flag_pic") (const_int 0)) ! (match_operand:DI 1 "symbolic_operand" "")) ! (const_string "lea") ! ] ! (const_string "imov"))) ! (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*") ! (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*") ! 
(set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")]) ! ! (define_insn "*movdi_1_rex64_nointerunit" ! [(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r,mr,!mr,!*y,!m,!*y,!*Y,!m,!*Y") ! (match_operand:DI 1 "general_operand" "Z,rem,i,re,n,*y,*y,m,*Y,*Y,m"))] "TARGET_64BIT + && (!TARGET_INTER_UNIT_MOVES && !optimize_size) && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM)" { switch (get_attr_type (insn)) { case TYPE_SSEMOV: ! if (get_attr_mode (insn) == MODE_TI) return "movdqa\t{%1, %0|%0, %1}"; /* FALLTHRU */ case TYPE_MMXMOV: *************** *** 1962,1970 **** } } [(set (attr "type") ! (cond [(eq_attr "alternative" "5,6") (const_string "mmxmov") ! (eq_attr "alternative" "7,8") (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") --- 2053,2061 ---- } } [(set (attr "type") ! (cond [(eq_attr "alternative" "5,6,7") (const_string "mmxmov") ! (eq_attr "alternative" "8,9,10") (const_string "ssemov") (eq_attr "alternative" "4") (const_string "multi") *************** *** 1973,1981 **** (const_string "lea") ] (const_string "imov"))) ! (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*") ! (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*") ! (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI")]) ;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address --- 2064,2072 ---- (const_string "lea") ] (const_string "imov"))) ! (set_attr "modrm" "*,0,0,*,*,*,*,*,*,*,*") ! (set_attr "length_immediate" "*,4,8,*,*,*,*,*,*,*,*") ! (set_attr "mode" "SI,DI,DI,DI,SI,DI,DI,DI,TI,DI,DI")]) ;; Stores and loads of ax to arbitrary constant address. ;; We fake an second form of instruction to force reload to load address *************** *** 2130,2136 **** (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))] ! 
"(GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE --- 2221,2329 ---- (define_insn "*movsf_1" [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!rm,!*y") (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,rm,*y,*y"))] ! "(TARGET_INTER_UNIT_MOVES || optimize_size) ! && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) ! && (reload_in_progress || reload_completed ! || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) ! || GET_CODE (operands[1]) != CONST_DOUBLE ! || memory_operand (operands[0], SFmode))" ! { ! switch (which_alternative) ! { ! case 0: ! if (REG_P (operands[1]) ! && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) ! return "fstp\t%y0"; ! else if (STACK_TOP_P (operands[0])) ! return "fld%z1\t%y1"; ! else ! return "fst\t%y0"; ! ! case 1: ! if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) ! return "fstp%z0\t%y0"; ! else ! return "fst%z0\t%y0"; ! ! case 2: ! switch (standard_80387_constant_p (operands[1])) ! { ! case 1: ! return "fldz"; ! case 2: ! return "fld1"; ! } ! abort(); ! ! case 3: ! case 4: ! return "mov{l}\t{%1, %0|%0, %1}"; ! case 5: ! if (get_attr_mode (insn) == MODE_TI) ! return "pxor\t%0, %0"; ! else ! return "xorps\t%0, %0"; ! case 6: ! if (get_attr_mode (insn) == MODE_V4SF) ! return "movaps\t{%1, %0|%0, %1}"; ! else ! return "movss\t{%1, %0|%0, %1}"; ! case 7: ! case 8: ! return "movss\t{%1, %0|%0, %1}"; ! ! case 9: ! case 10: ! return "movd\t{%1, %0|%0, %1}"; ! ! case 11: ! return "movq\t{%1, %0|%0, %1}"; ! ! default: ! abort(); ! } ! } ! [(set_attr "type" "fmov,fmov,fmov,imov,imov,ssemov,ssemov,ssemov,ssemov,mmxmov,mmxmov,mmxmov") ! (set (attr "mode") ! (cond [(eq_attr "alternative" "3,4,9,10") ! (const_string "SI") ! (eq_attr "alternative" "5") ! (if_then_else ! 
(and (and (ne (symbol_ref "TARGET_SSE_LOAD0_BY_PXOR") ! (const_int 0)) ! (ne (symbol_ref "TARGET_SSE2") ! (const_int 0))) ! (eq (symbol_ref "optimize_size") ! (const_int 0))) ! (const_string "TI") ! (const_string "V4SF")) ! /* For architectures resolving dependencies on ! whole SSE registers use APS move to break dependency ! chains, otherwise use short move to avoid extra work. ! ! Do the same for architectures resolving dependencies on ! the parts. While in DF mode it is better to always handle ! just register parts, the SF mode is different due to lack ! of instructions to load just part of the register. It is ! better to maintain the whole registers in single format ! to avoid problems on using packed logical operations. */ ! (eq_attr "alternative" "6") ! (if_then_else ! (ior (ne (symbol_ref "TARGET_SSE_PARTIAL_REG_DEPENDENCY") ! (const_int 0)) ! (ne (symbol_ref "TARGET_SSE_PARTIAL_REGS") ! (const_int 0))) ! (const_string "V4SF") ! (const_string "SF")) ! (eq_attr "alternative" "11") ! (const_string "DI")] ! (const_string "SF")))]) ! ! (define_insn "*movsf_1_nointerunit" ! [(set (match_operand:SF 0 "nonimmediate_operand" "=f#xr,m,f#xr,r#xf,m,x#rf,x#rf,x#rf,m,!*y,!m,!*y") ! (match_operand:SF 1 "general_operand" "fm#rx,f#rx,G,rmF#fx,Fr#fx,C,x,xm#rf,x#rf,m,*y,*y"))] ! "(!TARGET_INTER_UNIT_MOVES && !optimize_size) ! && (GET_CODE (operands[0]) != MEM || GET_CODE (operands[1]) != MEM) && (reload_in_progress || reload_completed || (ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_LARGE) || GET_CODE (operands[1]) != CONST_DOUBLE