From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 14220 invoked by alias); 28 Oct 2011 04:08:33 -0000 Received: (qmail 13871 invoked by uid 22791); 28 Oct 2011 04:08:28 -0000 X-SWARE-Spam-Status: No, hits=-1.3 required=5.0 tests=AWL,BAYES_00,DKIM_SIGNED,DKIM_VALID,FREEMAIL_ENVFROM_END_DIGIT,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SARE_HTML_INV_TAG,TW_PX,T_TO_NO_BRKTS_FREEMAIL X-Spam-Check-By: sourceware.org Received: from mail-gy0-f175.google.com (HELO mail-gy0-f175.google.com) (209.85.160.175) by sourceware.org (qpsmtpd/0.43rc1) with ESMTP; Fri, 28 Oct 2011 04:07:57 +0000 Received: by gyc15 with SMTP id 15so3770482gyc.20 for ; Thu, 27 Oct 2011 21:07:57 -0700 (PDT) Received: by 10.236.124.9 with SMTP id w9mr979854yhh.2.1319774876976; Thu, 27 Oct 2011 21:07:56 -0700 (PDT) Received: from localhost.localdomain (c-98-203-235-125.hsd1.wa.comcast.net. [98.203.235.125]) by mx.google.com with ESMTPS id j25sm10849016yhm.12.2011.10.27.21.07.55 (version=TLSv1/SSLv3 cipher=OTHER); Thu, 27 Oct 2011 21:07:56 -0700 (PDT) From: Richard Henderson To: gcc-patches@gcc.gnu.org Cc: amacleod@redhat.com, jakub@redhat.com, ubizjak@gmail.com Subject: [PATCH 8/9] Convert i386 backend to new atomic patterns. 
Date: Fri, 28 Oct 2011 05:20:00 -0000 Message-Id: <1319774858-9181-9-git-send-email-rth@redhat.com> In-Reply-To: <1319774858-9181-1-git-send-email-rth@redhat.com> References: <1319774858-9181-1-git-send-email-rth@redhat.com> X-IsSubscribed: yes Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org X-SW-Source: 2011-10/txt/msg02610.txt.bz2 Cc: jakub@redhat.com Cc: ubizjak@gmail.com --- gcc/config/i386/i386.md | 5 +- gcc/config/i386/sync.md | 306 +++++++++++++++++++++++++---------------------- 2 files changed, 167 insertions(+), 144 deletions(-) diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index a11a71b..7ce57d8 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -262,7 +262,10 @@ UNSPECV_ALIGN UNSPECV_MONITOR UNSPECV_MWAIT - UNSPECV_CMPXCHG + UNSPECV_CMPXCHG_1 + UNSPECV_CMPXCHG_2 + UNSPECV_CMPXCHG_3 + UNSPECV_CMPXCHG_4 UNSPECV_XCHG UNSPECV_LOCK UNSPECV_PROLOGUE_USE diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index 1044255..e5579b1 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -18,31 +18,27 @@ ;; along with GCC; see the file COPYING3. If not see ;; . 
-(define_mode_iterator CASMODE - [QI HI SI (DI "TARGET_64BIT || TARGET_CMPXCHG8B") - (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) -(define_mode_iterator DCASMODE - [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic") - (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) -(define_mode_attr doublemodesuffix [(DI "8") (TI "16")]) -(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")]) - -(define_expand "memory_barrier" - [(set (match_dup 0) - (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] +(define_expand "mem_thread_fence" + [(match_operand:SI 0 "const_int_operand" "")] ;; model "" { - operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); - MEM_VOLATILE_P (operands[0]) = 1; + /* Unless this is a SEQ_CST fence, the i386 memory model is strong + enough not to require barriers of any kind. */ + if (INTVAL (operands[0]) != MEMMODEL_SEQ_CST) + DONE; - if (!(TARGET_64BIT || TARGET_SSE2)) + if (TARGET_64BIT || TARGET_SSE2) + emit_insn (gen_sse2_mfence ()); + else { - emit_insn (gen_memory_barrier_nosse (operands[0])); - DONE; + rtx mem = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (mem) = 1; + emit_insn (gen_mfence_nosse (mem)); } + DONE; }) -(define_insn "memory_barrier_nosse" +(define_insn "mfence_nosse" [(set (match_operand:BLK 0 "" "") (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE)) (clobber (reg:CC FLAGS_REG))] @@ -50,127 +46,152 @@ "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}" [(set_attr "memory" "unknown")]) -;; ??? It would be possible to use cmpxchg8b on pentium for DImode -;; changes. It's complicated because the insn uses ecx:ebx as the -;; new value; note that the registers are reversed from the order -;; that they'd be in with (reg:DI 2 ecx). Similarly for TImode -;; data in 64-bit mode. 
+(define_expand "atomic_compare_and_swap" + [(match_operand:QI 0 "register_operand" "") ;; bool success output + (match_operand:SWI124 1 "register_operand" "") ;; oldval output + (match_operand:SWI124 2 "memory_operand" "") ;; memory + (match_operand:SWI124 3 "register_operand" "") ;; expected input + (match_operand:SWI124 4 "register_operand" "") ;; newval input + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; success model + (match_operand:SI 7 "const_int_operand" "")] ;; failure model + "TARGET_CMPXCHG" +{ + emit_insn (gen_atomic_compare_and_swap_single + (operands[1], operands[2], operands[3], operands[4])); + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + DONE; +}) -(define_expand "sync_compare_and_swap" - [(parallel - [(set (match_operand:CASMODE 0 "register_operand" "") - (match_operand:CASMODE 1 "memory_operand" "")) - (set (match_dup 1) - (unspec_volatile:CASMODE - [(match_dup 1) - (match_operand:CASMODE 2 "register_operand" "") - (match_operand:CASMODE 3 "register_operand" "")] - UNSPECV_CMPXCHG)) - (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:CASMODE - [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG) - (match_dup 2)))])] +(define_mode_iterator CASMODE + [(DI "TARGET_64BIT || TARGET_CMPXCHG8B") + (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) +(define_mode_iterator DCASMODE + [(DI "!TARGET_64BIT && TARGET_CMPXCHG8B && !flag_pic") + (TI "TARGET_64BIT && TARGET_CMPXCHG16B")]) +(define_mode_attr doublemodesuffix [(DI "8") (TI "16")]) +(define_mode_attr DCASHMODE [(DI "SI") (TI "DI")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:QI 0 "register_operand" "") ;; bool success output + (match_operand:CASMODE 1 "register_operand" "") ;; oldval output + (match_operand:CASMODE 2 "memory_operand" "") ;; memory + (match_operand:CASMODE 3 "register_operand" "") ;; expected input + (match_operand:CASMODE 4 "register_operand" "") ;; 
newval input + (match_operand:SI 5 "const_int_operand" "") ;; is_weak + (match_operand:SI 6 "const_int_operand" "") ;; success model + (match_operand:SI 7 "const_int_operand" "")] ;; failure model "TARGET_CMPXCHG" { - if ((mode == DImode && !TARGET_64BIT) || mode == TImode) + if (mode == DImode && TARGET_64BIT) + { + emit_insn (gen_atomic_compare_and_swap_singledi + (operands[1], operands[2], operands[3], operands[4])); + } + else { - enum machine_mode hmode = mode == DImode ? SImode : DImode; - rtx low = simplify_gen_subreg (hmode, operands[3], mode, 0); - rtx high = simplify_gen_subreg (hmode, operands[3], mode, - GET_MODE_SIZE (hmode)); - low = force_reg (hmode, low); - high = force_reg (hmode, high); - if (mode == DImode) - { - if (flag_pic && !cmpxchg8b_pic_memory_operand (operands[1], DImode)) - operands[1] = replace_equiv_address (operands[1], - force_reg (Pmode, - XEXP (operands[1], - 0))); - emit_insn (gen_sync_double_compare_and_swapdi - (operands[0], operands[1], operands[2], low, high)); - } - else if (mode == TImode) - emit_insn (gen_sync_double_compare_and_swapti - (operands[0], operands[1], operands[2], low, high)); - else - gcc_unreachable (); - DONE; + enum machine_mode hmode = mode; + rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n, mem; + + lo_o = operands[1]; + mem = operands[2]; + lo_e = operands[3]; + lo_n = operands[4]; + hi_o = gen_highpart (hmode, lo_o); + hi_e = gen_highpart (hmode, lo_e); + hi_n = gen_highpart (hmode, lo_n); + lo_o = gen_lowpart (hmode, lo_o); + lo_e = gen_lowpart (hmode, lo_e); + lo_n = gen_lowpart (hmode, lo_n); + + if (mode == DImode + && !TARGET_64BIT + && flag_pic + && !cmpxchg8b_pic_memory_operand (mem, DImode)) + mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0))); + + emit_insn (gen_atomic_compare_and_swap_double + (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n)); } + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + DONE; }) -(define_insn "*sync_compare_and_swap" 
+(define_insn "atomic_compare_and_swap_single" [(set (match_operand:SWI 0 "register_operand" "=a") - (match_operand:SWI 1 "memory_operand" "+m")) - (set (match_dup 1) (unspec_volatile:SWI - [(match_dup 1) - (match_operand:SWI 2 "register_operand" "a") + [(match_operand:SWI 1 "memory_operand" "+m") + (match_operand:SWI 2 "register_operand" "0") (match_operand:SWI 3 "register_operand" "")] - UNSPECV_CMPXCHG)) + UNSPECV_CMPXCHG_1)) + (set (match_dup 1) + (unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:SWI - [(match_dup 1) (match_dup 2) (match_dup 3)] UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))] "TARGET_CMPXCHG" "lock{%;} cmpxchg{}\t{%3, %1|%1, %3}") -(define_insn "sync_double_compare_and_swap" - [(set (match_operand:DCASMODE 0 "register_operand" "=A") - (match_operand:DCASMODE 1 "memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DCASMODE - [(match_dup 1) - (match_operand:DCASMODE 2 "register_operand" "A") - (match_operand: 3 "register_operand" "b") - (match_operand: 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) +;; For double-word compare and swap, we are obliged to play tricks with +;; the input newval (op5:op6) because the Intel register numbering does +;; not match the gcc register numbering, so the pair must be CX:BX. +;; That said, in order to take advantage of possible lower-subreg opts, +;; treat all of the integral operands in the same way. 
+(define_insn "atomic_compare_and_swap_double" + [(set (match_operand: 0 "register_operand" "=a") + (unspec_volatile: + [(match_operand:DCASMODE 2 "memory_operand" "+m") + (match_operand: 3 "register_operand" "0") + (match_operand: 4 "register_operand" "1") + (match_operand: 5 "register_operand" "b") + (match_operand: 6 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (match_operand: 1 "register_operand" "=d") + (unspec_volatile: [(const_int 0)] UNSPECV_CMPXCHG_2)) + (set (match_dup 2) + (unspec_volatile:DCASMODE [(const_int 0)] UNSPECV_CMPXCHG_3)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:DCASMODE - [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] - UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))] "" - "lock{%;} cmpxchgb\t%1") + "lock{%;} cmpxchgb\t%2") -;; Theoretically we'd like to use constraint "r" (any reg) for operand -;; 3, but that includes ecx. If operand 3 and 4 are the same (like when -;; the input is -1LL) GCC might chose to allocate operand 3 to ecx, like -;; operand 4. This breaks, as the xchg will move the PIC register contents -;; to %ecx then --> boom. Operands 3 and 4 really need to be different -;; registers, which in this case means operand 3 must not be ecx. -;; Instead of playing tricks with fake early clobbers or the like we -;; just enumerate all regs possible here, which (as this is !TARGET_64BIT) +;; Theoretically we'd like to use constraint "r" (any reg) for op5, +;; but that includes ecx. If op5 and op6 are the same (like when +;; the input is -1LL) GCC might choose to allocate op5 to ecx, like +;; op6. This breaks, as the xchg will move the PIC register contents +;; to %ecx then --> boom. Operands 5 and 6 really need to be different +;; registers, which in this case means op5 must not be ecx. Instead +;; of playing tricks with fake early clobbers or the like we just +;; enumerate all regs possible here, which (as this is !TARGET_64BIT) ;; are just esi and edi. 
-(define_insn "*sync_double_compare_and_swapdi_pic" - [(set (match_operand:DI 0 "register_operand" "=A") - (match_operand:DI 1 "cmpxchg8b_pic_memory_operand" "+m")) - (set (match_dup 1) - (unspec_volatile:DI - [(match_dup 1) - (match_operand:DI 2 "register_operand" "A") - (match_operand:SI 3 "register_operand" "SD") - (match_operand:SI 4 "register_operand" "c")] - UNSPECV_CMPXCHG)) +(define_insn "*atomic_compare_and_swap_doubledi_pic" + [(set (match_operand:SI 0 "register_operand" "=a") + (unspec_volatile:SI + [(match_operand:DI 2 "cmpxchg8b_pic_memory_operand" "+m") + (match_operand:SI 3 "register_operand" "0") + (match_operand:SI 4 "register_operand" "1") + (match_operand:SI 5 "register_operand" "SD") + (match_operand:SI 6 "register_operand" "c")] + UNSPECV_CMPXCHG_1)) + (set (match_operand:SI 1 "register_operand" "=d") + (unspec_volatile:SI [(const_int 0)] UNSPECV_CMPXCHG_2)) + (set (match_dup 2) + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG_3)) (set (reg:CCZ FLAGS_REG) - (compare:CCZ - (unspec_volatile:DI - [(match_dup 1) (match_dup 2) (match_dup 3) (match_dup 4)] - UNSPECV_CMPXCHG) - (match_dup 2)))] + (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))] "!TARGET_64BIT && TARGET_CMPXCHG8B && flag_pic" - "xchg{l}\t%%ebx, %3\;lock{%;} cmpxchg8b\t%1\;xchg{l}\t%%ebx, %3") + "xchg{l}\t%%ebx, %5\;lock{%;} cmpxchg8b\t%2\;xchg{l}\t%%ebx, %5") ;; For operand 2 nonmemory_operand predicate is used instead of ;; register_operand to allow combiner to better optimize atomic ;; additions of constants. 
-(define_insn "sync_old_add" +(define_insn "atomic_fetch_add" [(set (match_operand:SWI 0 "register_operand" "=") (unspec_volatile:SWI - [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG)) + [(match_operand:SWI 1 "memory_operand" "+m") + (match_operand:SI 3 "const_int_operand" "")] ;; model + UNSPECV_XCHG)) (set (match_dup 1) (plus:SWI (match_dup 1) (match_operand:SWI 2 "nonmemory_operand" "0"))) @@ -186,7 +207,9 @@ (match_operand:SWI 2 "const_int_operand" "")) (parallel [(set (match_dup 0) (unspec_volatile:SWI - [(match_operand:SWI 1 "memory_operand" "")] UNSPECV_XCHG)) + [(match_operand:SWI 1 "memory_operand" "") + (match_operand:SI 4 "const_int_operand" "")] + UNSPECV_XCHG)) (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 0))) @@ -199,17 +222,19 @@ == -(unsigned HOST_WIDE_INT) INTVAL (operands[3]) && !reg_overlap_mentioned_p (operands[0], operands[1])" [(parallel [(set (reg:CCZ FLAGS_REG) - (compare:CCZ (unspec_volatile:SWI [(match_dup 1)] - UNSPECV_XCHG) - (match_dup 3))) + (compare:CCZ + (unspec_volatile:SWI [(match_dup 1) (match_dup 4)] + UNSPECV_XCHG) + (match_dup 3))) (set (match_dup 1) (plus:SWI (match_dup 1) (match_dup 2)))])]) -(define_insn "*sync_old_add_cmp" +(define_insn "*atomic_fetch_add_cmp" [(set (reg:CCZ FLAGS_REG) (compare:CCZ (unspec_volatile:SWI - [(match_operand:SWI 0 "memory_operand" "+m")] + [(match_operand:SWI 0 "memory_operand" "+m") + (match_operand:SI 3 "const_int_operand" "")] UNSPECV_XCHG) (match_operand:SWI 2 "const_int_operand" "i"))) (set (match_dup 0) @@ -232,35 +257,25 @@ return "lock{%;} add{}\t{%1, %0|%0, %1}"; }) -(define_expand "atomic_exchange" - [(match_operand:SWI 0 "register_operand" "") ;; output - (match_operand:SWI 1 "memory_operand" "") ;; memory - (match_operand:SWI 2 "register_operand" "") ;; input - (match_operand:SI 3 "const_int_operand" "")] ;; memory model - "" -{ - /* On i386 the xchg instruction is a full barrier. Thus we - can completely ignore the memory model operand. 
*/ - emit_insn (gen_sync_lock_test_and_set - (operands[0], operands[1], operands[2])); - DONE; -}) - ;; Recall that xchg implicitly sets LOCK#, so adding it again wastes space. -(define_insn "sync_lock_test_and_set" - [(set (match_operand:SWI 0 "register_operand" "=") +;; In addition, it is always a full barrier, so we can ignore the memory model. +(define_insn "atomic_exchange" + [(set (match_operand:SWI 0 "register_operand" "=") ;; output (unspec_volatile:SWI - [(match_operand:SWI 1 "memory_operand" "+m")] UNSPECV_XCHG)) + [(match_operand:SWI 1 "memory_operand" "+m") ;; memory + (match_operand:SI 3 "const_int_operand" "")] ;; model + UNSPECV_XCHG)) (set (match_dup 1) - (match_operand:SWI 2 "register_operand" "0"))] + (match_operand:SWI 2 "register_operand" "0"))] ;; input "" "xchg{}\t{%1, %0|%0, %1}") -(define_insn "sync_add" +(define_insn "atomic_add" [(set (match_operand:SWI 0 "memory_operand" "+m") (unspec_volatile:SWI [(plus:SWI (match_dup 0) - (match_operand:SWI 1 "nonmemory_operand" ""))] + (match_operand:SWI 1 "nonmemory_operand" "")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" @@ -279,11 +294,12 @@ return "lock{%;} add{}\t{%1, %0|%0, %1}"; }) -(define_insn "sync_sub" +(define_insn "atomic_sub" [(set (match_operand:SWI 0 "memory_operand" "+m") (unspec_volatile:SWI [(minus:SWI (match_dup 0) - (match_operand:SWI 1 "nonmemory_operand" ""))] + (match_operand:SWI 1 "nonmemory_operand" "")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" @@ -296,14 +312,18 @@ return "lock{%;} inc{}\t%0"; } + if (x86_maybe_negate_const_int (&operands[1], mode)) + return "lock{%;} add{}\t{%1, %0|%0, %1}"; + return "lock{%;} sub{}\t{%1, %0|%0, %1}"; }) -(define_insn "sync_" +(define_insn "atomic_" [(set (match_operand:SWI 0 "memory_operand" "+m") (unspec_volatile:SWI [(any_logic:SWI (match_dup 0) - (match_operand:SWI 1 "nonmemory_operand" ""))] + (match_operand:SWI 1 
"nonmemory_operand" "")) + (match_operand:SI 2 "const_int_operand" "")] ;; model UNSPECV_LOCK)) (clobber (reg:CC FLAGS_REG))] "" -- 1.7.6.4