From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: (qmail 89472 invoked by alias); 1 Sep 2015 16:14:45 -0000 Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk List-Id: List-Archive: List-Post: List-Help: Sender: gcc-patches-owner@gcc.gnu.org Received: (qmail 89416 invoked by uid 89); 1 Sep 2015 16:14:39 -0000 Authentication-Results: sourceware.org; auth=none X-Virus-Found: No X-Spam-SWARE-Status: No, score=-2.6 required=5.0 tests=BAYES_00,FREEMAIL_FROM,RCVD_IN_DNSWL_LOW,SPF_PASS autolearn=ham version=3.3.2 X-HELO: mail-qk0-f179.google.com Received: from mail-qk0-f179.google.com (HELO mail-qk0-f179.google.com) (209.85.220.179) by sourceware.org (qpsmtpd/0.93/v0.84-503-g423c35a) with (AES128-GCM-SHA256 encrypted) ESMTPS; Tue, 01 Sep 2015 16:14:37 +0000 Received: by qkdv1 with SMTP id v1so47556523qkd.0 for ; Tue, 01 Sep 2015 09:14:35 -0700 (PDT) X-Received: by 10.55.221.79 with SMTP id n76mr19248527qki.62.1441124075574; Tue, 01 Sep 2015 09:14:35 -0700 (PDT) Received: from ?IPv6:2601:181:c000:c497:a2a8:cdff:fe3e:b48? ([2601:181:c000:c497:a2a8:cdff:fe3e:b48]) by smtp.googlemail.com with ESMTPSA id r5sm11087136qkr.26.2015.09.01.09.14.34 (version=TLSv1.2 cipher=ECDHE-RSA-AES128-GCM-SHA256 bits=128/128); Tue, 01 Sep 2015 09:14:34 -0700 (PDT) To: GCC Patches From: Nathan Sidwell Subject: [gomp4] ptx assembler formatting Message-ID: <55E5CEE9.7090600@acm.org> Date: Tue, 01 Sep 2015 16:14:00 -0000 User-Agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.1.0 MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="------------030105050707010600070103" X-SW-Source: 2015-09/txt/msg00082.txt.bz2 This is a multi-part message in MIME format. --------------030105050707010600070103 Content-Type: text/plain; charset=utf-8; format=flowed Content-Transfer-Encoding: 7bit Content-length: 410 In looking at some ptx output, OCD kicked in and I couldn't tolerate the formatting inconsistencies. Fixed with this patch. 
Also, the mechanism of providing scratch regs to the spin lock and reset insns caused the optimizers to want to insert initializations. Fixed by making these patterns SETs. (We can't use the usual method of naming clobbers, because the register allocator is disabled.) nathan --------------030105050707010600070103 Content-Type: text/x-patch; name="gomp4-ptx.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="gomp4-ptx.patch" Content-length: 5743 2015-09-01 Nathan Sidwell * config/nvptx/nvptx.md: Use tabs and operand spacing consistently throughout. (nvptx_spin_lock): Use set. (nvptx_spin_reset): Likewise. Index: config/nvptx/nvptx.md =================================================================== --- config/nvptx/nvptx.md (revision 227369) +++ config/nvptx/nvptx.md (working copy) @@ -799,7 +799,7 @@ [(match_operand:HSDIM 2 "nvptx_register_operand" "R") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] "" - "%.\\tsetp%c1 %0,%2,%3;") + "%.\\tsetp%c1\\t%0, %2, %3;") (define_insn "*cmp" [(set (match_operand:BI 0 "nvptx_register_operand" "=R") @@ -807,7 +807,7 @@ [(match_operand:SDFM 2 "nvptx_register_operand" "R") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] "" - "%.\\tsetp%c1 %0,%2,%3;") + "%.\\tsetp%c1\\t%0, %2, %3;") (define_insn "jump" [(set (pc) @@ -941,7 +941,7 @@ [(match_operand:HSDIM 2 "nvptx_register_operand" "R") (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))] "" - "%.\\tset%t0%c1 %0,%2,%3;") + "%.\\tset%t0%c1\\t%0, %2, %3;") (define_insn "setcc_int" [(set (match_operand:SI 0 "nvptx_register_operand" "=R") @@ -949,7 +949,7 @@ [(match_operand:SDFM 2 "nvptx_register_operand" "R") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] "" - "%.\\tset%t0%c1 %0,%2,%3;") + "%.\\tset%t0%c1\\t%0, %2, %3;") (define_insn "setcc_float" [(set (match_operand:SF 0 "nvptx_register_operand" "=R") @@ -957,7 +957,7 @@ [(match_operand:HSDIM 2 "nvptx_register_operand" "R") (match_operand:HSDIM 3 
"nvptx_nonmemory_operand" "Ri")]))] "" - "%.\\tset%t0%c1 %0,%2,%3;") + "%.\\tset%t0%c1\\t%0, %2, %3;") (define_insn "setcc_float" [(set (match_operand:SF 0 "nvptx_register_operand" "=R") @@ -965,7 +965,7 @@ [(match_operand:SDFM 2 "nvptx_register_operand" "R") (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))] "" - "%.\\tset%t0%c1 %0,%2,%3;") + "%.\\tset%t0%c1\\t%0, %2, %3;") (define_expand "cstorebi4" [(set (match_operand:SI 0 "nvptx_register_operand") @@ -1343,9 +1343,9 @@ { static const char *const asms[] = { /* Must match oacc_loop_levels ordering. */ - "%.\\tmov.u32 %0, %%nctaid.x;",/* gang */ - "%.\\tmov.u32 %0, %%ntid.y;", /* worker */ - "%.\\tmov.u32 %0, %%ntid.x;", /* vector */ + "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */ + "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */ + "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */ }; return asms[INTVAL (operands[1])]; }) @@ -1358,9 +1358,9 @@ { static const char *const asms[] = { /* Must match oacc_loop_levels ordering. */ - "%.\\tmov.u32 %0, %%ctaid.x;",/* gang */ - "%.\\tmov.u32 %0, %%tid.y;", /* worker */ - "%.\\tmov.u32 %0, %%tid.x;", /* vector */ + "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */ + "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */ + "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */ }; return asms[INTVAL (operands[1])]; }) @@ -1460,7 +1460,7 @@ (set (match_operand:SI 1 "nvptx_register_operand" "=R") (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))] "" - "%.\\tmov.b64 {%0,%1}, %2;") + "%.\\tmov.b64\\t{%0,%1}, %2;") ;; pack 2 32-bit ints into a 64 bit object (define_insn "packsi2" @@ -1469,21 +1469,21 @@ (match_operand:SI 2 "nvptx_register_operand" "R")] UNSPEC_BIT_CONV))] "" - "%.\\tmov.b64 %0, {%1,%2};") + "%.\\tmov.b64\\t%0, {%1,%2};") (define_insn "worker_load" [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R") (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "m")] UNSPEC_SHARED_DATA))] "" - "%.\\tld.shared%u0\\t%0,%1;") + "%.\\tld.shared%u0\\t%0, %1;") (define_insn 
"worker_store" [(set (unspec:SDISDFM [(match_operand:SDISDFM 1 "memory_operand" "=m")] UNSPEC_SHARED_DATA) (match_operand:SDISDFM 0 "nvptx_register_operand" "R"))] "" - "%.\\tst.shared%u1\\t%1,%0;") + "%.\\tst.shared%u1\\t%1, %0;") ;; Atomic insns. @@ -1577,30 +1577,30 @@ [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_BARSYNC)] "" - "bar.sync\\t%0;") + "\\tbar.sync\\t%0;") (define_insn "nvptx_membar" [(unspec_volatile [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_MEMBAR)] "" - "membar%B0;") + "%.\\tmembar%B0;") ;; spin lock and reset (define_insn "nvptx_spin_lock" [(parallel - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m") - (match_operand:SI 1 "const_int_operand" "i")] - UNSPECV_LOCK) - (match_operand:SI 2 "register_operand" "=R") - (match_operand:BI 3 "register_operand" "=R") + [(set (match_operand:SI 2 "register_operand" "=R") + (unspec_volatile:SI [(match_operand:SI 0 "memory_operand" "m") + (match_operand:SI 1 "const_int_operand" "i")] + UNSPECV_LOCK)) + (set (match_operand:BI 3 "register_operand" "=R") (const_int 0)) (label_ref (match_operand 4 "" ""))])] "" - "%4:\\tatom%R1.cas.b32 %2,%0,0,1;setp.ne.u32 %3,%2,0;@%3 bra.uni %4;") + "%4:\\tatom%R1.cas.b32\\t%2, %0, 0, 1;\\n\\t\\tsetp.ne.u32\\t%3, %2, 0;\\n\\t@%3\\tbra.uni\\t%4;") (define_insn "nvptx_spin_reset" - [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m") - (match_operand:SI 1 "const_int_operand" "i")] - UNSPECV_LOCK) - (match_operand:SI 2 "register_operand" "=R")] + [(set (match_operand:SI 2 "register_operand" "=R") + (unspec_volatile:SI [(match_operand:SI 0 "memory_operand" "m") + (match_operand:SI 1 "const_int_operand" "i")] + UNSPECV_LOCK))] "" - "atom%R1.exch.b32 %2,%0,0;") + "%.\\tatom%R1.exch.b32\\t%2, %0, 0;") --------------030105050707010600070103--