From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2205) id 5D8DB385381E; Mon, 17 May 2021 18:21:32 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 5D8DB385381E MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Tom de Vries To: gcc-cvs@gcc.gnu.org Subject: [gcc r12-846] [nvptx] Handle memmodel for atomic ops X-Act-Checkin: gcc X-Git-Author: Tom de Vries X-Git-Refname: refs/heads/master X-Git-Oldrev: 45aa7a447652e8541cc381d7ab128544f81ed857 X-Git-Newrev: 58f7c7e098b79c96403c8341823ec3ba1e8b3945 Message-Id: <20210517182132.5D8DB385381E@sourceware.org> Date: Mon, 17 May 2021 18:21:32 +0000 (GMT) X-BeenThere: gcc-cvs@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-cvs mailing list List-Unsubscribe: , List-Archive: List-Help: List-Subscribe: , X-List-Received-Date: Mon, 17 May 2021 18:21:32 -0000 https://gcc.gnu.org/g:58f7c7e098b79c96403c8341823ec3ba1e8b3945 commit r12-846-g58f7c7e098b79c96403c8341823ec3ba1e8b3945 Author: Tom de Vries Date: Mon May 17 10:11:52 2021 +0200 [nvptx] Handle memmodel for atomic ops The atomic ops in nvptx.md have memmodel arguments, which are currently ignored. Handle these, fixing the libgomp.c-c++-common/reduction-{5,6}.c test-case failures on volta. Tested libgomp on x86_64-linux with nvptx accelerator. gcc/ChangeLog: 2021-05-17 Tom de Vries PR target/100497 * config/nvptx/nvptx-protos.h (nvptx_output_atomic_insn): Declare. * config/nvptx/nvptx.c (nvptx_output_barrier) (nvptx_output_atomic_insn): New functions. (nvptx_print_operand): Add support for 'B'. * config/nvptx/nvptx.md: Use nvptx_output_atomic_insn for atomic insns. 
Diff: --- gcc/config/nvptx/nvptx-protos.h | 1 + gcc/config/nvptx/nvptx.c | 77 +++++++++++++++++++++++++++++++++++++++++ gcc/config/nvptx/nvptx.md | 31 ++++++++++++++--- 3 files changed, 104 insertions(+), 5 deletions(-) diff --git a/gcc/config/nvptx/nvptx-protos.h b/gcc/config/nvptx/nvptx-protos.h index 15122096487..b7e6ae26522 100644 --- a/gcc/config/nvptx/nvptx-protos.h +++ b/gcc/config/nvptx/nvptx-protos.h @@ -57,5 +57,6 @@ extern const char *nvptx_output_set_softstack (unsigned); extern const char *nvptx_output_simt_enter (rtx, rtx, rtx); extern const char *nvptx_output_simt_exit (rtx); extern const char *nvptx_output_red_partition (rtx, rtx); +extern const char *nvptx_output_atomic_insn (const char *, rtx *, int, int); #endif #endif diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index ebbfa921589..722b0faa330 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -2444,6 +2444,53 @@ nvptx_output_mov_insn (rtx dst, rtx src) return "%.\tcvt%t0%t1\t%0, %1;"; } +/* Output a pre/post barrier for MEM_OPERAND according to MEMMODEL. 
*/ + +static void +nvptx_output_barrier (rtx *mem_operand, int memmodel, bool pre_p) +{ + bool post_p = !pre_p; + + switch (memmodel) + { + case MEMMODEL_RELAXED: + return; + case MEMMODEL_CONSUME: + case MEMMODEL_ACQUIRE: + case MEMMODEL_SYNC_ACQUIRE: + if (post_p) + break; + return; + case MEMMODEL_RELEASE: + case MEMMODEL_SYNC_RELEASE: + if (pre_p) + break; + return; + case MEMMODEL_ACQ_REL: + case MEMMODEL_SEQ_CST: + case MEMMODEL_SYNC_SEQ_CST: + if (pre_p || post_p) + break; + return; + default: + gcc_unreachable (); + } + + output_asm_insn ("%.\tmembar%B0;", mem_operand); +} + +const char * +nvptx_output_atomic_insn (const char *asm_template, rtx *operands, int mem_pos, + int memmodel_pos) +{ + nvptx_output_barrier (&operands[mem_pos], INTVAL (operands[memmodel_pos]), + true); + output_asm_insn (asm_template, operands); + nvptx_output_barrier (&operands[mem_pos], INTVAL (operands[memmodel_pos]), + false); + return ""; +} + static void nvptx_print_operand (FILE *, rtx, int); /* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this @@ -2660,6 +2707,36 @@ nvptx_print_operand (FILE *file, rtx x, int code) switch (code) { + case 'B': + if (SYMBOL_REF_P (XEXP (x, 0))) + switch (SYMBOL_DATA_AREA (XEXP (x, 0))) + { + case DATA_AREA_GENERIC: + /* Assume worst-case: global. */ + gcc_fallthrough (); /* FALLTHROUGH. */ + case DATA_AREA_GLOBAL: + break; + case DATA_AREA_SHARED: + fputs (".cta", file); + return; + case DATA_AREA_LOCAL: + case DATA_AREA_CONST: + case DATA_AREA_PARAM: + default: + gcc_unreachable (); + } + + /* There are 2 cases where membar.sys differs from membar.gl: + - host accesses global memory (f.i. systemwide atomics) + - 2 or more devices are setup in peer-to-peer mode, and one + peer can access global memory of other peer. + Neither are currently supported by openMP/OpenACC on nvptx, but + that could change, so we default to membar.sys. 
We could support + this more optimally by adding DATA_AREA_SYS and then emitting + .gl for DATA_AREA_GLOBAL and .sys for DATA_AREA_SYS. */ + fputs (".sys", file); + return; + case 'A': x = XEXP (x, 0); gcc_fallthrough (); /* FALLTHROUGH. */ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 00bb8fea821..108de1c0c59 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -1642,7 +1642,11 @@ (set (match_dup 1) (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))] "" - "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;" + { + const char *t + = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + return nvptx_output_atomic_insn (t, operands, 1, 4); + } [(set_attr "atomic" "true")]) (define_insn "atomic_exchange" @@ -1654,7 +1658,11 @@ (set (match_dup 1) (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input "" - "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;" + { + const char *t + = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 3); + } [(set_attr "atomic" "true")]) (define_insn "atomic_fetch_add" @@ -1667,7 +1675,11 @@ (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (match_dup 1))] "" - "%.\\tatom%A1.add%t0\\t%0, %1, %2;" + { + const char *t + = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 3); + } [(set_attr "atomic" "true")]) (define_insn "atomic_fetch_addsf" @@ -1680,7 +1692,11 @@ (set (match_operand:SF 0 "nvptx_register_operand" "=R") (match_dup 1))] "" - "%.\\tatom%A1.add%t0\\t%0, %1, %2;" + { + const char *t + = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, operands, 1, 3); + } [(set_attr "atomic" "true")]) (define_code_iterator any_logic [and ior xor]) @@ -1696,7 +1712,12 @@ (set (match_operand:SDIM 0 "nvptx_register_operand" "=R") (match_dup 1))] "mode == SImode || TARGET_SM35" - "%.\\tatom%A1.b%T0.\\t%0, %1, %2;" + { + const char *t + = "%.\\tatom%A1.b%T0.\\t%0, %1, %2;"; + return nvptx_output_atomic_insn (t, 
operands, 1, 3); + } + [(set_attr "atomic" "true")]) (define_expand "atomic_test_and_set"