public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-7167] PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0
@ 2022-02-10  8:02 Tom de Vries
  0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2022-02-10  8:02 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:9bacd7af2e3bba9ddad17e7de4e2d299419d819d

commit r12-7167-g9bacd7af2e3bba9ddad17e7de4e2d299419d819d
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Fri Feb 4 04:13:53 2022 +0100

    PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0
    
    This patch addresses the "increased register pressure" regression on
    nvptx-none caused by my change to transition the backend to a
    STORE_FLAG_VALUE = 1 target.  This improved code generation for the
    more common case of producing 0/1 Boolean values, but unfortunately
    made things marginally worse when a 0/-1 mask value is desired.
    Unfortunately, nvptx kernels are extremely sensitive to changes in
    register usage, which was observable in the reported PR.
    
    This patch provides optimizations for -(cond ? 1 : 0), effectively
    simplifying this into cond ? -1 : 0, where these ternary operators are
    provided by nvptx's selp instruction, and for the specific case of
    SImode, using (restoring) nvptx's "set" instruction (which avoids
    the need for a predicate register).
    
    This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu
    with a "make" and "make -k check" with no new failures.  Unfortunately,
    the exact register usage of a nvptx kernel depends upon the version of
    the Cuda drivers being used (and the hardware), but I believe this
    change should resolve the PR (for Thomas) by improving code generation
    for the cases that regressed.
    
    gcc/ChangeLog:
    
            PR target/104345
            * config/nvptx/nvptx.md (sel_true<mode>): Fix indentation.
            (sel_false<mode>): Likewise.
            (define_code_iterator eqne): New code iterator for EQ and NE.
            (*selp<mode>_neg_<code>): New define_insn_and_split to optimize
            the negation of a selp instruction.
            (*selp<mode>_not_<code>): New define_insn_and_split to optimize
            the bitwise not of a selp instruction.
            (*setcc_int<mode>): Use set instruction for neg:SI of a selp.
    
    gcc/testsuite/ChangeLog:
    
            PR target/104345
            * gcc.target/nvptx/neg-selp.c: New test case.

Diff:
---
 gcc/config/nvptx/nvptx.md                 | 58 ++++++++++++++++++++++++++++---
 gcc/testsuite/gcc.target/nvptx/neg-selp.c | 17 +++++++++
 2 files changed, 71 insertions(+), 4 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 107df74e46c..ad642e78ae3 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -977,7 +977,7 @@
 
 (define_insn "sel_true<mode>"
   [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
-        (if_then_else:HSDIM
+	(if_then_else:HSDIM
 	  (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
 	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
 	  (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
@@ -986,7 +986,7 @@
 
 (define_insn "sel_true<mode>"
   [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
-        (if_then_else:SDFM
+	(if_then_else:SDFM
 	  (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
 	  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
 	  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
@@ -995,7 +995,7 @@
 
 (define_insn "sel_false<mode>"
   [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
-        (if_then_else:HSDIM
+	(if_then_else:HSDIM
 	  (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
 	  (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
 	  (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
@@ -1004,13 +1004,63 @@
 
 (define_insn "sel_false<mode>"
   [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
-        (if_then_else:SDFM
+	(if_then_else:SDFM
 	  (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
 	  (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
 	  (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
   ""
   "%.\\tselp%t0\\t%0, %3, %2, %1;")
 
+(define_code_iterator eqne [eq ne])
+
+;; Split negation of a predicate into a conditional move.
+(define_insn_and_split "*selp<mode>_neg_<code>"
+  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+	(neg:HSDIM (eqne:HSDIM
+		     (match_operand:BI 1 "nvptx_register_operand" "R")
+		     (const_int 0))))]
+  ""
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(if_then_else:HSDIM
+	  (eqne (match_dup 1) (const_int 0))
+	  (const_int -1)
+	  (const_int 0)))])
+
+;; Split bitwise not of a predicate into a conditional move.
+(define_insn_and_split "*selp<mode>_not_<code>"
+  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+	(not:HSDIM (eqne:HSDIM
+		     (match_operand:BI 1 "nvptx_register_operand" "R")
+		     (const_int 0))))]
+  ""
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+	(if_then_else:HSDIM
+	  (eqne (match_dup 1) (const_int 0))
+	  (const_int -2)
+	  (const_int -1)))])
+
+(define_insn "*setcc_int<mode>"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+	(neg:SI
+	  (match_operator:SI 1 "nvptx_comparison_operator"
+	    [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
+	     (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")])))]
+  ""
+  "%.\\tset%t0%c1\\t%0, %2, %3;")
+
+(define_insn "*setcc_int<mode>"
+  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
+	(neg:SI
+	  (match_operator:SI 1 "nvptx_float_comparison_operator"
+	    [(match_operand:SDFM 2 "nvptx_register_operand" "R")
+	     (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")])))]
+  ""
+  "%.\\tset%t0%c1\\t%0, %2, %3;")
+
 (define_insn "setcc_float<mode>"
   [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
 	(match_operator:SF 1 "nvptx_comparison_operator"
diff --git a/gcc/testsuite/gcc.target/nvptx/neg-selp.c b/gcc/testsuite/gcc.target/nvptx/neg-selp.c
new file mode 100644
index 00000000000..a8f0118dd5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/nvptx/neg-selp.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+int neg(int x, int y)
+{
+  int t = (x == y) ? 1 : 0;
+  return -t;
+}
+
+int not(int x, int y)
+{
+  int t = (x == y) ? 1 : 0;
+  return ~t;
+}
+
+/* { dg-final { scan-assembler-not "neg.s32" } } */
+/* { dg-final { scan-assembler-not "not.b32" } } */


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-02-10  8:02 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-02-10  8:02 [gcc r12-7167] PR target/104345: Use nvptx "set" instruction for cond ? -1 : 0 Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).