public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-6207] nvptx: Transition nvptx backend to STORE_FLAG_VALUE = 1
@ 2022-01-04 11:28 Tom de Vries
  0 siblings, 0 replies; only message in thread
From: Tom de Vries @ 2022-01-04 11:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:beed3f8f60492289ca6211d86c54a2254a642035

commit r12-6207-gbeed3f8f60492289ca6211d86c54a2254a642035
Author: Roger Sayle <roger@nextmovesoftware.com>
Date:   Wed Dec 15 15:09:48 2021 +0100

    nvptx: Transition nvptx backend to STORE_FLAG_VALUE = 1
    
    This patch to the nvptx backend changes the backend's STORE_FLAG_VALUE
    from -1 to 1, by using BImode predicates and selp instructions, instead
    of set instructions (almost always followed by integer negation).
    
    Historically, it was reasonable (through rare) for backends to use -1
    for representing true during the RTL passes.  However with tree-ssa,
    GCC now emits lots of code that reads and writes _Bool values, requiring
    STORE_FLAG_VALUE=-1 targets to frequently convert 0/-1 pseudos to 0/1
    pseudos using integer negation.  Unfortunately, this process prevents
    or complicates many optimizations (negate isn't associative with logical
    AND, OR and XOR, and interferes with range/vrp/nonzerobits bounds etc.).
    
    The impact of this is that for a relatively simple logical expression
    like "return (x==21) && (y==69);", the nvptx backend currently generates:
    
                    mov.u32 %r26, %ar0;
                    mov.u32 %r27, %ar1;
                    set.u32.eq.u32  %r30, %r26, 21;
                    neg.s32 %r31, %r30;
                    mov.u32 %r29, %r31;
                    set.u32.eq.u32  %r33, %r27, 69;
                    neg.s32 %r34, %r33;
                    mov.u32 %r32, %r34;
                    cvt.u16.u8      %r39, %r29;
                    mov.u16 %r36, %r39;
                    cvt.u16.u8      %r39, %r32;
                    mov.u16 %r37, %r39;
                    and.b16 %r35, %r36, %r37;
                    cvt.u32.u16     %r38, %r35;
                    cvt.u32.u8      %value, %r38;
    
    This patch tweaks nvptx to generate 0/1 values instead, requiring the
    same number of instructions, using (BImode) predicate registers and selp
    instructions so as to now generate the almost identical:
    
                    mov.u32 %r26, %ar0;
                    mov.u32 %r27, %ar1;
                    setp.eq.u32     %r31, %r26, 21;
                    selp.u32        %r30, 1, 0, %r31;
                    mov.u32 %r29, %r30;
                    setp.eq.u32     %r34, %r27, 69;
                    selp.u32        %r33, 1, 0, %r34;
                    mov.u32 %r32, %r33;
                    cvt.u16.u8      %r39, %r29;
                    mov.u16 %r36, %r39;
                    cvt.u16.u8      %r39, %r32;
                    mov.u16 %r37, %r39;
                    and.b16 %r35, %r36, %r37;
                    cvt.u32.u16     %r38, %r35;
                    cvt.u32.u8      %value, %r38;
    
    The hidden benefit is that this sequence can (in theory) be optimized
    by the RTL passes to eventually generate a much shorter sequence using
    an and.pred instruction (just like Nvidia's nvcc compiler).
    
    This patch has been tested nvptx-none with a "make" and "make -k check"
    (including newlib) hosted on x86_64-pc-linux-gnu with no new failures.
    
    gcc/ChangeLog:
    
            * config/nvptx/nvptx.h (STORE_FLAG_VALUE): Change to 1.
            * config/nvptx/nvptx.md (movbi): Use P1 constraint for true.
            (setcc_from_bi): Remove SImode specific pattern.
            (setcc<mode>_from_bi): Provide more general HSDIM pattern.
            (extendbi<mode>2, zeroextendbi<mode>2): Provide instructions
            for sign- and zero-extending BImode predicates to integers.
            (setcc_int<mode>): Remove previous (-1-based) instructions.
            (cstorebi4): Remove BImode to SImode specific expander.
            (cstore<mode>4): Fix indentation.  Expand using setccsi_from_bi.
            (cstore<mode>4): For both integer and floating point modes.

Diff:
---
 gcc/config/nvptx/nvptx.h  |  2 +-
 gcc/config/nvptx/nvptx.md | 76 +++++++++++++++++++++++++----------------------
 2 files changed, 41 insertions(+), 37 deletions(-)

diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 728dff04903..d61bd70c364 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -319,7 +319,7 @@ struct GTY(()) machine_function
 #define NO_DOT_IN_LABEL
 #define ASM_COMMENT_START "//"
 
-#define STORE_FLAG_VALUE -1
+#define STORE_FLAG_VALUE 1
 #define FLOAT_STORE_FLAG_VALUE(MODE) REAL_VALUE_ATOF("1.0", (MODE))
 
 #define CASE_VECTOR_MODE SImode
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index 77ac36daf46..a4c14a3f6c7 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -215,7 +215,7 @@
 ;; get variables in this mode and pseudos are never spilled.
 (define_insn "movbi"
   [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
-	(match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
+	(match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,P1"))]
   ""
   "@
    %.\\tmov%t0\\t%0, %1;
@@ -859,12 +859,26 @@
 
 ;; Conditional stores
 
-(define_insn "setcc_from_bi"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
-	(ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
-	       (const_int 0)))]
+(define_insn "setcc<mode>_from_bi"
+  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+	(ne:HSDIM (match_operand:BI 1 "nvptx_register_operand" "R")
+		   (const_int 0)))]
+  ""
+  "%.\\tselp%t0\\t%0, 1, 0, %1;")
+
+(define_insn "extendbi<mode>2"
+  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+	(sign_extend:HSDIM
+	 (match_operand:BI 1 "nvptx_register_operand" "R")))]
+  ""
+  "%.\\tselp%t0\\t%0, -1, 0, %1;")
+
+(define_insn "zero_extendbi<mode>2"
+  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
+	(zero_extend:HSDIM
+	 (match_operand:BI 1 "nvptx_register_operand" "R")))]
   ""
-  "%.\\tselp%t0 %0,-1,0,%1;")
+  "%.\\tselp%t0\\t%0, 1, 0, %1;")
 
 (define_insn "sel_true<mode>"
   [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
@@ -902,22 +916,6 @@
   ""
   "%.\\tselp%t0\\t%0, %3, %2, %1;")
 
-(define_insn "setcc_int<mode>"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
-	(match_operator:SI 1 "nvptx_comparison_operator"
-	  [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
-	   (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
-  ""
-  "%.\\tset%t0%c1\\t%0, %2, %3;")
-
-(define_insn "setcc_int<mode>"
-  [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
-	(match_operator:SI 1 "nvptx_float_comparison_operator"
-	   [(match_operand:SDFM 2 "nvptx_register_operand" "R")
-	    (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
-  ""
-  "%.\\tset%t0%c1\\t%0, %2, %3;")
-
 (define_insn "setcc_float<mode>"
   [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
 	(match_operator:SF 1 "nvptx_comparison_operator"
@@ -934,29 +932,35 @@
   ""
   "%.\\tset%t0%c1\\t%0, %2, %3;")
 
-(define_expand "cstorebi4"
-  [(set (match_operand:SI 0 "nvptx_register_operand")
-	(match_operator:SI 1 "ne_operator"
-         [(match_operand:BI 2 "nvptx_register_operand")
-          (match_operand:BI 3 "const0_operand")]))]
-  ""
-  "")
-
 (define_expand "cstore<mode>4"
   [(set (match_operand:SI 0 "nvptx_register_operand")
 	(match_operator:SI 1 "nvptx_comparison_operator"
-         [(match_operand:HSDIM 2 "nvptx_register_operand")
-          (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
+	  [(match_operand:HSDIM 2 "nvptx_register_operand")
+	   (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
   ""
-  "")
+{
+  rtx reg = gen_reg_rtx (BImode);
+  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
+			    operands[2], operands[3]);
+  emit_move_insn (reg, cmp);
+  emit_insn (gen_setccsi_from_bi (operands[0], reg));
+  DONE;
+})
 
 (define_expand "cstore<mode>4"
   [(set (match_operand:SI 0 "nvptx_register_operand")
 	(match_operator:SI 1 "nvptx_float_comparison_operator"
-         [(match_operand:SDFM 2 "nvptx_register_operand")
-          (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
+	  [(match_operand:SDFM 2 "nvptx_register_operand")
+	   (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
   ""
-  "")
+{
+  rtx reg = gen_reg_rtx (BImode);
+  rtx cmp = gen_rtx_fmt_ee (GET_CODE (operands[1]), BImode,
+			    operands[2], operands[3]);
+  emit_move_insn (reg, cmp);
+  emit_insn (gen_setccsi_from_bi (operands[0], reg));
+  DONE;
+})
 
 ;; Calls


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2022-01-04 11:28 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-04 11:28 [gcc r12-6207] nvptx: Transition nvptx backend to STORE_FLAG_VALUE = 1 Tom de Vries

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).