public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc/devel/omp/gcc-12] amdgcn: gather/scatter with DImode offsets
@ 2023-03-17 13:05 Andrew Stubbs
  0 siblings, 0 replies; only message in thread
From: Andrew Stubbs @ 2023-03-17 13:05 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:47cfa2d657edb9eddc8836059f02d81cbebad2e5

commit 47cfa2d657edb9eddc8836059f02d81cbebad2e5
Author: Andrew Stubbs <ams@codesourcery.com>
Date:   Mon Mar 6 12:42:44 2023 +0000

    amdgcn: gather/scatter with DImode offsets
    
    The GPU architecture requires SImode offsets on gather/scatter instructions,
    but they can also take a vector of absolute addresses, so this allows
    gather/scatter in more situations.
    
    gcc/ChangeLog:
    
            * config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
            (scatter_store<mode><vndi>): New.
            (mask_gather_load<mode><vndi>): New.
            (mask_scatter_store<mode><vndi>): New.

Diff:
---
 gcc/ChangeLog.omp          |   7 +++
 gcc/config/gcn/gcn-valu.md | 123 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 130 insertions(+)

diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp
index 6d88f486fa7..3a3e54bdeb2 100644
--- a/gcc/ChangeLog.omp
+++ b/gcc/ChangeLog.omp
@@ -1,3 +1,10 @@
+2023-03-17  Andrew Stubbs  <ams@codesourcery.com>
+
+	* config/gcn/gcn-valu.md (gather_load<mode><vndi>): New.
+	(scatter_store<mode><vndi>): New.
+	(mask_gather_load<mode><vndi>): New.
+	(mask_scatter_store<mode><vndi>): New.
+
 2023-03-17  Andrew Stubbs  <ams@codesourcery.com>
 
 	* config/gcn/gcn-protos.h (gcn_stepped_zero_int_parallel_p): New.
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index 1bfd358cf10..70e3fa63c53 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -961,6 +961,34 @@
 ;;
 ;; TODO: implement combined gather and zero_extend, but only for -msram-ecc=on
 
+(define_expand "gather_load<mode><vndi>"
+  [(match_operand:V_ALL 0 "register_operand")   ; loaded vector (destination)
+   (match_operand:DI 1 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 2 "register_operand")  ; per-lane DImode offsets
+   (match_operand 3 "immediate_operand")        ; not referenced below
+   (match_operand:SI 4 "gcn_alu_operand")]      ; scale; applied only if CONST_INT
+  ""
+  {
+    /* Address each lane absolutely: base + offsets (scaled if needed).  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    rtx offsets = operands[2];
+    /* Create the scaling temporaries only when a multiply is emitted.  */
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {
+	rtx multiplier = gen_reg_rtx (<VnDI>mode);
+	offsets = gen_reg_rtx (<VnDI>mode);
+	emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+	emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_gather<mode>_insn_1offset (operands[0], addr, const0_rtx,
+					      const0_rtx, const0_rtx));
+    DONE;
+  })
+
 (define_expand "gather_load<mode><vnsi>"
   [(match_operand:V_ALL 0 "register_operand")
    (match_operand:DI 1 "register_operand")
@@ -1091,6 +1119,34 @@
    (set_attr "length" "12")
    (set_attr "xnack" "off,on")])
 
+(define_expand "scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 1 "register_operand")  ; per-lane DImode offsets
+   (match_operand 2 "immediate_operand")        ; not referenced below
+   (match_operand:SI 3 "gcn_alu_operand")       ; scale; applied only if CONST_INT
+   (match_operand:V_ALL 4 "register_operand")]  ; vector of values to store
+  ""
+  {
+    /* Address each lane absolutely: base + offsets (scaled if needed).  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    rtx offsets = operands[1];
+    /* Create the scaling temporaries only when a multiply is emitted.  */
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {
+	rtx multiplier = gen_reg_rtx (<VnDI>mode);
+	offsets = gen_reg_rtx (<VnDI>mode);
+	emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+	emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_scatter<mode>_insn_1offset (addr, const0_rtx, operands[4],
+					       const0_rtx, const0_rtx));
+    DONE;
+  })
+
 (define_expand "scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")
@@ -3528,6 +3584,41 @@
     DONE;
   })
 
+(define_expand "mask_gather_load<mode><vndi>"
+  [(match_operand:V_ALL 0 "register_operand")   ; loaded vector (destination)
+   (match_operand:DI 1 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 2 "register_operand")  ; per-lane DImode offsets
+   (match_operand 3 "immediate_operand")        ; not referenced below
+   (match_operand:SI 4 "gcn_alu_operand")       ; scale; applied only if CONST_INT
+   (match_operand:DI 5 "")]                     ; lane mask, forced into a register
+  ""
+  {
+    /* Address each lane absolutely: base + offsets (scaled if needed).  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    rtx offsets = operands[2];
+    rtx exec = force_reg (DImode, operands[5]);
+    /* Create the scaling temporaries only when a multiply is emitted.  */
+    if (CONST_INT_P (operands[4]) && INTVAL (operands[4]) != 1)
+      {
+	rtx multiplier = gen_reg_rtx (<VnDI>mode);
+	offsets = gen_reg_rtx (<VnDI>mode);
+	emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[4]));
+	emit_insn (gen_mul<vndi>3 (offsets, operands[2], multiplier));
+      }
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[1]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    /* Masked lanes are required to hold zero.  */
+    emit_move_insn (operands[0], gcn_vec_constant (<MODE>mode, 0));
+
+    emit_insn (gen_gather<mode>_insn_1offset_exec (operands[0], addr,
+						   const0_rtx, const0_rtx,
+						   const0_rtx, operands[0],
+						   exec));
+    DONE;
+  })
+
 (define_expand "mask_gather_load<mode><vnsi>"
   [(match_operand:V_ALL 0 "register_operand")
    (match_operand:DI 1 "register_operand")
@@ -3559,6 +3650,38 @@
     DONE;
   })
 
+(define_expand "mask_scatter_store<mode><vndi>"
+  [(match_operand:DI 0 "register_operand")      ; scalar base address
+   (match_operand:<VnDI> 1 "register_operand")  ; per-lane DImode offsets
+   (match_operand 2 "immediate_operand")        ; not referenced below
+   (match_operand:SI 3 "gcn_alu_operand")       ; scale; applied only if CONST_INT
+   (match_operand:V_ALL 4 "register_operand")   ; vector of values to store
+   (match_operand:DI 5 "")]                     ; lane mask, forced into a register
+  ""
+  {
+    /* Address each lane absolutely: base + offsets (scaled if needed).  */
+    rtx vec_base = gen_reg_rtx (<VnDI>mode);
+    rtx addr = gen_reg_rtx (<VnDI>mode);
+    rtx offsets = operands[1];
+    rtx exec = force_reg (DImode, operands[5]);
+    /* Create the scaling temporaries only when a multiply is emitted.  */
+    if (CONST_INT_P (operands[3]) && INTVAL (operands[3]) != 1)
+      {
+	rtx multiplier = gen_reg_rtx (<VnDI>mode);
+	offsets = gen_reg_rtx (<VnDI>mode);
+	emit_insn (gen_vec_duplicate<vndi> (multiplier, operands[3]));
+	emit_insn (gen_mul<vndi>3 (offsets, operands[1], multiplier));
+      }
+    emit_insn (gen_vec_duplicate<vndi> (vec_base, operands[0]));
+    emit_insn (gen_add<vndi>3 (addr, vec_base, offsets));
+
+    emit_insn (gen_scatter<mode>_insn_1offset_exec (addr, const0_rtx,
+						    operands[4], const0_rtx,
+						    const0_rtx,
+						    exec));
+    DONE;
+  })
+
 (define_expand "mask_scatter_store<mode><vnsi>"
   [(match_operand:DI 0 "register_operand")
    (match_operand:<VnSI> 1 "register_operand")

^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2023-03-17 13:05 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-03-17 13:05 [gcc/devel/omp/gcc-12] amdgcn: gather/scatter with DImode offsets Andrew Stubbs

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).