public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v2] LoongArch: Add prefetch instructions.
@ 2022-11-12  7:37 Lulu Cheng
  2022-11-12  9:45 ` Xi Ruoyao
  0 siblings, 1 reply; 4+ messages in thread
From: Lulu Cheng @ 2022-11-12  7:37 UTC (permalink / raw)
  To: gcc-patches; +Cc: xry111, i, xuchenghua, Lulu Cheng, xujiahao

Co-Authored-By: xujiahao <xujiahao@loongson.cn>

gcc/ChangeLog:

	* config/loongarch/loongarch-def.c: Initialize the number of parallel
	prefetches.
	* config/loongarch/loongarch-tune.h (struct loongarch_cache):
	Define the number of parallel prefetches.
	* config/loongarch/loongarch.cc (loongarch_option_override_internal):
	Set up parameters to be used in prefetching algorithm.
	(loongarch_prefetch_cookie): Select load or store based on the value of write.
	* config/loongarch/loongarch.md (prefetch): New template.
	(*prefetch_indexed_<mode>): New template.
---
 gcc/config/loongarch/constraints.md   | 20 +++++++++++
 gcc/config/loongarch/loongarch-def.c  |  2 ++
 gcc/config/loongarch/loongarch-tune.h |  1 +
 gcc/config/loongarch/loongarch.cc     | 50 +++++++++++++++++++++------
 gcc/config/loongarch/loongarch.md     | 20 +++++++++++
 5 files changed, 83 insertions(+), 10 deletions(-)

diff --git a/gcc/config/loongarch/constraints.md b/gcc/config/loongarch/constraints.md
index 43cb7b5f0f5..9ac5e4c00fb 100644
--- a/gcc/config/loongarch/constraints.md
+++ b/gcc/config/loongarch/constraints.md
@@ -86,6 +86,14 @@
 ;;    "ZB"
 ;;      "An address that is held in a general-purpose register.
 ;;      The offset is zero"
+;;    "ZD"
+;;	"An address operand whose address is formed by a base register
+;;	 and offset that is suitable for use in instructions with the same
+;;	 addressing mode as @code{preld}."
+;;    "ZE"
+;;	"An address operand whose address is formed by a base register
+;;	 and index register that is suitable for use in instructions
+;;	 with the same addressing mode as @code{preldx}."
 ;; "<" "Matches a pre-dec or post-dec operand." (Global non-architectural)
 ;; ">" "Matches a pre-inc or post-inc operand." (Global non-architectural)
 
@@ -190,3 +198,15 @@ (define_memory_constraint "ZB"
   The offset is zero"
   (and (match_code "mem")
        (match_test "REG_P (XEXP (op, 0))")))
+
+(define_address_constraint "ZD"
+  "An address operand whose address is formed by a base register
+   and offset that is suitable for use in instructions with the same
+   addressing mode as @code{preld}."
+   (match_test "loongarch_12bit_offset_address_p (op, mode)"))
+
+(define_address_constraint "ZE"
+  "An address operand whose address is formed by a base register
+   and index register that is suitable for use in instructions
+   with the same addressing mode as @code{preldx}."
+   (match_test "loongarch_base_index_address_p (op, mode)"))
diff --git a/gcc/config/loongarch/loongarch-def.c b/gcc/config/loongarch/loongarch-def.c
index cbf995d81b5..80ab10a52a8 100644
--- a/gcc/config/loongarch/loongarch-def.c
+++ b/gcc/config/loongarch/loongarch-def.c
@@ -62,11 +62,13 @@ loongarch_cpu_cache[N_TUNE_TYPES] = {
       .l1d_line_size = 64,
       .l1d_size = 64,
       .l2d_size = 256,
+      .simultaneous_prefetches = 4,
   },
   [CPU_LA464] = {
       .l1d_line_size = 64,
       .l1d_size = 64,
       .l2d_size = 256,
+      .simultaneous_prefetches = 4,
   },
 };
 
diff --git a/gcc/config/loongarch/loongarch-tune.h b/gcc/config/loongarch/loongarch-tune.h
index 6f3530f5c02..8e3eb29472b 100644
--- a/gcc/config/loongarch/loongarch-tune.h
+++ b/gcc/config/loongarch/loongarch-tune.h
@@ -45,6 +45,7 @@ struct loongarch_cache {
     int l1d_line_size;  /* bytes */
     int l1d_size;       /* KiB */
     int l2d_size;       /* kiB */
+    int simultaneous_prefetches; /* number of parallel prefetch */
 };
 
 #endif /* LOONGARCH_TUNE_H */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index 8d5d8d965dd..a36802fbbf2 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "context.h"
 #include "builtins.h"
 #include "rtl-iter.h"
+#include "opts.h"
 
 /* This file should be included last.  */
 #include "target-def.h"
@@ -5958,22 +5959,24 @@ loongarch_variable_issue (FILE *file ATTRIBUTE_UNUSED,
   return more;
 }
 
-/* Given that we have an rtx of the form (prefetch ... WRITE LOCALITY),
-   return the first operand of the associated PREF or PREFX insn.  */
+/* LoongArch only implements preld hint=0 (prefetch for load) and hint=8
+   (prefetch for store).  WRITE picks one; a locality-1 load yields hint=2.  */
 
 rtx
 loongarch_prefetch_cookie (rtx write, rtx locality)
 {
-  /* store_streamed / load_streamed.  */
-  if (INTVAL (locality) <= 0)
-    return GEN_INT (INTVAL (write) + 4);
+  if (INTVAL (locality) == 1 && INTVAL (write) == 0)
+    return GEN_INT (INTVAL (write) + 2);
 
-  /* store / load.  */
-  if (INTVAL (locality) <= 2)
-    return write;
+  /* store.  */
+  if (INTVAL (write) == 1)
+    return GEN_INT (INTVAL (write) + 7);
 
-  /* store_retained / load_retained.  */
-  return GEN_INT (INTVAL (write) + 6);
+  /* load.  */
+  if (INTVAL (write) == 0)
+    return GEN_INT (INTVAL (write));
+
+  gcc_unreachable ();
 }
 
 /* Implement TARGET_ASM_OUTPUT_MI_THUNK.  Generate rtl rather than asm text
@@ -6100,6 +6103,33 @@ loongarch_option_override_internal (struct gcc_options *opts)
   if (loongarch_branch_cost == 0)
     loongarch_branch_cost = loongarch_cost->branch_cost;
 
+  /* Set up parameters to be used in prefetching algorithm.  */
+  int simultaneous_prefetches
+    = loongarch_cpu_cache[LARCH_ACTUAL_TUNE].simultaneous_prefetches;
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_simultaneous_prefetches,
+		       simultaneous_prefetches);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l1_cache_line_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_line_size);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l1_cache_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l1d_size);
+
+  SET_OPTION_IF_UNSET (opts, &global_options_set,
+		       param_l2_cache_size,
+		       loongarch_cpu_cache[LARCH_ACTUAL_TUNE].l2d_size);
+
+
+  /* Enable sw prefetching at -O3 and higher.  */
+  if (opts->x_flag_prefetch_loop_arrays < 0
+      && (opts->x_optimize >= 3 || opts->x_flag_profile_use)
+      && !opts->x_optimize_size)
+    opts->x_flag_prefetch_loop_arrays = 1;
+
   if (TARGET_DIRECT_EXTERN_ACCESS && flag_shlib)
     error ("%qs cannot be used for compiling a shared library",
 	   "-mdirect-extern-access");
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index 682ab961741..fea6bf57239 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3282,6 +3282,26 @@ (define_expand "untyped_call"
 ;;  ....................
 ;;
 
+(define_insn "prefetch"
+  [(prefetch (match_operand 0 "address_operand" "ZD,ZE")
+	     (match_operand 1 "const_int_operand" "n,n")
+	     (match_operand 2 "const_int_operand" "n,n"))]
+  ""
+{
+  operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]);
+
+  switch (which_alternative)
+    {
+    case 0:
+      return "preld\t%1,%a0";
+    case 1:
+      return "preldx\t%1,%a0";
+    default:
+      gcc_unreachable ();
+    }
+}
+  [(set_attr "type" "prefetch,prefetchx")])
+
 (define_insn "nop"
   [(const_int 0)]
   ""
-- 
2.31.1


^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] LoongArch: Add prefetch instructions.
  2022-11-12  7:37 [PATCH v2] LoongArch: Add prefetch instructions Lulu Cheng
@ 2022-11-12  9:45 ` Xi Ruoyao
  2022-11-15  9:17   ` Xi Ruoyao
  0 siblings, 1 reply; 4+ messages in thread
From: Xi Ruoyao @ 2022-11-12  9:45 UTC (permalink / raw)
  To: Lulu Cheng, gcc-patches; +Cc: i, xuchenghua, xujiahao

On Sat, 2022-11-12 at 15:37 +0800, Lulu Cheng wrote:
> Co-Authored-By: xujiahao <xujiahao@loongson.cn>
> 
> gcc/ChangeLog:
> 
>         * config/loongarch/loongarch-def.c: Initial number of parallel
> prefetch.
>         * config/loongarch/loongarch-tune.h (struct loongarch_cache):
>         Define number of parallel prefetch.
>         * config/loongarch/loongarch.cc
> (loongarch_option_override_internal):
>         Set up parameters to be used in prefetching algorithm.
>         (loongarch_prefetch_cookie): Select load or store based on the
> value of write.
>         * config/loongarch/loongarch.md (prefetch): New template.
>         (*prefetch_indexed_<mode>): New template.

Missing config/loongarch/constraints.md.

/* snip */

>  rtx
>  loongarch_prefetch_cookie (rtx write, rtx locality)
>  {
> -  /* store_streamed / load_streamed.  */
> -  if (INTVAL (locality) <= 0)
> -    return GEN_INT (INTVAL (write) + 4);
> +  if (INTVAL (locality) == 1 && INTVAL (write) == 0)
> +    return GEN_INT (INTVAL (write) + 2);

So __builtin_prefetch(ptr, 0, 1) will produce
"preld 2,$r4,0", while the document says

   hint has 32 optional values (0 to 31), 0 represents load to level 1
   Cache, and 8 represents store to level 1 Cache. The remaining hint
   values are not defined and are processed for nop instructions when the
   processor executes.
   
OTOH hint 2 is documented in preldx.  So does preld also support hint 2?

/* snip */


> +(define_insn "prefetch"
> +  [(prefetch (match_operand 0 "address_operand" "ZD,ZE")
> +            (match_operand 1 "const_int_operand" "n,n")
> +            (match_operand 2 "const_int_operand" "n,n"))]
> +  ""
> +{
> +  operands[1] = loongarch_prefetch_cookie (operands[1], operands[2]);
> +
> +  switch (which_alternative)
> +    {
> +    case 0:
> +      return "preld\t%1,%a0";
> +    case 1:
> +      return "preldx\t%1,%a0";

void prefetch(char *ptr, int off)
{
	return __builtin_prefetch(ptr + off);
}

It's compiled to "preldx 0,$r4,$r5".  I don't think it's correct because
according to the doc, rk should contain several bit-fields instead of
an offset.

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] LoongArch: Add prefetch instructions.
  2022-11-12  9:45 ` Xi Ruoyao
@ 2022-11-15  9:17   ` Xi Ruoyao
  2022-11-15  9:32     ` Lulu Cheng
  0 siblings, 1 reply; 4+ messages in thread
From: Xi Ruoyao @ 2022-11-15  9:17 UTC (permalink / raw)
  To: Lulu Cheng, gcc-patches; +Cc: i, xuchenghua, xujiahao

On Sat, 2022-11-12 at 17:45 +0800, Xi Ruoyao via Gcc-patches wrote:
> void prefetch(char *ptr, int off)
> {
>         return __builtin_prefetch(ptr + off);
> }
> 
> It's compiled to "preldx 0,$r4,$r5".  I don't think it's correct
> because
> according to the doc, rk should contain several bit-fields instead of
> an offset.

Hi Lulu,

Considering we are in stage 3 now and we can still push patches which
have been reviewed (in the first week of stage 3), I guess we can add
preld for GCC 13 and try preldx in the next development cycle?

BTW if preldx behaves exactly as the manual says, I think it's not
possible to invoke it correctly in GNU C unless using inline assembly...

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH v2] LoongArch: Add prefetch instructions.
  2022-11-15  9:17   ` Xi Ruoyao
@ 2022-11-15  9:32     ` Lulu Cheng
  0 siblings, 0 replies; 4+ messages in thread
From: Lulu Cheng @ 2022-11-15  9:32 UTC (permalink / raw)
  To: Xi Ruoyao, gcc-patches; +Cc: i, xuchenghua, xujiahao


在 2022/11/15 下午5:17, Xi Ruoyao 写道:
> On Sat, 2022-11-12 at 17:45 +0800, Xi Ruoyao via Gcc-patches wrote:
>> void prefetch(char *ptr, int off)
>> {
>>          return __builtin_prefetch(ptr + off);
>> }
>>
>> It's compiled to "preldx 0,$r4,$r5".  I don't think it's correct
>> because
>> according to the doc, rk should contain several bit-fields instead of
>> an offset.
> Hi Lulu,
>
> Considering we are in stage 3 now and we can still push patches which
> have been reviewed (in the first week of stage 3), I guess we can add
> preld for GCC 13 and try preldx in the next development cycle?
>
> BTW if preldx behaves exactly as the manual says, I think it's not
> possible to invoke it correctly in GNU C unless using inline assembly...
>
Well, I also want to add preld and instant load optimization support in 
this release.

I will send a patch in the next two days :-)



^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2022-11-15  9:33 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-11-12  7:37 [PATCH v2] LoongArch: Add prefetch instructions Lulu Cheng
2022-11-12  9:45 ` Xi Ruoyao
2022-11-15  9:17   ` Xi Ruoyao
2022-11-15  9:32     ` Lulu Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).