public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion
@ 2021-11-14 21:47 Philipp Tomsich
  2021-11-14 21:47 ` [PATCH v1 1/2] RISC-V: Add basic support for the Ventana-VT1 core Philipp Tomsich
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Philipp Tomsich @ 2021-11-14 21:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, wilson, Philipp Tomsich


This series provides support for the Ventana VT1 (a 4-way superscalar
rv64gc_zba_zbb_zbc_zbs core) including support for the supported
instruction fusion patterns.

This includes the addition of the fusion-aware scheduling
infrastructure for RISC-V and implements idiom recognition for the
fusion patterns supported by VT1.


Philipp Tomsich (2):
  RISC-V: Add basic support for the Ventana-VT1 core
  RISC-V: Add instruction fusion (for ventana-vt1)

 gcc/config/riscv/riscv-cores.def |   2 +
 gcc/config/riscv/riscv-opts.h    |   3 +-
 gcc/config/riscv/riscv.c         | 210 +++++++++++++++++++++++++++++++
 gcc/config/riscv/riscv.md        |   2 +-
 gcc/doc/invoke.texi              |   4 +-
 5 files changed, 217 insertions(+), 4 deletions(-)

-- 
2.32.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v1 1/2] RISC-V: Add basic support for the Ventana-VT1 core
  2021-11-14 21:47 [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Philipp Tomsich
@ 2021-11-14 21:47 ` Philipp Tomsich
  2021-11-14 21:47 ` [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1) Philipp Tomsich
  2021-11-17 14:05 ` [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Kito Cheng
  2 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2021-11-14 21:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, wilson, Philipp Tomsich, Philipp Tomsich

From: Philipp Tomsich <ptomsich@ventanamicro.com>

The Ventana-VT1 core is compatible with rv64gc and Zb[abcs].
This introduces a placeholder -mcpu=ventana-vt1, so tooling and
scripts don't need to change once full support (pipeline, tuning,
etc.) will become public later.

gcc/ChangeLog:

	* config/riscv/riscv-cores.def (RISCV_CORE): Add ventana-vt1.
	* config/riscv/riscv-opts.h (enum riscv_microarchitecture_type): Add ventana_vt1.
	* config/riscv/riscv.c: Add tune-info for ventana-vt1.
	* config/riscv/riscv.md (tune): Add ventana_vt1.
	* doc/invoke.texi: Add ventana-vt1.

Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
---

 gcc/config/riscv/riscv-cores.def |  2 ++
 gcc/config/riscv/riscv-opts.h    |  3 ++-
 gcc/config/riscv/riscv.c         | 14 ++++++++++++++
 gcc/config/riscv/riscv.md        |  2 +-
 gcc/doc/invoke.texi              |  4 ++--
 5 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/gcc/config/riscv/riscv-cores.def b/gcc/config/riscv/riscv-cores.def
index bf5aaba49c3..f6f225d3c5f 100644
--- a/gcc/config/riscv/riscv-cores.def
+++ b/gcc/config/riscv/riscv-cores.def
@@ -46,4 +46,6 @@ RISCV_CORE("sifive-s76",      "rv64imafdc", "sifive-7-series")
 RISCV_CORE("sifive-u54",      "rv64imafdc", "sifive-5-series")
 RISCV_CORE("sifive-u74",      "rv64imafdc", "sifive-7-series")
 
+RISCV_CORE("ventana-vt1",     "rv64imafdc_zba_zbb_zbc_zbs",	"ventana-vt1")
+
 #undef RISCV_CORE
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 2efc4b80f1f..32d6a9db1bd 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -52,7 +52,8 @@ extern enum riscv_isa_spec_class riscv_isa_spec;
 /* Keep this list in sync with define_attr "tune" in riscv.md.  */
 enum riscv_microarchitecture_type {
   generic,
-  sifive_7
+  sifive_7,
+  ventana_vt1
 };
 extern enum riscv_microarchitecture_type riscv_microarchitecture;
 
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index df66abeb6ce..6b918db65e9 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -330,6 +330,19 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   false,					/* slow_unaligned_access */
 };
 
+/* Costs to use when optimizing for Ventana Micro VT1.  */
+static const struct riscv_tune_param ventana_vt1_tune_info = {
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_add */
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (5)},	/* fp_mul */
+  {COSTS_N_INSNS (20), COSTS_N_INSNS (20)},	/* fp_div */
+  {COSTS_N_INSNS (4), COSTS_N_INSNS (4)},	/* int_mul */
+  {COSTS_N_INSNS (6), COSTS_N_INSNS (6)},	/* int_div */
+  4,						/* issue_rate */
+  4,						/* branch_cost */
+  5,						/* memory_cost */
+  false,					/* slow_unaligned_access */
+};
+
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
 static tree riscv_handle_type_attribute (tree *, tree, tree, int, bool *);
 
@@ -366,6 +379,7 @@ static const struct riscv_tune_info riscv_tune_info_table[] = {
   { "sifive-5-series", generic, &rocket_tune_info },
   { "sifive-7-series", sifive_7, &sifive_7_tune_info },
   { "thead-c906", generic, &thead_c906_tune_info },
+  { "ventana-vt1", ventana_vt1, &ventana_vt1_tune_info },
   { "size", generic, &optimize_size_tune_info },
 };
 
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index b06a26bffb3..be7ccc753a4 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -270,7 +270,7 @@ (define_attr "cannot_copy" "no,yes" (const_string "no"))
 ;; Microarchitectures we know how to tune for.
 ;; Keep this in sync with enum riscv_microarchitecture.
 (define_attr "tune"
-  "generic,sifive_7"
+  "generic,sifive_7,ventana_vt1"
   (const (symbol_ref "((enum attr_tune) riscv_microarchitecture)")))
 
 ;; Describe a user's asm statement.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 99cdeb90c7c..b5934183a88 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -27358,14 +27358,14 @@ by particular CPU name.
 Permissible values for this option are: @samp{sifive-e20}, @samp{sifive-e21},
 @samp{sifive-e24}, @samp{sifive-e31}, @samp{sifive-e34}, @samp{sifive-e76},
 @samp{sifive-s21}, @samp{sifive-s51}, @samp{sifive-s54}, @samp{sifive-s76},
-@samp{sifive-u54}, and @samp{sifive-u74}.
+@samp{sifive-u54}, @samp{sifive-u74}, and @samp{ventana-vt1} .
 
 @item -mtune=@var{processor-string}
 @opindex mtune
 Optimize the output for the given processor, specified by microarchitecture or
 particular CPU name.  Permissible values for this option are: @samp{rocket},
 @samp{sifive-3-series}, @samp{sifive-5-series}, @samp{sifive-7-series},
-@samp{size}, and all valid options for @option{-mcpu=}.
+@samp{ventana-vt1}, @samp{size}, and all valid options for @option{-mcpu=}.
 
 When @option{-mtune=} is not specified, use the setting from @option{-mcpu},
 the default is @samp{rocket} if both are not specified.
-- 
2.32.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)
  2021-11-14 21:47 [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Philipp Tomsich
  2021-11-14 21:47 ` [PATCH v1 1/2] RISC-V: Add basic support for the Ventana-VT1 core Philipp Tomsich
@ 2021-11-14 21:47 ` Philipp Tomsich
  2021-11-17 14:05   ` Kito Cheng
  2021-11-17 14:05 ` [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Kito Cheng
  2 siblings, 1 reply; 7+ messages in thread
From: Philipp Tomsich @ 2021-11-14 21:47 UTC (permalink / raw)
  To: gcc-patches; +Cc: kito.cheng, wilson, Philipp Tomsich, Philipp Tomsich

From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>

The Ventana VT1 core supports quad-issue and instruction fusion.
This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
together and adds idiom matcheing for the supported fusion cases.

gcc/ChangeLog:

	* config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
	constants to identify supported fusion patterns.
	(struct riscv_tune_param): Add fusible_op field.
	(riscv_macro_fusion_p): Implement.
	(riscv_fusion_enabled_p): Implement.
	(riscv_macro_fusion_pair_p): Implement and recoginze fusible
	idioms for Ventana VT1.
	(TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
	(TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.

Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
---

 gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 196 insertions(+)

diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 6b918db65e9..8eac52101a3 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -211,6 +211,19 @@ struct riscv_integer_op {
    The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
 #define RISCV_MAX_INTEGER_OPS 8
 
+enum riscv_fusion_pairs
+{
+  RISCV_FUSE_NOTHING = 0,
+  RISCV_FUSE_ZEXTW = (1 << 0),
+  RISCV_FUSE_ZEXTH = (1 << 1),
+  RISCV_FUSE_ZEXTWS = (1 << 2),
+  RISCV_FUSE_LDINDEXED = (1 << 3),
+  RISCV_FUSE_LUI_ADDI = (1 << 4),
+  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
+  RISCV_FUSE_LUI_LD = (1 << 6),
+  RISCV_FUSE_AUIPC_LD = (1 << 7),
+};
+
 /* Costs of various operations on the different architectures.  */
 
 struct riscv_tune_param
@@ -224,6 +237,7 @@ struct riscv_tune_param
   unsigned short branch_cost;
   unsigned short memory_cost;
   bool slow_unaligned_access;
+  unsigned int fusible_ops;
 };
 
 /* Information about one micro-arch we know about.  */
@@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
   3,						/* branch_cost */
   5,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,                           /* fusible_ops */
 };
 
 /* Costs to use when optimizing for Sifive 7 Series.  */
@@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
   4,						/* branch_cost */
   3,						/* memory_cost */
   true,						/* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,                           /* fusible_ops */
 };
 
 /* Costs to use when optimizing for T-HEAD c906.  */
@@ -328,6 +344,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
   1,						/* branch_cost */
   2,						/* memory_cost */
   false,					/* slow_unaligned_access */
+  RISCV_FUSE_NOTHING,                           /* fusible_ops */
 };
 
 /* Costs to use when optimizing for Ventana Micro VT1.  */
@@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
   4,						/* branch_cost */
   5,						/* memory_cost */
   false,					/* slow_unaligned_access */
+  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
+    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
+    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
+    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
 };
 
 static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
@@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
   return tune_param->issue_rate;
 }
 
+/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
+   instruction fusion of some sort.  */
+
+static bool
+riscv_macro_fusion_p (void)
+{
+  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
+}
+
+/* Return true iff the instruction fusion described by OP is enabled.  */
+
+static bool
+riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
+{
+  return tune_param->fusible_ops & op;
+}
+
+/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
+   should be kept together during scheduling.  */
+
+static bool
+riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
+{
+  rtx prev_set = single_set (prev);
+  rtx curr_set = single_set (curr);
+  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
+  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
+
+  if (!riscv_macro_fusion_p ())
+    return false;
+
+  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
+			riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
+    {
+      /* We are trying to match the following:
+	   prev (slli) == (set (reg:DI rD)
+			       (ashift:DI (reg:DI rS) (const_int 32)))
+	   curr (slri) == (set (reg:DI rD)
+			       (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
+	 with <shift> being either 32 for FUSE_ZEXTW, or
+			 `less than 32 for FUSE_ZEXTWS. */
+
+      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+	  && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+	  && REG_P (SET_DEST (prev_set))
+	  && REG_P (SET_DEST (curr_set))
+	  && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
+	  && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
+	  && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+	  && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+	  && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
+	  && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
+		&& riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
+	      || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
+		   && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
+    {
+      /* We are trying to match the following:
+	   prev (slli) == (set (reg:DI rD)
+			       (ashift:DI (reg:DI rS) (const_int 48)))
+	   curr (slri) == (set (reg:DI rD)
+			       (lshiftrt:DI (reg:DI rD) (const_int 48))) */
+
+      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
+	  && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
+	  && REG_P (SET_DEST (prev_set))
+	  && REG_P (SET_DEST (curr_set))
+	  && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
+	  && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
+	  && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
+	  && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+	  && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
+	  && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
+    {
+      /* We are trying to match the following:
+	   prev (add) == (set (reg:DI rD)
+			      (plus:DI (reg:DI rS1) (reg:DI rS2))
+	   curr (ld)  == (set (reg:DI rD)
+			      (mem:DI (reg:DI rD))) */
+
+      if (MEM_P (SET_SRC (curr_set))
+	  && REG_P (XEXP (SET_SRC (curr_set), 0))
+	  && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
+	  && GET_CODE (SET_SRC (prev_set)) == PLUS
+	  && REG_P (XEXP (SET_SRC (prev_set), 0))
+	  && REG_P (XEXP (SET_SRC (prev_set), 1)))
+	return true;
+
+      /* We are trying to match the following:
+	   prev (add) == (set (reg:DI rD)
+			      (plus:DI (reg:DI rS1) (reg:DI rS2)))
+	   curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
+
+      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
+	   || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
+	  && MEM_P (XEXP (SET_SRC (curr_set), 0))
+	  && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
+	  && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
+	  && GET_CODE (SET_SRC (prev_set)) == PLUS
+	  && REG_P (XEXP (SET_SRC (prev_set), 0))
+	  && REG_P (XEXP (SET_SRC (prev_set), 1)))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
+    {
+      /* We are trying to match the following:
+	   prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
+	   curr (addi) == (set (reg:DI rD)
+			       (plus:DI (reg:DI rD) (const_int IMM12))) */
+
+      if (GET_CODE (SET_SRC (curr_set)) == PLUS
+	  && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
+	  && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
+	  && CONST_INT_P (SET_SRC (prev_set))
+	  && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
+    {
+      /* We are trying to match the following:
+	   prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+	   prev (addi)  == (set (reg:DI rD)
+				(plus:DI (reg:DI rD) (const_int IMM12))) */
+
+      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+	  && XINT (prev_set, 1) == UNSPEC_AUIPC
+	  && GET_CODE (SET_SRC (curr_set)) == PLUS
+	  && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
+    {
+      /* We are trying to match the following:
+	   prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
+	   curr (ld)  == (set (reg:DI rD)
+			      (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
+
+      if (CONST_INT_P (SET_SRC (prev_set))
+	  && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
+	  && MEM_P (SET_SRC (curr_set))
+	  && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
+	return true;
+    }
+
+  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
+    {
+      /* We are trying to match the following:
+	   prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
+	   curr (ld)  == (set (reg:DI rD)
+			      (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
+
+      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
+	  && XINT (prev_set, 1) == UNSPEC_AUIPC
+	  && MEM_P (SET_SRC (curr_set))
+	  && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
+	return true;
+    }
+
+  return false;
+}
+
 /* Auxiliary function to emit RISC-V ELF attribute. */
 static void
 riscv_emit_attribute ()
@@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
 
 #undef TARGET_SCHED_ISSUE_RATE
 #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
+#undef TARGET_SCHED_MACRO_FUSION_P
+#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
+#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
+#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
 
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
-- 
2.32.0


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)
  2021-11-14 21:47 ` [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1) Philipp Tomsich
@ 2021-11-17 14:05   ` Kito Cheng
  2021-11-17 19:40     ` Palmer Dabbelt
  0 siblings, 1 reply; 7+ messages in thread
From: Kito Cheng @ 2021-11-17 14:05 UTC (permalink / raw)
  To: Philipp Tomsich; +Cc: GCC Patches, Philipp Tomsich, wilson

Hi Philipp:

Thanks for the patch, I like this approach, that can easily configure
different capabilities for each core :)

So there are only a few minor comments for this patch.

On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
<philipp.tomsich@vrull.eu> wrote:
>
> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
>
> The Ventana VT1 core supports quad-issue and instruction fusion.
> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> together and adds idiom matcheing for the supported fusion cases.
>
> gcc/ChangeLog:
>
>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
>         constants to identify supported fusion patterns.
>         (struct riscv_tune_param): Add fusible_op field.
>         (riscv_macro_fusion_p): Implement.
>         (riscv_fusion_enabled_p): Implement.
>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
>         idioms for Ventana VT1.
>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.
>
> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
> ---
>
>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
>  1 file changed, 196 insertions(+)
>
> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> index 6b918db65e9..8eac52101a3 100644
> --- a/gcc/config/riscv/riscv.c
> +++ b/gcc/config/riscv/riscv.c
> @@ -211,6 +211,19 @@ struct riscv_integer_op {
>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
>  #define RISCV_MAX_INTEGER_OPS 8
>
> +enum riscv_fusion_pairs
> +{
> +  RISCV_FUSE_NOTHING = 0,
> +  RISCV_FUSE_ZEXTW = (1 << 0),
> +  RISCV_FUSE_ZEXTH = (1 << 1),
> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> +  RISCV_FUSE_LDINDEXED = (1 << 3),

RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED

Could you add some comment for above enums, like that:
/* slli rx, rx, 32 + srli rx, rx, 32 */
RISCV_FUSE_ZEXTW

So that we could know what kind of instruction will be funded for this enum.

> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> +  RISCV_FUSE_LUI_LD = (1 << 6),
> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> +};
> +
>  /* Costs of various operations on the different architectures.  */
>
>  struct riscv_tune_param
> @@ -224,6 +237,7 @@ struct riscv_tune_param
>    unsigned short branch_cost;
>    unsigned short memory_cost;
>    bool slow_unaligned_access;
> +  unsigned int fusible_ops;
>  };
>
>  /* Information about one micro-arch we know about.  */
> @@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>    3,                                           /* branch_cost */
>    5,                                           /* memory_cost */
>    true,                                                /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for Sifive 7 Series.  */
> @@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
>    4,                                           /* branch_cost */
>    3,                                           /* memory_cost */
>    true,                                                /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for T-HEAD c906.  */
> @@ -328,6 +344,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
>    1,                                           /* branch_cost */
>    2,                                           /* memory_cost */
>    false,                                       /* slow_unaligned_access */
> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>  };
>
>  /* Costs to use when optimizing for Ventana Micro VT1.  */
> @@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
>    4,                                           /* branch_cost */
>    5,                                           /* memory_cost */
>    false,                                       /* slow_unaligned_access */
> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
>  };
>
>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
>    return tune_param->issue_rate;
>  }
>
> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
> +   instruction fusion of some sort.  */
> +
> +static bool
> +riscv_macro_fusion_p (void)
> +{
> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
> +}
> +
> +/* Return true iff the instruction fusion described by OP is enabled.  */
> +
> +static bool
> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)

space between function name and parentheses.

riscv_fusion_enabled_p (enum riscv_fusion_pairs op)

> +{
> +  return tune_param->fusible_ops & op;
> +}
> +
> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
> +   should be kept together during scheduling.  */
> +
> +static bool
> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
> +{
> +  rtx prev_set = single_set (prev);
> +  rtx curr_set = single_set (curr);
> +  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
> +
> +  if (!riscv_macro_fusion_p ())
> +    return false;
> +
> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
> +    {
> +      /* We are trying to match the following:
> +          prev (slli) == (set (reg:DI rD)
> +                              (ashift:DI (reg:DI rS) (const_int 32)))
> +          curr (slri) == (set (reg:DI rD)

slri -> srli

> +                              (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
> +        with <shift> being either 32 for FUSE_ZEXTW, or
> +                        `less than 32 for FUSE_ZEXTWS. */

`less <-- I guess the ` is kind of an accident?

> +
> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> +         && REG_P (SET_DEST (prev_set))
> +         && REG_P (SET_DEST (curr_set))
> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )

space between function name and parentheses.

> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))

space between function name and parentheses.

> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
> +    {
> +      /* We are trying to match the following:
> +          prev (slli) == (set (reg:DI rD)
> +                              (ashift:DI (reg:DI rS) (const_int 48)))
> +          curr (slri) == (set (reg:DI rD)
> +                              (lshiftrt:DI (reg:DI rD) (const_int 48))) */

slri -> srli

> +
> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> +         && REG_P (SET_DEST (prev_set))
> +         && REG_P (SET_DEST (curr_set))
> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
> +    {
> +      /* We are trying to match the following:
> +          prev (add) == (set (reg:DI rD)
> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
> +          curr (ld)  == (set (reg:DI rD)
> +                             (mem:DI (reg:DI rD))) */
> +
> +      if (MEM_P (SET_SRC (curr_set))
> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> +       return true;
> +
> +      /* We are trying to match the following:
> +          prev (add) == (set (reg:DI rD)
> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
> +
> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))

This line is over 80 columns.


> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
> +    {
> +      /* We are trying to match the following:
> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> +          curr (addi) == (set (reg:DI rD)
> +                              (plus:DI (reg:DI rD) (const_int IMM12))) */
> +
> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
> +         && CONST_INT_P (SET_SRC (prev_set))
> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
> +    {
> +      /* We are trying to match the following:
> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
> +          prev (addi)  == (set (reg:DI rD)
> +                               (plus:DI (reg:DI rD) (const_int IMM12))) */
> +
> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
> +    {
> +      /* We are trying to match the following:
> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> +          curr (ld)  == (set (reg:DI rD)
> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
> +
> +      if (CONST_INT_P (SET_SRC (prev_set))
> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
> +         && MEM_P (SET_SRC (curr_set))
> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> +       return true;
> +    }
> +
> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
> +    {
> +      /* We are trying to match the following:
> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
> +          curr (ld)  == (set (reg:DI rD)
> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
> +
> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> +         && MEM_P (SET_SRC (curr_set))
> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> +       return true;
> +    }
> +
> +  return false;
> +}
> +
>  /* Auxiliary function to emit RISC-V ELF attribute. */
>  static void
>  riscv_emit_attribute ()
> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
>
>  #undef TARGET_SCHED_ISSUE_RATE
>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
> +#undef TARGET_SCHED_MACRO_FUSION_P
> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
>
>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
> --
> 2.32.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion
  2021-11-14 21:47 [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Philipp Tomsich
  2021-11-14 21:47 ` [PATCH v1 1/2] RISC-V: Add basic support for the Ventana-VT1 core Philipp Tomsich
  2021-11-14 21:47 ` [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1) Philipp Tomsich
@ 2021-11-17 14:05 ` Kito Cheng
  2 siblings, 0 replies; 7+ messages in thread
From: Kito Cheng @ 2021-11-17 14:05 UTC (permalink / raw)
  To: Philipp Tomsich; +Cc: GCC Patches, wilson

Hi Philipp:

This patch set LGTM, feel free to commit once addressed those issues.

On Mon, Nov 15, 2021 at 5:48 AM Philipp Tomsich
<philipp.tomsich@vrull.eu> wrote:
>
>
> This series provides support for the Ventana VT1 (a 4-way superscalar
> rv64gc_zba_zbb_zbc_zbs core) including support for the supported
> instruction fusion patterns.
>
> This includes the addition of the fusion-aware scheduling
> infrastructure for RISC-V and implements idiom recognition for the
> fusion patterns supported by VT1.
>
>
> Philipp Tomsich (2):
>   RISC-V: Add basic support for the Ventana-VT1 core
>   RISC-V: Add instruction fusion (for ventana-vt1)
>
>  gcc/config/riscv/riscv-cores.def |   2 +
>  gcc/config/riscv/riscv-opts.h    |   3 +-
>  gcc/config/riscv/riscv.c         | 210 +++++++++++++++++++++++++++++++
>  gcc/config/riscv/riscv.md        |   2 +-
>  gcc/doc/invoke.texi              |   4 +-
>  5 files changed, 217 insertions(+), 4 deletions(-)
>
> --
> 2.32.0
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)
  2021-11-17 14:05   ` Kito Cheng
@ 2021-11-17 19:40     ` Palmer Dabbelt
  2021-11-17 19:44       ` Philipp Tomsich
  0 siblings, 1 reply; 7+ messages in thread
From: Palmer Dabbelt @ 2021-11-17 19:40 UTC (permalink / raw)
  To: Kito Cheng; +Cc: philipp.tomsich, wilson, philipp.tomsich, gcc-patches

[This is my first time trying my Rivos address on the lists, so sorry if 
something goes off the rails.]

On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:
> Hi Philipp:
>
> Thanks for the patch, I like this approach, that can easily configure
> different capabilities for each core :)
>
> So there are only a few minor comments for this patch.
>
> On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
> <philipp.tomsich@vrull.eu> wrote:
>>
>> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
>>
>> The Ventana VT1 core supports quad-issue and instruction fusion.
>> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
>> together and adds idiom matcheing for the supported fusion cases.

There's a typo at "matcheing".

>>
>> gcc/ChangeLog:
>>
>>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
>>         constants to identify supported fusion patterns.
>>         (struct riscv_tune_param): Add fusible_op field.
>>         (riscv_macro_fusion_p): Implement.
>>         (riscv_fusion_enabled_p): Implement.
>>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
>>         idioms for Ventana VT1.
>>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
>>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to riscv_macro_fusion_pair_p.
>>
>> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>

This doesn't match the From (though admittedly I'm pretty new to the SoB 
stuff in GCC, so I'm not sure if that's even a rule here).

>> ---
>>
>>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
>>  1 file changed, 196 insertions(+)
>>
>> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
>> index 6b918db65e9..8eac52101a3 100644
>> --- a/gcc/config/riscv/riscv.c
>> +++ b/gcc/config/riscv/riscv.c
>> @@ -211,6 +211,19 @@ struct riscv_integer_op {
>>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
>>  #define RISCV_MAX_INTEGER_OPS 8
>>
>> +enum riscv_fusion_pairs
>> +{
>> +  RISCV_FUSE_NOTHING = 0,
>> +  RISCV_FUSE_ZEXTW = (1 << 0),
>> +  RISCV_FUSE_ZEXTH = (1 << 1),
>> +  RISCV_FUSE_ZEXTWS = (1 << 2),
>> +  RISCV_FUSE_LDINDEXED = (1 << 3),
>
> RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED
>
> Could you add some comment for above enums, like that:
> /* slli rx, rx, 32 + srli rx, rx, 32 */
> RISCV_FUSE_ZEXTW
>
> So that we could know what kind of instruction will be funded for this enum.
>
>> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
>> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
>> +  RISCV_FUSE_LUI_LD = (1 << 6),
>> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
>> +};
>> +
>>  /* Costs of various operations on the different architectures.  */
>>
>>  struct riscv_tune_param
>> @@ -224,6 +237,7 @@ struct riscv_tune_param
>>    unsigned short branch_cost;
>>    unsigned short memory_cost;
>>    bool slow_unaligned_access;
>> +  unsigned int fusible_ops;
>>  };
>>
>>  /* Information about one micro-arch we know about.  */
>> @@ -289,6 +303,7 @@ static const struct riscv_tune_param rocket_tune_info = {
>>    3,                                           /* branch_cost */
>>    5,                                           /* memory_cost */
>>    true,                                                /* slow_unaligned_access */
>> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>>  };

There's some tab/space issues here (and in the below ones).  They align 
when merged, but the new lines are spaces-only and the old ones have 
internal spaces mixed with tabs (IIRC that's to the GCC style, if not we 
should fix these to at least be consistent).

>>
>>  /* Costs to use when optimizing for Sifive 7 Series.  */
>> @@ -302,6 +317,7 @@ static const struct riscv_tune_param sifive_7_tune_info = {
>>    4,                                           /* branch_cost */
>>    3,                                           /* memory_cost */
>>    true,                                                /* slow_unaligned_access */
>> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>>  };
>>
>>  /* Costs to use when optimizing for T-HEAD c906.  */
>> @@ -328,6 +344,7 @@ static const struct riscv_tune_param optimize_size_tune_info = {
>>    1,                                           /* branch_cost */
>>    2,                                           /* memory_cost */
>>    false,                                       /* slow_unaligned_access */
>> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
>>  };
>>
>>  /* Costs to use when optimizing for Ventana Micro VT1.  */
>> @@ -341,6 +358,10 @@ static const struct riscv_tune_param ventana_vt1_tune_info = {
>>    4,                                           /* branch_cost */
>>    5,                                           /* memory_cost */
>>    false,                                       /* slow_unaligned_access */
>> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
>> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
>> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
>> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
>>  };
>>
>>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
>> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
>>    return tune_param->issue_rate;
>>  }
>>
>> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target supports
>> +   instruction fusion of some sort.  */
>> +
>> +static bool
>> +riscv_macro_fusion_p (void)
>> +{
>> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
>> +}
>> +
>> +/* Return true iff the instruction fusion described by OP is enabled.  */
>> +
>> +static bool
>> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
>
> space between function name and parentheses.
>
> riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
>
>> +{
>> +  return tune_param->fusible_ops & op;
>> +}
>> +
>> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV and CURR
>> +   should be kept together during scheduling.  */
>> +
>> +static bool
>> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
>> +{
>> +  rtx prev_set = single_set (prev);
>> +  rtx curr_set = single_set (curr);
>> +  /* prev and curr are simple SET insns i.e. no flag setting or branching.  */
>> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
>> +
>> +  if (!riscv_macro_fusion_p ())
>> +    return false;
>> +
>> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
>> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (slli) == (set (reg:DI rD)
>> +                              (ashift:DI (reg:DI rS) (const_int 32)))
>> +          curr (slri) == (set (reg:DI rD)

This really jumps out as the sort of thing that should be in RTL, but 
I'm not sure that's feasible.  The short branch->cmov optimization we're 
doing for the SiFive 7 series is sort of fusion and done in RTL, but the 
mechanism is very different there so it wouldn't apply directly.  There 
are other ports doing it this way so I'm not opposed to merging this 
as-is, but if there's a straight-forward way to port this to RTL then 
it's probably be a lot saner to maintain in the long term.  I can't come 
up with anything sane, though.

On the subject of maintainability: I don't see any tests for this.  
Given that we're exceedingly likely to end up with other targets that 
have front-end fusion and thus try to share this code, it'd be really 
nice to have either some documentation of what fusions this core 
performs or some tests that exercise the important ones.  I can't find 
any other references to "Ventana VT1" on the internet, but LMK if 
there's something I'm missing.

I'm not sure lacking tests should block merging this (as it's not like 
we have much optimization-based testing for the RISC-V port), but 
without at least some public documentation it's going to be hard to 
produce those outside of Ventana -- essentially I'm worried about ending 
up with the same black-box pipeline problem we have for the C906.

I seem to remember having tests for the SiFive optimization, but I 
couldn't find any when I looked.  Maybe Jim or Kito remembers where to 
find them, otherwise I'll throw some together.

>
> slri -> srli
>
>> +                              (lshiftrt:DI (reg:DI rD) (const_int <shift>)))
>> +        with <shift> being either 32 for FUSE_ZEXTW, or
>> +                        `less than 32 for FUSE_ZEXTWS. */
>
> `less <-- I guess the ` is kind of an accident?
>
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
>> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
>> +         && REG_P (SET_DEST (prev_set))
>> +         && REG_P (SET_DEST (curr_set))
>> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
>> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
>> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
>> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
>> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
>> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
>> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
>
> space between function name and parentheses.
>
>> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
>> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
>
> space between function name and parentheses.
>
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (slli) == (set (reg:DI rD)
>> +                              (ashift:DI (reg:DI rS) (const_int 48)))
>> +          curr (slri) == (set (reg:DI rD)
>> +                              (lshiftrt:DI (reg:DI rD) (const_int 48))) */
>
> slri -> srli
>
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
>> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
>> +         && REG_P (SET_DEST (prev_set))
>> +         && REG_P (SET_DEST (curr_set))
>> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
>> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST (curr_set))
>> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
>> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
>> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
>> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (add) == (set (reg:DI rD)
>> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
>> +          curr (ld)  == (set (reg:DI rD)
>> +                             (mem:DI (reg:DI rD))) */
>> +
>> +      if (MEM_P (SET_SRC (curr_set))
>> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
>> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST (prev_set))
>> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
>> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
>> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
>> +       return true;
>> +
>> +      /* We are trying to match the following:
>> +          prev (add) == (set (reg:DI rD)
>> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
>> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
>> +
>> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
>> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
>> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
>> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
>> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO (SET_DEST (prev_set))
>
> This line is over 80 columns.
>
>
>> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
>> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
>> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
>> +          curr (addi) == (set (reg:DI rD)
>> +                              (plus:DI (reg:DI rD) (const_int IMM12))) */
>> +
>> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
>> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
>> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
>> +         && CONST_INT_P (SET_SRC (prev_set))
>> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
>> +          prev (addi)  == (set (reg:DI rD)
>> +                               (plus:DI (reg:DI rD) (const_int IMM12))) */
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
>> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
>> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
>> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
>> +          curr (ld)  == (set (reg:DI rD)
>> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
>> +
>> +      if (CONST_INT_P (SET_SRC (prev_set))
>> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
>> +         && MEM_P (SET_SRC (curr_set))
>> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
>> +       return true;
>> +    }
>> +
>> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
>> +    {
>> +      /* We are trying to match the following:
>> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...] UNSPEC_AUIPC))
>> +          curr (ld)  == (set (reg:DI rD)
>> +                             (mem:DI (plus:DI (reg:DI rD) (const_int IMM12)))) */
>> +
>> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
>> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
>> +         && MEM_P (SET_SRC (curr_set))
>> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
>> +       return true;
>> +    }
>> +
>> +  return false;
>> +}
>> +
>>  /* Auxiliary function to emit RISC-V ELF attribute. */
>>  static void
>>  riscv_emit_attribute ()
>> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
>>
>>  #undef TARGET_SCHED_ISSUE_RATE
>>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
>> +#undef TARGET_SCHED_MACRO_FUSION_P
>> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
>> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
>> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
>>
>>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
>>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
>> --
>> 2.32.0
>>

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1)
  2021-11-17 19:40     ` Palmer Dabbelt
@ 2021-11-17 19:44       ` Philipp Tomsich
  0 siblings, 0 replies; 7+ messages in thread
From: Philipp Tomsich @ 2021-11-17 19:44 UTC (permalink / raw)
  To: Palmer Dabbelt; +Cc: Kito Cheng, wilson, gcc-patches

On Wed, 17 Nov 2021 at 20:40, Palmer Dabbelt <palmer@rivosinc.com> wrote:

> [This is my first time trying my Rivos address on the lists, so sorry if
> something goes off the rails.]
>
> On Wed, 17 Nov 2021 06:05:04 PST (-0800), gcc-patches@gcc.gnu.org wrote:
> > Hi Philipp:
> >
> > Thanks for the patch, I like this approach, that can easily configure
> > different capabilities for each core :)
> >
> > So there are only a few minor comments for this patch.
> >
> > On Mon, Nov 15, 2021 at 5:49 AM Philipp Tomsich
> > <philipp.tomsich@vrull.eu> wrote:
> >>
> >> From: Philipp Tomsich <philipp.tomsich@theobroma-systems.com>
> >>
> >> The Ventana VT1 core supports quad-issue and instruction fusion.
> >> This implemented TARGET_SCHED_MACRO_FUSION_P to keep fusible sequences
> >> together and adds idiom matcheing for the supported fusion cases.
>
> There's a typo at "matcheing".
>
> >>
> >> gcc/ChangeLog:
> >>
> >>         * config/riscv/riscv.c (enum riscv_fusion_pairs): Add symbolic
> >>         constants to identify supported fusion patterns.
> >>         (struct riscv_tune_param): Add fusible_op field.
> >>         (riscv_macro_fusion_p): Implement.
> >>         (riscv_fusion_enabled_p): Implement.
> >>         (riscv_macro_fusion_pair_p): Implement and recoginze fusible
> >>         idioms for Ventana VT1.
> >>         (TARGET_SCHED_MACRO_FUSION_P): Point to riscv_macro_fusion_p.
> >>         (TARGET_SCHED_MACRO_FUSION_PAIR_P): Point to
> riscv_macro_fusion_pair_p.
> >>
> >> Signed-off-by: Philipp Tomsich <philipp.tomsich@vrull.eu>
>
> This doesn't match the From (though admittedly I'm pretty new to the SoB
> stuff in GCC, so I'm not sure if that's even a rule here).
>

I noticed that I hadn't reset the authors and that patman had inserted a
Signed-off-by: for that reason, right after I sent this out.
Given that it's all me and there's both individual assignment paperwork and
company disclaimers on file for all of the email-addresses, this should be
fine.

>> ---
> >>
> >>  gcc/config/riscv/riscv.c | 196 +++++++++++++++++++++++++++++++++++++++
> >>  1 file changed, 196 insertions(+)
> >>
> >> diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
> >> index 6b918db65e9..8eac52101a3 100644
> >> --- a/gcc/config/riscv/riscv.c
> >> +++ b/gcc/config/riscv/riscv.c
> >> @@ -211,6 +211,19 @@ struct riscv_integer_op {
> >>     The worst case is LUI, ADDI, SLLI, ADDI, SLLI, ADDI, SLLI, ADDI.  */
> >>  #define RISCV_MAX_INTEGER_OPS 8
> >>
> >> +enum riscv_fusion_pairs
> >> +{
> >> +  RISCV_FUSE_NOTHING = 0,
> >> +  RISCV_FUSE_ZEXTW = (1 << 0),
> >> +  RISCV_FUSE_ZEXTH = (1 << 1),
> >> +  RISCV_FUSE_ZEXTWS = (1 << 2),
> >> +  RISCV_FUSE_LDINDEXED = (1 << 3),
> >
> > RISCV_FUSE_LDINDEXED -> RISCV_FUSE_LD_INDEXED
> >
> > Could you add some comment for above enums, like that:
> > /* slli rx, rx, 32 + srli rx, rx, 32 */
> > RISCV_FUSE_ZEXTW
> >
> > So that we could know what kind of instruction will be funded for this
> enum.
> >
> >> +  RISCV_FUSE_LUI_ADDI = (1 << 4),
> >> +  RISCV_FUSE_AUIPC_ADDI = (1 << 5),
> >> +  RISCV_FUSE_LUI_LD = (1 << 6),
> >> +  RISCV_FUSE_AUIPC_LD = (1 << 7),
> >> +};
> >> +
> >>  /* Costs of various operations on the different architectures.  */
> >>
> >>  struct riscv_tune_param
> >> @@ -224,6 +237,7 @@ struct riscv_tune_param
> >>    unsigned short branch_cost;
> >>    unsigned short memory_cost;
> >>    bool slow_unaligned_access;
> >> +  unsigned int fusible_ops;
> >>  };
> >>
> >>  /* Information about one micro-arch we know about.  */
> >> @@ -289,6 +303,7 @@ static const struct riscv_tune_param
> rocket_tune_info = {
> >>    3,                                           /* branch_cost */
> >>    5,                                           /* memory_cost */
> >>    true,                                                /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
>
> There's some tab/space issues here (and in the below ones).  They align
> when merged, but the new lines are spaces-only and the old ones have
> internal spaces mixed with tabs (IIRC that's to the GCC style, if not we
> should fix these to at least be consistent).
>
> >>
> >>  /* Costs to use when optimizing for Sifive 7 Series.  */
> >> @@ -302,6 +317,7 @@ static const struct riscv_tune_param
> sifive_7_tune_info = {
> >>    4,                                           /* branch_cost */
> >>    3,                                           /* memory_cost */
> >>    true,                                                /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for T-HEAD c906.  */
> >> @@ -328,6 +344,7 @@ static const struct riscv_tune_param
> optimize_size_tune_info = {
> >>    1,                                           /* branch_cost */
> >>    2,                                           /* memory_cost */
> >>    false,                                       /*
> slow_unaligned_access */
> >> +  RISCV_FUSE_NOTHING,                           /* fusible_ops */
> >>  };
> >>
> >>  /* Costs to use when optimizing for Ventana Micro VT1.  */
> >> @@ -341,6 +358,10 @@ static const struct riscv_tune_param
> ventana_vt1_tune_info = {
> >>    4,                                           /* branch_cost */
> >>    5,                                           /* memory_cost */
> >>    false,                                       /*
> slow_unaligned_access */
> >> +  ( RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH |       /* fusible_ops */
> >> +    RISCV_FUSE_ZEXTWS | RISCV_FUSE_LDINDEXED |
> >> +    RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI |
> >> +    RISCV_FUSE_LUI_LD | RISCV_FUSE_AUIPC_LD )
> >>  };
> >>
> >>  static tree riscv_handle_fndecl_attribute (tree *, tree, tree, int,
> bool *);
> >> @@ -4909,6 +4930,177 @@ riscv_issue_rate (void)
> >>    return tune_param->issue_rate;
> >>  }
> >>
> >> +/* Implement TARGET_SCHED_MACRO_FUSION_P.  Return true if target
> supports
> >> +   instruction fusion of some sort.  */
> >> +
> >> +static bool
> >> +riscv_macro_fusion_p (void)
> >> +{
> >> +  return tune_param->fusible_ops != RISCV_FUSE_NOTHING;
> >> +}
> >> +
> >> +/* Return true iff the instruction fusion described by OP is enabled.
> */
> >> +
> >> +static bool
> >> +riscv_fusion_enabled_p(enum riscv_fusion_pairs op)
> >
> > space between function name and parentheses.
> >
> > riscv_fusion_enabled_p (enum riscv_fusion_pairs op)
> >
> >> +{
> >> +  return tune_param->fusible_ops & op;
> >> +}
> >> +
> >> +/* Implement TARGET_SCHED_MACRO_FUSION_PAIR_P.  Return true if PREV
> and CURR
> >> +   should be kept together during scheduling.  */
> >> +
> >> +static bool
> >> +riscv_macro_fusion_pair_p (rtx_insn *prev, rtx_insn *curr)
> >> +{
> >> +  rtx prev_set = single_set (prev);
> >> +  rtx curr_set = single_set (curr);
> >> +  /* prev and curr are simple SET insns i.e. no flag setting or
> branching.  */
> >> +  bool simple_sets_p = prev_set && curr_set && !any_condjump_p (curr);
> >> +
> >> +  if (!riscv_macro_fusion_p ())
> >> +    return false;
> >> +
> >> +  if (simple_sets_p && (riscv_fusion_enabled_p (RISCV_FUSE_ZEXTW) ||
> >> +                       riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH)))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (slli) == (set (reg:DI rD)
> >> +                              (ashift:DI (reg:DI rS) (const_int 32)))
> >> +          curr (slri) == (set (reg:DI rD)
>
> This really jumps out as the sort of thing that should be in RTL, but
> I'm not sure that's feasible.  The short branch->cmov optimization we're
> doing for the SiFive 7 series is sort of fusion and done in RTL, but the
> mechanism is very different there so it wouldn't apply directly.  There
> are other ports doing it this way so I'm not opposed to merging this
> as-is, but if there's a straight-forward way to port this to RTL then
> it's probably be a lot saner to maintain in the long term.  I can't come
> up with anything sane, though.
>
> On the subject of maintainability: I don't see any tests for this.
> Given that we're exceedingly likely to end up with other targets that
> have front-end fusion and thus try to share this code, it'd be really
> nice to have either some documentation of what fusions this core
> performs or some tests that exercise the important ones.  I can't find
> any other references to "Ventana VT1" on the internet, but LMK if
> there's something I'm missing.
>
> I'm not sure lacking tests should block merging this (as it's not like
> we have much optimization-based testing for the RISC-V port), but
> without at least some public documentation it's going to be hard to
> produce those outside of Ventana -- essentially I'm worried about ending
> up with the same black-box pipeline problem we have for the C906.
>
> I seem to remember having tests for the SiFive optimization, but I
> couldn't find any when I looked.  Maybe Jim or Kito remembers where to
> find them, otherwise I'll throw some together.
>
> >
> > slri -> srli
> >
> >> +                              (lshiftrt:DI (reg:DI rD) (const_int
> <shift>)))
> >> +        with <shift> being either 32 for FUSE_ZEXTW, or
> >> +                        `less than 32 for FUSE_ZEXTWS. */
> >
> > `less <-- I guess the ` is kind of an accident?
> >
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> >> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> >> +         && REG_P (SET_DEST (prev_set))
> >> +         && REG_P (SET_DEST (curr_set))
> >> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST
> (curr_set))
> >> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 32
> >> +         && (( INTVAL (XEXP (SET_SRC (curr_set), 1)) == 32
> >> +               && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTW) )
> >
> > space between function name and parentheses.
> >
> >> +             || ( INTVAL (XEXP (SET_SRC (curr_set), 1)) < 32
> >> +                  && riscv_fusion_enabled_p(RISCV_FUSE_ZEXTWS))))
> >
> > space between function name and parentheses.
> >
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_ZEXTH))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (slli) == (set (reg:DI rD)
> >> +                              (ashift:DI (reg:DI rS) (const_int 48)))
> >> +          curr (slri) == (set (reg:DI rD)
> >> +                              (lshiftrt:DI (reg:DI rD) (const_int
> 48))) */
> >
> > slri -> srli
> >
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == ASHIFT
> >> +         && GET_CODE (SET_SRC (curr_set)) == LSHIFTRT
> >> +         && REG_P (SET_DEST (prev_set))
> >> +         && REG_P (SET_DEST (curr_set))
> >> +         && REGNO (SET_DEST (prev_set)) == REGNO (SET_DEST (curr_set))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO(SET_DEST
> (curr_set))
> >> +         && CONST_INT_P (XEXP (SET_SRC (prev_set), 1))
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && INTVAL (XEXP (SET_SRC (prev_set), 1)) == 48
> >> +         && INTVAL (XEXP (SET_SRC (curr_set), 1)) == 48)
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LDINDEXED))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (add) == (set (reg:DI rD)
> >> +                             (plus:DI (reg:DI rS1) (reg:DI rS2))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (reg:DI rD))) */
> >> +
> >> +      if (MEM_P (SET_SRC (curr_set))
> >> +         && REG_P (XEXP (SET_SRC (curr_set), 0))
> >> +         && REGNO (XEXP (SET_SRC (curr_set), 0)) == REGNO (SET_DEST
> (prev_set))
> >> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> >> +       return true;
> >> +
> >> +      /* We are trying to match the following:
> >> +          prev (add) == (set (reg:DI rD)
> >> +                             (plus:DI (reg:DI rS1) (reg:DI rS2)))
> >> +          curr (lw)  == (set (any_extend:DI (mem:SUBX (reg:DI rD)))) */
> >> +
> >> +      if ((GET_CODE (SET_SRC (curr_set)) == SIGN_EXTEND
> >> +          || (GET_CODE (SET_SRC (curr_set)) == ZERO_EXTEND))
> >> +         && MEM_P (XEXP (SET_SRC (curr_set), 0))
> >> +         && REG_P (XEXP (XEXP (SET_SRC (curr_set), 0), 0))
> >> +         && REGNO (XEXP (XEXP (SET_SRC (curr_set), 0), 0)) == REGNO
> (SET_DEST (prev_set))
> >
> > This line is over 80 columns.
> >
> >
> >> +         && GET_CODE (SET_SRC (prev_set)) == PLUS
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 0))
> >> +         && REG_P (XEXP (SET_SRC (prev_set), 1)))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_ADDI))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> >> +          curr (addi) == (set (reg:DI rD)
> >> +                              (plus:DI (reg:DI rD) (const_int IMM12)))
> */
> >> +
> >> +      if (GET_CODE (SET_SRC (curr_set)) == PLUS
> >> +         && CONST_INT_P (XEXP (SET_SRC (curr_set), 1))
> >> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1)))
> >> +         && CONST_INT_P (SET_SRC (prev_set))
> >> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set))))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_ADDI))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...]
> UNSPEC_AUIPC))
> >> +          prev (addi)  == (set (reg:DI rD)
> >> +                               (plus:DI (reg:DI rD) (const_int
> IMM12))) */
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> >> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> >> +         && GET_CODE (SET_SRC (curr_set)) == PLUS
> >> +         && SMALL_OPERAND (INTVAL (XEXP (SET_SRC (curr_set), 1))))
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_LUI_LD))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (lui)  == (set (reg:DI rD) (const_int UPPER_IMM_20))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (plus:DI (reg:DI rD) (const_int
> IMM12)))) */
> >> +
> >> +      if (CONST_INT_P (SET_SRC (prev_set))
> >> +         && LUI_OPERAND (INTVAL (SET_SRC (prev_set)))
> >> +         && MEM_P (SET_SRC (curr_set))
> >> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> >> +       return true;
> >> +    }
> >> +
> >> +  if (simple_sets_p && riscv_fusion_enabled_p (RISCV_FUSE_AUIPC_LD))
> >> +    {
> >> +      /* We are trying to match the following:
> >> +          prev (auipc) == (set (reg:DI rD) (unspec:DI [...]
> UNSPEC_AUIPC))
> >> +          curr (ld)  == (set (reg:DI rD)
> >> +                             (mem:DI (plus:DI (reg:DI rD) (const_int
> IMM12)))) */
> >> +
> >> +      if (GET_CODE (SET_SRC (prev_set)) == UNSPEC
> >> +         && XINT (prev_set, 1) == UNSPEC_AUIPC
> >> +         && MEM_P (SET_SRC (curr_set))
> >> +         && GET_CODE (XEXP (SET_SRC (curr_set), 0)) == PLUS)
> >> +       return true;
> >> +    }
> >> +
> >> +  return false;
> >> +}
> >> +
> >>  /* Auxiliary function to emit RISC-V ELF attribute. */
> >>  static void
> >>  riscv_emit_attribute ()
> >> @@ -5676,6 +5868,10 @@ riscv_asan_shadow_offset (void)
> >>
> >>  #undef TARGET_SCHED_ISSUE_RATE
> >>  #define TARGET_SCHED_ISSUE_RATE riscv_issue_rate
> >> +#undef TARGET_SCHED_MACRO_FUSION_P
> >> +#define TARGET_SCHED_MACRO_FUSION_P riscv_macro_fusion_p
> >> +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
> >> +#define TARGET_SCHED_MACRO_FUSION_PAIR_P riscv_macro_fusion_pair_p
> >>
> >>  #undef TARGET_FUNCTION_OK_FOR_SIBCALL
> >>  #define TARGET_FUNCTION_OK_FOR_SIBCALL riscv_function_ok_for_sibcall
> >> --
> >> 2.32.0
> >>
>

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-11-17 19:44 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-11-14 21:47 [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Philipp Tomsich
2021-11-14 21:47 ` [PATCH v1 1/2] RISC-V: Add basic support for the Ventana-VT1 core Philipp Tomsich
2021-11-14 21:47 ` [PATCH v1 2/2] RISC-V: Add instruction fusion (for ventana-vt1) Philipp Tomsich
2021-11-17 14:05   ` Kito Cheng
2021-11-17 19:40     ` Palmer Dabbelt
2021-11-17 19:44       ` Philipp Tomsich
2021-11-17 14:05 ` [PATCH v1 0/2] Basic support for the Ventana VT1 w/ instruction fusion Kito Cheng

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).