public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
@ 2023-05-12  5:00 pan2.li
  2023-05-12  6:49 ` Richard Biener
                   ` (2 more replies)
  0 siblings, 3 replies; 33+ messages in thread
From: pan2.li @ 2023-05-12  5:00 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, pan2.li, yanzhang.wang, jeffreyalaw,
	rguenther, richard.sandiford

From: Pan Li <pan2.li@intel.com>

We are running out of the machine_mode(8 bits) in RISC-V backend. Thus
we would like to extend the machine mode bit size from 8 to 16 bits.
However, it is sensitive to extend the memory size in common structure
like tree or rtx. This patch would like to extend the machine mode bits
to 16 bits by shrinking, like:

* Swap the bit size of code and machine code in rtx_def.
* Reconcile the machine_mode location and spare in tree.

The memory impact of this patch for correlated structure looks like below:

+-------------------+----------+---------+------+
| struct/bytes      | upstream | patched | diff |
+-------------------+----------+---------+------+
| rtx_obj_reference |        8 |      12 |   +4 |
| ext_modified      |        2 |       3 |   +1 |
| ira_allocno       |      192 |     200 |   +8 |
| qty_table_elem    |       40 |      40 |    0 |
| reg_stat_type     |       64 |      64 |    0 |
| rtx_def           |       40 |      40 |    0 |
| table_elt         |       80 |      80 |    0 |
| tree_decl_common  |      112 |     112 |    0 |
| tree_type_common  |      128 |     128 |    0 |
+-------------------+----------+---------+------+

The tree and rtx related struct has no memory changes after this patch,
and the machine_mode changes to 16 bits already.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: Kito Cheng <kito.cheng@sifive.com>

gcc/ChangeLog:

	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
	* cse.cc (struct qty_table_elem): Ditto.
	(struct table_elt): Ditto.
	(struct set): Ditto.
	* genopinit.cc (main): Reconciled the machine mode limit.
	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.
	* rtl-ssa/accesses.h: Ditto.
	* rtl.h (RTX_CODE_BITSIZE): New macro.
	(RTX_MACHINE_MODE_BITSIZE): Ditto.
	(struct GTY): Swap bit size between code and machine mode.
	(subreg_shape::unique_id): Reconciled the machine mode limit.
	* rtlanal.h: Extended machine mode to 16 bits.
	* tree-core.h (struct tree_type_common): Ditto.
	(struct tree_decl_common): Reconciled the locate and extended
	bit size of machine mode.
---
 gcc/combine.cc         |  4 ++--
 gcc/cse.cc             |  8 ++++----
 gcc/genopinit.cc       |  3 ++-
 gcc/ira-int.h          | 12 ++++++++----
 gcc/ree.cc             |  2 +-
 gcc/rtl-ssa/accesses.h |  6 ++++--
 gcc/rtl.h              |  9 ++++++---
 gcc/rtlanal.h          |  5 +++--
 gcc/tree-core.h        | 11 ++++++++---
 9 files changed, 38 insertions(+), 22 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5aa0ec5c45a..bdf6f635c80 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -200,7 +200,7 @@ struct reg_stat_type {
 
   unsigned HOST_WIDE_INT	last_set_nonzero_bits;
   char				last_set_sign_bit_copies;
-  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
+  ENUM_BITFIELD(machine_mode)	last_set_mode : RTX_MACHINE_MODE_BITSIZE;
 
   /* Set nonzero if references to register n in expressions should not be
      used.  last_set_invalid is set nonzero when this register is being
@@ -235,7 +235,7 @@ struct reg_stat_type {
      truncation if we know that value already contains a truncated
      value.  */
 
-  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
+  ENUM_BITFIELD(machine_mode)	truncated_to_mode : RTX_MACHINE_MODE_BITSIZE;
 };
 
 
diff --git a/gcc/cse.cc b/gcc/cse.cc
index b10c9b0c94d..fe594c1bc3d 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -250,8 +250,8 @@ struct qty_table_elem
   unsigned int first_reg, last_reg;
   /* The sizes of these fields should match the sizes of the
      code and mode fields of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(rtx_code) comparison_code : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
+  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
 };
 
 /* The table of all qtys, indexed by qty number.  */
@@ -406,7 +406,7 @@ struct table_elt
   int regcost;
   /* The size of this field should match the size
      of the mode field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
   char in_memory;
   char is_const;
   char flag;
@@ -4155,7 +4155,7 @@ struct set
   /* Original machine mode, in case it becomes a CONST_INT.
      The size of this field should match the size of the mode
      field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
   /* Hash value of constant equivalent for SET_SRC.  */
   unsigned src_const_hash;
   /* A constant equivalent for SET_SRC, if any.  */
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
index 83cb7504fa1..2add8b925da 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -182,7 +182,8 @@ main (int argc, const char **argv)
 
   progname = "genopinit";
 
-  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
+  if (NUM_OPTABS > 0xffff
+    || MAX_MACHINE_MODE >= ((1 << RTX_MACHINE_MODE_BITSIZE) - 1))
     fatal ("genopinit range assumptions invalid");
 
   if (!init_rtx_reader_args_cb (argc, argv, handle_arg))
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index e2de47213b4..124e14200b1 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -280,11 +280,15 @@ struct ira_allocno
   /* Regno for allocno or cap.  */
   int regno;
   /* Mode of the allocno which is the mode of the corresponding
-     pseudo-register.  */
-  ENUM_BITFIELD (machine_mode) mode : 8;
+     pseudo-register.  Note the bitsize of mode should be exactly
+     the same as the definition of rtx_def, aka RTX_MACHINE_MODE_BITSIZE
+     in rtl.h.  */
+  ENUM_BITFIELD (machine_mode) mode : 16;
   /* Widest mode of the allocno which in at least one case could be
-     for paradoxical subregs where wmode > mode.  */
-  ENUM_BITFIELD (machine_mode) wmode : 8;
+     for paradoxical subregs where wmode > mode.  Note the bitsize of
+     wmode should be exactly the same as the definition of rtx_def,
+     aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
+  ENUM_BITFIELD (machine_mode) wmode : 16;
   /* Register class which should be used for allocation for given
      allocno.  NO_REGS means that we should use memory.  */
   ENUM_BITFIELD (reg_class) aclass : 16;
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..fb011bc907c 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -567,7 +567,7 @@ enum ext_modified_kind
 struct ATTRIBUTE_PACKED ext_modified
 {
   /* Mode from which ree has zero or sign extended the destination.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
 
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2;
diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
index c5180b9308a..2e73004cafa 100644
--- a/gcc/rtl-ssa/accesses.h
+++ b/gcc/rtl-ssa/accesses.h
@@ -253,8 +253,10 @@ private:
   // Bits for future expansion.
   unsigned int m_spare : 2;
 
-  // The value returned by the accessor above.
-  machine_mode m_mode : 8;
+  // The value returned by the accessor above.  Note the bitsize of
+  // m_mode should be exactly the same as the definition of rtx_def,
+  // aka RTX_MACHINE_MODE_BITSIZE in rtl.h.
+  machine_mode m_mode : 16;
 };
 
 // A contiguous array of access_info pointers.  Used to represent a
diff --git a/gcc/rtl.h b/gcc/rtl.h
index f634cab730b..a18ecf7632f 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -63,6 +63,9 @@ enum rtx_code  {
 # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)
 #endif
 
+#define RTX_CODE_BITSIZE 8
+#define RTX_MACHINE_MODE_BITSIZE 16
+
 /* Register Transfer Language EXPRESSIONS CODE CLASSES */
 
 enum rtx_class  {
@@ -310,10 +313,10 @@ struct GTY((desc("0"), tag("0"),
 	    chain_next ("RTX_NEXT (&%h)"),
 	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
   /* The kind of expression this is.  */
-  ENUM_BITFIELD(rtx_code) code: 16;
+  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
 
   /* The kind of value the expression has.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
 
   /* 1 in a MEM if we should keep the alias set for this mem unchanged
      when we access a component.
@@ -2164,7 +2167,7 @@ subreg_shape::operator != (const subreg_shape &other) const
 inline unsigned HOST_WIDE_INT
 subreg_shape::unique_id () const
 {
-  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
+  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << RTX_MACHINE_MODE_BITSIZE)); }
   { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
   { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
   int res = (int) inner_mode + ((int) outer_mode << 8);
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 5fbed816e20..15aba0dec7a 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -99,8 +99,9 @@ public:
   unsigned int flags : 16;
 
   /* The mode of the reference.  If IS_MULTIREG, this is the mode of
-     REGNO - MULTIREG_OFFSET.  */
-  machine_mode mode : 8;
+     REGNO - MULTIREG_OFFSET.  Note the bitsize of mode should be exactly
+     the same as the definition of rtx_def,  */
+  machine_mode mode : 16;
 
   /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
   unsigned int multireg_offset : 8;
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index a1aea136e75..001d232f433 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1680,8 +1680,11 @@ struct GTY(()) tree_type_common {
   tree attributes;
   unsigned int uid;
 
+  /* Note the bitsize of wmode should be exactly the same as the
+     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
+  ENUM_BITFIELD(machine_mode) mode : 16;
+
   unsigned int precision : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
   unsigned lang_flag_0 : 1;
   unsigned lang_flag_1 : 1;
   unsigned lang_flag_2 : 1;
@@ -1712,7 +1715,7 @@ struct GTY(()) tree_type_common {
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
   unsigned no_named_args_stdarg_p : 1;
-  unsigned spare : 9;
+  unsigned spare : 1;
 
   alias_set_type alias_set;
   tree pointer_to;
@@ -1770,7 +1773,9 @@ struct GTY(()) tree_decl_common {
   struct tree_decl_minimal common;
   tree size;
 
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  /* Note the bitsize of wmode should be exactly the same as the
+     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
+  ENUM_BITFIELD(machine_mode) mode : 16;
 
   unsigned nonlocal_flag : 1;
   unsigned virtual_flag : 1;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12  5:00 [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits pan2.li
@ 2023-05-12  6:49 ` Richard Biener
  2023-05-12 19:14   ` Bernhard Reutner-Fischer
  2023-05-12  8:24 ` [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits Richard Sandiford
  2023-05-16 15:35 ` pan2.li
  2 siblings, 1 reply; 33+ messages in thread
From: Richard Biener @ 2023-05-12  6:49 UTC (permalink / raw)
  To: pan2.li
  Cc: gcc-patches, juzhe.zhong, kito.cheng, yanzhang.wang, jeffreyalaw,
	richard.sandiford

On Fri, 12 May 2023, pan2.li@intel.com wrote:

> From: Pan Li <pan2.li@intel.com>
> 
> We are running out of the machine_mode(8 bits) in RISC-V backend. Thus
> we would like to extend the machine mode bit size from 8 to 16 bits.
> However, it is sensitive to extend the memory size in common structure
> like tree or rtx. This patch would like to extend the machine mode bits
> to 16 bits by shrinking, like:
> 
> * Swap the bit size of code and machine code in rtx_def.
> * Reconcile the machine_mode location and spare in tree.
> 
> The memory impact of this patch for correlated structure looks like below:
> 
> +-------------------+----------+---------+------+
> | struct/bytes      | upstream | patched | diff |
> +-------------------+----------+---------+------+
> | rtx_obj_reference |        8 |      12 |   +4 |
> | ext_modified      |        2 |       3 |   +1 |

this struct is packed and we have an array of it - it _might_ be
bad to have elements of size 3 here.  Size 4 shouldn't be too
bad so I suggest to remove the packed attribute there.

> | ira_allocno       |      192 |     200 |   +8 |

that looks unfortunate - did you check if there's now
padding that could be used by re-ordering fields?

> | qty_table_elem    |       40 |      40 |    0 |
> | reg_stat_type     |       64 |      64 |    0 |
> | rtx_def           |       40 |      40 |    0 |
> | table_elt         |       80 |      80 |    0 |
> | tree_decl_common  |      112 |     112 |    0 |
> | tree_type_common  |      128 |     128 |    0 |
> +-------------------+----------+---------+------+
> 
> The tree and rtx related struct has no memory changes after this patch,
> and the machine_mode changes to 16 bits already.
> 
> Signed-off-by: Pan Li <pan2.li@intel.com>
> Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
> 
> gcc/ChangeLog:
> 
> 	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
> 	* cse.cc (struct qty_table_elem): Ditto.
> 	(struct table_elt): Ditto.
> 	(struct set): Ditto.
> 	* genopinit.cc (main): Reconciled the machine mode limit.
> 	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
> 	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.

please go over the ChangeLog and properly specify the structure types
altered.  The script generating the changelog isn't perfect.

Richard.

> 	* rtl-ssa/accesses.h: Ditto.
> 	* rtl.h (RTX_CODE_BITSIZE): New macro.
> 	(RTX_MACHINE_MODE_BITSIZE): Ditto.
> 	(struct GTY): Swap bit size between code and machine mode.
> 	(subreg_shape::unique_id): Reconciled the machine mode limit.
> 	* rtlanal.h: Extended machine mode to 16 bits.
> 	* tree-core.h (struct tree_type_common): Ditto.
> 	(struct tree_decl_common): Reconciled the locate and extended
> 	bit size of machine mode.
> ---
>  gcc/combine.cc         |  4 ++--
>  gcc/cse.cc             |  8 ++++----
>  gcc/genopinit.cc       |  3 ++-
>  gcc/ira-int.h          | 12 ++++++++----
>  gcc/ree.cc             |  2 +-
>  gcc/rtl-ssa/accesses.h |  6 ++++--
>  gcc/rtl.h              |  9 ++++++---
>  gcc/rtlanal.h          |  5 +++--
>  gcc/tree-core.h        | 11 ++++++++---
>  9 files changed, 38 insertions(+), 22 deletions(-)
> 
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 5aa0ec5c45a..bdf6f635c80 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -200,7 +200,7 @@ struct reg_stat_type {
>  
>    unsigned HOST_WIDE_INT	last_set_nonzero_bits;
>    char				last_set_sign_bit_copies;
> -  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
> +  ENUM_BITFIELD(machine_mode)	last_set_mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* Set nonzero if references to register n in expressions should not be
>       used.  last_set_invalid is set nonzero when this register is being
> @@ -235,7 +235,7 @@ struct reg_stat_type {
>       truncation if we know that value already contains a truncated
>       value.  */
>  
> -  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
> +  ENUM_BITFIELD(machine_mode)	truncated_to_mode : RTX_MACHINE_MODE_BITSIZE;
>  };
>  
>  
> diff --git a/gcc/cse.cc b/gcc/cse.cc
> index b10c9b0c94d..fe594c1bc3d 100644
> --- a/gcc/cse.cc
> +++ b/gcc/cse.cc
> @@ -250,8 +250,8 @@ struct qty_table_elem
>    unsigned int first_reg, last_reg;
>    /* The sizes of these fields should match the sizes of the
>       code and mode fields of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(rtx_code) comparison_code : 16;
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>  };
>  
>  /* The table of all qtys, indexed by qty number.  */
> @@ -406,7 +406,7 @@ struct table_elt
>    int regcost;
>    /* The size of this field should match the size
>       of the mode field of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>    char in_memory;
>    char is_const;
>    char flag;
> @@ -4155,7 +4155,7 @@ struct set
>    /* Original machine mode, in case it becomes a CONST_INT.
>       The size of this field should match the size of the mode
>       field of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>    /* Hash value of constant equivalent for SET_SRC.  */
>    unsigned src_const_hash;
>    /* A constant equivalent for SET_SRC, if any.  */
> diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
> index 83cb7504fa1..2add8b925da 100644
> --- a/gcc/genopinit.cc
> +++ b/gcc/genopinit.cc
> @@ -182,7 +182,8 @@ main (int argc, const char **argv)
>  
>    progname = "genopinit";
>  
> -  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
> +  if (NUM_OPTABS > 0xffff
> +    || MAX_MACHINE_MODE >= ((1 << RTX_MACHINE_MODE_BITSIZE) - 1))
>      fatal ("genopinit range assumptions invalid");
>  
>    if (!init_rtx_reader_args_cb (argc, argv, handle_arg))
> diff --git a/gcc/ira-int.h b/gcc/ira-int.h
> index e2de47213b4..124e14200b1 100644
> --- a/gcc/ira-int.h
> +++ b/gcc/ira-int.h
> @@ -280,11 +280,15 @@ struct ira_allocno
>    /* Regno for allocno or cap.  */
>    int regno;
>    /* Mode of the allocno which is the mode of the corresponding
> -     pseudo-register.  */
> -  ENUM_BITFIELD (machine_mode) mode : 8;
> +     pseudo-register.  Note the bitsize of mode should be exactly
> +     the same as the definition of rtx_def, aka RTX_MACHINE_MODE_BITSIZE
> +     in rtl.h.  */
> +  ENUM_BITFIELD (machine_mode) mode : 16;
>    /* Widest mode of the allocno which in at least one case could be
> -     for paradoxical subregs where wmode > mode.  */
> -  ENUM_BITFIELD (machine_mode) wmode : 8;
> +     for paradoxical subregs where wmode > mode.  Note the bitsize of
> +     wmode should be exactly the same as the definition of rtx_def,
> +     aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
> +  ENUM_BITFIELD (machine_mode) wmode : 16;
>    /* Register class which should be used for allocation for given
>       allocno.  NO_REGS means that we should use memory.  */
>    ENUM_BITFIELD (reg_class) aclass : 16;
> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index 413aec7c8eb..fb011bc907c 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -567,7 +567,7 @@ enum ext_modified_kind
>  struct ATTRIBUTE_PACKED ext_modified
>  {
>    /* Mode from which ree has zero or sign extended the destination.  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* Kind of modification of the insn.  */
>    ENUM_BITFIELD(ext_modified_kind) kind : 2;
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
> index c5180b9308a..2e73004cafa 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -253,8 +253,10 @@ private:
>    // Bits for future expansion.
>    unsigned int m_spare : 2;
>  
> -  // The value returned by the accessor above.
> -  machine_mode m_mode : 8;
> +  // The value returned by the accessor above.  Note the bitsize of
> +  // m_mode should be exactly the same as the definition of rtx_def,
> +  // aka RTX_MACHINE_MODE_BITSIZE in rtl.h.
> +  machine_mode m_mode : 16;
>  };
>  
>  // A contiguous array of access_info pointers.  Used to represent a
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index f634cab730b..a18ecf7632f 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -63,6 +63,9 @@ enum rtx_code  {
>  # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)
>  #endif
>  
> +#define RTX_CODE_BITSIZE 8
> +#define RTX_MACHINE_MODE_BITSIZE 16
> +
>  /* Register Transfer Language EXPRESSIONS CODE CLASSES */
>  
>  enum rtx_class  {
> @@ -310,10 +313,10 @@ struct GTY((desc("0"), tag("0"),
>  	    chain_next ("RTX_NEXT (&%h)"),
>  	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
>    /* The kind of expression this is.  */
> -  ENUM_BITFIELD(rtx_code) code: 16;
> +  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
>  
>    /* The kind of value the expression has.  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* 1 in a MEM if we should keep the alias set for this mem unchanged
>       when we access a component.
> @@ -2164,7 +2167,7 @@ subreg_shape::operator != (const subreg_shape &other) const
>  inline unsigned HOST_WIDE_INT
>  subreg_shape::unique_id () const
>  {
> -  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
> +  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << RTX_MACHINE_MODE_BITSIZE)); }
>    { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
>    { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
>    int res = (int) inner_mode + ((int) outer_mode << 8);
> diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
> index 5fbed816e20..15aba0dec7a 100644
> --- a/gcc/rtlanal.h
> +++ b/gcc/rtlanal.h
> @@ -99,8 +99,9 @@ public:
>    unsigned int flags : 16;
>  
>    /* The mode of the reference.  If IS_MULTIREG, this is the mode of
> -     REGNO - MULTIREG_OFFSET.  */
> -  machine_mode mode : 8;
> +     REGNO - MULTIREG_OFFSET.  Note the bitsize of mode should be exactly
> +     the same as the definition of rtx_def,  */
> +  machine_mode mode : 16;
>  
>    /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
>    unsigned int multireg_offset : 8;
> diff --git a/gcc/tree-core.h b/gcc/tree-core.h
> index a1aea136e75..001d232f433 100644
> --- a/gcc/tree-core.h
> +++ b/gcc/tree-core.h
> @@ -1680,8 +1680,11 @@ struct GTY(()) tree_type_common {
>    tree attributes;
>    unsigned int uid;
>  
> +  /* Note the bitsize of wmode should be exactly the same as the
> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
> +  ENUM_BITFIELD(machine_mode) mode : 16;
> +
>    unsigned int precision : 16;
> -  ENUM_BITFIELD(machine_mode) mode : 8;
>    unsigned lang_flag_0 : 1;
>    unsigned lang_flag_1 : 1;
>    unsigned lang_flag_2 : 1;
> @@ -1712,7 +1715,7 @@ struct GTY(()) tree_type_common {
>    unsigned empty_flag : 1;
>    unsigned indivisible_p : 1;
>    unsigned no_named_args_stdarg_p : 1;
> -  unsigned spare : 9;
> +  unsigned spare : 1;
>  
>    alias_set_type alias_set;
>    tree pointer_to;
> @@ -1770,7 +1773,9 @@ struct GTY(()) tree_decl_common {
>    struct tree_decl_minimal common;
>    tree size;
>  
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  /* Note the bitsize of wmode should be exactly the same as the
> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
> +  ENUM_BITFIELD(machine_mode) mode : 16;
>  
>    unsigned nonlocal_flag : 1;
>    unsigned virtual_flag : 1;
> 

-- 
Richard Biener <rguenther@suse.de>
SUSE Software Solutions Germany GmbH, Frankenstrasse 146, 90461 Nuernberg,
Germany; GF: Ivo Totev, Andrew Myers, Andrew McDonald, Boudien Moerman;
HRB 36809 (AG Nuernberg)

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12  5:00 [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits pan2.li
  2023-05-12  6:49 ` Richard Biener
@ 2023-05-12  8:24 ` Richard Sandiford
  2023-05-12 11:16   ` Li, Pan2
  2023-05-16 15:35 ` pan2.li
  2 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2023-05-12  8:24 UTC (permalink / raw)
  To: pan2.li
  Cc: gcc-patches, juzhe.zhong, kito.cheng, yanzhang.wang, jeffreyalaw,
	rguenther

pan2.li@intel.com writes:
> From: Pan Li <pan2.li@intel.com>
>
> We are running out of the machine_mode(8 bits) in RISC-V backend. Thus
> we would like to extend the machine mode bit size from 8 to 16 bits.
> However, it is sensitive to extend the memory size in common structure
> like tree or rtx. This patch would like to extend the machine mode bits
> to 16 bits by shrinking, like:
>
> * Swap the bit size of code and machine code in rtx_def.
> * Reconcile the machine_mode location and spare in tree.
>
> The memory impact of this patch for correlated structure looks like below:
>
> +-------------------+----------+---------+------+
> | struct/bytes      | upstream | patched | diff |
> +-------------------+----------+---------+------+
> | rtx_obj_reference |        8 |      12 |   +4 |
> | ext_modified      |        2 |       3 |   +1 |
> | ira_allocno       |      192 |     200 |   +8 |
> | qty_table_elem    |       40 |      40 |    0 |
> | reg_stat_type     |       64 |      64 |    0 |
> | rtx_def           |       40 |      40 |    0 |
> | table_elt         |       80 |      80 |    0 |
> | tree_decl_common  |      112 |     112 |    0 |
> | tree_type_common  |      128 |     128 |    0 |
> +-------------------+----------+---------+------+
>
> The tree and rtx related struct has no memory changes after this patch,
> and the machine_mode changes to 16 bits already.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
>
> gcc/ChangeLog:
>
> 	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
> 	* cse.cc (struct qty_table_elem): Ditto.
> 	(struct table_elt): Ditto.
> 	(struct set): Ditto.
> 	* genopinit.cc (main): Reconciled the machine mode limit.
> 	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
> 	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.
> 	* rtl-ssa/accesses.h: Ditto.
> 	* rtl.h (RTX_CODE_BITSIZE): New macro.
> 	(RTX_MACHINE_MODE_BITSIZE): Ditto.
> 	(struct GTY): Swap bit size between code and machine mode.
> 	(subreg_shape::unique_id): Reconciled the machine mode limit.
> 	* rtlanal.h: Extended machine mode to 16 bits.
> 	* tree-core.h (struct tree_type_common): Ditto.
> 	(struct tree_decl_common): Reconciled the locate and extended
> 	bit size of machine mode.
> ---
>  gcc/combine.cc         |  4 ++--
>  gcc/cse.cc             |  8 ++++----
>  gcc/genopinit.cc       |  3 ++-
>  gcc/ira-int.h          | 12 ++++++++----
>  gcc/ree.cc             |  2 +-
>  gcc/rtl-ssa/accesses.h |  6 ++++--
>  gcc/rtl.h              |  9 ++++++---
>  gcc/rtlanal.h          |  5 +++--
>  gcc/tree-core.h        | 11 ++++++++---
>  9 files changed, 38 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/combine.cc b/gcc/combine.cc
> index 5aa0ec5c45a..bdf6f635c80 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -200,7 +200,7 @@ struct reg_stat_type {
>  
>    unsigned HOST_WIDE_INT	last_set_nonzero_bits;
>    char				last_set_sign_bit_copies;
> -  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
> +  ENUM_BITFIELD(machine_mode)	last_set_mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* Set nonzero if references to register n in expressions should not be
>       used.  last_set_invalid is set nonzero when this register is being
> @@ -235,7 +235,7 @@ struct reg_stat_type {
>       truncation if we know that value already contains a truncated
>       value.  */
>  
> -  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
> +  ENUM_BITFIELD(machine_mode)	truncated_to_mode : RTX_MACHINE_MODE_BITSIZE;
>  };
>  
>  
> diff --git a/gcc/cse.cc b/gcc/cse.cc
> index b10c9b0c94d..fe594c1bc3d 100644
> --- a/gcc/cse.cc
> +++ b/gcc/cse.cc
> @@ -250,8 +250,8 @@ struct qty_table_elem
>    unsigned int first_reg, last_reg;
>    /* The sizes of these fields should match the sizes of the
>       code and mode fields of struct rtx_def (see rtl.h).  */

The comment can be removed, since you're now adding macros to ensure
this (thanks).  Same for other instances of the comment.

> -  ENUM_BITFIELD(rtx_code) comparison_code : 16;
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;

Please put the mode first, so that the 16-bit value is aligned
to 16 bits.

>  };
>  
>  /* The table of all qtys, indexed by qty number.  */
> @@ -406,7 +406,7 @@ struct table_elt
>    int regcost;
>    /* The size of this field should match the size
>       of the mode field of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>    char in_memory;
>    char is_const;
>    char flag;
> @@ -4155,7 +4155,7 @@ struct set
>    /* Original machine mode, in case it becomes a CONST_INT.
>       The size of this field should match the size of the mode
>       field of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>    /* Hash value of constant equivalent for SET_SRC.  */
>    unsigned src_const_hash;
>    /* A constant equivalent for SET_SRC, if any.  */
> diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
> index 83cb7504fa1..2add8b925da 100644
> --- a/gcc/genopinit.cc
> +++ b/gcc/genopinit.cc
> @@ -182,7 +182,8 @@ main (int argc, const char **argv)
>  
>    progname = "genopinit";
>  
> -  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
> +  if (NUM_OPTABS > 0xffff
> +    || MAX_MACHINE_MODE >= ((1 << RTX_MACHINE_MODE_BITSIZE) - 1))
>      fatal ("genopinit range assumptions invalid");
>  
>    if (!init_rtx_reader_args_cb (argc, argv, handle_arg))
> diff --git a/gcc/ira-int.h b/gcc/ira-int.h
> index e2de47213b4..124e14200b1 100644
> --- a/gcc/ira-int.h
> +++ b/gcc/ira-int.h
> @@ -280,11 +280,15 @@ struct ira_allocno
>    /* Regno for allocno or cap.  */
>    int regno;
>    /* Mode of the allocno which is the mode of the corresponding
> -     pseudo-register.  */
> -  ENUM_BITFIELD (machine_mode) mode : 8;
> +     pseudo-register.  Note the bitsize of mode should be exactly
> +     the same as the definition of rtx_def, aka RTX_MACHINE_MODE_BITSIZE
> +     in rtl.h.  */
> +  ENUM_BITFIELD (machine_mode) mode : 16;
>    /* Widest mode of the allocno which in at least one case could be
> -     for paradoxical subregs where wmode > mode.  */
> -  ENUM_BITFIELD (machine_mode) wmode : 8;
> +     for paradoxical subregs where wmode > mode.  Note the bitsize of
> +     wmode should be exactly the same as the definition of rtx_def,
> +     aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
> +  ENUM_BITFIELD (machine_mode) wmode : 16;
>    /* Register class which should be used for allocation for given
>       allocno.  NO_REGS means that we should use memory.  */
>    ENUM_BITFIELD (reg_class) aclass : 16;

Why not use the BITSIZE macros directly?  Any reasonable use of
ira-int.h will also need rtl.h.  If something includes ira-int.h
before rtl.h then we should change that.

To avoid growing this structure, please move something into one of
the holes later in the structure.  E.g. hard_regno could go before
num_objects.

> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index 413aec7c8eb..fb011bc907c 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -567,7 +567,7 @@ enum ext_modified_kind
>  struct ATTRIBUTE_PACKED ext_modified
>  {
>    /* Mode from which ree has zero or sign extended the destination.  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* Kind of modification of the insn.  */
>    ENUM_BITFIELD(ext_modified_kind) kind : 2;
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
> index c5180b9308a..2e73004cafa 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -253,8 +253,10 @@ private:
>    // Bits for future expansion.
>    unsigned int m_spare : 2;
>  
> -  // The value returned by the accessor above.
> -  machine_mode m_mode : 8;
> +  // The value returned by the accessor above.  Note the bitsize of
> +  // m_mode should be exactly the same as the definition of rtx_def,
> +  // aka RTX_MACHINE_MODE_BITSIZE in rtl.h.
> +  machine_mode m_mode : 16;
>  };
>  
>  // A contiguous array of access_info pointers.  Used to represent a
> diff --git a/gcc/rtl.h b/gcc/rtl.h
> index f634cab730b..a18ecf7632f 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -63,6 +63,9 @@ enum rtx_code  {
>  # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)
>  #endif
>  
> +#define RTX_CODE_BITSIZE 8
> +#define RTX_MACHINE_MODE_BITSIZE 16
> +
>  /* Register Transfer Language EXPRESSIONS CODE CLASSES */
>  
>  enum rtx_class  {
> @@ -310,10 +313,10 @@ struct GTY((desc("0"), tag("0"),
>  	    chain_next ("RTX_NEXT (&%h)"),
>  	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
>    /* The kind of expression this is.  */
> -  ENUM_BITFIELD(rtx_code) code: 16;
> +  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
>  
>    /* The kind of value the expression has.  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;

Please swap the fields around, as discussed previously.

>  
>    /* 1 in a MEM if we should keep the alias set for this mem unchanged
>       when we access a component.
> @@ -2164,7 +2167,7 @@ subreg_shape::operator != (const subreg_shape &other) const
>  inline unsigned HOST_WIDE_INT
>  subreg_shape::unique_id () const
>  {
> -  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
> +  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << RTX_MACHINE_MODE_BITSIZE)); }
>    { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
>    { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
>    int res = (int) inner_mode + ((int) outer_mode << 8);
> diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
> index 5fbed816e20..15aba0dec7a 100644
> --- a/gcc/rtlanal.h
> +++ b/gcc/rtlanal.h
> @@ -99,8 +99,9 @@ public:
>    unsigned int flags : 16;
>  
>    /* The mode of the reference.  If IS_MULTIREG, this is the mode of
> -     REGNO - MULTIREG_OFFSET.  */
> -  machine_mode mode : 8;
> +     REGNO - MULTIREG_OFFSET.  Note the bitsize of mode should be exactly
> +     the same as the definition of rtx_def,  */
> +  machine_mode mode : 16;
>  
>    /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
>    unsigned int multireg_offset : 8;

Like with ira-int.h, any reasonable use of rtlanal.h will also need rtl.h,
so we should be able to use the BITSIZE macro directly.  Please swap
#includes around if necessary.

> diff --git a/gcc/tree-core.h b/gcc/tree-core.h
> index a1aea136e75..001d232f433 100644
> --- a/gcc/tree-core.h
> +++ b/gcc/tree-core.h
> @@ -1680,8 +1680,11 @@ struct GTY(()) tree_type_common {
>    tree attributes;
>    unsigned int uid;
>  
> +  /* Note the bitsize of wmode should be exactly the same as the
> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
> +  ENUM_BITFIELD(machine_mode) mode : 16;
> +
>    unsigned int precision : 16;
> -  ENUM_BITFIELD(machine_mode) mode : 8;
>    unsigned lang_flag_0 : 1;
>    unsigned lang_flag_1 : 1;
>    unsigned lang_flag_2 : 1;
> @@ -1712,7 +1715,7 @@ struct GTY(()) tree_type_common {
>    unsigned empty_flag : 1;
>    unsigned indivisible_p : 1;
>    unsigned no_named_args_stdarg_p : 1;
> -  unsigned spare : 9;
> +  unsigned spare : 1;
>  
>    alias_set_type alias_set;
>    tree pointer_to;
> @@ -1770,7 +1773,9 @@ struct GTY(()) tree_decl_common {
>    struct tree_decl_minimal common;
>    tree size;
>  
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  /* Note the bitsize of wmode should be exactly the same as the
> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */
> +  ENUM_BITFIELD(machine_mode) mode : 16;
>  
>    unsigned nonlocal_flag : 1;
>    unsigned virtual_flag : 1;

Please also update:

  /* 13 bits unused.  */

to account for the difference.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12  8:24 ` [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits Richard Sandiford
@ 2023-05-12 11:16   ` Li, Pan2
  2023-05-12 11:31     ` Richard Sandiford
  0 siblings, 1 reply; 33+ messages in thread
From: Li, Pan2 @ 2023-05-12 11:16 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

Thanks Richard for comments. In previous, I am not sure it is reasonable to let everywhere consume the same macro in rtl.h (As the includes you mentioned). Thus, make a conservative change in PATCH v1.

I will address the comments and try to align the bit size to the one and the only one macro soon.

Pan


-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Friday, May 12, 2023 4:24 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits

pan2.li@intel.com writes:
> From: Pan Li <pan2.li@intel.com>
>
> We are running out of the machine_mode(8 bits) in RISC-V backend. Thus 
> we would like to extend the machine mode bit size from 8 to 16 bits.
> However, it is sensitive to extend the memory size in common structure 
> like tree or rtx. This patch would like to extend the machine mode 
> bits to 16 bits by shrinking, like:
>
> * Swap the bit size of code and machine code in rtx_def.
> * Reconcile the machine_mode location and spare in tree.
>
> The memory impact of this patch for correlated structure looks like below:
>
> +-------------------+----------+---------+------+
> | struct/bytes      | upstream | patched | diff |
> +-------------------+----------+---------+------+
> | rtx_obj_reference |        8 |      12 |   +4 |
> | ext_modified      |        2 |       3 |   +1 |
> | ira_allocno       |      192 |     200 |   +8 |
> | qty_table_elem    |       40 |      40 |    0 |
> | reg_stat_type     |       64 |      64 |    0 |
> | rtx_def           |       40 |      40 |    0 |
> | table_elt         |       80 |      80 |    0 |
> | tree_decl_common  |      112 |     112 |    0 |
> | tree_type_common  |      128 |     128 |    0 |
> +-------------------+----------+---------+------+
>
> The tree and rtx related struct has no memory changes after this 
> patch, and the machine_mode changes to 16 bits already.
>
> Signed-off-by: Pan Li <pan2.li@intel.com>
> Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
> Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
>
> gcc/ChangeLog:
>
> 	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
> 	* cse.cc (struct qty_table_elem): Ditto.
> 	(struct table_elt): Ditto.
> 	(struct set): Ditto.
> 	* genopinit.cc (main): Reconciled the machine mode limit.
> 	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
> 	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.
> 	* rtl-ssa/accesses.h: Ditto.
> 	* rtl.h (RTX_CODE_BITSIZE): New macro.
> 	(RTX_MACHINE_MODE_BITSIZE): Ditto.
> 	(struct GTY): Swap bit size between code and machine mode.
> 	(subreg_shape::unique_id): Reconciled the machine mode limit.
> 	* rtlanal.h: Extended machine mode to 16 bits.
> 	* tree-core.h (struct tree_type_common): Ditto.
> 	(struct tree_decl_common): Reconciled the locate and extended
> 	bit size of machine mode.
> ---
>  gcc/combine.cc         |  4 ++--
>  gcc/cse.cc             |  8 ++++----
>  gcc/genopinit.cc       |  3 ++-
>  gcc/ira-int.h          | 12 ++++++++----
>  gcc/ree.cc             |  2 +-
>  gcc/rtl-ssa/accesses.h |  6 ++++--
>  gcc/rtl.h              |  9 ++++++---
>  gcc/rtlanal.h          |  5 +++--
>  gcc/tree-core.h        | 11 ++++++++---
>  9 files changed, 38 insertions(+), 22 deletions(-)
>
> diff --git a/gcc/combine.cc b/gcc/combine.cc index 
> 5aa0ec5c45a..bdf6f635c80 100644
> --- a/gcc/combine.cc
> +++ b/gcc/combine.cc
> @@ -200,7 +200,7 @@ struct reg_stat_type {
>  
>    unsigned HOST_WIDE_INT	last_set_nonzero_bits;
>    char				last_set_sign_bit_copies;
> -  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
> +  ENUM_BITFIELD(machine_mode)	last_set_mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* Set nonzero if references to register n in expressions should not be
>       used.  last_set_invalid is set nonzero when this register is 
> being @@ -235,7 +235,7 @@ struct reg_stat_type {
>       truncation if we know that value already contains a truncated
>       value.  */
>  
> -  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
> +  ENUM_BITFIELD(machine_mode)	truncated_to_mode : RTX_MACHINE_MODE_BITSIZE;
>  };
>  
>  
> diff --git a/gcc/cse.cc b/gcc/cse.cc
> index b10c9b0c94d..fe594c1bc3d 100644
> --- a/gcc/cse.cc
> +++ b/gcc/cse.cc
> @@ -250,8 +250,8 @@ struct qty_table_elem
>    unsigned int first_reg, last_reg;
>    /* The sizes of these fields should match the sizes of the
>       code and mode fields of struct rtx_def (see rtl.h).  */

The comment can be removed, since you're now adding macros to ensure this (thanks).  Same for other instances of the comment.

> -  ENUM_BITFIELD(rtx_code) comparison_code : 16;
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;

Please put the mode first, so that the 16-bit value is aligned to 16 bits.

>  };
>  
>  /* The table of all qtys, indexed by qty number.  */ @@ -406,7 +406,7 
> @@ struct table_elt
>    int regcost;
>    /* The size of this field should match the size
>       of the mode field of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>    char in_memory;
>    char is_const;
>    char flag;
> @@ -4155,7 +4155,7 @@ struct set
>    /* Original machine mode, in case it becomes a CONST_INT.
>       The size of this field should match the size of the mode
>       field of struct rtx_def (see rtl.h).  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>    /* Hash value of constant equivalent for SET_SRC.  */
>    unsigned src_const_hash;
>    /* A constant equivalent for SET_SRC, if any.  */ diff --git 
> a/gcc/genopinit.cc b/gcc/genopinit.cc index 83cb7504fa1..2add8b925da 
> 100644
> --- a/gcc/genopinit.cc
> +++ b/gcc/genopinit.cc
> @@ -182,7 +182,8 @@ main (int argc, const char **argv)
>  
>    progname = "genopinit";
>  
> -  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
> +  if (NUM_OPTABS > 0xffff
> +    || MAX_MACHINE_MODE >= ((1 << RTX_MACHINE_MODE_BITSIZE) - 1))
>      fatal ("genopinit range assumptions invalid");
>  
>    if (!init_rtx_reader_args_cb (argc, argv, handle_arg)) diff --git 
> a/gcc/ira-int.h b/gcc/ira-int.h index e2de47213b4..124e14200b1 100644
> --- a/gcc/ira-int.h
> +++ b/gcc/ira-int.h
> @@ -280,11 +280,15 @@ struct ira_allocno
>    /* Regno for allocno or cap.  */
>    int regno;
>    /* Mode of the allocno which is the mode of the corresponding
> -     pseudo-register.  */
> -  ENUM_BITFIELD (machine_mode) mode : 8;
> +     pseudo-register.  Note the bitsize of mode should be exactly
> +     the same as the definition of rtx_def, aka RTX_MACHINE_MODE_BITSIZE
> +     in rtl.h.  */
> +  ENUM_BITFIELD (machine_mode) mode : 16;
>    /* Widest mode of the allocno which in at least one case could be
> -     for paradoxical subregs where wmode > mode.  */
> -  ENUM_BITFIELD (machine_mode) wmode : 8;
> +     for paradoxical subregs where wmode > mode.  Note the bitsize of
> +     wmode should be exactly the same as the definition of rtx_def,
> +     aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */  ENUM_BITFIELD 
> + (machine_mode) wmode : 16;
>    /* Register class which should be used for allocation for given
>       allocno.  NO_REGS means that we should use memory.  */
>    ENUM_BITFIELD (reg_class) aclass : 16;

Why not use the BITSIZE macros directly?  Any reasonable use of ira-int.h will also need rtl.h.  If something includes ira-int.h before rtl.h then we should change that.

To avoid growing this structure, please move something into one of the holes later in the structure.  E.g. hard_regno could go before num_objects.

> diff --git a/gcc/ree.cc b/gcc/ree.cc
> index 413aec7c8eb..fb011bc907c 100644
> --- a/gcc/ree.cc
> +++ b/gcc/ree.cc
> @@ -567,7 +567,7 @@ enum ext_modified_kind  struct ATTRIBUTE_PACKED 
> ext_modified  {
>    /* Mode from which ree has zero or sign extended the destination.  
> */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>  
>    /* Kind of modification of the insn.  */
>    ENUM_BITFIELD(ext_modified_kind) kind : 2; diff --git 
> a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index 
> c5180b9308a..2e73004cafa 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -253,8 +253,10 @@ private:
>    // Bits for future expansion.
>    unsigned int m_spare : 2;
>  
> -  // The value returned by the accessor above.
> -  machine_mode m_mode : 8;
> +  // The value returned by the accessor above.  Note the bitsize of  
> + // m_mode should be exactly the same as the definition of rtx_def,  
> + // aka RTX_MACHINE_MODE_BITSIZE in rtl.h.
> +  machine_mode m_mode : 16;
>  };
>  
>  // A contiguous array of access_info pointers.  Used to represent a 
> diff --git a/gcc/rtl.h b/gcc/rtl.h index f634cab730b..a18ecf7632f 
> 100644
> --- a/gcc/rtl.h
> +++ b/gcc/rtl.h
> @@ -63,6 +63,9 @@ enum rtx_code  {
>  # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)  #endif
>  
> +#define RTX_CODE_BITSIZE 8
> +#define RTX_MACHINE_MODE_BITSIZE 16
> +
>  /* Register Transfer Language EXPRESSIONS CODE CLASSES */
>  
>  enum rtx_class  {
> @@ -310,10 +313,10 @@ struct GTY((desc("0"), tag("0"),
>  	    chain_next ("RTX_NEXT (&%h)"),
>  	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
>    /* The kind of expression this is.  */
> -  ENUM_BITFIELD(rtx_code) code: 16;
> +  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
>  
>    /* The kind of value the expression has.  */
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;

Please swap the fields around, as discussed previously.

>  
>    /* 1 in a MEM if we should keep the alias set for this mem unchanged
>       when we access a component.
> @@ -2164,7 +2167,7 @@ subreg_shape::operator != (const subreg_shape 
> &other) const  inline unsigned HOST_WIDE_INT  subreg_shape::unique_id 
> () const  {
> -  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
> +  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << 
> + RTX_MACHINE_MODE_BITSIZE)); }
>    { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
>    { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
>    int res = (int) inner_mode + ((int) outer_mode << 8); diff --git 
> a/gcc/rtlanal.h b/gcc/rtlanal.h index 5fbed816e20..15aba0dec7a 100644
> --- a/gcc/rtlanal.h
> +++ b/gcc/rtlanal.h
> @@ -99,8 +99,9 @@ public:
>    unsigned int flags : 16;
>  
>    /* The mode of the reference.  If IS_MULTIREG, this is the mode of
> -     REGNO - MULTIREG_OFFSET.  */
> -  machine_mode mode : 8;
> +     REGNO - MULTIREG_OFFSET.  Note the bitsize of mode should be exactly
> +     the same as the definition of rtx_def,  */  machine_mode mode : 
> + 16;
>  
>    /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
>    unsigned int multireg_offset : 8;

Like with ira-int.h, any reasonable use of rtlanal.h will also need rtl.h, so we should be able to use the BITSIZE macro directly.  Please swap #includes around if necessary.

> diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 
> a1aea136e75..001d232f433 100644
> --- a/gcc/tree-core.h
> +++ b/gcc/tree-core.h
> @@ -1680,8 +1680,11 @@ struct GTY(()) tree_type_common {
>    tree attributes;
>    unsigned int uid;
>  
> +  /* Note the bitsize of wmode should be exactly the same as the
> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  
> + */
> +  ENUM_BITFIELD(machine_mode) mode : 16;
> +
>    unsigned int precision : 16;
> -  ENUM_BITFIELD(machine_mode) mode : 8;
>    unsigned lang_flag_0 : 1;
>    unsigned lang_flag_1 : 1;
>    unsigned lang_flag_2 : 1;
> @@ -1712,7 +1715,7 @@ struct GTY(()) tree_type_common {
>    unsigned empty_flag : 1;
>    unsigned indivisible_p : 1;
>    unsigned no_named_args_stdarg_p : 1;
> -  unsigned spare : 9;
> +  unsigned spare : 1;
>  
>    alias_set_type alias_set;
>    tree pointer_to;
> @@ -1770,7 +1773,9 @@ struct GTY(()) tree_decl_common {
>    struct tree_decl_minimal common;
>    tree size;
>  
> -  ENUM_BITFIELD(machine_mode) mode : 8;
> +  /* Note the bitsize of wmode should be exactly the same as the
> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  
> + */
> +  ENUM_BITFIELD(machine_mode) mode : 16;
>  
>    unsigned nonlocal_flag : 1;
>    unsigned virtual_flag : 1;

Please also update:

  /* 13 bits unused.  */

to account for the difference.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12 11:16   ` Li, Pan2
@ 2023-05-12 11:31     ` Richard Sandiford
  2023-05-12 11:48       ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2023-05-12 11:31 UTC (permalink / raw)
  To: Li, Pan2 via Gcc-patches
  Cc: Li, Pan2, juzhe.zhong, kito.cheng, Wang, Yanzhang, jeffreyalaw,
	rguenther

"Li, Pan2 via Gcc-patches" <gcc-patches@gcc.gnu.org> writes:
> Thanks Richard for comments. In previous, I am not sure it is reasonable to let everywhere consume the same macro in rtl.h (As the includes you mentioned). Thus, make a conservative change in PATCH v1.
>
> I will address the comments and try to align the bit size to the one and the only one macro soon.

Sorry, I should have thought about this earlier, but it would
probably make sense to name the macro MACHINE_MODE_BITSIZE and
define it in machmode.h rather than rtl.h.  (The rtx_code stuff
should stay as-is.)

Thanks,
Richard

>
> Pan
>
>
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com> 
> Sent: Friday, May 12, 2023 4:24 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
> Subject: Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
>
> pan2.li@intel.com writes:
>> From: Pan Li <pan2.li@intel.com>
>>
>> We are running out of the machine_mode(8 bits) in RISC-V backend. Thus 
>> we would like to extend the machine mode bit size from 8 to 16 bits.
>> However, it is sensitive to extend the memory size in common structure 
>> like tree or rtx. This patch would like to extend the machine mode 
>> bits to 16 bits by shrinking, like:
>>
>> * Swap the bit size of code and machine code in rtx_def.
>> * Reconcile the machine_mode location and spare in tree.
>>
>> The memory impact of this patch for correlated structure looks like below:
>>
>> +-------------------+----------+---------+------+
>> | struct/bytes      | upstream | patched | diff |
>> +-------------------+----------+---------+------+
>> | rtx_obj_reference |        8 |      12 |   +4 |
>> | ext_modified      |        2 |       3 |   +1 |
>> | ira_allocno       |      192 |     200 |   +8 |
>> | qty_table_elem    |       40 |      40 |    0 |
>> | reg_stat_type     |       64 |      64 |    0 |
>> | rtx_def           |       40 |      40 |    0 |
>> | table_elt         |       80 |      80 |    0 |
>> | tree_decl_common  |      112 |     112 |    0 |
>> | tree_type_common  |      128 |     128 |    0 |
>> +-------------------+----------+---------+------+
>>
>> The tree and rtx related struct has no memory changes after this 
>> patch, and the machine_mode changes to 16 bits already.
>>
>> Signed-off-by: Pan Li <pan2.li@intel.com>
>> Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>> Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
>>
>> gcc/ChangeLog:
>>
>> 	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
>> 	* cse.cc (struct qty_table_elem): Ditto.
>> 	(struct table_elt): Ditto.
>> 	(struct set): Ditto.
>> 	* genopinit.cc (main): Reconciled the machine mode limit.
>> 	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
>> 	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.
>> 	* rtl-ssa/accesses.h: Ditto.
>> 	* rtl.h (RTX_CODE_BITSIZE): New macro.
>> 	(RTX_MACHINE_MODE_BITSIZE): Ditto.
>> 	(struct GTY): Swap bit size between code and machine mode.
>> 	(subreg_shape::unique_id): Reconciled the machine mode limit.
>> 	* rtlanal.h: Extended machine mode to 16 bits.
>> 	* tree-core.h (struct tree_type_common): Ditto.
>> 	(struct tree_decl_common): Reconciled the locate and extended
>> 	bit size of machine mode.
>> ---
>>  gcc/combine.cc         |  4 ++--
>>  gcc/cse.cc             |  8 ++++----
>>  gcc/genopinit.cc       |  3 ++-
>>  gcc/ira-int.h          | 12 ++++++++----
>>  gcc/ree.cc             |  2 +-
>>  gcc/rtl-ssa/accesses.h |  6 ++++--
>>  gcc/rtl.h              |  9 ++++++---
>>  gcc/rtlanal.h          |  5 +++--
>>  gcc/tree-core.h        | 11 ++++++++---
>>  9 files changed, 38 insertions(+), 22 deletions(-)
>>
>> diff --git a/gcc/combine.cc b/gcc/combine.cc index 
>> 5aa0ec5c45a..bdf6f635c80 100644
>> --- a/gcc/combine.cc
>> +++ b/gcc/combine.cc
>> @@ -200,7 +200,7 @@ struct reg_stat_type {
>>  
>>    unsigned HOST_WIDE_INT	last_set_nonzero_bits;
>>    char				last_set_sign_bit_copies;
>> -  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
>> +  ENUM_BITFIELD(machine_mode)	last_set_mode : RTX_MACHINE_MODE_BITSIZE;
>>  
>>    /* Set nonzero if references to register n in expressions should not be
>>       used.  last_set_invalid is set nonzero when this register is 
>> being @@ -235,7 +235,7 @@ struct reg_stat_type {
>>       truncation if we know that value already contains a truncated
>>       value.  */
>>  
>> -  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
>> +  ENUM_BITFIELD(machine_mode)	truncated_to_mode : RTX_MACHINE_MODE_BITSIZE;
>>  };
>>  
>>  
>> diff --git a/gcc/cse.cc b/gcc/cse.cc
>> index b10c9b0c94d..fe594c1bc3d 100644
>> --- a/gcc/cse.cc
>> +++ b/gcc/cse.cc
>> @@ -250,8 +250,8 @@ struct qty_table_elem
>>    unsigned int first_reg, last_reg;
>>    /* The sizes of these fields should match the sizes of the
>>       code and mode fields of struct rtx_def (see rtl.h).  */
>
> The comment can be removed, since you're now adding macros to ensure this (thanks).  Same for other instances of the comment.
>
>> -  ENUM_BITFIELD(rtx_code) comparison_code : 16;
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>
> Please put the mode first, so that the 16-bit value is aligned to 16 bits.
>
>>  };
>>  
>>  /* The table of all qtys, indexed by qty number.  */ @@ -406,7 +406,7 
>> @@ struct table_elt
>>    int regcost;
>>    /* The size of this field should match the size
>>       of the mode field of struct rtx_def (see rtl.h).  */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>>    char in_memory;
>>    char is_const;
>>    char flag;
>> @@ -4155,7 +4155,7 @@ struct set
>>    /* Original machine mode, in case it becomes a CONST_INT.
>>       The size of this field should match the size of the mode
>>       field of struct rtx_def (see rtl.h).  */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>>    /* Hash value of constant equivalent for SET_SRC.  */
>>    unsigned src_const_hash;
>>    /* A constant equivalent for SET_SRC, if any.  */ diff --git 
>> a/gcc/genopinit.cc b/gcc/genopinit.cc index 83cb7504fa1..2add8b925da 
>> 100644
>> --- a/gcc/genopinit.cc
>> +++ b/gcc/genopinit.cc
>> @@ -182,7 +182,8 @@ main (int argc, const char **argv)
>>  
>>    progname = "genopinit";
>>  
>> -  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
>> +  if (NUM_OPTABS > 0xffff
>> +    || MAX_MACHINE_MODE >= ((1 << RTX_MACHINE_MODE_BITSIZE) - 1))
>>      fatal ("genopinit range assumptions invalid");
>>  
>>    if (!init_rtx_reader_args_cb (argc, argv, handle_arg)) diff --git 
>> a/gcc/ira-int.h b/gcc/ira-int.h index e2de47213b4..124e14200b1 100644
>> --- a/gcc/ira-int.h
>> +++ b/gcc/ira-int.h
>> @@ -280,11 +280,15 @@ struct ira_allocno
>>    /* Regno for allocno or cap.  */
>>    int regno;
>>    /* Mode of the allocno which is the mode of the corresponding
>> -     pseudo-register.  */
>> -  ENUM_BITFIELD (machine_mode) mode : 8;
>> +     pseudo-register.  Note the bitsize of mode should be exactly
>> +     the same as the definition of rtx_def, aka RTX_MACHINE_MODE_BITSIZE
>> +     in rtl.h.  */
>> +  ENUM_BITFIELD (machine_mode) mode : 16;
>>    /* Widest mode of the allocno which in at least one case could be
>> -     for paradoxical subregs where wmode > mode.  */
>> -  ENUM_BITFIELD (machine_mode) wmode : 8;
>> +     for paradoxical subregs where wmode > mode.  Note the bitsize of
>> +     wmode should be exactly the same as the definition of rtx_def,
>> +     aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */  ENUM_BITFIELD 
>> + (machine_mode) wmode : 16;
>>    /* Register class which should be used for allocation for given
>>       allocno.  NO_REGS means that we should use memory.  */
>>    ENUM_BITFIELD (reg_class) aclass : 16;
>
> Why not use the BITSIZE macros directly?  Any reasonable use of ira-int.h will also need rtl.h.  If something includes ira-int.h before rtl.h then we should change that.
>
> To avoid growing this structure, please move something into one of the holes later in the structure.  E.g. hard_regno could go before num_objects.
>
>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>> index 413aec7c8eb..fb011bc907c 100644
>> --- a/gcc/ree.cc
>> +++ b/gcc/ree.cc
>> @@ -567,7 +567,7 @@ enum ext_modified_kind  struct ATTRIBUTE_PACKED 
>> ext_modified  {
>>    /* Mode from which ree has zero or sign extended the destination.  
>> */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>>  
>>    /* Kind of modification of the insn.  */
>>    ENUM_BITFIELD(ext_modified_kind) kind : 2; diff --git 
>> a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index 
>> c5180b9308a..2e73004cafa 100644
>> --- a/gcc/rtl-ssa/accesses.h
>> +++ b/gcc/rtl-ssa/accesses.h
>> @@ -253,8 +253,10 @@ private:
>>    // Bits for future expansion.
>>    unsigned int m_spare : 2;
>>  
>> -  // The value returned by the accessor above.
>> -  machine_mode m_mode : 8;
>> +  // The value returned by the accessor above.  Note the bitsize of  
>> + // m_mode should be exactly the same as the definition of rtx_def,  
>> + // aka RTX_MACHINE_MODE_BITSIZE in rtl.h.
>> +  machine_mode m_mode : 16;
>>  };
>>  
>>  // A contiguous array of access_info pointers.  Used to represent a 
>> diff --git a/gcc/rtl.h b/gcc/rtl.h index f634cab730b..a18ecf7632f 
>> 100644
>> --- a/gcc/rtl.h
>> +++ b/gcc/rtl.h
>> @@ -63,6 +63,9 @@ enum rtx_code  {
>>  # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)  #endif
>>  
>> +#define RTX_CODE_BITSIZE 8
>> +#define RTX_MACHINE_MODE_BITSIZE 16
>> +
>>  /* Register Transfer Language EXPRESSIONS CODE CLASSES */
>>  
>>  enum rtx_class  {
>> @@ -310,10 +313,10 @@ struct GTY((desc("0"), tag("0"),
>>  	    chain_next ("RTX_NEXT (&%h)"),
>>  	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
>>    /* The kind of expression this is.  */
>> -  ENUM_BITFIELD(rtx_code) code: 16;
>> +  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
>>  
>>    /* The kind of value the expression has.  */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>
> Please swap the fields around, as discussed previously.
>
>>  
>>    /* 1 in a MEM if we should keep the alias set for this mem unchanged
>>       when we access a component.
>> @@ -2164,7 +2167,7 @@ subreg_shape::operator != (const subreg_shape 
>> &other) const  inline unsigned HOST_WIDE_INT  subreg_shape::unique_id 
>> () const  {
>> -  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
>> +  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << 
>> + RTX_MACHINE_MODE_BITSIZE)); }
>>    { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
>>    { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
>>    int res = (int) inner_mode + ((int) outer_mode << 8); diff --git 
>> a/gcc/rtlanal.h b/gcc/rtlanal.h index 5fbed816e20..15aba0dec7a 100644
>> --- a/gcc/rtlanal.h
>> +++ b/gcc/rtlanal.h
>> @@ -99,8 +99,9 @@ public:
>>    unsigned int flags : 16;
>>  
>>    /* The mode of the reference.  If IS_MULTIREG, this is the mode of
>> -     REGNO - MULTIREG_OFFSET.  */
>> -  machine_mode mode : 8;
>> +     REGNO - MULTIREG_OFFSET.  Note the bitsize of mode should be exactly
>> +     the same as the definition of rtx_def,  */  machine_mode mode : 
>> + 16;
>>  
>>    /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
>>    unsigned int multireg_offset : 8;
>
> Like with ira-int.h, any reasonable use of rtlanal.h will also need rtl.h, so we should be able to use the BITSIZE macro directly.  Please swap #includes around if necessary.
>
>> diff --git a/gcc/tree-core.h b/gcc/tree-core.h index 
>> a1aea136e75..001d232f433 100644
>> --- a/gcc/tree-core.h
>> +++ b/gcc/tree-core.h
>> @@ -1680,8 +1680,11 @@ struct GTY(()) tree_type_common {
>>    tree attributes;
>>    unsigned int uid;
>>  
>> +  /* Note the bitsize of wmode should be exactly the same as the
>> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  
>> + */
>> +  ENUM_BITFIELD(machine_mode) mode : 16;
>> +
>>    unsigned int precision : 16;
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>>    unsigned lang_flag_0 : 1;
>>    unsigned lang_flag_1 : 1;
>>    unsigned lang_flag_2 : 1;
>> @@ -1712,7 +1715,7 @@ struct GTY(()) tree_type_common {
>>    unsigned empty_flag : 1;
>>    unsigned indivisible_p : 1;
>>    unsigned no_named_args_stdarg_p : 1;
>> -  unsigned spare : 9;
>> +  unsigned spare : 1;
>>  
>>    alias_set_type alias_set;
>>    tree pointer_to;
>> @@ -1770,7 +1773,9 @@ struct GTY(()) tree_decl_common {
>>    struct tree_decl_minimal common;
>>    tree size;
>>  
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  /* Note the bitsize of wmode should be exactly the same as the
>> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  
>> + */
>> +  ENUM_BITFIELD(machine_mode) mode : 16;
>>  
>>    unsigned nonlocal_flag : 1;
>>    unsigned virtual_flag : 1;
>
> Please also update:
>
>   /* 13 bits unused.  */
>
> to account for the difference.
>
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12 11:31     ` Richard Sandiford
@ 2023-05-12 11:48       ` Li, Pan2
  0 siblings, 0 replies; 33+ messages in thread
From: Li, Pan2 @ 2023-05-12 11:48 UTC (permalink / raw)
  To: Richard Sandiford, Li, Pan2 via Gcc-patches
  Cc: juzhe.zhong, kito.cheng, Wang, Yanzhang, jeffreyalaw, rguenther

Never minder. When preparing the PR, I am keeping ask myself that is everywhere about machine code bit size updated? Thus would like to align the bit size to one macro, to avoid developers (perhaps myself in future) suffering such kind of concern.

Will try to move the machine mode to machmode.h.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Friday, May 12, 2023 7:32 PM
To: Li, Pan2 via Gcc-patches <gcc-patches@gcc.gnu.org>
Cc: Li, Pan2 <pan2.li@intel.com>; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits

"Li, Pan2 via Gcc-patches" <gcc-patches@gcc.gnu.org> writes:
> Thanks Richard for comments. In previous, I am not sure it is reasonable to let everywhere consume the same macro in rtl.h (As the includes you mentioned). Thus, make a conservative change in PATCH v1.
>
> I will address the comments and try to align the bit size to the one and the only one macro soon.

Sorry, I should have thought about this earlier, but it would probably make sense to name the macro MACHINE_MODE_BITSIZE and define it in machmode.h rather than rtl.h.  (The rtx_code stuff should stay as-is.)

Thanks,
Richard

>
> Pan
>
>
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Friday, May 12, 2023 4:24 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; 
> kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; 
> jeffreyalaw@gmail.com; rguenther@suse.de
> Subject: Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 
> bits
>
> pan2.li@intel.com writes:
>> From: Pan Li <pan2.li@intel.com>
>>
>> We are running out of the machine_mode(8 bits) in RISC-V backend. 
>> Thus we would like to extend the machine mode bit size from 8 to 16 bits.
>> However, it is sensitive to extend the memory size in common 
>> structure like tree or rtx. This patch would like to extend the 
>> machine mode bits to 16 bits by shrinking, like:
>>
>> * Swap the bit size of code and machine code in rtx_def.
>> * Reconcile the machine_mode location and spare in tree.
>>
>> The memory impact of this patch for correlated structure looks like below:
>>
>> +-------------------+----------+---------+------+
>> | struct/bytes      | upstream | patched | diff |
>> +-------------------+----------+---------+------+
>> | rtx_obj_reference |        8 |      12 |   +4 |
>> | ext_modified      |        2 |       3 |   +1 |
>> | ira_allocno       |      192 |     200 |   +8 |
>> | qty_table_elem    |       40 |      40 |    0 |
>> | reg_stat_type     |       64 |      64 |    0 |
>> | rtx_def           |       40 |      40 |    0 |
>> | table_elt         |       80 |      80 |    0 |
>> | tree_decl_common  |      112 |     112 |    0 |
>> | tree_type_common  |      128 |     128 |    0 |
>> +-------------------+----------+---------+------+
>>
>> The tree and rtx related struct has no memory changes after this 
>> patch, and the machine_mode changes to 16 bits already.
>>
>> Signed-off-by: Pan Li <pan2.li@intel.com>
>> Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
>> Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
>>
>> gcc/ChangeLog:
>>
>> 	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
>> 	* cse.cc (struct qty_table_elem): Ditto.
>> 	(struct table_elt): Ditto.
>> 	(struct set): Ditto.
>> 	* genopinit.cc (main): Reconciled the machine mode limit.
>> 	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
>> 	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.
>> 	* rtl-ssa/accesses.h: Ditto.
>> 	* rtl.h (RTX_CODE_BITSIZE): New macro.
>> 	(RTX_MACHINE_MODE_BITSIZE): Ditto.
>> 	(struct GTY): Swap bit size between code and machine mode.
>> 	(subreg_shape::unique_id): Reconciled the machine mode limit.
>> 	* rtlanal.h: Extended machine mode to 16 bits.
>> 	* tree-core.h (struct tree_type_common): Ditto.
>> 	(struct tree_decl_common): Reconciled the locate and extended
>> 	bit size of machine mode.
>> ---
>>  gcc/combine.cc         |  4 ++--
>>  gcc/cse.cc             |  8 ++++----
>>  gcc/genopinit.cc       |  3 ++-
>>  gcc/ira-int.h          | 12 ++++++++----
>>  gcc/ree.cc             |  2 +-
>>  gcc/rtl-ssa/accesses.h |  6 ++++--
>>  gcc/rtl.h              |  9 ++++++---
>>  gcc/rtlanal.h          |  5 +++--
>>  gcc/tree-core.h        | 11 ++++++++---
>>  9 files changed, 38 insertions(+), 22 deletions(-)
>>
>> diff --git a/gcc/combine.cc b/gcc/combine.cc index
>> 5aa0ec5c45a..bdf6f635c80 100644
>> --- a/gcc/combine.cc
>> +++ b/gcc/combine.cc
>> @@ -200,7 +200,7 @@ struct reg_stat_type {
>>  
>>    unsigned HOST_WIDE_INT	last_set_nonzero_bits;
>>    char				last_set_sign_bit_copies;
>> -  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
>> +  ENUM_BITFIELD(machine_mode)	last_set_mode : RTX_MACHINE_MODE_BITSIZE;
>>  
>>    /* Set nonzero if references to register n in expressions should not be
>>       used.  last_set_invalid is set nonzero when this register is 
>> being @@ -235,7 +235,7 @@ struct reg_stat_type {
>>       truncation if we know that value already contains a truncated
>>       value.  */
>>  
>> -  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
>> +  ENUM_BITFIELD(machine_mode)	truncated_to_mode : RTX_MACHINE_MODE_BITSIZE;
>>  };
>>  
>>  
>> diff --git a/gcc/cse.cc b/gcc/cse.cc
>> index b10c9b0c94d..fe594c1bc3d 100644
>> --- a/gcc/cse.cc
>> +++ b/gcc/cse.cc
>> @@ -250,8 +250,8 @@ struct qty_table_elem
>>    unsigned int first_reg, last_reg;
>>    /* The sizes of these fields should match the sizes of the
>>       code and mode fields of struct rtx_def (see rtl.h).  */
>
> The comment can be removed, since you're now adding macros to ensure this (thanks).  Same for other instances of the comment.
>
>> -  ENUM_BITFIELD(rtx_code) comparison_code : 16;
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>
> Please put the mode first, so that the 16-bit value is aligned to 16 bits.
>
>>  };
>>  
>>  /* The table of all qtys, indexed by qty number.  */ @@ -406,7 
>> +406,7 @@ struct table_elt
>>    int regcost;
>>    /* The size of this field should match the size
>>       of the mode field of struct rtx_def (see rtl.h).  */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>>    char in_memory;
>>    char is_const;
>>    char flag;
>> @@ -4155,7 +4155,7 @@ struct set
>>    /* Original machine mode, in case it becomes a CONST_INT.
>>       The size of this field should match the size of the mode
>>       field of struct rtx_def (see rtl.h).  */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>>    /* Hash value of constant equivalent for SET_SRC.  */
>>    unsigned src_const_hash;
>>    /* A constant equivalent for SET_SRC, if any.  */ diff --git 
>> a/gcc/genopinit.cc b/gcc/genopinit.cc index 83cb7504fa1..2add8b925da
>> 100644
>> --- a/gcc/genopinit.cc
>> +++ b/gcc/genopinit.cc
>> @@ -182,7 +182,8 @@ main (int argc, const char **argv)
>>  
>>    progname = "genopinit";
>>  
>> -  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
>> +  if (NUM_OPTABS > 0xffff
>> +    || MAX_MACHINE_MODE >= ((1 << RTX_MACHINE_MODE_BITSIZE) - 1))
>>      fatal ("genopinit range assumptions invalid");
>>  
>>    if (!init_rtx_reader_args_cb (argc, argv, handle_arg)) diff --git 
>> a/gcc/ira-int.h b/gcc/ira-int.h index e2de47213b4..124e14200b1 100644
>> --- a/gcc/ira-int.h
>> +++ b/gcc/ira-int.h
>> @@ -280,11 +280,15 @@ struct ira_allocno
>>    /* Regno for allocno or cap.  */
>>    int regno;
>>    /* Mode of the allocno which is the mode of the corresponding
>> -     pseudo-register.  */
>> -  ENUM_BITFIELD (machine_mode) mode : 8;
>> +     pseudo-register.  Note the bitsize of mode should be exactly
>> +     the same as the definition of rtx_def, aka RTX_MACHINE_MODE_BITSIZE
>> +     in rtl.h.  */
>> +  ENUM_BITFIELD (machine_mode) mode : 16;
>>    /* Widest mode of the allocno which in at least one case could be
>> -     for paradoxical subregs where wmode > mode.  */
>> -  ENUM_BITFIELD (machine_mode) wmode : 8;
>> +     for paradoxical subregs where wmode > mode.  Note the bitsize of
>> +     wmode should be exactly the same as the definition of rtx_def,
>> +     aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  */  ENUM_BITFIELD
>> + (machine_mode) wmode : 16;
>>    /* Register class which should be used for allocation for given
>>       allocno.  NO_REGS means that we should use memory.  */
>>    ENUM_BITFIELD (reg_class) aclass : 16;
>
> Why not use the BITSIZE macros directly?  Any reasonable use of ira-int.h will also need rtl.h.  If something includes ira-int.h before rtl.h then we should change that.
>
> To avoid growing this structure, please move something into one of the holes later in the structure.  E.g. hard_regno could go before num_objects.
>
>> diff --git a/gcc/ree.cc b/gcc/ree.cc
>> index 413aec7c8eb..fb011bc907c 100644
>> --- a/gcc/ree.cc
>> +++ b/gcc/ree.cc
>> @@ -567,7 +567,7 @@ enum ext_modified_kind  struct ATTRIBUTE_PACKED 
>> ext_modified  {
>>    /* Mode from which ree has zero or sign extended the destination.  
>> */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>>  
>>    /* Kind of modification of the insn.  */
>>    ENUM_BITFIELD(ext_modified_kind) kind : 2; diff --git 
>> a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index 
>> c5180b9308a..2e73004cafa 100644
>> --- a/gcc/rtl-ssa/accesses.h
>> +++ b/gcc/rtl-ssa/accesses.h
>> @@ -253,8 +253,10 @@ private:
>>    // Bits for future expansion.
>>    unsigned int m_spare : 2;
>>  
>> -  // The value returned by the accessor above.
>> -  machine_mode m_mode : 8;
>> +  // The value returned by the accessor above.  Note the bitsize of 
>> + // m_mode should be exactly the same as the definition of rtx_def, 
>> + // aka RTX_MACHINE_MODE_BITSIZE in rtl.h.
>> +  machine_mode m_mode : 16;
>>  };
>>  
>>  // A contiguous array of access_info pointers.  Used to represent a 
>> diff --git a/gcc/rtl.h b/gcc/rtl.h index f634cab730b..a18ecf7632f
>> 100644
>> --- a/gcc/rtl.h
>> +++ b/gcc/rtl.h
>> @@ -63,6 +63,9 @@ enum rtx_code  {
>>  # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)  #endif
>>  
>> +#define RTX_CODE_BITSIZE 8
>> +#define RTX_MACHINE_MODE_BITSIZE 16
>> +
>>  /* Register Transfer Language EXPRESSIONS CODE CLASSES */
>>  
>>  enum rtx_class  {
>> @@ -310,10 +313,10 @@ struct GTY((desc("0"), tag("0"),
>>  	    chain_next ("RTX_NEXT (&%h)"),
>>  	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
>>    /* The kind of expression this is.  */
>> -  ENUM_BITFIELD(rtx_code) code: 16;
>> +  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
>>  
>>    /* The kind of value the expression has.  */
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  ENUM_BITFIELD(machine_mode) mode : RTX_MACHINE_MODE_BITSIZE;
>
> Please swap the fields around, as discussed previously.
>
>>  
>>    /* 1 in a MEM if we should keep the alias set for this mem unchanged
>>       when we access a component.
>> @@ -2164,7 +2167,7 @@ subreg_shape::operator != (const subreg_shape
>> &other) const  inline unsigned HOST_WIDE_INT  subreg_shape::unique_id
>> () const  {
>> -  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
>> +  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << 
>> + RTX_MACHINE_MODE_BITSIZE)); }
>>    { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
>>    { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
>>    int res = (int) inner_mode + ((int) outer_mode << 8); diff --git 
>> a/gcc/rtlanal.h b/gcc/rtlanal.h index 5fbed816e20..15aba0dec7a 100644
>> --- a/gcc/rtlanal.h
>> +++ b/gcc/rtlanal.h
>> @@ -99,8 +99,9 @@ public:
>>    unsigned int flags : 16;
>>  
>>    /* The mode of the reference.  If IS_MULTIREG, this is the mode of
>> -     REGNO - MULTIREG_OFFSET.  */
>> -  machine_mode mode : 8;
>> +     REGNO - MULTIREG_OFFSET.  Note the bitsize of mode should be exactly
>> +     the same as the definition of rtx_def,  */  machine_mode mode : 
>> + 16;
>>  
>>    /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
>>    unsigned int multireg_offset : 8;
>
> Like with ira-int.h, any reasonable use of rtlanal.h will also need rtl.h, so we should be able to use the BITSIZE macro directly.  Please swap #includes around if necessary.
>
>> diff --git a/gcc/tree-core.h b/gcc/tree-core.h index
>> a1aea136e75..001d232f433 100644
>> --- a/gcc/tree-core.h
>> +++ b/gcc/tree-core.h
>> @@ -1680,8 +1680,11 @@ struct GTY(()) tree_type_common {
>>    tree attributes;
>>    unsigned int uid;
>>  
>> +  /* Note the bitsize of wmode should be exactly the same as the
>> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  
>> + */
>> +  ENUM_BITFIELD(machine_mode) mode : 16;
>> +
>>    unsigned int precision : 16;
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>>    unsigned lang_flag_0 : 1;
>>    unsigned lang_flag_1 : 1;
>>    unsigned lang_flag_2 : 1;
>> @@ -1712,7 +1715,7 @@ struct GTY(()) tree_type_common {
>>    unsigned empty_flag : 1;
>>    unsigned indivisible_p : 1;
>>    unsigned no_named_args_stdarg_p : 1;
>> -  unsigned spare : 9;
>> +  unsigned spare : 1;
>>  
>>    alias_set_type alias_set;
>>    tree pointer_to;
>> @@ -1770,7 +1773,9 @@ struct GTY(()) tree_decl_common {
>>    struct tree_decl_minimal common;
>>    tree size;
>>  
>> -  ENUM_BITFIELD(machine_mode) mode : 8;
>> +  /* Note the bitsize of wmode should be exactly the same as the
>> +     definition of rtx_def,  aka RTX_MACHINE_MODE_BITSIZE in rtl.h.  
>> + */
>> +  ENUM_BITFIELD(machine_mode) mode : 16;
>>  
>>    unsigned nonlocal_flag : 1;
>>    unsigned virtual_flag : 1;
>
> Please also update:
>
>   /* 13 bits unused.  */
>
> to account for the difference.
>
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v2] Machine_Mode: Extend machine_mode from 8 to 16 bits
       [not found] <Message-Id: <20230512050016.476110-1-pan2.li@intel.com>
@ 2023-05-12 15:38 ` pan2.li
  2023-05-13 13:13 ` [PATCH v3] " pan2.li
  1 sibling, 0 replies; 33+ messages in thread
From: pan2.li @ 2023-05-12 15:38 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, pan2.li, yanzhang.wang, jeffreyalaw,
	rguenther, richard.sandiford

From: Pan Li <pan2.li@intel.com>

We are running out of the machine_mode(8 bits) in RISC-V backend. Thus
we would like to extend the machine_mode bit size from 8 to 16 bits.
However, it is sensitive to extend the memory size in common structure
like tree or rtx. This patch would like to extend the machine_mode bits
to 16 bits by shrinking, like:

* Swap the bit size of code and machine code in rtx_def.
* Reconcile the machine_mode location and spare in tree.

The memory impact of this patch for correlated structure looks like below:

+-------------------+----------+---------+------+
| struct/bytes      | upstream | patched | diff |
+-------------------+----------+---------+------+
| rtx_obj_reference |        8 |      12 |   +4 |
| ext_modified      |        2 |       4 |   +2 |
| ira_allocno       |      192 |     184 |   -8 |
| qty_table_elem    |       40 |      40 |    0 |
| reg_stat_type     |       64 |      64 |    0 |
| rtx_def           |       40 |      40 |    0 |
| table_elt         |       80 |      80 |    0 |
| tree_decl_common  |      112 |     112 |    0 |
| tree_type_common  |      128 |     128 |    0 |
+-------------------+----------+---------+------+

The tree and rtx related struct has no memory changes after this patch,
and the machine_mode changes to 16 bits already.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
Co-Authored-By: Richard Biener <rguenther@suse.de>
Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>

gcc/ChangeLog:

	* combine.cc (struct reg_stat_type): Extended machine_mode to 16 bits.
	* cse.cc (struct qty_table_elem): Extended machine_mode to 16 bits
	and re-ordered the struct fields for alignment.
	(struct table_elt): Extended machine_mode to 16 bits.
	(struct set): Ditto.
	* genopinit.cc (main): Reconciled the machine_mode limit.
	* ira-int.h (struct ira_allocno): Extended machine_mode to 16 bits.
	re-ordered the struct fields for padding.
	* machmode.h (MACHINE_MODE_BITSIZE): New macro.
	* ree.cc (struct ext_modified): Extended machine_mode to 16 bits and
	removed the ATTRIBUTE_PACKED.
	* rtl-ssa/accesses.h: Extended machine_mode to 16 bits.
	* rtl.h (RTX_CODE_BITSIZE): New macro.
	(struct rtx_def): Swap both the bit size and location between the
	rtx_code and the machine_mode.
	(subreg_shape::unique_id): Reconciled the machine_mode limit.
	* rtlanal.h: Extended machine_mode to 16 bits.
	* tree-core.h (struct tree_type_common): Extended machine_mode to 16
	bits and re-ordered the struct fields for padding.
	(struct tree_decl_common): Extended machine_mode to 16 bits.
---
 gcc/combine.cc         |  4 +--
 gcc/cse.cc             | 16 ++++--------
 gcc/genopinit.cc       |  3 ++-
 gcc/ira-int.h          | 56 +++++++++++++++++++++---------------------
 gcc/machmode.h         |  2 ++
 gcc/ree.cc             |  4 +--
 gcc/rtl-ssa/accesses.h |  2 +-
 gcc/rtl.h              | 12 +++++----
 gcc/rtlanal.h          |  2 +-
 gcc/tree-core.h        |  9 ++++---
 10 files changed, 55 insertions(+), 55 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5aa0ec5c45a..a23caeed96f 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -200,7 +200,7 @@ struct reg_stat_type {
 
   unsigned HOST_WIDE_INT	last_set_nonzero_bits;
   char				last_set_sign_bit_copies;
-  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
+  ENUM_BITFIELD(machine_mode)	last_set_mode : MACHINE_MODE_BITSIZE;
 
   /* Set nonzero if references to register n in expressions should not be
      used.  last_set_invalid is set nonzero when this register is being
@@ -235,7 +235,7 @@ struct reg_stat_type {
      truncation if we know that value already contains a truncated
      value.  */
 
-  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
+  ENUM_BITFIELD(machine_mode)	truncated_to_mode : MACHINE_MODE_BITSIZE;
 };
 
 
diff --git a/gcc/cse.cc b/gcc/cse.cc
index b10c9b0c94d..86403b95938 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -248,10 +248,8 @@ struct qty_table_elem
   rtx comparison_const;
   int comparison_qty;
   unsigned int first_reg, last_reg;
-  /* The sizes of these fields should match the sizes of the
-     code and mode fields of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(rtx_code) comparison_code : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
 };
 
 /* The table of all qtys, indexed by qty number.  */
@@ -404,9 +402,7 @@ struct table_elt
   struct table_elt *related_value;
   int cost;
   int regcost;
-  /* The size of this field should match the size
-     of the mode field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   char in_memory;
   char is_const;
   char flag;
@@ -4152,10 +4148,8 @@ struct set
   /* Nonzero if the SET_SRC contains something
      whose value cannot be predicted and understood.  */
   char src_volatile;
-  /* Original machine mode, in case it becomes a CONST_INT.
-     The size of this field should match the size of the mode
-     field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  /* Original machine mode, in case it becomes a CONST_INT.  */
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Hash value of constant equivalent for SET_SRC.  */
   unsigned src_const_hash;
   /* A constant equivalent for SET_SRC, if any.  */
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
index 83cb7504fa1..0c1b6859ca0 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -182,7 +182,8 @@ main (int argc, const char **argv)
 
   progname = "genopinit";
 
-  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
+  if (NUM_OPTABS > 0xffff
+    || MAX_MACHINE_MODE >= ((1 << MACHINE_MODE_BITSIZE) - 1))
     fatal ("genopinit range assumptions invalid");
 
   if (!init_rtx_reader_args_cb (argc, argv, handle_arg))
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index e2de47213b4..e7460cfd906 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -281,13 +281,20 @@ struct ira_allocno
   int regno;
   /* Mode of the allocno which is the mode of the corresponding
      pseudo-register.  */
-  ENUM_BITFIELD (machine_mode) mode : 8;
+  ENUM_BITFIELD (machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Widest mode of the allocno which in at least one case could be
      for paradoxical subregs where wmode > mode.  */
-  ENUM_BITFIELD (machine_mode) wmode : 8;
+  ENUM_BITFIELD (machine_mode) wmode : MACHINE_MODE_BITSIZE;
   /* Register class which should be used for allocation for given
      allocno.  NO_REGS means that we should use memory.  */
   ENUM_BITFIELD (reg_class) aclass : 16;
+  /* Hard register assigned to given allocno.  Negative value means
+     that memory was allocated to the allocno.  During the reload,
+     spilled allocno has value equal to the corresponding stack slot
+     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
+     reload (at this point pseudo-register has only one allocno) which
+     did not get stack slot yet.  */
+  signed int hard_regno : 16;
   /* A bitmask of the ABIs used by calls that occur while the allocno
      is live.  */
   unsigned int crossed_calls_abis : NUM_ABI_IDS;
@@ -321,22 +328,6 @@ struct ira_allocno
 
      This is only ever true for non-cap allocnos.  */
   unsigned int might_conflict_with_parent_p : 1;
-  /* Hard register assigned to given allocno.  Negative value means
-     that memory was allocated to the allocno.  During the reload,
-     spilled allocno has value equal to the corresponding stack slot
-     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
-     reload (at this point pseudo-register has only one allocno) which
-     did not get stack slot yet.  */
-  signed int hard_regno : 16;
-  /* Allocnos with the same regno are linked by the following member.
-     Allocnos corresponding to inner loops are first in the list (it
-     corresponds to depth-first traverse of the loops).  */
-  ira_allocno_t next_regno_allocno;
-  /* There may be different allocnos with the same regno in different
-     regions.  Allocnos are bound to the corresponding loop tree node.
-     Pseudo-register may have only one regular allocno with given loop
-     tree node but more than one cap (see comments above).  */
-  ira_loop_tree_node_t loop_tree_node;
   /* Accumulated usage references of the allocno.  Here and below,
      word 'accumulated' means info for given region and all nested
      subregions.  In this case, 'accumulated' means sum of references
@@ -362,6 +353,25 @@ struct ira_allocno
      register class living at the point than number of hard-registers
      of the class available for the allocation.  */
   int excess_pressure_points_num;
+  /* The number of objects tracked in the following array.  */
+  int num_objects;
+  /* Accumulated frequency of calls which given allocno
+     intersects.  */
+  int call_freq;
+  /* Accumulated number of the intersected calls.  */
+  int calls_crossed_num;
+  /* The number of calls across which it is live, but which should not
+     affect register preferences.  */
+  int cheap_calls_crossed_num;
+  /* Allocnos with the same regno are linked by the following member.
+     Allocnos corresponding to inner loops are first in the list (it
+     corresponds to depth-first traverse of the loops).  */
+  ira_allocno_t next_regno_allocno;
+  /* There may be different allocnos with the same regno in different
+     regions.  Allocnos are bound to the corresponding loop tree node.
+     Pseudo-register may have only one regular allocno with given loop
+     tree node but more than one cap (see comments above).  */
+  ira_loop_tree_node_t loop_tree_node;
   /* Allocno hard reg preferences.  */
   ira_pref_t allocno_prefs;
   /* Copies to other non-conflicting allocnos.  The copies can
@@ -374,21 +384,11 @@ struct ira_allocno
   /* It is a link to allocno (cap) on lower loop level represented by
      given cap.  Null if given allocno is not a cap.  */
   ira_allocno_t cap_member;
-  /* The number of objects tracked in the following array.  */
-  int num_objects;
   /* An array of structures describing conflict information and live
      ranges for each object associated with the allocno.  There may be
      more than one such object in cases where the allocno represents a
      multi-word register.  */
   ira_object_t objects[2];
-  /* Accumulated frequency of calls which given allocno
-     intersects.  */
-  int call_freq;
-  /* Accumulated number of the intersected calls.  */
-  int calls_crossed_num;
-  /* The number of calls across which it is live, but which should not
-     affect register preferences.  */
-  int cheap_calls_crossed_num;
   /* Registers clobbered by intersected calls.  */
    HARD_REG_SET crossed_calls_clobbered_regs;
   /* Array of usage costs (accumulated and the one updated during
diff --git a/gcc/machmode.h b/gcc/machmode.h
index f1865c1ef42..965e40f2799 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -242,6 +242,8 @@ extern const unsigned char mode_class[NUM_MACHINE_MODES];
    || CLASS == MODE_ACCUM                      \
    || CLASS == MODE_UACCUM)
 
+#define MACHINE_MODE_BITSIZE 16
+
 /* An optional T (i.e. a T or nothing), where T is some form of mode class.  */
 template<typename T>
 class opt_mode
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..fc04249fa84 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -564,10 +564,10 @@ enum ext_modified_kind
   EXT_MODIFIED_SEXT
 };
 
-struct ATTRIBUTE_PACKED ext_modified
+struct ext_modified
 {
   /* Mode from which ree has zero or sign extended the destination.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2;
diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
index c5180b9308a..38b4d6160c2 100644
--- a/gcc/rtl-ssa/accesses.h
+++ b/gcc/rtl-ssa/accesses.h
@@ -254,7 +254,7 @@ private:
   unsigned int m_spare : 2;
 
   // The value returned by the accessor above.
-  machine_mode m_mode : 8;
+  machine_mode m_mode : MACHINE_MODE_BITSIZE;
 };
 
 // A contiguous array of access_info pointers.  Used to represent a
diff --git a/gcc/rtl.h b/gcc/rtl.h
index f634cab730b..364782b6060 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -63,6 +63,8 @@ enum rtx_code  {
 # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)
 #endif
 
+#define RTX_CODE_BITSIZE 8
+
 /* Register Transfer Language EXPRESSIONS CODE CLASSES */
 
 enum rtx_class  {
@@ -309,11 +311,11 @@ struct GTY((variable_size)) const_poly_int_def {
 struct GTY((desc("0"), tag("0"),
 	    chain_next ("RTX_NEXT (&%h)"),
 	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
-  /* The kind of expression this is.  */
-  ENUM_BITFIELD(rtx_code) code: 16;
-
   /* The kind of value the expression has.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
+  /* The kind of expression this is.  */
+  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
 
   /* 1 in a MEM if we should keep the alias set for this mem unchanged
      when we access a component.
@@ -2164,7 +2166,7 @@ subreg_shape::operator != (const subreg_shape &other) const
 inline unsigned HOST_WIDE_INT
 subreg_shape::unique_id () const
 {
-  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
+  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << MACHINE_MODE_BITSIZE)); }
   { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
   { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
   int res = (int) inner_mode + ((int) outer_mode << 8);
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 5fbed816e20..9013e75c04b 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -100,7 +100,7 @@ public:
 
   /* The mode of the reference.  If IS_MULTIREG, this is the mode of
      REGNO - MULTIREG_OFFSET.  */
-  machine_mode mode : 8;
+  machine_mode mode : MACHINE_MODE_BITSIZE;
 
   /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
   unsigned int multireg_offset : 8;
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index a1aea136e75..9d44c04bf03 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1680,8 +1680,9 @@ struct GTY(()) tree_type_common {
   tree attributes;
   unsigned int uid;
 
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
   unsigned int precision : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
   unsigned lang_flag_0 : 1;
   unsigned lang_flag_1 : 1;
   unsigned lang_flag_2 : 1;
@@ -1712,7 +1713,7 @@ struct GTY(()) tree_type_common {
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
   unsigned no_named_args_stdarg_p : 1;
-  unsigned spare : 9;
+  unsigned spare : 1;
 
   alias_set_type alias_set;
   tree pointer_to;
@@ -1770,7 +1771,7 @@ struct GTY(()) tree_decl_common {
   struct tree_decl_minimal common;
   tree size;
 
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   unsigned nonlocal_flag : 1;
   unsigned virtual_flag : 1;
@@ -1828,7 +1829,7 @@ struct GTY(()) tree_decl_common {
   /* In FIELD_DECL, this is DECL_NOT_FLEXARRAY.  */
   unsigned int decl_not_flexarray : 1;
 
-  /* 13 bits unused.  */
+  /* 5 bits unused.  */
 
   /* UID for points-to sets, stable over copying from inlining.  */
   unsigned int pt_uid;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12  6:49 ` Richard Biener
@ 2023-05-12 19:14   ` Bernhard Reutner-Fischer
  2023-05-13  8:44     ` Kito Cheng
  0 siblings, 1 reply; 33+ messages in thread
From: Bernhard Reutner-Fischer @ 2023-05-12 19:14 UTC (permalink / raw)
  To: Richard Biener, Richard Biener via Gcc-patches, pan2.li
  Cc: gcc-patches, juzhe.zhong, kito.cheng, yanzhang.wang, jeffreyalaw,
	richard.sandiford

On 12 May 2023 08:49:53 CEST, Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:

>> gcc/ChangeLog:
>> 
>> 	* combine.cc (struct reg_stat_type): Extended machine mode to 16 bits.
>> 	* cse.cc (struct qty_table_elem): Ditto.
>> 	(struct table_elt): Ditto.
>> 	(struct set): Ditto.
>> 	* genopinit.cc (main): Reconciled the machine mode limit.
>> 	* ira-int.h (struct ira_allocno): Extended machine mode to 16 bits.
>> 	* ree.cc (struct ATTRIBUTE_PACKED): Ditto.
>
>please go over the ChangeLog and properly specify the structure types
>altered.  The script generating the changelog isn't perfect.

And a nitpick, please use present tense in the ChangeLog: Extend, etc
And I wouldn't have said reconcile.

thanks,

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12 19:14   ` Bernhard Reutner-Fischer
@ 2023-05-13  8:44     ` Kito Cheng
  2023-05-13 12:26       ` Li, Pan2
  2023-06-30 11:46       ` Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits) Thomas Schwinge
  0 siblings, 2 replies; 33+ messages in thread
From: Kito Cheng @ 2023-05-13  8:44 UTC (permalink / raw)
  To: Bernhard Reutner-Fischer
  Cc: Richard Biener, Richard Biener via Gcc-patches, pan2.li,
	juzhe.zhong, yanzhang.wang, jeffreyalaw, richard.sandiford

Hi Pan:

Tried this patch and I ran into some issues, some variables are using
unsigned char to hold machine mode and will have problems when the
number of modes is larger than 255...

And here is the fix:


diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc
index 715787b8f483..55ac2adb5596 100644
--- a/gcc/genmodes.cc
+++ b/gcc/genmodes.cc
@@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
#else\n\
extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
#endif\n\
-unsigned char\n\
+unsigned short\n\
mode_inner_inline (machine_mode mode)\n\
{\n\
-  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
+  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
  gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
  switch (mode)\n\
    {");
@@ -1529,7 +1529,7 @@ emit_mode_wider (void)
  int c;
  struct mode_data *m;

-  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    tagged_printf ("E_%smode",
@@ -1537,7 +1537,7 @@ emit_mode_wider (void)
                  m->name);

  print_closer ();
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_wider", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    {
@@ -1568,7 +1568,7 @@ emit_mode_wider (void)
    }

  print_closer ();
-  print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_2xwider", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    {
@@ -1625,7 +1625,7 @@ emit_mode_complex (void)
  int c;
  struct mode_data *m;

-  print_decl ("unsigned char", "mode_complex", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_complex", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    tagged_printf ("E_%smode",
@@ -1665,7 +1665,7 @@ emit_mode_inner (void)
  int c;
  struct mode_data *m;

-  print_decl ("unsigned char", "mode_inner", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_inner", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    tagged_printf ("E_%smode",
@@ -1738,7 +1738,7 @@ emit_class_narrowest_mode (void)
{
  int c;

-  print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
+  print_decl ("unsigned short", "class_narrowest_mode", "MAX_MODE_CLASS");

  for (c = 0; c < MAX_MODE_CLASS; c++)
    {
diff --git a/gcc/machmode.h b/gcc/machmode.h
index f1865c1ef425..a168d6f0da2e 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -24,13 +24,13 @@ typedef opt_mode<machine_mode> opt_machine_mode;

extern CONST_MODE_SIZE poly_uint16_pod mode_size[NUM_MACHINE_MODES];
extern CONST_MODE_PRECISION poly_uint16_pod mode_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_inner[NUM_MACHINE_MODES];
+extern const unsigned short mode_inner[NUM_MACHINE_MODES];
extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES];
extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_next[NUM_MACHINE_MODES];
-extern const unsigned char mode_wider[NUM_MACHINE_MODES];
-extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
+extern const unsigned short mode_next[NUM_MACHINE_MODES];
+extern const unsigned short mode_wider[NUM_MACHINE_MODES];
+extern const unsigned short mode_2xwider[NUM_MACHINE_MODES];

template<typename T>
struct mode_traits
@@ -797,7 +797,7 @@ GET_MODE_2XWIDER_MODE (const T &m)
}

/* Get the complex mode from the component mode.  */
-extern const unsigned char mode_complex[NUM_MACHINE_MODES];
+extern const unsigned short mode_complex[NUM_MACHINE_MODES];
#define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE])

/* Represents a machine mode that must have a fixed size.  The main
@@ -946,7 +946,7 @@ extern unsigned get_mode_alignment (machine_mode);

/* For each class, get the narrowest mode in that class.  */

-extern const unsigned char class_narrowest_mode[MAX_MODE_CLASS];
+extern const unsigned short class_narrowest_mode[MAX_MODE_CLASS];
#define GET_CLASS_NARROWEST_MODE(CLASS) \
  ((machine_mode) class_narrowest_mode[CLASS])

--
2.39.2

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-13  8:44     ` Kito Cheng
@ 2023-05-13 12:26       ` Li, Pan2
  2023-06-30 11:46       ` Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits) Thomas Schwinge
  1 sibling, 0 replies; 33+ messages in thread
From: Li, Pan2 @ 2023-05-13 12:26 UTC (permalink / raw)
  To: Kito Cheng, Bernhard Reutner-Fischer
  Cc: Richard Biener, Richard Biener via Gcc-patches, juzhe.zhong,
	Wang, Yanzhang, jeffreyalaw, richard.sandiford

Oops, looks missed this part when I search all machine_mode by word. Thanks kito for catching this.

It seems there is no easy way to remind the developer to change (for example 16 to 32 bits) the below part in future, how about add some comments to MACHINE_MODE_BITSIZE for this?

Pan

-----Original Message-----
From: Kito Cheng <kito.cheng@sifive.com> 
Sent: Saturday, May 13, 2023 4:45 PM
To: Bernhard Reutner-Fischer <rep.dot.nop@gmail.com>
Cc: Richard Biener <rguenther@suse.de>; Richard Biener via Gcc-patches <gcc-patches@gcc.gnu.org>; Li, Pan2 <pan2.li@intel.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; richard.sandiford@arm.com
Subject: Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits

Hi Pan:

Tried this patch and I ran into some issues, some variables are using unsigned char to hold machine mode and will have problems when the number of modes is larger than 255...

And here is the fix:


diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc index 715787b8f483..55ac2adb5596 100644
--- a/gcc/genmodes.cc
+++ b/gcc/genmodes.cc
@@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
#else\n\
extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\ #endif\n\ -unsigned char\n\
+unsigned short\n\
mode_inner_inline (machine_mode mode)\n\ {\n\
-  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
+  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
  gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
  switch (mode)\n\
    {");
@@ -1529,7 +1529,7 @@ emit_mode_wider (void)
  int c;
  struct mode_data *m;

-  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    tagged_printf ("E_%smode",
@@ -1537,7 +1537,7 @@ emit_mode_wider (void)
                  m->name);

  print_closer ();
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_wider", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    {
@@ -1568,7 +1568,7 @@ emit_mode_wider (void)
    }

  print_closer ();
-  print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_2xwider", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    {
@@ -1625,7 +1625,7 @@ emit_mode_complex (void)
  int c;
  struct mode_data *m;

-  print_decl ("unsigned char", "mode_complex", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_complex", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    tagged_printf ("E_%smode",
@@ -1665,7 +1665,7 @@ emit_mode_inner (void)
  int c;
  struct mode_data *m;

-  print_decl ("unsigned char", "mode_inner", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_inner", "NUM_MACHINE_MODES");

  for_all_modes (c, m)
    tagged_printf ("E_%smode",
@@ -1738,7 +1738,7 @@ emit_class_narrowest_mode (void) {
  int c;

-  print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
+  print_decl ("unsigned short", "class_narrowest_mode", 
+ "MAX_MODE_CLASS");

  for (c = 0; c < MAX_MODE_CLASS; c++)
    {
diff --git a/gcc/machmode.h b/gcc/machmode.h index f1865c1ef425..a168d6f0da2e 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -24,13 +24,13 @@ typedef opt_mode<machine_mode> opt_machine_mode;

extern CONST_MODE_SIZE poly_uint16_pod mode_size[NUM_MACHINE_MODES]; extern CONST_MODE_PRECISION poly_uint16_pod mode_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_inner[NUM_MACHINE_MODES];
+extern const unsigned short mode_inner[NUM_MACHINE_MODES];
extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES]; extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_next[NUM_MACHINE_MODES]; -extern const unsigned char mode_wider[NUM_MACHINE_MODES]; -extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
+extern const unsigned short mode_next[NUM_MACHINE_MODES]; extern const 
+unsigned short mode_wider[NUM_MACHINE_MODES]; extern const unsigned 
+short mode_2xwider[NUM_MACHINE_MODES];

template<typename T>
struct mode_traits
@@ -797,7 +797,7 @@ GET_MODE_2XWIDER_MODE (const T &m) }

/* Get the complex mode from the component mode.  */ -extern const unsigned char mode_complex[NUM_MACHINE_MODES];
+extern const unsigned short mode_complex[NUM_MACHINE_MODES];
#define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE])

/* Represents a machine mode that must have a fixed size.  The main @@ -946,7 +946,7 @@ extern unsigned get_mode_alignment (machine_mode);

/* For each class, get the narrowest mode in that class.  */

-extern const unsigned char class_narrowest_mode[MAX_MODE_CLASS];
+extern const unsigned short class_narrowest_mode[MAX_MODE_CLASS];
#define GET_CLASS_NARROWEST_MODE(CLASS) \
  ((machine_mode) class_narrowest_mode[CLASS])

--
2.39.2

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
       [not found] <Message-Id: <20230512050016.476110-1-pan2.li@intel.com>
  2023-05-12 15:38 ` [PATCH v2] " pan2.li
@ 2023-05-13 13:13 ` pan2.li
  2023-05-16  1:12   ` Li, Pan2
  2023-05-16  9:09   ` Richard Sandiford
  1 sibling, 2 replies; 33+ messages in thread
From: pan2.li @ 2023-05-13 13:13 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, pan2.li, yanzhang.wang, jeffreyalaw,
	rguenther, richard.sandiford

From: Pan Li <pan2.li@intel.com>

We are running out of the machine_mode(8 bits) in RISC-V backend. Thus
we would like to extend the machine_mode bit size from 8 to 16 bits.
However, it is sensitive to extend the memory size in common structure
like tree or rtx. This patch would like to extend the machine_mode bits
to 16 bits by shrinking, like:

* Swap the bit size of code and machine code in rtx_def.
* Adjust the machine_mode location and spare in tree.

The memory impact of this patch for correlated structure looks like below:

+-------------------+----------+---------+------+
| struct/bytes      | upstream | patched | diff |
+-------------------+----------+---------+------+
| rtx_obj_reference |        8 |      12 |   +4 |
| ext_modified      |        2 |       4 |   +2 |
| ira_allocno       |      192 |     184 |   -8 |
| qty_table_elem    |       40 |      40 |    0 |
| reg_stat_type     |       64 |      64 |    0 |
| rtx_def           |       40 |      40 |    0 |
| table_elt         |       80 |      80 |    0 |
| tree_decl_common  |      112 |     112 |    0 |
| tree_type_common  |      128 |     128 |    0 |
+-------------------+----------+---------+------+

The tree and rtx related struct has no memory changes after this patch,
and the machine_mode changes to 16 bits already.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
Co-Authored-By: Richard Biener <rguenther@suse.de>
Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>

gcc/ChangeLog:

	* combine.cc (struct reg_stat_type): Extend machine_mode to 16 bits.
	* cse.cc (struct qty_table_elem): Extend machine_mode to 16 bits
	(struct table_elt): Extend machine_mode to 16 bits.
	(struct set): Ditto.
	* genmodes.cc (emit_mode_wider): Extend type from char to short.
	(emit_mode_complex): Ditto.
	(emit_mode_inner): Ditto.
	(emit_class_narrowest_mode): Ditto.
	* genopinit.cc (main): Extend the machine_mode limit.
	* ira-int.h (struct ira_allocno): Extend machine_mode to 16 bits and
	re-ordered the struct fields for padding.
	* machmode.h (MACHINE_MODE_BITSIZE): New macro.
	(GET_MODE_2XWIDER_MODE): Extend type from char to short.
	(get_mode_alignment): Extend type from char to short.
	* ree.cc (struct ext_modified): Extend machine_mode to 16 bits and
	removed the ATTRIBUTE_PACKED.
	* rtl-ssa/accesses.h: Extend machine_mode to 16 bits.
	* rtl.h (RTX_CODE_BITSIZE): New macro.
	(struct rtx_def): Swap both the bit size and location between the
	rtx_code and the machine_mode.
	(subreg_shape::unique_id): Extend the machine_mode limit.
	* rtlanal.h: Extend machine_mode to 16 bits.
	* tree-core.h (struct tree_type_common): Extend machine_mode to 16
	bits and re-ordered the struct fields for padding.
	(struct tree_decl_common): Extend machine_mode to 16 bits.
---
 gcc/combine.cc         |  4 +--
 gcc/cse.cc             | 16 ++++--------
 gcc/genmodes.cc        | 16 ++++++------
 gcc/genopinit.cc       |  3 ++-
 gcc/ira-int.h          | 56 +++++++++++++++++++++---------------------
 gcc/machmode.h         | 27 +++++++++++++++-----
 gcc/ree.cc             |  4 +--
 gcc/rtl-ssa/accesses.h |  2 +-
 gcc/rtl.h              | 12 +++++----
 gcc/rtlanal.h          |  2 +-
 gcc/tree-core.h        |  9 ++++---
 11 files changed, 82 insertions(+), 69 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5aa0ec5c45a..a23caeed96f 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -200,7 +200,7 @@ struct reg_stat_type {
 
   unsigned HOST_WIDE_INT	last_set_nonzero_bits;
   char				last_set_sign_bit_copies;
-  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
+  ENUM_BITFIELD(machine_mode)	last_set_mode : MACHINE_MODE_BITSIZE;
 
   /* Set nonzero if references to register n in expressions should not be
      used.  last_set_invalid is set nonzero when this register is being
@@ -235,7 +235,7 @@ struct reg_stat_type {
      truncation if we know that value already contains a truncated
      value.  */
 
-  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
+  ENUM_BITFIELD(machine_mode)	truncated_to_mode : MACHINE_MODE_BITSIZE;
 };
 
 
diff --git a/gcc/cse.cc b/gcc/cse.cc
index b10c9b0c94d..86403b95938 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -248,10 +248,8 @@ struct qty_table_elem
   rtx comparison_const;
   int comparison_qty;
   unsigned int first_reg, last_reg;
-  /* The sizes of these fields should match the sizes of the
-     code and mode fields of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(rtx_code) comparison_code : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
 };
 
 /* The table of all qtys, indexed by qty number.  */
@@ -404,9 +402,7 @@ struct table_elt
   struct table_elt *related_value;
   int cost;
   int regcost;
-  /* The size of this field should match the size
-     of the mode field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   char in_memory;
   char is_const;
   char flag;
@@ -4152,10 +4148,8 @@ struct set
   /* Nonzero if the SET_SRC contains something
      whose value cannot be predicted and understood.  */
   char src_volatile;
-  /* Original machine mode, in case it becomes a CONST_INT.
-     The size of this field should match the size of the mode
-     field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  /* Original machine mode, in case it becomes a CONST_INT.  */
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Hash value of constant equivalent for SET_SRC.  */
   unsigned src_const_hash;
   /* A constant equivalent for SET_SRC, if any.  */
diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc
index 715787b8f48..55ac2adb559 100644
--- a/gcc/genmodes.cc
+++ b/gcc/genmodes.cc
@@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
 #else\n\
 extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
 #endif\n\
-unsigned char\n\
+unsigned short\n\
 mode_inner_inline (machine_mode mode)\n\
 {\n\
-  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
+  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
   gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
   switch (mode)\n\
     {");
@@ -1529,7 +1529,7 @@ emit_mode_wider (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1537,7 +1537,7 @@ emit_mode_wider (void)
 		   m->name);
 
   print_closer ();
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_wider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     {
@@ -1568,7 +1568,7 @@ emit_mode_wider (void)
     }
 
   print_closer ();
-  print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_2xwider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     {
@@ -1625,7 +1625,7 @@ emit_mode_complex (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_complex", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_complex", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1665,7 +1665,7 @@ emit_mode_inner (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_inner", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_inner", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1738,7 +1738,7 @@ emit_class_narrowest_mode (void)
 {
   int c;
 
-  print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
+  print_decl ("unsigned short", "class_narrowest_mode", "MAX_MODE_CLASS");
 
   for (c = 0; c < MAX_MODE_CLASS; c++)
     {
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
index 83cb7504fa1..0c1b6859ca0 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -182,7 +182,8 @@ main (int argc, const char **argv)
 
   progname = "genopinit";
 
-  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
+  if (NUM_OPTABS > 0xffff
+    || MAX_MACHINE_MODE >= ((1 << MACHINE_MODE_BITSIZE) - 1))
     fatal ("genopinit range assumptions invalid");
 
   if (!init_rtx_reader_args_cb (argc, argv, handle_arg))
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index e2de47213b4..e7460cfd906 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -281,13 +281,20 @@ struct ira_allocno
   int regno;
   /* Mode of the allocno which is the mode of the corresponding
      pseudo-register.  */
-  ENUM_BITFIELD (machine_mode) mode : 8;
+  ENUM_BITFIELD (machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Widest mode of the allocno which in at least one case could be
      for paradoxical subregs where wmode > mode.  */
-  ENUM_BITFIELD (machine_mode) wmode : 8;
+  ENUM_BITFIELD (machine_mode) wmode : MACHINE_MODE_BITSIZE;
   /* Register class which should be used for allocation for given
      allocno.  NO_REGS means that we should use memory.  */
   ENUM_BITFIELD (reg_class) aclass : 16;
+  /* Hard register assigned to given allocno.  Negative value means
+     that memory was allocated to the allocno.  During the reload,
+     spilled allocno has value equal to the corresponding stack slot
+     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
+     reload (at this point pseudo-register has only one allocno) which
+     did not get stack slot yet.  */
+  signed int hard_regno : 16;
   /* A bitmask of the ABIs used by calls that occur while the allocno
      is live.  */
   unsigned int crossed_calls_abis : NUM_ABI_IDS;
@@ -321,22 +328,6 @@ struct ira_allocno
 
      This is only ever true for non-cap allocnos.  */
   unsigned int might_conflict_with_parent_p : 1;
-  /* Hard register assigned to given allocno.  Negative value means
-     that memory was allocated to the allocno.  During the reload,
-     spilled allocno has value equal to the corresponding stack slot
-     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
-     reload (at this point pseudo-register has only one allocno) which
-     did not get stack slot yet.  */
-  signed int hard_regno : 16;
-  /* Allocnos with the same regno are linked by the following member.
-     Allocnos corresponding to inner loops are first in the list (it
-     corresponds to depth-first traverse of the loops).  */
-  ira_allocno_t next_regno_allocno;
-  /* There may be different allocnos with the same regno in different
-     regions.  Allocnos are bound to the corresponding loop tree node.
-     Pseudo-register may have only one regular allocno with given loop
-     tree node but more than one cap (see comments above).  */
-  ira_loop_tree_node_t loop_tree_node;
   /* Accumulated usage references of the allocno.  Here and below,
      word 'accumulated' means info for given region and all nested
      subregions.  In this case, 'accumulated' means sum of references
@@ -362,6 +353,25 @@ struct ira_allocno
      register class living at the point than number of hard-registers
      of the class available for the allocation.  */
   int excess_pressure_points_num;
+  /* The number of objects tracked in the following array.  */
+  int num_objects;
+  /* Accumulated frequency of calls which given allocno
+     intersects.  */
+  int call_freq;
+  /* Accumulated number of the intersected calls.  */
+  int calls_crossed_num;
+  /* The number of calls across which it is live, but which should not
+     affect register preferences.  */
+  int cheap_calls_crossed_num;
+  /* Allocnos with the same regno are linked by the following member.
+     Allocnos corresponding to inner loops are first in the list (it
+     corresponds to depth-first traverse of the loops).  */
+  ira_allocno_t next_regno_allocno;
+  /* There may be different allocnos with the same regno in different
+     regions.  Allocnos are bound to the corresponding loop tree node.
+     Pseudo-register may have only one regular allocno with given loop
+     tree node but more than one cap (see comments above).  */
+  ira_loop_tree_node_t loop_tree_node;
   /* Allocno hard reg preferences.  */
   ira_pref_t allocno_prefs;
   /* Copies to other non-conflicting allocnos.  The copies can
@@ -374,21 +384,11 @@ struct ira_allocno
   /* It is a link to allocno (cap) on lower loop level represented by
      given cap.  Null if given allocno is not a cap.  */
   ira_allocno_t cap_member;
-  /* The number of objects tracked in the following array.  */
-  int num_objects;
   /* An array of structures describing conflict information and live
      ranges for each object associated with the allocno.  There may be
      more than one such object in cases where the allocno represents a
      multi-word register.  */
   ira_object_t objects[2];
-  /* Accumulated frequency of calls which given allocno
-     intersects.  */
-  int call_freq;
-  /* Accumulated number of the intersected calls.  */
-  int calls_crossed_num;
-  /* The number of calls across which it is live, but which should not
-     affect register preferences.  */
-  int cheap_calls_crossed_num;
   /* Registers clobbered by intersected calls.  */
    HARD_REG_SET crossed_calls_clobbered_regs;
   /* Array of usage costs (accumulated and the one updated during
diff --git a/gcc/machmode.h b/gcc/machmode.h
index f1865c1ef42..a22df60dc20 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -24,13 +24,13 @@ typedef opt_mode<machine_mode> opt_machine_mode;
 
 extern CONST_MODE_SIZE poly_uint16_pod mode_size[NUM_MACHINE_MODES];
 extern CONST_MODE_PRECISION poly_uint16_pod mode_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_inner[NUM_MACHINE_MODES];
+extern const unsigned short mode_inner[NUM_MACHINE_MODES];
 extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES];
 extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
 extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_next[NUM_MACHINE_MODES];
-extern const unsigned char mode_wider[NUM_MACHINE_MODES];
-extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
+extern const unsigned short mode_next[NUM_MACHINE_MODES];
+extern const unsigned short mode_wider[NUM_MACHINE_MODES];
+extern const unsigned short mode_2xwider[NUM_MACHINE_MODES];
 
 template<typename T>
 struct mode_traits
@@ -242,6 +242,21 @@ extern const unsigned char mode_class[NUM_MACHINE_MODES];
    || CLASS == MODE_ACCUM                      \
    || CLASS == MODE_UACCUM)
 
+/* The MACHINE_MODE_BITSIZE should be exactly aligned with the type of the
+   machine_mode array in the machmode.h and genmodes.cc.  For example as below.
+   +------------------------+-------+
+   | MACHINE_MODE_BITSIZE   |    16 |
+   +------------------------+-------+
+   | mode_inter[]           | short |
+   | mode_next[]            | short |
+   | mode_wider[]           | short |
+   | mode_2xwider[]         | short |
+   | mode_complex[]         | short |
+   | class_narrowest_mode[] | short |
+   +------------------------+-------+
+   */
+#define MACHINE_MODE_BITSIZE 16
+
 /* An optional T (i.e. a T or nothing), where T is some form of mode class.  */
 template<typename T>
 class opt_mode
@@ -797,7 +812,7 @@ GET_MODE_2XWIDER_MODE (const T &m)
 }
 
 /* Get the complex mode from the component mode.  */
-extern const unsigned char mode_complex[NUM_MACHINE_MODES];
+extern const unsigned short mode_complex[NUM_MACHINE_MODES];
 #define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE])
 
 /* Represents a machine mode that must have a fixed size.  The main
@@ -946,7 +961,7 @@ extern unsigned get_mode_alignment (machine_mode);
 
 /* For each class, get the narrowest mode in that class.  */
 
-extern const unsigned char class_narrowest_mode[MAX_MODE_CLASS];
+extern const unsigned short class_narrowest_mode[MAX_MODE_CLASS];
 #define GET_CLASS_NARROWEST_MODE(CLASS) \
   ((machine_mode) class_narrowest_mode[CLASS])
 
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..fc04249fa84 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -564,10 +564,10 @@ enum ext_modified_kind
   EXT_MODIFIED_SEXT
 };
 
-struct ATTRIBUTE_PACKED ext_modified
+struct ext_modified
 {
   /* Mode from which ree has zero or sign extended the destination.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2;
diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
index c5180b9308a..38b4d6160c2 100644
--- a/gcc/rtl-ssa/accesses.h
+++ b/gcc/rtl-ssa/accesses.h
@@ -254,7 +254,7 @@ private:
   unsigned int m_spare : 2;
 
   // The value returned by the accessor above.
-  machine_mode m_mode : 8;
+  machine_mode m_mode : MACHINE_MODE_BITSIZE;
 };
 
 // A contiguous array of access_info pointers.  Used to represent a
diff --git a/gcc/rtl.h b/gcc/rtl.h
index f634cab730b..364782b6060 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -63,6 +63,8 @@ enum rtx_code  {
 # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)
 #endif
 
+#define RTX_CODE_BITSIZE 8
+
 /* Register Transfer Language EXPRESSIONS CODE CLASSES */
 
 enum rtx_class  {
@@ -309,11 +311,11 @@ struct GTY((variable_size)) const_poly_int_def {
 struct GTY((desc("0"), tag("0"),
 	    chain_next ("RTX_NEXT (&%h)"),
 	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
-  /* The kind of expression this is.  */
-  ENUM_BITFIELD(rtx_code) code: 16;
-
   /* The kind of value the expression has.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
+  /* The kind of expression this is.  */
+  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
 
   /* 1 in a MEM if we should keep the alias set for this mem unchanged
      when we access a component.
@@ -2164,7 +2166,7 @@ subreg_shape::operator != (const subreg_shape &other) const
 inline unsigned HOST_WIDE_INT
 subreg_shape::unique_id () const
 {
-  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
+  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << MACHINE_MODE_BITSIZE)); }
   { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
   { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
   int res = (int) inner_mode + ((int) outer_mode << 8);
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 5fbed816e20..9013e75c04b 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -100,7 +100,7 @@ public:
 
   /* The mode of the reference.  If IS_MULTIREG, this is the mode of
      REGNO - MULTIREG_OFFSET.  */
-  machine_mode mode : 8;
+  machine_mode mode : MACHINE_MODE_BITSIZE;
 
   /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
   unsigned int multireg_offset : 8;
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index a1aea136e75..9d44c04bf03 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1680,8 +1680,9 @@ struct GTY(()) tree_type_common {
   tree attributes;
   unsigned int uid;
 
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
   unsigned int precision : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
   unsigned lang_flag_0 : 1;
   unsigned lang_flag_1 : 1;
   unsigned lang_flag_2 : 1;
@@ -1712,7 +1713,7 @@ struct GTY(()) tree_type_common {
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
   unsigned no_named_args_stdarg_p : 1;
-  unsigned spare : 9;
+  unsigned spare : 1;
 
   alias_set_type alias_set;
   tree pointer_to;
@@ -1770,7 +1771,7 @@ struct GTY(()) tree_decl_common {
   struct tree_decl_minimal common;
   tree size;
 
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   unsigned nonlocal_flag : 1;
   unsigned virtual_flag : 1;
@@ -1828,7 +1829,7 @@ struct GTY(()) tree_decl_common {
   /* In FIELD_DECL, this is DECL_NOT_FLEXARRAY.  */
   unsigned int decl_not_flexarray : 1;
 
-  /* 13 bits unused.  */
+  /* 5 bits unused.  */
 
   /* UID for points-to sets, stable over copying from inlining.  */
   unsigned int pt_uid;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-13 13:13 ` [PATCH v3] " pan2.li
@ 2023-05-16  1:12   ` Li, Pan2
  2023-05-16  7:29     ` Richard Sandiford
  2023-05-16  9:09   ` Richard Sandiford
  1 sibling, 1 reply; 33+ messages in thread
From: Li, Pan2 @ 2023-05-16  1:12 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, Wang, Yanzhang, jeffreyalaw, rguenther,
	richard.sandiford

Kindly ping for this PATCH v3.

Pan

-----Original Message-----
From: Li, Pan2 <pan2.li@intel.com> 
Sent: Saturday, May 13, 2023 9:13 PM
To: gcc-patches@gcc.gnu.org
Cc: juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Li, Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de; richard.sandiford@arm.com
Subject: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits

From: Pan Li <pan2.li@intel.com>

We are running out of the machine_mode(8 bits) in RISC-V backend. Thus we would like to extend the machine_mode bit size from 8 to 16 bits.
However, it is sensitive to extend the memory size in common structure like tree or rtx. This patch would like to extend the machine_mode bits to 16 bits by shrinking, like:

* Swap the bit size of code and machine code in rtx_def.
* Adjust the machine_mode location and spare in tree.

The memory impact of this patch for correlated structure looks like below:

+-------------------+----------+---------+------+
| struct/bytes      | upstream | patched | diff |
+-------------------+----------+---------+------+
| rtx_obj_reference |        8 |      12 |   +4 |
| ext_modified      |        2 |       4 |   +2 |
| ira_allocno       |      192 |     184 |   -8 |
| qty_table_elem    |       40 |      40 |    0 |
| reg_stat_type     |       64 |      64 |    0 |
| rtx_def           |       40 |      40 |    0 |
| table_elt         |       80 |      80 |    0 |
| tree_decl_common  |      112 |     112 |    0 |
| tree_type_common  |      128 |     128 |    0 |
+-------------------+----------+---------+------+

The tree and rtx related struct has no memory changes after this patch, and the machine_mode changes to 16 bits already.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
Co-Authored-By: Richard Biener <rguenther@suse.de>
Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>

gcc/ChangeLog:

	* combine.cc (struct reg_stat_type): Extend machine_mode to 16 bits.
	* cse.cc (struct qty_table_elem): Extend machine_mode to 16 bits
	(struct table_elt): Extend machine_mode to 16 bits.
	(struct set): Ditto.
	* genmodes.cc (emit_mode_wider): Extend type from char to short.
	(emit_mode_complex): Ditto.
	(emit_mode_inner): Ditto.
	(emit_class_narrowest_mode): Ditto.
	* genopinit.cc (main): Extend the machine_mode limit.
	* ira-int.h (struct ira_allocno): Extend machine_mode to 16 bits and
	re-ordered the struct fields for padding.
	* machmode.h (MACHINE_MODE_BITSIZE): New macro.
	(GET_MODE_2XWIDER_MODE): Extend type from char to short.
	(get_mode_alignment): Extend type from char to short.
	* ree.cc (struct ext_modified): Extend machine_mode to 16 bits and
	removed the ATTRIBUTE_PACKED.
	* rtl-ssa/accesses.h: Extend machine_mode to 16 bits.
	* rtl.h (RTX_CODE_BITSIZE): New macro.
	(struct rtx_def): Swap both the bit size and location between the
	rtx_code and the machine_mode.
	(subreg_shape::unique_id): Extend the machine_mode limit.
	* rtlanal.h: Extend machine_mode to 16 bits.
	* tree-core.h (struct tree_type_common): Extend machine_mode to 16
	bits and re-ordered the struct fields for padding.
	(struct tree_decl_common): Extend machine_mode to 16 bits.
---
 gcc/combine.cc         |  4 +--
 gcc/cse.cc             | 16 ++++--------
 gcc/genmodes.cc        | 16 ++++++------
 gcc/genopinit.cc       |  3 ++-
 gcc/ira-int.h          | 56 +++++++++++++++++++++---------------------
 gcc/machmode.h         | 27 +++++++++++++++-----
 gcc/ree.cc             |  4 +--
 gcc/rtl-ssa/accesses.h |  2 +-
 gcc/rtl.h              | 12 +++++----
 gcc/rtlanal.h          |  2 +-
 gcc/tree-core.h        |  9 ++++---
 11 files changed, 82 insertions(+), 69 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc index 5aa0ec5c45a..a23caeed96f 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -200,7 +200,7 @@ struct reg_stat_type {
 
   unsigned HOST_WIDE_INT	last_set_nonzero_bits;
   char				last_set_sign_bit_copies;
-  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
+  ENUM_BITFIELD(machine_mode)	last_set_mode : MACHINE_MODE_BITSIZE;
 
   /* Set nonzero if references to register n in expressions should not be
      used.  last_set_invalid is set nonzero when this register is being @@ -235,7 +235,7 @@ struct reg_stat_type {
      truncation if we know that value already contains a truncated
      value.  */
 
-  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
+  ENUM_BITFIELD(machine_mode)	truncated_to_mode : MACHINE_MODE_BITSIZE;
 };
 
 
diff --git a/gcc/cse.cc b/gcc/cse.cc
index b10c9b0c94d..86403b95938 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -248,10 +248,8 @@ struct qty_table_elem
   rtx comparison_const;
   int comparison_qty;
   unsigned int first_reg, last_reg;
-  /* The sizes of these fields should match the sizes of the
-     code and mode fields of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(rtx_code) comparison_code : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
 };
 
 /* The table of all qtys, indexed by qty number.  */ @@ -404,9 +402,7 @@ struct table_elt
   struct table_elt *related_value;
   int cost;
   int regcost;
-  /* The size of this field should match the size
-     of the mode field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   char in_memory;
   char is_const;
   char flag;
@@ -4152,10 +4148,8 @@ struct set
   /* Nonzero if the SET_SRC contains something
      whose value cannot be predicted and understood.  */
   char src_volatile;
-  /* Original machine mode, in case it becomes a CONST_INT.
-     The size of this field should match the size of the mode
-     field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  /* Original machine mode, in case it becomes a CONST_INT.  */
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Hash value of constant equivalent for SET_SRC.  */
   unsigned src_const_hash;
   /* A constant equivalent for SET_SRC, if any.  */ diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc index 715787b8f48..55ac2adb559 100644
--- a/gcc/genmodes.cc
+++ b/gcc/genmodes.cc
@@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
 #else\n\
 extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\  #endif\n\ -unsigned char\n\
+unsigned short\n\
 mode_inner_inline (machine_mode mode)\n\  {\n\
-  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
+  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
   gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
   switch (mode)\n\
     {");
@@ -1529,7 +1529,7 @@ emit_mode_wider (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1537,7 +1537,7 @@ emit_mode_wider (void)
 		   m->name);
 
   print_closer ();
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_wider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     {
@@ -1568,7 +1568,7 @@ emit_mode_wider (void)
     }
 
   print_closer ();
-  print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_2xwider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     {
@@ -1625,7 +1625,7 @@ emit_mode_complex (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_complex", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_complex", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1665,7 +1665,7 @@ emit_mode_inner (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_inner", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_inner", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1738,7 +1738,7 @@ emit_class_narrowest_mode (void)  {
   int c;
 
-  print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
+  print_decl ("unsigned short", "class_narrowest_mode", 
+ "MAX_MODE_CLASS");
 
   for (c = 0; c < MAX_MODE_CLASS; c++)
     {
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc index 83cb7504fa1..0c1b6859ca0 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -182,7 +182,8 @@ main (int argc, const char **argv)
 
   progname = "genopinit";
 
-  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
+  if (NUM_OPTABS > 0xffff
+    || MAX_MACHINE_MODE >= ((1 << MACHINE_MODE_BITSIZE) - 1))
     fatal ("genopinit range assumptions invalid");
 
   if (!init_rtx_reader_args_cb (argc, argv, handle_arg)) diff --git a/gcc/ira-int.h b/gcc/ira-int.h index e2de47213b4..e7460cfd906 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -281,13 +281,20 @@ struct ira_allocno
   int regno;
   /* Mode of the allocno which is the mode of the corresponding
      pseudo-register.  */
-  ENUM_BITFIELD (machine_mode) mode : 8;
+  ENUM_BITFIELD (machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Widest mode of the allocno which in at least one case could be
      for paradoxical subregs where wmode > mode.  */
-  ENUM_BITFIELD (machine_mode) wmode : 8;
+  ENUM_BITFIELD (machine_mode) wmode : MACHINE_MODE_BITSIZE;
   /* Register class which should be used for allocation for given
      allocno.  NO_REGS means that we should use memory.  */
   ENUM_BITFIELD (reg_class) aclass : 16;
+  /* Hard register assigned to given allocno.  Negative value means
+     that memory was allocated to the allocno.  During the reload,
+     spilled allocno has value equal to the corresponding stack slot
+     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
+     reload (at this point pseudo-register has only one allocno) which
+     did not get stack slot yet.  */
+  signed int hard_regno : 16;
   /* A bitmask of the ABIs used by calls that occur while the allocno
      is live.  */
   unsigned int crossed_calls_abis : NUM_ABI_IDS; @@ -321,22 +328,6 @@ struct ira_allocno
 
      This is only ever true for non-cap allocnos.  */
   unsigned int might_conflict_with_parent_p : 1;
-  /* Hard register assigned to given allocno.  Negative value means
-     that memory was allocated to the allocno.  During the reload,
-     spilled allocno has value equal to the corresponding stack slot
-     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
-     reload (at this point pseudo-register has only one allocno) which
-     did not get stack slot yet.  */
-  signed int hard_regno : 16;
-  /* Allocnos with the same regno are linked by the following member.
-     Allocnos corresponding to inner loops are first in the list (it
-     corresponds to depth-first traverse of the loops).  */
-  ira_allocno_t next_regno_allocno;
-  /* There may be different allocnos with the same regno in different
-     regions.  Allocnos are bound to the corresponding loop tree node.
-     Pseudo-register may have only one regular allocno with given loop
-     tree node but more than one cap (see comments above).  */
-  ira_loop_tree_node_t loop_tree_node;
   /* Accumulated usage references of the allocno.  Here and below,
      word 'accumulated' means info for given region and all nested
      subregions.  In this case, 'accumulated' means sum of references @@ -362,6 +353,25 @@ struct ira_allocno
      register class living at the point than number of hard-registers
      of the class available for the allocation.  */
   int excess_pressure_points_num;
+  /* The number of objects tracked in the following array.  */  int 
+ num_objects;
+  /* Accumulated frequency of calls which given allocno
+     intersects.  */
+  int call_freq;
+  /* Accumulated number of the intersected calls.  */  int 
+ calls_crossed_num;
+  /* The number of calls across which it is live, but which should not
+     affect register preferences.  */
+  int cheap_calls_crossed_num;
+  /* Allocnos with the same regno are linked by the following member.
+     Allocnos corresponding to inner loops are first in the list (it
+     corresponds to depth-first traverse of the loops).  */  
+ ira_allocno_t next_regno_allocno;
+  /* There may be different allocnos with the same regno in different
+     regions.  Allocnos are bound to the corresponding loop tree node.
+     Pseudo-register may have only one regular allocno with given loop
+     tree node but more than one cap (see comments above).  */  
+ ira_loop_tree_node_t loop_tree_node;
   /* Allocno hard reg preferences.  */
   ira_pref_t allocno_prefs;
   /* Copies to other non-conflicting allocnos.  The copies can @@ -374,21 +384,11 @@ struct ira_allocno
   /* It is a link to allocno (cap) on lower loop level represented by
      given cap.  Null if given allocno is not a cap.  */
   ira_allocno_t cap_member;
-  /* The number of objects tracked in the following array.  */
-  int num_objects;
   /* An array of structures describing conflict information and live
      ranges for each object associated with the allocno.  There may be
      more than one such object in cases where the allocno represents a
      multi-word register.  */
   ira_object_t objects[2];
-  /* Accumulated frequency of calls which given allocno
-     intersects.  */
-  int call_freq;
-  /* Accumulated number of the intersected calls.  */
-  int calls_crossed_num;
-  /* The number of calls across which it is live, but which should not
-     affect register preferences.  */
-  int cheap_calls_crossed_num;
   /* Registers clobbered by intersected calls.  */
    HARD_REG_SET crossed_calls_clobbered_regs;
   /* Array of usage costs (accumulated and the one updated during diff --git a/gcc/machmode.h b/gcc/machmode.h index f1865c1ef42..a22df60dc20 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -24,13 +24,13 @@ typedef opt_mode<machine_mode> opt_machine_mode;
 
 extern CONST_MODE_SIZE poly_uint16_pod mode_size[NUM_MACHINE_MODES];  extern CONST_MODE_PRECISION poly_uint16_pod mode_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_inner[NUM_MACHINE_MODES];
+extern const unsigned short mode_inner[NUM_MACHINE_MODES];
 extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES];  extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
 extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_next[NUM_MACHINE_MODES]; -extern const unsigned char mode_wider[NUM_MACHINE_MODES]; -extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
+extern const unsigned short mode_next[NUM_MACHINE_MODES]; extern const 
+unsigned short mode_wider[NUM_MACHINE_MODES]; extern const unsigned 
+short mode_2xwider[NUM_MACHINE_MODES];
 
 template<typename T>
 struct mode_traits
@@ -242,6 +242,21 @@ extern const unsigned char mode_class[NUM_MACHINE_MODES];
    || CLASS == MODE_ACCUM                      \
    || CLASS == MODE_UACCUM)
 
+/* The MACHINE_MODE_BITSIZE should be exactly aligned with the type of the
+   machine_mode array in the machmode.h and genmodes.cc.  For example as below.
+   +------------------------+-------+
+   | MACHINE_MODE_BITSIZE   |    16 |
+   +------------------------+-------+
+   | mode_inter[]           | short |
+   | mode_next[]            | short |
+   | mode_wider[]           | short |
+   | mode_2xwider[]         | short |
+   | mode_complex[]         | short |
+   | class_narrowest_mode[] | short |
+   +------------------------+-------+
+   */
+#define MACHINE_MODE_BITSIZE 16
+
 /* An optional T (i.e. a T or nothing), where T is some form of mode class.  */  template<typename T>  class opt_mode @@ -797,7 +812,7 @@ GET_MODE_2XWIDER_MODE (const T &m)  }
 
 /* Get the complex mode from the component mode.  */ -extern const unsigned char mode_complex[NUM_MACHINE_MODES];
+extern const unsigned short mode_complex[NUM_MACHINE_MODES];
 #define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE])
 
 /* Represents a machine mode that must have a fixed size.  The main @@ -946,7 +961,7 @@ extern unsigned get_mode_alignment (machine_mode);
 
 /* For each class, get the narrowest mode in that class.  */
 
-extern const unsigned char class_narrowest_mode[MAX_MODE_CLASS];
+extern const unsigned short class_narrowest_mode[MAX_MODE_CLASS];
 #define GET_CLASS_NARROWEST_MODE(CLASS) \
   ((machine_mode) class_narrowest_mode[CLASS])
 
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..fc04249fa84 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -564,10 +564,10 @@ enum ext_modified_kind
   EXT_MODIFIED_SEXT
 };
 
-struct ATTRIBUTE_PACKED ext_modified
+struct ext_modified
 {
   /* Mode from which ree has zero or sign extended the destination.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2; diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index c5180b9308a..38b4d6160c2 100644
--- a/gcc/rtl-ssa/accesses.h
+++ b/gcc/rtl-ssa/accesses.h
@@ -254,7 +254,7 @@ private:
   unsigned int m_spare : 2;
 
   // The value returned by the accessor above.
-  machine_mode m_mode : 8;
+  machine_mode m_mode : MACHINE_MODE_BITSIZE;
 };
 
 // A contiguous array of access_info pointers.  Used to represent a diff --git a/gcc/rtl.h b/gcc/rtl.h index f634cab730b..364782b6060 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -63,6 +63,8 @@ enum rtx_code  {
 # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)  #endif
 
+#define RTX_CODE_BITSIZE 8
+
 /* Register Transfer Language EXPRESSIONS CODE CLASSES */
 
 enum rtx_class  {
@@ -309,11 +311,11 @@ struct GTY((variable_size)) const_poly_int_def {  struct GTY((desc("0"), tag("0"),
 	    chain_next ("RTX_NEXT (&%h)"),
 	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
-  /* The kind of expression this is.  */
-  ENUM_BITFIELD(rtx_code) code: 16;
-
   /* The kind of value the expression has.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
+  /* The kind of expression this is.  */
+  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
 
   /* 1 in a MEM if we should keep the alias set for this mem unchanged
      when we access a component.
@@ -2164,7 +2166,7 @@ subreg_shape::operator != (const subreg_shape &other) const  inline unsigned HOST_WIDE_INT  subreg_shape::unique_id () const  {
-  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
+  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << MACHINE_MODE_BITSIZE)); }
   { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
   { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
   int res = (int) inner_mode + ((int) outer_mode << 8); diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h index 5fbed816e20..9013e75c04b 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -100,7 +100,7 @@ public:
 
   /* The mode of the reference.  If IS_MULTIREG, this is the mode of
      REGNO - MULTIREG_OFFSET.  */
-  machine_mode mode : 8;
+  machine_mode mode : MACHINE_MODE_BITSIZE;
 
   /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
   unsigned int multireg_offset : 8;
diff --git a/gcc/tree-core.h b/gcc/tree-core.h index a1aea136e75..9d44c04bf03 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1680,8 +1680,9 @@ struct GTY(()) tree_type_common {
   tree attributes;
   unsigned int uid;
 
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
   unsigned int precision : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
   unsigned lang_flag_0 : 1;
   unsigned lang_flag_1 : 1;
   unsigned lang_flag_2 : 1;
@@ -1712,7 +1713,7 @@ struct GTY(()) tree_type_common {
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
   unsigned no_named_args_stdarg_p : 1;
-  unsigned spare : 9;
+  unsigned spare : 1;
 
   alias_set_type alias_set;
   tree pointer_to;
@@ -1770,7 +1771,7 @@ struct GTY(()) tree_decl_common {
   struct tree_decl_minimal common;
   tree size;
 
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   unsigned nonlocal_flag : 1;
   unsigned virtual_flag : 1;
@@ -1828,7 +1829,7 @@ struct GTY(()) tree_decl_common {
   /* In FIELD_DECL, this is DECL_NOT_FLEXARRAY.  */
   unsigned int decl_not_flexarray : 1;
 
-  /* 13 bits unused.  */
+  /* 5 bits unused.  */
 
   /* UID for points-to sets, stable over copying from inlining.  */
   unsigned int pt_uid;
--
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16  1:12   ` Li, Pan2
@ 2023-05-16  7:29     ` Richard Sandiford
  2023-05-16  7:55       ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2023-05-16  7:29 UTC (permalink / raw)
  To: Li, Pan2
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

"Li, Pan2" <pan2.li@intel.com> writes:
> Kindly ping for this PATCH v3.

The patch was sent on Saturday, so this is effectively pinging after
one working day in most of Europe and America.  That's too soon and
comes across as aggressive.

I realise you and others are working intensively on this.  But in a
sense that's part of the reason why reviews might seem slow.  The volume
of RVV patches recently has been pretty high, so it's been difficult to
keep up.  There are have also been many other non-RVV patches that have
been "unlocked" by stage 1 opening, so there's a high volume from that
as well.

Also, please bear in mind that most people active in the GCC community
have their own work to do and can only a dedicate a certain amount of
the day to reviews.  And reviewing patches can be time-consuming in
itsself.

So sometimes a patch will get a review within the day.  Sometimes it
will take a bit longer.  The fact that a patch doesn't get a response
within one working day doesn't mean that it's been forgotten.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16  7:29     ` Richard Sandiford
@ 2023-05-16  7:55       ` Li, Pan2
  2023-05-16  8:03         ` Xi Ruoyao
  0 siblings, 1 reply; 33+ messages in thread
From: Li, Pan2 @ 2023-05-16  7:55 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

I see, thanks Richard for reminding. I am sorry I failed to locate anywhere(doc or something else) mentioned such convention about ping, will follow the below convention in future.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Tuesday, May 16, 2023 3:30 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits

"Li, Pan2" <pan2.li@intel.com> writes:
> Kindly ping for this PATCH v3.

The patch was sent on Saturday, so this is effectively pinging after one working day in most of Europe and America.  That's too soon and comes across as aggressive.

I realise you and others are working intensively on this.  But in a sense that's part of the reason why reviews might seem slow.  The volume of RVV patches recently has been pretty high, so it's been difficult to keep up.  There are have also been many other non-RVV patches that have been "unlocked" by stage 1 opening, so there's a high volume from that as well.

Also, please bear in mind that most people active in the GCC community have their own work to do and can only a dedicate a certain amount of the day to reviews.  And reviewing patches can be time-consuming in itsself.

So sometimes a patch will get a review within the day.  Sometimes it will take a bit longer.  The fact that a patch doesn't get a response within one working day doesn't mean that it's been forgotten.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16  7:55       ` Li, Pan2
@ 2023-05-16  8:03         ` Xi Ruoyao
  2023-05-16  8:05           ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Xi Ruoyao @ 2023-05-16  8:03 UTC (permalink / raw)
  To: Li, Pan2, Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

On Tue, 2023-05-16 at 07:55 +0000, Li, Pan2 via Gcc-patches wrote:
> I see, thanks Richard for reminding. I am sorry I failed to locate
> anywhere(doc or something else) mentioned such convention about ping,

https://gcc.gnu.org/contribute.html suggests two week.

> will follow the below convention in future.
> 
> Pan
> 
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com> 
> Sent: Tuesday, May 16, 2023 3:30 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai;
> kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>;
> jeffreyalaw@gmail.com; rguenther@suse.de
> Subject: Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16
> bits
> 
> "Li, Pan2" <pan2.li@intel.com> writes:
> > Kindly ping for this PATCH v3.
> 
> The patch was sent on Saturday, so this is effectively pinging after
> one working day in most of Europe and America.  That's too soon and
> comes across as aggressive.
> 
> I realise you and others are working intensively on this.  But in a
> sense that's part of the reason why reviews might seem slow.  The
> volume of RVV patches recently has been pretty high, so it's been
> difficult to keep up.  There are have also been many other non-RVV
> patches that have been "unlocked" by stage 1 opening, so there's a
> high volume from that as well.
> 
> Also, please bear in mind that most people active in the GCC community
> have their own work to do and can only a dedicate a certain amount of
> the day to reviews.  And reviewing patches can be time-consuming in
> itsself.
> 
> So sometimes a patch will get a review within the day.  Sometimes it
> will take a bit longer.  The fact that a patch doesn't get a response
> within one working day doesn't mean that it's been forgotten.
> 
> Thanks,
> Richard

-- 
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16  8:03         ` Xi Ruoyao
@ 2023-05-16  8:05           ` Li, Pan2
  0 siblings, 0 replies; 33+ messages in thread
From: Li, Pan2 @ 2023-05-16  8:05 UTC (permalink / raw)
  To: Xi Ruoyao, Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

Got it. Thank you!

Pan

-----Original Message-----
From: Xi Ruoyao <xry111@xry111.site> 
Sent: Tuesday, May 16, 2023 4:04 PM
To: Li, Pan2 <pan2.li@intel.com>; Richard Sandiford <richard.sandiford@arm.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits

On Tue, 2023-05-16 at 07:55 +0000, Li, Pan2 via Gcc-patches wrote:
> I see, thanks Richard for reminding. I am sorry I failed to locate 
> anywhere(doc or something else) mentioned such convention about ping,

https://gcc.gnu.org/contribute.html suggests two week.

> will follow the below convention in future.
> 
> Pan
> 
> -----Original Message-----
> From: Richard Sandiford <richard.sandiford@arm.com>
> Sent: Tuesday, May 16, 2023 3:30 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; 
> kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; 
> jeffreyalaw@gmail.com; rguenther@suse.de
> Subject: Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 
> bits
> 
> "Li, Pan2" <pan2.li@intel.com> writes:
> > Kindly ping for this PATCH v3.
> 
> The patch was sent on Saturday, so this is effectively pinging after 
> one working day in most of Europe and America.  That's too soon and 
> comes across as aggressive.
> 
> I realise you and others are working intensively on this.  But in a 
> sense that's part of the reason why reviews might seem slow.  The 
> volume of RVV patches recently has been pretty high, so it's been 
> difficult to keep up.  There are have also been many other non-RVV 
> patches that have been "unlocked" by stage 1 opening, so there's a 
> high volume from that as well.
> 
> Also, please bear in mind that most people active in the GCC community 
> have their own work to do and can only a dedicate a certain amount of 
> the day to reviews.  And reviewing patches can be time-consuming in 
> itsself.
> 
> So sometimes a patch will get a review within the day.  Sometimes it 
> will take a bit longer.  The fact that a patch doesn't get a response 
> within one working day doesn't mean that it's been forgotten.
> 
> Thanks,
> Richard

--
Xi Ruoyao <xry111@xry111.site>
School of Aerospace Science and Technology, Xidian University

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-13 13:13 ` [PATCH v3] " pan2.li
  2023-05-16  1:12   ` Li, Pan2
@ 2023-05-16  9:09   ` Richard Sandiford
  2023-05-16 12:17     ` Li, Pan2
  1 sibling, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2023-05-16  9:09 UTC (permalink / raw)
  To: pan2.li
  Cc: gcc-patches, juzhe.zhong, kito.cheng, yanzhang.wang, jeffreyalaw,
	rguenther

pan2.li@intel.com writes:
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
> index c5180b9308a..38b4d6160c2 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -254,7 +254,7 @@ private:
>    unsigned int m_spare : 2;
>  
>    // The value returned by the accessor above.
> -  machine_mode m_mode : 8;
> +  machine_mode m_mode : MACHINE_MODE_BITSIZE;
>  };
>  
>  // A contiguous array of access_info pointers.  Used to represent a

This structure (access_info) isn't mentioned in the table in the patch
description.  The structure is currently 1 LP64 word and is very
size-sensitive.  I think we should:

- Put the mode after m_regno
- Reduce m_kind to 2 bits
- Remove m_spare

I *think* that will keep the current size, but please check.

LGTM otherwise.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16  9:09   ` Richard Sandiford
@ 2023-05-16 12:17     ` Li, Pan2
  2023-05-16 15:39       ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Li, Pan2 @ 2023-05-16 12:17 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

Thanks Richard Sandiford for review.

Yes, currently the class access_info will be extended from 8 bytes to 12 bytes, which is missed in the table. With the adjustment as you suggested it will be 8 bytes but unfortunately the change of m_kind may trigger some ICE in some test case(s).

I will take a look into it and keep you posted.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Tuesday, May 16, 2023 5:09 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits

pan2.li@intel.com writes:
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index 
> c5180b9308a..38b4d6160c2 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -254,7 +254,7 @@ private:
>    unsigned int m_spare : 2;
>  
>    // The value returned by the accessor above.
> -  machine_mode m_mode : 8;
> +  machine_mode m_mode : MACHINE_MODE_BITSIZE;
>  };
>  
>  // A contiguous array of access_info pointers.  Used to represent a

This structure (access_info) isn't mentioned in the table in the patch description.  The structure is currently 1 LP64 word and is very size-sensitive.  I think we should:

- Put the mode after m_regno
- Reduce m_kind to 2 bits
- Remove m_spare

I *think* that will keep the current size, but please check.

LGTM otherwise.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-12  5:00 [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits pan2.li
  2023-05-12  6:49 ` Richard Biener
  2023-05-12  8:24 ` [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits Richard Sandiford
@ 2023-05-16 15:35 ` pan2.li
  2023-05-18  8:57   ` Richard Sandiford
  2 siblings, 1 reply; 33+ messages in thread
From: pan2.li @ 2023-05-16 15:35 UTC (permalink / raw)
  To: gcc-patches
  Cc: juzhe.zhong, kito.cheng, pan2.li, yanzhang.wang, jeffreyalaw,
	rguenther, richard.sandiford

From: Pan Li <pan2.li@intel.com>

We are running out of the machine_mode(8 bits) in RISC-V backend. Thus
we would like to extend the machine_mode bit size from 8 to 16 bits.
However, it is sensitive to extend the memory size in common structure
like tree or rtx. This patch would like to extend the machine_mode bits
to 16 bits by shrinking, like:

* Swap the bit size of code and machine code in rtx_def.
* Adjust the machine_mode location and spare in tree.

The memory impact of this patch for correlated structure looks like below:

+-------------------+----------+---------+------+
| struct/bytes      | upstream | patched | diff |
+-------------------+----------+---------+------+
| rtx_obj_reference |        8 |      12 |   +4 |
| ext_modified      |        2 |       4 |   +2 |
| ira_allocno       |      192 |     184 |   -8 |
| qty_table_elem    |       40 |      40 |    0 |
| reg_stat_type     |       64 |      64 |    0 |
| rtx_def           |       40 |      40 |    0 |
| table_elt         |       80 |      80 |    0 |
| tree_decl_common  |      112 |     112 |    0 |
| tree_type_common  |      128 |     128 |    0 |
| access_info       |        8 |       8 |    0 |
+-------------------+----------+---------+------+

The tree and rtx related struct has no memory changes after this patch,
and the machine_mode changes to 16 bits already.

Signed-off-by: Pan Li <pan2.li@intel.com>
Co-authored-by: Ju-Zhe Zhong <juzhe.zhong@rivai.ai>
Co-authored-by: Kito Cheng <kito.cheng@sifive.com>
Co-Authored-By: Richard Biener <rguenther@suse.de>
Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>

gcc/ChangeLog:

	* combine.cc (struct reg_stat_type): Extend machine_mode to 16 bits.
	* cse.cc (struct qty_table_elem): Extend machine_mode to 16 bits
	(struct table_elt): Extend machine_mode to 16 bits.
	(struct set): Ditto.
	* genmodes.cc (emit_mode_wider): Extend type from char to short.
	(emit_mode_complex): Ditto.
	(emit_mode_inner): Ditto.
	(emit_class_narrowest_mode): Ditto.
	* genopinit.cc (main): Extend the machine_mode limit.
	* ira-int.h (struct ira_allocno): Extend machine_mode to 16 bits and
	re-ordered the struct fields for padding.
	* machmode.h (MACHINE_MODE_BITSIZE): New macro.
	(GET_MODE_2XWIDER_MODE): Extend type from char to short.
	(get_mode_alignment): Extend type from char to short.
	* ree.cc (struct ext_modified): Extend machine_mode to 16 bits and
	removed the ATTRIBUTE_PACKED.
	* rtl-ssa/accesses.h: Extend machine_mode to 16 bits, narrow
	m_kind to 2 bits and remove m_spare.
	* rtl.h (RTX_CODE_BITSIZE): New macro.
	(struct rtx_def): Swap both the bit size and location between the
	rtx_code and the machine_mode.
	(subreg_shape::unique_id): Extend the machine_mode limit.
	* rtlanal.h: Extend machine_mode to 16 bits.
	* tree-core.h (struct tree_type_common): Extend machine_mode to 16
	bits and re-ordered the struct fields for padding.
	(struct tree_decl_common): Extend machine_mode to 16 bits.
	* internals.inl (rtl_ssa::access_info): Adjust the assignment.
---
 gcc/combine.cc            |  4 +--
 gcc/cse.cc                | 16 ++++-------
 gcc/genmodes.cc           | 16 +++++------
 gcc/genopinit.cc          |  3 ++-
 gcc/ira-int.h             | 56 +++++++++++++++++++--------------------
 gcc/machmode.h            | 27 ++++++++++++++-----
 gcc/ree.cc                |  4 +--
 gcc/rtl-ssa/accesses.h    | 12 ++++-----
 gcc/rtl-ssa/internals.inl |  5 ++--
 gcc/rtl.h                 | 12 +++++----
 gcc/rtlanal.h             |  2 +-
 gcc/tree-core.h           |  9 ++++---
 12 files changed, 88 insertions(+), 78 deletions(-)

diff --git a/gcc/combine.cc b/gcc/combine.cc
index 5aa0ec5c45a..a23caeed96f 100644
--- a/gcc/combine.cc
+++ b/gcc/combine.cc
@@ -200,7 +200,7 @@ struct reg_stat_type {
 
   unsigned HOST_WIDE_INT	last_set_nonzero_bits;
   char				last_set_sign_bit_copies;
-  ENUM_BITFIELD(machine_mode)	last_set_mode : 8;
+  ENUM_BITFIELD(machine_mode)	last_set_mode : MACHINE_MODE_BITSIZE;
 
   /* Set nonzero if references to register n in expressions should not be
      used.  last_set_invalid is set nonzero when this register is being
@@ -235,7 +235,7 @@ struct reg_stat_type {
      truncation if we know that value already contains a truncated
      value.  */
 
-  ENUM_BITFIELD(machine_mode)	truncated_to_mode : 8;
+  ENUM_BITFIELD(machine_mode)	truncated_to_mode : MACHINE_MODE_BITSIZE;
 };
 
 
diff --git a/gcc/cse.cc b/gcc/cse.cc
index b10c9b0c94d..86403b95938 100644
--- a/gcc/cse.cc
+++ b/gcc/cse.cc
@@ -248,10 +248,8 @@ struct qty_table_elem
   rtx comparison_const;
   int comparison_qty;
   unsigned int first_reg, last_reg;
-  /* The sizes of these fields should match the sizes of the
-     code and mode fields of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(rtx_code) comparison_code : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+  ENUM_BITFIELD(rtx_code) comparison_code : RTX_CODE_BITSIZE;
 };
 
 /* The table of all qtys, indexed by qty number.  */
@@ -404,9 +402,7 @@ struct table_elt
   struct table_elt *related_value;
   int cost;
   int regcost;
-  /* The size of this field should match the size
-     of the mode field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   char in_memory;
   char is_const;
   char flag;
@@ -4152,10 +4148,8 @@ struct set
   /* Nonzero if the SET_SRC contains something
      whose value cannot be predicted and understood.  */
   char src_volatile;
-  /* Original machine mode, in case it becomes a CONST_INT.
-     The size of this field should match the size of the mode
-     field of struct rtx_def (see rtl.h).  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  /* Original machine mode, in case it becomes a CONST_INT.  */
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Hash value of constant equivalent for SET_SRC.  */
   unsigned src_const_hash;
   /* A constant equivalent for SET_SRC, if any.  */
diff --git a/gcc/genmodes.cc b/gcc/genmodes.cc
index 715787b8f48..55ac2adb559 100644
--- a/gcc/genmodes.cc
+++ b/gcc/genmodes.cc
@@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
 #else\n\
 extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
 #endif\n\
-unsigned char\n\
+unsigned short\n\
 mode_inner_inline (machine_mode mode)\n\
 {\n\
-  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
+  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
   gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
   switch (mode)\n\
     {");
@@ -1529,7 +1529,7 @@ emit_mode_wider (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1537,7 +1537,7 @@ emit_mode_wider (void)
 		   m->name);
 
   print_closer ();
-  print_decl ("unsigned char", "mode_wider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_wider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     {
@@ -1568,7 +1568,7 @@ emit_mode_wider (void)
     }
 
   print_closer ();
-  print_decl ("unsigned char", "mode_2xwider", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_2xwider", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     {
@@ -1625,7 +1625,7 @@ emit_mode_complex (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_complex", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_complex", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1665,7 +1665,7 @@ emit_mode_inner (void)
   int c;
   struct mode_data *m;
 
-  print_decl ("unsigned char", "mode_inner", "NUM_MACHINE_MODES");
+  print_decl ("unsigned short", "mode_inner", "NUM_MACHINE_MODES");
 
   for_all_modes (c, m)
     tagged_printf ("E_%smode",
@@ -1738,7 +1738,7 @@ emit_class_narrowest_mode (void)
 {
   int c;
 
-  print_decl ("unsigned char", "class_narrowest_mode", "MAX_MODE_CLASS");
+  print_decl ("unsigned short", "class_narrowest_mode", "MAX_MODE_CLASS");
 
   for (c = 0; c < MAX_MODE_CLASS; c++)
     {
diff --git a/gcc/genopinit.cc b/gcc/genopinit.cc
index 83cb7504fa1..0c1b6859ca0 100644
--- a/gcc/genopinit.cc
+++ b/gcc/genopinit.cc
@@ -182,7 +182,8 @@ main (int argc, const char **argv)
 
   progname = "genopinit";
 
-  if (NUM_OPTABS > 0xffff || MAX_MACHINE_MODE >= 0xff)
+  if (NUM_OPTABS > 0xffff
+    || MAX_MACHINE_MODE >= ((1 << MACHINE_MODE_BITSIZE) - 1))
     fatal ("genopinit range assumptions invalid");
 
   if (!init_rtx_reader_args_cb (argc, argv, handle_arg))
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index e2de47213b4..e7460cfd906 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -281,13 +281,20 @@ struct ira_allocno
   int regno;
   /* Mode of the allocno which is the mode of the corresponding
      pseudo-register.  */
-  ENUM_BITFIELD (machine_mode) mode : 8;
+  ENUM_BITFIELD (machine_mode) mode : MACHINE_MODE_BITSIZE;
   /* Widest mode of the allocno which in at least one case could be
      for paradoxical subregs where wmode > mode.  */
-  ENUM_BITFIELD (machine_mode) wmode : 8;
+  ENUM_BITFIELD (machine_mode) wmode : MACHINE_MODE_BITSIZE;
   /* Register class which should be used for allocation for given
      allocno.  NO_REGS means that we should use memory.  */
   ENUM_BITFIELD (reg_class) aclass : 16;
+  /* Hard register assigned to given allocno.  Negative value means
+     that memory was allocated to the allocno.  During the reload,
+     spilled allocno has value equal to the corresponding stack slot
+     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
+     reload (at this point pseudo-register has only one allocno) which
+     did not get stack slot yet.  */
+  signed int hard_regno : 16;
   /* A bitmask of the ABIs used by calls that occur while the allocno
      is live.  */
   unsigned int crossed_calls_abis : NUM_ABI_IDS;
@@ -321,22 +328,6 @@ struct ira_allocno
 
      This is only ever true for non-cap allocnos.  */
   unsigned int might_conflict_with_parent_p : 1;
-  /* Hard register assigned to given allocno.  Negative value means
-     that memory was allocated to the allocno.  During the reload,
-     spilled allocno has value equal to the corresponding stack slot
-     number (0, ...) - 2.  Value -1 is used for allocnos spilled by the
-     reload (at this point pseudo-register has only one allocno) which
-     did not get stack slot yet.  */
-  signed int hard_regno : 16;
-  /* Allocnos with the same regno are linked by the following member.
-     Allocnos corresponding to inner loops are first in the list (it
-     corresponds to depth-first traverse of the loops).  */
-  ira_allocno_t next_regno_allocno;
-  /* There may be different allocnos with the same regno in different
-     regions.  Allocnos are bound to the corresponding loop tree node.
-     Pseudo-register may have only one regular allocno with given loop
-     tree node but more than one cap (see comments above).  */
-  ira_loop_tree_node_t loop_tree_node;
   /* Accumulated usage references of the allocno.  Here and below,
      word 'accumulated' means info for given region and all nested
      subregions.  In this case, 'accumulated' means sum of references
@@ -362,6 +353,25 @@ struct ira_allocno
      register class living at the point than number of hard-registers
      of the class available for the allocation.  */
   int excess_pressure_points_num;
+  /* The number of objects tracked in the following array.  */
+  int num_objects;
+  /* Accumulated frequency of calls which given allocno
+     intersects.  */
+  int call_freq;
+  /* Accumulated number of the intersected calls.  */
+  int calls_crossed_num;
+  /* The number of calls across which it is live, but which should not
+     affect register preferences.  */
+  int cheap_calls_crossed_num;
+  /* Allocnos with the same regno are linked by the following member.
+     Allocnos corresponding to inner loops are first in the list (it
+     corresponds to depth-first traverse of the loops).  */
+  ira_allocno_t next_regno_allocno;
+  /* There may be different allocnos with the same regno in different
+     regions.  Allocnos are bound to the corresponding loop tree node.
+     Pseudo-register may have only one regular allocno with given loop
+     tree node but more than one cap (see comments above).  */
+  ira_loop_tree_node_t loop_tree_node;
   /* Allocno hard reg preferences.  */
   ira_pref_t allocno_prefs;
   /* Copies to other non-conflicting allocnos.  The copies can
@@ -374,21 +384,11 @@ struct ira_allocno
   /* It is a link to allocno (cap) on lower loop level represented by
      given cap.  Null if given allocno is not a cap.  */
   ira_allocno_t cap_member;
-  /* The number of objects tracked in the following array.  */
-  int num_objects;
   /* An array of structures describing conflict information and live
      ranges for each object associated with the allocno.  There may be
      more than one such object in cases where the allocno represents a
      multi-word register.  */
   ira_object_t objects[2];
-  /* Accumulated frequency of calls which given allocno
-     intersects.  */
-  int call_freq;
-  /* Accumulated number of the intersected calls.  */
-  int calls_crossed_num;
-  /* The number of calls across which it is live, but which should not
-     affect register preferences.  */
-  int cheap_calls_crossed_num;
   /* Registers clobbered by intersected calls.  */
    HARD_REG_SET crossed_calls_clobbered_regs;
   /* Array of usage costs (accumulated and the one updated during
diff --git a/gcc/machmode.h b/gcc/machmode.h
index f1865c1ef42..a22df60dc20 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -24,13 +24,13 @@ typedef opt_mode<machine_mode> opt_machine_mode;
 
 extern CONST_MODE_SIZE poly_uint16_pod mode_size[NUM_MACHINE_MODES];
 extern CONST_MODE_PRECISION poly_uint16_pod mode_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_inner[NUM_MACHINE_MODES];
+extern const unsigned short mode_inner[NUM_MACHINE_MODES];
 extern CONST_MODE_NUNITS poly_uint16_pod mode_nunits[NUM_MACHINE_MODES];
 extern CONST_MODE_UNIT_SIZE unsigned char mode_unit_size[NUM_MACHINE_MODES];
 extern const unsigned short mode_unit_precision[NUM_MACHINE_MODES];
-extern const unsigned char mode_next[NUM_MACHINE_MODES];
-extern const unsigned char mode_wider[NUM_MACHINE_MODES];
-extern const unsigned char mode_2xwider[NUM_MACHINE_MODES];
+extern const unsigned short mode_next[NUM_MACHINE_MODES];
+extern const unsigned short mode_wider[NUM_MACHINE_MODES];
+extern const unsigned short mode_2xwider[NUM_MACHINE_MODES];
 
 template<typename T>
 struct mode_traits
@@ -242,6 +242,21 @@ extern const unsigned char mode_class[NUM_MACHINE_MODES];
    || CLASS == MODE_ACCUM                      \
    || CLASS == MODE_UACCUM)
 
+/* The MACHINE_MODE_BITSIZE should be exactly aligned with the type of the
+   machine_mode array in the machmode.h and genmodes.cc.  For example as below.
+   +------------------------+-------+
+   | MACHINE_MODE_BITSIZE   |    16 |
+   +------------------------+-------+
+   | mode_inter[]           | short |
+   | mode_next[]            | short |
+   | mode_wider[]           | short |
+   | mode_2xwider[]         | short |
+   | mode_complex[]         | short |
+   | class_narrowest_mode[] | short |
+   +------------------------+-------+
+   */
+#define MACHINE_MODE_BITSIZE 16
+
 /* An optional T (i.e. a T or nothing), where T is some form of mode class.  */
 template<typename T>
 class opt_mode
@@ -797,7 +812,7 @@ GET_MODE_2XWIDER_MODE (const T &m)
 }
 
 /* Get the complex mode from the component mode.  */
-extern const unsigned char mode_complex[NUM_MACHINE_MODES];
+extern const unsigned short mode_complex[NUM_MACHINE_MODES];
 #define GET_MODE_COMPLEX_MODE(MODE) ((machine_mode) mode_complex[MODE])
 
 /* Represents a machine mode that must have a fixed size.  The main
@@ -946,7 +961,7 @@ extern unsigned get_mode_alignment (machine_mode);
 
 /* For each class, get the narrowest mode in that class.  */
 
-extern const unsigned char class_narrowest_mode[MAX_MODE_CLASS];
+extern const unsigned short class_narrowest_mode[MAX_MODE_CLASS];
 #define GET_CLASS_NARROWEST_MODE(CLASS) \
   ((machine_mode) class_narrowest_mode[CLASS])
 
diff --git a/gcc/ree.cc b/gcc/ree.cc
index 413aec7c8eb..fc04249fa84 100644
--- a/gcc/ree.cc
+++ b/gcc/ree.cc
@@ -564,10 +564,10 @@ enum ext_modified_kind
   EXT_MODIFIED_SEXT
 };
 
-struct ATTRIBUTE_PACKED ext_modified
+struct ext_modified
 {
   /* Mode from which ree has zero or sign extended the destination.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   /* Kind of modification of the insn.  */
   ENUM_BITFIELD(ext_modified_kind) kind : 2;
diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
index c5180b9308a..c2103a5cb5c 100644
--- a/gcc/rtl-ssa/accesses.h
+++ b/gcc/rtl-ssa/accesses.h
@@ -215,7 +215,11 @@ private:
 
   // The values returned by the accessors above.
   unsigned int m_regno;
-  access_kind m_kind : 8;
+
+  // The value returned by the accessor above.
+  machine_mode m_mode : MACHINE_MODE_BITSIZE;
+
+  access_kind m_kind : 2;
 
 protected:
   // The value returned by the accessors above.
@@ -249,12 +253,6 @@ private:
   // Indicates that this access has been allocated on the function_info's
   // temporary obstack and so is not (yet) part of the proper SSA form.
   unsigned int m_is_temp : 1;
-
-  // Bits for future expansion.
-  unsigned int m_spare : 2;
-
-  // The value returned by the accessor above.
-  machine_mode m_mode : 8;
 };
 
 // A contiguous array of access_info pointers.  Used to represent a
diff --git a/gcc/rtl-ssa/internals.inl b/gcc/rtl-ssa/internals.inl
index 16e92e8308b..0a61811289d 100644
--- a/gcc/rtl-ssa/internals.inl
+++ b/gcc/rtl-ssa/internals.inl
@@ -22,6 +22,7 @@ namespace rtl_ssa {
 // Construct a new access with the given resource () and kind () values.
 inline access_info::access_info (resource_info resource, access_kind kind)
   : m_regno (resource.regno),
+    m_mode (resource.mode),
     m_kind (kind),
     m_is_artificial (false),
     m_is_set_with_nondebug_insn_uses (false),
@@ -36,9 +37,7 @@ inline access_info::access_info (resource_info resource, access_kind kind)
     m_is_last_nondebug_insn_use (false),
     m_is_in_debug_insn_or_phi (false),
     m_has_been_superceded (false),
-    m_is_temp (false),
-    m_spare (0),
-    m_mode (resource.mode)
+    m_is_temp (false)
 {
 }
 
diff --git a/gcc/rtl.h b/gcc/rtl.h
index f634cab730b..364782b6060 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -63,6 +63,8 @@ enum rtx_code  {
 # define NON_GENERATOR_NUM_RTX_CODE ((int) MATCH_OPERAND)
 #endif
 
+#define RTX_CODE_BITSIZE 8
+
 /* Register Transfer Language EXPRESSIONS CODE CLASSES */
 
 enum rtx_class  {
@@ -309,11 +311,11 @@ struct GTY((variable_size)) const_poly_int_def {
 struct GTY((desc("0"), tag("0"),
 	    chain_next ("RTX_NEXT (&%h)"),
 	    chain_prev ("RTX_PREV (&%h)"))) rtx_def {
-  /* The kind of expression this is.  */
-  ENUM_BITFIELD(rtx_code) code: 16;
-
   /* The kind of value the expression has.  */
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
+  /* The kind of expression this is.  */
+  ENUM_BITFIELD(rtx_code) code: RTX_CODE_BITSIZE;
 
   /* 1 in a MEM if we should keep the alias set for this mem unchanged
      when we access a component.
@@ -2164,7 +2166,7 @@ subreg_shape::operator != (const subreg_shape &other) const
 inline unsigned HOST_WIDE_INT
 subreg_shape::unique_id () const
 {
-  { STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
+  { STATIC_ASSERT (MAX_MACHINE_MODE <= (1 << MACHINE_MODE_BITSIZE)); }
   { STATIC_ASSERT (NUM_POLY_INT_COEFFS <= 3); }
   { STATIC_ASSERT (sizeof (offset.coeffs[0]) <= 2); }
   int res = (int) inner_mode + ((int) outer_mode << 8);
diff --git a/gcc/rtlanal.h b/gcc/rtlanal.h
index 5fbed816e20..9013e75c04b 100644
--- a/gcc/rtlanal.h
+++ b/gcc/rtlanal.h
@@ -100,7 +100,7 @@ public:
 
   /* The mode of the reference.  If IS_MULTIREG, this is the mode of
      REGNO - MULTIREG_OFFSET.  */
-  machine_mode mode : 8;
+  machine_mode mode : MACHINE_MODE_BITSIZE;
 
   /* If IS_MULTIREG, the offset of REGNO from the start of the register.  */
   unsigned int multireg_offset : 8;
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index a1aea136e75..9d44c04bf03 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -1680,8 +1680,9 @@ struct GTY(()) tree_type_common {
   tree attributes;
   unsigned int uid;
 
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
+
   unsigned int precision : 16;
-  ENUM_BITFIELD(machine_mode) mode : 8;
   unsigned lang_flag_0 : 1;
   unsigned lang_flag_1 : 1;
   unsigned lang_flag_2 : 1;
@@ -1712,7 +1713,7 @@ struct GTY(()) tree_type_common {
   unsigned empty_flag : 1;
   unsigned indivisible_p : 1;
   unsigned no_named_args_stdarg_p : 1;
-  unsigned spare : 9;
+  unsigned spare : 1;
 
   alias_set_type alias_set;
   tree pointer_to;
@@ -1770,7 +1771,7 @@ struct GTY(()) tree_decl_common {
   struct tree_decl_minimal common;
   tree size;
 
-  ENUM_BITFIELD(machine_mode) mode : 8;
+  ENUM_BITFIELD(machine_mode) mode : MACHINE_MODE_BITSIZE;
 
   unsigned nonlocal_flag : 1;
   unsigned virtual_flag : 1;
@@ -1828,7 +1829,7 @@ struct GTY(()) tree_decl_common {
   /* In FIELD_DECL, this is DECL_NOT_FLEXARRAY.  */
   unsigned int decl_not_flexarray : 1;
 
-  /* 13 bits unused.  */
+  /* 5 bits unused.  */
 
   /* UID for points-to sets, stable over copying from inlining.  */
   unsigned int pt_uid;
-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16 12:17     ` Li, Pan2
@ 2023-05-16 15:39       ` Li, Pan2
  0 siblings, 0 replies; 33+ messages in thread
From: Li, Pan2 @ 2023-05-16 15:39 UTC (permalink / raw)
  To: Li, Pan2, Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

Update the PATCH v4 (I am sorry, missed the v4 in subject) as below with x86 bootstrap test passed.

https://gcc.gnu.org/pipermail/gcc-patches/2023-May/618742.html

Pan

-----Original Message-----
From: Gcc-patches <gcc-patches-bounces+pan2.li=intel.com@gcc.gnu.org> On Behalf Of Li, Pan2 via Gcc-patches
Sent: Tuesday, May 16, 2023 8:17 PM
To: Richard Sandiford <richard.sandiford@arm.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: RE: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits

Thanks Richard Sandiford for review.

Yes, currently the class access_info will be extended from 8 bytes to 12 bytes, which is missed in the table. With the adjustment as you suggested it will be 8 bytes but unfortunately the change of m_kind may trigger some ICE in some test case(s).

I will take a look into it and keep you posted.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Tuesday, May 16, 2023 5:09 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH v3] Machine_Mode: Extend machine_mode from 8 to 16 bits

pan2.li@intel.com writes:
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index 
> c5180b9308a..38b4d6160c2 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -254,7 +254,7 @@ private:
>    unsigned int m_spare : 2;
>  
>    // The value returned by the accessor above.
> -  machine_mode m_mode : 8;
> +  machine_mode m_mode : MACHINE_MODE_BITSIZE;
>  };
>  
>  // A contiguous array of access_info pointers.  Used to represent a

This structure (access_info) isn't mentioned in the table in the patch description.  The structure is currently 1 LP64 word and is very size-sensitive.  I think we should:

- Put the mode after m_regno
- Reduce m_kind to 2 bits
- Remove m_spare

I *think* that will keep the current size, but please check.

LGTM otherwise.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-16 15:35 ` pan2.li
@ 2023-05-18  8:57   ` Richard Sandiford
  2023-05-18  9:17     ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Richard Sandiford @ 2023-05-18  8:57 UTC (permalink / raw)
  To: pan2.li
  Cc: gcc-patches, juzhe.zhong, kito.cheng, yanzhang.wang, jeffreyalaw,
	rguenther

pan2.li@intel.com writes:
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h
> index c5180b9308a..c2103a5cb5c 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -215,7 +215,11 @@ private:
>  
>    // The values returned by the accessors above.
>    unsigned int m_regno;
> -  access_kind m_kind : 8;
> +
> +  // The value returned by the accessor above.
> +  machine_mode m_mode : MACHINE_MODE_BITSIZE;
> +
> +  access_kind m_kind : 2;

There's no need to repeat the comment.  Just:

  // The values returned by the accessors above.
  unsigned int m_regno;
  machine_mode m_mode : MACHINE_MODE_BITSIZE;
  access_kind m_kind : 2;

would be enough.

OK with that change, thanks.  (There's no need to post the updated patch.)

Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits
  2023-05-18  8:57   ` Richard Sandiford
@ 2023-05-18  9:17     ` Li, Pan2
  0 siblings, 0 replies; 33+ messages in thread
From: Li, Pan2 @ 2023-05-18  9:17 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: gcc-patches, juzhe.zhong, kito.cheng, Wang, Yanzhang,
	jeffreyalaw, rguenther

Committed with that change, thanks Richard Sandiford.

Pan

-----Original Message-----
From: Richard Sandiford <richard.sandiford@arm.com> 
Sent: Thursday, May 18, 2023 4:57 PM
To: Li, Pan2 <pan2.li@intel.com>
Cc: gcc-patches@gcc.gnu.org; juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Wang, Yanzhang <yanzhang.wang@intel.com>; jeffreyalaw@gmail.com; rguenther@suse.de
Subject: Re: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits

pan2.li@intel.com writes:
> diff --git a/gcc/rtl-ssa/accesses.h b/gcc/rtl-ssa/accesses.h index 
> c5180b9308a..c2103a5cb5c 100644
> --- a/gcc/rtl-ssa/accesses.h
> +++ b/gcc/rtl-ssa/accesses.h
> @@ -215,7 +215,11 @@ private:
>  
>    // The values returned by the accessors above.
>    unsigned int m_regno;
> -  access_kind m_kind : 8;
> +
> +  // The value returned by the accessor above.
> +  machine_mode m_mode : MACHINE_MODE_BITSIZE;
> +
> +  access_kind m_kind : 2;

There's no need to repeat the comment.  Just:

  // The values returned by the accessors above.
  unsigned int m_regno;
  machine_mode m_mode : MACHINE_MODE_BITSIZE;
  access_kind m_kind : 2;

would be enough.

OK with that change, thanks.  (There's no need to post the updated patch.)

Richard

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits)
  2023-05-13  8:44     ` Kito Cheng
  2023-05-13 12:26       ` Li, Pan2
@ 2023-06-30 11:46       ` Thomas Schwinge
  2023-06-30 12:45         ` Kito Cheng
       [not found]         ` <MW5PR11MB590876BB7E52B78E837C95C9A913A@MW5PR11MB5908.namprd11.prod.outlook.com>
  1 sibling, 2 replies; 33+ messages in thread
From: Thomas Schwinge @ 2023-06-30 11:46 UTC (permalink / raw)
  To: Richard Biener, gcc-patches, pan2.li, Bernhard Reutner-Fischer,
	Jakub Jelinek
  Cc: Kito Cheng, juzhe.zhong, yanzhang.wang, jeffreyalaw, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 2577 bytes --]

Hi!

On 2023-05-13T16:44:41+0800, Kito Cheng via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> Tried this patch and I ran into some issues, some variables are using
> unsigned char to hold machine mode and will have problems when the
> number of modes is larger than 255...
>
> And here is the fix:

> --- a/gcc/genmodes.cc
> +++ b/gcc/genmodes.cc
> @@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
> #else\n\
> extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
> #endif\n\
> -unsigned char\n\
> +unsigned short\n\
> mode_inner_inline (machine_mode mode)\n\
> {\n\
> -  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
> +  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
>   gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
>   switch (mode)\n\
>     {");
> @@ -1529,7 +1529,7 @@ emit_mode_wider (void)
>   int c;
>   struct mode_data *m;
>
> -  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
> +  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");

Etc.

Instead of 's%char%short', shouldn't we really be using
'enum machine_mode' here?  (I understand such a change may require some
further surgery, but wouldn't it be the correct thing to do?)


And, in context of working on
<https://inbox.sourceware.org/87mt0hcp12.fsf@euler.schwinge.homeip.net>
"Streamer: Fix out of range memory access of machine mode", I found
another one, see attached
'[WIP] Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits"'
(..., which applies on top of the former.)  There, in fact, I did change
to 'enum machine_mode' instead of 's%char%short' -- correct?  Any
comments on the 'GTY' issues: (1) 'const' build error,
(2) '[build-gcc]/gcc/gtype-desc.cc' changes, and (3) is 'GTY((atomic))'
actually the right thing to use, here?

In particular, the 'lto_mode_identity_table' changes would seem necessary
to keep standard LTO ('-flto') functional for large 'machine_mode' size?


Bernhard: Fancy writing a Coccinelle check whether there are any more
places where we put what originally was a 'machine_mode' type into a
'char' (or, into a non-'machine_mode' generally)?  ;-) Hey, just a Friday
afternoon idea!


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-WIP-Adjust-LTO-mode-tables-for-Machine_Mode-Extend-m.patch --]
[-- Type: text/x-diff, Size: 6119 bytes --]

From 0fd8f65bb87b11ef8ae366a797aec572d67b284f Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Fri, 30 Jun 2023 13:23:55 +0200
Subject: [PATCH] [WIP] Adjust LTO mode tables for "Machine_Mode: Extend
 machine_mode from 8 to 16 bits"

---
 gcc/lto-streamer-in.cc |  2 +-
 gcc/lto-streamer.h     | 56 +++++++++++++++++++++++++++++++++++++++++-
 gcc/lto/lto-common.cc  | 10 ++++----
 gcc/lto/lto-common.h   |  2 +-
 gcc/tree-streamer.h    |  2 +-
 5 files changed, 63 insertions(+), 9 deletions(-)

diff --git a/gcc/lto-streamer-in.cc b/gcc/lto-streamer-in.cc
index 1876e1967ec..bbd44504ff8 100644
--- a/gcc/lto-streamer-in.cc
+++ b/gcc/lto-streamer-in.cc
@@ -1997,7 +1997,7 @@ lto_input_mode_table (struct lto_file_decl_data *file_data)
   bitpack_d bp = streamer_read_bitpack (&ib);
 
   unsigned mode_bits = bp_unpack_value (&bp, 5);
-  unsigned char *table = ggc_cleared_vec_alloc<unsigned char> (1 << mode_bits);
+  machine_mode *table = ggc_cleared_vec_alloc<machine_mode> (1 << mode_bits);
 
   file_data->mode_table = table;
   file_data->mode_bits = mode_bits;
diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h
index 0556b34c837..4d83741e4c6 100644
--- a/gcc/lto-streamer.h
+++ b/gcc/lto-streamer.h
@@ -596,7 +596,61 @@ struct GTY(()) lto_file_decl_data
   hash_map<tree, ld_plugin_symbol_resolution> * GTY((skip)) resolution_map;
 
   /* Mode translation table.  */
-  const unsigned char *mode_table;
+  /*TODO const
+With 'const', we get:
+
+    gtype-desc.cc: In function 'void gt_pch_nx_lto_file_decl_data(void*)':
+    gtype-desc.cc:6531:34: error: invalid conversion from 'const void*' to 'void*' [-fpermissive]
+             gt_pch_note_object ((*x).mode_table, x, gt_pch_p_18lto_file_decl_data);
+                                      ^
+    In file included from [...]/source-gcc/gcc/hash-table.h:247:0,
+                     from [...]/source-gcc/gcc/coretypes.h:486,
+                     from gtype-desc.cc:23:
+    [...]/source-gcc/gcc/ggc.h:47:12: note:   initializing argument 1 of 'int gt_pch_note_object(void*, void*, gt_note_pointers, size_t)'
+     extern int gt_pch_note_object (void *, void *, gt_note_pointers,
+                ^
+    make[2]: *** [Makefile:1180: gtype-desc.o] Error 1
+   */
+  machine_mode * GTY((atomic)) mode_table;
+  /*
+This (without 'const') changes '[build-gcc]/gcc/gtype-desc.cc' as follows:
+
+    @@ -2566,7 +2566,9 @@ gt_ggc_mx_lto_file_decl_data (void *x_p)
+           gt_ggc_m_17lto_in_decl_state ((*x).global_decl_state);
+           gt_ggc_m_29hash_table_decl_state_hasher_ ((*x).function_decl_states);
+           gt_ggc_m_18lto_file_decl_data ((*x).next);
+    -      gt_ggc_m_S ((*x).mode_table);
+    +      if ((*x).mode_table != NULL) {
+    +        ggc_mark ((*x).mode_table);
+    +      }
+         }
+     }
+     
+    @@ -6525,7 +6527,9 @@ gt_pch_nx_lto_file_decl_data (void *x_p)
+           gt_pch_n_17lto_in_decl_state ((*x).global_decl_state);
+           gt_pch_n_29hash_table_decl_state_hasher_ ((*x).function_decl_states);
+           gt_pch_n_18lto_file_decl_data ((*x).next);
+    -      gt_pch_n_S ((*x).mode_table);
+    +      if ((*x).mode_table != NULL) {
+    +        gt_pch_note_object ((*x).mode_table, x, gt_pch_p_18lto_file_decl_data);
+    +      }
+         }
+     }
+     
+    @@ -10929,8 +10933,10 @@ gt_pch_p_18lto_file_decl_data (ATTRIBUTE
+         op (&((*x).function_decl_states), NULL, cookie);
+       if ((void *)(x) == this_obj)
+         op (&((*x).next), NULL, cookie);
+    -  if ((void *)(x) == this_obj)
+    -    op (&((*x).mode_table), NULL, cookie);
+    +  if ((*x).mode_table != NULL) {
+    +    if ((void *)(x) == this_obj)
+    +      op (&((*x).mode_table), NULL, cookie);
+    +  }
+     }
+
+Given that the '[...]_S' routines are for strings (due to previous 'char *', I suppose), that's probably correct?
+   */
 
   /* Read LTO section.  */
   lto_section lto_section_header;
diff --git a/gcc/lto/lto-common.cc b/gcc/lto/lto-common.cc
index 973ab791712..f483f42997e 100644
--- a/gcc/lto/lto-common.cc
+++ b/gcc/lto/lto-common.cc
@@ -64,7 +64,7 @@ static bool type_streaming_finished = false;
 
 GTY(()) tree first_personality_decl;
 
-GTY(()) const unsigned char *lto_mode_identity_table;
+GTY(()) const machine_mode *lto_mode_identity_table;
 
 /* Returns a hash code for P.  */
 
@@ -2274,7 +2274,7 @@ lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file,
 #ifdef ACCEL_COMPILER
   lto_input_mode_table (file_data);
 #else
-  file_data->mode_table = lto_mode_identity_table;
+  file_data->mode_table = /*TODO const*/ (machine_mode *) lto_mode_identity_table;
   file_data->mode_bits = ceil_log2 (MAX_MACHINE_MODE);
 #endif
 
@@ -3116,10 +3116,10 @@ lto_fe_init (void)
   bitmap_obstack_initialize (NULL);
   gimple_register_cfg_hooks ();
 #ifndef ACCEL_COMPILER
-  unsigned char *table
-    = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
+  machine_mode *table
+    = ggc_vec_alloc<machine_mode> (MAX_MACHINE_MODE);
   for (int m = 0; m < MAX_MACHINE_MODE; m++)
-    table[m] = m;
+    table[m] = (machine_mode) m;
   lto_mode_identity_table = table;
 #endif
 }
diff --git a/gcc/lto/lto-common.h b/gcc/lto/lto-common.h
index 24b2445673b..2b868085139 100644
--- a/gcc/lto/lto-common.h
+++ b/gcc/lto/lto-common.h
@@ -26,7 +26,7 @@ void print_lto_report_1 (void);
 
 extern tree lto_eh_personality_decl;
 extern GTY(()) vec<tree, va_gc> *tree_with_vars;
-extern const unsigned char *lto_mode_identity_table;
+extern const machine_mode *lto_mode_identity_table;
 extern tree first_personality_decl;
 
 #endif
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index ff49d1ba637..1e346b775e6 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -118,7 +118,7 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
   lto_input_block *ib = (class lto_input_block *) bp->stream;
   int last = 1 << ib->file_data->mode_bits;
   unsigned ix = bp_unpack_enum (bp, machine_mode, last);
-  return (machine_mode) ib->file_data->mode_table[ix];
+  return ib->file_data->mode_table[ix];
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits)
  2023-06-30 11:46       ` Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits) Thomas Schwinge
@ 2023-06-30 12:45         ` Kito Cheng
  2023-06-30 16:11           ` Thomas Schwinge
  2023-06-30 16:37           ` Jakub Jelinek
       [not found]         ` <MW5PR11MB590876BB7E52B78E837C95C9A913A@MW5PR11MB5908.namprd11.prod.outlook.com>
  1 sibling, 2 replies; 33+ messages in thread
From: Kito Cheng @ 2023-06-30 12:45 UTC (permalink / raw)
  To: Thomas Schwinge
  Cc: Richard Biener, gcc-patches, pan2.li, Bernhard Reutner-Fischer,
	Jakub Jelinek, juzhe.zhong, yanzhang.wang, jeffreyalaw,
	richard.sandiford

> On 2023-05-13T16:44:41+0800, Kito Cheng via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
> > Tried this patch and I ran into some issues, some variables are using
> > unsigned char to hold machine mode and will have problems when the
> > number of modes is larger than 255...
> >
> > And here is the fix:
>
> > --- a/gcc/genmodes.cc
> > +++ b/gcc/genmodes.cc
> > @@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
> > #else\n\
> > extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
> > #endif\n\
> > -unsigned char\n\
> > +unsigned short\n\
> > mode_inner_inline (machine_mode mode)\n\
> > {\n\
> > -  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
> > +  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
> >   gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
> >   switch (mode)\n\
> >     {");
> > @@ -1529,7 +1529,7 @@ emit_mode_wider (void)
> >   int c;
> >   struct mode_data *m;
> >
> > -  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
> > +  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");
>
> Etc.
>
> Instead of 's%char%short', shouldn't we really be using
> 'enum machine_mode' here?  (I understand such a change may require some
> further surgery, but wouldn't it be the correct thing to do?)

Hmmm, I think maybe what we need is to leverage C++ language features
to declare enum with underlying types like that:

enum machine_mode : uint16_t

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits)
  2023-06-30 12:45         ` Kito Cheng
@ 2023-06-30 16:11           ` Thomas Schwinge
  2023-06-30 16:37           ` Jakub Jelinek
  1 sibling, 0 replies; 33+ messages in thread
From: Thomas Schwinge @ 2023-06-30 16:11 UTC (permalink / raw)
  To: Kito Cheng, Richard Biener, Jakub Jelinek
  Cc: gcc-patches, pan2.li, Bernhard Reutner-Fischer, juzhe.zhong,
	yanzhang.wang, jeffreyalaw, richard.sandiford

Hi!

On 2023-06-30T20:45:38+0800, Kito Cheng <kito.cheng@sifive.com> wrote:
>> On 2023-05-13T16:44:41+0800, Kito Cheng via Gcc-patches <gcc-patches@gcc.gnu.org> wrote:
>> > Tried this patch and I ran into some issues, some variables are using
>> > unsigned char to hold machine mode and will have problems when the
>> > number of modes is larger than 255...
>> >
>> > And here is the fix:
>>
>> > --- a/gcc/genmodes.cc
>> > +++ b/gcc/genmodes.cc
>> > @@ -1141,10 +1141,10 @@ inline __attribute__((__always_inline__))\n\
>> > #else\n\
>> > extern __inline__ __attribute__((__always_inline__, __gnu_inline__))\n\
>> > #endif\n\
>> > -unsigned char\n\
>> > +unsigned short\n\
>> > mode_inner_inline (machine_mode mode)\n\
>> > {\n\
>> > -  extern const unsigned char mode_inner[NUM_MACHINE_MODES];\n\
>> > +  extern const unsigned short mode_inner[NUM_MACHINE_MODES];\n\
>> >   gcc_assert (mode >= 0 && mode < NUM_MACHINE_MODES);\n\
>> >   switch (mode)\n\
>> >     {");
>> > @@ -1529,7 +1529,7 @@ emit_mode_wider (void)
>> >   int c;
>> >   struct mode_data *m;
>> >
>> > -  print_decl ("unsigned char", "mode_next", "NUM_MACHINE_MODES");
>> > +  print_decl ("unsigned short", "mode_next", "NUM_MACHINE_MODES");
>>
>> Etc.
>>
>> Instead of 's%char%short', shouldn't we really be using
>> 'enum machine_mode' here?  (I understand such a change may require some
>> further surgery, but wouldn't it be the correct thing to do?)
>
> Hmmm, I think maybe what we need is to leverage C++ language features
> to declare enum with underlying types like that:
>
> enum machine_mode : uint16_t

Eh, so that's the reason/confusion (or, at least some of it...) here: my
(naïve...) assumption has been that 'enum machine_mode' already does have
a fixed underlying type -- but apparently it does not, so defaults to
'unsigned int'!

    (gdb) ptype lto_mode_identity_table
    type = const enum machine_mode : unsigned int {E_VOIDmode, E_BLKmode, E_CCmode, [...], NUM_MACHINE_MODES = 130} *

So, yeah, should we fix that, and then generally use 'enum machine_mode'
instead of 'char' vs. 'short'?  (Or, which other "detail" do I fail to
recognize this time?)


Grüße
 Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits)
  2023-06-30 12:45         ` Kito Cheng
  2023-06-30 16:11           ` Thomas Schwinge
@ 2023-06-30 16:37           ` Jakub Jelinek
  2023-07-04 15:45             ` Thomas Schwinge
  1 sibling, 1 reply; 33+ messages in thread
From: Jakub Jelinek @ 2023-06-30 16:37 UTC (permalink / raw)
  To: Kito Cheng
  Cc: Thomas Schwinge, Richard Biener, gcc-patches, pan2.li,
	Bernhard Reutner-Fischer, juzhe.zhong, yanzhang.wang,
	jeffreyalaw, richard.sandiford

On Fri, Jun 30, 2023 at 08:45:38PM +0800, Kito Cheng wrote:
> Hmmm, I think maybe what we need is to leverage C++ language features
> to declare enum with underlying types like that:
> 
> enum machine_mode : uint16_t

What would be the advantage of doing that?
I mean, on most hosts using unsigned rather than unsigned short is
actually faster, and for the cases where we care about the size
(e.g. mode in RTL, DECLs and the like) we already use enum bitfields.

	Jakub


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits)
  2023-06-30 16:37           ` Jakub Jelinek
@ 2023-07-04 15:45             ` Thomas Schwinge
  0 siblings, 0 replies; 33+ messages in thread
From: Thomas Schwinge @ 2023-07-04 15:45 UTC (permalink / raw)
  To: Jakub Jelinek
  Cc: Kito Cheng, Richard Biener, gcc-patches, pan2.li,
	Bernhard Reutner-Fischer, juzhe.zhong, yanzhang.wang,
	jeffreyalaw, richard.sandiford

Hi Jakub!

On 2023-06-30T18:37:59+0200, Jakub Jelinek <jakub@redhat.com> wrote:
> On Fri, Jun 30, 2023 at 08:45:38PM +0800, Kito Cheng wrote:
>> Hmmm, I think maybe what we need is to leverage C++ language features
>> to declare enum with underlying types like that:
>>
>> enum machine_mode : uint16_t
>
> What would be the advantage of doing that?
> I mean, on most hosts using unsigned rather than unsigned short is
> actually faster

Interesting, I had not considered that, and assumed we'd always
space-optimize such things.  But yes, I do see your point.

Is it worth adding some such rationale into a new "Data Types" (or
similar) section on <https://gcc.gnu.org/codingconventions.html>,
<https://gcc.gnu.org/codingrationale.html>?

> and for the cases where we care about the size
> (e.g. mode in RTL, DECLs and the like) we already use enum bitfields.

So, which category does (current) 'unsigned char *mode_table' in
'gcc/lto-streamer.h:struct lto_file_decl_data' fall into?  Used in
'gcc/tree-streamer.h:bp_unpack_machine_mode', normally (non-offloading
case) doing "identity-lookup" via
'gcc/lto/lto-common.cc:lto_mode_identity_table'.  Should this turn into
'ENUM_BITFIELD (machine_mode)' ("native size") or
'ENUM_BITFIELD (machine_mode) : MACHINE_MODE_BITSIZE' (space-optimized)?


Grüße
 Thomas
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: Machine Mode ICE in RISC-V when LTO
       [not found]             ` <MW5PR11MB5908E5743F472D48ACF60039A913A@MW5PR11MB5908.namprd11.prod.outlook.com>
@ 2023-08-10 13:23               ` Thomas Schwinge
  2023-08-10 14:14                 ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Schwinge @ 2023-08-10 13:23 UTC (permalink / raw)
  To: Li, Pan2, Richard Biener, Jakub Jelinek
  Cc: gcc-patches, richard.sandiford, kito.cheng, Jeff Law,
	juzhe.zhong, Wang, Yanzhang

Hi!

On 2023-08-10T12:25:36+0000, "Li, Pan2" <pan2.li@intel.com> wrote:
> Thanks Richard for comment, let me try to promote the table to unsigned short.

I have WIP work for this issue -- which I'd already raised a month ago:
<https://inbox.sourceware.org/87o7kxuq9s.fsf@euler.schwinge.homeip.net>:

On 2023-06-30T13:46:07+0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> In particular, the 'lto_mode_identity_table' changes would seem necessary
> to keep standard LTO ('-flto') functional for large 'machine_mode' size?

... which is exactly the problem you've now run into?

However, a simple:

    -GTY(()) const unsigned char *lto_mode_identity_table;
    +GTY(()) const unsigned short *lto_mode_identity_table;

..., or:

    -GTY(()) const unsigned char *lto_mode_identity_table;
    +GTY(()) const machine_mode *lto_mode_identity_table;

... is not sufficient: that runs into GTY issues, as the current
'unsigned char *lto_mode_identity_table' is (mis-)classified by
'gengtype' as a C string.  This happens to work for this case, but still
isn't right, and only works for 'char *' but not 'short *' etc.  I have
WIP work to tighten that.  ..., which got me into other GTY issues, and
so on...  ;-) (Richard already ACKed and I pushed some of the
prerequisite changes, but there's more to come.)  I'm still planning on
resolving all that mess, but I'm tight on time right now.

However, I have a different proposal, which should address your current
issue: simply, get rid of the 'lto_mode_identity_table', which is just
that: a 1-to-1 mapping of array index to value.  Instead, in
'gcc/lto/lto-common.cc:lto_file_finalize', for '!ACCEL_COMPILER', set
'file_data->mode_table = NULL', and in the users (only
'gcc/tree-streamer.h:bp_unpack_machine_mode'?), replace (untested):

    -return (machine_mode) ib->file_data->mode_table[ix];
    +return ib->file_data->mode_table ? ib->file_data->mode_table[ix] : ix;

Jakub, as the original author of 'lto_mode_identity_table' (see
commit db847fa8f2cca6139188b8dfa0a7064319b19193 (Subversion r221005)), is
there any reason not to do it this way?


Grüße
 Thomas


> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Thursday, August 10, 2023 7:08 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: richard.sandiford@arm.com; Thomas Schwinge <thomas@codesourcery.com>; jakub@redhat.com; kito.cheng@gmail.com; Jeff Law <jeffreyalaw@gmail.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>
> Subject: Re: Machine Mode ICE in RISC-V when LTO
>
> On Thu, Aug 10, 2023 at 10:19 AM Li, Pan2 <pan2.li@intel.com> wrote:
>>
>> Hi all,
>>
>>
>>
>> Recently I found there is still some issues for the machine mode with LTO part by fixing one
>>
>> ICE (only when compile with LTO) in RISC-V backend in , aka below case.
>>
>>
>>
>> >> ../__RISC-V_INSTALL___/bin/riscv64-unknown-elf-g++ -O2 -flto gcc/testsuite/g++.dg/torture/vshuf-v4df.C -o test.elf
>>
>> during RTL pass: expand
>>
>> gcc/testsuite/g++.dg/torture/vshuf-main.inc: In function 'main':
>>
>> gcc/testsuite/g++.dg/torture/vshuf-main.inc:15:9: internal compiler error: in as_a, at machmode.h:381
>>
>>    15 |       V r = __builtin_shuffle(in1[i], mask1[i]);
>>
>>       |         ^
>>
>> 0x7e5b8e scalar_int_mode as_a<scalar_int_mode>(machine_mode)
>>
>>         ../.././gcc/gcc/machmode.h:381
>>
>> 0x7eabdb scalar_mode as_a<scalar_mode>(machine_mode)
>>
>>         ../.././gcc/gcc/expr.cc:332
>>
>> 0x7eabdb convert_mode_scalar
>>
>>         ../.././gcc/gcc/expr.cc:325
>>
>> 0xb8485b store_expr(tree_node*, rtx_def*, int, bool, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6413
>>
>> 0xb8a556 store_field
>>
>>         ../.././gcc/gcc/expr.cc:7648
>>
>> 0xb88f27 store_constructor(tree_node*, rtx_def*, int, poly_int<2u, long>, bool)
>>
>>         ../.././gcc/gcc/expr.cc:7588
>>
>> 0xb8b8b8 expand_constructor
>>
>>         ../.././gcc/gcc/expr.cc:8931
>>
>> 0xb76bc7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>>
>>         ../.././gcc/gcc/expr.cc:11170
>>
>> 0xb77ef7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>>
>>         ../.././gcc/gcc/expr.cc:10809
>>
>> 0xb83a80 store_expr(tree_node*, rtx_def*, int, bool, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6325
>>
>> 0xb851d9 expand_assignment(tree_node*, tree_node*, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6043
>>
>> 0xa48717 expand_gimple_stmt_1
>>
>>         ../.././gcc/gcc/cfgexpand.cc:3946
>>
>> 0xa48717 expand_gimple_stmt
>>
>>         ../.././gcc/gcc/cfgexpand.cc:4044
>>
>> 0xa4d030 expand_gimple_basic_block
>>
>>         ../.././gcc/gcc/cfgexpand.cc:6096
>>
>> 0xa4efd6 execute
>>
>>         ../.././gcc/gcc/cfgexpand.cc:6831
>>
>>
>>
>> I double checked the reason that comes from we add even more machine modes in the RISC-V backend,
>>
>> and then did some investigation for the root cause. It should be related to the mode_table, as well as the
>>
>> bp_unpack_machine_mode.
>>
>>
>>
>> In lto_fe_init:
>>
>>    unsigned char *table
>>
>>     = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
>>
>>
>>
>>    for (int m = 0; m < MAX_MACHINE_MODE; m++)
>>
>> table[m] = m;                                                              <== May overflow here given MAX_MACHINE_MODE > 256 and table[m] is unsigned char.
>>
>>
>>
>> in bp_unpack_machine_mode:
>>
>>    unsigned ix = bp_unpack_enum (bp, machine_mode, last);
>>
>>   return (machine_mode) ib->file_data->mode_table[ix];  <== May return truncated mode here.
>>
>>
>>
>> To validate this idea, I tried below hack code for double checking and then there is no ICE anymore, which indicates
>>
>> the problem here as I bet. However, the lto is quite complicated and I am not sure how to fix it in the right way.
>>
>>
>>
>> +    = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE * 2);
>>
>> …
>>
>> +    ((unsigned short *)table)[m] = m;
>>
>> …
>>
>> +   return (machine_mode) ((unsigned short *)ib->file_data->mode_table)[ix];
>>
>>
>>
>> Besides, I also tried to change the mode_table from char * to short * but got one weird error when building as below.
>>
>>
>>
>> gcc/lto-streamer.h:599: field `(*x).mode_table' is pointer to unimplemented type
>
> We still have some places using an array of char for the mode table.
> The above is assigned
> to lto_mode_identity_table which ends up in
> lto_file_decl_data::mode_table.  I think those
> need to be all promoted to unsigned short.
>
> Richard.
>
>>
>>
>> Pan
>>
>>
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: Machine Mode ICE in RISC-V when LTO
  2023-08-10 13:23               ` Machine Mode ICE in RISC-V when LTO Thomas Schwinge
@ 2023-08-10 14:14                 ` Li, Pan2
  2023-08-11  6:40                   ` Li, Pan2
  0 siblings, 1 reply; 33+ messages in thread
From: Li, Pan2 @ 2023-08-10 14:14 UTC (permalink / raw)
  To: Thomas Schwinge, Richard Biener, Jakub Jelinek
  Cc: gcc-patches, richard.sandiford, kito.cheng, Jeff Law,
	juzhe.zhong, Wang, Yanzhang

Thanks Thomas for the information, great to learn you have a fix WIP.

> ... is not sufficient: that runs into GTY issues, as the current
> 'unsigned char *lto_mode_identity_table' is (mis-)classified by
> 'gengtype' as a C string.  This happens to work for this case, but still
> isn't right, and only works for 'char *' but not 'short *' etc

Does it reports something like " gcc/lto-streamer.h:599: field `(*x).mode_table' is pointer to unimplemented type" when changing to short *?

>    -return (machine_mode) ib->file_data->mode_table[ix];
>    +return ib->file_data->mode_table ? ib->file_data->mode_table[ix] : ix;

Got the point and the mode_table is constant up to a point.

Pan

-----Original Message-----
From: Thomas Schwinge <thomas@codesourcery.com> 
Sent: Thursday, August 10, 2023 9:24 PM
To: Li, Pan2 <pan2.li@intel.com>; Richard Biener <richard.guenther@gmail.com>; Jakub Jelinek <jakub@redhat.com>
Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com; kito.cheng@gmail.com; Jeff Law <jeffreyalaw@gmail.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>
Subject: RE: Machine Mode ICE in RISC-V when LTO

Hi!

On 2023-08-10T12:25:36+0000, "Li, Pan2" <pan2.li@intel.com> wrote:
> Thanks Richard for comment, let me try to promote the table to unsigned short.

I have WIP work for this issue -- which I'd already raised a month ago:
<https://inbox.sourceware.org/87o7kxuq9s.fsf@euler.schwinge.homeip.net>:

On 2023-06-30T13:46:07+0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> In particular, the 'lto_mode_identity_table' changes would seem necessary
> to keep standard LTO ('-flto') functional for large 'machine_mode' size?

... which is exactly the problem you've now run into?

However, a simple:

    -GTY(()) const unsigned char *lto_mode_identity_table;
    +GTY(()) const unsigned short *lto_mode_identity_table;

..., or:

    -GTY(()) const unsigned char *lto_mode_identity_table;
    +GTY(()) const machine_mode *lto_mode_identity_table;

... is not sufficient: that runs into GTY issues, as the current
'unsigned char *lto_mode_identity_table' is (mis-)classified by
'gengtype' as a C string.  This happens to work for this case, but still
isn't right, and only works for 'char *' but not 'short *' etc.  I have
WIP work to tighten that.  ..., which got me into other GTY issues, and
so on...  ;-) (Richard already ACKed and I pushed some of the
prerequisite changes, but there's more to come.)  I'm still planning on
resolving all that mess, but I'm tight on time right now.

However, I have a different proposal, which should address your current
issue: simply, get rid of the 'lto_mode_identity_table', which is just
that: a 1-to-1 mapping of array index to value.  Instead, in
'gcc/lto/lto-common.cc:lto_file_finalize', for '!ACCEL_COMPILER', set
'file_data->mode_table = NULL', and in the users (only
'gcc/tree-streamer.h:bp_unpack_machine_mode'?), replace (untested):

    -return (machine_mode) ib->file_data->mode_table[ix];
    +return ib->file_data->mode_table ? ib->file_data->mode_table[ix] : ix;

Jakub, as the original author of 'lto_mode_identity_table' (see
commit db847fa8f2cca6139188b8dfa0a7064319b19193 (Subversion r221005)), is
there any reason not to do it this way?


Grüße
 Thomas


> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Thursday, August 10, 2023 7:08 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: richard.sandiford@arm.com; Thomas Schwinge <thomas@codesourcery.com>; jakub@redhat.com; kito.cheng@gmail.com; Jeff Law <jeffreyalaw@gmail.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>
> Subject: Re: Machine Mode ICE in RISC-V when LTO
>
> On Thu, Aug 10, 2023 at 10:19 AM Li, Pan2 <pan2.li@intel.com> wrote:
>>
>> Hi all,
>>
>>
>>
>> Recently I found there is still some issues for the machine mode with LTO part by fixing one
>>
>> ICE (only when compile with LTO) in RISC-V backend in , aka below case.
>>
>>
>>
>> >> ../__RISC-V_INSTALL___/bin/riscv64-unknown-elf-g++ -O2 -flto gcc/testsuite/g++.dg/torture/vshuf-v4df.C -o test.elf
>>
>> during RTL pass: expand
>>
>> gcc/testsuite/g++.dg/torture/vshuf-main.inc: In function 'main':
>>
>> gcc/testsuite/g++.dg/torture/vshuf-main.inc:15:9: internal compiler error: in as_a, at machmode.h:381
>>
>>    15 |       V r = __builtin_shuffle(in1[i], mask1[i]);
>>
>>       |         ^
>>
>> 0x7e5b8e scalar_int_mode as_a<scalar_int_mode>(machine_mode)
>>
>>         ../.././gcc/gcc/machmode.h:381
>>
>> 0x7eabdb scalar_mode as_a<scalar_mode>(machine_mode)
>>
>>         ../.././gcc/gcc/expr.cc:332
>>
>> 0x7eabdb convert_mode_scalar
>>
>>         ../.././gcc/gcc/expr.cc:325
>>
>> 0xb8485b store_expr(tree_node*, rtx_def*, int, bool, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6413
>>
>> 0xb8a556 store_field
>>
>>         ../.././gcc/gcc/expr.cc:7648
>>
>> 0xb88f27 store_constructor(tree_node*, rtx_def*, int, poly_int<2u, long>, bool)
>>
>>         ../.././gcc/gcc/expr.cc:7588
>>
>> 0xb8b8b8 expand_constructor
>>
>>         ../.././gcc/gcc/expr.cc:8931
>>
>> 0xb76bc7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>>
>>         ../.././gcc/gcc/expr.cc:11170
>>
>> 0xb77ef7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>>
>>         ../.././gcc/gcc/expr.cc:10809
>>
>> 0xb83a80 store_expr(tree_node*, rtx_def*, int, bool, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6325
>>
>> 0xb851d9 expand_assignment(tree_node*, tree_node*, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6043
>>
>> 0xa48717 expand_gimple_stmt_1
>>
>>         ../.././gcc/gcc/cfgexpand.cc:3946
>>
>> 0xa48717 expand_gimple_stmt
>>
>>         ../.././gcc/gcc/cfgexpand.cc:4044
>>
>> 0xa4d030 expand_gimple_basic_block
>>
>>         ../.././gcc/gcc/cfgexpand.cc:6096
>>
>> 0xa4efd6 execute
>>
>>         ../.././gcc/gcc/cfgexpand.cc:6831
>>
>>
>>
>> I double checked the reason that comes from we add even more machine modes in the RISC-V backend,
>>
>> and then did some investigation for the root cause. It should be related to the mode_table, as well as the
>>
>> bp_unpack_machine_mode.
>>
>>
>>
>> In lto_fe_init:
>>
>>    unsigned char *table
>>
>>     = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
>>
>>
>>
>>    for (int m = 0; m < MAX_MACHINE_MODE; m++)
>>
>> table[m] = m;                                                              <== May overflow here given MAX_MACHINE_MODE > 256 and table[m] is unsigned char.
>>
>>
>>
>> in bp_unpack_machine_mode:
>>
>>    unsigned ix = bp_unpack_enum (bp, machine_mode, last);
>>
>>   return (machine_mode) ib->file_data->mode_table[ix];  <== May return truncated mode here.
>>
>>
>>
>> To validate this idea, I tried below hack code for double checking and then there is no ICE anymore, which indicates
>>
>> the problem here as I bet. However, the lto is quite complicated and I am not sure how to fix it in the right way.
>>
>>
>>
>> +    = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE * 2);
>>
>> …
>>
>> +    ((unsigned short *)table)[m] = m;
>>
>> …
>>
>> +   return (machine_mode) ((unsigned short *)ib->file_data->mode_table)[ix];
>>
>>
>>
>> Besides, I also tried to change the mode_table from char * to short * but got one weird error when building as below.
>>
>>
>>
>> gcc/lto-streamer.h:599: field `(*x).mode_table' is pointer to unimplemented type
>
> We still have some places using an array of char for the mode table.
> The above is assigned
> to lto_mode_identity_table which ends up in
> lto_file_decl_data::mode_table.  I think those
> need to be all promoted to unsigned short.
>
> Richard.
>
>>
>>
>> Pan
>>
>>
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 33+ messages in thread

* RE: Machine Mode ICE in RISC-V when LTO
  2023-08-10 14:14                 ` Li, Pan2
@ 2023-08-11  6:40                   ` Li, Pan2
  2023-09-15 13:33                     ` Robin Dapp
  0 siblings, 1 reply; 33+ messages in thread
From: Li, Pan2 @ 2023-08-11  6:40 UTC (permalink / raw)
  To: Thomas Schwinge, Richard Biener, Jakub Jelinek
  Cc: gcc-patches, richard.sandiford, kito.cheng, Jeff Law,
	juzhe.zhong, Wang, Yanzhang

Hi Thomas,

Just FYI that tried your proposal as below for the lto ICE in RISC-V, it can resolve the ICE (gcc/testsuite/g++.dg/torture/vshuf-v4df.C) as expected. Let's wait Jakub's comments for this.

diff --git a/gcc/lto/lto-common.cc b/gcc/lto/lto-common.cc
index 703e665b698..970e3ea11ac 100644
--- a/gcc/lto/lto-common.cc
+++ b/gcc/lto/lto-common.cc
@@ -2277,7 +2277,7 @@ lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file,
 #ifdef ACCEL_COMPILER
   lto_input_mode_table (file_data);
 #else
-  file_data->mode_table = lto_mode_identity_table;
+  file_data->mode_table = NULL;
   file_data->mode_bits = ceil_log2 (MAX_MACHINE_MODE);
 #endif

diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index ff49d1ba637..30208692bc7 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -118,7 +118,11 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
   lto_input_block *ib = (class lto_input_block *) bp->stream;
   int last = 1 << ib->file_data->mode_bits;
   unsigned ix = bp_unpack_enum (bp, machine_mode, last);
-  return (machine_mode) ib->file_data->mode_table[ix];
+
+  if (ib->file_data->mode_table)
+    return (machine_mode) ib->file_data->mode_table[ix];
+  else
+    return (machine_mode) ix;
 }

 #endif  /* GCC_TREE_STREAMER_H  */

Pan

-----Original Message-----
From: Li, Pan2 
Sent: Thursday, August 10, 2023 10:14 PM
To: Thomas Schwinge <thomas@codesourcery.com>; Richard Biener <richard.guenther@gmail.com>; Jakub Jelinek <jakub@redhat.com>
Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com; kito.cheng@gmail.com; Jeff Law <jeffreyalaw@gmail.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>
Subject: RE: Machine Mode ICE in RISC-V when LTO

Thanks Thomas for the information, great to learn you have a fix WIP.

> ... is not sufficient: that runs into GTY issues, as the current
> 'unsigned char *lto_mode_identity_table' is (mis-)classified by
> 'gengtype' as a C string.  This happens to work for this case, but still
> isn't right, and only works for 'char *' but not 'short *' etc

Does it reports something like " gcc/lto-streamer.h:599: field `(*x).mode_table' is pointer to unimplemented type" when changing to short *?

>    -return (machine_mode) ib->file_data->mode_table[ix];
>    +return ib->file_data->mode_table ? ib->file_data->mode_table[ix] : ix;

Got the point and the mode_table is constant up to a point.

Pan

-----Original Message-----
From: Thomas Schwinge <thomas@codesourcery.com> 
Sent: Thursday, August 10, 2023 9:24 PM
To: Li, Pan2 <pan2.li@intel.com>; Richard Biener <richard.guenther@gmail.com>; Jakub Jelinek <jakub@redhat.com>
Cc: gcc-patches@gcc.gnu.org; richard.sandiford@arm.com; kito.cheng@gmail.com; Jeff Law <jeffreyalaw@gmail.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>
Subject: RE: Machine Mode ICE in RISC-V when LTO

Hi!

On 2023-08-10T12:25:36+0000, "Li, Pan2" <pan2.li@intel.com> wrote:
> Thanks Richard for comment, let me try to promote the table to unsigned short.

I have WIP work for this issue -- which I'd already raised a month ago:
<https://inbox.sourceware.org/87o7kxuq9s.fsf@euler.schwinge.homeip.net>:

On 2023-06-30T13:46:07+0200, Thomas Schwinge <thomas@codesourcery.com> wrote:
> In particular, the 'lto_mode_identity_table' changes would seem necessary
> to keep standard LTO ('-flto') functional for large 'machine_mode' size?

... which is exactly the problem you've now run into?

However, a simple:

    -GTY(()) const unsigned char *lto_mode_identity_table;
    +GTY(()) const unsigned short *lto_mode_identity_table;

..., or:

    -GTY(()) const unsigned char *lto_mode_identity_table;
    +GTY(()) const machine_mode *lto_mode_identity_table;

... is not sufficient: that runs into GTY issues, as the current
'unsigned char *lto_mode_identity_table' is (mis-)classified by
'gengtype' as a C string.  This happens to work for this case, but still
isn't right, and only works for 'char *' but not 'short *' etc.  I have
WIP work to tighten that.  ..., which got me into other GTY issues, and
so on...  ;-) (Richard already ACKed and I pushed some of the
prerequisite changes, but there's more to come.)  I'm still planning on
resolving all that mess, but I'm tight on time right now.

However, I have a different proposal, which should address your current
issue: simply, get rid of the 'lto_mode_identity_table', which is just
that: a 1-to-1 mapping of array index to value.  Instead, in
'gcc/lto/lto-common.cc:lto_file_finalize', for '!ACCEL_COMPILER', set
'file_data->mode_table = NULL', and in the users (only
'gcc/tree-streamer.h:bp_unpack_machine_mode'?), replace (untested):

    -return (machine_mode) ib->file_data->mode_table[ix];
    +return ib->file_data->mode_table ? ib->file_data->mode_table[ix] : ix;

Jakub, as the original author of 'lto_mode_identity_table' (see
commit db847fa8f2cca6139188b8dfa0a7064319b19193 (Subversion r221005)), is
there any reason not to do it this way?


Grüße
 Thomas


> -----Original Message-----
> From: Richard Biener <richard.guenther@gmail.com>
> Sent: Thursday, August 10, 2023 7:08 PM
> To: Li, Pan2 <pan2.li@intel.com>
> Cc: richard.sandiford@arm.com; Thomas Schwinge <thomas@codesourcery.com>; jakub@redhat.com; kito.cheng@gmail.com; Jeff Law <jeffreyalaw@gmail.com>; juzhe.zhong@rivai.ai; Wang, Yanzhang <yanzhang.wang@intel.com>
> Subject: Re: Machine Mode ICE in RISC-V when LTO
>
> On Thu, Aug 10, 2023 at 10:19 AM Li, Pan2 <pan2.li@intel.com> wrote:
>>
>> Hi all,
>>
>>
>>
>> Recently I found there is still some issues for the machine mode with LTO part by fixing one
>>
>> ICE (only when compile with LTO) in RISC-V backend in , aka below case.
>>
>>
>>
>> >> ../__RISC-V_INSTALL___/bin/riscv64-unknown-elf-g++ -O2 -flto gcc/testsuite/g++.dg/torture/vshuf-v4df.C -o test.elf
>>
>> during RTL pass: expand
>>
>> gcc/testsuite/g++.dg/torture/vshuf-main.inc: In function 'main':
>>
>> gcc/testsuite/g++.dg/torture/vshuf-main.inc:15:9: internal compiler error: in as_a, at machmode.h:381
>>
>>    15 |       V r = __builtin_shuffle(in1[i], mask1[i]);
>>
>>       |         ^
>>
>> 0x7e5b8e scalar_int_mode as_a<scalar_int_mode>(machine_mode)
>>
>>         ../.././gcc/gcc/machmode.h:381
>>
>> 0x7eabdb scalar_mode as_a<scalar_mode>(machine_mode)
>>
>>         ../.././gcc/gcc/expr.cc:332
>>
>> 0x7eabdb convert_mode_scalar
>>
>>         ../.././gcc/gcc/expr.cc:325
>>
>> 0xb8485b store_expr(tree_node*, rtx_def*, int, bool, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6413
>>
>> 0xb8a556 store_field
>>
>>         ../.././gcc/gcc/expr.cc:7648
>>
>> 0xb88f27 store_constructor(tree_node*, rtx_def*, int, poly_int<2u, long>, bool)
>>
>>         ../.././gcc/gcc/expr.cc:7588
>>
>> 0xb8b8b8 expand_constructor
>>
>>         ../.././gcc/gcc/expr.cc:8931
>>
>> 0xb76bc7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>>
>>         ../.././gcc/gcc/expr.cc:11170
>>
>> 0xb77ef7 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>>
>>         ../.././gcc/gcc/expr.cc:10809
>>
>> 0xb83a80 store_expr(tree_node*, rtx_def*, int, bool, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6325
>>
>> 0xb851d9 expand_assignment(tree_node*, tree_node*, bool)
>>
>>         ../.././gcc/gcc/expr.cc:6043
>>
>> 0xa48717 expand_gimple_stmt_1
>>
>>         ../.././gcc/gcc/cfgexpand.cc:3946
>>
>> 0xa48717 expand_gimple_stmt
>>
>>         ../.././gcc/gcc/cfgexpand.cc:4044
>>
>> 0xa4d030 expand_gimple_basic_block
>>
>>         ../.././gcc/gcc/cfgexpand.cc:6096
>>
>> 0xa4efd6 execute
>>
>>         ../.././gcc/gcc/cfgexpand.cc:6831
>>
>>
>>
>> I double checked the reason that comes from we add even more machine modes in the RISC-V backend,
>>
>> and then did some investigation for the root cause. It should be related to the mode_table, as well as the
>>
>> bp_unpack_machine_mode.
>>
>>
>>
>> In lto_fe_init:
>>
>>    unsigned char *table
>>
>>     = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
>>
>>
>>
>>    for (int m = 0; m < MAX_MACHINE_MODE; m++)
>>
>> table[m] = m;                                                              <== May overflow here given MAX_MACHINE_MODE > 256 and table[m] is unsigned char.
>>
>>
>>
>> in bp_unpack_machine_mode:
>>
>>    unsigned ix = bp_unpack_enum (bp, machine_mode, last);
>>
>>   return (machine_mode) ib->file_data->mode_table[ix];  <== May return truncated mode here.
>>
>>
>>
>> To validate this idea, I tried below hack code for double checking and then there is no ICE anymore, which indicates
>>
>> the problem here as I bet. However, the lto is quite complicated and I am not sure how to fix it in the right way.
>>
>>
>>
>> +    = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE * 2);
>>
>> …
>>
>> +    ((unsigned short *)table)[m] = m;
>>
>> …
>>
>> +   return (machine_mode) ((unsigned short *)ib->file_data->mode_table)[ix];
>>
>>
>>
>> Besides, I also tried to change the mode_table from char * to short * but got one weird error when building as below.
>>
>>
>>
>> gcc/lto-streamer.h:599: field `(*x).mode_table' is pointer to unimplemented type
>
> We still have some places using an array of char for the mode table.
> The above is assigned
> to lto_mode_identity_table which ends up in
> lto_file_decl_data::mode_table.  I think those
> need to be all promoted to unsigned short.
>
> Richard.
>
>>
>>
>> Pan
>>
>>
-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: Machine Mode ICE in RISC-V when LTO
  2023-08-11  6:40                   ` Li, Pan2
@ 2023-09-15 13:33                     ` Robin Dapp
  2023-09-18 14:46                       ` LTO: Get rid of 'lto_mode_identity_table' (was: Machine Mode ICE in RISC-V when LTO) Thomas Schwinge
  0 siblings, 1 reply; 33+ messages in thread
From: Robin Dapp @ 2023-09-15 13:33 UTC (permalink / raw)
  To: Li, Pan2, Thomas Schwinge, Richard Biener, Jakub Jelinek
  Cc: rdapp.gcc, gcc-patches, richard.sandiford, kito.cheng, Jeff Law,
	juzhe.zhong, Wang, Yanzhang

Hi Thomas, Jakub,

is there anything we can do to assist from the riscv side in order to help
with this?  I haven't really been involved with it but was wondering
what's missing.  If I understand correctly Thomas has a major cleanup
operation in plan but might not get to it soon.  The fix he proposed
helps for the riscv case, however, even without the rework?

If so, I'd kindly ping Jakub to check if the fix is reasonable.

Thank you.

Regards
 Robin

^ permalink raw reply	[flat|nested] 33+ messages in thread

* LTO: Get rid of 'lto_mode_identity_table' (was: Machine Mode ICE in RISC-V when LTO)
  2023-09-15 13:33                     ` Robin Dapp
@ 2023-09-18 14:46                       ` Thomas Schwinge
  2023-09-18 14:53                         ` Richard Biener
  0 siblings, 1 reply; 33+ messages in thread
From: Thomas Schwinge @ 2023-09-18 14:46 UTC (permalink / raw)
  To: gcc-patches, Robin Dapp, pan2.li, Richard Biener, Jakub Jelinek
  Cc: richard.sandiford, kito.cheng, Jeff Law, juzhe.zhong, yanzhang.wang

[-- Attachment #1: Type: text/plain, Size: 1039 bytes --]

Hi!

On 2023-09-15T15:33:59+0200, Robin Dapp <rdapp.gcc@gmail.com> wrote:
> is there anything we can do to assist from the riscv side in order to help
> with this?  I haven't really been involved with it but was wondering
> what's missing.  If I understand correctly Thomas has a major cleanup
> operation in plan

Not really major, but indeed non-trivial -- but WIP already.  ;-)

> but might not get to it soon.

Right.

> The fix he proposed
> helps for the riscv case, however, even without the rework?

Right, and no harm done for my work.

> If so, I'd kindly ping Jakub to check if the fix is reasonable.

I'll push the attached "LTO: Get rid of 'lto_mode_identity_table'"
mid-week, unless any objections raised.


Grüße
 Thomas


-----------------
Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: 0001-LTO-Get-rid-of-lto_mode_identity_table.patch --]
[-- Type: text/x-diff, Size: 3264 bytes --]

From f88a923d7e3e26c29630f1b20624fa54032f8e96 Mon Sep 17 00:00:00 2001
From: Thomas Schwinge <thomas@codesourcery.com>
Date: Thu, 10 Aug 2023 15:23:37 +0200
Subject: [PATCH] LTO: Get rid of 'lto_mode_identity_table'

This, in particular, resolves LTO ICEs with big 'machine_mode's, as for RISC-V.
('mode_table' in 'lto_file_decl_data' still is 'unsigned char'; changing that
is still to be done (for use in offloading compilation), but is not trivial.)
For now, get rid of 'lto_mode_identity_table' to resolve the RISC-V LTO ICEs;
we don't need an actual table for a 1-to-1 mapping.

	gcc/lto/
	* lto-common.cc (lto_mode_identity_table): Remove.
	(lto_file_finalize) [!ACCEL_COMPILER]: 'NULL'-intialize
	'file_data->mode_table'.
	(lto_fe_init): Don't initialize 'lto_mode_identity_table'.
	* lto-common.h (lto_mode_identity_table): Remove.
	gcc/
	* tree-streamer.h (bp_unpack_machine_mode): If
	'ib->file_data->mode_table' not available, apply 1-to-1 mapping.

Co-authored-by: Pan Li <pan2.li@intel.com>
---
 gcc/lto/lto-common.cc | 11 +----------
 gcc/lto/lto-common.h  |  1 -
 gcc/tree-streamer.h   |  5 ++++-
 3 files changed, 5 insertions(+), 12 deletions(-)

diff --git a/gcc/lto/lto-common.cc b/gcc/lto/lto-common.cc
index 703e665b698..ad6e7fde5ce 100644
--- a/gcc/lto/lto-common.cc
+++ b/gcc/lto/lto-common.cc
@@ -64,8 +64,6 @@ static bool type_streaming_finished = false;
 
 GTY(()) tree first_personality_decl;
 
-GTY(()) const unsigned char *lto_mode_identity_table;
-
 /* Returns a hash code for P.  */
 
 static hashval_t
@@ -2277,7 +2275,7 @@ lto_file_finalize (struct lto_file_decl_data *file_data, lto_file *file,
 #ifdef ACCEL_COMPILER
   lto_input_mode_table (file_data);
 #else
-  file_data->mode_table = lto_mode_identity_table;
+  file_data->mode_table = NULL;
   file_data->mode_bits = ceil_log2 (MAX_MACHINE_MODE);
 #endif
 
@@ -3118,13 +3116,6 @@ lto_fe_init (void)
   memset (&lto_stats, 0, sizeof (lto_stats));
   bitmap_obstack_initialize (NULL);
   gimple_register_cfg_hooks ();
-#ifndef ACCEL_COMPILER
-  unsigned char *table
-    = ggc_vec_alloc<unsigned char> (MAX_MACHINE_MODE);
-  for (int m = 0; m < MAX_MACHINE_MODE; m++)
-    table[m] = m;
-  lto_mode_identity_table = table;
-#endif
 }
 
 #include "gt-lto-lto-common.h"
diff --git a/gcc/lto/lto-common.h b/gcc/lto/lto-common.h
index 24b2445673b..b9faa72e5cb 100644
--- a/gcc/lto/lto-common.h
+++ b/gcc/lto/lto-common.h
@@ -26,7 +26,6 @@ void print_lto_report_1 (void);
 
 extern tree lto_eh_personality_decl;
 extern GTY(()) vec<tree, va_gc> *tree_with_vars;
-extern const unsigned char *lto_mode_identity_table;
 extern tree first_personality_decl;
 
 #endif
diff --git a/gcc/tree-streamer.h b/gcc/tree-streamer.h
index ff49d1ba637..886ee6ac754 100644
--- a/gcc/tree-streamer.h
+++ b/gcc/tree-streamer.h
@@ -118,7 +118,10 @@ bp_unpack_machine_mode (struct bitpack_d *bp)
   lto_input_block *ib = (class lto_input_block *) bp->stream;
   int last = 1 << ib->file_data->mode_bits;
   unsigned ix = bp_unpack_enum (bp, machine_mode, last);
-  return (machine_mode) ib->file_data->mode_table[ix];
+  if (ib->file_data->mode_table)
+    return (machine_mode) ib->file_data->mode_table[ix];
+  else
+    return (machine_mode) ix;
 }
 
 #endif  /* GCC_TREE_STREAMER_H  */
-- 
2.34.1


^ permalink raw reply	[flat|nested] 33+ messages in thread

* Re: LTO: Get rid of 'lto_mode_identity_table' (was: Machine Mode ICE in RISC-V when LTO)
  2023-09-18 14:46                       ` LTO: Get rid of 'lto_mode_identity_table' (was: Machine Mode ICE in RISC-V when LTO) Thomas Schwinge
@ 2023-09-18 14:53                         ` Richard Biener
  0 siblings, 0 replies; 33+ messages in thread
From: Richard Biener @ 2023-09-18 14:53 UTC (permalink / raw)
  To: Thomas Schwinge
  Cc: gcc-patches, Robin Dapp, pan2.li, Jakub Jelinek,
	richard.sandiford, kito.cheng, Jeff Law, juzhe.zhong,
	yanzhang.wang

On Mon, Sep 18, 2023 at 4:46 PM Thomas Schwinge <thomas@codesourcery.com> wrote:
>
> Hi!
>
> On 2023-09-15T15:33:59+0200, Robin Dapp <rdapp.gcc@gmail.com> wrote:
> > is there anything we can do to assist from the riscv side in order to help
> > with this?  I haven't really been involved with it but was wondering
> > what's missing.  If I understand correctly Thomas has a major cleanup
> > operation in plan
>
> Not really major, but indeed non-trivial -- but WIP already.  ;-)
>
> > but might not get to it soon.
>
> Right.
>
> > The fix he proposed
> > helps for the riscv case, however, even without the rework?
>
> Right, and no harm done for my work.
>
> > If so, I'd kindly ping Jakub to check if the fix is reasonable.
>
> I'll push the attached "LTO: Get rid of 'lto_mode_identity_table'"
> mid-week, unless any objections raised.

OK.

Richard.

>
> Grüße
>  Thomas
>
>
> -----------------
> Siemens Electronic Design Automation GmbH; Anschrift: Arnulfstraße 201, 80634 München; Gesellschaft mit beschränkter Haftung; Geschäftsführer: Thomas Heurung, Frank Thürauf; Sitz der Gesellschaft: München; Registergericht München, HRB 106955

^ permalink raw reply	[flat|nested] 33+ messages in thread

end of thread, other threads:[~2023-09-18 14:53 UTC | newest]

Thread overview: 33+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-05-12  5:00 [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits pan2.li
2023-05-12  6:49 ` Richard Biener
2023-05-12 19:14   ` Bernhard Reutner-Fischer
2023-05-13  8:44     ` Kito Cheng
2023-05-13 12:26       ` Li, Pan2
2023-06-30 11:46       ` Adjust LTO mode tables for "Machine_Mode: Extend machine_mode from 8 to 16 bits" (was: [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits) Thomas Schwinge
2023-06-30 12:45         ` Kito Cheng
2023-06-30 16:11           ` Thomas Schwinge
2023-06-30 16:37           ` Jakub Jelinek
2023-07-04 15:45             ` Thomas Schwinge
     [not found]         ` <MW5PR11MB590876BB7E52B78E837C95C9A913A@MW5PR11MB5908.namprd11.prod.outlook.com>
     [not found]           ` <CAFiYyc0Ajoi__g1YJhdrh-Z3DsOyU7+1iG6SEmZMDzAShX-L6g@mail.gmail.com>
     [not found]             ` <MW5PR11MB5908E5743F472D48ACF60039A913A@MW5PR11MB5908.namprd11.prod.outlook.com>
2023-08-10 13:23               ` Machine Mode ICE in RISC-V when LTO Thomas Schwinge
2023-08-10 14:14                 ` Li, Pan2
2023-08-11  6:40                   ` Li, Pan2
2023-09-15 13:33                     ` Robin Dapp
2023-09-18 14:46                       ` LTO: Get rid of 'lto_mode_identity_table' (was: Machine Mode ICE in RISC-V when LTO) Thomas Schwinge
2023-09-18 14:53                         ` Richard Biener
2023-05-12  8:24 ` [PATCH] Machine_Mode: Extend machine_mode from 8 to 16 bits Richard Sandiford
2023-05-12 11:16   ` Li, Pan2
2023-05-12 11:31     ` Richard Sandiford
2023-05-12 11:48       ` Li, Pan2
2023-05-16 15:35 ` pan2.li
2023-05-18  8:57   ` Richard Sandiford
2023-05-18  9:17     ` Li, Pan2
     [not found] <Message-Id: <20230512050016.476110-1-pan2.li@intel.com>
2023-05-12 15:38 ` [PATCH v2] " pan2.li
2023-05-13 13:13 ` [PATCH v3] " pan2.li
2023-05-16  1:12   ` Li, Pan2
2023-05-16  7:29     ` Richard Sandiford
2023-05-16  7:55       ` Li, Pan2
2023-05-16  8:03         ` Xi Ruoyao
2023-05-16  8:05           ` Li, Pan2
2023-05-16  9:09   ` Richard Sandiford
2023-05-16 12:17     ` Li, Pan2
2023-05-16 15:39       ` Li, Pan2

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).