public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH 0/5] Fix handling of word subregs of wide registers
@ 2014-09-18 10:07 Richard Sandiford
  2014-09-18 10:09 ` [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const Richard Sandiford
                   ` (6 more replies)
  0 siblings, 7 replies; 23+ messages in thread
From: Richard Sandiford @ 2014-09-18 10:07 UTC (permalink / raw)
  To: gcc-patches

This series is a cleaned-up version of:

    https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html

The underlying problem is that the semantics of subregs depend on the
word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
but you can have a subreg for word 2 of a 4-word value (as well as lowpart
subregs of that word, etc.).  This causes problems when an architecture has
wider-than-word registers, since the addressability of a word can then depend
on which register class is used.

The register allocators need to fix up cases where a subreg turns out to
be invalid for a particular class.  This is really an extension of what
we need to do for CANNOT_CHANGE_MODE_CLASS.

Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
@ 2014-09-18 10:09 ` Richard Sandiford
  2014-09-19  6:14   ` Jeff Law
  2014-09-18 10:10 ` [PATCH 2/5] Tweak subreg_get_info documentation Richard Sandiford
                   ` (5 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-18 10:09 UTC (permalink / raw)
  To: gcc-patches

Patch 4 needs to pass a const HARD_REG_SET to AND/COPY_HARD_REG_SET.
This patch allows that for all intent-in arguments.


gcc/
	* hard-reg-set.h (COPY_HARD_REG_SET, COMPL_HARD_REG_SET)
	(AND_HARD_REG_SET, AND_COMPL_HARD_REG_SET, IOR_HARD_REG_SET)
	(IOR_COMPL_HARD_REG_SET): Allow the "from" set to be constant.


Index: gcc/hard-reg-set.h
===================================================================
--- gcc/hard-reg-set.h	2014-09-15 10:00:12.133398136 +0100
+++ gcc/hard-reg-set.h	2014-09-15 10:00:12.129398185 +0100
@@ -168,32 +168,38 @@ do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);
      scan_tp_[1] = -1; } while (0)
 
 #define COPY_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM);	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] = scan_fp_[0];					\
      scan_tp_[1] = scan_fp_[1]; } while (0)
 
 #define COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] = ~ scan_fp_[0];				\
      scan_tp_[1] = ~ scan_fp_[1]; } while (0)
 
 #define AND_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] &= scan_fp_[0];				\
      scan_tp_[1] &= scan_fp_[1]; } while (0)
 
 #define AND_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] &= ~ scan_fp_[0];				\
      scan_tp_[1] &= ~ scan_fp_[1]; } while (0)
 
 #define IOR_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] |= scan_fp_[0];				\
      scan_tp_[1] |= scan_fp_[1]; } while (0)
 
 #define IOR_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] |= ~ scan_fp_[0];				\
      scan_tp_[1] |= ~ scan_fp_[1]; } while (0)
 
@@ -236,37 +242,43 @@ do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);
      scan_tp_[2] = -1; } while (0)
 
 #define COPY_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM);	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] = scan_fp_[0];					\
      scan_tp_[1] = scan_fp_[1];					\
      scan_tp_[2] = scan_fp_[2]; } while (0)
 
 #define COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] = ~ scan_fp_[0];				\
      scan_tp_[1] = ~ scan_fp_[1];				\
      scan_tp_[2] = ~ scan_fp_[2]; } while (0)
 
 #define AND_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] &= scan_fp_[0];				\
      scan_tp_[1] &= scan_fp_[1];				\
      scan_tp_[2] &= scan_fp_[2]; } while (0)
 
 #define AND_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] &= ~ scan_fp_[0];				\
      scan_tp_[1] &= ~ scan_fp_[1];				\
      scan_tp_[2] &= ~ scan_fp_[2]; } while (0)
 
 #define IOR_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] |= scan_fp_[0];				\
      scan_tp_[1] |= scan_fp_[1];				\
      scan_tp_[2] |= scan_fp_[2]; } while (0)
 
 #define IOR_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] |= ~ scan_fp_[0];				\
      scan_tp_[1] |= ~ scan_fp_[1];				\
      scan_tp_[2] |= ~ scan_fp_[2]; } while (0)
@@ -316,42 +328,48 @@ do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);
      scan_tp_[3] = -1; } while (0)
 
 #define COPY_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM);	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] = scan_fp_[0];					\
      scan_tp_[1] = scan_fp_[1];					\
      scan_tp_[2] = scan_fp_[2];					\
      scan_tp_[3] = scan_fp_[3]; } while (0)
 
 #define COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] = ~ scan_fp_[0];				\
      scan_tp_[1] = ~ scan_fp_[1];				\
      scan_tp_[2] = ~ scan_fp_[2];				\
      scan_tp_[3] = ~ scan_fp_[3]; } while (0)
 
 #define AND_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] &= scan_fp_[0];				\
      scan_tp_[1] &= scan_fp_[1];				\
      scan_tp_[2] &= scan_fp_[2];				\
      scan_tp_[3] &= scan_fp_[3]; } while (0)
 
 #define AND_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] &= ~ scan_fp_[0];				\
      scan_tp_[1] &= ~ scan_fp_[1];				\
      scan_tp_[2] &= ~ scan_fp_[2];				\
      scan_tp_[3] &= ~ scan_fp_[3]; } while (0)
 
 #define IOR_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] |= scan_fp_[0];				\
      scan_tp_[1] |= scan_fp_[1];				\
      scan_tp_[2] |= scan_fp_[2];				\
      scan_tp_[3] |= scan_fp_[3]; } while (0)
 
 #define IOR_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      scan_tp_[0] |= ~ scan_fp_[0];				\
      scan_tp_[1] |= ~ scan_fp_[1];				\
      scan_tp_[2] |= ~ scan_fp_[2];				\
@@ -402,37 +420,43 @@ do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);
        *scan_tp_++ = -1; } while (0)
 
 #define COPY_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      int i;							\
      for (i = 0; i < HARD_REG_SET_LONGS; i++)			\
        *scan_tp_++ = *scan_fp_++; } while (0)
 
 #define COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      int i;							\
      for (i = 0; i < HARD_REG_SET_LONGS; i++)			\
        *scan_tp_++ = ~ *scan_fp_++; } while (0)
 
 #define AND_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      int i;							\
      for (i = 0; i < HARD_REG_SET_LONGS; i++)			\
        *scan_tp_++ &= *scan_fp_++; } while (0)
 
 #define AND_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      int i;							\
      for (i = 0; i < HARD_REG_SET_LONGS; i++)			\
        *scan_tp_++ &= ~ *scan_fp_++; } while (0)
 
 #define IOR_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      int i;							\
      for (i = 0; i < HARD_REG_SET_LONGS; i++)			\
        *scan_tp_++ |= *scan_fp_++; } while (0)
 
 #define IOR_COMPL_HARD_REG_SET(TO, FROM)  \
-do { HARD_REG_ELT_TYPE *scan_tp_ = (TO), *scan_fp_ = (FROM); 	\
+do { HARD_REG_ELT_TYPE *scan_tp_ = (TO);			\
+     const HARD_REG_ELT_TYPE *scan_fp_ = (FROM);		\
      int i;							\
      for (i = 0; i < HARD_REG_SET_LONGS; i++)			\
        *scan_tp_++ |= ~ *scan_fp_++; } while (0)

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 2/5] Tweak subreg_get_info documentation
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
  2014-09-18 10:09 ` [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const Richard Sandiford
@ 2014-09-18 10:10 ` Richard Sandiford
  2014-09-19  6:16   ` Jeff Law
  2014-09-18 10:13 ` [PATCH 3/5] Use simplify_subreg_regno in combine.c:subst Richard Sandiford
                   ` (4 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-18 10:10 UTC (permalink / raw)
  To: gcc-patches

Try to clarify what subreg_get_info does and doesn't check.


gcc/
	* rtl.h (subreg_info): Expand commentary
	* rtlanal.c (subreg_get_info): Likewise.

Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h	2014-09-15 10:00:14.693366097 +0100
+++ gcc/rtl.h	2014-09-15 10:00:14.689366147 +0100
@@ -2866,10 +2866,13 @@ struct subreg_info
 {
   /* Offset of first hard register involved in the subreg.  */
   int offset;
-  /* Number of hard registers involved in the subreg.  */
+  /* Number of hard registers involved in the subreg.  In the case of
+     a paradoxical subreg, this is the number of registers that would
+     be modified by writing to the subreg; some of them may be don't-care
+     when reading from the subreg.  */
   int nregs;
   /* Whether this subreg can be represented as a hard reg with the new
-     mode.  */
+     mode (by adding OFFSET to the original hard register).  */
   bool representable_p;
 };
 
Index: gcc/rtlanal.c
===================================================================
--- gcc/rtlanal.c	2014-09-15 10:00:14.693366097 +0100
+++ gcc/rtlanal.c	2014-09-15 10:00:14.689366147 +0100
@@ -3411,7 +3411,20 @@ subreg_lsb (const_rtx x)
    xmode  - The mode of xregno.
    offset - The byte offset.
    ymode  - The mode of a top level SUBREG (or what may become one).
-   info   - Pointer to structure to fill in.  */
+   info   - Pointer to structure to fill in.
+
+   Rather than considering one particular inner register (and thus one
+   particular "outer" register) in isolation, this function really uses
+   XREGNO as a model for a sequence of isomorphic hard registers.  Thus the
+   function does not check whether adding INFO->offset to XREGNO gives
+   a valid hard register; even if INFO->offset + XREGNO is out of range,
+   there might be another register of the same type that is in range.
+   Likewise it doesn't check whether HARD_REGNO_MODE_OK accepts the new
+   register, since that can depend on things like whether the final
+   register number is even or odd.  Callers that want to check whether
+   this particular subreg can be replaced by a simple (reg ...) should
+   use simplify_subreg_regno.  */
+
 void
 subreg_get_info (unsigned int xregno, enum machine_mode xmode,
 		 unsigned int offset, enum machine_mode ymode,

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 3/5] Use simplify_subreg_regno in combine.c:subst
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
  2014-09-18 10:09 ` [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const Richard Sandiford
  2014-09-18 10:10 ` [PATCH 2/5] Tweak subreg_get_info documentation Richard Sandiford
@ 2014-09-18 10:13 ` Richard Sandiford
  2014-09-19  6:17   ` Jeff Law
  2014-09-18 10:25 ` [PATCH 4/5] Generalise invalid_mode_change_p Richard Sandiford
                   ` (3 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-18 10:13 UTC (permalink / raw)
  To: gcc-patches

combine.c:subst should refuse to substitute a hard register
into a subreg if the new subreg would not be simplified to a
simple hard register, since the result would have to be reloaded.
This is more for optimisation than correctness, since in theory
the RA should be able to fix up any unsimplified subregs.


gcc/
	* combine.c (subst): Use simplify_subreg_regno rather than
	REG_CANNOT_CHANGE_MODE_P to detect invalid mode changes.

Index: gcc/combine.c
===================================================================
--- gcc/combine.c	2014-09-15 10:00:17.545330404 +0100
+++ gcc/combine.c	2014-09-15 10:00:17.545330404 +0100
@@ -5121,15 +5121,13 @@ #define COMBINE_RTX_EQUAL_P(X,Y)			\
 		      )
 		    return gen_rtx_CLOBBER (VOIDmode, const0_rtx);
 
-#ifdef CANNOT_CHANGE_MODE_CLASS
 		  if (code == SUBREG
 		      && REG_P (to)
 		      && REGNO (to) < FIRST_PSEUDO_REGISTER
-		      && REG_CANNOT_CHANGE_MODE_P (REGNO (to),
-						   GET_MODE (to),
-						   GET_MODE (x)))
+		      && simplify_subreg_regno (REGNO (to), GET_MODE (to),
+						SUBREG_BYTE (x),
+						GET_MODE (x)) < 0)
 		    return gen_rtx_CLOBBER (VOIDmode, const0_rtx);
-#endif
 
 		  new_rtx = (unique_copy && n_occurrences ? copy_rtx (to) : to);
 		  n_occurrences++;

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 4/5] Generalise invalid_mode_change_p
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
                   ` (2 preceding siblings ...)
  2014-09-18 10:13 ` [PATCH 3/5] Use simplify_subreg_regno in combine.c:subst Richard Sandiford
@ 2014-09-18 10:25 ` Richard Sandiford
  2014-09-19 17:53   ` Jeff Law
  2014-09-18 10:26 ` [PATCH 5/5] Remove CANNOT_CHANGE_MODE_CLASS workaround in i386.c Richard Sandiford
                   ` (2 subsequent siblings)
  6 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-18 10:25 UTC (permalink / raw)
  To: gcc-patches

This is the main patch for the bug.  We should treat a register as invalid
for a mode change if simplify_subreg_regno cannot provide a new register
number for the result.  We should treat a class as invalid for a mode change
if all registers in the class are invalid.  This is an extension of the old
CANNOT_CHANGE_MODE_CLASS-based check (simplify_subreg_regno checks C_C_C_M).

I forgot to say that the patch is a prerequisite to removing aarch64's
C_C_C_M.  There are other prerequisites too, but removing C_C_C_M without
this patch caused regressions in the existing testsuite, which is why no
new tests are needed.


gcc/
	* hard-reg-set.h: Include hash-table.h.
	(target_hard_regs): Add a finalize method and a x_simplifiable_subregs
	field.
	* target-globals.c (target_globals::~target_globals): Handle
	hard_regs->finalize.
	* rtl.h (subreg_shape): New structure.
	(shape_of_subreg): New function.
	(simplifiable_subregs): Declare.
	* reginfo.c (simplifiable_subreg): New structure.
	(simplifiable_subregs_hasher): Likewise.
	(simplifiable_subregs): New function.
	(invalid_mode_changes): Delete.
	(alid_mode_changes, valid_mode_changes_obstack): New variables.
	(record_subregs_of_mode): Remove subregs_of_mode parameter.
	Record valid mode changes in valid_mode_changes.
	(find_subregs_of_mode): Remove subregs_of_mode parameter.
	Update calls to record_subregs_of_mode.
	(init_subregs_of_mode): Remove invalid_mode_changes and bitmap
	handling.  Initialize new variables.  Update call to
	find_subregs_of_mode.
	(invalid_mode_change_p): Check new variables instead of
	invalid_mode_changes.
	(finish_subregs_of_mode): Finalize new variables instead of
	invalid_mode_changes.
	(target_hard_regs::finalize): New function.
	* ira-costs.c (print_allocno_costs): Call invalid_mode_change_p
	even when CLASS_CANNOT_CHANGE_MODE is undefined.

Index: gcc/hard-reg-set.h
===================================================================
--- gcc/hard-reg-set.h	2014-09-15 11:55:40.459855161 +0100
+++ gcc/hard-reg-set.h	2014-09-15 11:55:40.455855210 +0100
@@ -20,6 +20,8 @@ Software Foundation; either version 3, o
 #ifndef GCC_HARD_REG_SET_H
 #define GCC_HARD_REG_SET_H
 
+#include "hash-table.h"
+
 /* Define the type of a set of hard registers.  */
 
 /* HARD_REG_ELT_TYPE is a typedef of the unsigned integral type which
@@ -613,7 +615,11 @@ #define EXECUTE_IF_SET_IN_HARD_REG_SET(S
 
 extern char global_regs[FIRST_PSEUDO_REGISTER];
 
+struct simplifiable_subregs_hasher;
+
 struct target_hard_regs {
+  void finalize ();
+
   /* The set of registers that actually exist on the current target.  */
   HARD_REG_SET x_accessible_reg_set;
 
@@ -688,6 +694,10 @@ struct target_hard_regs {
 
   /* Vector indexed by hardware reg giving its name.  */
   const char *x_reg_names[FIRST_PSEUDO_REGISTER];
+
+  /* Records which registers can form a particular subreg, with the subreg
+     being identified by its outer mode, inner mode and offset.  */
+  hash_table <simplifiable_subregs_hasher> *x_simplifiable_subregs;
 };
 
 extern struct target_hard_regs default_target_hard_regs;
Index: gcc/target-globals.c
===================================================================
--- gcc/target-globals.c	2014-09-15 11:55:40.459855161 +0100
+++ gcc/target-globals.c	2014-09-15 11:55:40.459855161 +0100
@@ -125,6 +125,7 @@ target_globals::~target_globals ()
   /* default_target_globals points to static data so shouldn't be freed.  */
   if (this != &default_target_globals)
     {
+      hard_regs->finalize ();
       XDELETE (flag_state);
       XDELETE (regs);
       XDELETE (recog);
Index: gcc/rtl.h
===================================================================
--- gcc/rtl.h	2014-09-15 11:55:40.459855161 +0100
+++ gcc/rtl.h	2014-09-15 12:26:21.249077760 +0100
@@ -1822,6 +1822,64 @@ costs_add_n_insns (struct full_rtx_costs
   c->size += COSTS_N_INSNS (n);
 }
 
+/* Describes the shape of a subreg:
+
+   inner_mode == the mode of the SUBREG_REG
+   offset     == the SUBREG_BYTE
+   outer_mode == the mode of the SUBREG itself.  */
+struct subreg_shape {
+  subreg_shape (enum machine_mode, unsigned int, enum machine_mode);
+  bool operator == (const subreg_shape &) const;
+  bool operator != (const subreg_shape &) const;
+  unsigned int unique_id () const;
+
+  enum machine_mode inner_mode;
+  unsigned int offset;
+  enum machine_mode outer_mode;
+};
+
+inline
+subreg_shape::subreg_shape (enum machine_mode inner_mode_in,
+			    unsigned int offset_in,
+			    enum machine_mode outer_mode_in)
+  : inner_mode (inner_mode_in), offset (offset_in), outer_mode (outer_mode_in)
+{}
+
+inline bool
+subreg_shape::operator == (const subreg_shape &other) const
+{
+  return (inner_mode == other.inner_mode
+	  && offset == other.offset
+	  && outer_mode == other.outer_mode);
+}
+
+inline bool
+subreg_shape::operator != (const subreg_shape &other) const
+{
+  return !operator == (other);
+}
+
+/* Return an integer that uniquely identifies this shape.  Structures
+   like rtx_def assume that a mode can fit in an 8-bit bitfield and no
+   current mode is anywhere near being 65536 bytes in size, so the
+   id comfortably fits in an int.  */
+
+inline unsigned int
+subreg_shape::unique_id () const
+{
+  STATIC_ASSERT (MAX_MACHINE_MODE <= 256);
+  return (int) inner_mode + ((int) outer_mode << 8) + (offset << 16);
+}
+
+/* Return the shape of a SUBREG rtx.  */
+
+static inline subreg_shape
+shape_of_subreg (const_rtx x)
+{
+  return subreg_shape (GET_MODE (SUBREG_REG (x)),
+		       SUBREG_BYTE (x), GET_MODE (x));
+}
+
 /* Information about an address.  This structure is supposed to be able
    to represent all supported target addresses.  Please extend it if it
    is not yet general enough.  */
@@ -2718,6 +2776,9 @@ extern bool val_signbit_known_clear_p (e
 /* In reginfo.c  */
 extern enum machine_mode choose_hard_reg_mode (unsigned int, unsigned int,
 					       bool);
+#ifdef HARD_CONST
+extern const HARD_REG_SET &simplifiable_subregs (const subreg_shape &);
+#endif
 
 /* In emit-rtl.c  */
 extern rtx set_for_reg_notes (rtx);
Index: gcc/reginfo.c
===================================================================
--- gcc/reginfo.c	2014-09-15 11:55:40.459855161 +0100
+++ gcc/reginfo.c	2014-09-18 11:22:12.520550755 +0100
@@ -54,6 +54,24 @@ Software Foundation; either version 3, o
 
 int max_regno;
 
+/* Used to cache the results of simplifiable_subregs.  SHAPE is the input
+   parameter and SIMPLIFIABLE_REGS is the result.  */
+struct simplifiable_subreg
+{
+  simplifiable_subreg (const subreg_shape &);
+
+  subreg_shape shape;
+  HARD_REG_SET simplifiable_regs;
+};
+
+struct simplifiable_subregs_hasher : typed_noop_remove <simplifiable_subreg>
+{
+  typedef simplifiable_subreg value_type;
+  typedef subreg_shape compare_type;
+
+  static inline hashval_t hash (const value_type *);
+  static inline bool equal (const value_type *, const compare_type *);
+};
 \f
 struct target_hard_regs default_target_hard_regs;
 struct target_regs default_target_regs;
@@ -1193,64 +1211,102 @@ reg_classes_intersect_p (reg_class_t c1,
 }
 
 \f
+inline hashval_t
+simplifiable_subregs_hasher::hash (const value_type *value)
+{
+  return value->shape.unique_id ();
+}
+
+inline bool
+simplifiable_subregs_hasher::equal (const value_type *value,
+				    const compare_type *compare)
+{
+  return value->shape == *compare;
+}
+
+inline simplifiable_subreg::simplifiable_subreg (const subreg_shape &shape_in)
+  : shape (shape_in)
+{
+  CLEAR_HARD_REG_SET (simplifiable_regs);
+}
+
+/* Return the set of hard registers that are able to form the subreg
+   described by SHAPE.  */
+
+const HARD_REG_SET &
+simplifiable_subregs (const subreg_shape &shape)
+{
+  if (!this_target_hard_regs->x_simplifiable_subregs)
+    this_target_hard_regs->x_simplifiable_subregs
+      = new hash_table <simplifiable_subregs_hasher> (30);
+  simplifiable_subreg **slot
+    = (this_target_hard_regs->x_simplifiable_subregs
+       ->find_slot_with_hash (&shape, shape.unique_id (), INSERT));
+
+  if (!*slot)
+    {
+      simplifiable_subreg *info = new simplifiable_subreg (shape);
+      for (unsigned int i = 0; i < FIRST_PSEUDO_REGISTER; ++i)
+	if (HARD_REGNO_MODE_OK (i, shape.inner_mode)
+	    && simplify_subreg_regno (i, shape.inner_mode, shape.offset,
+				      shape.outer_mode) >= 0)
+	  SET_HARD_REG_BIT (info->simplifiable_regs, i);
+      *slot = info;
+    }
+  return (*slot)->simplifiable_regs;
+}
 
 /* Passes for keeping and updating info about modes of registers
    inside subregisters.  */
 
-#ifdef CANNOT_CHANGE_MODE_CLASS
-
-static bitmap invalid_mode_changes;
+static HARD_REG_SET **valid_mode_changes;
+static obstack valid_mode_changes_obstack;
 
 static void
-record_subregs_of_mode (rtx subreg, bitmap subregs_of_mode)
+record_subregs_of_mode (rtx subreg)
 {
-  enum machine_mode mode;
   unsigned int regno;
 
   if (!REG_P (SUBREG_REG (subreg)))
     return;
 
   regno = REGNO (SUBREG_REG (subreg));
-  mode = GET_MODE (subreg);
-
   if (regno < FIRST_PSEUDO_REGISTER)
     return;
 
-  if (bitmap_set_bit (subregs_of_mode,
-		      regno * NUM_MACHINE_MODES + (unsigned int) mode))
+  if (valid_mode_changes[regno])
+    AND_HARD_REG_SET (*valid_mode_changes[regno],
+		      simplifiable_subregs (shape_of_subreg (subreg)));
+  else
     {
-      unsigned int rclass;
-      for (rclass = 0; rclass < N_REG_CLASSES; rclass++)
-	if (!bitmap_bit_p (invalid_mode_changes,
-			   regno * N_REG_CLASSES + rclass)
-	    && CANNOT_CHANGE_MODE_CLASS (PSEUDO_REGNO_MODE (regno),
-					 mode, (enum reg_class) rclass))
-	  bitmap_set_bit (invalid_mode_changes,
-			  regno * N_REG_CLASSES + rclass);
+      valid_mode_changes[regno]
+	= XOBNEW (&valid_mode_changes_obstack, HARD_REG_SET);
+      COPY_HARD_REG_SET (*valid_mode_changes[regno],
+			 simplifiable_subregs (shape_of_subreg (subreg)));
     }
 }
 
 /* Call record_subregs_of_mode for all the subregs in X.  */
 static void
-find_subregs_of_mode (rtx x, bitmap subregs_of_mode)
+find_subregs_of_mode (rtx x)
 {
   enum rtx_code code = GET_CODE (x);
   const char * const fmt = GET_RTX_FORMAT (code);
   int i;
 
   if (code == SUBREG)
-    record_subregs_of_mode (x, subregs_of_mode);
+    record_subregs_of_mode (x);
 
   /* Time for some deep diving.  */
   for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
     {
       if (fmt[i] == 'e')
-	find_subregs_of_mode (XEXP (x, i), subregs_of_mode);
+	find_subregs_of_mode (XEXP (x, i));
       else if (fmt[i] == 'E')
 	{
 	  int j;
 	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
-	    find_subregs_of_mode (XVECEXP (x, i, j), subregs_of_mode);
+	    find_subregs_of_mode (XVECEXP (x, i, j));
 	}
     }
 }
@@ -1260,46 +1316,38 @@ init_subregs_of_mode (void)
 {
   basic_block bb;
   rtx_insn *insn;
-  bitmap_obstack srom_obstack;
-  bitmap subregs_of_mode;
 
-  gcc_assert (invalid_mode_changes == NULL);
-  invalid_mode_changes = BITMAP_ALLOC (NULL);
-  bitmap_obstack_initialize (&srom_obstack);
-  subregs_of_mode = BITMAP_ALLOC (&srom_obstack);
+  gcc_obstack_init (&valid_mode_changes_obstack);
+  valid_mode_changes = XCNEWVEC (HARD_REG_SET *, max_reg_num ());
 
   FOR_EACH_BB_FN (bb, cfun)
     FOR_BB_INSNS (bb, insn)
       if (NONDEBUG_INSN_P (insn))
-        find_subregs_of_mode (PATTERN (insn), subregs_of_mode);
-
-  BITMAP_FREE (subregs_of_mode);
-  bitmap_obstack_release (&srom_obstack);
+        find_subregs_of_mode (PATTERN (insn));
 }
 
 /* Return 1 if REGNO has had an invalid mode change in CLASS from FROM
    mode.  */
 bool
-invalid_mode_change_p (unsigned int regno,
-		       enum reg_class rclass)
+invalid_mode_change_p (unsigned int regno, enum reg_class rclass)
 {
-  return bitmap_bit_p (invalid_mode_changes,
-		       regno * N_REG_CLASSES + (unsigned) rclass);
+  return (valid_mode_changes[regno]
+	  && !hard_reg_set_intersect_p (reg_class_contents[rclass],
+					*valid_mode_changes[regno]));
 }
 
 void
 finish_subregs_of_mode (void)
 {
-  BITMAP_FREE (invalid_mode_changes);
-}
-#else
-void
-init_subregs_of_mode (void)
-{
+  XDELETEVEC (valid_mode_changes);
+  obstack_finish (&valid_mode_changes_obstack);
 }
+
+/* Free all data attached to the structure.  This isn't a destructor because
+   we don't want to run on exit.  */
+
 void
-finish_subregs_of_mode (void)
+target_hard_regs::finalize ()
 {
+  delete x_simplifiable_subregs;
 }
-
-#endif /* CANNOT_CHANGE_MODE_CLASS */
Index: gcc/ira-costs.c
===================================================================
--- gcc/ira-costs.c	2014-09-15 11:55:40.459855161 +0100
+++ gcc/ira-costs.c	2014-09-15 11:55:40.455855210 +0100
@@ -1438,10 +1438,7 @@ print_allocno_costs (FILE *f)
 	{
 	  rclass = cost_classes[k];
 	  if (contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (regno)]
-#ifdef CANNOT_CHANGE_MODE_CLASS
-	      && ! invalid_mode_change_p (regno, (enum reg_class) rclass)
-#endif
-	      )
+	      && ! invalid_mode_change_p (regno, (enum reg_class) rclass))
 	    {
 	      fprintf (f, " %s:%d", reg_class_names[rclass],
 		       COSTS (costs, i)->cost[k]);
@@ -1480,10 +1477,7 @@ print_pseudo_costs (FILE *f)
 	{
 	  rclass = cost_classes[k];
 	  if (contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (regno)]
-#ifdef CANNOT_CHANGE_MODE_CLASS
-	      && ! invalid_mode_change_p (regno, (enum reg_class) rclass)
-#endif
-	      )
+	      && ! invalid_mode_change_p (regno, (enum reg_class) rclass))
 	    fprintf (f, " %s:%d", reg_class_names[rclass],
 		     COSTS (costs, regno)->cost[k]);
 	}
@@ -1725,10 +1719,7 @@ find_costs_and_classes (FILE *dump_file)
 	      /* Ignore classes that are too small or invalid for this
 		 operand.  */
 	      if (! contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (i)]
-#ifdef CANNOT_CHANGE_MODE_CLASS
-		  || invalid_mode_change_p (i, (enum reg_class) rclass)
-#endif
-		  )
+		  || invalid_mode_change_p (i, (enum reg_class) rclass))
 		continue;
 	      if (i_costs[k] < best_cost)
 		{
@@ -1822,10 +1813,7 @@ find_costs_and_classes (FILE *dump_file)
 		      /* Ignore classes that are too small or invalid
 			 for this operand.  */
 		      if (! contains_reg_of_mode[rclass][PSEUDO_REGNO_MODE (i)]
-#ifdef CANNOT_CHANGE_MODE_CLASS
-			  || invalid_mode_change_p (i, (enum reg_class) rclass)
-#endif
-			  )
+			  || invalid_mode_change_p (i, (enum reg_class) rclass))
 			;
 		      else if (total_a_costs[k] < best_cost)
 			{

^ permalink raw reply	[flat|nested] 23+ messages in thread

* [PATCH 5/5] Remove CANNOT_CHANGE_MODE_CLASS workaround in i386.c
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
                   ` (3 preceding siblings ...)
  2014-09-18 10:25 ` [PATCH 4/5] Generalise invalid_mode_change_p Richard Sandiford
@ 2014-09-18 10:26 ` Richard Sandiford
  2014-09-19 17:54   ` Jeff Law
  2014-09-19  6:14 ` [PATCH 0/5] Fix handling of word subregs of wide registers Jeff Law
  2014-09-22  9:00 ` Andrew Pinski
  6 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-18 10:26 UTC (permalink / raw)
  To: gcc-patches

Patch 4 should make it possible to relax i386'a CANNOT_CHANGE_MODE_CLASS,
solving the missed optimisation that triggered the original thread.


gcc/
	* config/i386/i386.c (ix86_cannot_change_mode_class): Remove
	GET_MODE_SIZE (to) < GET_MODE_SIZE (from) test.

Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	2014-09-15 09:48:11.310438531 +0100
+++ gcc/config/i386/i386.c	2014-09-15 09:48:11.310438531 +0100
@@ -37526,13 +37526,6 @@ ix86_cannot_change_mode_class (enum mach
 	 the vec_dupv4hi pattern.  */
       if (GET_MODE_SIZE (from) < 4)
 	return true;
-
-      /* Vector registers do not support subreg with nonzero offsets, which
-	 are otherwise valid for integer registers.  Since we can't see
-	 whether we have a nonzero offset from here, prohibit all
-         nonparadoxical subregs changing size.  */
-      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
-	return true;
     }
 
   return false;

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
                   ` (4 preceding siblings ...)
  2014-09-18 10:26 ` [PATCH 5/5] Remove CANNOT_CHANGE_MODE_CLASS workaround in i386.c Richard Sandiford
@ 2014-09-19  6:14 ` Jeff Law
  2014-09-19  7:24   ` Richard Sandiford
  2014-09-22  9:00 ` Andrew Pinski
  6 siblings, 1 reply; 23+ messages in thread
From: Jeff Law @ 2014-09-19  6:14 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/18/14 04:07, Richard Sandiford wrote:
> This series is a cleaned-up version of:
>
>      https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>
> The underlying problem is that the semantics of subregs depend on the
> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
> subregs of that word, etc.).  This causes problems when an architecture has
> wider-than-word registers, since the addressability of a word can then depend
> on which register class is used.
>
> The register allocators need to fix up cases where a subreg turns out to
> be invalid for a particular class.  This is really an extension of what
> we need to do for CANNOT_CHANGE_MODE_CLASS.
>
> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
I thought we fixed these problems long ago with the change to subreg_byte?!?

Clearly I missed something.

jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const
  2014-09-18 10:09 ` [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const Richard Sandiford
@ 2014-09-19  6:14   ` Jeff Law
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff Law @ 2014-09-19  6:14 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/18/14 04:09, Richard Sandiford wrote:
> Patch 4 needs to pass a const HARD_REG_SET to AND/COPY_HARD_REG_SET.
> This patch allows that for all intent-in arguments.
>
>
> gcc/
> 	* hard-reg-set.h (COPY_HARD_REG_SET, COMPL_HARD_REG_SET)
> 	(AND_HARD_REG_SET, AND_COMPL_HARD_REG_SET, IOR_HARD_REG_SET)
> 	(IOR_COMPL_HARD_REG_SET): Allow the "from" set to be constant.
Seems like a good cleanup in and of itself.

Fine for the trunk independent of the other changes.

jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 2/5] Tweak subreg_get_info documentation
  2014-09-18 10:10 ` [PATCH 2/5] Tweak subreg_get_info documentation Richard Sandiford
@ 2014-09-19  6:16   ` Jeff Law
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff Law @ 2014-09-19  6:16 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/18/14 04:10, Richard Sandiford wrote:
> Try to clarify what subreg_get_info does and doesn't check.
>
>
> gcc/
> 	* rtl.h (subreg_info): Expand commentary
> 	* rtlanal.c (subreg_get_info): Likewise.
OK independently of the other patches as well.

jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 3/5] Use simplify_subreg_regno in combine.c:subst
  2014-09-18 10:13 ` [PATCH 3/5] Use simplify_subreg_regno in combine.c:subst Richard Sandiford
@ 2014-09-19  6:17   ` Jeff Law
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff Law @ 2014-09-19  6:17 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/18/14 04:13, Richard Sandiford wrote:
> combine.c:subst should refuse to substitute a hard register
> into a subreg if the new subreg would not be simplified to a
> simple hard register, since the result would have to be reloaded.
> This is more for optimisation than correctness, since in theory
> the RA should be able to fix up any unsimplified subregs.
>
>
> gcc/
> 	* combine.c (subst): Use simplify_subreg_regno rather than
> 	REG_CANNOT_CHANGE_MODE_P to detect invalid mode changes.
OK independent of other changes.

jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-19  6:14 ` [PATCH 0/5] Fix handling of word subregs of wide registers Jeff Law
@ 2014-09-19  7:24   ` Richard Sandiford
  2014-09-19 15:59     ` Jeff Law
  2014-09-19 17:14     ` Jeff Law
  0 siblings, 2 replies; 23+ messages in thread
From: Richard Sandiford @ 2014-09-19  7:24 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

Jeff Law <law@redhat.com> writes:
> On 09/18/14 04:07, Richard Sandiford wrote:
>> This series is a cleaned-up version of:
>>
>>      https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>
>> The underlying problem is that the semantics of subregs depend on the
>> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
>> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
>> subregs of that word, etc.).  This causes problems when an architecture has
>> wider-than-word registers, since the addressability of a word can then depend
>> on which register class is used.
>>
>> The register allocators need to fix up cases where a subreg turns out to
>> be invalid for a particular class.  This is really an extension of what
>> we need to do for CANNOT_CHANGE_MODE_CLASS.
>>
>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
> I thought we fixed these problems long ago with the change to subreg_byte?!?

No, that was fixing something else.  (I'm just about old enough to remember
that too!)  The problem here is that (say):

    (subreg:SI (reg:DI X) 4)

is independently addressable on little-endian AArch32 if X assigned
to a GPR, but not if X is assigned to a vector register.  We need
to allow these kinds of subreg on pseudos in order to decompose multiword
arithmetic.  It's then up to the RA to realise that a reload would be
needed if X were assigned to a vector register, since the upper half
of a vector register cannot be independently accessed.

Note that you could write this example even with the old word-style offsets
and IIRC the effect would have been the same.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-19  7:24   ` Richard Sandiford
@ 2014-09-19 15:59     ` Jeff Law
  2014-09-19 17:14     ` Jeff Law
  1 sibling, 0 replies; 23+ messages in thread
From: Jeff Law @ 2014-09-19 15:59 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/19/14 01:23, Richard Sandiford wrote:
> Jeff Law <law@redhat.com> writes:
>> On 09/18/14 04:07, Richard Sandiford wrote:
>>> This series is a cleaned-up version of:
>>>
>>>       https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>>
>>> The underlying problem is that the semantics of subregs depend on the
>>> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
>>> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
>>> subregs of that word, etc.).  This causes problems when an architecture has
>>> wider-than-word registers, since the addressability of a word can then depend
>>> on which register class is used.
>>>
>>> The register allocators need to fix up cases where a subreg turns out to
>>> be invalid for a particular class.  This is really an extension of what
>>> we need to do for CANNOT_CHANGE_MODE_CLASS.
>>>
>>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>> I thought we fixed these problems long ago with the change to subreg_byte?!?
>
> No, that was fixing something else.  (I'm just about old enough to remember
> that too!)  The problem here is that (say):
>
>      (subreg:SI (reg:DI X) 4)
>
> is independently addressable on little-endian AArch32 if X assigned
> to a GPR, but not if X is assigned to a vector register.  We need
> to allow these kinds of subreg on pseudos in order to decompose multiword
> arithmetic.  It's then up to the RA to realise that a reload would be
> needed if X were assigned to a vector register, since the upper half
> of a vector register cannot be independently accessed.
>
> Note that you could write this example even with the old word-style offsets
> and IIRC the effect would have been the same.
Maybe I've got some kind of mental block here.  I'll go back and read 
the manual first and see if that helps :-)

jeff


^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-19  7:24   ` Richard Sandiford
  2014-09-19 15:59     ` Jeff Law
@ 2014-09-19 17:14     ` Jeff Law
  2014-09-22  7:23       ` Richard Sandiford
  1 sibling, 1 reply; 23+ messages in thread
From: Jeff Law @ 2014-09-19 17:14 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/19/14 01:23, Richard Sandiford wrote:
> Jeff Law <law@redhat.com> writes:
>> On 09/18/14 04:07, Richard Sandiford wrote:
>>> This series is a cleaned-up version of:
>>>
>>>       https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>>
>>> The underlying problem is that the semantics of subregs depend on the
>>> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
>>> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
>>> subregs of that word, etc.).  This causes problems when an architecture has
>>> wider-than-word registers, since the addressability of a word can then depend
>>> on which register class is used.
>>>
>>> The register allocators need to fix up cases where a subreg turns out to
>>> be invalid for a particular class.  This is really an extension of what
>>> we need to do for CANNOT_CHANGE_MODE_CLASS.
>>>
>>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>> I thought we fixed these problems long ago with the change to subreg_byte?!?
>
> No, that was fixing something else.  (I'm just about old enough to remember
> that too!)  The problem here is that (say):
>
>      (subreg:SI (reg:DI X) 4)
>
> is independently addressable on little-endian AArch32 if X assigned
> to a GPR, but not if X is assigned to a vector register.  We need
> to allow these kinds of subreg on pseudos in order to decompose multiword
> arithmetic.  It's then up to the RA to realise that a reload would be
> needed if X were assigned to a vector register, since the upper half
> of a vector register cannot be independently accessed.
>
> Note that you could write this example even with the old word-style offsets
> and IIRC the effect would have been the same.
OK.  So I kept thinking in terms of the byte offset stuff.  But what 
you're tackling is related to the mess around the mode of the subreg 
having a different meaning if its smaller than a word vs word-sized or 
greater.

Right?


Jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] Generalise invalid_mode_change_p
  2014-09-18 10:25 ` [PATCH 4/5] Generalise invalid_mode_change_p Richard Sandiford
@ 2014-09-19 17:53   ` Jeff Law
  2014-09-22  7:34     ` Richard Sandiford
  0 siblings, 1 reply; 23+ messages in thread
From: Jeff Law @ 2014-09-19 17:53 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/18/14 04:25, Richard Sandiford wrote:
> This is the main patch for the bug.  We should treat a register as invalid
> for a mode change if simplify_subreg_regno cannot provide a new register
> number for the result.  We should treat a class as invalid for a mode change
> if all registers in the class are invalid.  This is an extension of the old
> CANNOT_CHANGE_MODE_CLASS-based check (simplify_subreg_regno checks C_C_C_M).
>
> I forgot to say that the patch is a prerequisite to removing aarch64's
> C_C_C_M.  There are other prerequisites too, but removing C_C_C_M without
> this patch caused regressions in the existing testsuite, which is why no
> new tests are needed.
>
>
> gcc/
> 	* hard-reg-set.h: Include hash-table.h.
> 	(target_hard_regs): Add a finalize method and a x_simplifiable_subregs
> 	field.
> 	* target-globals.c (target_globals::~target_globals): Handle
> 	hard_regs->finalize.
> 	* rtl.h (subreg_shape): New structure.
> 	(shape_of_subreg): New function.
> 	(simplifiable_subregs): Declare.
> 	* reginfo.c (simplifiable_subreg): New structure.
> 	(simplifiable_subregs_hasher): Likewise.
> 	(simplifiable_subregs): New function.
> 	(invalid_mode_changes): Delete.
> 	(alid_mode_changes, valid_mode_changes_obstack): New variables.
> 	(record_subregs_of_mode): Remove subregs_of_mode parameter.
> 	Record valid mode changes in valid_mode_changes.
> 	(find_subregs_of_mode): Remove subregs_of_mode parameter.
> 	Update calls to record_subregs_of_mode.
> 	(init_subregs_of_mode): Remove invalid_mode_changes and bitmap
> 	handling.  Initialize new variables.  Update call to
> 	find_subregs_of_mode.
> 	(invalid_mode_change_p): Check new variables instead of
> 	invalid_mode_changes.
> 	(finish_subregs_of_mode): Finalize new variables instead of
> 	invalid_mode_changes.
> 	(target_hard_regs::finalize): New function.
> 	* ira-costs.c (print_allocno_costs): Call invalid_mode_change_p
> 	even when CLASS_CANNOT_CHANGE_MODE is undefined.
>
> Index: gcc/rtl.h
> ===================================================================
> --- gcc/rtl.h	2014-09-15 11:55:40.459855161 +0100
> +++ gcc/rtl.h	2014-09-15 12:26:21.249077760 +0100
> +/* Return the shape of a SUBREG rtx.  */
> +
> +static inline subreg_shape
> +shape_of_subreg (const_rtx x)
> +{
> +  return subreg_shape (GET_MODE (SUBREG_REG (x)),
> +		       SUBREG_BYTE (x), GET_MODE (x));
> +}
> +
Is there some reason you don't have a constructor that accepts a 
const_rtx?

OK for the trunk.

jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 5/5] Remove CANNOT_CHANGE_MODE_CLASS workaround in i386.c
  2014-09-18 10:26 ` [PATCH 5/5] Remove CANNOT_CHANGE_MODE_CLASS workaround in i386.c Richard Sandiford
@ 2014-09-19 17:54   ` Jeff Law
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff Law @ 2014-09-19 17:54 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/18/14 04:26, Richard Sandiford wrote:
> Patch 4 should make it possible to relax i386'a CANNOT_CHANGE_MODE_CLASS,
> solving the missed optimisation that triggered the original thread.
>
>
> gcc/
> 	* config/i386/i386.c (ix86_cannot_change_mode_class): Remove
> 	GET_MODE_SIZE (to) < GET_MODE_SIZE (from) test.
>
> Index: gcc/config/i386/i386.c
> ===================================================================
> --- gcc/config/i386/i386.c	2014-09-15 09:48:11.310438531 +0100
> +++ gcc/config/i386/i386.c	2014-09-15 09:48:11.310438531 +0100
> @@ -37526,13 +37526,6 @@ ix86_cannot_change_mode_class (enum mach
>   	 the vec_dupv4hi pattern.  */
>         if (GET_MODE_SIZE (from) < 4)
>   	return true;
> -
> -      /* Vector registers do not support subreg with nonzero offsets, which
> -	 are otherwise valid for integer registers.  Since we can't see
> -	 whether we have a nonzero offset from here, prohibit all
> -         nonparadoxical subregs changing size.  */
> -      if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
> -	return true;
>       }
>
>     return false;
>
OK.
jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-19 17:14     ` Jeff Law
@ 2014-09-22  7:23       ` Richard Sandiford
  2014-09-22 10:49         ` Ajit Kumar Agarwal
  0 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-22  7:23 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

Jeff Law <law@redhat.com> writes:
> On 09/19/14 01:23, Richard Sandiford wrote:
>> Jeff Law <law@redhat.com> writes:
>>> On 09/18/14 04:07, Richard Sandiford wrote:
>>>> This series is a cleaned-up version of:
>>>>
>>>>       https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>>>
>>>> The underlying problem is that the semantics of subregs depend on the
>>>> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
>>>> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
>>>> subregs of that word, etc.).  This causes problems when an architecture has
>>>> wider-than-word registers, since the addressability of a word can
>>>> then depend
>>>> on which register class is used.
>>>>
>>>> The register allocators need to fix up cases where a subreg turns out to
>>>> be invalid for a particular class.  This is really an extension of what
>>>> we need to do for CANNOT_CHANGE_MODE_CLASS.
>>>>
>>>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>>> I thought we fixed these problems long ago with the change to subreg_byte?!?
>>
>> No, that was fixing something else.  (I'm just about old enough to remember
>> that too!)  The problem here is that (say):
>>
>>      (subreg:SI (reg:DI X) 4)
>>
>> is independently addressable on little-endian AArch32 if X assigned
>> to a GPR, but not if X is assigned to a vector register.  We need
>> to allow these kinds of subreg on pseudos in order to decompose multiword
>> arithmetic.  It's then up to the RA to realise that a reload would be
>> needed if X were assigned to a vector register, since the upper half
>> of a vector register cannot be independently accessed.
>>
>> Note that you could write this example even with the old word-style offsets
>> and IIRC the effect would have been the same.
> OK.  So I kept thinking in terms of the byte offset stuff.  But what 
> you're tackling is related to the mess around the mode of the subreg 
> having a different meaning if its smaller than a word vs word-sized or 
> greater.
>
> Right?

Yeah, that's right.  Addressability is based on words, which is inconvenient
when your registers are bigger than a word.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] Generalise invalid_mode_change_p
  2014-09-19 17:53   ` Jeff Law
@ 2014-09-22  7:34     ` Richard Sandiford
  2014-09-22 16:29       ` Jeff Law
  0 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-22  7:34 UTC (permalink / raw)
  To: Jeff Law; +Cc: gcc-patches

Jeff Law <law@redhat.com> writes:
> On 09/18/14 04:25, Richard Sandiford wrote:
>> This is the main patch for the bug.  We should treat a register as invalid
>> for a mode change if simplify_subreg_regno cannot provide a new register
>> number for the result.  We should treat a class as invalid for a mode change
>> if all registers in the class are invalid.  This is an extension of the old
>> CANNOT_CHANGE_MODE_CLASS-based check (simplify_subreg_regno checks C_C_C_M).
>>
>> I forgot to say that the patch is a prerequisite to removing aarch64's
>> C_C_C_M.  There are other prerequisites too, but removing C_C_C_M without
>> this patch caused regressions in the existing testsuite, which is why no
>> new tests are needed.
>>
>>
>> gcc/
>> 	* hard-reg-set.h: Include hash-table.h.
>> 	(target_hard_regs): Add a finalize method and a x_simplifiable_subregs
>> 	field.
>> 	* target-globals.c (target_globals::~target_globals): Handle
>> 	hard_regs->finalize.
>> 	* rtl.h (subreg_shape): New structure.
>> 	(shape_of_subreg): New function.
>> 	(simplifiable_subregs): Declare.
>> 	* reginfo.c (simplifiable_subreg): New structure.
>> 	(simplifiable_subregs_hasher): Likewise.
>> 	(simplifiable_subregs): New function.
>> 	(invalid_mode_changes): Delete.
>> 	(alid_mode_changes, valid_mode_changes_obstack): New variables.
>> 	(record_subregs_of_mode): Remove subregs_of_mode parameter.
>> 	Record valid mode changes in valid_mode_changes.
>> 	(find_subregs_of_mode): Remove subregs_of_mode parameter.
>> 	Update calls to record_subregs_of_mode.
>> 	(init_subregs_of_mode): Remove invalid_mode_changes and bitmap
>> 	handling.  Initialize new variables.  Update call to
>> 	find_subregs_of_mode.
>> 	(invalid_mode_change_p): Check new variables instead of
>> 	invalid_mode_changes.
>> 	(finish_subregs_of_mode): Finalize new variables instead of
>> 	invalid_mode_changes.
>> 	(target_hard_regs::finalize): New function.
>> 	* ira-costs.c (print_allocno_costs): Call invalid_mode_change_p
>> 	even when CLASS_CANNOT_CHANGE_MODE is undefined.
>>
>> Index: gcc/rtl.h
>> ===================================================================
>> --- gcc/rtl.h	2014-09-15 11:55:40.459855161 +0100
>> +++ gcc/rtl.h	2014-09-15 12:26:21.249077760 +0100
>> +/* Return the shape of a SUBREG rtx.  */
>> +
>> +static inline subreg_shape
>> +shape_of_subreg (const_rtx x)
>> +{
>> +  return subreg_shape (GET_MODE (SUBREG_REG (x)),
>> +		       SUBREG_BYTE (x), GET_MODE (x));
>> +}
>> +
> Is there some reason you don't have a constructor that accepts a 
> const_rtx?

I was worried that by allowing implicit const_rtx->subreg_shape
conversions, it would be less obvious that the rtx has to have
code SUBREG.  I.e. a checked conversion would be hidden in the
constructor rather than being explicit.

If with David's new rtx hierarchy we end up with an rtx_subreg
subclass then I agree we should have a constructor that takes
one of those.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
                   ` (5 preceding siblings ...)
  2014-09-19  6:14 ` [PATCH 0/5] Fix handling of word subregs of wide registers Jeff Law
@ 2014-09-22  9:00 ` Andrew Pinski
  2014-09-22 11:29   ` Richard Sandiford
  6 siblings, 1 reply; 23+ messages in thread
From: Andrew Pinski @ 2014-09-22  9:00 UTC (permalink / raw)
  To: GCC Patches, richard.sandiford

On Thu, Sep 18, 2014 at 3:07 AM, Richard Sandiford
<richard.sandiford@arm.com> wrote:
> This series is a cleaned-up version of:
>
>     https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>
> The underlying problem is that the semantics of subregs depend on the
> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
> subregs of that word, etc.).  This causes problems when an architecture has
> wider-than-word registers, since the addressability of a word can then depend
> on which register class is used.
>
> The register allocators need to fix up cases where a subreg turns out to
> be invalid for a particular class.  This is really an extension of what
> we need to do for CANNOT_CHANGE_MODE_CLASS.
>
> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.


This sounds like something which should be tested on spu as it is the
main target that I can think of which has wider-than-word registers
and that has had issues with subreg.  I can't remember if the
simulator for SPU is free (as in beer) and would run on anything
besides PowerPC.  It has been more than 4 years since I looked into
the spu back-end also.

Thanks,
Andrew Pinski

>
> Thanks,
> Richard
>

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-22  7:23       ` Richard Sandiford
@ 2014-09-22 10:49         ` Ajit Kumar Agarwal
  2014-09-22 11:26           ` Richard Sandiford
  0 siblings, 1 reply; 23+ messages in thread
From: Ajit Kumar Agarwal @ 2014-09-22 10:49 UTC (permalink / raw)
  To: Richard Sandiford, Jeff Law; +Cc: gcc-patches



-----Original Message-----
From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-owner@gcc.gnu.org] On Behalf Of Richard Sandiford
Sent: Monday, September 22, 2014 12:54 PM
To: Jeff Law
Cc: gcc-patches@gcc.gnu.org
Subject: Re: [PATCH 0/5] Fix handling of word subregs of wide registers

Jeff Law <law@redhat.com> writes:
> On 09/19/14 01:23, Richard Sandiford wrote:
>> Jeff Law <law@redhat.com> writes:
>>> On 09/18/14 04:07, Richard Sandiford wrote:
>>>> This series is a cleaned-up version of:
>>>>
>>>>       https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>>>
>>>> The underlying problem is that the semantics of subregs depend on 
>>>> the word size.  You can't have a subreg for byte 2 of a 4-byte 
>>>> word, say, but you can have a subreg for word 2 of a 4-word value 
>>>> (as well as lowpart subregs of that word, etc.).  This causes 
>>>> problems when an architecture has wider-than-word registers, since 
>>>> the addressability of a word can then depend on which register 
>>>> class is used.
>>>>
>>>> The register allocators need to fix up cases where a subreg turns 
>>>> out to be invalid for a particular class.  This is really an 
>>>> extension of what we need to do for CANNOT_CHANGE_MODE_CLASS.
>>>>
>>>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>>> I thought we fixed these problems long ago with the change to subreg_byte?!?
>>
>> No, that was fixing something else.  (I'm just about old enough to 
>> remember that too!)  The problem here is that (say):
>>
>>      (subreg:SI (reg:DI X) 4)
>>
>> is independently addressable on little-endian AArch32 if X assigned 
>> to a GPR, but not if X is assigned to a vector register.  We need to 
>> allow these kinds of subreg on pseudos in order to decompose 
>> multiword arithmetic.  It's then up to the RA to realise that a 
>> reload would be needed if X were assigned to a vector register, since 
>> the upper half of a vector register cannot be independently accessed.
>>
>> Note that you could write this example even with the old word-style 
>> offsets and IIRC the effect would have been the same.
> OK.  So I kept thinking in terms of the byte offset stuff.  But what 
> you're tackling is related to the mess around the mode of the subreg 
> having a different meaning if its smaller than a word vs word-sized or 
> greater.
>
> Right?

>>Yeah, that's right.  Addressability is based on words, which is inconvenient when your registers are bigger than a word.

If the architecture like Microblaze which doesn't support  the 1 byte or 2 byte registers. In this scenario what should be returned when SUBREG_WORD is used.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-22 10:49         ` Ajit Kumar Agarwal
@ 2014-09-22 11:26           ` Richard Sandiford
  2014-09-22 12:07             ` Ajit Kumar Agarwal
  0 siblings, 1 reply; 23+ messages in thread
From: Richard Sandiford @ 2014-09-22 11:26 UTC (permalink / raw)
  To: Ajit Kumar Agarwal; +Cc: Jeff Law, gcc-patches

Ajit Kumar Agarwal <ajit.kumar.agarwal@xilinx.com> writes:
> Jeff Law <law@redhat.com> writes:
>> On 09/19/14 01:23, Richard Sandiford wrote:
>>> Jeff Law <law@redhat.com> writes:
>>>> On 09/18/14 04:07, Richard Sandiford wrote:
>>>>> This series is a cleaned-up version of:
>>>>>
>>>>>       https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>>>>
>>>>> The underlying problem is that the semantics of subregs depend on 
>>>>> the word size.  You can't have a subreg for byte 2 of a 4-byte 
>>>>> word, say, but you can have a subreg for word 2 of a 4-word value 
>>>>> (as well as lowpart subregs of that word, etc.).  This causes 
>>>>> problems when an architecture has wider-than-word registers, since 
>>>>> the addressability of a word can then depend on which register 
>>>>> class is used.
>>>>>
>>>>> The register allocators need to fix up cases where a subreg turns 
>>>>> out to be invalid for a particular class.  This is really an 
>>>>> extension of what we need to do for CANNOT_CHANGE_MODE_CLASS.
>>>>>
>>>>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>>>> I thought we fixed these problems long ago with the change to subreg_byte?!?
>>>
>>> No, that was fixing something else.  (I'm just about old enough to 
>>> remember that too!)  The problem here is that (say):
>>>
>>>      (subreg:SI (reg:DI X) 4)
>>>
>>> is independently addressable on little-endian AArch32 if X assigned 
>>> to a GPR, but not if X is assigned to a vector register.  We need to 
>>> allow these kinds of subreg on pseudos in order to decompose 
>>> multiword arithmetic.  It's then up to the RA to realise that a 
>>> reload would be needed if X were assigned to a vector register, since 
>>> the upper half of a vector register cannot be independently accessed.
>>>
>>> Note that you could write this example even with the old word-style 
>>> offsets and IIRC the effect would have been the same.
>> OK.  So I kept thinking in terms of the byte offset stuff.  But what 
>> you're tackling is related to the mess around the mode of the subreg 
>> having a different meaning if its smaller than a word vs word-sized or 
>> greater.
>>
>> Right?
>
>>>Yeah, that's right.  Addressability is based on words, which is
>>> inconvenient when your registers are bigger than a word.
>
> If the architecture like Microblaze which doesn't support the 1 byte or
> 2 byte registers. In this scenario what should be returned when
> SUBREG_WORD is used.

I don't understand the question sorry.  Subreg offsets are still represented
as bytes rather than words.  The patch doesn't change the way that subregs
are represented or the rules about which subregs are valid.

Both before and after the patch, the semantics of subregs say that if
you have 4-byte words, only one of:

    (subreg:QI (reg:SI X) 0)
    (subreg:QI (reg:SI X) 1)
    (subreg:QI (reg:SI X) 2)
    (subreg:QI (reg:SI X) 3)

is ever valid (0 for little-endian, 3 for big-endian).  Writing to that
one valid subreg will change the whole of X, unless the subreg is wrapped
in a strict_lowpart.  In other words, subregs are defined so that individual
parts of a word are not independently addressable.

However, individual words of a multiword register _are_ addressable.  I.e.:

   (subreg:SI (reg:DI Y) 0)
   (subreg:SI (reg:DI Y) 4)

are both valid.  Writing to one does not change the other.

The problem the patch was trying to solve was that you can have targets
with 4-byte words but some 8-byte registers.  In those cases, it's still
possible to form both of the Y subregs above if Y is allocated to a word-sized
register, but not if Y is allocated to a doubleword-sized register.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-22  9:00 ` Andrew Pinski
@ 2014-09-22 11:29   ` Richard Sandiford
  0 siblings, 0 replies; 23+ messages in thread
From: Richard Sandiford @ 2014-09-22 11:29 UTC (permalink / raw)
  To: Andrew Pinski; +Cc: GCC Patches

Andrew Pinski <pinskia@gmail.com> writes:
> On Thu, Sep 18, 2014 at 3:07 AM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> This series is a cleaned-up version of:
>>
>>     https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>
>> The underlying problem is that the semantics of subregs depend on the
>> word size.  You can't have a subreg for byte 2 of a 4-byte word, say,
>> but you can have a subreg for word 2 of a 4-word value (as well as lowpart
>> subregs of that word, etc.).  This causes problems when an architecture has
>> wider-than-word registers, since the addressability of a word can then depend
>> on which register class is used.
>>
>> The register allocators need to fix up cases where a subreg turns out to
>> be invalid for a particular class.  This is really an extension of what
>> we need to do for CANNOT_CHANGE_MODE_CLASS.
>>
>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>
>
> This sounds like something which should be tested on spu as it is the
> main target that I can think of which has wider-than-word registers
> and that has had issues with subreg.  I can't remember if the
> simulator for SPU is free (as in beer) and would run on anything
> besides PowerPC.  It has been more than 4 years since I looked into
> the spu back-end also.

Well, AArch64 and x86_64 should be good enough targets for testing
the patch.  In the AArch64 case the bug was holding up other big-endian
fixes, in the x86_64 case it led to a workaround in C_C_M_C.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* RE: [PATCH 0/5] Fix handling of word subregs of wide registers
  2014-09-22 11:26           ` Richard Sandiford
@ 2014-09-22 12:07             ` Ajit Kumar Agarwal
  0 siblings, 0 replies; 23+ messages in thread
From: Ajit Kumar Agarwal @ 2014-09-22 12:07 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Jeff Law, gcc-patches



-----Original Message-----
From: Richard Sandiford [mailto:richard.sandiford@arm.com] 
Sent: Monday, September 22, 2014 4:56 PM
To: Ajit Kumar Agarwal
Cc: Jeff Law; gcc-patches@gcc.gnu.org
Subject: Re: [PATCH 0/5] Fix handling of word subregs of wide registers

Ajit Kumar Agarwal <ajit.kumar.agarwal@xilinx.com> writes:
> Jeff Law <law@redhat.com> writes:
>> On 09/19/14 01:23, Richard Sandiford wrote:
>>> Jeff Law <law@redhat.com> writes:
>>>> On 09/18/14 04:07, Richard Sandiford wrote:
>>>>> This series is a cleaned-up version of:
>>>>>
>>>>>       https://gcc.gnu.org/ml/gcc/2014-03/msg00163.html
>>>>>
>>>>> The underlying problem is that the semantics of subregs depend on 
>>>>> the word size.  You can't have a subreg for byte 2 of a 4-byte 
>>>>> word, say, but you can have a subreg for word 2 of a 4-word value 
>>>>> (as well as lowpart subregs of that word, etc.).  This causes 
>>>>> problems when an architecture has wider-than-word registers, since 
>>>>> the addressability of a word can then depend on which register 
>>>>> class is used.
>>>>>
>>>>> The register allocators need to fix up cases where a subreg turns 
>>>>> out to be invalid for a particular class.  This is really an 
>>>>> extension of what we need to do for CANNOT_CHANGE_MODE_CLASS.
>>>>>
>>>>> Tested on x86_64-linux-gnu, powerpc64-linux-gnu and aarch64_be-elf.
>>>> I thought we fixed these problems long ago with the change to subreg_byte?!?
>>>
>>> No, that was fixing something else.  (I'm just about old enough to 
>>> remember that too!)  The problem here is that (say):
>>>
>>>      (subreg:SI (reg:DI X) 4)
>>>
>>> is independently addressable on little-endian AArch32 if X assigned 
>>> to a GPR, but not if X is assigned to a vector register.  We need to 
>>> allow these kinds of subreg on pseudos in order to decompose 
>>> multiword arithmetic.  It's then up to the RA to realise that a 
>>> reload would be needed if X were assigned to a vector register, 
>>> since the upper half of a vector register cannot be independently accessed.
>>>
>>> Note that you could write this example even with the old word-style 
>>> offsets and IIRC the effect would have been the same.
>> OK.  So I kept thinking in terms of the byte offset stuff.  But what 
>> you're tackling is related to the mess around the mode of the subreg 
>> having a different meaning if its smaller than a word vs word-sized 
>> or greater.
>>
>> Right?
>
>>>Yeah, that's right.  Addressability is based on words, which is  
>>>inconvenient when your registers are bigger than a word.
>
> If the architecture like Microblaze which doesn't support the 1 byte 
> or
> 2 byte registers. In this scenario what should be returned when 
> SUBREG_WORD is used.

>>I don't understand the question sorry.  Subreg offsets are still represented as bytes rather than words.  The patch doesn't change the way that subregs are >>represented or the rules about which subregs are valid.

>>Both before and after the patch, the semantics of subregs say that if you have 4-byte words, only one of:

    >>(subreg:QI (reg:SI X) 0)
    >>(subreg:QI (reg:SI X) 1)
    >>(subreg:QI (reg:SI X) 2)
    >>(subreg:QI (reg:SI X) 3)

>>is ever valid (0 for little-endian, 3 for big-endian).  Writing to that one valid subreg will change the whole of X, unless the subreg is wrapped in a >>strict_lowpart.  In other words, subregs are defined so that individual parts of a word are not independently addressable.

>>However, individual words of a multiword register _are_ addressable.  I.e.:

   (subreg:SI (reg:DI Y) 0)
   (subreg:SI (reg:DI Y) 4)

>>are both valid.  Writing to one does not change the other.

>>The problem the patch was trying to solve was that you can have targets with 4-byte words but some 8-byte registers.  In those cases, it's still possible to >>form both of the Y subregs above if Y is allocated to a word-sized register, but not if Y is allocated to a doubleword-sized register.

Thanks Richard for the explanation. 

Thanks,
Richard

^ permalink raw reply	[flat|nested] 23+ messages in thread

* Re: [PATCH 4/5] Generalise invalid_mode_change_p
  2014-09-22  7:34     ` Richard Sandiford
@ 2014-09-22 16:29       ` Jeff Law
  0 siblings, 0 replies; 23+ messages in thread
From: Jeff Law @ 2014-09-22 16:29 UTC (permalink / raw)
  To: gcc-patches, richard.sandiford

On 09/22/14 01:34, Richard Sandiford wrote:
> Jeff Law <law@redhat.com> writes:
>> On 09/18/14 04:25, Richard Sandiford wrote:
>>> This is the main patch for the bug.  We should treat a register as invalid
>>> for a mode change if simplify_subreg_regno cannot provide a new register
>>> number for the result.  We should treat a class as invalid for a mode change
>>> if all registers in the class are invalid.  This is an extension of the old
>>> CANNOT_CHANGE_MODE_CLASS-based check (simplify_subreg_regno checks C_C_C_M).
>>>
>>> I forgot to say that the patch is a prerequisite to removing aarch64's
>>> C_C_C_M.  There are other prerequisites too, but removing C_C_C_M without
>>> this patch caused regressions in the existing testsuite, which is why no
>>> new tests are needed.
>>>
>>>
>>> gcc/
>>> 	* hard-reg-set.h: Include hash-table.h.
>>> 	(target_hard_regs): Add a finalize method and a x_simplifiable_subregs
>>> 	field.
>>> 	* target-globals.c (target_globals::~target_globals): Handle
>>> 	hard_regs->finalize.
>>> 	* rtl.h (subreg_shape): New structure.
>>> 	(shape_of_subreg): New function.
>>> 	(simplifiable_subregs): Declare.
>>> 	* reginfo.c (simplifiable_subreg): New structure.
>>> 	(simplifiable_subregs_hasher): Likewise.
>>> 	(simplifiable_subregs): New function.
>>> 	(invalid_mode_changes): Delete.
>>> 	(alid_mode_changes, valid_mode_changes_obstack): New variables.
>>> 	(record_subregs_of_mode): Remove subregs_of_mode parameter.
>>> 	Record valid mode changes in valid_mode_changes.
>>> 	(find_subregs_of_mode): Remove subregs_of_mode parameter.
>>> 	Update calls to record_subregs_of_mode.
>>> 	(init_subregs_of_mode): Remove invalid_mode_changes and bitmap
>>> 	handling.  Initialize new variables.  Update call to
>>> 	find_subregs_of_mode.
>>> 	(invalid_mode_change_p): Check new variables instead of
>>> 	invalid_mode_changes.
>>> 	(finish_subregs_of_mode): Finalize new variables instead of
>>> 	invalid_mode_changes.
>>> 	(target_hard_regs::finalize): New function.
>>> 	* ira-costs.c (print_allocno_costs): Call invalid_mode_change_p
>>> 	even when CLASS_CANNOT_CHANGE_MODE is undefined.
>>>
>>> Index: gcc/rtl.h
>>> ===================================================================
>>> --- gcc/rtl.h	2014-09-15 11:55:40.459855161 +0100
>>> +++ gcc/rtl.h	2014-09-15 12:26:21.249077760 +0100
>>> +/* Return the shape of a SUBREG rtx.  */
>>> +
>>> +static inline subreg_shape
>>> +shape_of_subreg (const_rtx x)
>>> +{
>>> +  return subreg_shape (GET_MODE (SUBREG_REG (x)),
>>> +		       SUBREG_BYTE (x), GET_MODE (x));
>>> +}
>>> +
>> Is there some reason you don't have a constructor that accepts a
>> const_rtx?
>
> I was worried that by allowing implicit const_rtx->subreg_shape
> conversions, it would be less obvious that the rtx has to have
> code SUBREG.  I.e. a checked conversion would be hidden in the
> constructor rather than being explicit.
>
> If with David's new rtx hierarchy we end up with an rtx_subreg
> subclass then I agree we should have a constructor that takes
> one of those.
Makes sense.

I'm not sure if I was explicit, but the patch is fine, that was more a 
curiosity on my part than anything.

jeff

^ permalink raw reply	[flat|nested] 23+ messages in thread

end of thread, other threads:[~2014-09-22 16:29 UTC | newest]

Thread overview: 23+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-09-18 10:07 [PATCH 0/5] Fix handling of word subregs of wide registers Richard Sandiford
2014-09-18 10:09 ` [PATCH 1/5] Allow *_HARD_REG_SET arguments to be const Richard Sandiford
2014-09-19  6:14   ` Jeff Law
2014-09-18 10:10 ` [PATCH 2/5] Tweak subreg_get_info documentation Richard Sandiford
2014-09-19  6:16   ` Jeff Law
2014-09-18 10:13 ` [PATCH 3/5] Use simplify_subreg_regno in combine.c:subst Richard Sandiford
2014-09-19  6:17   ` Jeff Law
2014-09-18 10:25 ` [PATCH 4/5] Generalise invalid_mode_change_p Richard Sandiford
2014-09-19 17:53   ` Jeff Law
2014-09-22  7:34     ` Richard Sandiford
2014-09-22 16:29       ` Jeff Law
2014-09-18 10:26 ` [PATCH 5/5] Remove CANNOT_CHANGE_MODE_CLASS workaround in i386.c Richard Sandiford
2014-09-19 17:54   ` Jeff Law
2014-09-19  6:14 ` [PATCH 0/5] Fix handling of word subregs of wide registers Jeff Law
2014-09-19  7:24   ` Richard Sandiford
2014-09-19 15:59     ` Jeff Law
2014-09-19 17:14     ` Jeff Law
2014-09-22  7:23       ` Richard Sandiford
2014-09-22 10:49         ` Ajit Kumar Agarwal
2014-09-22 11:26           ` Richard Sandiford
2014-09-22 12:07             ` Ajit Kumar Agarwal
2014-09-22  9:00 ` Andrew Pinski
2014-09-22 11:29   ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).