public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Fix 70083, lra-induced crash
       [not found] <56E12ADA.5060400@t-online.de>
@ 2016-03-10  8:26 ` Bernd Schmidt
  2016-03-10 17:56   ` Vladimir Makarov
  0 siblings, 1 reply; 2+ messages in thread
From: Bernd Schmidt @ 2016-03-10  8:26 UTC (permalink / raw)
  To: GCC Patches, Vladimir Makarov

[-- Attachment #1: Type: text/plain, Size: 802 bytes --]

This crash happens because LRA tries to save an AVX hard reg in a large 
mode, even though the register only appears in the function in smaller 
modes. Stack alignment isn't set up to support the larger mode.

Currently, biggest_mode for hard registers is initialized from regno_reg_rtx, 
which is set up with a large mode for argument regs. That mode is not necessarily 
seen in the function itself and may be too large. If that initialization 
is changed to use VOIDmode, we compute the correct value during 
lra_push_insns, but then subsequently we clear it to VOIDmode again, and 
it never seems to get updated. Hence, the patch has several parts: 
initialize hard reg biggest_mode with VOIDmode, ensure it gets updated 
during process_bb_lives, and use the value in split_reg.

Bootstrapped and tested on x86_64-linux, ok?


Bernd

[-- Attachment #2: 70083.diff --]
[-- Type: text/x-patch, Size: 6602 bytes --]

	PR target/70083
	* lra-lives.c (process_bb_lives): Also update biggest mode for hard
	regs.
	(lra_create_live_ranges_1): Initialize hard register biggest_mode to
	VOIDmode.
	* lra-constraints.c (split_reg): For hard regs, try to find the
	biggest single-register mode used in the function.

testsuite/
	PR target/70083
	* gcc.dg/torture/pr70083.c: New test.
	* gcc.target/i386/pr70083.c: New test.

Index: gcc/lra-lives.c
===================================================================
--- gcc/lra-lives.c	(revision 234025)
+++ gcc/lra-lives.c	(working copy)
@@ -700,12 +700,13 @@ process_bb_lives (basic_block bb, int &c
 
       /* Update max ref width and hard reg usage.  */
       for (reg = curr_id->regs; reg != NULL; reg = reg->next)
-	if (reg->regno >= FIRST_PSEUDO_REGISTER
-	    && (GET_MODE_SIZE (reg->biggest_mode)
-		> GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode)))
-	  lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
-	else if (reg->regno < FIRST_PSEUDO_REGISTER)
-	  lra_hard_reg_usage[reg->regno] += freq;
+	{
+	  if (GET_MODE_SIZE (reg->biggest_mode)
+	      > GET_MODE_SIZE (lra_reg_info[reg->regno].biggest_mode))
+	    lra_reg_info[reg->regno].biggest_mode = reg->biggest_mode;
+	  if (reg->regno < FIRST_PSEUDO_REGISTER)
+	    lra_hard_reg_usage[reg->regno] += freq;
+	}
 
       call_p = CALL_P (curr_insn);
       src_regno = (set != NULL_RTX && REG_P (SET_SRC (set))
@@ -1208,7 +1209,7 @@ lra_create_live_ranges_1 (bool all_p, bo
 	 conservative because of recent transformation.  Here in this
 	 file we recalculate it again as it costs practically
 	 nothing.  */
-      if (regno_reg_rtx[i] != NULL_RTX)
+      if (i >= FIRST_PSEUDO_REGISTER && regno_reg_rtx[i] != NULL_RTX)
 	lra_reg_info[i].biggest_mode = GET_MODE (regno_reg_rtx[i]);
       else
 	lra_reg_info[i].biggest_mode = VOIDmode;
Index: gcc/lra-constraints.c
===================================================================
--- gcc/lra-constraints.c	(revision 234025)
+++ gcc/lra-constraints.c	(working copy)
@@ -4972,6 +4972,7 @@ split_reg (bool before_p, int original_r
   rtx_insn *restore, *save;
   bool after_p;
   bool call_save_p;
+  machine_mode mode;
 
   if (original_regno < FIRST_PSEUDO_REGISTER)
     {
@@ -4979,24 +4980,32 @@ split_reg (bool before_p, int original_r
       hard_regno = original_regno;
       call_save_p = false;
       nregs = 1;
+      mode = lra_reg_info[hard_regno].biggest_mode;
+      machine_mode reg_rtx_mode = GET_MODE (regno_reg_rtx[hard_regno]);
+      if (GET_MODE_SIZE (mode) > GET_MODE_SIZE (reg_rtx_mode))
+	{
+	  original_reg = regno_reg_rtx[hard_regno];
+	  mode = reg_rtx_mode;
+	}
+      else
+	original_reg = gen_rtx_REG (mode, hard_regno);
     }
   else
     {
+      mode = PSEUDO_REGNO_MODE (original_regno);
       hard_regno = reg_renumber[original_regno];
-      nregs = hard_regno_nregs[hard_regno][PSEUDO_REGNO_MODE (original_regno)];
+      nregs = hard_regno_nregs[hard_regno][mode];
       rclass = lra_get_allocno_class (original_regno);
       original_reg = regno_reg_rtx[original_regno];
       call_save_p = need_for_call_save_p (original_regno);
     }
-  original_reg = regno_reg_rtx[original_regno];
   lra_assert (hard_regno >= 0);
   if (lra_dump_file != NULL)
     fprintf (lra_dump_file,
 	     "	  ((((((((((((((((((((((((((((((((((((((((((((((((\n");
+	  
   if (call_save_p)
     {
-      machine_mode mode = GET_MODE (original_reg);
-
       mode = HARD_REGNO_CALLER_SAVE_MODE (hard_regno,
 					  hard_regno_nregs[hard_regno][mode],
 					  mode);
@@ -5004,8 +5013,7 @@ split_reg (bool before_p, int original_r
     }
   else
     {
-      rclass = choose_split_class (rclass, hard_regno,
-				   GET_MODE (original_reg));
+      rclass = choose_split_class (rclass, hard_regno, mode);
       if (rclass == NO_REGS)
 	{
 	  if (lra_dump_file != NULL)
@@ -5023,8 +5031,7 @@ split_reg (bool before_p, int original_r
 	    }
 	  return false;
 	}
-      new_reg = lra_create_new_reg (GET_MODE (original_reg), original_reg,
-				    rclass, "split");
+      new_reg = lra_create_new_reg (mode, original_reg, rclass, "split");
       reg_renumber[REGNO (new_reg)] = hard_regno;
     }
   save = emit_spill_move (true, new_reg, original_reg);
Index: gcc/testsuite/gcc.dg/torture/pr70083.c
===================================================================
--- gcc/testsuite/gcc.dg/torture/pr70083.c	(revision 0)
+++ gcc/testsuite/gcc.dg/torture/pr70083.c	(working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-psabi" } */
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+int
+foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1)
+{
+  v32u32_1 %= (v8si) v32u16_1 | 1;
+  v32u64_1[1] |= ((1));
+  v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1;
+  v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31;
+  v32u32_0 -= (v8si)~v32u64_1;
+  v32u32_1[2] |= 0x1f;
+  v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 };
+  v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0};
+  return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3];
+}
Index: gcc/testsuite/gcc.target/i386/pr70083.c
===================================================================
--- gcc/testsuite/gcc.target/i386/pr70083.c	(revision 0)
+++ gcc/testsuite/gcc.target/i386/pr70083.c	(working copy)
@@ -0,0 +1,20 @@
+/* { dg-do compile } */
+/* { dg-options "-Wno-psabi -O2 -fno-dce -fschedule-insns -fno-sched-critical-path-heuristic -mavx512dq --param=max-cse-insns=1" } */
+
+typedef short v16hi __attribute__ ((vector_size (32)));
+typedef int v8si __attribute__ ((vector_size (32)));
+typedef long long v4di __attribute__ ((vector_size (32)));
+
+int
+foo(int u32_0, int u64_0, int u64_1, v16hi v32u16_0, v8si v32u32_0, v4di v32u64_0, v16hi v32u16_1, v8si v32u32_1, v4di v32u64_1)
+{
+  v32u32_1 %= (v8si) v32u16_1 | 1;
+  v32u64_1[1] |= ((1));
+  v32u16_0 /= (v16hi){~u64_1, 1, 0xb56c, 0xd279, 0x26b6, 0x74d9, 0xf764, 0, 0, -v32u16_1[6]} | 1;
+  v32u16_1 ^= (v16hi){0xc98d, 1, 0x8c71, u32_0, 0x5366, 0, ~v32u64_1[1]} & 31;
+  v32u32_0 -= (v8si)~v32u64_1;
+  v32u32_1[2] |= 0x1f;
+  v32u16_0 %= (v16hi){2, 0xffff, u32_0, 1, v32u64_0[1], u32_0 };
+  v32u32_1 /= (v8si){0x1e7390, v32u16_0[12], ~v32u16_1[2], -u64_0};
+  return v32u16_0[4] + v32u16_0[5] + v32u32_0[5] + v32u32_1[6] + v32u64_1[3];
+}

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: Fix 70083, lra-induced crash
  2016-03-10  8:26 ` Fix 70083, lra-induced crash Bernd Schmidt
@ 2016-03-10 17:56   ` Vladimir Makarov
  0 siblings, 0 replies; 2+ messages in thread
From: Vladimir Makarov @ 2016-03-10 17:56 UTC (permalink / raw)
  To: Bernd Schmidt, GCC Patches

On 03/10/2016 03:26 AM, Bernd Schmidt wrote:
> This crash happens because LRA tries to save an AVX hard reg in a 
> large mode, and it only appears in the function in smaller modes. 
> Stack alignment isn't set up to support the larger mode.
>
> Currently, biggest_mode for hard registers is set up from 
> regno_reg_rtx, set up to a large mode for argument regs. That mode is 
> not necessarily seen in the function itself and may be too large. If 
> that initialization is changed to use VOIDmode, we compute the correct 
> value during lra_push_insns, but then subsequently we clear it to 
> VOIDmode again, and it never seems to get updated. Hence, the patch 
> has several parts: initialize hard reg biggest_mode with VOIDmode, 
> ensure it gets updated during process_bb_lives, and use the value in 
> split_reg.
>
> Bootstrapped and tested on x86_64-linux, ok?
>
Yes.  Thank you, Bernd.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2016-03-10 17:56 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <56E12ADA.5060400@t-online.de>
2016-03-10  8:26 ` Fix 70083, lra-induced crash Bernd Schmidt
2016-03-10 17:56   ` Vladimir Makarov

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).