public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* using scratchpads to enhance RTL-level if-conversion: revised patch
@ 2015-10-13 20:05 Abe
  0 siblings, 0 replies; 19+ messages in thread
From: Abe @ 2015-10-13 20:05 UTC (permalink / raw)
  To: gcc-patches, Sebastian Pop, Kyrill Tkachov, Bernd Schmidt

[-- Attachment #1: Type: text/plain, Size: 215 bytes --]

Attached please find my revised patch to enhance RTL-level if-conversion
using scratchpads, newly optimizing only half-hammock stores for now.

Bootstrapped and regression-tested on x86_64 GNU/Linux.

Regards,

Abe

[-- Attachment #2: Abe_RTL_ifcvt_patch_1_v007.2___2015_Oct._13_.patch --]
[-- Type: text/plain, Size: 6705 bytes --]

--- ifcvt.c	2015-09-01 12:54:38.234108158 -0500
+++ ifcvt.c	2015-10-13 14:22:27.935204461 -0500
@@ -56,6 +56,8 @@
 #include "rtl-iter.h"
 #include "ifcvt.h"
 
+#include <utility>
+
 #ifndef MAX_CONDITIONAL_EXECUTE
 #define MAX_CONDITIONAL_EXECUTE \
   (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +68,9 @@
 
 #define NULL_BLOCK	((basic_block) NULL)
 
+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
 /* True if after combine pass.  */
 static bool ifcvt_after_combine;
 
@@ -110,6 +115,8 @@ static int dead_or_predicable (basic_blo
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
 \f
 /* Count the number of non-jump active insns in BB.  */
 
@@ -2784,19 +2791,16 @@ noce_operand_ok (const_rtx op)
   return ! may_trap_p (op);
 }
 
-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support.  */
 
 static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
 {
-  rtx addr;
-
   if (MEM_READONLY_P (mem))
     return true;
 
-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
   addr = XEXP (mem, 0);
 
   /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2837,6 +2841,18 @@ noce_mem_write_may_trap_or_fault_p (cons
   return false;
 }
 
+/* Return true if a write into MEM may trap or fault
+   without scratchpad support.  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
 /* Return whether we can use store speculation for MEM.  TOP_BB is the
    basic block above the conditional block where we are considering
    doing the speculative store.  We look for whether MEM is set
@@ -3156,17 +3172,116 @@ noce_process_if_block (struct noce_if_in
 
   if (!set_b && MEM_P (orig_x))
     {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it.  If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it.  If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;".  */
       if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  if (   reload_completed
+	      || optimize < 2
+	      || optimize_function_for_size_p (cfun)
+	      || targetm.have_conditional_execution ()
+	      || !HAVE_conditional_move
+	      || (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1))))
+	    return FALSE;
+
+
+	  if (noce_mem_write_may_trap_or_fault_p_1 (orig_x)
+	      || !MEM_SIZE_KNOWN_P (orig_x))
+	    return FALSE;
+
+	  const size_t MEM_size = MEM_SIZE (orig_x);
+	  if (MEM_size > SCRATCHPAD_MAX_SIZE)
+	    return FALSE;
+
+	  rtx biggest_spad = 0;
+	  unsigned int biggest_spad_size = 0;
+	  if (size_t vec_len = scratchpads.length ())
+	    {
+	      std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len - 1];
+	      biggest_spad = tmp_pair.first;
+	      biggest_spad_size = tmp_pair.second;
+	    }
+	  if (MEM_size > biggest_spad_size)
+	    {
+	      biggest_spad_size = MEM_size;
+	      biggest_spad = assign_stack_local (GET_MODE (orig_x), MEM_size, 0);
+	      gcc_assert (biggest_spad);
+	      scratchpads.safe_push (std::make_pair (biggest_spad, MEM_size));
+	    }
+
+	  gcc_assert (biggest_spad);
+
+	  rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+	  set_used_flags (reg_for_store_addr);
+
+	  start_sequence ();
+
+	  for (rtx_insn *insn = BB_HEAD (then_bb);
+	       insn && insn != insn_a && insn != BB_END (then_bb);
+	       insn = NEXT_INSN (insn))
+	    if (!(NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+	      duplicate_insn_chain (insn, insn);
+
+	  rtx target = noce_emit_cmove (if_info,
+					reg_for_store_addr,
+					GET_CODE (cond),
+					XEXP (cond, 0),
+					XEXP (cond, 1),
+					XEXP (orig_x, 0),
+					XEXP (biggest_spad, 0));
+
+	  if (!target)
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+	  if (target != reg_for_store_addr)
+	    noce_emit_move_insn (reg_for_store_addr, target);
+
+	  rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr);
+	  MEM_NOTRAP_P (mem) = true;
+	  MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x);
+
+	  alias_set_type temp_alias_set = new_alias_set ();
+	  if (MEM_ALIAS_SET (orig_x))
+	    record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+	  set_mem_alias_set (mem, temp_alias_set);
+
+	  set_mem_align (mem, MIN (MEM_ALIGN (biggest_spad),
+				   MEM_ALIGN (orig_x)));
+	  if (MEM_ADDR_SPACE (orig_x) != MEM_ADDR_SPACE (biggest_spad))
+	    {
+	      end_sequence ();
+	      return FALSE;
+	    }
+
+	  set_used_flags (mem);
+
+	  noce_emit_move_insn (mem, a);
+
+	  rtx_insn *seq = end_ifcvt_sequence (if_info);
+	  if (!seq)
+	    return FALSE;
+
+	  unshare_all_rtl_in_chain (seq);
+
+	  /* Prevent the code right after "success:"
+	     from throwing away the changes.  */
+	  x = orig_x;
+
+	  emit_insn_before_setloc (seq, if_info->jump,
+				   INSN_LOCATION (if_info->insn_a));
+	  goto success;
+
+	}
 
       /* Avoid store speculation: given "if (...) x = a" where x is a
 	 MEM, we only want to do the store if x is always set
@@ -4959,6 +5074,9 @@ if_convert (bool after_combine)
   basic_block bb;
   int pass;
 
+  /* Ensure that we start the scratchpads data fresh each time.  */
+  scratchpads.truncate (0);
+
   if (optimize == 1)
     {
       df_live_add_problem ();

^ permalink raw reply	[flat|nested] 19+ messages in thread
* using scratchpads to enhance RTL-level if-conversion: revised patch
@ 2015-10-07 23:29 Abe
  2015-10-08 13:09 ` Sebastian Pop
  2015-10-08 13:23 ` Bernd Schmidt
  0 siblings, 2 replies; 19+ messages in thread
From: Abe @ 2015-10-07 23:29 UTC (permalink / raw)
  To: gcc-patches, Sebastian Pop, Bernd Schmidt, Kyrill Tkachov

[-- Attachment #1: Type: text/plain, Size: 903 bytes --]

Dear all,

Attached please find my revised patch to the RTL if converter.  This patch enables the
if-conversion of half-hammocks with a store in them that the internal GCC machinery
otherwise considers too hazardous to if-convert.  This is made safe by using the
"scratchpad" technique, i.e. throwing away the store into a safe location where nothing
of any importance is currently stored.  The scratchpads are allocated in the stack frame.

Here is an example of code which is newly converted with this patch,
at least when targeting AArch64:

   int A[10];

   void half_hammock() {
     if (A[0])
       A[1] = 2;
   }


Both tested against trunk and bootstrapped OK with defaults* on AMD64-AKA-"x86_64" GNU/Linux.

'*': [except for "--prefix"]


I`m sending the patch as an attachment to avoid it
being corrupted/reformatted by any e-mail troubles.

I look forward to your feedback.

Regards,

Abe


[-- Attachment #2: Abe_RTL_ifcvt_patch_1___2015_Oct._7_.patch --]
[-- Type: text/plain, Size: 7836 bytes --]

--- ifcvt.c	2015-09-01 12:54:38.234108158 -0500
+++ ifcvt.c	2015-10-07 11:14:11.606439645 -0500
@@ -47,6 +47,7 @@
 #include "insn-codes.h"
 #include "optabs.h"
 #include "diagnostic-core.h"
+#include "diagnostic-color.h"
 #include "tm_p.h"
 #include "cfgloop.h"
 #include "target.h"
@@ -56,6 +57,8 @@
 #include "rtl-iter.h"
 #include "ifcvt.h"
 
+#include <utility>
+
 #ifndef MAX_CONDITIONAL_EXECUTE
 #define MAX_CONDITIONAL_EXECUTE \
   (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +69,9 @@
 
 #define NULL_BLOCK	((basic_block) NULL)
 
+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
 /* True if after combine pass.  */
 static bool ifcvt_after_combine;
 
@@ -110,6 +116,8 @@ static int dead_or_predicable (basic_blo
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
 \f
 /* Count the number of non-jump active insns in BB.  */
 
@@ -2784,19 +2792,16 @@ noce_operand_ok (const_rtx op)
   return ! may_trap_p (op);
 }
 
-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support.  */
 
 static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
 {
-  rtx addr;
-
   if (MEM_READONLY_P (mem))
     return true;
 
-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
   addr = XEXP (mem, 0);
 
   /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2837,6 +2842,18 @@ noce_mem_write_may_trap_or_fault_p (cons
   return false;
 }
 
+/* Return true if a write into MEM may trap or fault
+   without scratchpad support.  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
 /* Return whether we can use store speculation for MEM.  TOP_BB is the
    basic block above the conditional block where we are considering
    doing the speculative store.  We look for whether MEM is set
@@ -3156,17 +3173,149 @@ noce_process_if_block (struct noce_if_in
 
   if (!set_b && MEM_P (orig_x))
     {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it.  If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it.  If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;".  */
       if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  /* The next "if": quoting "noce_emit_cmove":
+	       If we can't create new pseudos, though, don't bother.  */
+	  if (reload_completed)
+	    return FALSE;
+
+	  if (optimize<2)
+	    return FALSE;
+
+	  if (optimize_function_for_size_p (cfun))
+	    return FALSE;
+
+	  if (targetm.have_conditional_execution () || ! HAVE_conditional_move)
+	    return FALSE;
+
+	  const bool not_a_scratchpad_candidate =
+	    noce_mem_write_may_trap_or_fault_p_1 (orig_x);
+
+	  if (! not_a_scratchpad_candidate)
+	  {
+	    if (MEM_SIZE_KNOWN_P (orig_x))
+	    {
+	      const size_t size_of_MEM = MEM_SIZE (orig_x);
+
+	      if (size_of_MEM <= SCRATCHPAD_MAX_SIZE)
+	      {
+		rtx biggest_scratchpad = 0;
+		unsigned int biggest_scratchpad_size = 0;
+		if (size_t vec_len = scratchpads.length ())
+		  {
+		    std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len-1];
+		    biggest_scratchpad = tmp_pair.first;
+		    biggest_scratchpad_size = tmp_pair.second;
+		  }
+		if (size_of_MEM > biggest_scratchpad_size)
+		{
+		  biggest_scratchpad_size = size_of_MEM;
+		  biggest_scratchpad = assign_stack_local
+		    (GET_MODE (orig_x), size_of_MEM, 0);
+		  gcc_assert (biggest_scratchpad);
+		  scratchpads.safe_push (std::make_pair (biggest_scratchpad,
+							 size_of_MEM));
+		}
+
+		gcc_assert (biggest_scratchpad);
+
+		rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+		set_used_flags (reg_for_store_addr);
+
+		start_sequence ();
+
+		/* We must copy the insns between the start of the THEN block
+		   and the set of 'a', if they exist, since they may be needed
+		   for the converted code as well, but we must not copy a
+		   start-of-BB note if one is present, nor debug "insn"s.  */
+
+		for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a
+		  && insn != BB_END (then_bb); insn=NEXT_INSN (insn))
+		  {
+		     if (! (NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+		       duplicate_insn_chain (insn, insn);
+		       /* A return of 0 from "duplicate_insn_chain" is _not_
+			  a failure; it just returns the "NEXT_INSN" of the
+			  last insn it duplicated.  */
+		  }
+
+		/* Done copying the needed insns between the start of the
+		   THEN block and the set of 'a', if any.  */
+
+		if (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+
+		rtx target = noce_emit_cmove (if_info,
+					      reg_for_store_addr,
+					      GET_CODE (cond),
+					      XEXP (cond, 0),
+					      XEXP (cond, 1),
+					      XEXP (orig_x, 0),
+					      XEXP (biggest_scratchpad, 0));
+
+		if (!target)
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+		if (target != reg_for_store_addr)
+		  noce_emit_move_insn (reg_for_store_addr, target);
+
+		rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr);
+		MEM_NOTRAP_P (mem) = true;
+		MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x);
+
+		alias_set_type temp_alias_set = new_alias_set ();
+		if (MEM_ALIAS_SET (orig_x))
+		  record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+		set_mem_alias_set (mem, temp_alias_set);
+
+		set_mem_align (mem,
+		  MIN (MEM_ALIGN (biggest_scratchpad), MEM_ALIGN (orig_x)));
+		if (MEM_ADDR_SPACE (orig_x)
+		 != MEM_ADDR_SPACE (biggest_scratchpad))
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+
+		set_used_flags (mem);
+
+		noce_emit_move_insn (mem, a);
+
+		rtx_insn *seq = end_ifcvt_sequence (if_info);
+		if (!seq)
+		  return FALSE;
+
+		unshare_all_rtl_in_chain (seq);
+
+		/* Prevent the code right after "success:"
+		   from throwing away the changes.  */
+		x = orig_x;
+
+		emit_insn_before_setloc (seq, if_info->jump,
+					 INSN_LOCATION (if_info->insn_a));
+		goto success;
+
+	      }
+	    }
+	  }
+	  return FALSE;
+	}
 
       /* Avoid store speculation: given "if (...) x = a" where x is a
 	 MEM, we only want to do the store if x is always set
@@ -4959,6 +5108,9 @@ if_convert (bool after_combine)
   basic_block bb;
   int pass;
 
+  /* Ensure that we start the scratchpads data fresh each time.  */
+  scratchpads.truncate (0);
+
   if (optimize == 1)
     {
       df_live_add_problem ();

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2015-10-30 14:08 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <024301d11106$2379b5f0$6a6d21d0$@samsung.com>
2015-10-27 23:02 ` using scratchpads to enhance RTL-level if-conversion: revised patch Abe
2015-10-30 14:09   ` Bernd Schmidt
2015-10-13 20:05 Abe
  -- strict thread matches above, loose matches on Subject: below --
2015-10-07 23:29 Abe
2015-10-08 13:09 ` Sebastian Pop
2015-10-08 13:20   ` Sebastian Pop
2015-10-08 13:26     ` Bernd Schmidt
2015-10-08 13:23 ` Bernd Schmidt
2015-10-13 19:34   ` Abe
2015-10-13 20:16     ` Bernd Schmidt
2015-10-14 17:43       ` Jeff Law
2015-10-14 19:15         ` Bernd Schmidt
2015-10-15  8:52           ` Richard Biener
2015-10-20  5:52           ` Jeff Law
2015-10-20  9:37             ` Richard Biener
2015-10-14  1:05   ` Richard Henderson
2015-10-14  1:11     ` Richard Henderson
2015-10-14  8:29     ` Eric Botcazou
2015-10-14 17:46       ` Jeff Law

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).