public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Abe <abe_skolnik@yahoo.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>,
	 Sebastian Pop <sebpop@gmail.com>,
	Bernd Schmidt <bschmidt@redhat.com>,
	 Kyrill Tkachov <kyrylo.tkachov@arm.com>
Subject: using scratchpads to enhance RTL-level if-conversion: revised patch
Date: Wed, 07 Oct 2015 23:29:00 -0000	[thread overview]
Message-ID: <5615AADE.4030306@yahoo.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 903 bytes --]

Dear all,

Attached please find my revised patch to the RTL if converter.  This patch enables the
if-conversion of half-hammocks with a store in them that the internal GCC machinery
otherwise considers too hazardous to if-convert.  This is made safe by using the
"scratchpad" technique, i.e. throwing away the store into a safe location where nothing
of any importance is currently stored.  The scratchpads are allocated in the stack frame.

Here is an example of code which is newly converted with this patch,
at least when targeting AArch64:

   int A[10];

   void half_hammock() {
     if (A[0])
       A[1] = 2;
   }


Both tested against trunk and bootstrapped OK with defaults* on AMD64-AKA-"x86_64" GNU/Linux.

'*': [except for "--prefix"]


I`m sending the patch as an attachment to avoid it
being corrupted/reformatted by any e-mail troubles.

I look forward to your feedback.

Regards,

Abe


[-- Attachment #2: Abe_RTL_ifcvt_patch_1___2015_Oct._7_.patch --]
[-- Type: text/plain, Size: 7836 bytes --]

--- ifcvt.c	2015-09-01 12:54:38.234108158 -0500
+++ ifcvt.c	2015-10-07 11:14:11.606439645 -0500
@@ -47,6 +47,7 @@
 #include "insn-codes.h"
 #include "optabs.h"
 #include "diagnostic-core.h"
+#include "diagnostic-color.h"
 #include "tm_p.h"
 #include "cfgloop.h"
 #include "target.h"
@@ -56,6 +57,8 @@
 #include "rtl-iter.h"
 #include "ifcvt.h"
 
+#include <utility>
+
 #ifndef MAX_CONDITIONAL_EXECUTE
 #define MAX_CONDITIONAL_EXECUTE \
   (BRANCH_COST (optimize_function_for_speed_p (cfun), false) \
@@ -66,6 +69,9 @@
 
 #define NULL_BLOCK	((basic_block) NULL)
 
+/* An arbitrary inclusive maximum size (in bytes) for each scratchpad.  */
+#define SCRATCHPAD_MAX_SIZE 128
+
 /* True if after combine pass.  */
 static bool ifcvt_after_combine;
 
@@ -110,6 +116,8 @@ static int dead_or_predicable (basic_blo
 			       edge, int);
 static void noce_emit_move_insn (rtx, rtx);
 static rtx_insn *block_has_only_trap (basic_block);
+
+static auto_vec<std::pair<rtx, unsigned int> > scratchpads;
 \f
 /* Count the number of non-jump active insns in BB.  */
 
@@ -2784,19 +2792,16 @@ noce_operand_ok (const_rtx op)
   return ! may_trap_p (op);
 }
 
-/* Return true if a write into MEM may trap or fault.  */
+/* Return true if a write into MEM may trap or fault
+   even in the presence of scratchpad support.  */
 
 static bool
-noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+noce_mem_write_may_trap_or_fault_p_1 (const_rtx mem)
 {
-  rtx addr;
-
   if (MEM_READONLY_P (mem))
     return true;
 
-  if (may_trap_or_fault_p (mem))
-    return true;
-
+  rtx addr;
   addr = XEXP (mem, 0);
 
   /* Call target hook to avoid the effects of -fpic etc....  */
@@ -2837,6 +2842,18 @@ noce_mem_write_may_trap_or_fault_p (cons
   return false;
 }
 
+/* Return true if a write into MEM may trap or fault
+   without scratchpad support.  */
+
+static bool
+noce_mem_write_may_trap_or_fault_p (const_rtx mem)
+{
+  if (may_trap_or_fault_p (mem))
+    return true;
+
+  return noce_mem_write_may_trap_or_fault_p_1 (mem);
+}
+
 /* Return whether we can use store speculation for MEM.  TOP_BB is the
    basic block above the conditional block where we are considering
    doing the speculative store.  We look for whether MEM is set
@@ -3156,17 +3173,149 @@ noce_process_if_block (struct noce_if_in
 
   if (!set_b && MEM_P (orig_x))
     {
-      /* Disallow the "if (...) x = a;" form (implicit "else x = x;")
-	 for optimizations if writing to x may trap or fault,
-	 i.e. it's a memory other than a static var or a stack slot,
-	 is misaligned on strict aligned machines or is read-only.  If
-	 x is a read-only memory, then the program is valid only if we
-	 avoid the store into it.  If there are stores on both the
-	 THEN and ELSE arms, then we can go ahead with the conversion;
-	 either the program is broken, or the condition is always
-	 false such that the other memory is selected.  */
+      /* Disallow the "if (...) x = a;" form (with no "else") for optimizations
+	 when x is misaligned on strict-alignment machines or is read-only.
+	 If x is a memory other than a static var or a stack slot: for targets
+	 _with_ conditional move and _without_ conditional execution,
+	 convert using the scratchpad technique, otherwise don`t convert.
+	 If x is a read-only memory, then the program is valid only if we avoid
+	 the store into it.  If there are stores on both the THEN and ELSE arms,
+	 then we can go ahead with the conversion; either the program is broken,
+	 or the condition is always false such that the other memory is selected.
+	 The non-scratchpad-based conversion here has an implicit "else x = x;".  */
       if (noce_mem_write_may_trap_or_fault_p (orig_x))
-	return FALSE;
+	{
+	  /* The next "if": quoting "noce_emit_cmove":
+	       If we can't create new pseudos, though, don't bother.  */
+	  if (reload_completed)
+	    return FALSE;
+
+	  if (optimize<2)
+	    return FALSE;
+
+	  if (optimize_function_for_size_p (cfun))
+	    return FALSE;
+
+	  if (targetm.have_conditional_execution () || ! HAVE_conditional_move)
+	    return FALSE;
+
+	  const bool not_a_scratchpad_candidate =
+	    noce_mem_write_may_trap_or_fault_p_1 (orig_x);
+
+	  if (! not_a_scratchpad_candidate)
+	  {
+	    if (MEM_SIZE_KNOWN_P (orig_x))
+	    {
+	      const size_t size_of_MEM = MEM_SIZE (orig_x);
+
+	      if (size_of_MEM <= SCRATCHPAD_MAX_SIZE)
+	      {
+		rtx biggest_scratchpad = 0;
+		unsigned int biggest_scratchpad_size = 0;
+		if (size_t vec_len = scratchpads.length ())
+		  {
+		    std::pair<rtx, unsigned> tmp_pair = scratchpads[vec_len-1];
+		    biggest_scratchpad = tmp_pair.first;
+		    biggest_scratchpad_size = tmp_pair.second;
+		  }
+		if (size_of_MEM > biggest_scratchpad_size)
+		{
+		  biggest_scratchpad_size = size_of_MEM;
+		  biggest_scratchpad = assign_stack_local
+		    (GET_MODE (orig_x), size_of_MEM, 0);
+		  gcc_assert (biggest_scratchpad);
+		  scratchpads.safe_push (std::make_pair (biggest_scratchpad,
+							 size_of_MEM));
+		}
+
+		gcc_assert (biggest_scratchpad);
+
+		rtx reg_for_store_addr = gen_reg_rtx (Pmode);
+		set_used_flags (reg_for_store_addr);
+
+		start_sequence ();
+
+		/* We must copy the insns between the start of the THEN block
+		   and the set of 'a', if they exist, since they may be needed
+		   for the converted code as well, but we must not copy a
+		   start-of-BB note if one is present, nor debug "insn"s.  */
+
+		for (rtx_insn* insn = BB_HEAD (then_bb); insn && insn != insn_a
+		  && insn != BB_END (then_bb); insn=NEXT_INSN (insn))
+		  {
+		     if (! (NOTE_INSN_BASIC_BLOCK_P (insn) || DEBUG_INSN_P (insn)))
+		       duplicate_insn_chain (insn, insn);
+		       /* A return of 0 from "duplicate_insn_chain" is _not_
+			  a failure; it just returns the "NEXT_INSN" of the
+			  last insn it duplicated.  */
+		  }
+
+		/* Done copying the needed insns between the start of the
+		   THEN block and the set of 'a', if any.  */
+
+		if (CONSTANT_P (XEXP (cond, 0)) && CONSTANT_P (XEXP (cond, 1)))
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+
+		rtx target = noce_emit_cmove (if_info,
+					      reg_for_store_addr,
+					      GET_CODE (cond),
+					      XEXP (cond, 0),
+					      XEXP (cond, 1),
+					      XEXP (orig_x, 0),
+					      XEXP (biggest_scratchpad, 0));
+
+		if (!target)
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+		if (target != reg_for_store_addr)
+		  noce_emit_move_insn (reg_for_store_addr, target);
+
+		rtx mem = gen_rtx_MEM (GET_MODE (orig_x), reg_for_store_addr);
+		MEM_NOTRAP_P (mem) = true;
+		MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_x);
+
+		alias_set_type temp_alias_set = new_alias_set ();
+		if (MEM_ALIAS_SET (orig_x))
+		  record_alias_subset (MEM_ALIAS_SET (orig_x), temp_alias_set);
+		set_mem_alias_set (mem, temp_alias_set);
+
+		set_mem_align (mem,
+		  MIN (MEM_ALIGN (biggest_scratchpad), MEM_ALIGN (orig_x)));
+		if (MEM_ADDR_SPACE (orig_x)
+		 != MEM_ADDR_SPACE (biggest_scratchpad))
+		  {
+		    end_sequence ();
+		    return FALSE;
+		  }
+
+		set_used_flags (mem);
+
+		noce_emit_move_insn (mem, a);
+
+		rtx_insn *seq = end_ifcvt_sequence (if_info);
+		if (!seq)
+		  return FALSE;
+
+		unshare_all_rtl_in_chain (seq);
+
+		/* Prevent the code right after "success:"
+		   from throwing away the changes.  */
+		x = orig_x;
+
+		emit_insn_before_setloc (seq, if_info->jump,
+					 INSN_LOCATION (if_info->insn_a));
+		goto success;
+
+	      }
+	    }
+	  }
+	  return FALSE;
+	}
 
       /* Avoid store speculation: given "if (...) x = a" where x is a
 	 MEM, we only want to do the store if x is always set
@@ -4959,6 +5108,9 @@ if_convert (bool after_combine)
   basic_block bb;
   int pass;
 
+  /* Ensure that we start the scratchpads data fresh each time.  */
+  scratchpads.truncate (0);
+
   if (optimize == 1)
     {
       df_live_add_problem ();

             reply	other threads:[~2015-10-07 23:29 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-10-07 23:29 Abe [this message]
2015-10-08 13:09 ` Sebastian Pop
2015-10-08 13:20   ` Sebastian Pop
2015-10-08 13:26     ` Bernd Schmidt
2015-10-08 13:23 ` Bernd Schmidt
2015-10-13 19:34   ` Abe
2015-10-13 20:16     ` Bernd Schmidt
2015-10-14 17:43       ` Jeff Law
2015-10-14 19:15         ` Bernd Schmidt
2015-10-15  8:52           ` Richard Biener
2015-10-20  5:52           ` Jeff Law
2015-10-20  9:37             ` Richard Biener
2015-10-14  1:05   ` Richard Henderson
2015-10-14  1:11     ` Richard Henderson
2015-10-14  8:29     ` Eric Botcazou
2015-10-14 17:46       ` Jeff Law
2015-10-13 20:05 Abe
     [not found] <024301d11106$2379b5f0$6a6d21d0$@samsung.com>
2015-10-27 23:02 ` Abe
2015-10-30 14:09   ` Bernd Schmidt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=5615AADE.4030306@yahoo.com \
    --to=abe_skolnik@yahoo.com \
    --cc=bschmidt@redhat.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=kyrylo.tkachov@arm.com \
    --cc=sebpop@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).