public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Pat Haugen <pthaugen@linux.vnet.ibm.com>
To: gcc-patches@gcc.gnu.org
Cc: David Edelsohn <dje.gcc@gmail.com>
Subject: [PATCH] Bump alignment for small loops on PowerPC
Date: Fri, 22 Oct 2010 21:12:00 -0000	[thread overview]
Message-ID: <4CC1F004.2000505@linux.vnet.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1090 bytes --]

The following patch increases the alignment for certain small loops (5-8 
insns) on PowerPC for improved performance. Small loops can benefit by 
being contained within a single 32-byte icache sector. Spec testing was 
neutral, but it should eliminate variations in performance I have seen 
in the past due to whether one of these loops crossed a 32-byte boundary.

Bootstrap/regtested on powerpc64-linux with no new regressions.  OK for 
trunk?


2010-10-22  Pat Haugen <pthaugen@us.ibm.com>

     * final.c (compute_alignments): Compute/free loop info all the time.
     * config/rs6000/rs6000.h (LOOP_ALIGN): Define.
     * config/rs6000/rs6000-protos.h (rs6000_loop_align): Declare.
     * config/rs6000/t-rs6000 (rs6000.o): Add cfgloop.h.
     * config/rs6000/rs6000.c (cfgloop.h): Include.
     (can_override_loop_align): New.
     (rs6000_option_override_internal): Set it.
     (TARGET_ASM_LOOP_ALIGN_MAX_SKIP): Define target hook.
     (rs6000_loop_align): New function.
     (rs6000_loop_align_max_skip): Likewise.
     * testsuite/gcc.target/powerpc/loop_align.c: New.




-Pat


[-- Attachment #2: loop-align.diff --]
[-- Type: text/plain, Size: 6107 bytes --]

Index: gcc/final.c
===================================================================
--- gcc/final.c	(revision 165738)
+++ gcc/final.c	(working copy)
@@ -715,8 +715,8 @@ compute_alignments (void)
     {
       dump_flow_info (dump_file, TDF_DETAILS);
       flow_loops_dump (dump_file, NULL, 1);
-      loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
     }
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
   FOR_EACH_BB (bb)
     if (bb->frequency > freq_max)
       freq_max = bb->frequency;
@@ -808,11 +808,8 @@ compute_alignments (void)
       LABEL_TO_MAX_SKIP (label) = max_skip;
     }
 
-  if (dump_file)
-    {
-      loop_optimizer_finalize ();
-      free_dominance_info (CDI_DOMINATORS);
-    }
+  loop_optimizer_finalize ();
+  free_dominance_info (CDI_DOMINATORS);
   return 0;
 }
 
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 165738)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -2388,6 +2388,9 @@ extern char rs6000_reg_names[][8];	/* re
   if ((LOG) != 0)			\
     fprintf (FILE, "\t.align %d\n", (LOG))
 
+/* How to align the given loop. */
+#define LOOP_ALIGN(LABEL)  rs6000_loop_align(LABEL)
+
 /* Pick up the return address upon entry to a procedure. Used for
    dwarf2 unwind information.  This also enables the table driven
    mechanism.  */
Index: gcc/config/rs6000/rs6000-protos.h
===================================================================
--- gcc/config/rs6000/rs6000-protos.h	(revision 165738)
+++ gcc/config/rs6000/rs6000-protos.h	(working copy)
@@ -131,6 +131,7 @@ extern rtx rs6000_machopic_legitimize_pi
 extern rtx rs6000_address_for_fpconvert (rtx);
 extern rtx rs6000_allocate_stack_temp (enum machine_mode, bool, bool);
 extern void rs6000_expand_convert_si_to_sfdf (rtx, rtx, bool);
+extern int rs6000_loop_align (rtx);
 #endif /* RTX_CODE */
 
 #ifdef TREE_CODE
Index: gcc/config/rs6000/t-rs6000
===================================================================
--- gcc/config/rs6000/t-rs6000	(revision 165738)
+++ gcc/config/rs6000/t-rs6000	(working copy)
@@ -27,7 +27,7 @@ rs6000.o: $(CONFIG_H) $(SYSTEM_H) corety
   $(OBSTACK_H) $(TREE_H) $(EXPR_H) $(OPTABS_H) except.h function.h \
   output.h $(BASIC_BLOCK_H) $(INTEGRATE_H) toplev.h $(GGC_H) $(HASHTAB_H) \
   $(TM_P_H) $(TARGET_H) $(TARGET_DEF_H) langhooks.h reload.h gt-rs6000.h \
-  cfglayout.h
+  cfglayout.h cfgloop.h
 
 rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c \
     $(srcdir)/config/rs6000/rs6000-protos.h \
Index: gcc/config/rs6000/rs6000.c
===================================================================
--- gcc/config/rs6000/rs6000.c	(revision 165738)
+++ gcc/config/rs6000/rs6000.c	(working copy)
@@ -51,6 +51,7 @@
 #include "langhooks.h"
 #include "reload.h"
 #include "cfglayout.h"
+#include "cfgloop.h"
 #include "sched-int.h"
 #include "gimple.h"
 #include "tree-flow.h"
@@ -154,6 +155,9 @@ static GTY(()) bool rs6000_sched_groups;
 /* Align branch targets.  */
 static GTY(()) bool rs6000_align_branch_targets;
 
+/* Non-zero to allow overriding loop alignment. */
+static int can_override_loop_align = 0;
+
 /* Support for -msched-costly-dep option.  */
 const char *rs6000_sched_costly_dep_str;
 enum rs6000_dependence_cost rs6000_sched_costly_dep;
@@ -1139,6 +1143,7 @@ static void rs6000_option_override (void
 static void rs6000_option_init_struct (struct gcc_options *);
 static void rs6000_option_default_params (void);
 static bool rs6000_handle_option (size_t, const char *, int);
+static int rs6000_loop_align_max_skip (rtx);
 static void rs6000_parse_tls_size_option (void);
 static void rs6000_parse_yes_no_option (const char *, const char *, int *);
 static int first_altivec_reg_to_save (void);
@@ -1599,6 +1604,9 @@ static const struct attribute_spec rs600
 #undef TARGET_HANDLE_OPTION
 #define TARGET_HANDLE_OPTION rs6000_handle_option
 
+#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
+#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
+
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE rs6000_option_override
 
@@ -3025,7 +3033,10 @@ rs6000_option_override_internal (const c
 	  if (align_jumps <= 0)
 	    align_jumps = 16;
 	  if (align_loops <= 0)
-	    align_loops = 16;
+	    {
+	      can_override_loop_align = 1;
+	      align_loops = 16;
+	    }
 	}
       if (align_jumps_max_skip <= 0)
 	align_jumps_max_skip = 15;
@@ -3270,6 +3281,38 @@ rs6000_builtin_mask_for_load (void)
     return 0;
 }
 
+/* Implement LOOP_ALIGN. */
+int
+rs6000_loop_align (rtx label)
+{
+  basic_block bb;
+  int ninsns;
+
+  /* Don't override loop alignment if -falign-loops was specified. */
+  if (!can_override_loop_align)
+    return align_loops_log;
+
+  bb = BLOCK_FOR_INSN (label);
+  ninsns = num_loop_insns(bb->loop_father);
+
+  /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
+  if (ninsns > 4 && ninsns <= 8
+      && (rs6000_cpu == PROCESSOR_POWER4
+	  || rs6000_cpu == PROCESSOR_POWER5
+	  || rs6000_cpu == PROCESSOR_POWER6
+	  || rs6000_cpu == PROCESSOR_POWER7))
+    return 5;
+  else
+    return align_loops_log;
+}
+
+/* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
+static int
+rs6000_loop_align_max_skip (rtx label)
+{
+  return (1 << rs6000_loop_align (label)) - 1;
+}
+
 /* Implement targetm.vectorize.builtin_conversion.
    Returns a decl of a function that implements conversion of an integer vector
    into a floating-point vector, or vice-versa.  DEST_TYPE is the
Index: gcc/testsuite/gcc.target/powerpc/loop_align.c
===================================================================
--- gcc/testsuite/gcc.target/powerpc/loop_align.c	(revision 0)
+++ gcc/testsuite/gcc.target/powerpc/loop_align.c	(revision 0)
@@ -0,0 +1,10 @@
+/* { dg-do compile { target { powerpc*-*-* } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */
+/* { dg-options "-O2 -mcpu=power7 -falign-functions=16" } */
+/* { dg-final { scan-assembler ".p2align 5,,31" } } */
+
+void f(double *a, double *b, double *c, int n) {
+  int i;
+  for (i=0; i < n; i++)
+    a[i] = b[i] + c[i];
+}

             reply	other threads:[~2010-10-22 20:12 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-10-22 21:12 Pat Haugen [this message]
2010-10-23  6:58 ` Eric Botcazou
2010-11-02 19:20 ` David Edelsohn
     [not found] ` <4CD2FFF7.9080301@linux.vnet.ibm.com>
2010-11-04 19:53   ` [PING] " Richard Henderson

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4CC1F004.2000505@linux.vnet.ibm.com \
    --to=pthaugen@linux.vnet.ibm.com \
    --cc=dje.gcc@gmail.com \
    --cc=gcc-patches@gcc.gnu.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).