public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH RESEND 1/2] Temporary remove "at least 8 byte alignment" code from x86
@ 2016-09-26 19:11 Denys Vlasenko
  2016-09-26 19:19 ` [PATCH RESEND 2/2] Extend -falign-FOO=N to N[,M[,N2[,M2]]] Denys Vlasenko
  0 siblings, 1 reply; 4+ messages in thread
From: Denys Vlasenko @ 2016-09-26 19:11 UTC (permalink / raw)
  To: gcc-patches; +Cc: Denys Vlasenko, Andrew Pinski

This change drops forced alignment to 8 if requested alignment is higher
than 8: before the patch, -falign-functions=9 was generating

        .p2align 4,,8
        .p2align 3

which means: "align to 16 if the skip is 8 bytes or less; else align to 8".
After this change, ".p2align 3" is not emitted.

This behavior will be implemented differently by the next patch.

2016-09-26  Denys Vlasenko  <dvlasenk@redhat.com>

    * config/i386/freebsd.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Remove "If N
    is large, do at least 8 byte alignment" code.
    * config/i386/gnu-user.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
    * config/i386/iamcu.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
    * config/i386/openbsdelf.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.
    * config/i386/x86-64.h (ASM_OUTPUT_MAX_SKIP_ALIGN): Likewise.

Index: gcc/config/i386/freebsd.h
===================================================================
--- gcc/config/i386/freebsd.h	(revision 239390)
+++ gcc/config/i386/freebsd.h	(working copy)
@@ -92,25 +92,17 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #undef  ASM_OUTPUT_MAX_SKIP_ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)			\
   do {									\
     if ((LOG) != 0) {							\
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG));	\
-      else {								\
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))			\
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));			\
+      else								\
 	fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));	\
-	/* Make sure that we have at least 8 byte alignment if > 8 byte \
-	   alignment is preferred.  */					\
-	if ((LOG) > 3							\
-	    && (1 << (LOG)) > ((MAX_SKIP) + 1)				\
-	    && (MAX_SKIP) >= 7)						\
-	  fputs ("\t.p2align 3\n", (FILE));				\
-      }									\
     }									\
   } while (0)
 #endif
Index: gcc/config/i386/gnu-user.h
===================================================================
--- gcc/config/i386/gnu-user.h	(revision 239390)
+++ gcc/config/i386/gnu-user.h	(working copy)
@@ -94,24 +94,16 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)			\
   do {									\
     if ((LOG) != 0) {							\
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG));	\
-      else {								\
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))			\
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));			\
+      else								\
 	fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));	\
-	/* Make sure that we have at least 8 byte alignment if > 8 byte \
-	   alignment is preferred.  */					\
-	if ((LOG) > 3							\
-	    && (1 << (LOG)) > ((MAX_SKIP) + 1)				\
-	    && (MAX_SKIP) >= 7)						\
-	  fputs ("\t.p2align 3\n", (FILE));				\
-      }									\
     }									\
   } while (0)
 #endif
Index: gcc/config/i386/iamcu.h
===================================================================
--- gcc/config/i386/iamcu.h	(revision 239390)
+++ gcc/config/i386/iamcu.h	(working copy)
@@ -62,23 +62,15 @@ see the files COPYING3 and COPYING.RUNTIME respect
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)			\
   do {									\
     if ((LOG) != 0) {							\
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG));	\
-      else {								\
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))			\
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));			\
+      else								\
 	fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));	\
-	/* Make sure that we have at least 8 byte alignment if > 8 byte \
-	   alignment is preferred.  */					\
-	if ((LOG) > 3							\
-	    && (1 << (LOG)) > ((MAX_SKIP) + 1)				\
-	    && (MAX_SKIP) >= 7)						\
-	  fputs ("\t.p2align 3\n", (FILE));				\
-      }									\
     }									\
   } while (0)
 
Index: gcc/config/i386/openbsdelf.h
===================================================================
--- gcc/config/i386/openbsdelf.h	(revision 239390)
+++ gcc/config/i386/openbsdelf.h	(working copy)
@@ -63,24 +63,16 @@ along with GCC; see the file COPYING3.  If not see
 
 /* A C statement to output to the stdio stream FILE an assembler
    command to advance the location counter to a multiple of 1<<LOG
-   bytes if it is within MAX_SKIP bytes.
+   bytes if it is within MAX_SKIP bytes.  */
 
-   This is used to align code labels according to Intel recommendations.  */
-
 #ifdef HAVE_GAS_MAX_SKIP_P2ALIGN
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)			\
   do {									\
     if ((LOG) != 0) {							\
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG));	\
-      else {								\
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))			\
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));			\
+      else								\
 	fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));	\
-	/* Make sure that we have at least 8 byte alignment if > 8 byte \
-	   alignment is preferred.  */					\
-	if ((LOG) > 3							\
-	    && (1 << (LOG)) > ((MAX_SKIP) + 1)				\
-	    && (MAX_SKIP) >= 7)						\
-	  fputs ("\t.p2align 3\n", (FILE));				\
-      }									\
     }									\
   } while (0)
 #endif
Index: gcc/config/i386/x86-64.h
===================================================================
--- gcc/config/i386/x86-64.h	(revision 239390)
+++ gcc/config/i386/x86-64.h	(working copy)
@@ -65,16 +65,10 @@ see the files COPYING3 and COPYING.RUNTIME respect
 #define ASM_OUTPUT_MAX_SKIP_ALIGN(FILE,LOG,MAX_SKIP)			\
   do {									\
     if ((LOG) != 0) {							\
-      if ((MAX_SKIP) == 0) fprintf ((FILE), "\t.p2align %d\n", (LOG));	\
-      else {								\
+      if ((MAX_SKIP) == 0 || (MAX_SKIP) >= (1<<(LOG)))			\
+        fprintf ((FILE), "\t.p2align %d\n", (LOG));			\
+      else								\
 	fprintf ((FILE), "\t.p2align %d,,%d\n", (LOG), (MAX_SKIP));	\
-	/* Make sure that we have at least 8 byte alignment if > 8 byte \
-	   alignment is preferred.  */					\
-	if ((LOG) > 3							\
-	    && (1 << (LOG)) > ((MAX_SKIP) + 1)				\
-	    && (MAX_SKIP) >= 7)						\
-	  fputs ("\t.p2align 3\n", (FILE));				\
-      }									\
     }									\
   } while (0)
 #undef  ASM_OUTPUT_MAX_SKIP_PAD

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH RESEND 2/2] Extend -falign-FOO=N to N[,M[,N2[,M2]]]
  2016-09-26 19:11 [PATCH RESEND 1/2] Temporary remove "at least 8 byte alignment" code from x86 Denys Vlasenko
@ 2016-09-26 19:19 ` Denys Vlasenko
  2016-09-27 11:59   ` Bernd Schmidt
  0 siblings, 1 reply; 4+ messages in thread
From: Denys Vlasenko @ 2016-09-26 19:19 UTC (permalink / raw)
  To: gcc-patches; +Cc: Denys Vlasenko, Andrew Pinski

falign-functions=N is too simplistic.

Ingo Molnar ran some tests and it seems that on latest x86 CPUs, 64-byte alignment
of functions runs fastest (he tried many other possibilites):
this way, after a call CPU can fetch a lot of insns in the first cacheline fill.

However, developers are less than thrilled by the idea of a slam-dunk 64-byte
aligning everything. Too much waste:
        On 05/20/2015 02:47 AM, Linus Torvalds wrote:
        > At the same time, I have to admit that I abhor a 64-byte function
        > alignment, when we have a fair number of functions that are (much)
        > smaller than that.
        >
        > Is there some way to get gcc to take the size of the function into
        > account? Because aligning a 16-byte or 32-byte function on a 64-byte
        > alignment is just criminally nasty and wasteful.

This change makes it possible to align functions to 64-byte boundaries *if*
this does not introduce huge amount of padding.

Example syntax is -falign-functions=64,9: "align to 64 by skipping up to
9 bytes (not inclusive)". IOW: "after a call insn, CPU will always be able
to fetch at least 9 bytes of insns".

x86 had a tweak: -falign-functions=N with N > 8 was adding secondary alignment.
For example, falign-functions=10 was emitting this before every function:
	.p2align 4,,9
	.p2align 3
This tweak was removed by the previous patch. Now it is reinstated
by the logic that if falign-functions=N[,M] is specified and N > 8,
then default value of N2 is 8, not 1. But now this can be suppressed by
falign-functions=N,M,1 - which wasn't possible before.
In general, optional N2,M2 pair can be used to generate any secondary
alignment user wants.

Testing:

Tested that with -falign-functions=N (tried 8, 15, 16, 17...) the alignment
directives are the same before and after the patch.
Tested that -falign-functions=N,N (two equal parameters) works exactly
like -falign-functions=N.

No change from past behavior:
Tested that "-falign-functions" uses an arch-dependent alignment.
Tested that "-O2" uses an arch-dependent alignment.
Tested that "-O2 -falign-functions=N" uses explicitly given alignment.

2016-09-26  Denys Vlasenko  <dvlasenk@redhat.com>

    * common.opt (-falign-functions=): Accept a string instead of integer.
    (-falign-jumps=): Likewise.
    (-falign-labels=): Likewise.
    (-falign-loops=): Likewise.
    * flags.h (struct target_flag_state): Revamp how alignment data is stored:
    for each of four alignment types, store two pairs of log/maxskip values.
    * toplev.c (read_uint): New function.
    (read_log_maxskip): New function.
    (parse_N_M): New function.
    (init_alignments): Set align_FOO[0/1].log/maxskip from
    specified falign-FOO=N[,M[,N[,M]]] options.
    * varasm.c (assemble_start_function): Call two ASM_OUTPUT_MAX_SKIP_ALIGN
    macros, first for N,M and second time for N2,M2 from
    falign-functions=N,M,N2,M2. This generates 0, 1, or 2 align directives.
    * config/aarch64/aarch64-protos.h (struct tune_params):
    Change foo_align members from integers to strings.
    * config/i386/i386.c (struct ptt): Likewise.
    * config/aarch64/aarch64.c (<cpu>_tunings):
    Change foo_align field values from integers to strings.
    * config/i386/i386.c (processor_target_table): Likewise.
    * config/arm/arm.c (arm_override_options_after_change_1):
    Fix if() condition to detect that -falign-functions is specified.
    * config/i386/i386.c (ix86_default_align): Likewise.
    * common/config/i386/i386-common.c (ix86_handle_option):
    Remove conversion of malign_foo's to falign_foo's.
    The warning is still emitted.
    * doc/invoke.texi: Update option documentation.
    * testsuite/gcc.target/i386/falign-functions.c: New file.

Index: gcc/common/config/i386/i386-common.c
===================================================================
--- gcc/common/config/i386/i386-common.c	(revision 239860)
+++ gcc/common/config/i386/i386-common.c	(working copy)
@@ -998,36 +998,17 @@ ix86_handle_option (struct gcc_options *opts,
 	}
       return true;
 
-
-  /* Comes from final.c -- no real reason to change it.  */
-#define MAX_CODE_ALIGN 16
-
     case OPT_malign_loops_:
       warning_at (loc, 0, "-malign-loops is obsolete, use -falign-loops");
-      if (value > MAX_CODE_ALIGN)
-	error_at (loc, "-malign-loops=%d is not between 0 and %d",
-		  value, MAX_CODE_ALIGN);
-      else
-	opts->x_align_loops = 1 << value;
       return true;
 
     case OPT_malign_jumps_:
       warning_at (loc, 0, "-malign-jumps is obsolete, use -falign-jumps");
-      if (value > MAX_CODE_ALIGN)
-	error_at (loc, "-malign-jumps=%d is not between 0 and %d",
-		  value, MAX_CODE_ALIGN);
-      else
-	opts->x_align_jumps = 1 << value;
       return true;
 
     case OPT_malign_functions_:
       warning_at (loc, 0,
 		  "-malign-functions is obsolete, use -falign-functions");
-      if (value > MAX_CODE_ALIGN)
-	error_at (loc, "-malign-functions=%d is not between 0 and %d",
-		  value, MAX_CODE_ALIGN);
-      else
-	opts->x_align_functions = 1 << value;
       return true;
 
     case OPT_mbranch_cost_:
Index: gcc/common.opt
===================================================================
--- gcc/common.opt	(revision 239860)
+++ gcc/common.opt	(working copy)
@@ -896,32 +896,32 @@ Common Report Var(flag_aggressive_loop_optimizatio
 Aggressively optimize loops using language constraints.
 
 falign-functions
-Common Report Var(align_functions,0) Optimization UInteger
+Common Report Var(flag_align_functions) Optimization
 Align the start of functions.
 
 falign-functions=
-Common RejectNegative Joined UInteger Var(align_functions)
+Common RejectNegative Joined Var(str_align_functions)
 
 falign-jumps
-Common Report Var(align_jumps,0) Optimization UInteger
+Common Report Var(flag_align_jumps) Optimization
 Align labels which are only reached by jumping.
 
 falign-jumps=
-Common RejectNegative Joined UInteger Var(align_jumps)
+Common RejectNegative Joined Var(str_align_jumps)
 
 falign-labels
-Common Report Var(align_labels,0) Optimization UInteger
+Common Report Var(flag_align_labels) Optimization
 Align all labels.
 
 falign-labels=
-Common RejectNegative Joined UInteger Var(align_labels)
+Common RejectNegative Joined Var(str_align_labels)
 
 falign-loops
-Common Report Var(align_loops,0) Optimization UInteger
+Common Report Var(flag_align_loops) Optimization
 Align the start of loops.
 
 falign-loops=
-Common RejectNegative Joined UInteger Var(align_loops)
+Common RejectNegative Joined Var(str_align_loops)
 
 fargument-alias
 Common Ignore
Index: gcc/config/aarch64/aarch64-protos.h
===================================================================
--- gcc/config/aarch64/aarch64-protos.h	(revision 239860)
+++ gcc/config/aarch64/aarch64-protos.h	(working copy)
@@ -208,9 +208,9 @@ struct tune_params
   int memmov_cost;
   int issue_rate;
   unsigned int fusible_ops;
-  int function_align;
-  int jump_align;
-  int loop_align;
+  const char *function_align;
+  const char *jump_align;
+  const char *loop_align;
   int int_reassoc_width;
   int fp_reassoc_width;
   int vec_reassoc_width;
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	(revision 239860)
+++ gcc/config/aarch64/aarch64.c	(working copy)
@@ -521,9 +521,9 @@ static const struct tune_params generic_tunings =
   4, /* memmov_cost  */
   2, /* issue_rate  */
   AARCH64_FUSE_NOTHING, /* fusible_ops  */
-  8,	/* function_align.  */
-  8,	/* jump_align.  */
-  4,	/* loop_align.  */
+  "8",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "4",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -547,9 +547,9 @@ static const struct tune_params cortexa35_tunings
   1, /* issue_rate  */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  8,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "8",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -573,9 +573,9 @@ static const struct tune_params cortexa53_tunings
   2, /* issue_rate  */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  8,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "8",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -599,9 +599,9 @@ static const struct tune_params cortexa57_tunings
   3, /* issue_rate  */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  8,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "8",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -625,9 +625,9 @@ static const struct tune_params cortexa72_tunings
   3, /* issue_rate  */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK), /* fusible_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  8,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "8",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -651,9 +651,9 @@ static const struct tune_params cortexa73_tunings
   2, /* issue_rate.  */
   (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK | AARCH64_FUSE_ADRP_LDR), /* fusible_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  8,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "8",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -676,9 +676,9 @@ static const struct tune_params exynosm1_tunings =
   4,	/* memmov_cost  */
   3,	/* issue_rate  */
   (AARCH64_FUSE_AES_AESMC), /* fusible_ops  */
-  4,	/* function_align.  */
-  4,	/* jump_align.  */
-  4,	/* loop_align.  */
+  "4",	/* function_align.  */
+  "4",	/* jump_align.  */
+  "4",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -701,9 +701,9 @@ static const struct tune_params thunderx_tunings =
   6, /* memmov_cost  */
   2, /* issue_rate  */
   AARCH64_FUSE_CMP_BRANCH, /* fusible_ops  */
-  8,	/* function_align.  */
-  8,	/* jump_align.  */
-  8,	/* loop_align.  */
+  "8",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "8",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -726,9 +726,9 @@ static const struct tune_params xgene1_tunings =
   6, /* memmov_cost  */
   4, /* issue_rate  */
   AARCH64_FUSE_NOTHING, /* fusible_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  16,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "16",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -752,9 +752,9 @@ static const struct tune_params qdf24xx_tunings =
   4, /* issue_rate  */
   (AARCH64_FUSE_MOV_MOVK | AARCH64_FUSE_ADRP_ADD
    | AARCH64_FUSE_MOVK_MOVK), /* fuseable_ops  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  16,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "16",	/* loop_align.  */
   2,	/* int_reassoc_width.  */
   4,	/* fp_reassoc_width.  */
   1,	/* vec_reassoc_width.  */
@@ -777,9 +777,9 @@ static const struct tune_params vulcan_tunings =
   4, /* memmov_cost.  */
   4, /* issue_rate.  */
   AARCH64_FUSE_NOTHING, /* fuseable_ops.  */
-  16,	/* function_align.  */
-  8,	/* jump_align.  */
-  16,	/* loop_align.  */
+  "16",	/* function_align.  */
+  "8",	/* jump_align.  */
+  "16",	/* loop_align.  */
   3,	/* int_reassoc_width.  */
   2,	/* fp_reassoc_width.  */
   2,	/* vec_reassoc_width.  */
Index: gcc/config/arm/arm.c
===================================================================
--- gcc/config/arm/arm.c	(revision 239860)
+++ gcc/config/arm/arm.c	(working copy)
@@ -2899,9 +2899,10 @@ static GTY(()) tree init_optimize;
 static void
 arm_override_options_after_change_1 (struct gcc_options *opts)
 {
-  if (opts->x_align_functions <= 0)
-    opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
-      && opts->x_optimize_size ? 2 : 4;
+  /* -falign-functions without argument: supply one */
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
+      && opts->x_optimize_size ? "2" : "4";
 }
 
 /* Implement targetm.override_options_after_change.  */
Index: gcc/config/i386/i386.c
===================================================================
--- gcc/config/i386/i386.c	(revision 239860)
+++ gcc/config/i386/i386.c	(working copy)
@@ -2626,45 +2626,43 @@ struct ptt
 {
   const char *const name;			/* processor name  */
   const struct processor_costs *cost;		/* Processor costs */
-  const int align_loop;				/* Default alignments.  */
-  const int align_loop_max_skip;
-  const int align_jump;
-  const int align_jump_max_skip;
-  const int align_func;
+  const char *const align_loop;			/* Default alignments.  */
+  const char *const align_jump;
+  const char *const align_func;
 };
 
 /* This table must be in sync with enum processor_type in i386.h.  */ 
 static const struct ptt processor_target_table[PROCESSOR_max] =
 {
-  {"generic", &generic_cost, 16, 10, 16, 10, 16},
-  {"i386", &i386_cost, 4, 3, 4, 3, 4},
-  {"i486", &i486_cost, 16, 15, 16, 15, 16},
-  {"pentium", &pentium_cost, 16, 7, 16, 7, 16},
-  {"lakemont", &lakemont_cost, 16, 7, 16, 7, 16},
-  {"pentiumpro", &pentiumpro_cost, 16, 15, 16, 10, 16},
-  {"pentium4", &pentium4_cost, 0, 0, 0, 0, 0},
-  {"nocona", &nocona_cost, 0, 0, 0, 0, 0},
-  {"core2", &core_cost, 16, 10, 16, 10, 16},
-  {"nehalem", &core_cost, 16, 10, 16, 10, 16},
-  {"sandybridge", &core_cost, 16, 10, 16, 10, 16},
-  {"haswell", &core_cost, 16, 10, 16, 10, 16},
-  {"bonnell", &atom_cost, 16, 15, 16, 7, 16},
-  {"silvermont", &slm_cost, 16, 15, 16, 7, 16},
-  {"knl", &slm_cost, 16, 15, 16, 7, 16},
-  {"skylake-avx512", &core_cost, 16, 10, 16, 10, 16},
-  {"intel", &intel_cost, 16, 15, 16, 7, 16},
-  {"geode", &geode_cost, 0, 0, 0, 0, 0},
-  {"k6", &k6_cost, 32, 7, 32, 7, 32},
-  {"athlon", &athlon_cost, 16, 7, 16, 7, 16},
-  {"k8", &k8_cost, 16, 7, 16, 7, 16},
-  {"amdfam10", &amdfam10_cost, 32, 24, 32, 7, 32},
-  {"bdver1", &bdver1_cost, 16, 10, 16, 7, 11},
-  {"bdver2", &bdver2_cost, 16, 10, 16, 7, 11},
-  {"bdver3", &bdver3_cost, 16, 10, 16, 7, 11},
-  {"bdver4", &bdver4_cost, 16, 10, 16, 7, 11},
-  {"btver1", &btver1_cost, 16, 10, 16, 7, 11},
-  {"btver2", &btver2_cost, 16, 10, 16, 7, 11},
-  {"znver1", &znver1_cost, 16, 10, 16, 7, 11}
+  {"generic",    &generic_cost, "16,11", "16,11","16"},
+  {"i386",       &i386_cost,    "4",     "4",    "4"},
+  {"i486",       &i486_cost,    "16",    "16",   "16"},
+  {"pentium",    &pentium_cost, "16,8",  "16,8", "16"},
+  {"lakemont",   &lakemont_cost,"16,8",  "16,8", "16"},
+  {"pentiumpro", &pentiumpro_cost,"16",  "16,11","16"},
+  {"pentium4",   &pentium4_cost,NULL,    NULL,   NULL},
+  {"nocona",     &nocona_cost,  NULL,    NULL,   NULL},
+  {"core2",      &core_cost,    "16,11", "16,11","16"},
+  {"nehalem",    &core_cost,    "16,11", "16,11","16"},
+  {"sandybridge",&core_cost,    "16,11", "16,11","16"},
+  {"haswell",    &core_cost,    "16,11", "16,11","16"},
+  {"bonnell",    &atom_cost,    "16",    "16,8", "16"},
+  {"silvermont", &slm_cost,     "16",    "16,8", "16"},
+  {"knl",        &slm_cost,     "16",    "16,8", "16"},
+  {"skylake-avx512", &core_cost,"16,11", "16,11","16"},
+  {"intel",      &intel_cost,   "16",    "16,8", "16"},
+  {"geode",      &geode_cost,   NULL,    NULL,   NULL},
+  {"k6",         &k6_cost,      "32,8",  "32,8", "32"},
+  {"athlon",     &athlon_cost,  "16,8",  "16,8", "16"},
+  {"k8",         &k8_cost,      "16,8",  "16,8", "16"},
+  {"amdfam10",   &amdfam10_cost,"32,25", "32,8", "32"},
+  {"bdver1",     &bdver1_cost,  "16,11", "16,8", "11"},
+  {"bdver2",     &bdver2_cost,  "16,11", "16,8", "11"},
+  {"bdver3",     &bdver3_cost,  "16,11", "16,8", "11"},
+  {"bdver4",     &bdver4_cost,  "16,11", "16,8", "11"},
+  {"btver1",     &btver1_cost,  "16,11", "16,8", "11"},
+  {"btver2",     &btver2_cost,  "16,11", "16,8", "11"},
+  {"znver1",     &znver1_cost,  "16,11", "16,8", "11"}
 };
 \f
 static unsigned int
@@ -4695,19 +4693,18 @@ set_ix86_tune_features (enum processor_type ix86_t
 static void
 ix86_default_align (struct gcc_options *opts)
 {
-  if (opts->x_align_loops == 0)
+  /* -falign-foo without argument: supply one */
+  if (opts->x_flag_align_loops && !opts->x_str_align_loops)
     {
-      opts->x_align_loops = processor_target_table[ix86_tune].align_loop;
-      align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
+      opts->x_str_align_loops = processor_target_table[ix86_tune].align_loop;
     }
-  if (opts->x_align_jumps == 0)
+  if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
     {
-      opts->x_align_jumps = processor_target_table[ix86_tune].align_jump;
-      align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
+      opts->x_str_align_jumps = processor_target_table[ix86_tune].align_jump;
     }
-  if (opts->x_align_functions == 0)
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
     {
-      opts->x_align_functions = processor_target_table[ix86_tune].align_func;
+      opts->x_str_align_functions = processor_target_table[ix86_tune].align_func;
     }
 }
 
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 239860)
+++ gcc/doc/invoke.texi	(working copy)
@@ -337,9 +337,10 @@ Objective-C and Objective-C++ Dialects}.
 
 @item Optimization Options
 @xref{Optimize Options,,Options that Control Optimization}.
-@gccoptlist{-faggressive-loop-optimizations -falign-functions[=@var{n}] @gol
--falign-jumps[=@var{n}] @gol
--falign-labels[=@var{n}] -falign-loops[=@var{n}] @gol
+@gccoptlist{-faggressive-loop-optimizations @gol
+-falign-functions[=@var{n}[,@var{m},[@var{n}[,@var{m}]]]] @gol
+-falign-jumps[=@var{n}[,@var{m}]] @gol
+-falign-labels[=@var{n}[,@var{m}]] -falign-loops[=@var{n}[,@var{m}]] @gol
 -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
 -fauto-inc-dec -fbranch-probabilities @gol
 -fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol
@@ -7952,9 +7953,11 @@ The @option{-fstrict-overflow} option is enabled a
 
 @item -falign-functions
 @itemx -falign-functions=@var{n}
+@itemx -falign-functions=@var{n},@var{m}
 @opindex falign-functions
+If @var{m} is not specified, it defaults to @var{n}.
 Align the start of functions to the next power-of-two greater than
-@var{n}, skipping up to @var{n} bytes.  For instance,
+@var{n}, skipping up to @var{m}-1 bytes.  For instance,
 @option{-falign-functions=32} aligns functions to the next 32-byte
 boundary, but @option{-falign-functions=24} aligns to the next
 32-byte boundary only if this can be done by skipping 23 bytes or less.
@@ -7971,9 +7974,11 @@ Enabled at levels @option{-O2}, @option{-O3}.
 
 @item -falign-labels
 @itemx -falign-labels=@var{n}
+@itemx -falign-labels=@var{n},@var{m}
 @opindex falign-labels
+If @var{m} is not specified, it defaults to @var{n}.
 Align all branch targets to a power-of-two boundary, skipping up to
-@var{n} bytes like @option{-falign-functions}.  This option can easily
+@var{m}-1 bytes like @option{-falign-functions}.  This option can easily
 make code slower, because it must insert dummy operations for when the
 branch target is reached in the usual flow of the code.
 
@@ -7990,8 +7995,10 @@ Enabled at levels @option{-O2}, @option{-O3}.
 
 @item -falign-loops
 @itemx -falign-loops=@var{n}
+@itemx -falign-loops=@var{n},@var{m}
 @opindex falign-loops
-Align loops to a power-of-two boundary, skipping up to @var{n} bytes
+If @var{m} is not specified, it defaults to @var{n}.
+Align loops to a power-of-two boundary, skipping up to @var{m}-1 bytes
 like @option{-falign-functions}.  If the loops are
 executed many times, this makes up for any execution of the dummy
 operations.
@@ -8005,9 +8012,11 @@ Enabled at levels @option{-O2}, @option{-O3}.
 
 @item -falign-jumps
 @itemx -falign-jumps=@var{n}
+@itemx -falign-jumps=@var{n},@var{m}
 @opindex falign-jumps
+If @var{m} is not specified, it defaults to @var{n}.
 Align branch targets to a power-of-two boundary, for branch targets
-where the targets can only be reached by jumping, skipping up to @var{n}
+where the targets can only be reached by jumping, skipping up to @var{m}-1
 bytes like @option{-falign-functions}.  In this case, no dummy operations
 need be executed.
 
Index: gcc/flags.h
===================================================================
--- gcc/flags.h	(revision 239860)
+++ gcc/flags.h	(working copy)
@@ -43,19 +43,22 @@ extern bool final_insns_dump_p;
 /* Other basic status info about current function.  */
 
 /* Target-dependent global state.  */
-struct target_flag_state {
+struct align_flags {
   /* Values of the -falign-* flags: how much to align labels in code.
-     0 means `use default', 1 means `don't align'.
-     For each variable, there is an _log variant which is the power
-     of two not less than the variable, for .align output.  */
-  int x_align_loops_log;
-  int x_align_loops_max_skip;
-  int x_align_jumps_log;
-  int x_align_jumps_max_skip;
-  int x_align_labels_log;
-  int x_align_labels_max_skip;
-  int x_align_functions_log;
+     log is "align to 2^log" (so 0 means no alignment).
+     maxskip is the maximum allowed amount of padding to insert. */
+  int log;
+  int maxskip;
+};
 
+struct target_flag_state {
+  /* Each falign-foo can generate up to two levels of alignment:
+     -falign-foo=N,M[,N2,M2] */
+  struct align_flags x_align_loops[2];
+  struct align_flags x_align_jumps[2];
+  struct align_flags x_align_labels[2];
+  struct align_flags x_align_functions[2];
+
   /* The excess precision currently in effect.  */
   enum excess_precision x_flag_excess_precision;
 };
@@ -67,20 +70,21 @@ extern struct target_flag_state *this_target_flag_
 #define this_target_flag_state (&default_target_flag_state)
 #endif
 
-#define align_loops_log \
-  (this_target_flag_state->x_align_loops_log)
-#define align_loops_max_skip \
-  (this_target_flag_state->x_align_loops_max_skip)
-#define align_jumps_log \
-  (this_target_flag_state->x_align_jumps_log)
-#define align_jumps_max_skip \
-  (this_target_flag_state->x_align_jumps_max_skip)
-#define align_labels_log \
-  (this_target_flag_state->x_align_labels_log)
-#define align_labels_max_skip \
-  (this_target_flag_state->x_align_labels_max_skip)
-#define align_functions_log \
-  (this_target_flag_state->x_align_functions_log)
+#define align_loops              (this_target_flag_state->x_align_loops)
+#define align_jumps              (this_target_flag_state->x_align_jumps)
+#define align_labels             (this_target_flag_state->x_align_labels)
+#define align_functions          (this_target_flag_state->x_align_functions)
+#define align_loops_log          (align_loops[0].log)
+#define align_jumps_log          (align_jumps[0].log)
+#define align_labels_log         (align_labels[0].log)
+#define align_functions_log      (align_functions[0].log)
+#define align_loops_max_skip     (align_loops[0].maxskip)
+#define align_jumps_max_skip     (align_jumps[0].maxskip)
+#define align_labels_max_skip    (align_labels[0].maxskip)
+#define align_functions_max_skip (align_functions[0].maxskip)
+/* String representaions of the above options are available in
+   const char *str_align_foo. NULL if not set. */
+
 #define flag_excess_precision \
   (this_target_flag_state->x_flag_excess_precision)
 
Index: gcc/testsuite/gcc.target/i386/falign-functions.c
===================================================================
--- gcc/testsuite/gcc.target/i386/falign-functions.c	(nonexistent)
+++ gcc/testsuite/gcc.target/i386/falign-functions.c	(working copy)
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -falign-functions=64,8" } */
+/* { dg-final { scan-assembler ".p2align 6,,7" } } */
+
+void
+test_func (void)
+{
+}
Index: gcc/toplev.c
===================================================================
--- gcc/toplev.c	(revision 239860)
+++ gcc/toplev.c	(working copy)
@@ -1177,29 +1177,78 @@ target_supports_section_anchors_p (void)
   return true;
 }
 
-/* Default the align_* variables to 1 if they're still unset, and
-   set up the align_*_log variables.  */
+static const char *
+read_uint (const char *flag, const char *name, int *np)
+{
+  const char *flag_start = flag;
+  int n = 0;
+  char c;
+
+  while ((c = *flag++) >= '0' && c <= '9')
+    n = n*10 + (c-'0');
+  *np = n & 0x3fffffff; /* avoid accidentally negative numbers */
+  if (c == '\0')
+    return NULL;
+  if (c == ',')
+    return flag;
+
+  error_at (UNKNOWN_LOCATION, "-falign-%s parameter is bad at '%s'",
+            name, flag_start);
+  return NULL;
+}
+
+static const char *
+read_log_maxskip (const char *flag, const char *name, struct align_flags *a)
+{
+  int n, m;
+  flag = read_uint (flag, name, &a->log);
+  n = a->log;
+  if (n != 0)
+    a->log = floor_log2 (n * 2 - 1);
+  if (!flag)
+    {
+      a->maxskip = n ? n - 1 : 0;
+      return flag;
+    }
+  flag = read_uint (flag, name, &a->maxskip);
+  m = a->maxskip;
+  if (m > n) m = n;
+  if (m > 0) m--; /* -falign-foo=N,M means M-1 max bytes of padding, not M */
+  a->maxskip = m;
+  return flag;
+}
+
 static void
+parse_N_M (const char *flag, const char *name, struct align_flags a[2])
+{
+  if (flag)
+    {
+      flag = read_log_maxskip (flag, name, &a[0]);
+      if (flag)
+	flag = read_log_maxskip (flag, name, &a[1]);
+#if defined (__i386__) || defined (__x86_64__)
+      /* Before -falign-foo=N,M,N2,M2 was introduced, x86 had a tweak.
+	 -falign-functions=N with N > 8 was adding secondary alignment.
+	 -falign-functions=10 was emitting this before every function:
+		.p2align 4,,9
+		.p2align 3
+	 Now this behavior (and more) can be explicitly requested:
+	 -falign-functions=16,10,8
+	 Retain old behavior if N2 is missing: */
+      else if (a[0].log > 3)
+	a[1].log = 3;
+#endif
+    }
+}
+
+/* Process -falign-foo=N[,M[,N2[,M2]]] options. */
+static void
 init_alignments (void)
 {
-  if (align_loops <= 0)
-    align_loops = 1;
-  if (align_loops_max_skip > align_loops)
-    align_loops_max_skip = align_loops - 1;
-  align_loops_log = floor_log2 (align_loops * 2 - 1);
-  if (align_jumps <= 0)
-    align_jumps = 1;
-  if (align_jumps_max_skip > align_jumps)
-    align_jumps_max_skip = align_jumps - 1;
-  align_jumps_log = floor_log2 (align_jumps * 2 - 1);
-  if (align_labels <= 0)
-    align_labels = 1;
-  align_labels_log = floor_log2 (align_labels * 2 - 1);
-  if (align_labels_max_skip > align_labels)
-    align_labels_max_skip = align_labels - 1;
-  if (align_functions <= 0)
-    align_functions = 1;
-  align_functions_log = floor_log2 (align_functions * 2 - 1);
+  parse_N_M (str_align_loops, "loops", align_loops);
+  parse_N_M (str_align_jumps, "jumps", align_jumps);
+  parse_N_M (str_align_labels, "labels", align_labels);
+  parse_N_M (str_align_functions, "functions", align_functions);
 }
 
 /* Process the options that have been parsed.  */
Index: gcc/varasm.c
===================================================================
--- gcc/varasm.c	(revision 239860)
+++ gcc/varasm.c	(working copy)
@@ -1791,7 +1791,9 @@ assemble_start_function (tree decl, const char *fn
     {
 #ifdef ASM_OUTPUT_MAX_SKIP_ALIGN
       ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,
-				 align_functions_log, align_functions - 1);
+				 align_functions[0].log, align_functions[0].maxskip);
+      ASM_OUTPUT_MAX_SKIP_ALIGN (asm_out_file,
+				 align_functions[1].log, align_functions[1].maxskip);
 #else
       ASM_OUTPUT_ALIGN (asm_out_file, align_functions_log);
 #endif

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH RESEND 2/2] Extend -falign-FOO=N to N[,M[,N2[,M2]]]
  2016-09-26 19:19 ` [PATCH RESEND 2/2] Extend -falign-FOO=N to N[,M[,N2[,M2]]] Denys Vlasenko
@ 2016-09-27 11:59   ` Bernd Schmidt
  2016-09-27 20:49     ` Denys Vlasenko
  0 siblings, 1 reply; 4+ messages in thread
From: Bernd Schmidt @ 2016-09-27 11:59 UTC (permalink / raw)
  To: Denys Vlasenko, gcc-patches; +Cc: Andrew Pinski, Uros Bizjak

On 09/26/2016 09:08 PM, Denys Vlasenko wrote:
> +@gccoptlist{-faggressive-loop-optimizations @gol
> +-falign-functions[=@var{n}[,@var{m},[@var{n}[,@var{m}]]]] @gol
> +-falign-jumps[=@var{n}[,@var{m}]] @gol
> +-falign-labels[=@var{n}[,@var{m}]] -falign-loops[=@var{n}[,@var{m}]] @gol

>  @itemx -falign-functions=@var{n}
> +@itemx -falign-functions=@var{n},@var{m}
>  @opindex falign-functions
> +If @var{m} is not specified, it defaults to @var{n}.

There are inconsistencies here about how many arguments these take. 
There's no documentation of what it would mean to have more than two. 
The first paragraph seems to imply it's only allowed for 
-falign-functions, but the same implementation is used for all three.

> +#if defined (__i386__) || defined (__x86_64__)
> +      /* Before -falign-foo=N,M,N2,M2 was introduced, x86 had a tweak.
> +	 -falign-functions=N with N > 8 was adding secondary alignment.
> +	 -falign-functions=10 was emitting this before every function:
> +		.p2align 4,,9
> +		.p2align 3
> +	 Now this behavior (and more) can be explicitly requested:
> +	 -falign-functions=16,10,8
> +	 Retain old behavior if N2 is missing: */
> +      else if (a[0].log > 3)
> +	a[1].log = 3;
> +#endif

Can't have such #ifdef blocks in generic code. To start with, this 
changes behaviour based on the host, when you want it to change 
depending on the target. If there's no way to detect such a situation 
from the x86 backend, such as in the option_override function, then 
you'll need a hook.

IIUC the intention for the whole patch is that behaviour is unchanged by 
default, but there are additional options for users to choose?

Since it seems this is mostly for x86, maybe Uros should have a say in 
whether this patch is a good idea or not. Maybe it would be good to also 
pursue the idea of taking function size into account?


Bernd

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH RESEND 2/2] Extend -falign-FOO=N to N[,M[,N2[,M2]]]
  2016-09-27 11:59   ` Bernd Schmidt
@ 2016-09-27 20:49     ` Denys Vlasenko
  0 siblings, 0 replies; 4+ messages in thread
From: Denys Vlasenko @ 2016-09-27 20:49 UTC (permalink / raw)
  To: Bernd Schmidt, gcc-patches; +Cc: Andrew Pinski, Uros Bizjak

On 09/27/2016 01:53 PM, Bernd Schmidt wrote:
> On 09/26/2016 09:08 PM, Denys Vlasenko wrote:
>> +@gccoptlist{-faggressive-loop-optimizations @gol
>> +-falign-functions[=@var{n}[,@var{m},[@var{n}[,@var{m}]]]] @gol
>> +-falign-jumps[=@var{n}[,@var{m}]] @gol
>> +-falign-labels[=@var{n}[,@var{m}]] -falign-loops[=@var{n}[,@var{m}]] @gol
>
>>  @itemx -falign-functions=@var{n}
>> +@itemx -falign-functions=@var{n},@var{m}
>>  @opindex falign-functions
>> +If @var{m} is not specified, it defaults to @var{n}.
>
> There are inconsistencies here about how many arguments these take.
>  There's no documentation of what it would mean to have more than two.
>  The first paragraph seems to imply it's only allowed for -falign-functions,
>  but the same implementation is used for all three.

Noted. I will improve this part.

>> +#if defined (__i386__) || defined (__x86_64__)
>> +      /* Before -falign-foo=N,M,N2,M2 was introduced, x86 had a tweak.
>> +     -falign-functions=N with N > 8 was adding secondary alignment.
>> +     -falign-functions=10 was emitting this before every function:
>> +        .p2align 4,,9
>> +        .p2align 3
>> +     Now this behavior (and more) can be explicitly requested:
>> +     -falign-functions=16,10,8
>> +     Retain old behavior if N2 is missing: */
>> +      else if (a[0].log > 3)
>> +    a[1].log = 3;
>> +#endif
>
> Can't have such #ifdef blocks in generic code. To start with, this
>changesbehaviour based on the host, when you want it to change
>  depending on the target. If there's no way to detect such a
>  situation from the x86 backend, such as in the option_override
>  function, then you'll need a hook.

Ok, will rework this part.

> IIUC the intention for the whole patch is that behaviour is unchanged
>  by default, but there are additional options for users to choose?

Exactly.

> Since it seems this is mostly for x86, maybe Uros should have a say
>  in whether this patch is a good idea or not.

Not really. I imagine the same considerations apply to any CPU:
cachelines are getting wider irrespective of architecture.

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-09-27 20:01 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-26 19:11 [PATCH RESEND 1/2] Temporary remove "at least 8 byte alignment" code from x86 Denys Vlasenko
2016-09-26 19:19 ` [PATCH RESEND 2/2] Extend -falign-FOO=N to N[,M[,N2[,M2]]] Denys Vlasenko
2016-09-27 11:59   ` Bernd Schmidt
2016-09-27 20:49     ` Denys Vlasenko

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).