From: Richard Biener <rguenther@suse.de>
To: gcc-patches@gcc.gnu.org
Cc: Jan Hubicka <jh@suse.de>,
ubizjak@gmail.com, Martin Jambor <mjambor@suse.de>
Subject: Re: [PATCH] Fix PR80846, change vectorizer reduction epilogue (on x86)
Date: Fri, 05 Jan 2018 09:01:00 -0000
Message-ID: <alpine.LSU.2.20.1801051001360.32271@zhemvz.fhfr.qr>
In-Reply-To: <alpine.LSU.2.20.1711281605550.12252@zhemvz.fhfr.qr>
On Tue, 28 Nov 2017, Richard Biener wrote:
>
> The following adds a new target hook, targetm.vectorize.split_reduction,
> which allows the target to specify a preferred mode to perform the
> final reduction in, using either vector shifts or scalar extractions.
> Down to that mode the vector reduction result is reduced by combining
> lowparts and highparts recursively.  This avoids lane-crossing operations
> when doing AVX256 on Zen and Bulldozer and also speeds things up on
> Haswell (I verified a ~20% speedup on Broadwell).
>
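> To illustrate what the new epilogue does (a minimal sketch in plain C,
> not part of the patch; the function and parameter names are made up):
>
> /* Scalar model of the split reduction: halve the lane count by
>    adding upper to lower lanes until the target-preferred width
>    PREFERRED_N is reached, then finish as before.  */
> static int
> reduce_halving (int *lanes, int n, int preferred_n)
> {
>   while (n > preferred_n)
>     {
>       n /= 2;
>       for (int i = 0; i < n; i++)
>         lanes[i] += lanes[i + n];  /* lowpart op highpart  */
>     }
>   /* The remaining lanes are reduced with whole-vector shifts or
>      scalar extracts, exactly as the existing code does.  */
>   int sum = 0;
>   for (int i = 0; i < n; i++)
>     sum += lanes[i];
>   return sum;
> }
>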
> Thus the patch implements the target hook on x86 to _always_ prefer
> SSE modes for the final reduction.
>
> For the testcase in the bugzilla
>
> int sumint(const int arr[]) {
>   arr = __builtin_assume_aligned(arr, 64);
>   int sum=0;
>   for (int i=0 ; i<1024 ; i++)
>     sum+=arr[i];
>   return sum;
> }
>
> this changes -O3 -mavx512f code from
>
> sumint:
> .LFB0:
>         .cfi_startproc
>         vpxord  %zmm0, %zmm0, %zmm0
>         leaq    4096(%rdi), %rax
>         .p2align 4,,10
>         .p2align 3
> .L2:
>         vpaddd  (%rdi), %zmm0, %zmm0
>         addq    $64, %rdi
>         cmpq    %rdi, %rax
>         jne     .L2
>         vpxord  %zmm1, %zmm1, %zmm1
>         vshufi32x4      $78, %zmm1, %zmm0, %zmm2
>         vpaddd  %zmm2, %zmm0, %zmm0
>         vmovdqa64       .LC0(%rip), %zmm2
>         vpermi2d        %zmm1, %zmm0, %zmm2
>         vpaddd  %zmm2, %zmm0, %zmm0
>         vmovdqa64       .LC1(%rip), %zmm2
>         vpermi2d        %zmm1, %zmm0, %zmm2
>         vpaddd  %zmm2, %zmm0, %zmm0
>         vmovdqa64       .LC2(%rip), %zmm2
>         vpermi2d        %zmm1, %zmm0, %zmm2
>         vpaddd  %zmm2, %zmm0, %zmm0
>         vmovd   %xmm0, %eax
>
> to
>
> sumint:
> .LFB0:
>         .cfi_startproc
>         vpxord  %zmm0, %zmm0, %zmm0
>         leaq    4096(%rdi), %rax
>         .p2align 4,,10
>         .p2align 3
> .L2:
>         vpaddd  (%rdi), %zmm0, %zmm0
>         addq    $64, %rdi
>         cmpq    %rdi, %rax
>         jne     .L2
>         vextracti64x4   $0x1, %zmm0, %ymm1
>         vpaddd  %ymm0, %ymm1, %ymm1
>         vmovdqa %xmm1, %xmm0
>         vextracti128    $1, %ymm1, %xmm1
>         vpaddd  %xmm1, %xmm0, %xmm0
>         vpsrldq $8, %xmm0, %xmm1
>         vpaddd  %xmm1, %xmm0, %xmm0
>         vpsrldq $4, %xmm0, %xmm1
>         vpaddd  %xmm1, %xmm0, %xmm0
>         vmovd   %xmm0, %eax
>
> and for -O3 -mavx2 from
>
> sumint:
> .LFB0:
>         .cfi_startproc
>         vpxor   %xmm0, %xmm0, %xmm0
>         leaq    4096(%rdi), %rax
>         .p2align 4,,10
>         .p2align 3
> .L2:
>         vpaddd  (%rdi), %ymm0, %ymm0
>         addq    $32, %rdi
>         cmpq    %rdi, %rax
>         jne     .L2
>         vpxor   %xmm1, %xmm1, %xmm1
>         vperm2i128      $33, %ymm1, %ymm0, %ymm2
>         vpaddd  %ymm2, %ymm0, %ymm0
>         vperm2i128      $33, %ymm1, %ymm0, %ymm2
>         vpalignr        $8, %ymm0, %ymm2, %ymm2
>         vpaddd  %ymm2, %ymm0, %ymm0
>         vperm2i128      $33, %ymm1, %ymm0, %ymm1
>         vpalignr        $4, %ymm0, %ymm1, %ymm1
>         vpaddd  %ymm1, %ymm0, %ymm0
>         vmovd   %xmm0, %eax
>
> to
>
> sumint:
> .LFB0:
>         .cfi_startproc
>         vpxor   %xmm0, %xmm0, %xmm0
>         leaq    4096(%rdi), %rax
>         .p2align 4,,10
>         .p2align 3
> .L2:
>         vpaddd  (%rdi), %ymm0, %ymm0
>         addq    $32, %rdi
>         cmpq    %rdi, %rax
>         jne     .L2
>         vmovdqa %xmm0, %xmm1
>         vextracti128    $1, %ymm0, %xmm0
>         vpaddd  %xmm0, %xmm1, %xmm0
>         vpsrldq $8, %xmm0, %xmm1
>         vpaddd  %xmm1, %xmm0, %xmm0
>         vpsrldq $4, %xmm0, %xmm1
>         vpaddd  %xmm1, %xmm0, %xmm0
>         vmovd   %xmm0, %eax
>         vzeroupper
>         ret
>
> which besides being faster is also smaller (fewer prefixes).
>
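> For clarity, the new -mavx2 epilogue is equivalent to the following
> intrinsics (an illustration only, not part of the patch; the function
> name is made up):
>
> #include <immintrin.h>
>
> /* Sum the eight int lanes of a V8SI accumulator the way the new
>    epilogue does: halve 256 -> 128 bits, then whole-vector shifts.  */
> static int
> reduce_v8si (__m256i v)
> {
>   __m128i lo = _mm256_castsi256_si128 (v);       /* low half, no insn  */
>   __m128i hi = _mm256_extracti128_si256 (v, 1);  /* vextracti128  */
>   __m128i x = _mm_add_epi32 (lo, hi);            /* 8 -> 4 lanes  */
>   x = _mm_add_epi32 (x, _mm_srli_si128 (x, 8));  /* vpsrldq $8; -> 2  */
>   x = _mm_add_epi32 (x, _mm_srli_si128 (x, 4));  /* vpsrldq $4; -> 1  */
>   return _mm_cvtsi128_si32 (x);                  /* vmovd  */
> }
>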
> SPEC 2k6 results on Haswell (thus AVX2) are neutral.  As it merely
> affects reduction vectorization epilogues I didn't expect big effects
> except for loops that do not run many iterations (more likely with AVX512).
>
> Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.
>
> Ok for trunk?
Ping?
Richard.
> The PR mentions some more tricks to optimize the sequence, but
> those look like backend-only optimizations.
>
> Thanks,
> Richard.
>
> 2017-11-28  Richard Biener  <rguenther@suse.de>
>
> 	PR tree-optimization/80846
> 	* target.def (split_reduction): New target hook.
> 	* targhooks.c (default_split_reduction): New function.
> 	* targhooks.h (default_split_reduction): Declare.
> 	* tree-vect-loop.c (vect_create_epilog_for_reduction): If the
> 	target requests, first reduce vectors by combining low and high
> 	parts.
> 	* tree-vect-stmts.c (vect_gen_perm_mask_any): Adjust.
> 	(get_vectype_for_scalar_type_and_size): Export.
> 	* tree-vectorizer.h (get_vectype_for_scalar_type_and_size): Declare.
>
> 	* doc/tm.texi.in (TARGET_VECTORIZE_SPLIT_REDUCTION): Document.
> 	* doc/tm.texi: Regenerate.
>
> 	i386/
> 	* config/i386/i386.c (ix86_split_reduction): Implement
> 	TARGET_VECTORIZE_SPLIT_REDUCTION.
>
> 	* gcc.target/i386/pr80846-1.c: New testcase.
> 	* gcc.target/i386/pr80846-2.c: Likewise.
>
> Index: gcc/config/i386/i386.c
> ===================================================================
> --- gcc/config/i386/i386.c (revision 255197)
> +++ gcc/config/i386/i386.c (working copy)
> @@ -48864,6 +48864,36 @@ ix86_preferred_simd_mode (scalar_mode mo
>      }
>  }
>
> +/* All CPUs prefer to avoid cross-lane operations so perform reductions
> +   upper against lower halves up to SSE reg size.  */
> +
> +static machine_mode
> +ix86_split_reduction (machine_mode mode)
> +{
> +  /* Reduce lowpart against highpart until we reach SSE reg width to
> +     avoid cross-lane operations.  */
> +  switch (mode)
> +    {
> +    case E_V16SImode:
> +    case E_V8SImode:
> +      return V4SImode;
> +    case E_V32HImode:
> +    case E_V16HImode:
> +      return V8HImode;
> +    case E_V64QImode:
> +    case E_V32QImode:
> +      return V16QImode;
> +    case E_V16SFmode:
> +    case E_V8SFmode:
> +      return V4SFmode;
> +    case E_V8DFmode:
> +    case E_V4DFmode:
> +      return V2DFmode;
> +    default:
> +      return mode;
> +    }
> +}
> +
>  /* If AVX is enabled then try vectorizing with both 256bit and 128bit
>     vectors.  If AVX512F is enabled then try vectorizing with 512bit,
>     256bit and 128bit vectors.  */
> @@ -50486,6 +50516,9 @@ ix86_run_selftests (void)
>  #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
>  #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
>    ix86_preferred_simd_mode
> +#undef TARGET_VECTORIZE_SPLIT_REDUCTION
> +#define TARGET_VECTORIZE_SPLIT_REDUCTION \
> +  ix86_split_reduction
>  #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
>  #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
>    ix86_autovectorize_vector_sizes
> Index: gcc/doc/tm.texi
> ===================================================================
> --- gcc/doc/tm.texi (revision 255197)
> +++ gcc/doc/tm.texi (working copy)
> @@ -5844,6 +5844,13 @@ equal to @code{word_mode}, because the v
> transformations even in absence of specialized @acronym{SIMD} hardware.
> @end deftypefn
>
> +@deftypefn {Target Hook} machine_mode TARGET_VECTORIZE_SPLIT_REDUCTION (machine_mode)
> +This hook should return the preferred mode to split the final reduction
> +step on @var{mode} to. The reduction is then carried out reducing upper
> +against lower halves of vectors recursively until the specified mode is
> +reached.  The default is @var{mode}, which means no splitting.
> +@end deftypefn
> +
> @deftypefn {Target Hook} {unsigned int} TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (void)
> This hook should return a mask of sizes that should be iterated over
> after trying to autovectorize using the vector size derived from the
> Index: gcc/doc/tm.texi.in
> ===================================================================
> --- gcc/doc/tm.texi.in (revision 255197)
> +++ gcc/doc/tm.texi.in (working copy)
> @@ -4091,6 +4091,8 @@ address; but often a machine-dependent
>
> @hook TARGET_VECTORIZE_PREFERRED_SIMD_MODE
>
> +@hook TARGET_VECTORIZE_SPLIT_REDUCTION
> +
> @hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
>
> @hook TARGET_VECTORIZE_GET_MASK_MODE
> Index: gcc/target.def
> ===================================================================
> --- gcc/target.def (revision 255197)
> +++ gcc/target.def (working copy)
> @@ -1875,6 +1875,17 @@ transformations even in absence of speci
> (scalar_mode mode),
> default_preferred_simd_mode)
>
> +/* Returns the preferred mode for splitting SIMD reductions to. */
> +DEFHOOK
> +(split_reduction,
> + "This hook should return the preferred mode to split the final reduction\n\
> +step on @var{mode} to. The reduction is then carried out reducing upper\n\
> +against lower halves of vectors recursively until the specified mode is\n\
> +reached.  The default is @var{mode}, which means no splitting.",
> + machine_mode,
> + (machine_mode),
> + default_split_reduction)
> +
>  /* Returns a mask of vector sizes to iterate over when auto-vectorizing
>     after processing the preferred one derived from preferred_simd_mode.  */
> DEFHOOK
> Index: gcc/targhooks.c
> ===================================================================
> --- gcc/targhooks.c (revision 255197)
> +++ gcc/targhooks.c (working copy)
> @@ -1281,6 +1281,14 @@ default_preferred_simd_mode (scalar_mode
>    return word_mode;
>  }
>
> +/* By default do not split reductions further.  */
> +
> +machine_mode
> +default_split_reduction (machine_mode mode)
> +{
> +  return mode;
> +}
> +
>  /* By default only the size derived from the preferred vector mode
>     is tried.  */
>
> Index: gcc/targhooks.h
> ===================================================================
> --- gcc/targhooks.h (revision 255197)
> +++ gcc/targhooks.h (working copy)
> @@ -108,6 +108,7 @@ default_builtin_support_vector_misalignm
>                                                           const_tree,
>                                                           int, bool);
> extern machine_mode default_preferred_simd_mode (scalar_mode mode);
> +extern machine_mode default_split_reduction (machine_mode);
> extern unsigned int default_autovectorize_vector_sizes (void);
> extern opt_machine_mode default_get_mask_mode (unsigned, unsigned);
> extern void *default_init_cost (struct loop *);
> Index: gcc/testsuite/gcc.target/i386/pr80846-1.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/pr80846-1.c (nonexistent)
> +++ gcc/testsuite/gcc.target/i386/pr80846-1.c (working copy)
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx512f" } */
> +
> +int sumint(const int arr[]) {
> +  arr = __builtin_assume_aligned(arr, 64);
> +  int sum=0;
> +  for (int i=0 ; i<1024 ; i++)
> +    sum+=arr[i];
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-times "vextracti" 2 } } */
> Index: gcc/testsuite/gcc.target/i386/pr80846-2.c
> ===================================================================
> --- gcc/testsuite/gcc.target/i386/pr80846-2.c (nonexistent)
> +++ gcc/testsuite/gcc.target/i386/pr80846-2.c (working copy)
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mavx2" } */
> +
> +int sumint(const int arr[]) {
> +  arr = __builtin_assume_aligned(arr, 64);
> +  int sum=0;
> +  for (int i=0 ; i<1024 ; i++)
> +    sum+=arr[i];
> +  return sum;
> +}
> +
> +/* { dg-final { scan-assembler-times "vextracti" 1 } } */
> Index: gcc/tree-vect-loop.c
> ===================================================================
> --- gcc/tree-vect-loop.c (revision 255197)
> +++ gcc/tree-vect-loop.c (working copy)
> @@ -4994,38 +4994,126 @@ vect_create_epilog_for_reduction (vec<tr
>      }
>    else
>      {
> -      bool reduce_with_shift = have_whole_vector_shift (mode);
> -      int element_bitsize = tree_to_uhwi (bitsize);
> -      int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
> +      bool reduce_with_shift;
>        tree vec_temp;
> -
> +
>        /* COND reductions all do the final reduction with MAX_EXPR.  */
>        if (code == COND_EXPR)
>          code = MAX_EXPR;
>
> -      /* Regardless of whether we have a whole vector shift, if we're
> -         emulating the operation via tree-vect-generic, we don't want
> -         to use it.  Only the first round of the reduction is likely
> -         to still be profitable via emulation.  */
> -      /* ??? It might be better to emit a reduction tree code here, so that
> -         tree-vect-generic can expand the first round via bit tricks.  */
> -      if (!VECTOR_MODE_P (mode))
> -        reduce_with_shift = false;
> +      /* See if the target wants to do the final (shift) reduction
> +         in a vector mode of smaller size and first reduce upper/lower
> +         halves against each other.  */
> +      enum machine_mode mode1 = mode;
> +      tree vectype1 = vectype;
> +      unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
> +      unsigned sz1 = sz;
> +      if (!slp_reduc
> +          && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
> +        sz1 = GET_MODE_SIZE (mode1);
> +
> +      vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
> +      reduce_with_shift = have_whole_vector_shift (mode1);
> +      if (!VECTOR_MODE_P (mode1))
> +        reduce_with_shift = false;
>        else
> -        {
> -          optab optab = optab_for_tree_code (code, vectype, optab_default);
> -          if (optab_handler (optab, mode) == CODE_FOR_nothing)
> -            reduce_with_shift = false;
> -        }
> +        {
> +          optab optab = optab_for_tree_code (code, vectype1, optab_default);
> +          if (optab_handler (optab, mode1) == CODE_FOR_nothing)
> +            reduce_with_shift = false;
> +        }
> +
> +      /* First reduce the vector to the desired vector size we should
> +         do shift reduction on by combining upper and lower halves.  */
> +      new_temp = new_phi_result;
> +      while (sz > sz1)
> +        {
> +          gcc_assert (!slp_reduc);
> +          sz /= 2;
> +          vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
> +
> +          /* The target has to make sure we support lowpart/highpart
> +             extraction, either via direct vector extract or through
> +             an integer mode punning.  */
> +          tree dst1, dst2;
> +          if (convert_optab_handler (vec_extract_optab,
> +                                     TYPE_MODE (TREE_TYPE (new_temp)),
> +                                     TYPE_MODE (vectype1))
> +              != CODE_FOR_nothing)
> +            {
> +              /* Extract sub-vectors directly once vec_extract becomes
> +                 a conversion optab.  */
> +              dst1 = make_ssa_name (vectype1);
> +              epilog_stmt
> +                = gimple_build_assign (dst1, BIT_FIELD_REF,
> +                                       build3 (BIT_FIELD_REF, vectype1,
> +                                               new_temp, TYPE_SIZE (vectype1),
> +                                               bitsize_int (0)));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +              dst2 = make_ssa_name (vectype1);
> +              epilog_stmt
> +                = gimple_build_assign (dst2, BIT_FIELD_REF,
> +                                       build3 (BIT_FIELD_REF, vectype1,
> +                                               new_temp, TYPE_SIZE (vectype1),
> +                                               bitsize_int (sz * BITS_PER_UNIT)));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +            }
> +          else
> +            {
> +              /* Extract via punning to appropriately sized integer mode
> +                 vector.  */
> +              tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT,
> +                                                            1);
> +              tree etype = build_vector_type (eltype, 2);
> +              gcc_assert (convert_optab_handler (vec_extract_optab,
> +                                                 TYPE_MODE (etype),
> +                                                 TYPE_MODE (eltype))
> +                          != CODE_FOR_nothing);
> +              tree tem = make_ssa_name (etype);
> +              epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
> +                                                 build1 (VIEW_CONVERT_EXPR,
> +                                                         etype, new_temp));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +              new_temp = tem;
> +              tem = make_ssa_name (eltype);
> +              epilog_stmt
> +                = gimple_build_assign (tem, BIT_FIELD_REF,
> +                                       build3 (BIT_FIELD_REF, eltype,
> +                                               new_temp, TYPE_SIZE (eltype),
> +                                               bitsize_int (0)));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +              dst1 = make_ssa_name (vectype1);
> +              epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
> +                                                 build1 (VIEW_CONVERT_EXPR,
> +                                                         vectype1, tem));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +              tem = make_ssa_name (eltype);
> +              epilog_stmt
> +                = gimple_build_assign (tem, BIT_FIELD_REF,
> +                                       build3 (BIT_FIELD_REF, eltype,
> +                                               new_temp, TYPE_SIZE (eltype),
> +                                               bitsize_int (sz * BITS_PER_UNIT)));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +              dst2 = make_ssa_name (vectype1);
> +              epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
> +                                                 build1 (VIEW_CONVERT_EXPR,
> +                                                         vectype1, tem));
> +              gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +            }
> +
> +          new_temp = make_ssa_name (vectype1);
> +          epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
> +          gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
> +        }
>
>        if (reduce_with_shift && !slp_reduc)
>          {
> -          int nelements = vec_size_in_bits / element_bitsize;
> +          int nelements = TYPE_VECTOR_SUBPARTS (vectype1);
>            auto_vec_perm_indices sel (nelements);
>
>            int elt_offset;
>
> -          tree zero_vec = build_zero_cst (vectype);
> +          tree zero_vec = build_zero_cst (vectype1);
>            /* Case 2: Create:
>               for (offset = nelements/2; offset >= 1; offset/=2)
>                  {
> @@ -5039,15 +5127,15 @@ vect_create_epilog_for_reduction (vec<tr
>             dump_printf_loc (MSG_NOTE, vect_location,
>                              "Reduce using vector shifts\n");
>
> -          vec_dest = vect_create_destination_var (scalar_dest, vectype);
> -          new_temp = new_phi_result;
> +          mode1 = TYPE_MODE (vectype1);
> +          vec_dest = vect_create_destination_var (scalar_dest, vectype1);
>            for (elt_offset = nelements / 2;
>                 elt_offset >= 1;
>                 elt_offset /= 2)
>              {
>                sel.truncate (0);
> -              calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
> -              tree mask = vect_gen_perm_mask_any (vectype, sel);
> +              calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
> +              tree mask = vect_gen_perm_mask_any (vectype1, sel);
>                epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
>                                                   new_temp, zero_vec, mask);
>                new_name = make_ssa_name (vec_dest, epilog_stmt);
> @@ -5092,7 +5180,8 @@ vect_create_epilog_for_reduction (vec<tr
>             dump_printf_loc (MSG_NOTE, vect_location,
>                              "Reduce using scalar code.\n");
>
> -          vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
> +          int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
> +          int element_bitsize = tree_to_uhwi (bitsize);
>            FOR_EACH_VEC_ELT (new_phis, i, new_phi)
>              {
>                int bit_offset;
> Index: gcc/tree-vect-stmts.c
> ===================================================================
> --- gcc/tree-vect-stmts.c (revision 255197)
> +++ gcc/tree-vect-stmts.c (working copy)
> @@ -6514,7 +6514,7 @@ vect_gen_perm_mask_any (tree vectype, ve
>
>    mask_elt_type = lang_hooks.types.type_for_mode
>      (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
> -  mask_type = get_vectype_for_scalar_type (mask_elt_type);
> +  mask_type = get_same_sized_vectype (mask_elt_type, vectype);
>
>    auto_vec<tree, 32> mask_elts (nunits);
>    for (unsigned int i = 0; i < nunits; ++i)
> @@ -9065,7 +9065,7 @@ free_stmt_vec_info (gimple *stmt)
>     Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported
>     by the target.  */
>
> -static tree
> +tree
>  get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
>  {
>    tree orig_scalar_type = scalar_type;
> Index: gcc/tree-vectorizer.h
> ===================================================================
> --- gcc/tree-vectorizer.h (revision 255197)
> +++ gcc/tree-vectorizer.h (working copy)
> @@ -1151,6 +1151,7 @@ extern bool vect_can_advance_ivs_p (loop
> /* In tree-vect-stmts.c. */
> extern unsigned int current_vector_size;
> extern tree get_vectype_for_scalar_type (tree);
> +extern tree get_vectype_for_scalar_type_and_size (tree, unsigned);
> extern tree get_mask_type_for_scalar_type (tree);
> extern tree get_same_sized_vectype (tree, tree);
> extern bool vect_is_simple_use (tree, vec_info *, gimple **,
>
--
Richard Biener <rguenther@suse.de>
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 21284 (AG Nuernberg)