From: Evgeny Stupachenko <evstupac@gmail.com>
To: Richard Biener <rguenther@suse.de>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>,
Jakub Jelinek <jakub@redhat.com>,
Uros Bizjak <ubizjak@gmail.com>
Subject: Re: [PATCH, PR52252] Vectorization for load/store groups of size 3.
Date: Tue, 06 May 2014 11:27:00 -0000 [thread overview]
Message-ID: <CAOvf_xyFZMQusRaXp1FP-MTVKbeMZfm-8EzOgMQvEdOhBwUHaA@mail.gmail.com> (raw)
In-Reply-To: <CAOvf_xxQjF3GmsupiRkWfmQSv_Xxu8qrTPyqJZFmp5qeY3f9gQ@mail.gmail.com>
The patch on cost model was successfully committed.
I've separated the rest part of the patch on loads/stores group into
2: on loads group and on stores group.
Below is first part on loads group.
Bootstrap and make check passed on x86.
Is it ok?
ChangeLog:
2014-05-06 Evgeny Stupachenko <evstupac@gmail.com>
* tree-vect-data-refs.c (vect_grouped_load_supported): New
check for loads group of length 3.
(vect_permute_load_chain): New permutations for loads group of
length 3.
* tree-vect-stmts.c (vect_model_load_cost): Change cost
of vec_perm_shuffle for the new permutations.
ChangeLog for testsuite:
2014-05-06 Evgeny Stupachenko <evstupac@gmail.com>
PR tree-optimization/52252
* gcc.dg/vect/pr52252-ld.c: Test on loads group of size 3.
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 274cdbd..feafb38 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -4812,36 +4812,74 @@ vect_grouped_load_supported (tree vectype,
unsigned HOST_WIDE_INT count)
{
enum machine_mode mode = TYPE_MODE (vectype);
- /* vect_permute_load_chain requires the group size to be a power of two. */
- if (exact_log2 (count) == -1)
+ /* vect_permute_load_chain requires the group size to be equal to 3 or
+ be a power of two. */
+ if (count != 3 && exact_log2 (count) == -1)
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "the size of the group of accesses"
- " is not a power of 2\n");
+ "the size of the group of accesses"
+ " is not a power of 2 or not eqaul to 3\n");
return false;
}
/* Check that the permutation is supported. */
if (VECTOR_MODE_P (mode))
{
- unsigned int i, nelt = GET_MODE_NUNITS (mode);
+ unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
- for (i = 0; i < nelt; i++)
- sel[i] = i * 2;
- if (can_vec_perm_p (mode, false, sel))
+ if (exact_log2 (count) != -1)
{
for (i = 0; i < nelt; i++)
- sel[i] = i * 2 + 1;
+ sel[i] = i * 2;
if (can_vec_perm_p (mode, false, sel))
- return true;
+ {
+ for (i = 0; i < nelt; i++)
+ sel[i] = i * 2 + 1;
+ if (can_vec_perm_p (mode, false, sel))
+ return true;
+ }
+ }
+ else if (count == 3)
+ {
+ unsigned int k;
+ for (k = 0; k < 3; k++)
+ {
+ for (i = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = 3 * i + k;
+ else
+ sel[i] = 0;
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 3 loads is not supported by \
+ target\n");
+ return false;
+ }
+ for (i = 0, j = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = i;
+ else
+ sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
+ if (!can_vec_perm_p (mode, false, sel))
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "shuffle of 3 loads is not supported by \
+ target\n");
+ return false;
+ }
+ }
+ return true;
}
}
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "extract even/odd not supported by target\n");
+ "extract even/odd not supported by target\n");
return false;
}
@@ -4859,8 +4897,9 @@ vect_load_lanes_supported (tree vectype,
unsigned HOST_WIDE_INT count)
/* Function vect_permute_load_chain.
Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
- a power of 2, generate extract_even/odd stmts to reorder the input data
- correctly. Return the final references for loads in RESULT_CHAIN.
+ a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
+ the input data correctly. Return the final references for loads in
+ RESULT_CHAIN.
E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
The input is 4 vectors each containing 8 elements. We assign a
number to each
@@ -4941,6 +4980,7 @@ vect_permute_load_chain (vec<tree> dr_chain,
{
tree data_ref, first_vect, second_vect;
tree perm_mask_even, perm_mask_odd;
+ tree perm3_mask_low, perm3_mask_high;
gimple perm_stmt;
tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
unsigned int i, j, log_length = exact_log2 (length);
@@ -4951,45 +4991,99 @@ vect_permute_load_chain (vec<tree> dr_chain,
memcpy (result_chain->address (), dr_chain.address (),
length * sizeof (tree));
- for (i = 0; i < nelt; ++i)
- sel[i] = i * 2;
- perm_mask_even = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_even != NULL);
+ if (log_length != (unsigned int)-1)
+ {
+ for (i = 0; i < nelt; ++i)
+ sel[i] = i * 2;
+ perm_mask_even = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm_mask_even != NULL);
- for (i = 0; i < nelt; ++i)
- sel[i] = i * 2 + 1;
- perm_mask_odd = vect_gen_perm_mask (vectype, sel);
- gcc_assert (perm_mask_odd != NULL);
+ for (i = 0; i < nelt; ++i)
+ sel[i] = i * 2 + 1;
+ perm_mask_odd = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm_mask_odd != NULL);
- for (i = 0; i < log_length; i++)
- {
- for (j = 0; j < length; j += 2)
+ for (i = 0; i < log_length; i++)
{
- first_vect = dr_chain[j];
- second_vect = dr_chain[j+1];
+ for (j = 0; j < length; j += 2)
+ {
+ first_vect = dr_chain[j];
+ second_vect = dr_chain[j+1];
+
+ /* data_ref = permute_even (first_data_ref, second_data_ref); */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
+ perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
+ first_vect, second_vect,
+ perm_mask_even);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[j/2] = data_ref;
+
+ /* data_ref = permute_odd (first_data_ref, second_data_ref); */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
+ perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
+ first_vect, second_vect,
+ perm_mask_odd);
+ vect_finish_stmt_generation (stmt, perm_stmt, gsi);
+ (*result_chain)[j/2+length/2] = data_ref;
+ }
+ memcpy (dr_chain.address (), result_chain->address (),
+ length * sizeof (tree));
+ }
+ }
+ /* length is not a power of 2. */
+ else
+ {
+ unsigned int k;
- /* data_ref = permute_even (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
+ /* currently only length 3 is supported as most frequent case. */
+ gcc_assert (length == 3);
+
+ for (k = 0; k < 3; k++)
+ {
+ for (i = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = 3 * i + k;
+ else
+ sel[i] = 0;
+ perm3_mask_low = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm3_mask_low != NULL);
+
+ for (i = 0, j = 0; i < nelt; i++)
+ if (3 * i + k < 2 * nelt)
+ sel[i] = i;
+ else
+ sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
+
+ perm3_mask_high = vect_gen_perm_mask (vectype, sel);
+ gcc_assert (perm3_mask_high != NULL);
+
+ first_vect = dr_chain[0];
+ second_vect = dr_chain[1];
+
+ /* Create interleaving stmt (low part of):
+ low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
+ ...}> */
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_suffle3_low");
perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
first_vect, second_vect,
- perm_mask_even);
+ perm3_mask_low);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2] = data_ref;
- /* data_ref = permute_odd (first_data_ref, second_data_ref); */
- data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
+ /* Create interleaving stmt (high part of):
+ high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
+ ...}> */
+ first_vect = data_ref;
+ second_vect = dr_chain[2];
+ data_ref = make_temp_ssa_name (vectype, NULL, "vect_suffle3_high");
perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
first_vect, second_vect,
- perm_mask_odd);
+ perm3_mask_high);
vect_finish_stmt_generation (stmt, perm_stmt, gsi);
- (*result_chain)[j/2+length/2] = data_ref;
+ (*result_chain)[k] = data_ref;
}
- memcpy (dr_chain.address (), result_chain->address (),
- length * sizeof (tree));
}
}
-
/* Function vect_transform_grouped_load.
Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 1a51d6d..b87c143 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -1091,10 +1091,11 @@ vect_model_load_cost (stmt_vec_info stmt_info,
int ncopies,
include the cost of the permutes. */
if (!load_lanes_p && group_size > 1)
{
- /* Uses an even and odd extract operations for each needed permute. */
- int nstmts = ncopies * exact_log2 (group_size) * group_size;
- inside_cost += record_stmt_cost (body_cost_vec, nstmts, vec_perm,
- stmt_info, 0, vect_body);
+ /* Uses an even and odd extract operations or shuffle operations
+ for each needed permute. */
+ int nstmts = ncopies * ceil_log2 (group_size) * group_size;
+ inside_cost = record_stmt_cost (body_cost_vec, nstmts, vec_perm,
+ stmt_info, 0, vect_body);
if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
diff --git a/gcc/testsuite/gcc.dg/vect/pr52252-ld.c
b/gcc/testsuite/gcc.dg/vect/pr52252-ld.c
new file mode 100644
index 0000000..6e3cb52
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr52252-ld.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -g -ftree-vectorize -mssse3
-fdump-tree-vect-details" { target { i?86-*-* x86_64-*-* } } } */
+
+#define byte unsigned char
+
+void
+matrix_mul (byte *in, byte *out, int size)
+{
+ int i;
+ for (i = 0; i < size; i++)
+ {
+ byte in0 = in[0];
+ byte in1 = in[1];
+ byte in2 = in[2];
+ byte out0, out1, out2, out3;
+ out0 = in0 + in1;
+ out1 = in0 + in2;
+ out2 = in1 + in2;
+ out3 = in0 + in1 + in2;
+ out[0] = out0;
+ out[1] = out1;
+ out[2] = out2;
+ out[3] = out3;
+ in += 3;
+ out += 4;
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */
On Wed, Apr 30, 2014 at 6:31 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote:
> Ping.
>
> On Fri, Apr 18, 2014 at 2:05 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote:
>> Hi,
>>
>> Merged with current master the patch passes bootstrap and is giving
>> expected gains.
>> Patch and new tests are attached.
>>
>> ChangeLog:
>>
>> 2014-04-18 Evgeny Stupachenko <evstupac@gmail.com>
>>
>> * tree-vect-data-refs.c (vect_grouped_store_supported): New
>> check for stores group of length 3.
>> (vect_permute_store_chain): New permutations for stores group of
>> length 3.
>> (vect_grouped_load_supported): New check for loads group of length 3.
>> (vect_permute_load_chain): New permutations for loads group of length 3.
>> * tree-vect-stmts.c (vect_model_store_cost): Change cost
>> of vec_perm_shuffle for the new permutations.
>> (vect_model_load_cost): Ditto.
>>
>> ChangeLog for testsuite:
>>
>> 2014-04-18 Evgeny Stupachenko <evstupac@gmail.com>
>>
>> PR tree-optimization/52252
>> * gcc.dg/vect/pr52252-ld.c: Test on loads group of size 3.
>> * gcc.dg/vect/pr52252-st.c: Test on stores group of size 3.
>>
>> Evgeny
>>
>> On Thu, Mar 6, 2014 at 6:44 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote:
>>> Missed attachment.
>>>
>>> On Thu, Mar 6, 2014 at 6:42 PM, Evgeny Stupachenko <evstupac@gmail.com> wrote:
>>>> I've separated the patch into 2: cost model tuning and load/store
>>>> groups parallelism.
>>>> SLM tuning was partially introduced in the patch:
>>>> http://gcc.gnu.org/ml/gcc-patches/2014-03/msg00226.html
>>>> The patch introducing vectorization for load/store groups of size 3 attached.
>>>>
>>>> Is it ok for stage1?
>>>>
>>>> ChangeLog:
>>>>
>>>> 2014-03-06 Evgeny Stupachenko <evstupac@gmail.com>
>>>>
>>>> * tree-vect-data-refs.c (vect_grouped_store_supported): New
>>>> check for stores group of length 3.
>>>> (vect_permute_store_chain): New permutations for stores group of
>>>> length 3.
>>>> (vect_grouped_load_supported): New check for loads group of length 3.
>>>> (vect_permute_load_chain): New permutations for loads group of length 3.
>>>> * tree-vect-stmts.c (vect_model_store_cost): Change cost
>>>> of vec_perm_shuffle for the new permutations.
>>>> (vect_model_load_cost): Ditto.
>>>>
>>>>
>>>>
>>>> On Tue, Feb 11, 2014 at 7:19 PM, Richard Biener <rguenther@suse.de> wrote:
>>>>> On Tue, 11 Feb 2014, Evgeny Stupachenko wrote:
>>>>>
>>>>>> Missed patch attached in plain-text.
>>>>>>
>>>>>> I have copyright assignment on file with the FSF covering work on GCC.
>>>>>>
>>>>>> Load/stores groups of length 3 is the most frequent non-power-of-2
>>>>>> case. It is used in RGB image processing (like test case in PR52252).
>>>>>> For sure we can extend the patch to length 5 and more. However, this
>>>>>> potentially affect performance on some other architectures and
>>>>>> requires larger testing. So length 3 it is just first step.The
>>>>>> algorithm in the patch could be modified for a general case in several
>>>>>> steps.
>>>>>>
>>>>>> I understand that the patch should wait for the stage 1, however since
>>>>>> its ready we can discuss it right now and make some changes (like
>>>>>> general size of group).
>>>>>
>>>>> Other than that I'd like to see a vectorizer hook querying the cost of a
>>>>> vec_perm_const expansion instead of adding vec_perm_shuffle
>>>>> (thus requires the constant shuffle mask to be passed as well
>>>>> as the vector type). That's more useful for other uses that
>>>>> would require (arbitrary) shuffles.
>>>>>
>>>>> Didn't look at the rest of the patch yet - queued in my review
>>>>> pipeline.
>>>>>
>>>>> Thanks,
>>>>> Richard.
>>>>>
>>>>>> Thanks,
>>>>>> Evgeny
>>>>>>
>>>>>> On Tue, Feb 11, 2014 at 5:00 PM, Richard Biener <rguenther@suse.de> wrote:
>>>>>> >
>>>>>> > On Tue, 11 Feb 2014, Evgeny Stupachenko wrote:
>>>>>> >
>>>>>> > > Hi,
>>>>>> > >
>>>>>> > > The patch gives an expected 3 times gain for the test case in the PR52252
>>>>>> > > (and even 6 times for AVX2).
>>>>>> > > It passes make check and bootstrap on x86.
>>>>>> > > spec2000/spec2006 got no regressions/gains on x86.
>>>>>> > >
>>>>>> > > Is this patch ok?
>>>>>> >
>>>>>> > I've worked on generalizing the permutation support in the light
>>>>>> > of the availability of the generic shuffle support in the IL
>>>>>> > but hit some road-blocks in the way code-generation works for
>>>>>> > group loads with permutations (I don't remember if I posted all patches).
>>>>>> >
>>>>>> > This patch seems to be to a slightly different place but it again
>>>>>> > special-cases a specific permutation. Why's that? Why can't we
>>>>>> > support groups of size 7 for example? So - can this be generalized
>>>>>> > to support arbitrary non-power-of-two load/store groups?
>>>>>> >
>>>>>> > Other than that the patch has to wait for stage1 to open again,
>>>>>> > of course. And it misses a testcase.
>>>>>> >
>>>>>> > Btw, do you have a copyright assignment on file with the FSF covering
>>>>>> > work on GCC?
>>>>>> >
>>>>>> > Thanks,
>>>>>> > Richard.
>>>>>> >
>>>>>> > > ChangeLog:
>>>>>> > >
>>>>>> > > 2014-02-11 Evgeny Stupachenko <evstupac@gmail.com>
>>>>>> > >
>>>>>> > > * target.h (vect_cost_for_stmt): Defining new cost vec_perm_shuffle.
>>>>>> > > * tree-vect-data-refs.c (vect_grouped_store_supported): New
>>>>>> > > check for stores group of length 3.
>>>>>> > > (vect_permute_store_chain): New permutations for stores group of
>>>>>> > > length 3.
>>>>>> > > (vect_grouped_load_supported): New check for loads group of length
>>>>>> > > 3.
>>>>>> > > (vect_permute_load_chain): New permutations for loads group of
>>>>>> > > length 3.
>>>>>> > > * tree-vect-stmts.c (vect_model_store_cost): New cost
>>>>>> > > vec_perm_shuffle
>>>>>> > > for the new permutations.
>>>>>> > > (vect_model_load_cost): Ditto.
>>>>>> > > * config/aarch64/aarch64.c (builtin_vectorization_cost): Adding
>>>>>> > > vec_perm_shuffle cost as equvivalent of vec_perm cost.
>>>>>> > > * config/arm/arm.c: Ditto.
>>>>>> > > * config/rs6000/rs6000.c: Ditto.
>>>>>> > > * config/spu/spu.c: Ditto.
>>>>>> > > * config/i386/x86-tune.def (TARGET_SLOW_PHUFFB): Target for slow
>>>>>> > > byte
>>>>>> > > shuffle on some x86 architectures.
>>>>>> > > * config/i386/i386.h (processor_costs): Defining pshuffb cost.
>>>>>> > > * config/i386/i386.c (processor_costs): Adding pshuffb cost.
>>>>>> > > (ix86_builtin_vectorization_cost): Adding cost for the new
>>>>>> > > permutations.
>>>>>> > > Fixing cost for other permutations.
>>>>>> > > (expand_vec_perm_even_odd_1): Avoid byte shuffles when they are
>>>>>> > > slow (TARGET_SLOW_PHUFFB).
>>>>>> > > (ix86_add_stmt_cost): Adding cost when STMT is WIDEN_MULTIPLY.
>>>>>> > > Adding new shuffle cost only when byte shuffle is expected.
>>>>>> > > Fixing cost model for Silvermont.
>>>>>> > >
>>>>>> > > Thanks,
>>>>>> > > Evgeny
>>>>>> > >
>>>>>> >
>>>>>> > --
>>>>>> > Richard Biener <rguenther@suse.de>
>>>>>> > SUSE / SUSE Labs
>>>>>> > SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
>>>>>> > GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer
>>>>>>
>>>>>
>>>>> --
>>>>> Richard Biener <rguenther@suse.de>
>>>>> SUSE / SUSE Labs
>>>>> SUSE LINUX Products GmbH - Nuernberg - AG Nuernberg - HRB 16746
>>>>> GF: Jeff Hawn, Jennifer Guild, Felix Imend"orffer
next prev parent reply other threads:[~2014-05-06 11:27 UTC|newest]
Thread overview: 26+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CAOvf_xxEQ3tm+fwL5EfVSOUKDQnaBt+jTz4huK66T_8+TXzzfQ@mail.gmail.com>
2014-02-11 13:01 ` Richard Biener
2014-02-11 14:03 ` Evgeny Stupachenko
2014-02-11 15:20 ` Richard Biener
2014-03-06 14:42 ` Evgeny Stupachenko
2014-03-06 14:44 ` Evgeny Stupachenko
2014-04-18 10:05 ` Evgeny Stupachenko
2014-04-30 15:03 ` Evgeny Stupachenko
2014-05-06 11:27 ` Evgeny Stupachenko [this message]
2014-05-06 11:49 ` Richard Biener
2014-05-06 14:38 ` Evgeny Stupachenko
2014-05-06 14:40 ` Richard Biener
2014-05-12 15:14 ` Rainer Orth
2014-05-12 18:08 ` Evgeny Stupachenko
2014-05-13 8:23 ` Richard Biener
2014-05-16 13:03 ` Evgeny Stupachenko
2014-05-16 13:11 ` Rainer Orth
2014-05-16 13:22 ` Jakub Jelinek
2014-05-28 10:52 ` Evgeny Stupachenko
2014-05-28 10:55 ` Evgeny Stupachenko
2014-05-28 10:55 ` Jakub Jelinek
2014-05-28 11:33 ` Evgeny Stupachenko
2014-05-28 11:35 ` Jakub Jelinek
2014-05-16 13:33 ` Evgeny Stupachenko
2014-05-16 13:38 ` Rainer Orth
2014-05-13 8:36 ` Rainer Orth
2014-05-14 12:09 ` Evgeny Stupachenko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAOvf_xyFZMQusRaXp1FP-MTVKbeMZfm-8EzOgMQvEdOhBwUHaA@mail.gmail.com \
--to=evstupac@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=jakub@redhat.com \
--cc=rguenther@suse.de \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).