From: Hongtao Liu <crazylht@gmail.com>
To: Uros Bizjak <ubizjak@gmail.com>
Cc: GCC Patches <gcc-patches@gcc.gnu.org>,
Kirill Yukhin <kirill.yukhin@gmail.com>,
"H. J. Lu" <hjl.tools@gmail.com>
Subject: Re: [PATCH 1/4][PR target/88808]Enable bitwise operator for AVX512 masks.
Date: Wed, 19 Aug 2020 09:53:38 +0800 [thread overview]
Message-ID: <CAMZc-bzOWWJNeJz=mjve=APcCEmbu50v_sYunJYac5ZaNwjCwg@mail.gmail.com> (raw)
In-Reply-To: <CAFULd4btgcasuzk_jLBmiC=s2JMYusvyxbiEgtMJ6jg_LnbXEw@mail.gmail.com>
[-- Attachment #1: Type: text/plain, Size: 1979 bytes --]
On Mon, Aug 17, 2020 at 5:20 PM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Fri, Aug 14, 2020 at 10:22 AM Hongtao Liu <crazylht@gmail.com> wrote:
> >
> > Hi:
> > First, since AVX512 masks involve both the vector ISA and the general
> > part, I have added both maintainers to the mailing list.
> >
> > I'm doing this in 4 steps:
> > 1 - Add cost model for operation of mask registers.
> > 2 - Introduce a new cover class INT_MASK_REGS. This enables direct
> > moves between GPRs and mask registers in pass_reload, guided by the
> > cost model; this is similar to INT_SSE_REGS.
> > 3 - Tune cost model.
> > 4 - Enable the or/xor/and/andn/not operators for mask registers. kxnor is
> > not enabled since there is no corresponding instruction for general
> > registers, and 64-bit mask ops are not enabled for 32-bit targets.
> > kadd/kshift/ktest are not merged into the general versions add/ashl/test
> > since I think it would be odd to use mask registers for those
> > operations.
> >
> > Bootstrap is OK, and regression tests are OK for i386/x86-64.
> > There is some performance improvement on SPEC2017 tested on SKL:
> > I observe many spills from integer registers going to mask registers
> > instead of memory, which is the reason for the improvement.
>
> + if (MASK_CLASS_P (regclass))
> + {
> + int index;
> + switch (GET_MODE_SIZE (mode))
> + {
> + case 1:
> + index = 0;
> + break;
> + case 2:
> + index = 1;
> + break;
> + default:
> + index = 3;
>
> Max index = 2!
>
Fix typo.
> + break;
> + }
> +
> + if (in == 2)
> + return MAX (ix86_cost->hard_register.mask_load[index],
> + ix86_cost->hard_register.mask_store[index]);
> + return in ? ix86_cost->hard_register.mask_load[2]
> + : ix86_cost->hard_register.mask_store[2];
> + }
>
> Are DImode loads and stores assumed to cost the same as SImode? A
> comment would be nice here.
>
Yes, comment is added.
> Uros.
Update patch.
--
BR,
Hongtao
[-- Attachment #2: 0001-x86-Add-cost-model-for-operation-of-mask-registers_V2.patch --]
[-- Type: text/x-patch, Size: 17228 bytes --]
From 70e9e389d751c79caf957ef336dded34726f0533 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Tue, 3 Sep 2019 14:41:02 -0700
Subject: [PATCH 1/4] x86: Add cost model for operation of mask registers.
gcc/
PR target/71453
* config/i386/i386.h (struct processor_costs): Add member
mask_to_integer, integer_to_mask, mask_load[3], mask_store[3],
mask_move.
* config/i386/x86-tune-costs.h (ix86_size_cost, i386_cost,
i486_cost, pentium_cost, lakemont_cost, pentiumpro_cost,
geode_cost, k6_cost, athlon_cost, k8_cost, amdfam10_cost,
bdver_cost, znver1_cost, znver2_cost, skylake_cost,
btver1_cost, btver2_cost, pentium4_cost, nocona_cost,
atom_cost, slm_cost, intel_cost, generic_cost, core_cost):
Initialize mask_load[3], mask_store[3], mask_move,
integer_to_mask, mask_to_integer for all target costs.
* config/i386/i386.c (ix86_register_move_cost): Use cost
model of mask registers.
(inline_memory_move_cost): Ditto.
(ix86_register_move_cost): Ditto.
---
gcc/config/i386/i386.c | 34 ++++++++
gcc/config/i386/i386.h | 7 ++
gcc/config/i386/x86-tune-costs.h | 144 +++++++++++++++++++++++++++++++
3 files changed, 185 insertions(+)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8ea6a4d7ea7..f5e824a16ad 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -18769,6 +18769,29 @@ inline_memory_move_cost (machine_mode mode, enum reg_class regclass, int in)
return in ? ix86_cost->hard_register.sse_load [index]
: ix86_cost->hard_register.sse_store [index];
}
+ if (MASK_CLASS_P (regclass))
+ {
+ int index;
+ switch (GET_MODE_SIZE (mode))
+ {
+ case 1:
+ index = 0;
+ break;
+ case 2:
+ index = 1;
+ break;
+ /* DImode loads and stores assumed to cost the same as SImode. */
+ default:
+ index = 2;
+ break;
+ }
+
+ if (in == 2)
+ return MAX (ix86_cost->hard_register.mask_load[index],
+ ix86_cost->hard_register.mask_store[index]);
+ return in ? ix86_cost->hard_register.mask_load[2]
+ : ix86_cost->hard_register.mask_store[2];
+ }
if (MMX_CLASS_P (regclass))
{
int index;
@@ -18894,6 +18917,17 @@ ix86_register_move_cost (machine_mode mode, reg_class_t class1_i,
? ix86_cost->hard_register.sse_to_integer
: ix86_cost->hard_register.integer_to_sse);
+ /* Moves between mask register and GPR. */
+ if (MASK_CLASS_P (class1) != MASK_CLASS_P (class2))
+ {
+ return (MASK_CLASS_P (class1)
+ ? ix86_cost->hard_register.mask_to_integer
+ : ix86_cost->hard_register.integer_to_mask);
+ }
+ /* Moving between mask registers. */
+ if (MASK_CLASS_P (class1) && MASK_CLASS_P (class2))
+ return ix86_cost->hard_register.mask_move;
+
if (MAYBE_FLOAT_CLASS_P (class1))
return ix86_cost->hard_register.fp_move;
if (MAYBE_SSE_CLASS_P (class1))
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 114967e49a3..e0af87450b8 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -279,6 +279,13 @@ struct processor_costs {
in SImode, DImode and TImode. */
const int sse_to_integer; /* cost of moving SSE register to integer. */
const int integer_to_sse; /* cost of moving integer register to SSE. */
+ const int mask_to_integer; /* cost of moving mask register to integer. */
+ const int integer_to_mask; /* cost of moving integer register to mask. */
+ const int mask_load[3]; /* cost of loading mask registers
+ in QImode, HImode and SImode. */
+ const int mask_store[3]; /* cost of storing mask register
+ in QImode, HImode and SImode. */
+ const int mask_move; /* cost of moving mask register. */
} hard_register;
const int add; /* cost of an add instruction */
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 3207404e514..256c84e364e 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -59,6 +59,12 @@ struct processor_costs ix86_size_cost = {/* costs for tuning for size */
{3, 3, 3, 3, 3}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {2, 2, 2}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 2, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -164,6 +170,12 @@ struct processor_costs i386_cost = { /* 386 specific costs */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {2, 4, 2}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 4, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -266,6 +278,12 @@ struct processor_costs i486_cost = { /* 486 specific costs */
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {2, 4, 2}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 4, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -370,6 +388,12 @@ struct processor_costs pentium_cost = {
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {2, 4, 2}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 4, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -465,6 +489,12 @@ struct processor_costs lakemont_cost = {
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {2, 4, 2}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 4, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -575,6 +605,12 @@ struct processor_costs pentiumpro_cost = {
{4, 8, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 4, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 2, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -676,6 +712,12 @@ struct processor_costs geode_cost = {
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {2, 2, 2}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 2, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -777,6 +819,12 @@ struct processor_costs k6_cost = {
{2, 2, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 5, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 3, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -884,6 +932,12 @@ struct processor_costs athlon_cost = {
{4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
5, 5, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {3, 4, 3}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {3, 4, 3}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -993,6 +1047,12 @@ struct processor_costs k8_cost = {
{4, 4, 10, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
5, 5, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {3, 4, 3}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {3, 4, 3}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1106,6 +1166,12 @@ struct processor_costs amdfam10_cost = {
{4, 4, 5, 10, 20}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
3, 3, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {3, 4, 3}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {3, 4, 3}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* On K8:
MOVD reg64, xmmreg Double FSTORE 4
@@ -1229,6 +1295,12 @@ const struct processor_costs bdver_cost = {
{10, 10, 10, 40, 60}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
16, 20, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {8, 8, 8}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {8, 8, 8}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1360,6 +1432,12 @@ struct processor_costs znver1_cost = {
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit. */
6, 6, /* SSE->integer and integer->SSE moves. */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {8, 8, 8}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1509,6 +1587,12 @@ struct processor_costs znver2_cost = {
in 32,64,128,256 and 512-bit. */
6, 6, /* SSE->integer and integer->SSE
moves. */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {8, 8, 8}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1643,6 +1727,12 @@ struct processor_costs skylake_cost = {
{8, 8, 8, 12, 24}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 4, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 6, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1751,6 +1841,12 @@ const struct processor_costs btver1_cost = {
{10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
14, 14, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {6, 8, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 8, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1855,6 +1951,12 @@ const struct processor_costs btver2_cost = {
{10, 10, 12, 48, 96}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
14, 14, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {8, 8, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {8, 8, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -1958,6 +2060,12 @@ struct processor_costs pentium4_cost = {
{16, 16, 16, 32, 64}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
20, 12, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 5, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {2, 3, 2}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -2064,6 +2172,12 @@ struct processor_costs nocona_cost = {
{12, 12, 12, 24, 48}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
20, 12, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 4, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {4, 4, 4}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -2168,6 +2282,12 @@ struct processor_costs atom_cost = {
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
8, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 6, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -2272,6 +2392,12 @@ struct processor_costs slm_cost = {
{8, 8, 8, 16, 32}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
8, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {8, 8, 8}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 6, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -2376,6 +2502,12 @@ struct processor_costs intel_cost = {
{6, 6, 6, 6, 6}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
4, 4, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 4, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 6, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -2484,6 +2616,12 @@ struct processor_costs generic_cost = {
{6, 6, 6, 10, 15}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {6, 6, 6}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 6, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
@@ -2597,6 +2735,12 @@ struct processor_costs core_cost = {
{6, 6, 6, 6, 12}, /* cost of storing SSE registers
in 32,64,128,256 and 512-bit */
6, 6, /* SSE->integer and integer->SSE moves */
+ 2, 2, /* mask->integer and integer->mask moves */
+ {4, 4, 4}, /* cost of loading mask register
+ in QImode, HImode, SImode. */
+ {6, 6, 6}, /* cost if storing mask register
+ in QImode, HImode, SImode. */
+ 2, /* cost of moving mask register. */
/* End of register allocator costs. */
},
--
2.18.1
next prev parent reply other threads:[~2020-08-19 1:52 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-14 8:23 Hongtao Liu
2020-08-17 9:20 ` Uros Bizjak
2020-08-19 1:53 ` Hongtao Liu [this message]
2020-08-19 6:29 ` Uros Bizjak
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='CAMZc-bzOWWJNeJz=mjve=APcCEmbu50v_sYunJYac5ZaNwjCwg@mail.gmail.com' \
--to=crazylht@gmail.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hjl.tools@gmail.com \
--cc=kirill.yukhin@gmail.com \
--cc=ubizjak@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).