* [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks.
@ 2014-08-12 10:45 Kirill Yukhin
2014-08-12 17:54 ` Uros Bizjak
0 siblings, 1 reply; 2+ messages in thread
From: Kirill Yukhin @ 2014-08-12 10:45 UTC (permalink / raw)
To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches, kirill.yukhin
Hello,
Attached patch allows wider mask types.
Is it ok for trunk?
Bootstrapped.
gcc/
* config/i386/i386.c (print_reg): Correctly print 64-bit mask
registers.
(inline_secondary_memory_needed): Allow 64 bit wide mask registers.
(ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and
xmm/ymm16+ when available.
* config/i386/i386.h
(HARD_REGNO_NREGS): Add mask regs.
(VALID_AVX512F_REG_MODE): Ditto.
(VALID_AVX512VL_128_REG_MODE): Define.
(VALID_MASK_AVX512BW_MODE): Ditto.
(reg_class) (MASK_REG_P(X)): Define.
* config/i386/i386.md: Do not split long moves with mask register,
use kmovb if avx512bw is available.
(movdi_internal): Handle mask registers.
--
Thanks, K
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c77e8a6..5a3b67a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14701,7 +14701,7 @@ print_reg (rtx x, int code, FILE *file)
case 8:
case 4:
case 12:
- if (! ANY_FP_REG_P (x))
+ if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
/* FALLTHRU */
case 16:
@@ -37393,6 +37393,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
return true;
+ /* Between mask and general, we have moves no larger than word size. */
+ if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
+ && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
+ return true;
+
/* ??? This is a lie. We do have moves between mmx/general, and for
mmx/sse2. But by saying we need secondary memory we discourage the
register allocator from using the mmx registers unless needed. */
@@ -37698,7 +37703,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
if (STACK_REGNO_P (regno))
return VALID_FP_MODE_P (mode);
if (MASK_REGNO_P (regno))
- return VALID_MASK_REG_MODE (mode);
+ return (VALID_MASK_REG_MODE (mode)
+ || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
if (SSE_REGNO_P (regno))
{
/* We implement the move patterns for all vector modes into and
@@ -37715,6 +37721,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
|| VALID_AVX512F_SCALAR_MODE (mode)))
return true;
+ /* TODO check for QI/HI scalars. */
+ /* AVX512VL allows sse regs16+ for 128/256 bit modes. */
+ if (TARGET_AVX512VL
+ && (mode == OImode
+ || mode == TImode
+ || VALID_AVX256_REG_MODE (mode)
+ || VALID_AVX512VL_128_REG_MODE (mode)))
+ return true;
+
/* xmm16-xmm31 are only available for AVX-512. */
if (EXT_REX_SSE_REGNO_P (regno))
return false;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8677e6b..c2f0cee 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1054,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
applied to them. */
#define HARD_REGNO_NREGS(REGNO, MODE) \
- (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO) \
+ (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) \
+ || MMX_REGNO_P (REGNO) || MASK_REGNO_P (REGNO) \
? (COMPLEX_MODE_P (MODE) ? 2 : 1) \
: ((MODE) == XFmode \
? (TARGET_64BIT ? 2 : 3) \
@@ -1085,7 +1086,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_AVX512F_REG_MODE(MODE) \
((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode \
- || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode)
+ || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \
+ || (MODE) == V4TImode)
+
+#define VALID_AVX512VL_128_REG_MODE(MODE) \
+ ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode \
+ || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode)
#define VALID_SSE2_REG_MODE(MODE) \
((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode \
@@ -1132,6 +1138,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
+#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode)
+
/* Value is 1 if hard register REGNO can hold a value of machine-mode MODE. */
#define HARD_REGNO_MODE_OK(REGNO, MODE) \
@@ -1454,6 +1462,7 @@ enum reg_class
: (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
: (FIRST_EXT_REX_SSE_REG + (N) - 16))
+#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
#define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
#define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3cb8b67..4867e7e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -952,6 +952,9 @@
;; Instruction suffix for integer modes.
(define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
+;; Instruction suffix for masks.
+(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
+
;; Pointer size prefix for integer modes (Intel asm dialect)
(define_mode_attr iptrsize [(QI "BYTE")
(HI "WORD")
@@ -2022,13 +2025,16 @@
(define_insn "*movdi_internal"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
+ "=r ,o ,r,r ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m")
(match_operand:DI 1 "general_operand"
- "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn"))]
+ "riFo,riF,Z,rem,i,re,C ,*y,m ,*y,*Yn,r ,C ,*v,m ,*v,*Yj,*v,r ,*Yj ,*Yn ,*r ,*km,*k,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
{
+ case TYPE_MSKMOV:
+ return "kmovq\t{%1, %0|%0, %1}";
+
case TYPE_MULTI:
return "#";
@@ -2099,7 +2105,7 @@
[(set (attr "isa")
(cond [(eq_attr "alternative" "0,1")
(const_string "nox64")
- (eq_attr "alternative" "2,3,4,5,10,11,16,18")
+ (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23")
(const_string "x64")
(eq_attr "alternative" "17")
(const_string "x64_sse4")
@@ -2118,6 +2124,8 @@
(const_string "ssemov")
(eq_attr "alternative" "19,20")
(const_string "ssecvt")
+ (eq_attr "alternative" "21,22,23,24")
+ (const_string "mskmov")
(match_operand 1 "pic_32bit_operand")
(const_string "lea")
]
@@ -2179,16 +2187,20 @@
[(set (match_operand:DI 0 "nonimmediate_operand")
(match_operand:DI 1 "general_operand"))]
"!TARGET_64BIT && reload_completed
- && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
- && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+ && !(MMX_REG_P (operands[0])
+ || SSE_REG_P (operands[0])
+ || MASK_REG_P (operands[0]))
+ && !(MMX_REG_P (operands[1])
+ || SSE_REG_P (operands[1])
+ || MASK_REG_P (operands[1]))"
[(const_int 0)]
"ix86_split_long_move (operands); DONE;")
(define_insn "*movsi_internal"
[(set (match_operand:SI 0 "nonimmediate_operand"
- "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi")
+ "=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k ,*rm")
(match_operand:SI 1 "general_operand"
- "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))]
+ "g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r ,*krm,*k"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))"
{
switch (get_attr_type (insn))
@@ -2199,6 +2211,9 @@
return standard_sse_constant_opcode (insn, operands[1]);
+ case TYPE_MSKMOV:
+ return "kmovd\t{%1, %0|%0, %1}";
+
case TYPE_SSEMOV:
switch (get_attr_mode (insn))
{
@@ -2262,6 +2277,8 @@
(const_string "sselog1")
(eq_attr "alternative" "7,8,9,10,12")
(const_string "ssemov")
+ (eq_attr "alternative" "13,14")
+ (const_string "mskmov")
(match_operand 1 "pic_32bit_operand")
(const_string "lea")
]
@@ -2410,9 +2427,12 @@
case TYPE_MSKMOV:
switch (which_alternative)
{
- case 7: return "kmovw\t{%k1, %0|%0, %k1}";
- case 8: return "kmovw\t{%1, %0|%0, %1}";
- case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+ case 7: return TARGET_AVX512BW ? "kmovb\t{%k1, %0|%0, %k1}"
+ : "kmovw\t{%k1, %0|%0, %k1}";
+ case 8: return TARGET_AVX512BW ? "kmovb\t{%1, %0|%0, %1}"
+ : "kmovw\t{%1, %0|%0, %1}";
+ case 9: return TARGET_AVX512BW ? "kmovb\t{%1, %k0|%k0, %1}"
+ : "kmovw\t{%1, %k0|%k0, %1}";
default: gcc_unreachable ();
}
^ permalink raw reply [flat|nested] 2+ messages in thread
* Re: [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks.
2014-08-12 10:45 [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks Kirill Yukhin
@ 2014-08-12 17:54 ` Uros Bizjak
0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2014-08-12 17:54 UTC (permalink / raw)
To: Kirill Yukhin; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches
On Tue, Aug 12, 2014 at 12:45 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello,
> Attached patch allows wider mask types.
>
> Is it ok for trunk?
>
> Bootstrapped.
>
> gcc/
> * config/i386/i386.c (print_reg): Correctly print 64-bit mask
> registers.
> (inline_secondary_memory_needed): Allow 64 bit wide mask registers.
> (ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and
> xmm/ymm16+ when available.
> * config/i386/i386.h
> (HARD_REGNO_NREGS): Add mask regs.
> (VALID_AVX512F_REG_MODE): Ditto.
> (VALID_AVX512VL_128_REG_MODE): Define.
> (VALID_MASK_AVX512BW_MODE): Ditto.
> (reg_class) (MASK_REG_P(X)): Define.
> * config/i386/i386.md: Do not split long moves with mask register,
> use kmovb if avx512bw is available.
> (movdi_internal): Handle mask registers.
OK.
Thanks,
Uros.
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2014-08-12 17:54 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-12 10:45 [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks Kirill Yukhin
2014-08-12 17:54 ` Uros Bizjak
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).