public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks.
@ 2014-08-12 10:45 Kirill Yukhin
  2014-08-12 17:54 ` Uros Bizjak
  0 siblings, 1 reply; 2+ messages in thread
From: Kirill Yukhin @ 2014-08-12 10:45 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches, kirill.yukhin

Hello,
Attached patch allows wider mask types.

Is it ok for trunk?

Bootstrapped.

gcc/
	* config/i386/i386.c (print_reg): Correctly print 64-bit mask
	registers.
	(inline_secondary_memory_needed): Allow 64 bit wide mask registers.
	(ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and
	xmm/ymm16+ when available.
	* config/i386/i386.h
	(HARD_REGNO_NREGS): Add mask regs.
	(VALID_AVX512F_REG_MODE): Add V4TImode.
	(VALID_AVX512VL_128_REG_MODE): Define.
	(VALID_MASK_AVX512BW_MODE): Ditto.
	(reg_class) (MASK_REG_P(X)): Define.
	* config/i386/i386.md: Do not split long moves with mask register,
	use kmovb if avx512bw is available.
	(movdi_internal): Handle mask registers.
	(movsi_internal): Ditto.

--
Thanks, K

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index c77e8a6..5a3b67a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -14701,7 +14701,7 @@ print_reg (rtx x, int code, FILE *file)
     case 8:
     case 4:
     case 12:
-      if (! ANY_FP_REG_P (x))
+      if (! ANY_FP_REG_P (x) && ! ANY_MASK_REG_P (x))
 	putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
       /* FALLTHRU */
     case 16:
@@ -37393,6 +37393,11 @@ inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
   if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
     return true;
 
+  /* Between mask and general, we have moves no larger than word size.  */
+  if ((MAYBE_MASK_CLASS_P (class1) != MAYBE_MASK_CLASS_P (class2))
+      && (GET_MODE_SIZE (mode) > UNITS_PER_WORD))
+  return true;
+
   /* ??? This is a lie.  We do have moves between mmx/general, and for
      mmx/sse2.  But by saying we need secondary memory we discourage the
      register allocator from using the mmx registers unless needed.  */
@@ -37698,7 +37703,8 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
   if (STACK_REGNO_P (regno))
     return VALID_FP_MODE_P (mode);
   if (MASK_REGNO_P (regno))
-    return VALID_MASK_REG_MODE (mode);
+    return (VALID_MASK_REG_MODE (mode)
+	    || (TARGET_AVX512BW && VALID_MASK_AVX512BW_MODE (mode)));
   if (SSE_REGNO_P (regno))
     {
       /* We implement the move patterns for all vector modes into and
@@ -37715,6 +37721,15 @@ ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
 	      || VALID_AVX512F_SCALAR_MODE (mode)))
 	return true;
 
+      /* TODO check for QI/HI scalars.  */
+      /* AVX512VL allows sse regs16+ for 128/256 bit modes.  */
+      if (TARGET_AVX512VL
+	  && (mode == OImode
+	      || mode == TImode
+	      || VALID_AVX256_REG_MODE (mode)
+	      || VALID_AVX512VL_128_REG_MODE (mode)))
+	return true;
+
       /* xmm16-xmm31 are only available for AVX-512.  */
       if (EXT_REX_SSE_REGNO_P (regno))
 	return false;
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 8677e6b..c2f0cee 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1054,7 +1054,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
    applied to them.  */
 
 #define HARD_REGNO_NREGS(REGNO, MODE)					\
-  (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO) || MMX_REGNO_P (REGNO)	\
+  (STACK_REGNO_P (REGNO) || SSE_REGNO_P (REGNO)				\
+   || MMX_REGNO_P (REGNO) || MASK_REGNO_P (REGNO)			\
    ? (COMPLEX_MODE_P (MODE) ? 2 : 1)					\
    : ((MODE) == XFmode							\
       ? (TARGET_64BIT ? 2 : 3)						\
@@ -1085,7 +1086,12 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define VALID_AVX512F_REG_MODE(MODE)					\
   ((MODE) == V8DImode || (MODE) == V8DFmode || (MODE) == V64QImode	\
-   || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode)
+   || (MODE) == V16SImode || (MODE) == V16SFmode || (MODE) == V32HImode \
+   || (MODE) == V4TImode)
+
+#define VALID_AVX512VL_128_REG_MODE(MODE)					\
+  ((MODE) == V2DImode || (MODE) == V2DFmode || (MODE) == V16QImode	\
+   || (MODE) == V4SImode || (MODE) == V4SFmode || (MODE) == V8HImode)
 
 #define VALID_SSE2_REG_MODE(MODE)					\
   ((MODE) == V16QImode || (MODE) == V8HImode || (MODE) == V2DFmode	\
@@ -1132,6 +1138,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
 
 #define VALID_MASK_REG_MODE(MODE) ((MODE) == HImode || (MODE) == QImode)
 
+#define VALID_MASK_AVX512BW_MODE(MODE) ((MODE) == SImode || (MODE) == DImode)
+
 /* Value is 1 if hard register REGNO can hold a value of machine-mode MODE.  */
 
 #define HARD_REGNO_MODE_OK(REGNO, MODE)	\
@@ -1454,6 +1462,7 @@ enum reg_class
          : (N) <= LAST_REX_SSE_REG ? (FIRST_REX_SSE_REG + (N) - 8) \
                                    : (FIRST_EXT_REX_SSE_REG + (N) - 16))
 
+#define MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
 #define MASK_REGNO_P(N) IN_RANGE ((N), FIRST_MASK_REG, LAST_MASK_REG)
 #define ANY_MASK_REG_P(X) (REG_P (X) && MASK_REGNO_P (REGNO (X)))
 
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 3cb8b67..4867e7e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -952,6 +952,9 @@
 ;; Instruction suffix for integer modes.
 (define_mode_attr imodesuffix [(QI "b") (HI "w") (SI "l") (DI "q")])
 
+;; Instruction suffix for masks.
+(define_mode_attr mskmodesuffix [(QI "b") (HI "w") (SI "d") (DI "q")])
+
 ;; Pointer size prefix for integer modes (Intel asm dialect)
 (define_mode_attr iptrsize [(QI "BYTE")
 			    (HI "WORD")
@@ -2022,13 +2025,16 @@
 
 (define_insn "*movdi_internal"
   [(set (match_operand:DI 0 "nonimmediate_operand"
-    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi")
+    "=r  ,o  ,r,r  ,r,m ,*y,*y,?*y,?m,?r ,?*Ym,*v,*v,*v,m ,?r ,?r,?*Yi,?*Ym,?*Yi,*k,*k ,*r ,*m")
 	(match_operand:DI 1 "general_operand"
-    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*v,m ,*v,*Yj,*v,r   ,*Yj ,*Yn"))]
+    "riFo,riF,Z,rem,i,re,C ,*y,m  ,*y,*Yn,r   ,C ,*v,m ,*v,*Yj,*v,r   ,*Yj ,*Yn ,*r ,*km,*k,*k"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
     {
+    case TYPE_MSKMOV:
+      return "kmovq\t{%1, %0|%0, %1}";
+
     case TYPE_MULTI:
       return "#";
 
@@ -2099,7 +2105,7 @@
   [(set (attr "isa")
      (cond [(eq_attr "alternative" "0,1")
 	      (const_string "nox64")
-	    (eq_attr "alternative" "2,3,4,5,10,11,16,18")
+	    (eq_attr "alternative" "2,3,4,5,10,11,16,18,21,23")
 	      (const_string "x64")
 	    (eq_attr "alternative" "17")
 	      (const_string "x64_sse4")
@@ -2118,6 +2124,8 @@
 	      (const_string "ssemov")
 	    (eq_attr "alternative" "19,20")
 	      (const_string "ssecvt")
+	    (eq_attr "alternative" "21,22,23,24")
+	      (const_string "mskmov")
 	    (match_operand 1 "pic_32bit_operand")
 	      (const_string "lea")
 	   ]
@@ -2179,16 +2187,20 @@
   [(set (match_operand:DI 0 "nonimmediate_operand")
         (match_operand:DI 1 "general_operand"))]
   "!TARGET_64BIT && reload_completed
-   && !(MMX_REG_P (operands[0]) || SSE_REG_P (operands[0]))
-   && !(MMX_REG_P (operands[1]) || SSE_REG_P (operands[1]))"
+   && !(MMX_REG_P (operands[0])
+	|| SSE_REG_P (operands[0])
+	|| MASK_REG_P (operands[0]))
+   && !(MMX_REG_P (operands[1])
+	|| SSE_REG_P (operands[1])
+	|| MASK_REG_P (operands[1]))"
   [(const_int 0)]
   "ix86_split_long_move (operands); DONE;")
 
 (define_insn "*movsi_internal"
   [(set (match_operand:SI 0 "nonimmediate_operand"
-			"=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi")
+			"=r,m ,*y,*y,?rm,?*y,*v,*v,*v,m ,?r ,?r,?*Yi,*k  ,*rm")
 	(match_operand:SI 1 "general_operand"
-			"g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r"))]
+			"g ,re,C ,*y,*y ,rm ,C ,*v,m ,*v,*Yj,*v,r   ,*krm,*k"))]
   "!(MEM_P (operands[0]) && MEM_P (operands[1]))"
 {
   switch (get_attr_type (insn))
@@ -2199,6 +2211,9 @@
 
       return standard_sse_constant_opcode (insn, operands[1]);
 
+    case TYPE_MSKMOV:
+      return "kmovd\t{%1, %0|%0, %1}";
+
     case TYPE_SSEMOV:
       switch (get_attr_mode (insn))
 	{
@@ -2262,6 +2277,8 @@
 	      (const_string "sselog1")
 	    (eq_attr "alternative" "7,8,9,10,12")
 	      (const_string "ssemov")
+	    (eq_attr "alternative" "13,14")
+	      (const_string "mskmov")
  	    (match_operand 1 "pic_32bit_operand")
 	      (const_string "lea")
 	   ]
@@ -2410,9 +2427,12 @@
     case TYPE_MSKMOV:
       switch (which_alternative)
         {
-	case 7: return "kmovw\t{%k1, %0|%0, %k1}";
-	case 8: return "kmovw\t{%1, %0|%0, %1}";
-	case 9: return "kmovw\t{%1, %k0|%k0, %1}";
+	case 7: return TARGET_AVX512BW ? "kmovb\t{%k1, %0|%0, %k1}"
+				       : "kmovw\t{%k1, %0|%0, %k1}";
+	case 8: return TARGET_AVX512BW ? "kmovb\t{%1, %0|%0, %1}"
+				       : "kmovw\t{%1, %0|%0, %1}";
+	case 9: return TARGET_AVX512BW ? "kmovb\t{%1, %k0|%k0, %1}"
+				       : "kmovw\t{%1, %k0|%k0, %1}";
 	default: gcc_unreachable ();
 	}
 

^ permalink raw reply	[flat|nested] 2+ messages in thread

* Re: [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks.
  2014-08-12 10:45 [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks Kirill Yukhin
@ 2014-08-12 17:54 ` Uros Bizjak
  0 siblings, 0 replies; 2+ messages in thread
From: Uros Bizjak @ 2014-08-12 17:54 UTC (permalink / raw)
  To: Kirill Yukhin; +Cc: Jakub Jelinek, Richard Henderson, GCC Patches

On Tue, Aug 12, 2014 at 12:45 PM, Kirill Yukhin <kirill.yukhin@gmail.com> wrote:
> Hello,
> Attached patch allows wider mask types.
>
> Is it ok for trunk?
>
> Bootstrapped.
>
> gcc/
>         * config/i386/i386.c (print_reg): Correctly print 64-bit mask
>         registers.
>         (inline_secondary_memory_needed): Allow 64 bit wide mask registers.
>         (ix86_hard_regno_mode_ok): Allow 32/64-bit mask registers and
>         xmm/ymm16+ when available.
>         * config/i386/i386.h
>         (HARD_REGNO_NREGS): Add mask regs.
>         (VALID_AVX512F_REG_MODE): Add V4TImode.
>         (VALID_AVX512VL_128_REG_MODE): Define.
>         (VALID_MASK_AVX512BW_MODE): Ditto.
>         (reg_class) (MASK_REG_P(X)): Define.
>         * config/i386/i386.md: Do not split long moves with mask register,
>         use kmovb if avx512bw is available.
>         (movdi_internal): Handle mask registers.
>         (movsi_internal): Ditto.

OK.

Thanks,
Uros.

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2014-08-12 17:54 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-08-12 10:45 [PATCH i386 AVX512] [5/n] Adjust register's availability, allow wider masks Kirill Yukhin
2014-08-12 17:54 ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).