public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH, i386, AVX-512] Add vectorizer support builtins
@ 2016-05-23 16:13 Ilya Verbin
  2016-05-25 13:28 ` Kirill Yukhin
  2016-06-02 14:20 ` Ilya Verbin
  0 siblings, 2 replies; 4+ messages in thread
From: Ilya Verbin @ 2016-05-23 16:13 UTC (permalink / raw)
  To: Uros Bizjak, Kirill Yukhin; +Cc: gcc-patches

Hi!

This patch adds the missing 512-bit rounding builtins for vectorization.
Regtested on x86_64-linux and i686-linux.  OK for trunk?


gcc/
	* config/i386/i386-builtin-types.def: Add V16SI_FTYPE_V16SF,
	V8DF_FTYPE_V8DF_ROUND, V16SF_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF_ROUND.
	* config/i386/i386.c (enum ix86_builtins): Add
	IX86_BUILTIN_CVTPS2DQ512_MASK, IX86_BUILTIN_FLOORPS512,
	IX86_BUILTIN_FLOORPD512, IX86_BUILTIN_CEILPS512, IX86_BUILTIN_CEILPD512,
	IX86_BUILTIN_TRUNCPS512, IX86_BUILTIN_TRUNCPD512,
	IX86_BUILTIN_CVTPS2DQ512, IX86_BUILTIN_VEC_PACK_SFIX512,
	IX86_BUILTIN_FLOORPS_SFIX512, IX86_BUILTIN_CEILPS_SFIX512,
	IX86_BUILTIN_ROUNDPS_AZ_SFIX512.  Move
	IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512.
	(builtin_description bdesc_args): Add __builtin_ia32_floorps512,
	__builtin_ia32_ceilps512, __builtin_ia32_truncps512,
	__builtin_ia32_floorpd512, __builtin_ia32_ceilpd512,
	__builtin_ia32_truncpd512, __builtin_ia32_cvtps2dq512,
	__builtin_ia32_vec_pack_sfix512, __builtin_ia32_roundps_az_sfix512,
	__builtin_ia32_floorps_sfix512, __builtin_ia32_ceilps_sfix512.
	Change IX86_BUILTIN_CVTPS2DQ512 to IX86_BUILTIN_CVTPS2DQ512_MASK for
	__builtin_ia32_cvtps2dq512_mask.
	(ix86_expand_args_builtin): Handle V8DF_FTYPE_V8DF_ROUND,
	V16SF_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF.
	(ix86_builtin_vectorized_function): Handle builtins mentioned above.
	* config/i386/sse.md
	(<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>):
	Rename to ...
	(avx512f_fix_notruncv16sfv16si<mask_name><round_name>): ... this.
	(<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>): Rename
	to ...
	(avx512f_cvtpd2dq512<mask_name><round_name>): ... this.
	(avx512f_vec_pack_sfix_v8df): New define_expand.
	(avx512f_roundpd512): Rename to ...
	(avx512f_round<castmode>512): ... this.  Change iterator.
	(avx512f_roundps512_sfix): New define_expand.
	(round<mode>2_sfix): Change iterator.
gcc/testsuite/
	* gcc.target/i386/avx512f-ceil-vec-1.c: New test.
	* gcc.target/i386/avx512f-ceil-vec-2.c: New test.
	* gcc.target/i386/avx512f-ceilf-sfix-vec-1.c: New test.
	* gcc.target/i386/avx512f-ceilf-sfix-vec-2.c: New test.
	* gcc.target/i386/avx512f-ceilf-vec-1.c: New test.
	* gcc.target/i386/avx512f-ceilf-vec-2.c: New test.
	* gcc.target/i386/avx512f-floor-vec-1.c: New test.
	* gcc.target/i386/avx512f-floor-vec-2.c: New test.
	* gcc.target/i386/avx512f-floorf-sfix-vec-1.c: New test.
	* gcc.target/i386/avx512f-floorf-sfix-vec-2.c: New test.
	* gcc.target/i386/avx512f-floorf-vec-1.c: New test.
	* gcc.target/i386/avx512f-floorf-vec-2.c: New test.
	* gcc.target/i386/avx512f-rint-sfix-vec-1.c: New test.
	* gcc.target/i386/avx512f-rint-sfix-vec-2.c: New test.
	* gcc.target/i386/avx512f-rintf-sfix-vec-1.c: New test.
	* gcc.target/i386/avx512f-rintf-sfix-vec-2.c: New test.
	* gcc.target/i386/avx512f-round-sfix-vec-1.c: New test.
	* gcc.target/i386/avx512f-round-sfix-vec-2.c: New test.
	* gcc.target/i386/avx512f-roundf-sfix-vec-1.c: New test.
	* gcc.target/i386/avx512f-roundf-sfix-vec-2.c: New test.
	* gcc.target/i386/avx512f-trunc-vec-1.c: New test.
	* gcc.target/i386/avx512f-trunc-vec-2.c: New test.
	* gcc.target/i386/avx512f-truncf-vec-1.c: New test.
	* gcc.target/i386/avx512f-truncf-vec-2.c: New test.


diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 75d57d9..c66f651 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -294,6 +294,7 @@ DEF_FUNCTION_TYPE (V8DF, V4DF)
 DEF_FUNCTION_TYPE (V8DF, V2DF)
 DEF_FUNCTION_TYPE (V16SI, V4SI)
 DEF_FUNCTION_TYPE (V16SI, V8SI)
+DEF_FUNCTION_TYPE (V16SI, V16SF)
 DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI, UHI)
 DEF_FUNCTION_TYPE (V8DI, V8DI, V8DI, UQI)
 DEF_FUNCTION_TYPE (V8DI, PV8DI)
@@ -1061,14 +1062,17 @@ DEF_FUNCTION_TYPE (VOID, QI, V8DI, PCINT, INT, INT)
 
 DEF_FUNCTION_TYPE_ALIAS (V2DF_FTYPE_V2DF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V4DF_FTYPE_V4DF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V8DF_FTYPE_V8DF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V4SF_FTYPE_V4SF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V8SF_FTYPE_V8SF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V16SF_FTYPE_V16SF, ROUND)
 
 DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V2DF_V2DF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V4DF_V4DF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V8DF_V8DF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V4SI_FTYPE_V4SF, ROUND)
 DEF_FUNCTION_TYPE_ALIAS (V8SI_FTYPE_V8SF, ROUND)
+DEF_FUNCTION_TYPE_ALIAS (V16SI_FTYPE_V16SF, ROUND)
 
 DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DF_V2DF, PTEST)
 DEF_FUNCTION_TYPE_ALIAS (INT_FTYPE_V2DI_V2DI, PTEST)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 1cb88d6..049a006 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30935,7 +30935,7 @@ enum ix86_builtins
   IX86_BUILTIN_CVTPD2PS512,
   IX86_BUILTIN_CVTPD2UDQ512,
   IX86_BUILTIN_CVTPH2PS512,
-  IX86_BUILTIN_CVTPS2DQ512,
+  IX86_BUILTIN_CVTPS2DQ512_MASK,
   IX86_BUILTIN_CVTPS2PD512,
   IX86_BUILTIN_CVTPS2PH512,
   IX86_BUILTIN_CVTPS2UDQ512,
@@ -32375,14 +32375,25 @@ enum ix86_builtins
   IX86_BUILTIN_COPYSIGNQ,
 
   /* Vectorizer support builtins.  */
-  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
   IX86_BUILTIN_CPYSGNPS,
   IX86_BUILTIN_CPYSGNPD,
   IX86_BUILTIN_CPYSGNPS256,
   IX86_BUILTIN_CPYSGNPS512,
   IX86_BUILTIN_CPYSGNPD256,
   IX86_BUILTIN_CPYSGNPD512,
+  IX86_BUILTIN_FLOORPS512,
+  IX86_BUILTIN_FLOORPD512,
+  IX86_BUILTIN_CEILPS512,
+  IX86_BUILTIN_CEILPD512,
+  IX86_BUILTIN_TRUNCPS512,
+  IX86_BUILTIN_TRUNCPD512,
+  IX86_BUILTIN_CVTPS2DQ512,
+  IX86_BUILTIN_VEC_PACK_SFIX512,
+  IX86_BUILTIN_FLOORPS_SFIX512,
   IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512,
+  IX86_BUILTIN_CEILPS_SFIX512,
+  IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512,
+  IX86_BUILTIN_ROUNDPS_AZ_SFIX512,
   IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512,
 
 
@@ -34197,6 +34208,17 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_sqrtv8df2, "__builtin_ia32_sqrtpd512", IX86_BUILTIN_SQRTPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sqrtv16sf2, "__builtin_ia32_sqrtps512", IX86_BUILTIN_SQRTPS_NR512, UNKNOWN, (int) V16SF_FTYPE_V16SF },
   { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf, "__builtin_ia32_exp2ps", IX86_BUILTIN_EXP2PS, UNKNOWN, (int) V16SF_FTYPE_V16SF },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_floorps512", IX86_BUILTIN_FLOORPS512, (enum rtx_code) ROUND_FLOOR, (int) V16SF_FTYPE_V16SF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_ceilps512", IX86_BUILTIN_CEILPS512, (enum rtx_code) ROUND_CEIL, (int) V16SF_FTYPE_V16SF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512, "__builtin_ia32_truncps512", IX86_BUILTIN_TRUNCPS512, (enum rtx_code) ROUND_TRUNC, (int) V16SF_FTYPE_V16SF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_floorpd512", IX86_BUILTIN_FLOORPD512, (enum rtx_code) ROUND_FLOOR, (int) V8DF_FTYPE_V8DF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_ceilpd512", IX86_BUILTIN_CEILPD512, (enum rtx_code) ROUND_CEIL, (int) V8DF_FTYPE_V8DF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd512, "__builtin_ia32_truncpd512", IX86_BUILTIN_TRUNCPD512, (enum rtx_code) ROUND_TRUNC, (int) V8DF_FTYPE_V8DF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si, "__builtin_ia32_cvtps2dq512", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vec_pack_sfix_v8df, "__builtin_ia32_vec_pack_sfix512", IX86_BUILTIN_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv16sf2_sfix, "__builtin_ia32_roundps_az_sfix512", IX86_BUILTIN_ROUNDPS_AZ_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V16SF },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_floorps_sfix512", IX86_BUILTIN_FLOORPS_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V16SF_ROUND },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundps512_sfix, "__builtin_ia32_ceilps_sfix512", IX86_BUILTIN_CEILPS_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V16SF_ROUND },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_roundv8df2_vec_pack_sfix, "__builtin_ia32_roundpd_az_vec_pack_sfix512", IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512, UNKNOWN, (int) V16SI_FTYPE_V8DF_V8DF },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_floorpd_vec_pack_sfix512", IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_FLOOR, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_roundpd_vec_pack_sfix512, "__builtin_ia32_ceilpd_vec_pack_sfix512", IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512, (enum rtx_code) ROUND_CEIL, (int) V16SI_FTYPE_V8DF_V8DF_ROUND },
@@ -35113,7 +35135,7 @@ static const struct builtin_description bdesc_round_args[] =
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtpd2ps512_mask_round,  "__builtin_ia32_cvtpd2ps512_mask", IX86_BUILTIN_CVTPD2PS512, UNKNOWN, (int) V8SF_FTYPE_V8DF_V8SF_QI_INT },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_ufix_notruncv8dfv8si2_mask_round, "__builtin_ia32_cvtpd2udq512_mask", IX86_BUILTIN_CVTPD2UDQ512, UNKNOWN, (int) V8SI_FTYPE_V8DF_V8SI_QI_INT },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_vcvtph2ps512_mask_round,  "__builtin_ia32_vcvtph2ps512_mask", IX86_BUILTIN_CVTPH2PS512, UNKNOWN, (int) V16SF_FTYPE_V16HI_V16SF_HI_INT },
-  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
+  { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_fix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2dq512_mask", IX86_BUILTIN_CVTPS2DQ512_MASK, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_cvtps2pd512_mask_round, "__builtin_ia32_cvtps2pd512_mask", IX86_BUILTIN_CVTPS2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SF_V8DF_QI_INT },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_avx512f_ufix_notruncv16sfv16si_mask_round, "__builtin_ia32_cvtps2udq512_mask", IX86_BUILTIN_CVTPS2UDQ512, UNKNOWN, (int) V16SI_FTYPE_V16SF_V16SI_HI_INT },
   { OPTION_MASK_ISA_AVX512F, CODE_FOR_sse2_cvtsd2ss_round, "__builtin_ia32_cvtsd2ss_round", IX86_BUILTIN_CVTSD2SS_ROUND, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF_INT },
@@ -38690,10 +38712,13 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     {
     case V2DF_FTYPE_V2DF_ROUND:
     case V4DF_FTYPE_V4DF_ROUND:
+    case V8DF_FTYPE_V8DF_ROUND:
     case V4SF_FTYPE_V4SF_ROUND:
     case V8SF_FTYPE_V8SF_ROUND:
+    case V16SF_FTYPE_V16SF_ROUND:
     case V4SI_FTYPE_V4SF_ROUND:
     case V8SI_FTYPE_V8SF_ROUND:
+    case V16SI_FTYPE_V16SF_ROUND:
       return ix86_expand_sse_round (d, exp, target);
     case V4SI_FTYPE_V2DF_V2DF_ROUND:
     case V8SI_FTYPE_V4DF_V4DF_ROUND:
@@ -38807,6 +38832,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
     case V16SI_FTYPE_V8SI:
     case V16SF_FTYPE_V4SF:
     case V16SI_FTYPE_V4SI:
+    case V16SI_FTYPE_V16SF:
     case V16SF_FTYPE_V16SF:
     case V8DI_FTYPE_UQI:
     case V8DF_FTYPE_V4DF:
@@ -42536,6 +42562,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512);
 	}
       break;
 
@@ -42561,6 +42589,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512);
 	}
       break;
 
@@ -42573,6 +42603,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX);
 	  else if (out_n == 8 && in_n == 4)
 	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX256);
+	  else if (out_n == 16 && in_n == 8)
+	    return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512);
 	}
       if (out_mode == SImode && in_mode == SFmode)
 	{
@@ -42580,6 +42612,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512);
 	}
       break;
 
@@ -42605,6 +42639,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512);
 	}
       break;
 
@@ -42619,6 +42655,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD);
 	  else if (out_n == 4 && in_n == 4)
 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD256);
+	  else if (out_n == 8 && in_n == 8)
+	    return ix86_get_builtin (IX86_BUILTIN_FLOORPD512);
 	}
       if (out_mode == SFmode && in_mode == SFmode)
 	{
@@ -42626,6 +42664,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_FLOORPS512);
 	}
       break;
 
@@ -42640,6 +42680,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD);
 	  else if (out_n == 4 && in_n == 4)
 	    return ix86_get_builtin (IX86_BUILTIN_CEILPD256);
+	  else if (out_n == 8 && in_n == 8)
+	    return ix86_get_builtin (IX86_BUILTIN_CEILPD512);
 	}
       if (out_mode == SFmode && in_mode == SFmode)
 	{
@@ -42647,6 +42689,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_CEILPS256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_CEILPS512);
 	}
       break;
 
@@ -42661,6 +42705,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD);
 	  else if (out_n == 4 && in_n == 4)
 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256);
+	  else if (out_n == 8 && in_n == 8)
+	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512);
 	}
       if (out_mode == SFmode && in_mode == SFmode)
 	{
@@ -42668,6 +42714,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS);
 	  else if (out_n == 8 && in_n == 8)
 	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256);
+	  else if (out_n == 16 && in_n == 16)
+	    return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512);
 	}
       break;
 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 745b6b6..e71c53f 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -4242,7 +4242,7 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_insn "<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
+(define_insn "avx512f_fix_notruncv16sfv16si<mask_name><round_name>"
   [(set (match_operand:V16SI 0 "register_operand" "=v")
 	(unspec:V16SI
 	  [(match_operand:V16SF 1 "<round_nimm_predicate>" "<round_constraint>")]
@@ -4799,7 +4799,7 @@
    (set_attr "prefix" "maybe_vex")
    (set_attr "mode" "V2DF")])
 
-(define_insn "<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>"
+(define_insn "avx512f_cvtpd2dq512<mask_name><round_name>"
   [(set (match_operand:V8SI 0 "register_operand" "=v")
 	(unspec:V8SI
 	  [(match_operand:V8DF 1 "<round_nimm_predicate>" "<round_constraint>")]
@@ -5802,6 +5802,23 @@
   DONE;
 })
 
+(define_expand "avx512f_vec_pack_sfix_v8df"
+  [(match_operand:V16SI 0 "register_operand")
+   (match_operand:V8DF 1 "nonimmediate_operand")
+   (match_operand:V8DF 2 "nonimmediate_operand")]
+  "TARGET_AVX512F"
+{
+  rtx r1, r2;
+
+  r1 = gen_reg_rtx (V8SImode);
+  r2 = gen_reg_rtx (V8SImode);
+
+  emit_insn (gen_avx512f_cvtpd2dq512 (r1, operands[1]));
+  emit_insn (gen_avx512f_cvtpd2dq512 (r2, operands[2]));
+  emit_insn (gen_avx_vec_concatv16si (operands[0], r1, r2));
+  DONE;
+})
+
 (define_expand "vec_pack_sfix_v4df"
   [(match_operand:V8SI 0 "register_operand")
    (match_operand:V4DF 1 "nonimmediate_operand")
@@ -15020,13 +15037,25 @@
   DONE;
 })
 
-(define_expand "avx512f_roundpd512"
-  [(match_operand:V8DF 0 "register_operand")
-   (match_operand:V8DF 1 "nonimmediate_operand")
+(define_expand "avx512f_round<castmode>512"
+  [(match_operand:VF_512 0 "register_operand")
+   (match_operand:VF_512 1 "nonimmediate_operand")
+   (match_operand:SI 2 "const_0_to_15_operand")]
+  "TARGET_AVX512F"
+{
+  emit_insn (gen_avx512f_rndscale<mode> (operands[0], operands[1], operands[2]));
+  DONE;
+})
+
+(define_expand "avx512f_roundps512_sfix"
+  [(match_operand:V16SI 0 "register_operand")
+   (match_operand:V16SF 1 "nonimmediate_operand")
    (match_operand:SI 2 "const_0_to_15_operand")]
   "TARGET_AVX512F"
 {
-  emit_insn (gen_avx512f_rndscalev8df (operands[0], operands[1], operands[2]));
+  rtx tmp = gen_reg_rtx (V16SFmode);
+  emit_insn (gen_avx512f_rndscalev16sf (tmp, operands[1], operands[2]));
+  emit_insn (gen_fix_truncv16sfv16si2 (operands[0], tmp));
   DONE;
 })
 
@@ -15127,7 +15156,7 @@
 
 (define_expand "round<mode>2_sfix"
   [(match_operand:<sseintvecmode> 0 "register_operand")
-   (match_operand:VF1_128_256 1 "register_operand")]
+   (match_operand:VF1 1 "register_operand")]
   "TARGET_ROUND && !flag_trapping_math"
 {
   rtx tmp = gen_reg_rtx (<MODE>mode);
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceil-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-ceil-vec-1.c
new file mode 100644
index 0000000..fc48b15
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceil-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+  int i, sign = 1;
+  double f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  double a[NUM];
+  double r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = ceil (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != ceil (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceil-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-ceil-vec-2.c
new file mode 100644
index 0000000..bf8af06
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceil-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-ceil-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceilf-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-sfix-vec-1.c
new file mode 100644
index 0000000..c6d53d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-sfix-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  int r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = (int) ceilf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) ceilf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceilf-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-sfix-vec-2.c
new file mode 100644
index 0000000..80e594d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-ceilf-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[^\n\]+zmm\[0-9\].{7}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceilf-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-vec-1.c
new file mode 100644
index 0000000..4788825
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  float r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = ceilf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != ceilf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-ceilf-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-vec-2.c
new file mode 100644
index 0000000..95a79e2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-ceilf-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-ceilf-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floor-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-floor-vec-1.c
new file mode 100644
index 0000000..b7cbed0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floor-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+  int i, sign = 1;
+  double f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  double a[NUM];
+  double r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = floor (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != floor (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floor-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-floor-vec-2.c
new file mode 100644
index 0000000..0d401f7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floor-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-floor-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floorf-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-floorf-sfix-vec-1.c
new file mode 100644
index 0000000..6a25f43
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floorf-sfix-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  int r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = (int) floorf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) floorf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floorf-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-floorf-sfix-vec-2.c
new file mode 100644
index 0000000..f4bfec5
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floorf-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-floorf-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[^\n\]+zmm\[0-9\].{7}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floorf-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-floorf-vec-1.c
new file mode 100644
index 0000000..69fc73d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floorf-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  float r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = floorf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != floorf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-floorf-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-floorf-vec-2.c
new file mode 100644
index 0000000..90c6c0f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-floorf-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-floorf-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-rint-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-rint-sfix-vec-1.c
new file mode 100644
index 0000000..8e1745a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-rint-sfix-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+  int i, sign = 1;
+  double f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  double a[NUM];
+  int r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = (int) rint (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) rint (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-rint-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-rint-sfix-vec-2.c
new file mode 100644
index 0000000..c3f78ac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-rint-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-rint-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vcvtpd2dq\[^\n\]+ymm\[0-9\](?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vinserti64x4\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-rintf-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-rintf-sfix-vec-1.c
new file mode 100644
index 0000000..ac3e9a2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-rintf-sfix-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  int r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = (int) rintf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) rintf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-rintf-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-rintf-sfix-vec-2.c
new file mode 100644
index 0000000..c172e61
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-rintf-sfix-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-rintf-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vcvtps2dq\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-round-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-round-sfix-vec-1.c
new file mode 100644
index 0000000..61bea57
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-round-sfix-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+  int i, sign = 1;
+  double f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  double a[NUM];
+  int r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = (int) round (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) round (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-round-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-round-sfix-vec-2.c
new file mode 100644
index 0000000..5982c65
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-round-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-round-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 2 } } */
+/* { dg-final { scan-assembler-times "vcvttpd2dq\[^\n\]+zmm\[0-9\].{7}(?:\n|\[ \\t\]+#)" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-roundf-sfix-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-roundf-sfix-vec-1.c
new file mode 100644
index 0000000..c5ec9e7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-roundf-sfix-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  int r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = (int) roundf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != (int) roundf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-roundf-sfix-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-roundf-sfix-vec-2.c
new file mode 100644
index 0000000..0d8abb8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-roundf-sfix-vec-2.c
@@ -0,0 +1,7 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-roundf-sfix-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vcvttps2dq\[^\n\]+zmm\[0-9\].{7}(?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-trunc-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-trunc-vec-1.c
new file mode 100644
index 0000000..dfb93d7
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-trunc-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (double *src)
+{
+  int i, sign = 1;
+  double f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  double a[NUM];
+  double r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = trunc (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != trunc (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-trunc-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-trunc-vec-2.c
new file mode 100644
index 0000000..e8ec022
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-trunc-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-trunc-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-truncf-vec-1.c b/gcc/testsuite/gcc.target/i386/avx512f-truncf-vec-1.c
new file mode 100644
index 0000000..db13e71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-truncf-vec-1.c
@@ -0,0 +1,52 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-skip-if "no M_PI" { vxworks_kernel } } */
+
+#define __NO_MATH_INLINES
+#include <math.h>
+#include "avx512f-check.h"
+
+#define NUM 64
+
+static void
+__attribute__((__target__("fpmath=sse")))
+init_src (float *src)
+{
+  int i, sign = 1;
+  float f = rand ();
+
+  for (i = 0; i < NUM; i++)
+    {
+      src[i] = (i + 1) * f * M_PI * sign;
+      if (i < (NUM / 2))
+	{
+          if ((i % 6) == 0)
+	    f = f * src[i];
+        }
+      else if (i == (NUM / 2))
+	f = rand ();
+      else if ((i % 6) == 0)
+	f = 1 / (f * (i + 1) * src[i] * M_PI * sign);
+      sign = -sign;
+    }
+}
+
+static void
+__attribute__((__target__("fpmath=387")))
+avx512f_test (void)
+{
+  float a[NUM];
+  float r[NUM];
+  int i;
+
+  init_src (a);
+
+  for (i = 0; i < NUM; i++)
+    r[i] = truncf (a[i]);
+
+  /* check results:  */
+  for (i = 0; i < NUM; i++)
+    if (r[i] != truncf (a[i]))
+      abort();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-truncf-vec-2.c b/gcc/testsuite/gcc.target/i386/avx512f-truncf-vec-2.c
new file mode 100644
index 0000000..ae542d8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-truncf-vec-2.c
@@ -0,0 +1,6 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx512f" } */
+
+#include "avx512f-truncf-vec-1.c"
+
+/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\]+zmm\[0-9\](?:\n|\[ \\t\]+#)" 1 } } */


  -- Ilya

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386, AVX-512] Add vectorizer support builtins
  2016-05-23 16:13 [PATCH, i386, AVX-512] Add vectorizer support builtins Ilya Verbin
@ 2016-05-25 13:28 ` Kirill Yukhin
  2016-06-02 14:20 ` Ilya Verbin
  1 sibling, 0 replies; 4+ messages in thread
From: Kirill Yukhin @ 2016-05-25 13:28 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: Uros Bizjak, gcc-patches

Hello Ilya.
On 23 May 19:11, Ilya Verbin wrote:
> Hi!
> 
> This patch adds missed 512-bit rounding builtins for vectorization.
> Regtested on x86_64-linux and i686-linux.  OK for trunk?
Patch is OK.

--
Thanks, K

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386, AVX-512] Add vectorizer support builtins
  2016-05-23 16:13 [PATCH, i386, AVX-512] Add vectorizer support builtins Ilya Verbin
  2016-05-25 13:28 ` Kirill Yukhin
@ 2016-06-02 14:20 ` Ilya Verbin
  2016-06-07 11:47   ` Kirill Yukhin
  1 sibling, 1 reply; 4+ messages in thread
From: Ilya Verbin @ 2016-06-02 14:20 UTC (permalink / raw)
  To: Uros Bizjak, Kirill Yukhin, Jakub Jelinek, Richard Biener; +Cc: gcc-patches

On Mon, May 23, 2016 at 19:11:53 +0300, Ilya Verbin wrote:
> This patch adds missed 512-bit rounding builtins for vectorization.
> Regtested on x86_64-linux and i686-linux.  OK for trunk?
> 
> gcc/
> 	* config/i386/i386-builtin-types.def: Add V16SI_FTYPE_V16SF,
> 	V8DF_FTYPE_V8DF_ROUND, V16SF_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF_ROUND.
> 	* config/i386/i386.c (enum ix86_builtins): Add
> 	IX86_BUILTIN_CVTPS2DQ512_MASK, IX86_BUILTIN_FLOORPS512,
> 	IX86_BUILTIN_FLOORPD512, IX86_BUILTIN_CEILPS512, IX86_BUILTIN_CEILPD512,
> 	IX86_BUILTIN_TRUNCPS512, IX86_BUILTIN_TRUNCPD512,
> 	IX86_BUILTIN_CVTPS2DQ512, IX86_BUILTIN_VEC_PACK_SFIX512,
> 	IX86_BUILTIN_FLOORPS_SFIX512, IX86_BUILTIN_CEILPS_SFIX512,
> 	IX86_BUILTIN_ROUNDPS_AZ_SFIX512.
> 	(builtin_description bdesc_args): Add __builtin_ia32_floorps512,
> 	__builtin_ia32_ceilps512, __builtin_ia32_truncps512,
> 	__builtin_ia32_floorpd512, __builtin_ia32_ceilpd512,
> 	__builtin_ia32_truncpd512, __builtin_ia32_cvtps2dq512,
> 	__builtin_ia32_vec_pack_sfix512, __builtin_ia32_roundps_az_sfix512,
> 	__builtin_ia32_floorps_sfix512, __builtin_ia32_ceilps_sfix512.
> 	Change IX86_BUILTIN_CVTPS2DQ512 to IX86_BUILTIN_CVTPS2DQ512_MASK for
> 	__builtin_ia32_cvtps2dq512_mask.
> 	(ix86_expand_args_builtin): Handle V8DF_FTYPE_V8DF_ROUND,
> 	V16SF_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF.
> 	(ix86_builtin_vectorized_function): Handle builtins mentioned above.
> 	* config/i386/sse.md
> 	(<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>):
> 	Rename to ...
> 	(avx512f_fix_notruncv16sfv16si<mask_name><round_name>): ... this.
> 	(<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>): Rename
> 	to ...
> 	(avx512f_cvtpd2dq512<mask_name><round_name>): ... this.
> 	(avx512f_vec_pack_sfix_v8df): New define_expand.
> 	(avx512f_roundpd512): Rename to ...
> 	(avx512f_round<castmode>512): ... this.  Change iterator.
> 	(avx512f_roundps512_sfix): New define_expand.
> 	(round<mode>2_sfix): Change iterator.
> gcc/testsuite/
> 	* gcc.target/i386/avx512f-ceil-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-ceil-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-ceilf-sfix-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-ceilf-sfix-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-ceilf-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-ceilf-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-floor-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-floor-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-floorf-sfix-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-floorf-sfix-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-floorf-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-floorf-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-rint-sfix-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-rint-sfix-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-rintf-sfix-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-rintf-sfix-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-round-sfix-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-round-sfix-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-roundf-sfix-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-roundf-sfix-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-trunc-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-trunc-vec-2.c: New test.
> 	* gcc.target/i386/avx512f-truncf-vec-1.c: New test.
> 	* gcc.target/i386/avx512f-truncf-vec-2.c: New test.

Is it OK for gcc-6-branch?

  -- Ilya

^ permalink raw reply	[flat|nested] 4+ messages in thread

* Re: [PATCH, i386, AVX-512] Add vectorizer support builtins
  2016-06-02 14:20 ` Ilya Verbin
@ 2016-06-07 11:47   ` Kirill Yukhin
  0 siblings, 0 replies; 4+ messages in thread
From: Kirill Yukhin @ 2016-06-07 11:47 UTC (permalink / raw)
  To: Ilya Verbin; +Cc: Uros Bizjak, Jakub Jelinek, Richard Biener, gcc-patches

On 02 Jun 17:17, Ilya Verbin wrote:
> On Mon, May 23, 2016 at 19:11:53 +0300, Ilya Verbin wrote:
> > This patch adds missed 512-bit rounding builtins for vectorization.
> > Regtested on x86_64-linux and i686-linux.  OK for trunk?
> > 
> > gcc/
> > 	* config/i386/i386-builtin-types.def: Add V16SI_FTYPE_V16SF,
> > 	V8DF_FTYPE_V8DF_ROUND, V16SF_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF_ROUND.
> > 	* config/i386/i386.c (enum ix86_builtins): Add
> > 	IX86_BUILTIN_CVTPS2DQ512_MASK, IX86_BUILTIN_FLOORPS512,
> > 	IX86_BUILTIN_FLOORPD512, IX86_BUILTIN_CEILPS512, IX86_BUILTIN_CEILPD512,
> > 	IX86_BUILTIN_TRUNCPS512, IX86_BUILTIN_TRUNCPD512,
> > 	IX86_BUILTIN_CVTPS2DQ512, IX86_BUILTIN_VEC_PACK_SFIX512,
> > 	IX86_BUILTIN_FLOORPS_SFIX512, IX86_BUILTIN_CEILPS_SFIX512,
> > 	IX86_BUILTIN_ROUNDPS_AZ_SFIX512.
> > 	(builtin_description bdesc_args): Add __builtin_ia32_floorps512,
> > 	__builtin_ia32_ceilps512, __builtin_ia32_truncps512,
> > 	__builtin_ia32_floorpd512, __builtin_ia32_ceilpd512,
> > 	__builtin_ia32_truncpd512, __builtin_ia32_cvtps2dq512,
> > 	__builtin_ia32_vec_pack_sfix512, __builtin_ia32_roundps_az_sfix512,
> > 	__builtin_ia32_floorps_sfix512, __builtin_ia32_ceilps_sfix512.
> > 	Change IX86_BUILTIN_CVTPS2DQ512 to IX86_BUILTIN_CVTPS2DQ512_MASK for
> > 	__builtin_ia32_cvtps2dq512_mask.
> > 	(ix86_expand_args_builtin): Handle V8DF_FTYPE_V8DF_ROUND,
> > 	V16SF_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF_ROUND, V16SI_FTYPE_V16SF.
> > 	(ix86_builtin_vectorized_function): Handle builtins mentioned above.
> > 	* config/i386/sse.md
> > 	(<mask_codefor>avx512f_fix_notruncv16sfv16si<mask_name><round_name>):
> > 	Rename to ...
> > 	(avx512f_fix_notruncv16sfv16si<mask_name><round_name>): ... this.
> > 	(<mask_codefor>avx512f_cvtpd2dq512<mask_name><round_name>): Rename
> > 	to ...
> > 	(avx512f_cvtpd2dq512<mask_name><round_name>): ... this.
> > 	(avx512f_vec_pack_sfix_v8df): New define_expand.
> > 	(avx512f_roundpd512): Rename to ...
> > 	(avx512f_round<castmode>512): ... this.  Change iterator.
> > 	(avx512f_roundps512_sfix): New define_expand.
> > 	(round<mode>2_sfix): Change iterator.
> > gcc/testsuite/
> > 	* gcc.target/i386/avx512f-ceil-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-ceil-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-ceilf-sfix-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-ceilf-sfix-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-ceilf-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-ceilf-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-floor-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-floor-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-floorf-sfix-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-floorf-sfix-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-floorf-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-floorf-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-rint-sfix-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-rint-sfix-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-rintf-sfix-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-rintf-sfix-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-round-sfix-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-round-sfix-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-roundf-sfix-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-roundf-sfix-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-trunc-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-trunc-vec-2.c: New test.
> > 	* gcc.target/i386/avx512f-truncf-vec-1.c: New test.
> > 	* gcc.target/i386/avx512f-truncf-vec-2.c: New test.
> 
> Is it OK for gcc-6-branch?
OK.
> 
>   -- Ilya
--
Thanks, K

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-06-07 11:47 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-05-23 16:13 [PATCH, i386, AVX-512] Add vectorizer support builtins Ilya Verbin
2016-05-25 13:28 ` Kirill Yukhin
2016-06-02 14:20 ` Ilya Verbin
2016-06-07 11:47   ` Kirill Yukhin

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).