* Reducing the amount of builtins by merging named patterns
@ 2014-12-10 14:05 Blumental Maxim
0 siblings, 0 replies; 4+ messages in thread
From: Blumental Maxim @ 2014-12-10 14:05 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 746 bytes --]
Hello everyone.
I'm working on reducing the number of builtins in i386. My approach
is to merge similar patterns into one with a fake argument which
determines which instruction to print.
We have ~30 groups of similar (i.e. having similar sets of attributes)
named patterns. These groups together include ~230 template (i.e.
having substitution attributes in their names) named patterns in
total. So, we can reduce the number of template named patterns by ~200
at best. Those template named patterns correspond to several specific
named patterns each. E.g. in my patch (see attached patch) I merged
two template named patterns into one and that allowed me to replace
four builtins with only two.
Should I continue to work in that direction?
[-- Attachment #2: named_patts_merge.patch --]
[-- Type: application/octet-stream, Size: 16087 bytes --]
commit 8eac830489e1751edaa804daaeb93181ca08f200
Author: Maxim Blumenthal <maxim.blumenthal@intel.com>
Date: Tue Nov 25 13:58:54 2014 +0300
Replaced four builtins for avx512er with two.
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index f6870a5..5a4fb40 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -108,7 +108,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
__m512d __W;
- return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 1,
(__v8df) __W,
(__mmask8) -1, __R);
}
@@ -117,7 +117,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 1,
(__v8df) __W,
(__mmask8) __U, __R);
}
@@ -126,7 +126,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 1,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
}
@@ -136,7 +136,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
__m512 __W;
- return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 1,
(__v16sf) __W,
(__mmask16) -1, __R);
}
@@ -145,7 +145,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 1,
(__v16sf) __W,
(__mmask16) __U, __R);
}
@@ -154,7 +154,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 1,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
}
@@ -182,7 +182,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
__m512d __W;
- return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 0,
(__v8df) __W,
(__mmask8) -1, __R);
}
@@ -191,7 +191,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 0,
(__v8df) __W,
(__mmask8) __U, __R);
}
@@ -200,7 +200,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 0,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
}
@@ -210,7 +210,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
__m512 __W;
- return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 0,
(__v16sf) __W,
(__mmask16) -1, __R);
}
@@ -219,7 +219,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 0,
(__v16sf) __W,
(__mmask16) __U, __R);
}
@@ -228,7 +228,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 0,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
}
@@ -271,40 +271,40 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rcp28_round_pd(A, C) \
- __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 1, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
- __builtin_ia32_rcp28pd_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 1, W, U, C)
#define _mm512_maskz_rcp28_round_pd(U, A, C) \
- __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 1, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rcp28_round_ps(A, C) \
- __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 1, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
- __builtin_ia32_rcp28ps_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 1, W, U, C)
#define _mm512_maskz_rcp28_round_ps(U, A, C) \
- __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 1, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rsqrt28_round_pd(A, C) \
- __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 0, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
- __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 0, W, U, C)
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
- __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 0, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rsqrt28_round_ps(A, C) \
- __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 0, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
- __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 0, W, U, C)
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
- __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 0, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_rcp28_round_sd(A, B, R) \
__builtin_ia32_rcp28sd_round(A, B, R)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 211c9e6..ad3bb9b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30086,12 +30086,10 @@ enum ix86_builtins
IX86_BUILTIN_EXP2PD_MASK,
IX86_BUILTIN_EXP2PS_MASK,
IX86_BUILTIN_EXP2PS,
- IX86_BUILTIN_RCP28PD,
- IX86_BUILTIN_RCP28PS,
IX86_BUILTIN_RCP28SD,
IX86_BUILTIN_RCP28SS,
- IX86_BUILTIN_RSQRT28PD,
- IX86_BUILTIN_RSQRT28PS,
+ IX86_BUILTIN_VEC_UNARY_PD,
+ IX86_BUILTIN_VEC_UNARY_PS,
IX86_BUILTIN_RSQRT28SD,
IX86_BUILTIN_RSQRT28SS,
@@ -32965,12 +32963,10 @@ static const struct builtin_description bdesc_round_args[] =
/* AVX512ER */
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_vec_unary_v8df_mask_round, "__builtin_ia32_vec_unary_pd_mask", IX86_BUILTIN_VEC_UNARY_PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_vec_unary_v16sf_mask_round, "__builtin_ia32_vec_unary_ps_mask", IX86_BUILTIN_VEC_UNARY_PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca5d720..98830ef 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -119,6 +119,7 @@
UNSPEC_EXP2
UNSPEC_RCP28
UNSPEC_RSQRT28
+ UNSPEC_VEC_UNARY
;; For SHA support
UNSPEC_SHA1MSG1
@@ -15332,13 +15333,28 @@
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
-(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
+(define_c_enum "vec_unary" [RSQRT28
+ RCP28])
+
+(define_insn "<mask_codefor>vec_unary_<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
- UNSPEC_RCP28))]
+ [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 2 "const_0_to_1_operand" "")]
+ UNSPEC_VEC_UNARY))]
"TARGET_AVX512ER"
- "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ {
+ switch (INTVAL(operands[2]))
+ {
+ case RSQRT28:
+ return "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>}";
+ case RCP28:
+ return "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>}";
+ default:
+ gcc_unreachable ();
+ }
+
+ }
[(set_attr "prefix" "evex")
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
@@ -15358,17 +15374,6 @@
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
-(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
- UNSPEC_RSQRT28))]
- "TARGET_AVX512ER"
- "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
- [(set_attr "prefix" "evex")
- (set_attr "type" "sse")
- (set_attr "mode" "<MODE>")])
-
(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 0418d07..b9c7478 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -349,10 +349,8 @@
/* avx512erintrin.h */
#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8)
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask(A, B, C, 8)
-#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask(A, B, C, 8)
-#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask(A, B, C, 8)
-#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask(A, B, C, 8)
-#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask(A, B, C, 8)
+#define __builtin_ia32_vec_unary_ps_mask(A, B, C, D, E) __builtin_ia32_vec_unary_ps_mask(A, B, C, D, 8)
+#define __builtin_ia32_vec_unary_pd_mask(A, B, C, D, E) __builtin_ia32_vec_unary_pd_mask(A, B, C, D, 8)
#define __builtin_ia32_rcp28ss_round(A, B, C) __builtin_ia32_rcp28ss_round(A, B, 8)
#define __builtin_ia32_rcp28sd_round(A, B, C) __builtin_ia32_rcp28sd_round(A, B, 8)
#define __builtin_ia32_rsqrt28ss_round(A, B, C) __builtin_ia32_rsqrt28ss_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 104c63e..1c7ba56 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -64,10 +64,8 @@
/* avx512erintrin.h */
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 1)
#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 1)
-#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 1)
-#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 1)
-#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 1)
-#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 1)
+#define __builtin_ia32_vec_unary_pd_mask(A, B, C, D, E) __builtin_ia32_vec_unary_pd_mask (A, B, C, D, 1)
+#define __builtin_ia32_vec_unary_ps_mask(A, B, C, D, E) __builtin_ia32_vec_unary_ps_mask (A, B, C, D, 1)
/* wmmintrin.h */
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 9f81a8a..6439dc9 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -374,10 +374,8 @@
/* avx512erintrin.h */
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 8)
#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 8)
-#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 8)
-#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 8)
-#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 8)
-#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 8)
+#define __builtin_ia32_vec_unary_ps_mask(A, B, C, D, E) __builtin_ia32_vec_unary_ps_mask (A, B, C, D, 8)
+#define __builtin_ia32_vec_unary_pd_mask(A, B, C, D, E) __builtin_ia32_vec_unary_pd_mask (A, B, C, D, 8)
#define __builtin_ia32_rcp28sd_round(A, B, C) __builtin_ia32_rcp28sd_round(A, B, 8)
#define __builtin_ia32_rcp28ss_round(A, B, C) __builtin_ia32_rcp28ss_round(A, B, 8)
#define __builtin_ia32_rsqrt28sd_round(A, B, C) __builtin_ia32_rsqrt28sd_round(A, B, 8)
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Reducing the amount of builtins by merging named patterns
2014-12-10 16:16 ` Richard Henderson
@ 2014-12-10 23:54 ` Andi Kleen
0 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2014-12-10 23:54 UTC (permalink / raw)
To: Richard Henderson; +Cc: Blumental Maxim, gcc-patches
Richard Henderson <rth@redhat.com> writes:
> On 12/10/2014 06:08 AM, Blumental Maxim wrote:
>> We have ~30 groups of similar (i.e.having similar sets of attributes)
>> named patterns. These groups together include ~230 template (i.e.
>> having substitution attributes in their names) named patterns in
>> total. So, we can reduce the amount of template named patterns by ~200
>> at best. Those template named patterns correspond to several specific
>> named patterns each. E.g. in my patch (see attached patch) I merged
>> two template named patterns into one and that allowed me to replace
>> four builtin's with only two.
>
> I don't find this particularly readable or maintainable.
> What do you hope to gain here?
I assume it would make the compiler faster and maybe smaller?
But also users may be using the existing builtin names,
so some compat macros would be needed.
-Andi
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: Reducing the amount of builtins by merging named patterns
2014-12-10 14:09 Blumental Maxim
@ 2014-12-10 16:16 ` Richard Henderson
2014-12-10 23:54 ` Andi Kleen
0 siblings, 1 reply; 4+ messages in thread
From: Richard Henderson @ 2014-12-10 16:16 UTC (permalink / raw)
To: Blumental Maxim, gcc-patches
On 12/10/2014 06:08 AM, Blumental Maxim wrote:
> We have ~30 groups of similar (i.e.having similar sets of attributes)
> named patterns. These groups together include ~230 template (i.e.
> having substitution attributes in their names) named patterns in
> total. So, we can reduce the amount of template named patterns by ~200
> at best. Those template named patterns correspond to several specific
> named patterns each. E.g. in my patch (see attached patch) I merged
> two template named patterns into one and that allowed me to replace
> four builtin's with only two.
I don't find this particularly readable or maintainable.
What do you hope to gain here?
r~
^ permalink raw reply [flat|nested] 4+ messages in thread
* Reducing the amount of builtins by merging named patterns
@ 2014-12-10 14:09 Blumental Maxim
2014-12-10 16:16 ` Richard Henderson
0 siblings, 1 reply; 4+ messages in thread
From: Blumental Maxim @ 2014-12-10 14:09 UTC (permalink / raw)
To: gcc-patches
[-- Attachment #1: Type: text/plain, Size: 746 bytes --]
Hello everyone.
I'm working on reducing the number of builtins in i386. My approach
is to merge similar patterns into one with a fake argument which
determines which instruction to print.
We have ~30 groups of similar (i.e. having similar sets of attributes)
named patterns. These groups together include ~230 template (i.e.
having substitution attributes in their names) named patterns in
total. So, we can reduce the number of template named patterns by ~200
at best. Those template named patterns correspond to several specific
named patterns each. E.g. in my patch (see attached patch) I merged
two template named patterns into one and that allowed me to replace
four builtins with only two.
Should I continue to work in that direction?
[-- Attachment #2: named_patts_merge.patch --]
[-- Type: application/octet-stream, Size: 16087 bytes --]
commit 8eac830489e1751edaa804daaeb93181ca08f200
Author: Maxim Blumenthal <maxim.blumenthal@intel.com>
Date: Tue Nov 25 13:58:54 2014 +0300
Replaced four builtins for avx512er with two.
diff --git a/gcc/config/i386/avx512erintrin.h b/gcc/config/i386/avx512erintrin.h
index f6870a5..5a4fb40 100644
--- a/gcc/config/i386/avx512erintrin.h
+++ b/gcc/config/i386/avx512erintrin.h
@@ -108,7 +108,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_pd (__m512d __A, int __R)
{
__m512d __W;
- return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 1,
(__v8df) __W,
(__mmask8) -1, __R);
}
@@ -117,7 +117,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 1,
(__v8df) __W,
(__mmask8) __U, __R);
}
@@ -126,7 +126,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rcp28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 1,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
}
@@ -136,7 +136,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rcp28_round_ps (__m512 __A, int __R)
{
__m512 __W;
- return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 1,
(__v16sf) __W,
(__mmask16) -1, __R);
}
@@ -145,7 +145,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rcp28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 1,
(__v16sf) __W,
(__mmask16) __U, __R);
}
@@ -154,7 +154,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rcp28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rcp28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 1,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
}
@@ -182,7 +182,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_pd (__m512d __A, int __R)
{
__m512d __W;
- return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 0,
(__v8df) __W,
(__mmask8) -1, __R);
}
@@ -191,7 +191,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_pd (__m512d __W, __mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 0,
(__v8df) __W,
(__mmask8) __U, __R);
}
@@ -200,7 +200,7 @@ extern __inline __m512d
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_pd (__mmask8 __U, __m512d __A, int __R)
{
- return (__m512d) __builtin_ia32_rsqrt28pd_mask ((__v8df) __A,
+ return (__m512d) __builtin_ia32_vec_unary_pd_mask ((__v8df) __A, 0,
(__v8df) _mm512_setzero_pd (),
(__mmask8) __U, __R);
}
@@ -210,7 +210,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_rsqrt28_round_ps (__m512 __A, int __R)
{
__m512 __W;
- return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 0,
(__v16sf) __W,
(__mmask16) -1, __R);
}
@@ -219,7 +219,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_mask_rsqrt28_round_ps (__m512 __W, __mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 0,
(__v16sf) __W,
(__mmask16) __U, __R);
}
@@ -228,7 +228,7 @@ extern __inline __m512
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_mm512_maskz_rsqrt28_round_ps (__mmask16 __U, __m512 __A, int __R)
{
- return (__m512) __builtin_ia32_rsqrt28ps_mask ((__v16sf) __A,
+ return (__m512) __builtin_ia32_vec_unary_ps_mask ((__v16sf) __A, 0,
(__v16sf) _mm512_setzero_ps (),
(__mmask16) __U, __R);
}
@@ -271,40 +271,40 @@ _mm_rsqrt28_round_ss (__m128 __A, __m128 __B, int __R)
__builtin_ia32_exp2ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rcp28_round_pd(A, C) \
- __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 1, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rcp28_round_pd(W, U, A, C) \
- __builtin_ia32_rcp28pd_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 1, W, U, C)
#define _mm512_maskz_rcp28_round_pd(U, A, C) \
- __builtin_ia32_rcp28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 1, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rcp28_round_ps(A, C) \
- __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 1, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rcp28_round_ps(W, U, A, C) \
- __builtin_ia32_rcp28ps_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 1, W, U, C)
#define _mm512_maskz_rcp28_round_ps(U, A, C) \
- __builtin_ia32_rcp28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 1, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm512_rsqrt28_round_pd(A, C) \
- __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), -1, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 0, (__v8df)_mm512_setzero_pd(), -1, C)
#define _mm512_mask_rsqrt28_round_pd(W, U, A, C) \
- __builtin_ia32_rsqrt28pd_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 0, W, U, C)
#define _mm512_maskz_rsqrt28_round_pd(U, A, C) \
- __builtin_ia32_rsqrt28pd_mask(A, (__v8df)_mm512_setzero_pd(), U, C)
+ __builtin_ia32_vec_unary_pd_mask(A, 0, (__v8df)_mm512_setzero_pd(), U, C)
#define _mm512_rsqrt28_round_ps(A, C) \
- __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), -1, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 0, (__v16sf)_mm512_setzero_ps(), -1, C)
#define _mm512_mask_rsqrt28_round_ps(W, U, A, C) \
- __builtin_ia32_rsqrt28ps_mask(A, W, U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 0, W, U, C)
#define _mm512_maskz_rsqrt28_round_ps(U, A, C) \
- __builtin_ia32_rsqrt28ps_mask(A, (__v16sf)_mm512_setzero_ps(), U, C)
+ __builtin_ia32_vec_unary_ps_mask(A, 0, (__v16sf)_mm512_setzero_ps(), U, C)
#define _mm_rcp28_round_sd(A, B, R) \
__builtin_ia32_rcp28sd_round(A, B, R)
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 211c9e6..ad3bb9b 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -30086,12 +30086,10 @@ enum ix86_builtins
IX86_BUILTIN_EXP2PD_MASK,
IX86_BUILTIN_EXP2PS_MASK,
IX86_BUILTIN_EXP2PS,
- IX86_BUILTIN_RCP28PD,
- IX86_BUILTIN_RCP28PS,
IX86_BUILTIN_RCP28SD,
IX86_BUILTIN_RCP28SS,
- IX86_BUILTIN_RSQRT28PD,
- IX86_BUILTIN_RSQRT28PS,
+ IX86_BUILTIN_VEC_UNARY_PD,
+ IX86_BUILTIN_VEC_UNARY_PS,
IX86_BUILTIN_RSQRT28SD,
IX86_BUILTIN_RSQRT28SS,
@@ -32965,12 +32963,10 @@ static const struct builtin_description bdesc_round_args[] =
/* AVX512ER */
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v8df_mask_round, "__builtin_ia32_exp2pd_mask", IX86_BUILTIN_EXP2PD_MASK, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_exp2v16sf_mask_round, "__builtin_ia32_exp2ps_mask", IX86_BUILTIN_EXP2PS_MASK, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v8df_mask_round, "__builtin_ia32_rcp28pd_mask", IX86_BUILTIN_RCP28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rcp28v16sf_mask_round, "__builtin_ia32_rcp28ps_mask", IX86_BUILTIN_RCP28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v2df_round, "__builtin_ia32_rcp28sd_round", IX86_BUILTIN_RCP28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrcp28v4sf_round, "__builtin_ia32_rcp28ss_round", IX86_BUILTIN_RCP28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v8df_mask_round, "__builtin_ia32_rsqrt28pd_mask", IX86_BUILTIN_RSQRT28PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_QI_INT },
- { OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_rsqrt28v16sf_mask_round, "__builtin_ia32_rsqrt28ps_mask", IX86_BUILTIN_RSQRT28PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_HI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_vec_unary_v8df_mask_round, "__builtin_ia32_vec_unary_pd_mask", IX86_BUILTIN_VEC_UNARY_PD, UNKNOWN, (int) V8DF_FTYPE_V8DF_INT_V8DF_QI_INT },
+ { OPTION_MASK_ISA_AVX512ER, CODE_FOR_vec_unary_v16sf_mask_round, "__builtin_ia32_vec_unary_ps_mask", IX86_BUILTIN_VEC_UNARY_PS, UNKNOWN, (int) V16SF_FTYPE_V16SF_INT_V16SF_HI_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v2df_round, "__builtin_ia32_rsqrt28sd_round", IX86_BUILTIN_RSQRT28SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX512ER, CODE_FOR_avx512er_vmrsqrt28v4sf_round, "__builtin_ia32_rsqrt28ss_round", IX86_BUILTIN_RSQRT28SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ca5d720..98830ef 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -119,6 +119,7 @@
UNSPEC_EXP2
UNSPEC_RCP28
UNSPEC_RSQRT28
+ UNSPEC_VEC_UNARY
;; For SHA support
UNSPEC_SHA1MSG1
@@ -15332,13 +15333,28 @@
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
-(define_insn "<mask_codefor>avx512er_rcp28<mode><mask_name><round_saeonly_name>"
+(define_c_enum "vec_unary" [RSQRT28
+ RCP28])
+
+(define_insn "<mask_codefor>vec_unary_<mode><mask_name><round_saeonly_name>"
[(set (match_operand:VF_512 0 "register_operand" "=v")
(unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
- UNSPEC_RCP28))]
+ [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")
+ (match_operand:SI 2 "const_0_to_1_operand" "")]
+ UNSPEC_VEC_UNARY))]
"TARGET_AVX512ER"
- "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
+ {
+ switch (INTVAL(operands[2]))
+ {
+ case RSQRT28:
+ return "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>}";
+ case RCP28:
+ return "vrcp28<ssemodesuffix>\t{<round_saeonly_mask_op3>%1, %0<mask_operand3>|%0<mask_operand3>, %1<round_saeonly_mask_op3>}";
+ default:
+ gcc_unreachable ();
+ }
+
+ }
[(set_attr "prefix" "evex")
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
@@ -15358,17 +15374,6 @@
(set_attr "type" "sse")
(set_attr "mode" "<MODE>")])
-(define_insn "<mask_codefor>avx512er_rsqrt28<mode><mask_name><round_saeonly_name>"
- [(set (match_operand:VF_512 0 "register_operand" "=v")
- (unspec:VF_512
- [(match_operand:VF_512 1 "<round_saeonly_nimm_predicate>" "<round_saeonly_constraint>")]
- UNSPEC_RSQRT28))]
- "TARGET_AVX512ER"
- "vrsqrt28<ssemodesuffix>\t{<round_saeonly_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_saeonly_mask_op2>}"
- [(set_attr "prefix" "evex")
- (set_attr "type" "sse")
- (set_attr "mode" "<MODE>")])
-
(define_insn "avx512er_vmrsqrt28<mode><round_saeonly_name>"
[(set (match_operand:VF_128 0 "register_operand" "=v")
(vec_merge:VF_128
diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c
index 0418d07..b9c7478 100644
--- a/gcc/testsuite/gcc.target/i386/avx-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx-1.c
@@ -349,10 +349,8 @@
/* avx512erintrin.h */
#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask(A, B, C, 8)
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask(A, B, C, 8)
-#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask(A, B, C, 8)
-#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask(A, B, C, 8)
-#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask(A, B, C, 8)
-#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask(A, B, C, 8)
+#define __builtin_ia32_vec_unary_ps_mask(A, B, C, D, E) __builtin_ia32_vec_unary_ps_mask(A, B, C, D, 8)
+#define __builtin_ia32_vec_unary_pd_mask(A, B, C, D, E) __builtin_ia32_vec_unary_pd_mask(A, B, C, D, 8)
#define __builtin_ia32_rcp28ss_round(A, B, C) __builtin_ia32_rcp28ss_round(A, B, 8)
#define __builtin_ia32_rcp28sd_round(A, B, C) __builtin_ia32_rcp28sd_round(A, B, 8)
#define __builtin_ia32_rsqrt28ss_round(A, B, C) __builtin_ia32_rsqrt28ss_round(A, B, 8)
diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c
index 104c63e..1c7ba56 100644
--- a/gcc/testsuite/gcc.target/i386/sse-13.c
+++ b/gcc/testsuite/gcc.target/i386/sse-13.c
@@ -64,10 +64,8 @@
/* avx512erintrin.h */
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 1)
#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 1)
-#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 1)
-#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 1)
-#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 1)
-#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 1)
+#define __builtin_ia32_vec_unary_pd_mask(A, B, C, D, E) __builtin_ia32_vec_unary_pd_mask (A, B, C, D, 1)
+#define __builtin_ia32_vec_unary_ps_mask(A, B, C, D, E) __builtin_ia32_vec_unary_ps_mask (A, B, C, D, 1)
/* wmmintrin.h */
#define __builtin_ia32_aeskeygenassist128(X, C) __builtin_ia32_aeskeygenassist128(X, 1)
diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c
index 9f81a8a..6439dc9 100644
--- a/gcc/testsuite/gcc.target/i386/sse-23.c
+++ b/gcc/testsuite/gcc.target/i386/sse-23.c
@@ -374,10 +374,8 @@
/* avx512erintrin.h */
#define __builtin_ia32_exp2pd_mask(A, B, C, D) __builtin_ia32_exp2pd_mask (A, B, C, 8)
#define __builtin_ia32_exp2ps_mask(A, B, C, D) __builtin_ia32_exp2ps_mask (A, B, C, 8)
-#define __builtin_ia32_rcp28pd_mask(A, B, C, D) __builtin_ia32_rcp28pd_mask (A, B, C, 8)
-#define __builtin_ia32_rcp28ps_mask(A, B, C, D) __builtin_ia32_rcp28ps_mask (A, B, C, 8)
-#define __builtin_ia32_rsqrt28pd_mask(A, B, C, D) __builtin_ia32_rsqrt28pd_mask (A, B, C, 8)
-#define __builtin_ia32_rsqrt28ps_mask(A, B, C, D) __builtin_ia32_rsqrt28ps_mask (A, B, C, 8)
+#define __builtin_ia32_vec_unary_ps_mask(A, B, C, D, E) __builtin_ia32_vec_unary_ps_mask (A, B, C, D, 8)
+#define __builtin_ia32_vec_unary_pd_mask(A, B, C, D, E) __builtin_ia32_vec_unary_pd_mask (A, B, C, D, 8)
#define __builtin_ia32_rcp28sd_round(A, B, C) __builtin_ia32_rcp28sd_round(A, B, 8)
#define __builtin_ia32_rcp28ss_round(A, B, C) __builtin_ia32_rcp28ss_round(A, B, 8)
#define __builtin_ia32_rsqrt28sd_round(A, B, C) __builtin_ia32_rsqrt28sd_round(A, B, 8)
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads: [~2014-12-10 23:54 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-12-10 14:05 Reducing the amount of builtins by merging named patterns Blumental Maxim
2014-12-10 14:09 Blumental Maxim
2014-12-10 16:16 ` Richard Henderson
2014-12-10 23:54 ` Andi Kleen
This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).