public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r14-104] i386: Share AES xmm intrin with VAES
@ 2023-04-20 1:34 Haochen Jiang
0 siblings, 0 replies; only message in thread
From: Haochen Jiang @ 2023-04-20 1:34 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:24a8acc1662c37003a7b54814bf840019fec2190
commit r14-104-g24a8acc1662c37003a7b54814bf840019fec2190
Author: Haochen Jiang <haochen.jiang@intel.com>
Date: Fri Mar 10 13:40:09 2023 +0800
i386: Share AES xmm intrin with VAES
Currently in GCC, the 128 bit intrin for instruction vaes{end,dec}{last,}
is under AES ISA. Because there is no dependency between ISA set AES
and VAES, The 128 bit intrin is not available when we use compiler flag
-mvaes -mavx512vl and there is no other way to use that intrin. But it
should according to Intel SDM.
Although VAES aims to be a VEX/EVEX promotion for AES, but it is only part
of it. Therefore, we share the AES xmm intrin with VAES.
Also, since -mvaes indicates that we could use VEX encoding for ymm, we
should imply AVX for VAES.
gcc/ChangeLog:
* common/config/i386/i386-common.cc
(OPTION_MASK_ISA2_AVX_UNSET): Add OPTION_MASK_ISA2_VAES_UNSET.
(ix86_handle_option): Set AVX flag for VAES.
* config/i386/i386-builtins.cc (ix86_init_mmx_sse_builtins):
Add OPTION_MASK_ISA2_VAES_UNSET.
(def_builtin): Share builtin between AES and VAES.
* config/i386/i386-expand.cc (ix86_check_builtin_isa_match):
Ditto.
* config/i386/i386.md (aes): New isa attribute.
* config/i386/sse.md (aesenc): Add pattern for VAES with xmm.
(aesenclast): Ditto.
(aesdec): Ditto.
(aesdeclast): Ditto.
* config/i386/vaesintrin.h: Remove redundant avx target push.
* config/i386/wmmintrin.h (_mm_aesdec_si128): Change to macro.
(_mm_aesdeclast_si128): Ditto.
(_mm_aesenc_si128): Ditto.
(_mm_aesenclast_si128): Ditto.
gcc/testsuite/ChangeLog:
* gcc.target/i386/avx512fvl-vaes-1.c: Add VAES xmm test.
* gcc.target/i386/pr109117-1.c: Modify error message.
Diff:
---
gcc/common/config/i386/i386-common.cc | 5 +-
gcc/config/i386/i386-builtins.cc | 21 +++++----
gcc/config/i386/i386-expand.cc | 1 +
gcc/config/i386/i386.md | 3 +-
gcc/config/i386/sse.md | 60 +++++++++++++-----------
gcc/config/i386/vaesintrin.h | 4 +-
gcc/config/i386/wmmintrin.h | 29 ++++--------
gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c | 11 +++++
gcc/testsuite/gcc.target/i386/pr109117-1.c | 4 +-
9 files changed, 75 insertions(+), 63 deletions(-)
diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc
index c7954da8e34..bf126f14073 100644
--- a/gcc/common/config/i386/i386-common.cc
+++ b/gcc/common/config/i386/i386-common.cc
@@ -348,7 +348,8 @@ along with GCC; see the file COPYING3. If not see
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
OPTION_MASK_ISA2_SSE_UNSET
-#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
+#define OPTION_MASK_ISA2_AVX_UNSET \
+ (OPTION_MASK_ISA2_AVX2_UNSET | OPTION_MASK_ISA2_VAES_UNSET)
#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
@@ -685,6 +686,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_VAES_SET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_VAES_SET;
+ opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
+ opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
}
else
{
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fc0c82b156e..28f404da288 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -279,14 +279,15 @@ def_builtin (HOST_WIDE_INT mask, HOST_WIDE_INT mask2,
if (((mask2 == 0 || (mask2 & ix86_isa_flags2) != 0)
&& (mask == 0 || (mask & ix86_isa_flags) != 0))
|| ((mask & OPTION_MASK_ISA_MMX) != 0 && TARGET_MMX_WITH_SSE)
- /* "Unified" builtin used by either AVXVNNI/AVXIFMA intrinsics
- or AVX512VNNIVL/AVX512IFMAVL non-mask intrinsics should be
- defined whenever avxvnni/avxifma or avx512vnni/avxifma &&
- avx512vl exist. */
+ /* "Unified" builtin used by either AVXVNNI/AVXIFMA/AES intrinsics
+ or AVX512VNNIVL/AVX512IFMAVL/VAESVL non-mask intrinsics should be
+ defined whenever avxvnni/avxifma/aes or avx512vnni/avx512ifma/vaes
+ && avx512vl exist. */
|| (mask2 == OPTION_MASK_ISA2_AVXVNNI)
|| (mask2 == OPTION_MASK_ISA2_AVXIFMA)
|| (mask2 == (OPTION_MASK_ISA2_AVXNECONVERT
| OPTION_MASK_ISA2_AVX512BF16))
+ || ((mask2 & OPTION_MASK_ISA2_VAES) != 0)
|| (lang_hooks.builtin_function
== lang_hooks.builtin_function_ext_scope))
{
@@ -661,16 +662,20 @@ ix86_init_mmx_sse_builtins (void)
VOID_FTYPE_UNSIGNED_UNSIGNED, IX86_BUILTIN_MWAIT);
/* AES */
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+ OPTION_MASK_ISA2_VAES,
"__builtin_ia32_aesenc128",
V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENC128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+ OPTION_MASK_ISA2_VAES,
"__builtin_ia32_aesenclast128",
V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESENCLAST128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+ OPTION_MASK_ISA2_VAES,
"__builtin_ia32_aesdec128",
V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDEC128);
- def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
+ def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2,
+ OPTION_MASK_ISA2_VAES,
"__builtin_ia32_aesdeclast128",
V2DI_FTYPE_V2DI_V2DI, IX86_BUILTIN_AESDECLAST128);
def_builtin_const (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2, 0,
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index f692ddc02fa..634fe61ba79 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12624,6 +12624,7 @@ ix86_check_builtin_isa_match (unsigned int fcode,
OPTION_MASK_ISA2_AVXIFMA);
SHARE_BUILTIN (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_AVX512BF16, 0,
OPTION_MASK_ISA2_AVXNECONVERT);
+ SHARE_BUILTIN (OPTION_MASK_ISA_AES, 0, 0, OPTION_MASK_ISA2_VAES);
isa = tmp_isa;
isa2 = tmp_isa2;
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 8afa400bcf3..f8698ea903e 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -837,7 +837,7 @@
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
- x64_avx,x64_avx512bw,x64_avx512dq,
+ x64_avx,x64_avx512bw,x64_avx512dq,aes,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,noavx512f,
avx512bw,noavx512bw,avx512dq,noavx512dq,fma_or_avx512vl,
@@ -864,6 +864,7 @@
(symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
(eq_attr "isa" "x64_avx512dq")
(symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
+ (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
(eq_attr "isa" "sse_noavx")
(symbol_ref "TARGET_SSE && !TARGET_AVX")
(eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 5594ea621a8..f14a9c24ebd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -25108,67 +25108,71 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "aesenc"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
UNSPEC_AESENC))]
- "TARGET_AES"
+ "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesenc\t{%2, %0|%0, %2}
+ vaesenc\t{%2, %1, %0|%0, %1, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,aes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "double,double")
+ (set_attr "prefix" "orig,vex,evex")
+ (set_attr "btver2_decode" "double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesenclast"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
UNSPEC_AESENCLAST))]
- "TARGET_AES"
+ "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesenclast\t{%2, %0|%0, %2}
+ vaesenclast\t{%2, %1, %0|%0, %1, %2}
vaesenclast\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,aes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "double,double")
+ (set_attr "prefix" "orig,vex,evex")
+ (set_attr "btver2_decode" "double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesdec"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
UNSPEC_AESDEC))]
- "TARGET_AES"
+ "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesdec\t{%2, %0|%0, %2}
+ vaesdec\t{%2, %1, %0|%0, %1, %2}
vaesdec\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,aes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "double,double")
+ (set_attr "prefix" "orig,vex,evex")
+ (set_attr "btver2_decode" "double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesdeclast"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
- (match_operand:V2DI 2 "vector_operand" "xBm,xm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xBm,xm,vm")]
UNSPEC_AESDECLAST))]
- "TARGET_AES"
+ "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesdeclast\t{%2, %0|%0, %2}
+ vaesdeclast\t{%2, %1, %0|%0, %1, %2}
vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,avx")
+ [(set_attr "isa" "noavx,aes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex")
- (set_attr "btver2_decode" "double,double")
+ (set_attr "prefix" "orig,vex,evex")
+ (set_attr "btver2_decode" "double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesimc"
diff --git a/gcc/config/i386/vaesintrin.h b/gcc/config/i386/vaesintrin.h
index 0f1cffe71e9..58fc19c9eb3 100644
--- a/gcc/config/i386/vaesintrin.h
+++ b/gcc/config/i386/vaesintrin.h
@@ -24,9 +24,9 @@
#ifndef __VAESINTRIN_H_INCLUDED
#define __VAESINTRIN_H_INCLUDED
-#if !defined(__VAES__) || !defined(__AVX__)
+#if !defined(__VAES__)
#pragma GCC push_options
-#pragma GCC target("vaes,avx")
+#pragma GCC target("vaes")
#define __DISABLE_VAES__
#endif /* __VAES__ */
diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h
index ae15cea429e..da314dbd44d 100644
--- a/gcc/config/i386/wmmintrin.h
+++ b/gcc/config/i386/wmmintrin.h
@@ -40,36 +40,23 @@
/* Performs 1 round of AES decryption of the first m128i using
the second m128i as a round key. */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesdec_si128 (__m128i __X, __m128i __Y)
-{
- return (__m128i) __builtin_ia32_aesdec128 ((__v2di)__X, (__v2di)__Y);
-}
+#define _mm_aesdec_si128(X, Y) \
+ (__m128i) __builtin_ia32_aesdec128 ((__v2di) (X), (__v2di) (Y))
/* Performs the last round of AES decryption of the first m128i
using the second m128i as a round key. */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesdeclast_si128 (__m128i __X, __m128i __Y)
-{
- return (__m128i) __builtin_ia32_aesdeclast128 ((__v2di)__X,
- (__v2di)__Y);
-}
+#define _mm_aesdeclast_si128(X, Y) \
+ (__m128i) __builtin_ia32_aesdeclast128 ((__v2di) (X), (__v2di) (Y))
/* Performs 1 round of AES encryption of the first m128i using
the second m128i as a round key. */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesenc_si128 (__m128i __X, __m128i __Y)
-{
- return (__m128i) __builtin_ia32_aesenc128 ((__v2di)__X, (__v2di)__Y);
-}
+#define _mm_aesenc_si128(X, Y) \
+ (__m128i) __builtin_ia32_aesenc128 ((__v2di) (X), (__v2di) (Y))
/* Performs the last round of AES encryption of the first m128i
using the second m128i as a round key. */
-extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
-_mm_aesenclast_si128 (__m128i __X, __m128i __Y)
-{
- return (__m128i) __builtin_ia32_aesenclast128 ((__v2di)__X, (__v2di)__Y);
-}
+#define _mm_aesenclast_si128(X, Y) \
+ (__m128i) __builtin_ia32_aesenclast128 ((__v2di) (X), (__v2di) (Y))
/* Performs the InverseMixColumn operation on the source m128i
and stores the result into m128i destination. */
diff --git a/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c b/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c
index c65b570cd47..f35742ec98b 100644
--- a/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512fvl-vaes-1.c
@@ -10,10 +10,16 @@
/* { dg-final { scan-assembler-times "vaesenc\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vaesenclast\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+\[^\{\n\]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaesdec\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaesdeclast\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaesenc\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+/* { dg-final { scan-assembler-times "vaesenclast\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+\[^\{\n\]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
+
#include <immintrin.h>
volatile __m512i x,y;
volatile __m256i x256, y256;
+volatile __m128i x128, y128;
void extern
avx512f_test (void)
@@ -27,4 +33,9 @@ avx512f_test (void)
x256 = _mm256_aesdeclast_epi128 (x256, y256);
x256 = _mm256_aesenc_epi128 (x256, y256);
x256 = _mm256_aesenclast_epi128 (x256, y256);
+
+ x128 = _mm_aesdec_si128 (x128, y128);
+ x128 = _mm_aesdeclast_si128 (x128, y128);
+ x128 = _mm_aesenc_si128 (x128, y128);
+ x128 = _mm_aesenclast_si128 (x128, y128);
}
diff --git a/gcc/testsuite/gcc.target/i386/pr109117-1.c b/gcc/testsuite/gcc.target/i386/pr109117-1.c
index 87a5c0e7fc9..1c4da997c36 100644
--- a/gcc/testsuite/gcc.target/i386/pr109117-1.c
+++ b/gcc/testsuite/gcc.target/i386/pr109117-1.c
@@ -10,5 +10,5 @@ volatile __m128i res;
void
foo (void)
{
- res = __builtin_ia32_vaesdec_v16qi (x, y); /* { dg-warning "implicit declaration of function" } */
-} /* { dg-error "incompatible types when assigning to type" "" { target *-*-* } .-1 } */
+ res = __builtin_ia32_vaesdec_v16qi (x, y); /* { dg-error "incompatible types when assigning to type" } */
+}
^ permalink raw reply [flat|nested] only message in thread
only message in thread, other threads:[~2023-04-20 1:34 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-04-20 1:34 [gcc r14-104] i386: Share AES xmm intrin with VAES Haochen Jiang
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).