public inbox for gcc-bugs@sourceware.org help / color / mirror / Atom feed
* [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled @ 2024-04-03 17:36 thiago at kde dot org 2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org ` (6 more replies) 0 siblings, 7 replies; 8+ messages in thread From: thiago at kde dot org @ 2024-04-03 17:36 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 Bug ID: 114576 Summary: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled Product: gcc Version: 14.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: target Assignee: unassigned at gcc dot gnu.org Reporter: thiago at kde dot org Target Milestone: --- Re: https://bugreports.qt.io/browse/QTBUG-123965 Re: https://bugzilla.redhat.com/show_bug.cgi?id=2262640, https://bugzilla.redhat.com/show_bug.cgi?id=2272758 Godbolt link: https://gcc.godbolt.org/z/6P9fMvoxW Found while compiling Qt 6.6 or 6.7 with GCC 14 (current trunk). This is a regression from GCC 13. This function from qhash.cpp <https://github.com/qt/qtbase/blob/v6.7.0/src/corelib/tools/qhash.cpp#L581-L588>: Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const { { // unlike the Go code, we don't have more per-process seed __m128i state1 = _mm_aesenc_si128(state0, mseed2); return state1; } } Is apparently getting assembled to: .L2: leaq (%rdi,%rsi), %rdx vaesenc %xmm1, %xmm0, %xmm1 Though there's no AVX enabled in this code (the original version in Qt has some AVX/VAES and AVX512 code but the reduced example does not). This function: // hash twice 16 bytes, running 2 scramble rounds of AES on itself static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const __m128i *src1) { __m128i data0 = _mm_loadu_si128(src0); __m128i data1 = _mm_loadu_si128(src1); state0 = _mm_xor_si128(data0, state0); state1 = _mm_xor_si128(data1, state1); state0 = _mm_aesenc_si128(state0, state0); state1 = _mm_aesenc_si128(state1, state1); state0 = _mm_aesenc_si128(state0, state0); state1 = _mm_aesenc_si128(state1, state1); } Is even emitting: .L20: movdqu (%rax), %xmm2 pxor %xmm0, %xmm2 movdqu -16(%rdx), %xmm0 pxor %xmm0, %xmm1 vaesenc %xmm2, %xmm2, %xmm0 aesenc %xmm1, %xmm1 aesenc %xmm0, %xmm0 aesenc %xmm1, %xmm1 and that makes no sense to use AVX for one of four instructions alone, called from the same source function. For reference, GCC 13 generates respectively: .L2: movdqa %xmm0, %xmm1 leaq (%rdi,%rsi), %rdx aesenc %xmm2, %xmm1 and .L20: movdqu (%rax), %xmm2 pxor %xmm0, %xmm2 movdqu -16(%rdx), %xmm0 aesenc %xmm2, %xmm2 pxor %xmm0, %xmm1 movdqa %xmm2, %xmm0 aesenc %xmm1, %xmm1 aesenc %xmm2, %xmm0 aesenc %xmm1, %xmm1 You can tell that they are the same source block because the labels are the same. Sources: #include <immintrin.h> #ifdef _MSC_VER # define Q_ALWAYS_INLINE __forceinline # define QT_VECTORCALL __vectorcall # define QT_FUNCTION_TARGET(x) #else # define Q_ALWAYS_INLINE inline __attribute__((always_inline)) # define QT_VECTORCALL # define QT_FUNCTION_TARGET(x) __attribute__((target(QT_FUNCTION_TARGET_##x))) # define QT_FUNCTION_TARGET_AES "sse4.2,aes" //# define qCpuHasFeature(x) __builtin_cpu_supports(QT_FUNCTION_TARGET_ ## x) #endif #define QT_COMPILER_SUPPORTS_HERE(x) true # define mm_set1_epz _mm_set1_epi64x # define mm_cvtsz_si128 _mm_cvtsi64_si128 # define mm_cvtsi128_sz _mm_cvtsi128_si64 # define mm256_set1_epz _mm256_set1_epi64x extern bool qCpuHasFeature(const char *) noexcept; #define qCpuHasFeature(x) qCpuHasFeature(#x) using uchar = unsigned char; using quintptr = unsigned long long; using qint8 = signed char; // hash 16 bytes, running 3 scramble rounds of AES on itself (like label "final1") static void Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) QT_VECTORCALL hash16bytes(__m128i &state0, __m128i data) { state0 = _mm_xor_si128(state0, data); state0 = _mm_aesenc_si128(state0, state0); state0 = _mm_aesenc_si128(state0, state0); state0 = _mm_aesenc_si128(state0, state0); } // hash twice 16 bytes, running 2 scramble rounds of AES on itself static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const __m128i *src1) { __m128i data0 = _mm_loadu_si128(src0); __m128i data1 = _mm_loadu_si128(src1); state0 = _mm_xor_si128(data0, state0); state1 = _mm_xor_si128(data1, state1); state0 = _mm_aesenc_si128(state0, state0); state1 = _mm_aesenc_si128(state1, state1); state0 = _mm_aesenc_si128(state0, state0); state1 = _mm_aesenc_si128(state1, state1); } struct AESHashSeed { __m128i state0; __m128i mseed2; AESHashSeed(size_t seed, size_t seed2) QT_FUNCTION_TARGET(AES); __m128i state1() const QT_FUNCTION_TARGET(AES); }; Q_ALWAYS_INLINE AESHashSeed::AESHashSeed(size_t seed, size_t seed2) { __m128i mseed = mm_cvtsz_si128(seed); mseed2 = mm_set1_epz(seed2); // mseed (epi16) = [ seed, seed >> 16, seed >> 32, seed >> 48, len, 0, 0, 0 ] mseed = _mm_insert_epi16(mseed, short(seed), 4); // mseed (epi16) = [ seed, seed >> 16, seed >> 32, seed >> 48, len, len, len, len ] mseed = _mm_shufflehi_epi16(mseed, 0); // merge with the process-global seed __m128i key = _mm_xor_si128(mseed, mseed2); // scramble the key __m128i state0 = _mm_aesenc_si128(key, key); this->state0 = state0; } Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const { { // unlike the Go code, we don't have more per-process seed __m128i state1 = _mm_aesenc_si128(state0, mseed2); return state1; } } static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const __m128i *srcend) { { if (src + 1 < srcend) { // epilogue: between 16 and 31 bytes hash2x16bytes(state0, state1, src, srcend - 1); } else if (src != srcend) { // epilogue: between 1 and 16 bytes, overlap with the end __m128i data = _mm_loadu_si128(srcend - 1); hash16bytes(state0, data); } // combine results: state0 = _mm_xor_si128(state0, state1); } return mm_cvtsi128_sz(state0); } static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL aeshash128_lt16(__m128i state0, const uchar *p, size_t len) { if (len) { // We're going to load 16 bytes and mask zero the part we don't care // (the hash of a short string is different from the hash of a longer // including NULLs at the end because the length is in the key) // WARNING: this may produce valgrind warnings, but it's safe constexpr quintptr PageSize = 4096; __m128i data; if ((quintptr(p) & (PageSize / 2)) == 0) { // lower half of the page: // load all 16 bytes and mask off the bytes past the end of the source static const qint8 maskarray[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, }; __m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i *>(maskarray + 15 - len)); data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p)); data = _mm_and_si128(data, mask); } else { // upper half of the page: // load 16 bytes ending at the data end, then shuffle them to the beginning static const qint8 shufflecontrol[] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 }; __m128i control = _mm_loadu_si128(reinterpret_cast<const __m128i *>(shufflecontrol + 15 - len)); data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p + len) - 1); data = _mm_shuffle_epi8(data, control); } hash16bytes(state0, data); } return mm_cvtsi128_sz(state0); } static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL aeshash128_ge32(__m128i state0, __m128i state1, const __m128i *src, const __m128i *srcend) { // main loop: scramble two 16-byte blocks for ( ; src + 2 < srcend; src += 2) hash2x16bytes(state0, state1, src, src + 1); return aeshash128_16to32(state0, state1, src, srcend); } static size_t QT_FUNCTION_TARGET(AES) aeshash128(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept { AESHashSeed state(seed, seed2); auto src = reinterpret_cast<const __m128i *>(p); const auto srcend = reinterpret_cast<const __m128i *>(p + len); if (len < sizeof(__m128i)) return aeshash128_lt16(state.state0, p, len); if (len <= sizeof(__m256i)) return aeshash128_16to32(state.state0, state.state1(), src, srcend); return aeshash128_ge32(state.state0, state.state1(), src, srcend); } static size_t aeshash(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept { return aeshash128(p, len, seed, seed2); } extern size_t qt_qhash_seed; size_t qHashBits(const void *p, size_t size, size_t seed) noexcept { size_t seed2 = size; if (seed) seed2 = qt_qhash_seed; return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2); } ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org @ 2024-04-03 17:41 ` pinskia at gcc dot gnu.org 2024-04-03 17:46 ` pinskia at gcc dot gnu.org ` (5 subsequent siblings) 6 siblings, 0 replies; 8+ messages in thread From: pinskia at gcc dot gnu.org @ 2024-04-03 17:41 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 Andrew Pinski <pinskia at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- Summary|[13 |[14 |regression][config/i386] |regression][config/i386] |GCC 14/trunk emits |GCC 14/trunk emits |VEX-prefixed AES |VEX-prefixed AES |instruction without AVX |instruction without AVX |enabled |enabled Target Milestone|--- |14.0 Keywords| |wrong-code --- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> --- (define_insn "aesenc" [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")] UNSPEC_AESENC))] "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesenc\t{%2, %0|%0, %2} vaesenc\t{%2, %1, %0|%0, %1, %2} vaesenc\t{%2, %1, %0|%0, %1, %2}" [(set_attr "isa" "noavx,aes,avx512vl") (set_attr "type" "sselog1") (set_attr "addr" "gpr16,*,*") (set_attr "prefix_extra" "1") (set_attr "prefix" "orig,vex,evex") (set_attr "btver2_decode" "double,double,double") (set_attr "mode" "TI")]) r14-104-g24a8acc1662c37 Also, since -mvaes indicates that we could use VEX encoding for ymm, we should imply AVX for VAES. ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org 2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org @ 2024-04-03 17:46 ` pinskia at gcc dot gnu.org 2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org ` (4 subsequent siblings) 6 siblings, 0 replies; 8+ messages in thread From: pinskia at gcc dot gnu.org @ 2024-04-03 17:46 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 Andrew Pinski <pinskia at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- Ever confirmed|0 |1 Last reconfirmed| |2024-04-03 Status|UNCONFIRMED |NEW --- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> --- Something like this should fix it (but I am not 100% sure it is correct nor can I test it): ``` apinski@xeond:~/src/upstream-gcc-match/gcc/gcc/config/i386$ git diff diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 6ac401154e4..82cf92653f4 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -916,6 +916,7 @@ (define_attr "enabled" "" (eq_attr "isa" "x64_avx512dq") (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ") (eq_attr "isa" "aes") (symbol_ref "TARGET_AES") + (eq_attr "isa" "vaes") (symbol_ref "TARGET_VAES") (eq_attr "isa" "sse_noavx") (symbol_ref "TARGET_SSE && !TARGET_AVX") (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2") diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 3286d3a4fac..af79f3b126d 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -26286,7 +26286,7 @@ (define_insn "aesenc" aesenc\t{%2, %0|%0, %2} vaesenc\t{%2, %1, %0|%0, %1, %2} vaesenc\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,vaes,avx512vl") (set_attr "type" "sselog1") (set_attr "addr" "gpr16,*,*") (set_attr "prefix_extra" "1") @@ -26304,7 +26304,7 @@ (define_insn "aesenclast" aesenclast\t{%2, %0|%0, %2} vaesenclast\t{%2, %1, %0|%0, %1, %2} vaesenclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,vaes,avx512vl") (set_attr "type" "sselog1") (set_attr "addr" "gpr16,*,*") (set_attr "prefix_extra" "1") @@ -26322,7 +26322,7 @@ (define_insn "aesdec" aesdec\t{%2, %0|%0, %2} vaesdec\t{%2, %1, %0|%0, %1, %2} vaesdec\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,vaes,avx512vl") (set_attr "type" "sselog1") (set_attr "addr" "gpr16,*,*") (set_attr "prefix_extra" "1") @@ -26340,7 +26340,7 @@ (define_insn "aesdeclast" aesdeclast\t{%2, %0|%0, %2} vaesdeclast\t{%2, %1, %0|%0, %1, %2} vaesdeclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,vaes,avx512vl") (set_attr "addr" "gpr16,*,*") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") ``` ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression]VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org 2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org 2024-04-03 17:46 ` pinskia at gcc dot gnu.org @ 2024-04-03 22:39 ` jakub at gcc dot gnu.org 2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org ` (3 subsequent siblings) 6 siblings, 0 replies; 8+ messages in thread From: jakub at gcc dot gnu.org @ 2024-04-03 22:39 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 Jakub Jelinek <jakub at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |jakub at gcc dot gnu.org --- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> --- (In reply to Andrew Pinski from comment #2) > Something like this should fix it (but I am not 100% sure it is correct nor > can I test it): This is IMHO not correct. vaesenc etc. instructions can be used even if just -maes -mavx, not just -mvaes -mavx512vl. But, it is especially messy because -mvaes doesn't imply -maes, so IMHO if somebody e.g. asks for -mvaes -mavx512vl -mno-aes and the insns don't use any xmm16+ register, it would emit the insn using VEX encoding rather than EVEX, so I think we need to use {evex} prefixes. So I think we want: --- gcc/config/i386/i386.md.jj 2024-03-18 10:33:27.983419363 +0100 +++ gcc/config/i386/i386.md 2024-04-04 00:17:48.818340648 +0200 @@ -568,13 +568,14 @@ (define_attr "unit" "integer,i387,sse,mm ;; Used to control the "enabled" attribute on a per-instruction basis. (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx, - x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd, + x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd, sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx, avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512, noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq, noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni, avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert, - avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl" + avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl, + aes_avx,vaes_avx512vl" (const_string "base")) ;; The (bounding maximum) length of an instruction immediate. @@ -915,7 +916,6 @@ (define_attr "enabled" "" (symbol_ref "TARGET_64BIT && TARGET_AVX512BW") (eq_attr "isa" "x64_avx512dq") (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ") - (eq_attr "isa" "aes") (symbol_ref "TARGET_AES") (eq_attr "isa" "sse_noavx") (symbol_ref "TARGET_SSE && !TARGET_AVX") (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2") @@ -968,6 +968,10 @@ (define_attr "enabled" "" (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL") (eq_attr "isa" "apx_ndd") (symbol_ref "TARGET_APX_NDD") + (eq_attr "isa" "aes_avx") + (symbol_ref "TARGET_AES && TARGET_AVX") + (eq_attr "isa" "vaes_avx512vl") + (symbol_ref "TARGET_VAES && TARGET_AVX512VL") (eq_attr "mmx_isa" "native") (symbol_ref "!TARGET_MMX_WITH_SSE") --- gcc/config/i386/sse.md.jj 2024-03-18 08:58:45.942772799 +0100 +++ gcc/config/i386/sse.md 2024-04-04 00:33:32.386194779 +0200 @@ -26277,75 +26277,79 @@ (define_insn "xop_vpermil2<mode>3" ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (define_insn "aesenc" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") - (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v") + (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")] UNSPEC_AESENC))] "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesenc\t{%2, %0|%0, %2} vaesenc\t{%2, %1, %0|%0, %1, %2} + %{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2} vaesenc\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl") (set_attr "type" "sselog1") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,*,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex,evex") - (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,vex,evex,evex") + (set_attr "btver2_decode" "double,double,double,double") (set_attr "mode" "TI")]) (define_insn "aesenclast" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") - (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v") + (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")] UNSPEC_AESENCLAST))] "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesenclast\t{%2, %0|%0, %2} vaesenclast\t{%2, %1, %0|%0, %1, %2} + %{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2} vaesenclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl") (set_attr "type" "sselog1") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,*,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex,evex") - (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,vex,evex,evex") + (set_attr "btver2_decode" "double,double,double,double") (set_attr "mode" "TI")]) (define_insn "aesdec" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") - (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v") + (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")] UNSPEC_AESDEC))] "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesdec\t{%2, %0|%0, %2} vaesdec\t{%2, %1, %0|%0, %1, %2} + %{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2} vaesdec\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") + [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl") (set_attr "type" "sselog1") - (set_attr "addr" "gpr16,*,*") + (set_attr "addr" "gpr16,*,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex,evex") - (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,vex,evex,evex") + (set_attr "btver2_decode" "double,double,double,double") (set_attr "mode" "TI")]) (define_insn "aesdeclast" - [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") - (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")] + [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v") + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v") + (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")] UNSPEC_AESDECLAST))] "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" "@ aesdeclast\t{%2, %0|%0, %2} vaesdeclast\t{%2, %1, %0|%0, %1, %2} + %{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2} vaesdeclast\t{%2, %1, %0|%0, %1, %2}" - [(set_attr "isa" "noavx,aes,avx512vl") - (set_attr "addr" "gpr16,*,*") + [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl") + (set_attr "addr" "gpr16,*,*,*") (set_attr "type" "sselog1") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,vex,evex") - (set_attr "btver2_decode" "double,double,double") + (set_attr "prefix" "orig,vex,evex,evex") + (set_attr "btver2_decode" "double,double,double,double") (set_attr "mode" "TI")]) (define_insn "aesimc" @@ -30246,24 +30250,32 @@ (define_insn "vpdpwssds_<mode>_maskz_1" [(set_attr ("prefix") ("evex"))]) (define_insn "vaesdec_<mode>" - [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") + [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") (unspec:VI1_AVX512VL_F - [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v") - (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")] + [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v") + (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")] UNSPEC_VAESDEC))] "TARGET_VAES" - "vaesdec\t{%2, %1, %0|%0, %1, %2}" -) +{ + if (which_alternative == 0 && <MODE>mode == V16QImode) + return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}"; + else + return "vaesdec\t{%2, %1, %0|%0, %1, %2}"; +}) (define_insn "vaesdeclast_<mode>" - [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") + [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") (unspec:VI1_AVX512VL_F - [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v") - (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")] + [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v") + (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")] UNSPEC_VAESDECLAST))] "TARGET_VAES" - "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" -) +{ + if (which_alternative == 0 && <MODE>mode == V16QImode) + return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; + else + return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"; +}) (define_insn "vaesenc_<mode>" [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") @@ -30272,18 +30284,26 @@ (define_insn "vaesenc_<mode>" (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")] UNSPEC_VAESENC))] "TARGET_VAES" - "vaesenc\t{%2, %1, %0|%0, %1, %2}" -) +{ + if (which_alternative == 0 && <MODE>mode == V16QImode) + return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}"; + else + return "vaesenc\t{%2, %1, %0|%0, %1, %2}"; +}) (define_insn "vaesenclast_<mode>" - [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v") + [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v") (unspec:VI1_AVX512VL_F - [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v") - (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")] + [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v") + (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")] UNSPEC_VAESENCLAST))] "TARGET_VAES" - "vaesenclast\t{%2, %1, %0|%0, %1, %2}" -) +{ + if (which_alternative == 0 && <MODE>mode == V16QImode) + return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}"; + else + return "vaesenclast\t{%2, %1, %0|%0, %1, %2}"; +}) (define_insn "vpclmulqdq_<mode>" [(set (match_operand:VI8_FVL 0 "register_operand" "=v") ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org ` (2 preceding siblings ...) 2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org @ 2024-04-03 23:12 ` thiago at kde dot org 2024-04-04 7:22 ` rguenth at gcc dot gnu.org ` (2 subsequent siblings) 6 siblings, 0 replies; 8+ messages in thread From: thiago at kde dot org @ 2024-04-03 23:12 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 --- Comment #4 from Thiago Macieira <thiago at kde dot org> --- (In reply to Jakub Jelinek from comment #3) > vaesenc etc. instructions can be used even if just -maes -mavx, not just > -mvaes -mavx512vl. Correct, that's just VEX-prefixed AESNI instructions. VAES added the 256-bit and 512-bit versions of those instructions. The table at felix's website is accurate: https://www.felixcloutier.com/x86/aesenc This is actually similar to GFNI: * GFNI: 128-bit only, non-VEX, non-EVEX * GFNI+AVX: VEX allowed, 128- and 256-bit; no EVEX * GFNI+AVX512F: 128- and 256-bit with VEX, 512-bit with EVEX * GFNI+AVX512VL: 128- and 256-bit with VEX, all with EVEX * GFNI+AVX10 without EVEX512: 128- and 256-bit with VEX and EVEX, no 512-bit The F-no-VL case does not exist in practice. > But, it is especially messy because -mvaes doesn't imply -maes, so IMHO if > somebody e.g. asks for -mvaes -mavx512vl -mno-aes and the insns don't use > any xmm16+ register, it would emit the insn using VEX encoding rather than > EVEX, so I think we need to use {evex} prefixes. Would it be simpler to just imply that VAES includes AESNI? There are no processors that have VAES without AESNI and it doesn't make sense for there to be one. ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org ` (3 preceding siblings ...) 2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org @ 2024-04-04 7:22 ` rguenth at gcc dot gnu.org 2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org 2024-04-09 10:48 ` jakub at gcc dot gnu.org 6 siblings, 0 replies; 8+ messages in thread From: rguenth at gcc dot gnu.org @ 2024-04-04 7:22 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 Richard Biener <rguenth at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- Priority|P3 |P1 ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org ` (4 preceding siblings ...) 2024-04-04 7:22 ` rguenth at gcc dot gnu.org @ 2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org 2024-04-09 10:48 ` jakub at gcc dot gnu.org 6 siblings, 0 replies; 8+ messages in thread From: cvs-commit at gcc dot gnu.org @ 2024-04-09 10:41 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 --- Comment #5 from GCC Commits <cvs-commit at gcc dot gnu.org> --- The master branch has been updated by Jakub Jelinek <jakub@gcc.gnu.org>: https://gcc.gnu.org/g:a79d13a01f8cbb99fb45bf3f3ffc62c99ee0b05e commit r14-9869-ga79d13a01f8cbb99fb45bf3f3ffc62c99ee0b05e Author: Jakub Jelinek <jakub@redhat.com> Date: Tue Apr 9 12:35:18 2024 +0200 i386: Fix aes/vaes patterns [PR114576] On Wed, Apr 19, 2023 at 02:40:59AM +0000, Jiang, Haochen via Gcc-patches wrote: > > > (define_insn "aesenc" > > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x") > > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x") > > > - (match_operand:V2DI 2 "vector_operand" "xBm,xm")] > > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v") > > > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v") > > > + (match_operand:V2DI 2 "vector_operand" > > > + "xBm,xm,vm")] > > > UNSPEC_AESENC))] > > > - "TARGET_AES" > > > + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)" > > > "@ > > > aesenc\t{%2, %0|%0, %2} > > > + vaesenc\t{%2, %1, %0|%0, %1, %2} > > > vaesenc\t{%2, %1, %0|%0, %1, %2}" > > > - [(set_attr "isa" "noavx,avx") > > > + [(set_attr "isa" "noavx,aes,avx512vl") > > Shouldn't it be vaes_avx512vl and then remove " || (TARGET_VAES && > > TARGET_AVX512VL)" from condition. > > Since VAES should not imply AES, we need that "|| (TARGET_VAES && > TARGET_AVX512VL)" > > And there is no need to add vaes_avx512vl since the last alternative will only > be hit when there is no aes. When there is no aes, the pattern will need vaes > and avx512vl both or we could not use this pattern. avx512vl here is just like > a placeholder. As the following testcase shows, the above change was incorrect. Using aes isa for the second alternative is obviously wrong, aes is enabled whenever -maes is, regardless of -mavx or -mno-avx, so the above change means that for -maes -mno-avx RA can choose, either it matches the first alternative with the dup operand, or it matches the second one (but that is of course wrong because vaesenc VEX encoded insn needs AES & AVX CPUID). The big question is if "Since VAES should not imply AES" is the case or not. Looking around at what LLVM does on godbolt, seems since clang 6 which added -mvaes support -mvaes there implies -maes, but GCC treats those two independent. Now, if we'd take the LLVM path of making -mvaes imply -maes and -mno-aes imply -mno-vaes, then we should probably just revert the above patch and tweak common/config/i386/ to do the implications (+ add the testcase from this patch). If we keep the current behavior, where AES and VAES are completely independent extensions, then we need to do more changes as the following patch attempts to do. We should use the aesenc etc. insns for noavx as before, we know at that point that TARGET_AES must be true because (TARGET_VAES && TARGET_AVX512VL) won't be true when !TARGET_AVX - TARGET_AVX512VL implies TARGET_AVX. For the second alternative, i.e. the AVX AES VEX or VAES AVX512F EVEX case without using %xmm16+/EGPR regs, the patch uses avx isa, but we need to emit {evex} prefix in the assembly if AES ISA is not enabled. For the last alternative, we need to use a new vaes_avx512vl isa attribute, because the %xmm16+/EGPR support is there only if both VAES and AVX512VL is enabled, not just AVX and AES. Still, I wonder if -mvaes shouldn't imply at least -mavx512f and -mno-avx512f shouldn't imply -mno-vaes, because otherwise can't see how it could use 512-bit registers (this part not done in the patch). 2024-04-09 Jakub Jelinek <jakub@redhat.com> PR target/114576 * config/i386/i386.md (isa): Remove aes, add vaes_avx512vl. (enabled): Remove aes isa check, add vaes_avx512vl. * config/i386/sse.md (aesenc, aesenclast, aesdec, aesdeclast): Use jm instead of m for second alternative and emit {evex} prefix for it if !TARGET_AES. Use noavx,avx,vaes_avx512vl isa attribute. (vaesdec_<mode>, vaesdeclast_<mode>, vaesenc_<mode>, vaesenclast_<mode>): Add second alternative with x instead of v and jm instead of m. * gcc.target/i386/aes-pr114576.c: New test. ^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org ` (5 preceding siblings ...) 2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org @ 2024-04-09 10:48 ` jakub at gcc dot gnu.org 6 siblings, 0 replies; 8+ messages in thread From: jakub at gcc dot gnu.org @ 2024-04-09 10:48 UTC (permalink / raw) To: gcc-bugs https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576 Jakub Jelinek <jakub at gcc dot gnu.org> changed: What |Removed |Added ---------------------------------------------------------------------------- Status|NEW |RESOLVED Resolution|--- |FIXED --- Comment #6 from Jakub Jelinek <jakub at gcc dot gnu.org> --- Fixed. ^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-04-09 10:48 UTC | newest] Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org 2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org 2024-04-03 17:46 ` pinskia at gcc dot gnu.org 2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org 2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org 2024-04-04 7:22 ` rguenth at gcc dot gnu.org 2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org 2024-04-09 10:48 ` jakub at gcc dot gnu.org
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).