public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled
@ 2024-04-03 17:36 thiago at kde dot org
2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
` (6 more replies)
0 siblings, 7 replies; 8+ messages in thread
From: thiago at kde dot org @ 2024-04-03 17:36 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
Bug ID: 114576
Summary: [13 regression][config/i386] GCC 14/trunk emits
VEX-prefixed AES instruction without AVX enabled
Product: gcc
Version: 14.0
Status: UNCONFIRMED
Severity: normal
Priority: P3
Component: target
Assignee: unassigned at gcc dot gnu.org
Reporter: thiago at kde dot org
Target Milestone: ---
Re: https://bugreports.qt.io/browse/QTBUG-123965
Re: https://bugzilla.redhat.com/show_bug.cgi?id=2262640,
https://bugzilla.redhat.com/show_bug.cgi?id=2272758
Godbolt link: https://gcc.godbolt.org/z/6P9fMvoxW
Found while compiling Qt 6.6 or 6.7 with GCC 14 (current trunk). This is a
regression from GCC 13.
This function from qhash.cpp
<https://github.com/qt/qtbase/blob/v6.7.0/src/corelib/tools/qhash.cpp#L581-L588>:
Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const
{
{
// unlike the Go code, we don't have more per-process seed
__m128i state1 = _mm_aesenc_si128(state0, mseed2);
return state1;
}
}
Is apparently getting assembled to:
.L2:
leaq (%rdi,%rsi), %rdx
vaesenc %xmm1, %xmm0, %xmm1
Though there's no AVX enabled in this code (the original version in Qt has some
AVX/VAES and AVX512 code but the reduced example does not).
This function:
// hash twice 16 bytes, running 2 scramble rounds of AES on itself
static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL
hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const
__m128i *src1)
{
__m128i data0 = _mm_loadu_si128(src0);
__m128i data1 = _mm_loadu_si128(src1);
state0 = _mm_xor_si128(data0, state0);
state1 = _mm_xor_si128(data1, state1);
state0 = _mm_aesenc_si128(state0, state0);
state1 = _mm_aesenc_si128(state1, state1);
state0 = _mm_aesenc_si128(state0, state0);
state1 = _mm_aesenc_si128(state1, state1);
}
Is even emitting:
.L20:
movdqu (%rax), %xmm2
pxor %xmm0, %xmm2
movdqu -16(%rdx), %xmm0
pxor %xmm0, %xmm1
vaesenc %xmm2, %xmm2, %xmm0
aesenc %xmm1, %xmm1
aesenc %xmm0, %xmm0
aesenc %xmm1, %xmm1
and that makes no sense to use AVX for one of four instructions alone, called
from the same source function.
For reference, GCC 13 generates respectively:
.L2:
movdqa %xmm0, %xmm1
leaq (%rdi,%rsi), %rdx
aesenc %xmm2, %xmm1
and
.L20:
movdqu (%rax), %xmm2
pxor %xmm0, %xmm2
movdqu -16(%rdx), %xmm0
aesenc %xmm2, %xmm2
pxor %xmm0, %xmm1
movdqa %xmm2, %xmm0
aesenc %xmm1, %xmm1
aesenc %xmm2, %xmm0
aesenc %xmm1, %xmm1
You can tell that they are the same source block because the labels are the
same.
Sources:
#include <immintrin.h>
#ifdef _MSC_VER
# define Q_ALWAYS_INLINE __forceinline
# define QT_VECTORCALL __vectorcall
# define QT_FUNCTION_TARGET(x)
#else
# define Q_ALWAYS_INLINE inline __attribute__((always_inline))
# define QT_VECTORCALL
# define QT_FUNCTION_TARGET(x) __attribute__((target(QT_FUNCTION_TARGET_##x)))
# define QT_FUNCTION_TARGET_AES "sse4.2,aes"
//# define qCpuHasFeature(x) __builtin_cpu_supports(QT_FUNCTION_TARGET_ ## x)
#endif
#define QT_COMPILER_SUPPORTS_HERE(x) true
# define mm_set1_epz _mm_set1_epi64x
# define mm_cvtsz_si128 _mm_cvtsi64_si128
# define mm_cvtsi128_sz _mm_cvtsi128_si64
# define mm256_set1_epz _mm256_set1_epi64x
extern bool qCpuHasFeature(const char *) noexcept;
#define qCpuHasFeature(x) qCpuHasFeature(#x)
using uchar = unsigned char;
using quintptr = unsigned long long;
using qint8 = signed char;
// hash 16 bytes, running 3 scramble rounds of AES on itself (like label
"final1")
static void Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) QT_VECTORCALL
hash16bytes(__m128i &state0, __m128i data)
{
state0 = _mm_xor_si128(state0, data);
state0 = _mm_aesenc_si128(state0, state0);
state0 = _mm_aesenc_si128(state0, state0);
state0 = _mm_aesenc_si128(state0, state0);
}
// hash twice 16 bytes, running 2 scramble rounds of AES on itself
static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL
hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const
__m128i *src1)
{
__m128i data0 = _mm_loadu_si128(src0);
__m128i data1 = _mm_loadu_si128(src1);
state0 = _mm_xor_si128(data0, state0);
state1 = _mm_xor_si128(data1, state1);
state0 = _mm_aesenc_si128(state0, state0);
state1 = _mm_aesenc_si128(state1, state1);
state0 = _mm_aesenc_si128(state0, state0);
state1 = _mm_aesenc_si128(state1, state1);
}
struct AESHashSeed
{
__m128i state0;
__m128i mseed2;
AESHashSeed(size_t seed, size_t seed2) QT_FUNCTION_TARGET(AES);
__m128i state1() const QT_FUNCTION_TARGET(AES);
};
Q_ALWAYS_INLINE AESHashSeed::AESHashSeed(size_t seed, size_t seed2)
{
__m128i mseed = mm_cvtsz_si128(seed);
mseed2 = mm_set1_epz(seed2);
// mseed (epi16) = [ seed, seed >> 16, seed >> 32, seed >> 48, len, 0, 0, 0
]
mseed = _mm_insert_epi16(mseed, short(seed), 4);
// mseed (epi16) = [ seed, seed >> 16, seed >> 32, seed >> 48, len, len,
len, len ]
mseed = _mm_shufflehi_epi16(mseed, 0);
// merge with the process-global seed
__m128i key = _mm_xor_si128(mseed, mseed2);
// scramble the key
__m128i state0 = _mm_aesenc_si128(key, key);
this->state0 = state0;
}
Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const
{
{
// unlike the Go code, we don't have more per-process seed
__m128i state1 = _mm_aesenc_si128(state0, mseed2);
return state1;
}
}
static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const
__m128i *srcend)
{
{
if (src + 1 < srcend) {
// epilogue: between 16 and 31 bytes
hash2x16bytes(state0, state1, src, srcend - 1);
} else if (src != srcend) {
// epilogue: between 1 and 16 bytes, overlap with the end
__m128i data = _mm_loadu_si128(srcend - 1);
hash16bytes(state0, data);
}
// combine results:
state0 = _mm_xor_si128(state0, state1);
}
return mm_cvtsi128_sz(state0);
}
static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_lt16(__m128i state0, const uchar *p, size_t len)
{
if (len) {
// We're going to load 16 bytes and mask zero the part we don't care
// (the hash of a short string is different from the hash of a longer
// including NULLs at the end because the length is in the key)
// WARNING: this may produce valgrind warnings, but it's safe
constexpr quintptr PageSize = 4096;
__m128i data;
if ((quintptr(p) & (PageSize / 2)) == 0) {
// lower half of the page:
// load all 16 bytes and mask off the bytes past the end of the
source
static const qint8 maskarray[] = {
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};
__m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i
*>(maskarray + 15 - len));
data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
data = _mm_and_si128(data, mask);
} else {
// upper half of the page:
// load 16 bytes ending at the data end, then shuffle them to the
beginning
static const qint8 shufflecontrol[] = {
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
};
__m128i control = _mm_loadu_si128(reinterpret_cast<const __m128i
*>(shufflecontrol + 15 - len));
data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p + len) -
1);
data = _mm_shuffle_epi8(data, control);
}
hash16bytes(state0, data);
}
return mm_cvtsi128_sz(state0);
}
static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_ge32(__m128i state0, __m128i state1, const __m128i *src, const
__m128i *srcend)
{
// main loop: scramble two 16-byte blocks
for ( ; src + 2 < srcend; src += 2)
hash2x16bytes(state0, state1, src, src + 1);
return aeshash128_16to32(state0, state1, src, srcend);
}
static size_t QT_FUNCTION_TARGET(AES)
aeshash128(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept
{
AESHashSeed state(seed, seed2);
auto src = reinterpret_cast<const __m128i *>(p);
const auto srcend = reinterpret_cast<const __m128i *>(p + len);
if (len < sizeof(__m128i))
return aeshash128_lt16(state.state0, p, len);
if (len <= sizeof(__m256i))
return aeshash128_16to32(state.state0, state.state1(), src, srcend);
return aeshash128_ge32(state.state0, state.state1(), src, srcend);
}
static size_t aeshash(const uchar *p, size_t len, size_t seed, size_t seed2)
noexcept
{
return aeshash128(p, len, seed, seed2);
}
extern size_t qt_qhash_seed;
size_t qHashBits(const void *p, size_t size, size_t seed) noexcept
{
size_t seed2 = size;
if (seed)
seed2 = qt_qhash_seed;
return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
}
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
@ 2024-04-03 17:41 ` pinskia at gcc dot gnu.org
2024-04-03 17:46 ` pinskia at gcc dot gnu.org
` (5 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-04-03 17:41 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
Andrew Pinski <pinskia at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Summary|[13 |[14
|regression][config/i386] |regression][config/i386]
|GCC 14/trunk emits |GCC 14/trunk emits
|VEX-prefixed AES |VEX-prefixed AES
|instruction without AVX |instruction without AVX
|enabled |enabled
Target Milestone|--- |14.0
Keywords| |wrong-code
--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
(define_insn "aesenc"
[(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
(unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
(match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
UNSPEC_AESENC))]
"TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesenc\t{%2, %0|%0, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,aes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "addr" "gpr16,*,*")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "orig,vex,evex")
(set_attr "btver2_decode" "double,double,double")
(set_attr "mode" "TI")])
r14-104-g24a8acc1662c37
Also, since -mvaes indicates that we could use VEX encoding for ymm, we
should imply AVX for VAES.
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
@ 2024-04-03 17:46 ` pinskia at gcc dot gnu.org
2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org
` (4 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-04-03 17:46 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
Andrew Pinski <pinskia at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Ever confirmed|0 |1
Last reconfirmed| |2024-04-03
Status|UNCONFIRMED |NEW
--- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Something like this should fix it (but I am not 100% sure it is correct nor can
I test it):
```
apinski@xeond:~/src/upstream-gcc-match/gcc/gcc/config/i386$ git diff
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6ac401154e4..82cf92653f4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -916,6 +916,7 @@ (define_attr "enabled" ""
(eq_attr "isa" "x64_avx512dq")
(symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
(eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
+ (eq_attr "isa" "vaes") (symbol_ref "TARGET_VAES")
(eq_attr "isa" "sse_noavx")
(symbol_ref "TARGET_SSE && !TARGET_AVX")
(eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3286d3a4fac..af79f3b126d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -26286,7 +26286,7 @@ (define_insn "aesenc"
aesenc\t{%2, %0|%0, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,vaes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "addr" "gpr16,*,*")
(set_attr "prefix_extra" "1")
@@ -26304,7 +26304,7 @@ (define_insn "aesenclast"
aesenclast\t{%2, %0|%0, %2}
vaesenclast\t{%2, %1, %0|%0, %1, %2}
vaesenclast\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,vaes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "addr" "gpr16,*,*")
(set_attr "prefix_extra" "1")
@@ -26322,7 +26322,7 @@ (define_insn "aesdec"
aesdec\t{%2, %0|%0, %2}
vaesdec\t{%2, %1, %0|%0, %1, %2}
vaesdec\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,vaes,avx512vl")
(set_attr "type" "sselog1")
(set_attr "addr" "gpr16,*,*")
(set_attr "prefix_extra" "1")
@@ -26340,7 +26340,7 @@ (define_insn "aesdeclast"
aesdeclast\t{%2, %0|%0, %2}
vaesdeclast\t{%2, %1, %0|%0, %1, %2}
vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,vaes,avx512vl")
(set_attr "addr" "gpr16,*,*")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
```
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression]VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
2024-04-03 17:46 ` pinskia at gcc dot gnu.org
@ 2024-04-03 22:39 ` jakub at gcc dot gnu.org
2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org
` (3 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: jakub at gcc dot gnu.org @ 2024-04-03 22:39 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
Jakub Jelinek <jakub at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |jakub at gcc dot gnu.org
--- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
(In reply to Andrew Pinski from comment #2)
> Something like this should fix it (but I am not 100% sure it is correct nor
> can I test it):
This is IMHO not correct.
vaesenc etc. instructions can be used even if just -maes -mavx, not just -mvaes
-mavx512vl.
But, it is especially messy because -mvaes doesn't imply -maes, so IMHO if
somebody e.g. asks for -mvaes -mavx512vl -mno-aes and the insns don't use any
xmm16+ register, it would emit the insn using VEX encoding rather than EVEX, so
I think we need to use {evex} prefixes.
So I think we want:
--- gcc/config/i386/i386.md.jj 2024-03-18 10:33:27.983419363 +0100
+++ gcc/config/i386/i386.md 2024-04-04 00:17:48.818340648 +0200
@@ -568,13 +568,14 @@ (define_attr "unit" "integer,i387,sse,mm
;; Used to control the "enabled" attribute on a per-instruction basis.
(define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
- x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd,
+ x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,
sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
- avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl"
+ avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
+ aes_avx,vaes_avx512vl"
(const_string "base"))
;; The (bounding maximum) length of an instruction immediate.
@@ -915,7 +916,6 @@ (define_attr "enabled" ""
(symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
(eq_attr "isa" "x64_avx512dq")
(symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
- (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
(eq_attr "isa" "sse_noavx")
(symbol_ref "TARGET_SSE && !TARGET_AVX")
(eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
@@ -968,6 +968,10 @@ (define_attr "enabled" ""
(symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
(eq_attr "isa" "apx_ndd")
(symbol_ref "TARGET_APX_NDD")
+ (eq_attr "isa" "aes_avx")
+ (symbol_ref "TARGET_AES && TARGET_AVX")
+ (eq_attr "isa" "vaes_avx512vl")
+ (symbol_ref "TARGET_VAES && TARGET_AVX512VL")
(eq_attr "mmx_isa" "native")
(symbol_ref "!TARGET_MMX_WITH_SSE")
--- gcc/config/i386/sse.md.jj 2024-03-18 08:58:45.942772799 +0100
+++ gcc/config/i386/sse.md 2024-04-04 00:33:32.386194779 +0200
@@ -26277,75 +26277,79 @@ (define_insn "xop_vpermil2<mode>3"
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "aesenc"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
- (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
UNSPEC_AESENC))]
"TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesenc\t{%2, %0|%0, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}
+ %{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}
vaesenc\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
(set_attr "type" "sselog1")
- (set_attr "addr" "gpr16,*,*")
+ (set_attr "addr" "gpr16,*,*,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
- (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,vex,evex,evex")
+ (set_attr "btver2_decode" "double,double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesenclast"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
- (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
UNSPEC_AESENCLAST))]
"TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesenclast\t{%2, %0|%0, %2}
vaesenclast\t{%2, %1, %0|%0, %1, %2}
+ %{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}
vaesenclast\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
(set_attr "type" "sselog1")
- (set_attr "addr" "gpr16,*,*")
+ (set_attr "addr" "gpr16,*,*,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
- (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,vex,evex,evex")
+ (set_attr "btver2_decode" "double,double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesdec"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
- (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
UNSPEC_AESDEC))]
"TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesdec\t{%2, %0|%0, %2}
vaesdec\t{%2, %1, %0|%0, %1, %2}
+ %{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}
vaesdec\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
+ [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
(set_attr "type" "sselog1")
- (set_attr "addr" "gpr16,*,*")
+ (set_attr "addr" "gpr16,*,*,*")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
- (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,vex,evex,evex")
+ (set_attr "btver2_decode" "double,double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesdeclast"
- [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
- (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
- (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+ [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+ (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
UNSPEC_AESDECLAST))]
"TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
"@
aesdeclast\t{%2, %0|%0, %2}
vaesdeclast\t{%2, %1, %0|%0, %1, %2}
+ %{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}
vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "isa" "noavx,aes,avx512vl")
- (set_attr "addr" "gpr16,*,*")
+ [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
+ (set_attr "addr" "gpr16,*,*,*")
(set_attr "type" "sselog1")
(set_attr "prefix_extra" "1")
- (set_attr "prefix" "orig,vex,evex")
- (set_attr "btver2_decode" "double,double,double")
+ (set_attr "prefix" "orig,vex,evex,evex")
+ (set_attr "btver2_decode" "double,double,double,double")
(set_attr "mode" "TI")])
(define_insn "aesimc"
@@ -30246,24 +30250,32 @@ (define_insn "vpdpwssds_<mode>_maskz_1"
[(set_attr ("prefix") ("evex"))])
(define_insn "vaesdec_<mode>"
- [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
+ [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
(unspec:VI1_AVX512VL_F
- [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
- (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
+ [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v")
+ (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")]
UNSPEC_VAESDEC))]
"TARGET_VAES"
- "vaesdec\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+ if (which_alternative == 0 && <MODE>mode == V16QImode)
+ return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "vaesdec\t{%2, %1, %0|%0, %1, %2}";
+})
(define_insn "vaesdeclast_<mode>"
- [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
+ [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
(unspec:VI1_AVX512VL_F
- [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
- (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
+ [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v")
+ (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")]
UNSPEC_VAESDECLAST))]
"TARGET_VAES"
- "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+ if (which_alternative == 0 && <MODE>mode == V16QImode)
+ return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
+})
(define_insn "vaesenc_<mode>"
[(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
@@ -30272,18 +30284,26 @@ (define_insn "vaesenc_<mode>"
(match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
UNSPEC_VAESENC))]
"TARGET_VAES"
- "vaesenc\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+ if (which_alternative == 0 && <MODE>mode == V16QImode)
+ return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "vaesenc\t{%2, %1, %0|%0, %1, %2}";
+})
(define_insn "vaesenclast_<mode>"
- [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
+ [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
(unspec:VI1_AVX512VL_F
- [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
- (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
+ [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v")
+ (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")]
UNSPEC_VAESENCLAST))]
"TARGET_VAES"
- "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+ if (which_alternative == 0 && <MODE>mode == V16QImode)
+ return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}";
+ else
+ return "vaesenclast\t{%2, %1, %0|%0, %1, %2}";
+})
(define_insn "vpclmulqdq_<mode>"
[(set (match_operand:VI8_FVL 0 "register_operand" "=v")
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
` (2 preceding siblings ...)
2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org
@ 2024-04-03 23:12 ` thiago at kde dot org
2024-04-04 7:22 ` rguenth at gcc dot gnu.org
` (2 subsequent siblings)
6 siblings, 0 replies; 8+ messages in thread
From: thiago at kde dot org @ 2024-04-03 23:12 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
--- Comment #4 from Thiago Macieira <thiago at kde dot org> ---
(In reply to Jakub Jelinek from comment #3)
> vaesenc etc. instructions can be used even if just -maes -mavx, not just
> -mvaes -mavx512vl.
Correct, that's just VEX-prefixed AESNI instructions.
VAES added the 256-bit and 512-bit versions of those instructions. The table at
felix's website is accurate: https://www.felixcloutier.com/x86/aesenc
This is actually similar to GFNI:
* GFNI: 128-bit only, non-VEX, non-EVEX
* GFNI+AVX: VEX allowed, 128- and 256-bit; no EVEX
* GFNI+AVX512F: 128- and 256-bit with VEX, 512-bit with EVEX
* GFNI+AVX512VL: 128- and 256-bit with VEX, all with EVEX
* GFNI+AVX10 without EVEX512: 128- and 256-bit with VEX and EVEX, no 512-bit
The F-no-VL case does not exist in practice.
> But, it is especially messy because -mvaes doesn't imply -maes, so IMHO if
> somebody e.g. asks for -mvaes -mavx512vl -mno-aes and the insns don't use
> any xmm16+ register, it would emit the insn using VEX encoding rather than
> EVEX, so I think we need to use {evex} prefixes.
Would it be simpler to just imply that VAES includes AESNI? There are no
processors that have VAES without AESNI and it doesn't make sense for there to
be one.
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
` (3 preceding siblings ...)
2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org
@ 2024-04-04 7:22 ` rguenth at gcc dot gnu.org
2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
2024-04-09 10:48 ` jakub at gcc dot gnu.org
6 siblings, 0 replies; 8+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-04-04 7:22 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
Richard Biener <rguenth at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Priority|P3 |P1
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
` (4 preceding siblings ...)
2024-04-04 7:22 ` rguenth at gcc dot gnu.org
@ 2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
2024-04-09 10:48 ` jakub at gcc dot gnu.org
6 siblings, 0 replies; 8+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2024-04-09 10:41 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
--- Comment #5 from GCC Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Jakub Jelinek <jakub@gcc.gnu.org>:
https://gcc.gnu.org/g:a79d13a01f8cbb99fb45bf3f3ffc62c99ee0b05e
commit r14-9869-ga79d13a01f8cbb99fb45bf3f3ffc62c99ee0b05e
Author: Jakub Jelinek <jakub@redhat.com>
Date: Tue Apr 9 12:35:18 2024 +0200
i386: Fix aes/vaes patterns [PR114576]
On Wed, Apr 19, 2023 at 02:40:59AM +0000, Jiang, Haochen via Gcc-patches
wrote:
> > > (define_insn "aesenc"
> > > - [(set (match_operand:V2DI 0 "register_operand" "=x,x")
> > > - (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
> > > - (match_operand:V2DI 2 "vector_operand"
"xBm,xm")]
> > > + [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
> > > + (unspec:V2DI [(match_operand:V2DI 1 "register_operand"
"0,x,v")
> > > + (match_operand:V2DI 2 "vector_operand"
> > > + "xBm,xm,vm")]
> > > UNSPEC_AESENC))]
> > > - "TARGET_AES"
> > > + "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
> > > "@
> > > aesenc\t{%2, %0|%0, %2}
> > > + vaesenc\t{%2, %1, %0|%0, %1, %2}
> > > vaesenc\t{%2, %1, %0|%0, %1, %2}"
> > > - [(set_attr "isa" "noavx,avx")
> > > + [(set_attr "isa" "noavx,aes,avx512vl")
> > Shouldn't it be vaes_avx512vl and then remove " || (TARGET_VAES &&
> > TARGET_AVX512VL)" from condition.
>
> Since VAES should not imply AES, we need that "|| (TARGET_VAES &&
> TARGET_AVX512VL)"
>
> And there is no need to add vaes_avx512vl since the last alternative will
only
> be hit when there is no aes. When there is no aes, the pattern will need
vaes
> and avx512vl both or we could not use this pattern. avx512vl here is just
like
> a placeholder.
As the following testcase shows, the above change was incorrect.
Using aes isa for the second alternative is obviously wrong, aes is enabled
whenever -maes is, regardless of -mavx or -mno-avx, so the above change
means that for -maes -mno-avx RA can choose, either it matches the first
alternative with the dup operand, or it matches the second one (but that
is of course wrong because vaesenc VEX encoded insn needs AES & AVX CPUID).
The big question is if "Since VAES should not imply AES" is the case or
not.
Looking around at what LLVM does on godbolt, seems since clang 6 which
added
-mvaes support -mvaes there implies -maes, but GCC treats those two
independent.
Now, if we'd take the LLVM path of making -mvaes imply -maes and -mno-aes
imply -mno-vaes, then we should probably just revert the above patch and
tweak common/config/i386/ to do the implications (+ add the testcase from
this patch).
If we keep the current behavior, where AES and VAES are completely
independent extensions, then we need to do more changes as the following
patch attempts to do.
We should use the aesenc etc. insns for noavx as before, we know at that
point that TARGET_AES must be true because (TARGET_VAES && TARGET_AVX512VL)
won't be true when !TARGET_AVX - TARGET_AVX512VL implies TARGET_AVX.
For the second alternative, i.e. the AVX AES VEX or VAES AVX512F EVEX case
without using %xmm16+/EGPR regs, the patch uses avx isa, but we need to
emit {evex} prefix in the assembly if AES ISA is not enabled.
For the last alternative, we need to use a new vaes_avx512vl isa attribute,
because the %xmm16+/EGPR support is there only if both VAES and AVX512VL
is enabled, not just AVX and AES.
Still, I wonder if -mvaes shouldn't imply at least -mavx512f and
-mno-avx512f shouldn't imply -mno-vaes, because otherwise can't see how
it could use 512-bit registers (this part not done in the patch).
2024-04-09 Jakub Jelinek <jakub@redhat.com>
PR target/114576
* config/i386/i386.md (isa): Remove aes, add vaes_avx512vl.
(enabled): Remove aes isa check, add vaes_avx512vl.
* config/i386/sse.md (aesenc, aesenclast, aesdec, aesdeclast): Use
jm instead of m for second alternative and emit {evex} prefix
for it if !TARGET_AES. Use noavx,avx,vaes_avx512vl isa attribute.
(vaesdec_<mode>, vaesdeclast_<mode>, vaesenc_<mode>,
vaesenclast_<mode>): Add second alternative with x instead of v
and jm instead of m.
* gcc.target/i386/aes-pr114576.c: New test.
^ permalink raw reply [flat|nested] 8+ messages in thread
* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
` (5 preceding siblings ...)
2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
@ 2024-04-09 10:48 ` jakub at gcc dot gnu.org
6 siblings, 0 replies; 8+ messages in thread
From: jakub at gcc dot gnu.org @ 2024-04-09 10:48 UTC (permalink / raw)
To: gcc-bugs
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576
Jakub Jelinek <jakub at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|NEW |RESOLVED
Resolution|--- |FIXED
--- Comment #6 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Fixed.
^ permalink raw reply [flat|nested] 8+ messages in thread
end of thread, other threads:[~2024-04-09 10:48 UTC | newest]
Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
2024-04-03 17:46 ` pinskia at gcc dot gnu.org
2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org
2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org
2024-04-04 7:22 ` rguenth at gcc dot gnu.org
2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
2024-04-09 10:48 ` jakub at gcc dot gnu.org
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).