[Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled
@ 2024-04-03 17:36 thiago at kde dot org
  2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
                   ` (6 more replies)
  0 siblings, 7 replies; 8+ messages in thread
From: thiago at kde dot org @ 2024-04-03 17:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

            Bug ID: 114576
           Summary: [13 regression][config/i386] GCC 14/trunk emits
                    VEX-prefixed AES instruction without AVX enabled
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: thiago at kde dot org
  Target Milestone: ---

Re: https://bugreports.qt.io/browse/QTBUG-123965
Re: https://bugzilla.redhat.com/show_bug.cgi?id=2262640,
https://bugzilla.redhat.com/show_bug.cgi?id=2272758
Godbolt link: https://gcc.godbolt.org/z/6P9fMvoxW

Found while compiling Qt 6.6 or 6.7 with GCC 14 (current trunk). This is a
regression from GCC 13.

This function from qhash.cpp
<https://github.com/qt/qtbase/blob/v6.7.0/src/corelib/tools/qhash.cpp#L581-L588>:

Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const
{
    {
        // unlike the Go code, we don't have more per-process seed
        __m128i state1 = _mm_aesenc_si128(state0, mseed2);
        return state1;
    }
}

Is apparently getting assembled to:
.L2:
        leaq    (%rdi,%rsi), %rdx
        vaesenc %xmm1, %xmm0, %xmm1

Though there's no AVX enabled in this code (the original version in Qt has some
AVX/VAES and AVX512 code but the reduced example does not).

This function:
    // hash twice 16 bytes, running 2 scramble rounds of AES on itself
    static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL
    hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const
__m128i *src1)
    {
        __m128i data0 = _mm_loadu_si128(src0);
        __m128i data1 = _mm_loadu_si128(src1);
        state0 = _mm_xor_si128(data0, state0);
        state1 = _mm_xor_si128(data1, state1);
        state0 = _mm_aesenc_si128(state0, state0);
        state1 = _mm_aesenc_si128(state1, state1);
        state0 = _mm_aesenc_si128(state0, state0);
        state1 = _mm_aesenc_si128(state1, state1);
    }

Is even emitting:
.L20:
        movdqu  (%rax), %xmm2
        pxor    %xmm0, %xmm2
        movdqu  -16(%rdx), %xmm0
        pxor    %xmm0, %xmm1
        vaesenc %xmm2, %xmm2, %xmm0
        aesenc  %xmm1, %xmm1
        aesenc  %xmm0, %xmm0
        aesenc  %xmm1, %xmm1

and that makes no sense to use AVX for one of four instructions alone, called
from the same source function.

For reference, GCC 13 generates respectively:

.L2:
        movdqa  %xmm0, %xmm1
        leaq    (%rdi,%rsi), %rdx
        aesenc  %xmm2, %xmm1
and
.L20:
        movdqu  (%rax), %xmm2
        pxor    %xmm0, %xmm2
        movdqu  -16(%rdx), %xmm0
        aesenc  %xmm2, %xmm2
        pxor    %xmm0, %xmm1
        movdqa  %xmm2, %xmm0
        aesenc  %xmm1, %xmm1
        aesenc  %xmm2, %xmm0
        aesenc  %xmm1, %xmm1

You can tell that they are the same source block because the labels are the
same.

Sources:

#include <immintrin.h>
#ifdef _MSC_VER
#  define Q_ALWAYS_INLINE __forceinline
#  define QT_VECTORCALL __vectorcall
#  define QT_FUNCTION_TARGET(x)
#else
#  define Q_ALWAYS_INLINE inline __attribute__((always_inline))
#  define QT_VECTORCALL
#  define QT_FUNCTION_TARGET(x) __attribute__((target(QT_FUNCTION_TARGET_##x)))
#  define QT_FUNCTION_TARGET_AES        "sse4.2,aes"
//#  define qCpuHasFeature(x) __builtin_cpu_supports(QT_FUNCTION_TARGET_ ## x)
#endif
#define QT_COMPILER_SUPPORTS_HERE(x)    true
#    define mm_set1_epz     _mm_set1_epi64x
#    define mm_cvtsz_si128  _mm_cvtsi64_si128
#    define mm_cvtsi128_sz  _mm_cvtsi128_si64
#    define mm256_set1_epz  _mm256_set1_epi64x
extern bool qCpuHasFeature(const char *) noexcept;
#define qCpuHasFeature(x)     qCpuHasFeature(#x)

using uchar = unsigned char;
using quintptr = unsigned long long;
using qint8 = signed char;

    // hash 16 bytes, running 3 scramble rounds of AES on itself (like label
"final1")
    static void Q_ALWAYS_INLINE QT_FUNCTION_TARGET(AES) QT_VECTORCALL
    hash16bytes(__m128i &state0, __m128i data)
    {
        state0 = _mm_xor_si128(state0, data);
        state0 = _mm_aesenc_si128(state0, state0);
        state0 = _mm_aesenc_si128(state0, state0);
        state0 = _mm_aesenc_si128(state0, state0);
    }

    // hash twice 16 bytes, running 2 scramble rounds of AES on itself
    static void QT_FUNCTION_TARGET(AES) QT_VECTORCALL
    hash2x16bytes(__m128i &state0, __m128i &state1, const __m128i *src0, const
__m128i *src1)
    {
        __m128i data0 = _mm_loadu_si128(src0);
        __m128i data1 = _mm_loadu_si128(src1);
        state0 = _mm_xor_si128(data0, state0);
        state1 = _mm_xor_si128(data1, state1);
        state0 = _mm_aesenc_si128(state0, state0);
        state1 = _mm_aesenc_si128(state1, state1);
        state0 = _mm_aesenc_si128(state0, state0);
        state1 = _mm_aesenc_si128(state1, state1);
    }

    struct AESHashSeed
    {
        __m128i state0;
        __m128i mseed2;
        AESHashSeed(size_t seed, size_t seed2) QT_FUNCTION_TARGET(AES);
        __m128i state1() const QT_FUNCTION_TARGET(AES);
    };

Q_ALWAYS_INLINE AESHashSeed::AESHashSeed(size_t seed, size_t seed2)
{
    __m128i mseed = mm_cvtsz_si128(seed);
    mseed2 = mm_set1_epz(seed2);

    // mseed (epi16) = [ seed, seed >> 16, seed >> 32, seed >> 48, len, 0, 0, 0
]
    mseed = _mm_insert_epi16(mseed, short(seed), 4);
    // mseed (epi16) = [ seed, seed >> 16, seed >> 32, seed >> 48, len, len,
len, len ]
    mseed = _mm_shufflehi_epi16(mseed, 0);

    // merge with the process-global seed
    __m128i key = _mm_xor_si128(mseed, mseed2);

    // scramble the key
    __m128i state0 = _mm_aesenc_si128(key, key);
    this->state0 = state0;
}

Q_ALWAYS_INLINE __m128i AESHashSeed::state1() const
{
    {
        // unlike the Go code, we don't have more per-process seed
        __m128i state1 = _mm_aesenc_si128(state0, mseed2);
        return state1;
    }
}

static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_16to32(__m128i state0, __m128i state1, const __m128i *src, const
__m128i *srcend)
{
    {
        if (src + 1 < srcend) {
            // epilogue: between 16 and 31 bytes
            hash2x16bytes(state0, state1, src, srcend - 1);
        } else if (src != srcend) {
            // epilogue: between 1 and 16 bytes, overlap with the end
            __m128i data = _mm_loadu_si128(srcend - 1);
            hash16bytes(state0, data);
        }

        // combine results:
        state0 = _mm_xor_si128(state0, state1);
    }

    return mm_cvtsi128_sz(state0);
}

static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_lt16(__m128i state0, const uchar *p, size_t len)
{
    if (len) {
        // We're going to load 16 bytes and mask zero the part we don't care
        // (the hash of a short string is different from the hash of a longer
        // including NULLs at the end because the length is in the key)
        // WARNING: this may produce valgrind warnings, but it's safe

        constexpr quintptr PageSize = 4096;
        __m128i data;

        if ((quintptr(p) & (PageSize / 2)) == 0) {
            // lower half of the page:
            // load all 16 bytes and mask off the bytes past the end of the
source
            static const qint8 maskarray[] = {
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
            };
            __m128i mask = _mm_loadu_si128(reinterpret_cast<const __m128i
*>(maskarray + 15 - len));
            data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p));
            data = _mm_and_si128(data, mask);
        } else {
            // upper half of the page:
            // load 16 bytes ending at the data end, then shuffle them to the
beginning
            static const qint8 shufflecontrol[] = {
                1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
                -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
            };
            __m128i control = _mm_loadu_si128(reinterpret_cast<const __m128i
*>(shufflecontrol + 15 - len));
            data = _mm_loadu_si128(reinterpret_cast<const __m128i *>(p + len) -
1);
            data = _mm_shuffle_epi8(data, control);
        }

        hash16bytes(state0, data);
    }
    return mm_cvtsi128_sz(state0);
}

static size_t QT_FUNCTION_TARGET(AES) QT_VECTORCALL
aeshash128_ge32(__m128i state0, __m128i state1, const __m128i *src, const
__m128i *srcend)
{
    // main loop: scramble two 16-byte blocks
    for ( ; src + 2 < srcend; src += 2)
        hash2x16bytes(state0, state1, src, src + 1);

    return aeshash128_16to32(state0, state1, src, srcend);
}


static size_t QT_FUNCTION_TARGET(AES)
aeshash128(const uchar *p, size_t len, size_t seed, size_t seed2) noexcept
{
    AESHashSeed state(seed, seed2);
    auto src = reinterpret_cast<const __m128i *>(p);
    const auto srcend = reinterpret_cast<const __m128i *>(p + len);

    if (len < sizeof(__m128i))
        return aeshash128_lt16(state.state0, p, len);

    if (len <= sizeof(__m256i))
        return aeshash128_16to32(state.state0, state.state1(), src, srcend);

    return aeshash128_ge32(state.state0, state.state1(), src, srcend);
}

static size_t aeshash(const uchar *p, size_t len, size_t seed, size_t seed2)
noexcept
{
    return aeshash128(p, len, seed, seed2);
}

extern size_t qt_qhash_seed;
size_t qHashBits(const void *p, size_t size, size_t seed) noexcept
{
    size_t seed2 = size;
    if (seed)
        seed2 = qt_qhash_seed;
    return aeshash(reinterpret_cast<const uchar *>(p), size, seed, seed2);
}

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
@ 2024-04-03 17:41 ` pinskia at gcc dot gnu.org
  2024-04-03 17:46 ` pinskia at gcc dot gnu.org
                   ` (5 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-04-03 17:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
            Summary|[13                         |[14
                   |regression][config/i386]    |regression][config/i386]
                   |GCC 14/trunk emits          |GCC 14/trunk emits
                   |VEX-prefixed AES            |VEX-prefixed AES
                   |instruction without AVX     |instruction without AVX
                   |enabled                     |enabled
   Target Milestone|---                         |14.0
           Keywords|                            |wrong-code

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
(define_insn "aesenc"
  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
                       (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
                      UNSPEC_AESENC))]
  "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
  "@
   aesenc\t{%2, %0|%0, %2}
   vaesenc\t{%2, %1, %0|%0, %1, %2}
   vaesenc\t{%2, %1, %0|%0, %1, %2}"
  [(set_attr "isa" "noavx,aes,avx512vl")
   (set_attr "type" "sselog1")
   (set_attr "addr" "gpr16,*,*")
   (set_attr "prefix_extra" "1")
   (set_attr "prefix" "orig,vex,evex")
   (set_attr "btver2_decode" "double,double,double")
   (set_attr "mode" "TI")])

r14-104-g24a8acc1662c37

    Also, since -mvaes indicates that we could use VEX encoding for ymm, we
    should imply AVX for VAES.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
  2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
@ 2024-04-03 17:46 ` pinskia at gcc dot gnu.org
  2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org
                   ` (4 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: pinskia at gcc dot gnu.org @ 2024-04-03 17:46 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
     Ever confirmed|0                           |1
   Last reconfirmed|                            |2024-04-03
             Status|UNCONFIRMED                 |NEW

--- Comment #2 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Something like this should fix it (but I am not 100% sure it is correct nor can
I test it):
```
apinski@xeond:~/src/upstream-gcc-match/gcc/gcc/config/i386$ git diff
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 6ac401154e4..82cf92653f4 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -916,6 +916,7 @@ (define_attr "enabled" ""
         (eq_attr "isa" "x64_avx512dq")
           (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
         (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
+        (eq_attr "isa" "vaes") (symbol_ref "TARGET_VAES")
         (eq_attr "isa" "sse_noavx")
           (symbol_ref "TARGET_SSE && !TARGET_AVX")
         (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 3286d3a4fac..af79f3b126d 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -26286,7 +26286,7 @@ (define_insn "aesenc"
    aesenc\t{%2, %0|%0, %2}
    vaesenc\t{%2, %1, %0|%0, %1, %2}
    vaesenc\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,vaes,avx512vl")
    (set_attr "type" "sselog1")
    (set_attr "addr" "gpr16,*,*")
    (set_attr "prefix_extra" "1")
@@ -26304,7 +26304,7 @@ (define_insn "aesenclast"
    aesenclast\t{%2, %0|%0, %2}
    vaesenclast\t{%2, %1, %0|%0, %1, %2}
    vaesenclast\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,vaes,avx512vl")
    (set_attr "type" "sselog1")
    (set_attr "addr" "gpr16,*,*")
    (set_attr "prefix_extra" "1")
@@ -26322,7 +26322,7 @@ (define_insn "aesdec"
    aesdec\t{%2, %0|%0, %2}
    vaesdec\t{%2, %1, %0|%0, %1, %2}
    vaesdec\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,vaes,avx512vl")
    (set_attr "type" "sselog1")
    (set_attr "addr" "gpr16,*,*")
    (set_attr "prefix_extra" "1")
@@ -26340,7 +26340,7 @@ (define_insn "aesdeclast"
    aesdeclast\t{%2, %0|%0, %2}
    vaesdeclast\t{%2, %1, %0|%0, %1, %2}
    vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,vaes,avx512vl")
    (set_attr "addr" "gpr16,*,*")
    (set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")

```

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression]VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
  2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
  2024-04-03 17:46 ` pinskia at gcc dot gnu.org
@ 2024-04-03 22:39 ` jakub at gcc dot gnu.org
  2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org
                   ` (3 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: jakub at gcc dot gnu.org @ 2024-04-03 22:39 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jakub at gcc dot gnu.org

--- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
(In reply to Andrew Pinski from comment #2)
> Something like this should fix it (but I am not 100% sure it is correct nor
> can I test it):

This is IMHO not correct.
vaesenc etc. instructions can be used even if just -maes -mavx, not just -mvaes
-mavx512vl.
But, it is especially messy because -mvaes doesn't imply -maes, so IMHO if
somebody e.g. asks for -mvaes -mavx512vl -mno-aes and the insns don't use any
xmm16+ register, it would emit the insn using VEX encoding rather than EVEX, so
I think we need to use {evex} prefixes.

So I think we want:
--- gcc/config/i386/i386.md.jj  2024-03-18 10:33:27.983419363 +0100
+++ gcc/config/i386/i386.md     2024-04-04 00:17:48.818340648 +0200
@@ -568,13 +568,14 @@ (define_attr "unit" "integer,i387,sse,mm

 ;; Used to control the "enabled" attribute on a per-instruction basis.
 (define_attr "isa" "base,x64,nox64,x64_sse2,x64_sse4,x64_sse4_noavx,
-                   x64_avx,x64_avx512bw,x64_avx512dq,aes,apx_ndd,
+                   x64_avx,x64_avx512bw,x64_avx512dq,apx_ndd,
                    sse_noavx,sse2,sse2_noavx,sse3,sse3_noavx,sse4,sse4_noavx,
                   
avx,noavx,avx2,noavx2,bmi,bmi2,fma4,fma,avx512f,avx512f_512,
                    noavx512f,avx512bw,avx512bw_512,noavx512bw,avx512dq,
                    noavx512dq,fma_or_avx512vl,avx512vl,noavx512vl,avxvnni,
                    avx512vnnivl,avx512fp16,avxifma,avx512ifmavl,avxneconvert,
-                   avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl"
+                   avx512bf16vl,vpclmulqdqvl,avx_noavx512f,avx_noavx512vl,
+                   aes_avx,vaes_avx512vl"
   (const_string "base"))

 ;; The (bounding maximum) length of an instruction immediate.
@@ -915,7 +916,6 @@ (define_attr "enabled" ""
           (symbol_ref "TARGET_64BIT && TARGET_AVX512BW")
         (eq_attr "isa" "x64_avx512dq")
           (symbol_ref "TARGET_64BIT && TARGET_AVX512DQ")
-        (eq_attr "isa" "aes") (symbol_ref "TARGET_AES")
         (eq_attr "isa" "sse_noavx")
           (symbol_ref "TARGET_SSE && !TARGET_AVX")
         (eq_attr "isa" "sse2") (symbol_ref "TARGET_SSE2")
@@ -968,6 +968,10 @@ (define_attr "enabled" ""
           (symbol_ref "TARGET_VPCLMULQDQ && TARGET_AVX512VL")
         (eq_attr "isa" "apx_ndd")
           (symbol_ref "TARGET_APX_NDD")
+        (eq_attr "isa" "aes_avx")
+          (symbol_ref "TARGET_AES && TARGET_AVX")
+        (eq_attr "isa" "vaes_avx512vl")
+          (symbol_ref "TARGET_VAES && TARGET_AVX512VL")

         (eq_attr "mmx_isa" "native")
           (symbol_ref "!TARGET_MMX_WITH_SSE")
--- gcc/config/i386/sse.md.jj   2024-03-18 08:58:45.942772799 +0100
+++ gcc/config/i386/sse.md      2024-04-04 00:33:32.386194779 +0200
@@ -26277,75 +26277,79 @@ (define_insn "xop_vpermil2<mode>3"
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

 (define_insn "aesenc"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
-                      (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+  [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+                      (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
                      UNSPEC_AESENC))]
   "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
   "@
    aesenc\t{%2, %0|%0, %2}
    vaesenc\t{%2, %1, %0|%0, %1, %2}
+   %{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}
    vaesenc\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
    (set_attr "type" "sselog1")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,*,*,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex,evex")
-   (set_attr "btver2_decode" "double,double,double")
+   (set_attr "prefix" "orig,vex,evex,evex")
+   (set_attr "btver2_decode" "double,double,double,double")
    (set_attr "mode" "TI")])

 (define_insn "aesenclast"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
-                      (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+  [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+                      (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
                      UNSPEC_AESENCLAST))]
   "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
   "@
    aesenclast\t{%2, %0|%0, %2}
    vaesenclast\t{%2, %1, %0|%0, %1, %2}
+   %{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}
    vaesenclast\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
    (set_attr "type" "sselog1")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,*,*,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex,evex")
-   (set_attr "btver2_decode" "double,double,double") 
+   (set_attr "prefix" "orig,vex,evex,evex")
+   (set_attr "btver2_decode" "double,double,double,double")
    (set_attr "mode" "TI")])

 (define_insn "aesdec"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
-                      (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+  [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+                      (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
                      UNSPEC_AESDEC))]
   "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
   "@
    aesdec\t{%2, %0|%0, %2}
    vaesdec\t{%2, %1, %0|%0, %1, %2}
+   %{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}
    vaesdec\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
+  [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
    (set_attr "type" "sselog1")
-   (set_attr "addr" "gpr16,*,*")
+   (set_attr "addr" "gpr16,*,*,*")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex,evex")
-   (set_attr "btver2_decode" "double,double,double") 
+   (set_attr "prefix" "orig,vex,evex,evex")
+   (set_attr "btver2_decode" "double,double,double,double") 
    (set_attr "mode" "TI")])

 (define_insn "aesdeclast"
-  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
-       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,v")
-                      (match_operand:V2DI 2 "vector_operand" "xja,xm,vm")]
+  [(set (match_operand:V2DI 0 "register_operand" "=x,x,x,v")
+       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x,x,v")
+                      (match_operand:V2DI 2 "vector_operand" "xja,xm,xm,vm")]
                      UNSPEC_AESDECLAST))]
   "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
   "@
    aesdeclast\t{%2, %0|%0, %2}
    vaesdeclast\t{%2, %1, %0|%0, %1, %2}
+   %{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}
    vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
-  [(set_attr "isa" "noavx,aes,avx512vl")
-   (set_attr "addr" "gpr16,*,*")
+  [(set_attr "isa" "noavx,aes_avx,vaes_avx512vl,vaes_avx512vl")
+   (set_attr "addr" "gpr16,*,*,*")
    (set_attr "type" "sselog1")
    (set_attr "prefix_extra" "1")
-   (set_attr "prefix" "orig,vex,evex")
-   (set_attr "btver2_decode" "double,double,double")
+   (set_attr "prefix" "orig,vex,evex,evex")
+   (set_attr "btver2_decode" "double,double,double,double")
    (set_attr "mode" "TI")])

 (define_insn "aesimc"
@@ -30246,24 +30250,32 @@ (define_insn "vpdpwssds_<mode>_maskz_1"
    [(set_attr ("prefix") ("evex"))])

 (define_insn "vaesdec_<mode>"
-  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
+  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512VL_F
-         [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
-          (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
+         [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v")
+          (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")]
          UNSPEC_VAESDEC))]
   "TARGET_VAES"
-  "vaesdec\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+  if (which_alternative == 0 && <MODE>mode == V16QImode)
+    return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}";
+  else
+    return "vaesdec\t{%2, %1, %0|%0, %1, %2}";
+})

 (define_insn "vaesdeclast_<mode>"
-  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
+  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512VL_F
-         [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
-          (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
+         [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v")
+          (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")]
          UNSPEC_VAESDECLAST))]
   "TARGET_VAES"
-  "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+  if (which_alternative == 0 && <MODE>mode == V16QImode)
+    return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
+  else
+    return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
+})

 (define_insn "vaesenc_<mode>"
   [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
@@ -30272,18 +30284,26 @@ (define_insn "vaesenc_<mode>"
           (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
          UNSPEC_VAESENC))]
   "TARGET_VAES"
-  "vaesenc\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+  if (which_alternative == 0 && <MODE>mode == V16QImode)
+    return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}";
+  else
+    return "vaesenc\t{%2, %1, %0|%0, %1, %2}";
+})

 (define_insn "vaesenclast_<mode>"
-  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=v")
+  [(set (match_operand:VI1_AVX512VL_F 0 "register_operand" "=x,v")
        (unspec:VI1_AVX512VL_F
-         [(match_operand:VI1_AVX512VL_F 1 "register_operand" "v")
-          (match_operand:VI1_AVX512VL_F 2 "vector_operand" "vm")]
+         [(match_operand:VI1_AVX512VL_F 1 "register_operand" "x,v")
+          (match_operand:VI1_AVX512VL_F 2 "vector_operand" "xm,vm")]
          UNSPEC_VAESENCLAST))]
   "TARGET_VAES"
-  "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
-)
+{
+  if (which_alternative == 0 && <MODE>mode == V16QImode)
+    return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}";
+  else
+    return "vaesenclast\t{%2, %1, %0|%0, %1, %2}";
+})

 (define_insn "vpclmulqdq_<mode>"
   [(set (match_operand:VI8_FVL 0 "register_operand" "=v")

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
                   ` (2 preceding siblings ...)
  2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org
@ 2024-04-03 23:12 ` thiago at kde dot org
  2024-04-04  7:22 ` rguenth at gcc dot gnu.org
                   ` (2 subsequent siblings)
  6 siblings, 0 replies; 8+ messages in thread
From: thiago at kde dot org @ 2024-04-03 23:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

--- Comment #4 from Thiago Macieira <thiago at kde dot org> ---
(In reply to Jakub Jelinek from comment #3)
> vaesenc etc. instructions can be used even if just -maes -mavx, not just
> -mvaes -mavx512vl.

Correct, that's just VEX-prefixed AESNI instructions.

VAES added the 256-bit and 512-bit versions of those instructions. The table at
felix's website is accurate: https://www.felixcloutier.com/x86/aesenc

This is actually similar to GFNI:
* GFNI: 128-bit only, non-VEX, non-EVEX
* GFNI+AVX: VEX allowed, 128- and 256-bit; no EVEX
* GFNI+AVX512F: 128- and 256-bit with VEX, 512-bit with EVEX
* GFNI+AVX512VL: 128- and 256-bit with VEX, all with EVEX
* GFNI+AVX10 without EVEX512: 128- and 256-bit with VEX and EVEX, no 512-bit

The F-no-VL case does not exist in practice.

> But, it is especially messy because -mvaes doesn't imply -maes, so IMHO if
> somebody e.g. asks for -mvaes -mavx512vl -mno-aes and the insns don't use
> any xmm16+ register, it would emit the insn using VEX encoding rather than
> EVEX, so I think we need to use {evex} prefixes.

Would it be simpler to just imply that VAES includes AESNI? There are no
processors that have VAES without AESNI and it doesn't make sense for there to
be one.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
                   ` (3 preceding siblings ...)
  2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org
@ 2024-04-04  7:22 ` rguenth at gcc dot gnu.org
  2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
  2024-04-09 10:48 ` jakub at gcc dot gnu.org
  6 siblings, 0 replies; 8+ messages in thread
From: rguenth at gcc dot gnu.org @ 2024-04-04  7:22 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
           Priority|P3                          |P1

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
                   ` (4 preceding siblings ...)
  2024-04-04  7:22 ` rguenth at gcc dot gnu.org
@ 2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
  2024-04-09 10:48 ` jakub at gcc dot gnu.org
  6 siblings, 0 replies; 8+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2024-04-09 10:41 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

--- Comment #5 from GCC Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by Jakub Jelinek <jakub@gcc.gnu.org>:

https://gcc.gnu.org/g:a79d13a01f8cbb99fb45bf3f3ffc62c99ee0b05e

commit r14-9869-ga79d13a01f8cbb99fb45bf3f3ffc62c99ee0b05e
Author: Jakub Jelinek <jakub@redhat.com>
Date:   Tue Apr 9 12:35:18 2024 +0200

    i386: Fix aes/vaes patterns [PR114576]

    On Wed, Apr 19, 2023 at 02:40:59AM +0000, Jiang, Haochen via Gcc-patches
wrote:
    > > >  (define_insn "aesenc"
    > > > -  [(set (match_operand:V2DI 0 "register_operand" "=x,x")
    > > > -       (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0,x")
    > > > -                      (match_operand:V2DI 2 "vector_operand"
"xBm,xm")]
    > > > +  [(set (match_operand:V2DI 0 "register_operand" "=x,x,v")
    > > > +       (unspec:V2DI [(match_operand:V2DI 1 "register_operand"
"0,x,v")
    > > > +                      (match_operand:V2DI 2 "vector_operand"
    > > > + "xBm,xm,vm")]
    > > >                       UNSPEC_AESENC))]
    > > > -  "TARGET_AES"
    > > > +  "TARGET_AES || (TARGET_VAES && TARGET_AVX512VL)"
    > > >    "@
    > > >     aesenc\t{%2, %0|%0, %2}
    > > > +   vaesenc\t{%2, %1, %0|%0, %1, %2}
    > > >     vaesenc\t{%2, %1, %0|%0, %1, %2}"
    > > > -  [(set_attr "isa" "noavx,avx")
    > > > +  [(set_attr "isa" "noavx,aes,avx512vl")
    > > Shouldn't it be vaes_avx512vl and then remove " || (TARGET_VAES &&
    > > TARGET_AVX512VL)" from condition.
    >
    > Since VAES should not imply AES, we need that "|| (TARGET_VAES &&
    > TARGET_AVX512VL)"
    >
    > And there is no need to add vaes_avx512vl since the last alternative will
only
    > be hit when there is no aes. When there is no aes, the pattern will need
vaes
    > and avx512vl both or we could not use this pattern. avx512vl here is just
like
    > a placeholder.

    As the following testcase shows, the above change was incorrect.

    Using aes isa for the second alternative is obviously wrong, aes is enabled
    whenever -maes is, regardless of -mavx or -mno-avx, so the above change
    means that for -maes -mno-avx RA can choose, either it matches the first
    alternative with the dup operand, or it matches the second one (but that
    is of course wrong because vaesenc VEX encoded insn needs AES & AVX CPUID).

    The big question is if "Since VAES should not imply AES" is the case or
not.
    Looking around at what LLVM does on godbolt, seems since clang 6 which
added
    -mvaes support -mvaes there implies -maes, but GCC treats those two
    independent.

    Now, if we'd take the LLVM path of making -mvaes imply -maes and -mno-aes
    imply -mno-vaes, then we should probably just revert the above patch and
    tweak common/config/i386/ to do the implications (+ add the testcase from
    this patch).

    If we keep the current behavior, where AES and VAES are completely
    independent extensions, then we need to do more changes as the following
    patch attempts to do.
    We should use the aesenc etc. insns for noavx as before, we know at that
    point that TARGET_AES must be true because (TARGET_VAES && TARGET_AVX512VL)
    won't be true when !TARGET_AVX - TARGET_AVX512VL implies TARGET_AVX.
    For the second alternative, i.e. the AVX AES VEX or VAES AVX512F EVEX case
    without using %xmm16+/EGPR regs, the patch uses avx isa, but we need to
    emit {evex} prefix in the assembly if AES ISA is not enabled.
    For the last alternative, we need to use a new vaes_avx512vl isa attribute,
    because the %xmm16+/EGPR support is there only if both VAES and AVX512VL
    is enabled, not just AVX and AES.
    Still, I wonder if -mvaes shouldn't imply at least -mavx512f and
    -mno-avx512f shouldn't imply -mno-vaes, because otherwise can't see how
    it could use 512-bit registers (this part not done in the patch).

    2024-04-09  Jakub Jelinek  <jakub@redhat.com>

            PR target/114576
            * config/i386/i386.md (isa): Remove aes, add vaes_avx512vl.
            (enabled): Remove aes isa check, add vaes_avx512vl.
            * config/i386/sse.md (aesenc, aesenclast, aesdec, aesdeclast): Use
            jm instead of m for second alternative and emit {evex} prefix
            for it if !TARGET_AES.  Use noavx,avx,vaes_avx512vl isa attribute.
            (vaesdec_<mode>, vaesdeclast_<mode>, vaesenc_<mode>,
            vaesenclast_<mode>): Add second alternative with x instead of v
            and jm instead of m.

            * gcc.target/i386/aes-pr114576.c: New test.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* [Bug target/114576] [14 regression] VEX-prefixed AES instruction without AVX enabled
  2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
                   ` (5 preceding siblings ...)
  2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
@ 2024-04-09 10:48 ` jakub at gcc dot gnu.org
  6 siblings, 0 replies; 8+ messages in thread
From: jakub at gcc dot gnu.org @ 2024-04-09 10:48 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114576

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |FIXED

--- Comment #6 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Fixed.

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2024-04-09 10:48 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2024-04-03 17:36 [Bug target/114576] New: [13 regression][config/i386] GCC 14/trunk emits VEX-prefixed AES instruction without AVX enabled thiago at kde dot org
2024-04-03 17:41 ` [Bug target/114576] [14 " pinskia at gcc dot gnu.org
2024-04-03 17:46 ` pinskia at gcc dot gnu.org
2024-04-03 22:39 ` [Bug target/114576] [14 regression]VEX-prefixed " jakub at gcc dot gnu.org
2024-04-03 23:12 ` [Bug target/114576] [14 regression] VEX-prefixed " thiago at kde dot org
2024-04-04  7:22 ` rguenth at gcc dot gnu.org
2024-04-09 10:41 ` cvs-commit at gcc dot gnu.org
2024-04-09 10:48 ` jakub at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).