public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc r12-1706] Optimize vpexpand* to mask mov when the mask has all ones in its lower part (including 0 and -1).
@ 2021-06-22  1:35 hongtao Liu
  0 siblings, 0 replies; only message in thread
From: hongtao Liu @ 2021-06-22  1:35 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f51618f301664d02cc41205f1386c0c9b9a29a54

commit r12-1706-gf51618f301664d02cc41205f1386c0c9b9a29a54
Author: liuhongt <hongtao.liu@intel.com>
Date:   Thu Apr 29 18:27:09 2021 +0800

    Optimize vpexpand* to mask mov when the mask has all ones in its lower part (including 0 and -1).
    
    gcc/ChangeLog:
    
            PR target/100267
            * config/i386/i386-builtin.def (BDESC): Adjust builtin name.
            * config/i386/sse.md (<avx512>_expand<mode>_mask): Rename to ..
            (expand<mode>_mask): this ..
            (*expand<mode>_mask): New pre_reload splitter to transform
            v{,p}expand* to vmov* when mask is zero, all ones, or has all
            ones in its lower part, otherwise still generate
            v{,p}expand*.
    
    gcc/testsuite/ChangeLog:
    
            PR target/100267
            * gcc.target/i386/avx512bw-pr100267-1.c: New test.
            * gcc.target/i386/avx512bw-pr100267-b-2.c: New test.
            * gcc.target/i386/avx512bw-pr100267-d-2.c: New test.
            * gcc.target/i386/avx512bw-pr100267-q-2.c: New test.
            * gcc.target/i386/avx512bw-pr100267-w-2.c: New test.
            * gcc.target/i386/avx512f-pr100267-1.c: New test.
            * gcc.target/i386/avx512f-pr100267-pd-2.c: New test.
            * gcc.target/i386/avx512f-pr100267-ps-2.c: New test.
            * gcc.target/i386/avx512vl-pr100267-1.c: New test.
            * gcc.target/i386/avx512vl-pr100267-pd-2.c: New test.
            * gcc.target/i386/avx512vl-pr100267-ps-2.c: New test.
            * gcc.target/i386/avx512vlbw-pr100267-1.c: New test.
            * gcc.target/i386/avx512vlbw-pr100267-b-2.c: New test.
            * gcc.target/i386/avx512vlbw-pr100267-d-2.c: New test.
            * gcc.target/i386/avx512vlbw-pr100267-q-2.c: New test.
            * gcc.target/i386/avx512vlbw-pr100267-w-2.c: New test.

Diff:
---
 gcc/config/i386/i386-builtin.def                   |  48 ++++----
 gcc/config/i386/sse.md                             |  69 +++++++++++-
 .../gcc.target/i386/avx512bw-pr100267-1.c          |  38 +++++++
 .../gcc.target/i386/avx512bw-pr100267-b-2.c        |  74 +++++++++++++
 .../gcc.target/i386/avx512bw-pr100267-d-2.c        |  74 +++++++++++++
 .../gcc.target/i386/avx512bw-pr100267-q-2.c        |  74 +++++++++++++
 .../gcc.target/i386/avx512bw-pr100267-w-2.c        |  74 +++++++++++++
 gcc/testsuite/gcc.target/i386/avx512f-pr100267-1.c |  66 +++++++++++
 .../gcc.target/i386/avx512f-pr100267-pd-2.c        |  76 +++++++++++++
 .../gcc.target/i386/avx512f-pr100267-ps-2.c        |  72 ++++++++++++
 .../gcc.target/i386/avx512vl-pr100267-1.c          | 122 +++++++++++++++++++++
 .../gcc.target/i386/avx512vl-pr100267-pd-2.c       |  15 +++
 .../gcc.target/i386/avx512vl-pr100267-ps-2.c       |  15 +++
 .../gcc.target/i386/avx512vlbw-pr100267-1.c        |  66 +++++++++++
 .../gcc.target/i386/avx512vlbw-pr100267-b-2.c      |  16 +++
 .../gcc.target/i386/avx512vlbw-pr100267-d-2.c      |  15 +++
 .../gcc.target/i386/avx512vlbw-pr100267-q-2.c      |  15 +++
 .../gcc.target/i386/avx512vlbw-pr100267-w-2.c      |  16 +++
 18 files changed, 920 insertions(+), 25 deletions(-)

diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 80c2a2c0294..31df3a613dd 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -204,13 +204,13 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16sf_mask, "__
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev16si_mask, "__builtin_ia32_compressstoresi512_mask", IX86_BUILTIN_PCOMPRESSDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV16SI_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8df_mask, "__builtin_ia32_compressstoredf512_mask", IX86_BUILTIN_COMPRESSPDSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DF_V8DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressstorev8di_mask, "__builtin_ia32_compressstoredi512_mask", IX86_BUILTIN_PCOMPRESSQSTORE512, UNKNOWN, (int) VOID_FTYPE_PV8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandloadsf512_mask", IX86_BUILTIN_EXPANDPSLOAD512, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandloadsf512_maskz", IX86_BUILTIN_EXPANDPSLOAD512Z, UNKNOWN, (int) V16SF_FTYPE_PCV16SF_V16SF_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandloadsi512_mask", IX86_BUILTIN_PEXPANDDLOAD512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandloadsi512_maskz", IX86_BUILTIN_PEXPANDDLOAD512Z, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expandloaddf512_mask", IX86_BUILTIN_EXPANDPDLOAD512, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expandloaddf512_maskz", IX86_BUILTIN_EXPANDPDLOAD512Z, UNKNOWN, (int) V8DF_FTYPE_PCV8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expandloaddi512_mask", IX86_BUILTIN_PEXPANDQLOAD512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expandloaddi512_maskz", IX86_BUILTIN_PEXPANDQLOAD512Z, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv16si_mask, "__builtin_ia32_loaddqusi512_mask", IX86_BUILTIN_LOADDQUSI512, UNKNOWN, (int) V16SI_FTYPE_PCINT_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_loadv8di_mask, "__builtin_ia32_loaddqudi512_mask", IX86_BUILTIN_LOADDQUDI512, UNKNOWN, (int) V8DI_FTYPE_PCINT64_V8DI_UQI)
@@ -337,14 +337,14 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressstorev4di_mask, "_
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressstorev2di_mask, "__builtin_ia32_compressstoredi128_mask", IX86_BUILTIN_PCOMPRESSQSTORE128, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressstorev8si_mask, "__builtin_ia32_compressstoresi256_mask", IX86_BUILTIN_PCOMPRESSDSTORE256, UNKNOWN, (int) VOID_FTYPE_PV8SI_V8SI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressstorev4si_mask, "__builtin_ia32_compressstoresi128_mask", IX86_BUILTIN_PCOMPRESSDSTORE128, UNKNOWN, (int) VOID_FTYPE_PV4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4df_mask, "__builtin_ia32_expandloaddf256_mask", IX86_BUILTIN_EXPANDPDLOAD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv2df_mask, "__builtin_ia32_expandloaddf128_mask", IX86_BUILTIN_EXPANDPDLOAD128, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8sf_mask, "__builtin_ia32_expandloadsf256_mask", IX86_BUILTIN_EXPANDPSLOAD256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4sf_mask, "__builtin_ia32_expandloadsf128_mask", IX86_BUILTIN_EXPANDPSLOAD128, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4di_mask, "__builtin_ia32_expandloaddi256_mask", IX86_BUILTIN_PEXPANDQLOAD256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv2di_mask, "__builtin_ia32_expandloaddi128_mask", IX86_BUILTIN_PEXPANDQLOAD128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8si_mask, "__builtin_ia32_expandloadsi256_mask", IX86_BUILTIN_PEXPANDDLOAD256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4si_mask, "__builtin_ia32_expandloadsi128_mask", IX86_BUILTIN_PEXPANDDLOAD128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expandloaddf256_maskz", IX86_BUILTIN_EXPANDPDLOAD256Z, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expandloaddf128_maskz", IX86_BUILTIN_EXPANDPDLOAD128Z, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandloadsf256_maskz", IX86_BUILTIN_EXPANDPSLOAD256Z, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF_UQI)
@@ -1342,9 +1342,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_floatv8siv8df2_mask, "__builtin_ia32
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vcvtps2ph512_mask,  "__builtin_ia32_vcvtps2ph512_mask", IX86_BUILTIN_CVTPS2PH512, UNKNOWN, (int) V16HI_FTYPE_V16SF_INT_V16HI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_ufloatv8siv8df2_mask, "__builtin_ia32_cvtudq2pd512_mask", IX86_BUILTIN_CVTUDQ2PD512, UNKNOWN, (int) V8DF_FTYPE_V8SI_V8DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_cvtusi2sd32, "__builtin_ia32_cvtusi2sd32", IX86_BUILTIN_CVTUSI2SD32, UNKNOWN, (int) V2DF_FTYPE_V2DF_UINT)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8df_mask, "__builtin_ia32_expanddf512_mask", IX86_BUILTIN_EXPANDPD512, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8df_maskz, "__builtin_ia32_expanddf512_maskz", IX86_BUILTIN_EXPANDPD512Z, UNKNOWN, (int) V8DF_FTYPE_V8DF_V8DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16sf_mask, "__builtin_ia32_expandsf512_mask", IX86_BUILTIN_EXPANDPS512, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16sf_maskz, "__builtin_ia32_expandsf512_maskz", IX86_BUILTIN_EXPANDPS512Z, UNKNOWN, (int) V16SF_FTYPE_V16SF_V16SF_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf32x4_mask, "__builtin_ia32_extractf32x4_mask", IX86_BUILTIN_EXTRACTF32X4, UNKNOWN, (int) V4SF_FTYPE_V16SF_INT_V4SF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_vextractf64x4_mask, "__builtin_ia32_extractf64x4_mask", IX86_BUILTIN_EXTRACTF64X4, UNKNOWN, (int) V4DF_FTYPE_V8DF_INT_V4DF_UQI)
@@ -1381,9 +1381,9 @@ BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv16si3_mask, "__builtin_ia
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_gtv8di3_mask, "__builtin_ia32_pcmpgtq512_mask", IX86_BUILTIN_PCMPGTQ512_MASK, UNKNOWN, (int) UQI_FTYPE_V8DI_V8DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv16si_mask, "__builtin_ia32_compresssi512_mask", IX86_BUILTIN_PCOMPRESSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_compressv8di_mask, "__builtin_ia32_compressdi512_mask", IX86_BUILTIN_PCOMPRESSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv16si_mask, "__builtin_ia32_expandsi512_mask", IX86_BUILTIN_PEXPANDD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv16si_maskz, "__builtin_ia32_expandsi512_maskz", IX86_BUILTIN_PEXPANDD512Z, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_UHI)
-BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_expandv8di_mask, "__builtin_ia32_expanddi512_mask", IX86_BUILTIN_PEXPANDQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_avx512f_expandv8di_maskz, "__builtin_ia32_expanddi512_maskz", IX86_BUILTIN_PEXPANDQ512Z, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv16si3_mask, "__builtin_ia32_pmaxsd512_mask", IX86_BUILTIN_PMAXSD512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI_V16SI_UHI)
 BDESC (OPTION_MASK_ISA_AVX512F, 0, CODE_FOR_smaxv8di3_mask, "__builtin_ia32_pmaxsq512_mask", IX86_BUILTIN_PMAXSQ512, UNKNOWN, (int) V8DI_FTYPE_V8DI_V8DI_V8DI_UQI)
@@ -2187,14 +2187,14 @@ BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressv4di_mask, "__buil
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressv2di_mask, "__builtin_ia32_compressdi128_mask", IX86_BUILTIN_PCOMPRESSQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressv8si_mask, "__builtin_ia32_compresssi256_mask", IX86_BUILTIN_PCOMPRESSD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_compressv4si_mask, "__builtin_ia32_compresssi128_mask", IX86_BUILTIN_PCOMPRESSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI)
-BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4df_mask, "__builtin_ia32_expanddf256_mask", IX86_BUILTIN_EXPANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv2df_mask, "__builtin_ia32_expanddf128_mask", IX86_BUILTIN_EXPANDPD128, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8sf_mask, "__builtin_ia32_expandsf256_mask", IX86_BUILTIN_EXPANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4sf_mask, "__builtin_ia32_expandsf128_mask", IX86_BUILTIN_EXPANDPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4di_mask, "__builtin_ia32_expanddi256_mask", IX86_BUILTIN_PEXPANDQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv2di_mask, "__builtin_ia32_expanddi128_mask", IX86_BUILTIN_PEXPANDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv8si_mask, "__builtin_ia32_expandsi256_mask", IX86_BUILTIN_PEXPANDD256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_expandv4si_mask, "__builtin_ia32_expandsi128_mask", IX86_BUILTIN_PEXPANDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv4df_maskz, "__builtin_ia32_expanddf256_maskz", IX86_BUILTIN_EXPANDPD256Z, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv2df_maskz, "__builtin_ia32_expanddf128_maskz", IX86_BUILTIN_EXPANDPD128Z, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_UQI)
 BDESC (OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_avx512vl_expandv8sf_maskz, "__builtin_ia32_expandsf256_maskz", IX86_BUILTIN_EXPANDPS256Z, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_UQI)
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 94296bc773b..f5f9403db44 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -699,6 +699,17 @@
    (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
    (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
    (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
+(define_mode_iterator VI12_VI48F_AVX512VLBW ;; Modes matched by the *expand<mode>_mask splitter: QI/HI/SI/DI integer and SF/DF float vectors.
+  [(V16SI "TARGET_AVX512F") (V16SF "TARGET_AVX512F")
+   (V8DI "TARGET_AVX512F") (V8DF "TARGET_AVX512F")
+   (V8SI "TARGET_AVX512VL") (V8SF "TARGET_AVX512VL")
+   (V4DI "TARGET_AVX512VL") (V4DF "TARGET_AVX512VL")
+   (V4SI "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
+   (V2DI "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")
+   (V64QI "TARGET_AVX512BW") (V16QI "TARGET_AVX512VL")
+   (V32QI "TARGET_AVX512VL && TARGET_AVX512BW") (V32HI "TARGET_AVX512BW") ;; NOTE(review): only V32QI pairs VL with BW; V16QI/V16HI/V8HI use VL alone -- confirm intended.
+   (V16HI "TARGET_AVX512VL") (V8HI "TARGET_AVX512VL")])
+
 (define_mode_iterator VI48F_256 [V8SI V8SF V4DI V4DF])
 
 (define_mode_iterator VF_AVX512
@@ -23009,7 +23020,7 @@
   "TARGET_AVX512F"
   "operands[2] = CONST0_RTX (<MODE>mode);")
 
-(define_insn "<avx512>_expand<mode>_mask"
+(define_insn "expand<mode>_mask"
   [(set (match_operand:VI48F 0 "register_operand" "=v,v")
 	(unspec:VI48F
 	  [(match_operand:VI48F 1 "nonimmediate_operand" "v,m")
@@ -23037,6 +23048,62 @@
    (set_attr "memory" "none,load")
    (set_attr "mode" "<sseinsnmode>")])
 
+(define_insn_and_split "*expand<mode>_mask" ;; Pre-reload split of UNSPEC_EXPAND whose mask (operand 3) is a constant.
+  [(set (match_operand:VI12_VI48F_AVX512VLBW 0 "register_operand")
+	(unspec:VI12_VI48F_AVX512VLBW
+	  [(match_operand:VI12_VI48F_AVX512VLBW 1 "nonimmediate_operand")
+	   (match_operand:VI12_VI48F_AVX512VLBW 2 "nonimm_or_0_operand")
+	   (match_operand 3 "const_int_operand")]
+	  UNSPEC_EXPAND))]
+  "ix86_pre_reload_split ()
+   && (TARGET_AVX512VBMI2 || GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4)"
+  "#"
+  "&& 1"
+  [(const_int 0)]
+{
+  unsigned HOST_WIDE_INT mask = INTVAL (operands[3]);
+  bool has_zero = false;
+  unsigned n = GET_MODE_NUNITS (<MODE>mode), i;
+  unsigned ones = 0;
+
+  /* Scan the low N bits of MASK.  ONES gets the index of the first zero
+     bit; the loop exits early (I != N) if a one bit follows a zero.  */
+  for (i = 0; i != n; i++)
+    {
+      if ((mask & HOST_WIDE_INT_1U << i) && has_zero)
+	break;
+
+      /* Record the position of the first zero bit.  */
+      if (!(mask & HOST_WIDE_INT_1U << i) && !has_zero)
+	{
+	  has_zero = true;
+	  ones = i;
+	}
+    }
+
+  if (!has_zero)
+    ones = n;
+  /* Neither all zeros nor all ones in the low N bits: a mask register is needed.  */
+  if (i != n || (ones != 0 && ones != n))
+    {
+      rtx reg = gen_reg_rtx (<avx512fmaskmode>mode);
+      emit_move_insn (reg, operands[3]);
+      enum insn_code icode;
+      if (i == n)
+      /* For a mask whose one bits all sit in its lower part,
+	 v{,p}expand* is replaced by the <avx512>_load<mode>_mask
+	 pattern, i.e. a masked move.  */
+	icode = CODE_FOR_<avx512>_load<mode>_mask;
+      else
+	icode = CODE_FOR_expand<mode>_mask;
+      emit_insn (GEN_FCN (icode) (operands[0], operands[1], operands[2], reg));
+    }
+  else
+    /* All-ones mask: plain move of operand 1; zero mask: plain move of operand 2.  */
+    emit_move_insn (operands[0], ones ? operands[1] : operands[2]);
+  DONE;
+})
+
 (define_expand "expand<mode>_maskz"
   [(set (match_operand:VI12_AVX512VLBW 0 "register_operand")
 	(unspec:VI12_AVX512VLBW
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c
new file mode 100644
index 00000000000..ce83d63bc73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-1.c
@@ -0,0 +1,38 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vbmi2 -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+#include <immintrin.h>
+
+char *pi8;
+short *pi16;
+volatile __m512i xi16, xi8;
+
+void extern
+avx512f_test (void)
+{
+  xi8 = _mm512_mask_expand_epi8 (xi8, 0, xi8); /* Zero mask.  */
+  xi8 = _mm512_mask_expand_epi8 (xi8, -1, xi8); /* All-ones mask.  */
+  xi8 = _mm512_mask_expand_epi8 (xi8, (1 << 8) - 1, xi8); /* Only the low 8 bits set.  */
+  xi8 = _mm512_mask_expand_epi8 (xi8, (1 << 8) + 1, xi8); /* Bits 0 and 8: a one above a zero.  */
+  /* Byte expand-load through a pointer, same four mask shapes.  */
+  xi8 = _mm512_mask_expandloadu_epi8 (xi8, 0, pi8);
+  xi8 = _mm512_mask_expandloadu_epi8 (xi8, -1, pi8);
+  xi8 = _mm512_mask_expandloadu_epi8 (xi8, (1 << 6) - 1, pi8);
+  xi8 = _mm512_mask_expandloadu_epi8 (xi8, (1 << 6) + 3, pi8);
+  /* Word expand from a vector value, same four mask shapes.  */
+  xi16 = _mm512_mask_expand_epi16 (xi16, 0, xi16);
+  xi16 = _mm512_mask_expand_epi16 (xi16, -1, xi16);
+  xi16 = _mm512_mask_expand_epi16 (xi16, (1 << 3) - 1, xi16);
+  xi16 = _mm512_mask_expand_epi16 (xi16, (1 << 3) + 2, xi16);
+  /* Word expand-load through a pointer, same four mask shapes.  */
+  xi16 = _mm512_mask_expandloadu_epi16 (xi16, 0, pi16);
+  xi16 = _mm512_mask_expandloadu_epi16 (xi16,  -1, pi16);
+  xi16 = _mm512_mask_expandloadu_epi16 (xi16, (1 << 7) - 1, pi16);
+  xi16 = _mm512_mask_expandloadu_epi16 (xi16, (1 << 7) + 7, pi16);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c
new file mode 100644
index 00000000000..424b485a203
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-b-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512F
+
+#define AVX512VBMI2
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 8)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (char *s, char *r, MASK_TYPE mask)
+{
+  int i, k;
+  /* Scalar expand model: R[I] gets the next unread S element for each set bit I.  */
+  for (i = 0, k = 0; i < SIZE; i++)
+    {
+      if (mask & ((long long)1 << i))
+	r[i] = s[k++];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_b) s1, res1, res2, res3, res4, res5, res6, res7, res8;
+  MASK_TYPE mask = (1 << (SIZE >> 2) - 1) - 1; /* Parses as (1 << ((SIZE >> 2) - 1)) - 1.  */
+  char s2[SIZE];
+  char res_ref1[SIZE];
+  char res_ref2[SIZE];
+  int i, sign = 1;
+  /* Fill both sources with alternating-sign values; set res1/res5 to DEFAULT_VALUE.  */
+  for (i = 0; i < SIZE; i++)
+    {
+      s1.a[i] = 12345 * (i + 200) * sign;
+      s2[i] = 67890 * (i + 300) * sign;
+      res1.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      sign = -sign;
+    }
+  /* Expand from a vector (res2-res4) and from memory (res6-res8) with MASK_ALL_ONES, 0 and MASK.  */
+  res2.x = INTRINSIC (_mask_expand_epi8) (res1.x, MASK_ALL_ONES, s1.x);
+  res3.x = INTRINSIC (_mask_expand_epi8) (res1.x, 0, s1.x);
+  res4.x = INTRINSIC (_mask_expand_epi8) (res1.x, mask, s1.x);
+  res6.x = INTRINSIC (_mask_expandloadu_epi8) (res5.x, MASK_ALL_ONES, s2);
+  res7.x = INTRINSIC (_mask_expandloadu_epi8) (res5.x, 0, s2);
+  res8.x = INTRINSIC (_mask_expandloadu_epi8) (res5.x, mask, s2);
+  /* Scalar references for the two MASK results.  */
+  CALC (s1.a, res_ref1, mask);
+  CALC (s2, res_ref2, mask);
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res2, s1.a))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res3, res1.a))
+    abort ();
+  /* MASK_MERGE is defined outside this file; presumably it fills the unselected reference elements -- confirm.  */
+  MASK_MERGE (i_b) (res_ref1, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res4, res_ref1))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res6, s2))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res7, res5.a))
+    abort ();
+
+  MASK_MERGE (i_b) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_b) (res8, res_ref2))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c
new file mode 100644
index 00000000000..24790b20cf1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-d-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512F
+
+#define AVX512VBMI2
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (int *s, int *r, MASK_TYPE mask)
+{
+  int i, k;
+  /* Scalar expand model: R[I] gets the next unread S element for each set bit I.  */
+  for (i = 0, k = 0; i < SIZE; i++)
+    {
+      if (mask & ((long long)1 << i))
+	r[i] = s[k++];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_d) s1, res1, res2, res3, res4, res5, res6, res7, res8;
+  MASK_TYPE mask = (1 << SIZE - 1) - 1; /* Parses as (1 << (SIZE - 1)) - 1.  */
+  int s2[SIZE];
+  int res_ref1[SIZE];
+  int res_ref2[SIZE];
+  int i, sign = 1;
+  /* Fill both sources with alternating-sign values; set res1/res5 to DEFAULT_VALUE.  */
+  for (i = 0; i < SIZE; i++)
+    {
+      s1.a[i] = 12345 * (i + 200) * sign;
+      s2[i] = 67890 * (i + 300) * sign;
+      res1.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      sign = -sign;
+    }
+  /* Expand from a vector (res2-res4) and from memory (res6-res8) with MASK_ALL_ONES, 0 and MASK.  */
+  res2.x = INTRINSIC (_mask_expand_epi32) (res1.x, MASK_ALL_ONES, s1.x);
+  res3.x = INTRINSIC (_mask_expand_epi32) (res1.x, 0, s1.x);
+  res4.x = INTRINSIC (_mask_expand_epi32) (res1.x, mask, s1.x);
+  res6.x = INTRINSIC (_mask_expandloadu_epi32) (res5.x, MASK_ALL_ONES, s2);
+  res7.x = INTRINSIC (_mask_expandloadu_epi32) (res5.x, 0, s2);
+  res8.x = INTRINSIC (_mask_expandloadu_epi32) (res5.x, mask, s2);
+  /* Scalar references for the two MASK results.  */
+  CALC (s1.a, res_ref1, mask);
+  CALC (s2, res_ref2, mask);
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res2, s1.a))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res1.a))
+    abort ();
+  /* MASK_MERGE is defined outside this file; presumably it fills the unselected reference elements -- confirm.  */
+  MASK_MERGE (i_d) (res_ref1, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res4, res_ref1))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res6, s2))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res7, res5.a))
+    abort ();
+
+  MASK_MERGE (i_d) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_d) (res8, res_ref2))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c
new file mode 100644
index 00000000000..119b50e6f79
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-q-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512F
+
+#define AVX512VBMI2
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (long long *s, long long *r, MASK_TYPE mask)
+{
+  int i, k;
+  /* Scalar expand model: R[I] gets the next unread S element for each set bit I.  */
+  for (i = 0, k = 0; i < SIZE; i++)
+    {
+      if (mask & ((long long)1 << i))
+	r[i] = s[k++];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_q) s1, res1, res2, res3, res4, res5, res6, res7, res8;
+  MASK_TYPE mask = (1 << SIZE - 1) - 1; /* Parses as (1 << (SIZE - 1)) - 1.  */
+  long long s2[SIZE];
+  long long res_ref1[SIZE];
+  long long res_ref2[SIZE];
+  int i, sign = 1;
+  /* Fill both sources with alternating-sign values; set res1/res5 to DEFAULT_VALUE.  */
+  for (i = 0; i < SIZE; i++)
+    {
+      s1.a[i] = 12345 * (i + 200) * sign;
+      s2[i] = 67890 * (i + 300) * sign;
+      res1.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      sign = -sign;
+    }
+  /* Expand from a vector (res2-res4) and from memory (res6-res8) with MASK_ALL_ONES, 0 and MASK.  */
+  res2.x = INTRINSIC (_mask_expand_epi64) (res1.x, MASK_ALL_ONES, s1.x);
+  res3.x = INTRINSIC (_mask_expand_epi64) (res1.x, 0, s1.x);
+  res4.x = INTRINSIC (_mask_expand_epi64) (res1.x, mask, s1.x);
+  res6.x = INTRINSIC (_mask_expandloadu_epi64) (res5.x, MASK_ALL_ONES, s2);
+  res7.x = INTRINSIC (_mask_expandloadu_epi64) (res5.x, 0, s2);
+  res8.x = INTRINSIC (_mask_expandloadu_epi64) (res5.x, mask, s2);
+  /* Scalar references for the two MASK results.  */
+  CALC (s1.a, res_ref1, mask);
+  CALC (s2, res_ref2, mask);
+
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res2, s1.a))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res1.a))
+    abort ();
+  /* MASK_MERGE is defined outside this file; presumably it fills the unselected reference elements -- confirm.  */
+  MASK_MERGE (i_q) (res_ref1, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res4, res_ref1))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res6, s2))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res7, res5.a))
+    abort ();
+
+  MASK_MERGE (i_q) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_q) (res8, res_ref2))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c
new file mode 100644
index 00000000000..926e04d4df6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr100267-w-2.c
@@ -0,0 +1,74 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512f } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512F
+
+#define AVX512VBMI2
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (short *s, short *r, MASK_TYPE mask)
+{
+  int i, k;  /* I walks the lanes of R; K is the next unread element of S.  */
+  /* Reference expand: only lanes whose MASK bit is set are written.  */
+  for (i = 0, k = 0; i < SIZE; i++)
+    {
+      if (mask & ((long long)1 << i))
+	r[i] = s[k++];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, i_w) s1, res1, res2, res3, res4, res5, res6, res7, res8;
+  MASK_TYPE mask = (1 << (SIZE >> 2) - 1) - 1;  /* Parses as (1 << ((SIZE >> 2) - 1)) - 1.  */
+  short s2[SIZE];
+  short res_ref1[SIZE];
+  short res_ref2[SIZE];
+  int i, sign = 1;
+  /* Alternating-sign inputs; res1/res5 become the first operand of each intrinsic.  */
+  for (i = 0; i < SIZE; i++)
+    {
+      s1.a[i] = 12345 * (i + 200) * sign;
+      s2[i] = 67890 * (i + 300) * sign;  /* The int product is narrowed when stored into the short array.  */
+      res1.a[i] = DEFAULT_VALUE;
+      res5.a[i] = DEFAULT_VALUE;
+      sign = -sign;
+    }
+  /* res2-4 take a vector source, res6-8 a memory source; masks are MASK_ALL_ONES, 0 and MASK.  */
+  res2.x = INTRINSIC (_mask_expand_epi16) (res1.x, MASK_ALL_ONES, s1.x);
+  res3.x = INTRINSIC (_mask_expand_epi16) (res1.x, 0, s1.x);
+  res4.x = INTRINSIC (_mask_expand_epi16) (res1.x, mask, s1.x);
+  res6.x = INTRINSIC (_mask_expandloadu_epi16) (res5.x, MASK_ALL_ONES, s2);
+  res7.x = INTRINSIC (_mask_expandloadu_epi16) (res5.x, 0, s2);
+  res8.x = INTRINSIC (_mask_expandloadu_epi16) (res5.x, mask, s2);
+  /* Reference results are computed for the partial mask only.  */
+  CALC (s1.a, res_ref1, mask);
+  CALC (s2, res_ref2, mask);
+  /* MASK_ALL_ONES case: res2 is compared with the source itself.  */
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res2, s1.a))
+    abort ();
+  /* Zero-mask case: res3 is compared with the first operand.  */
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res1.a))
+    abort ();
+  /* Partial mask: MASK_MERGE presumably resets the unselected reference lanes -- confirm in the helper header.  */
+  MASK_MERGE (i_w) (res_ref1, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res4, res_ref1))
+    abort ();
+  /* The same three comparisons for the memory-source results.  */
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res6, s2))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res7, res5.a))
+    abort ();
+
+  MASK_MERGE (i_w) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, i_w) (res8, res_ref2))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr100267-1.c b/gcc/testsuite/gcc.target/i386/avx512f-pr100267-1.c
new file mode 100644
index 00000000000..5eb6de531c2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr100267-1.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  4 } } */
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%zmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  4 } } */
+#include <immintrin.h>
+
+int *pi32;
+long long *pi64;
+double *pd;
+float *pf;
+volatile __m512i xi32, xi64;
+volatile __m512d xd;
+volatile __m512 xf;
+
+void extern
+avx512f_test (void)
+{ /* Every group of four uses, in order: a zero mask, an all-ones mask, low ones only, and a mask that is not a run of low ones.  */
+  xi32 = _mm512_mask_expand_epi32 (xi32, 0, xi32);
+  xi32 = _mm512_mask_expand_epi32 (xi32, -1, xi32);
+  xi32 = _mm512_mask_expand_epi32 (xi32, (1 << 8) - 1, xi32);
+  xi32 = _mm512_mask_expand_epi32 (xi32, (1 << 8) + 1, xi32);
+  /* Memory-source variant; the dg-final counts in this test expect one surviving expand and one masked vmov per group.  */
+  xi32 = _mm512_mask_expandloadu_epi32 (xi32, 0, pi32);
+  xi32 = _mm512_mask_expandloadu_epi32 (xi32, (1 << 16) - 1, pi32);
+  xi32 = _mm512_mask_expandloadu_epi32 (xi32, (1 << 6) - 1, pi32);
+  xi32 = _mm512_mask_expandloadu_epi32 (xi32, (1 << 6) + 3, pi32);
+
+  xi64 = _mm512_mask_expand_epi64 (xi64, 0, xi64);
+  xi64 = _mm512_mask_expand_epi64 (xi64, -1, xi64);
+  xi64 = _mm512_mask_expand_epi64 (xi64, (1 << 3) - 1, xi64);
+  xi64 = _mm512_mask_expand_epi64 (xi64, (1 << 3) + 2, xi64);
+
+  xi64 = _mm512_mask_expandloadu_epi64 (xi64, 0, pi64);
+  xi64 = _mm512_mask_expandloadu_epi64 (xi64, (1 << 8) - 1, pi64);
+  xi64 = _mm512_mask_expandloadu_epi64 (xi64, (1 << 7) - 1, pi64);
+  xi64 = _mm512_mask_expandloadu_epi64 (xi64, (1 << 7) + 7, pi64);
+
+  xf = _mm512_mask_expand_ps (xf, 0, xf);
+  xf = _mm512_mask_expand_ps (xf, (1 << 16) - 1, xf);
+  xf = _mm512_mask_expand_ps (xf, (1 << 15) - 1, xf);
+  xf = _mm512_mask_expand_ps (xf, (1 << 14) + 3, xf);
+
+  xf = _mm512_mask_expandloadu_ps (xf, 0, pf);
+  xf = _mm512_mask_expandloadu_ps (xf, -1, pf);
+  xf = _mm512_mask_expandloadu_ps (xf, (1 << 13) - 1, pf);
+  xf = _mm512_mask_expandloadu_ps (xf, (1 << 13) + 5, pf);
+
+  xd = _mm512_mask_expand_pd (xd, 0, xd);
+  xd = _mm512_mask_expand_pd (xd, (1 << 8) - 1, xd);
+  xd = _mm512_mask_expand_pd (xd, (1 << 4) - 1, xd);
+  xd = _mm512_mask_expand_pd (xd, (1 << 4) + 1, xd);
+
+  xd = _mm512_mask_expandloadu_pd (xd, 0, pd);
+  xd = _mm512_mask_expandloadu_pd (xd, -1, pd);
+  xd = _mm512_mask_expandloadu_pd (xd, (1 << 5) - 1, pd);
+  xd = _mm512_mask_expandloadu_pd (xd, (1 << 5), pd);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr100267-pd-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr100267-pd-2.c
new file mode 100644
index 00000000000..7d653e8e853
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr100267-pd-2.c
@@ -0,0 +1,76 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (double *s, double *r, MASK_TYPE mask)
+{
+  int i, k;  /* I walks the lanes of R; K is the next unread element of S.  */
+  /* Reference expand: only lanes whose MASK bit is set are written.  */
+  for (i = 0, k = 0; i < SIZE; i++)
+    {
+      if (mask & (1 << i))
+	r[i] = s[k++];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, d) s1, res2, res3, res4, res5, res6, res7, res8, res9;
+  MASK_TYPE mask = (1 << SIZE - 1) - 1;  /* Parses as (1 << (SIZE - 1)) - 1, i.e. the low SIZE - 1 bits.  */
+  double s2[SIZE];
+  double res_ref1[SIZE];
+  double res_ref2[SIZE];
+  int i, sign = 1;
+  /* Alternating-sign inputs; res2/res6 become the first operand of each intrinsic.  */
+  for (i = 0; i < SIZE; i++)
+    {
+      s1.a[i] = 123.456 * (i + 200) * sign;
+      s2[i] = 789.012 * (i + 300) * sign;
+      res2.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+      sign = -sign;
+    }
+  /* Vector-source form with masks MASK_ALL_ONES, 0 and MASK.  */
+  res3.x = INTRINSIC (_mask_expand_pd) (res2.x, MASK_ALL_ONES, s1.x);
+  res4.x = INTRINSIC (_mask_expand_pd) (res2.x, 0, s1.x);
+  res5.x = INTRINSIC (_mask_expand_pd) (res2.x, mask, s1.x);
+  /* Memory-source form with the same three masks.  */
+  res7.x = INTRINSIC (_mask_expandloadu_pd) (res6.x, MASK_ALL_ONES, s2);
+  res8.x = INTRINSIC (_mask_expandloadu_pd) (res6.x, 0, s2);
+  res9.x = INTRINSIC (_mask_expandloadu_pd) (res6.x, mask, s2);
+
+
+  /* Reference results are computed for the partial mask only.  */
+  CALC (s1.a, res_ref1, mask);
+  CALC (s2, res_ref2, mask);
+  /* MASK_ALL_ONES case: res3 is compared with the source itself.  */
+  if (UNION_CHECK (AVX512F_LEN, d) (res3, s1.a))
+    abort ();
+  /* Zero-mask case: res4 is compared with the first operand.  */
+  if (UNION_CHECK (AVX512F_LEN, d) (res4, res2.a))
+    abort ();
+  /* Partial mask: MASK_MERGE presumably resets the unselected reference lanes -- confirm in the helper header.  */
+  MASK_MERGE (d) (res_ref1, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, d) (res5, res_ref1))
+    abort ();
+  /* The same three comparisons for the memory-source results.  */
+  if (UNION_CHECK (AVX512F_LEN, d) (res7, s2))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, d) (res8, res6.a))
+    abort ();
+
+  MASK_MERGE (d) (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, d) (res9, res_ref2))
+    abort ();
+
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512f-pr100267-ps-2.c b/gcc/testsuite/gcc.target/i386/avx512f-pr100267-ps-2.c
new file mode 100644
index 00000000000..40f0dde0c88
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-pr100267-ps-2.c
@@ -0,0 +1,72 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512f" } */
+/* { dg-require-effective-target avx512f } */
+
+#define AVX512F
+
+#include "avx512f-helper.h"
+
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
+
+static void
+CALC (float *s, float *r, MASK_TYPE mask)
+{
+  int i, k;  /* I walks the lanes of R; K is the next unread element of S.  */
+  /* Reference expand: only lanes whose MASK bit is set are written.  */
+  for (i = 0, k = 0; i < SIZE; i++)
+    {
+      if (mask & (1 << i))
+	r[i] = s[k++];
+    }
+}
+
+void
+TEST (void)
+{
+  UNION_TYPE (AVX512F_LEN, ) s1, res2, res3, res4, res5, res6, res7, res8, res9;
+  MASK_TYPE mask = (1 << SIZE - 1) - 1;  /* Parses as (1 << (SIZE - 1)) - 1, i.e. the low SIZE - 1 bits.  */
+  float s2[SIZE];
+  float res_ref1[SIZE];
+  float res_ref2[SIZE];
+  int i, sign = 1;
+  /* Alternating-sign inputs; res2/res6 become the first operand of each intrinsic.  */
+  for (i = 0; i < SIZE; i++)
+    {
+      s1.a[i] = 123.456 * (i + 200) * sign;
+      s2[i] = 789.012 * (i + 300) * sign;
+      res2.a[i] = DEFAULT_VALUE;
+      res6.a[i] = DEFAULT_VALUE;
+      sign = -sign;
+    }
+  /* res3-5 take a vector source, res7-9 a memory source; masks are MASK_ALL_ONES, 0 and MASK.  */
+  res3.x = INTRINSIC (_mask_expand_ps) (res2.x, MASK_ALL_ONES, s1.x);
+  res4.x = INTRINSIC (_mask_expand_ps) (res2.x, 0, s1.x);
+  res5.x = INTRINSIC (_mask_expand_ps) (res2.x, mask, s1.x);
+  res7.x = INTRINSIC (_mask_expandloadu_ps) (res6.x, MASK_ALL_ONES, s2);
+  res8.x = INTRINSIC (_mask_expandloadu_ps) (res6.x, 0, s2);
+  res9.x = INTRINSIC (_mask_expandloadu_ps) (res6.x, mask, s2);
+  /* Reference results are computed for the partial mask only.  */
+  CALC (s1.a, res_ref1, mask);
+  CALC (s2, res_ref2, mask);
+  /* MASK_ALL_ONES case: res3 is compared with the source itself.  */
+  if (UNION_CHECK (AVX512F_LEN, ) (res3, s1.a))
+    abort ();
+  /* Zero-mask case: res4 is compared with the first operand.  */
+  if (UNION_CHECK (AVX512F_LEN, ) (res4, res2.a))
+    abort ();
+  /* Partial mask: MASK_MERGE presumably resets the unselected reference lanes -- confirm in the helper header.  */
+  MASK_MERGE () (res_ref1, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, ) (res5, res_ref1))
+    abort ();
+  /* The same three comparisons for the memory-source results.  */
+  if (UNION_CHECK (AVX512F_LEN, ) (res7, s2))
+    abort ();
+
+  if (UNION_CHECK (AVX512F_LEN, ) (res8, res6.a))
+    abort ();
+
+  MASK_MERGE () (res_ref2, mask, SIZE);
+  if (UNION_CHECK (AVX512F_LEN, ) (res9, res_ref2))
+    abort ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-1.c
new file mode 100644
index 00000000000..9ffd381f6c4
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-1.c
@@ -0,0 +1,122 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  4 } } */
+/* { dg-final { scan-assembler-times "(?:vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}|blend\[a-z]+\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+)(?:\n|\[ \\t\]+#)"  4 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandd\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandq\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandpd\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vexpandps\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  4 } } */
+/* { dg-final { scan-assembler-times "(?:vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}|(?:blend\[a-z]+|movsd)\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+)(?:\n|\[ \\t\]+#)"  4 } } */
+
+#include <immintrin.h>
+
+int *pi32;
+long long *pi64;
+double *pd;
+float *pf;
+volatile __m256i xi32, xi64;
+volatile __m256d xd;
+volatile __m256 xf;
+
+volatile __m128i xi32_128, xi64_128;
+volatile __m128d xd_128;
+volatile __m128 xf_128;
+
+void extern
+avx512vl_test (void)
+{ /* Every group of four uses, in order: a zero mask, an all-ones mask, low ones only, and a mask that is not a run of low ones.  */
+  xi32 = _mm256_mask_expand_epi32 (xi32, 0, xi32);
+  xi32 = _mm256_mask_expand_epi32 (xi32, -1, xi32);
+  xi32 = _mm256_mask_expand_epi32 (xi32, (1 << 4) - 1, xi32);
+  xi32 = _mm256_mask_expand_epi32 (xi32, (1 << 4) + 1, xi32);
+  /* Memory-source variant; the dg-final counts in this test expect exactly one surviving expand per group.  */
+  xi32 = _mm256_mask_expandloadu_epi32 (xi32, 0, pi32);
+  xi32 = _mm256_mask_expandloadu_epi32 (xi32, (1 << 8) - 1, pi32);
+  xi32 = _mm256_mask_expandloadu_epi32 (xi32, (1 << 6) - 1, pi32);
+  xi32 = _mm256_mask_expandloadu_epi32 (xi32, (1 << 6) + 3, pi32);
+
+  xi64 = _mm256_mask_expand_epi64 (xi64, 0, xi64);
+  xi64 = _mm256_mask_expand_epi64 (xi64, -1, xi64);
+  xi64 = _mm256_mask_expand_epi64 (xi64, (1 << 3) - 1, xi64);
+  xi64 = _mm256_mask_expand_epi64 (xi64, (1 << 3) + 2, xi64);
+
+  xi64 = _mm256_mask_expandloadu_epi64 (xi64, 0, pi64);
+  xi64 = _mm256_mask_expandloadu_epi64 (xi64, (1 << 4) - 1, pi64);
+  xi64 = _mm256_mask_expandloadu_epi64 (xi64, (1 << 2) - 1, pi64);
+  xi64 = _mm256_mask_expandloadu_epi64 (xi64, (1 << 2), pi64);
+
+  xf = _mm256_mask_expand_ps (xf, 0, xf);
+  xf = _mm256_mask_expand_ps (xf, (1 << 8) - 1, xf);
+  xf = _mm256_mask_expand_ps (xf, (1 << 6) - 1, xf);
+  xf = _mm256_mask_expand_ps (xf, (1 << 6) + 3, xf);
+
+  xf = _mm256_mask_expandloadu_ps (xf, 0, pf);
+  xf = _mm256_mask_expandloadu_ps (xf, -1, pf);
+  xf = _mm256_mask_expandloadu_ps (xf, (1 << 7) - 1, pf);
+  xf = _mm256_mask_expandloadu_ps (xf, (1 << 7) + 5, pf);
+
+  xd = _mm256_mask_expand_pd (xd, 0, xd);
+  xd = _mm256_mask_expand_pd (xd, (1 << 4) - 1, xd);
+  xd = _mm256_mask_expand_pd (xd, (1 << 2) - 1, xd);
+  xd = _mm256_mask_expand_pd (xd, (1 << 2), xd);
+
+  xd = _mm256_mask_expandloadu_pd (xd, 0, pd);
+  xd = _mm256_mask_expandloadu_pd (xd, -1, pd);
+  xd = _mm256_mask_expandloadu_pd (xd, (1 << 2) - 1, pd);
+  xd = _mm256_mask_expandloadu_pd (xd, (1 << 2), pd);
+  /* The same eight groups again on the 128-bit variables and _mm_ intrinsics.  */
+  xi32_128 = _mm_mask_expand_epi32 (xi32_128, 0, xi32_128);
+  xi32_128 = _mm_mask_expand_epi32 (xi32_128, -1, xi32_128);
+  xi32_128 = _mm_mask_expand_epi32 (xi32_128, (1 << 3) - 1, xi32_128);
+  xi32_128 = _mm_mask_expand_epi32 (xi32_128, (1 << 3) + 1, xi32_128);
+
+  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, 0, pi32);
+  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, (1 << 4) - 1, pi32);
+  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, (1 << 2) - 1, pi32);
+  xi32_128 = _mm_mask_expandloadu_epi32 (xi32_128, (1 << 1) + 3, pi32);
+
+  xi64_128 = _mm_mask_expand_epi64 (xi64_128, 0, xi64_128);
+  xi64_128 = _mm_mask_expand_epi64 (xi64_128, -1, xi64_128);
+  xi64_128 = _mm_mask_expand_epi64 (xi64_128, (1 << 1) - 1, xi64_128);
+  xi64_128 = _mm_mask_expand_epi64 (xi64_128, 2, xi64_128);
+
+  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 0, pi64);
+  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 3, pi64);
+  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 1, pi64);
+  xi64_128 = _mm_mask_expandloadu_epi64 (xi64_128, 2, pi64);
+
+  xf_128 = _mm_mask_expand_ps (xf_128, 0, xf_128);
+  xf_128 = _mm_mask_expand_ps (xf_128, (1 << 4) - 1, xf_128);
+  xf_128 = _mm_mask_expand_ps (xf_128, (1 << 3) - 1, xf_128);
+  xf_128 = _mm_mask_expand_ps (xf_128, (1 << 2), xf_128);
+
+  xf_128 = _mm_mask_expandloadu_ps (xf_128, 0, pf);
+  xf_128 = _mm_mask_expandloadu_ps (xf_128, -1, pf);
+  xf_128 = _mm_mask_expandloadu_ps (xf_128, (1 << 3) - 1, pf);
+  xf_128 = _mm_mask_expandloadu_ps (xf_128, (1 << 1), pf);
+
+  xd_128 = _mm_mask_expand_pd (xd_128, 0, xd_128);
+  xd_128 = _mm_mask_expand_pd (xd_128, (1 << 2) - 1, xd_128);
+  xd_128 = _mm_mask_expand_pd (xd_128, 1, xd_128);
+  xd_128 = _mm_mask_expand_pd (xd_128, 2, xd_128);
+
+  xd_128 = _mm_mask_expandloadu_pd (xd_128, 0, pd);
+  xd_128 = _mm_mask_expandloadu_pd (xd_128, -1, pd);
+  xd_128 = _mm_mask_expandloadu_pd (xd_128, 1, pd);
+  xd_128 = _mm_mask_expandloadu_pd (xd_128, 2, pd);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-pd-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-pd-2.c
new file mode 100644
index 00000000000..e5f30374b51
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-pd-2.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-pr100267-pd-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-pr100267-pd-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-ps-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-ps-2.c
new file mode 100644
index 00000000000..0149a8aaa55
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vl-pr100267-ps-2.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512f-pr100267-ps-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512f-pr100267-ps-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c
new file mode 100644
index 00000000000..135dbd7577e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-1.c
@@ -0,0 +1,66 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512bw -mavx512vbmi2 -mavx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%ymm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandb\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+/* { dg-final { scan-assembler-times "vpexpandw\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  1 } } */
+
+/* { dg-final { scan-assembler-times "vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\]*\\(\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}(?:\n|\[ \\t\]+#)"  2 } } */
+/* { dg-final { scan-assembler-times "(?:vmov\[a-z0-9\]*\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+\{%k\[1-7\]\}|blend\[a-z]*\[ \\t\]+\[^\{\n\(]*%xmm\[0-9\]+)(?:\n|\[ \\t\]+#)"  2 } } */
+#include <immintrin.h>
+
+char *pi8;
+short *pi16;
+volatile __m256i xi16, xi8;
+volatile __m128i xi16_xmm, xi8_xmm;
+
+void extern
+avx512f_test (void)
+{ /* Every group of four uses, in order: a zero mask, an all-ones mask, low ones only, and a mask that is not a run of low ones.  */
+  xi8 = _mm256_mask_expand_epi8 (xi8, 0, xi8);
+  xi8 = _mm256_mask_expand_epi8 (xi8, -1, xi8);
+  xi8 = _mm256_mask_expand_epi8 (xi8, (1 << 30) - 1, xi8);
+  xi8 = _mm256_mask_expand_epi8 (xi8, (1 << 16) + 1, xi8);
+  /* Memory-source variant; the dg-final counts in this test expect exactly one surviving expand per group.  */
+  xi8 = _mm256_mask_expandloadu_epi8 (xi8, 0, pi8);
+  xi8 = _mm256_mask_expandloadu_epi8 (xi8, -1, pi8);
+  xi8 = _mm256_mask_expandloadu_epi8 (xi8, (1 << 28) - 1, pi8);
+  xi8 = _mm256_mask_expandloadu_epi8 (xi8, (1 << 15) + 3, pi8);
+
+  xi16 = _mm256_mask_expand_epi16 (xi16, 0, xi16);
+  xi16 = _mm256_mask_expand_epi16 (xi16, -1, xi16);
+  xi16 = _mm256_mask_expand_epi16 (xi16, (1 << 15) - 1, xi16);
+  xi16 = _mm256_mask_expand_epi16 (xi16, (1 << 14) + 2, xi16);
+
+  xi16 = _mm256_mask_expandloadu_epi16 (xi16, 0, pi16);
+  xi16 = _mm256_mask_expandloadu_epi16 (xi16, (1 << 16) - 1, pi16);
+  xi16 = _mm256_mask_expandloadu_epi16 (xi16, (1 << 14) - 1, pi16);
+  xi16 = _mm256_mask_expandloadu_epi16 (xi16, (1 << 13) + 7, pi16);
+  /* The same four groups again on the 128-bit variables and _mm_ intrinsics.  */
+  xi8_xmm = _mm_mask_expand_epi8 (xi8_xmm, 0, xi8_xmm);
+  xi8_xmm = _mm_mask_expand_epi8 (xi8_xmm, -1, xi8_xmm);
+  xi8_xmm = _mm_mask_expand_epi8 (xi8_xmm, (1 << 13) - 1, xi8_xmm);
+  xi8_xmm = _mm_mask_expand_epi8 (xi8_xmm, (1 << 12) + 1, xi8_xmm);
+
+  xi8_xmm = _mm_mask_expandloadu_epi8 (xi8_xmm, 0, pi8);
+  xi8_xmm = _mm_mask_expandloadu_epi8 (xi8_xmm, (1 << 16) - 1, pi8);
+  xi8_xmm = _mm_mask_expandloadu_epi8 (xi8_xmm, (1 << 12) - 1, pi8);
+  xi8_xmm = _mm_mask_expandloadu_epi8 (xi8_xmm, (1 << 11) + 3, pi8);
+
+  xi16_xmm = _mm_mask_expand_epi16 (xi16_xmm, 0, xi16_xmm);
+  xi16_xmm = _mm_mask_expand_epi16 (xi16_xmm, -1, xi16_xmm);
+  xi16_xmm = _mm_mask_expand_epi16 (xi16_xmm, (1 << 7) - 1, xi16_xmm);
+  xi16_xmm = _mm_mask_expand_epi16 (xi16_xmm, (1 << 4) + 2, xi16_xmm);
+
+  xi16_xmm = _mm_mask_expandloadu_epi16 (xi16_xmm, 0, pi16);
+  xi16_xmm = _mm_mask_expandloadu_epi16 (xi16_xmm, (1 << 8) - 1, pi16);
+  xi16_xmm = _mm_mask_expandloadu_epi16 (xi16_xmm, (1 << 3) - 1, pi16);
+  xi16_xmm = _mm_mask_expandloadu_epi16 (xi16_xmm, (1 << 6) + 7, pi16);
+}
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c
new file mode 100644
index 00000000000..d54e8033a25
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-b-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-b-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-b-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-d-2.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-d-2.c
new file mode 100644
index 00000000000..1e604cfb1aa
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-d-2.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-d-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-d-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-q-2.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-q-2.c
new file mode 100644
index 00000000000..1f9fe65ad16
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-q-2.c
@@ -0,0 +1,15 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl" } */
+/* { dg-require-effective-target avx512vl } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-q-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-q-2.c"
diff --git a/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c
new file mode 100644
index 00000000000..a46ca78a621
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512vlbw-pr100267-w-2.c
@@ -0,0 +1,16 @@
+/* { dg-do run } */
+/* { dg-options "-O2 -mavx512vl -mavx512vbmi2 -mavx512bw" } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target avx512vbmi2 } */
+
+#define AVX512VL
+#define AVX512F_LEN 256
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-w-2.c"
+
+#undef AVX512F_LEN
+#undef AVX512F_LEN_HALF
+
+#define AVX512F_LEN 128
+#define AVX512F_LEN_HALF 128
+#include "avx512bw-pr100267-w-2.c"


^ permalink raw reply	[flat|nested] only message in thread

only message in thread, other threads:[~2021-06-22  1:35 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-06-22  1:35 [gcc r12-1706] Optimize vpexpand* to mask mov when mask have all ones in it's lower part (including 0 and -1) hongtao Liu

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).