public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH v4 0/2] x86: Add general_regs_only function attribute
@ 2021-04-14 22:39 H.J. Lu
  2021-04-14 22:39 ` [PATCH v4 1/2] x86: Move OPTION_MASK_* to i386-common.h H.J. Lu
  2021-04-14 22:39 ` [PATCH v4 2/2] x86: Add general_regs_only function attribute H.J. Lu
  0 siblings, 2 replies; 22+ messages in thread
From: H.J. Lu @ 2021-04-14 22:39 UTC (permalink / raw)
  To: gcc-patches
  Cc: Uros Bizjak, Jakub Jelinek, Bernhard Reutner-Fischer,
	Martin Sebor, Richard Biener

I realized that

commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Aug 21 09:42:49 2020 -0700

    x86: Add target("general-regs-only") function attribute

is incomplete since it is impossible to call integer intrinsics from
a function with the general-regs-only target attribute.  We need to add
a general_regs_only function attribute to go with it, to mark functions
which use only general-purpose registers.  When making inlining
decisions on such functions, the x86 backend can exclude non-GPR
compiler options.  The general_regs_only attribute should be added to
all x86 intrinsics which use only general-purpose registers.

H.J. Lu (2):
  x86: Move OPTION_MASK_* to i386-common.h
  x86: Add general_regs_only function attribute

 gcc/common/config/i386/i386-common.c      | 297 ------------------
 gcc/common/config/i386/i386-common.h      | 315 +++++++++++++++++++
 gcc/config/i386/adxintrin.h               |  18 +-
 gcc/config/i386/bmi2intrin.h              |  24 +-
 gcc/config/i386/bmiintrin.h               |  92 ++++--
 gcc/config/i386/cetintrin.h               |  33 +-
 gcc/config/i386/cldemoteintrin.h          |   3 +-
 gcc/config/i386/clflushoptintrin.h        |   3 +-
 gcc/config/i386/clwbintrin.h              |   3 +-
 gcc/config/i386/clzerointrin.h            |   4 +-
 gcc/config/i386/enqcmdintrin.h            |   6 +-
 gcc/config/i386/fxsrintrin.h              |  12 +-
 gcc/config/i386/hresetintrin.h            |   3 +-
 gcc/config/i386/i386-options.c            |   2 +
 gcc/config/i386/i386.c                    |  29 +-
 gcc/config/i386/i386.h                    |   1 +
 gcc/config/i386/ia32intrin.h              |  82 +++--
 gcc/config/i386/lwpintrin.h               |  24 +-
 gcc/config/i386/lzcntintrin.h             |  20 +-
 gcc/config/i386/movdirintrin.h            |   9 +-
 gcc/config/i386/mwaitxintrin.h            |   8 +-
 gcc/config/i386/pconfigintrin.h           |   3 +-
 gcc/config/i386/pkuintrin.h               |   6 +-
 gcc/config/i386/popcntintrin.h            |   8 +-
 gcc/config/i386/rdseedintrin.h            |   9 +-
 gcc/config/i386/rtmintrin.h               |   9 +-
 gcc/config/i386/serializeintrin.h         |   8 +-
 gcc/config/i386/sgxintrin.h               |   9 +-
 gcc/config/i386/tbmintrin.h               |  80 +++--
 gcc/config/i386/tsxldtrkintrin.h          |   6 +-
 gcc/config/i386/uintrintrin.h             |  12 +-
 gcc/config/i386/waitpkgintrin.h           |   9 +-
 gcc/config/i386/wbnoinvdintrin.h          |   3 +-
 gcc/config/i386/x86gprintrin.h            |  45 ++-
 gcc/config/i386/xsavecintrin.h            |   6 +-
 gcc/config/i386/xsaveintrin.h             |  18 +-
 gcc/config/i386/xsaveoptintrin.h          |   6 +-
 gcc/config/i386/xsavesintrin.h            |  12 +-
 gcc/config/i386/xtestintrin.h             |   3 +-
 gcc/doc/extend.texi                       |   5 +
 gcc/testsuite/gcc.target/i386/pr99744-3.c |  13 +
 gcc/testsuite/gcc.target/i386/pr99744-4.c | 352 ++++++++++++++++++++++
 42 files changed, 1134 insertions(+), 476 deletions(-)
 create mode 100644 gcc/common/config/i386/i386-common.h
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-4.c

-- 
2.30.2


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v4 1/2] x86: Move OPTION_MASK_* to i386-common.h
  2021-04-14 22:39 [PATCH v4 0/2] x86: Add general_regs_only function attribute H.J. Lu
@ 2021-04-14 22:39 ` H.J. Lu
  2021-04-14 22:39 ` [PATCH v4 2/2] x86: Add general_regs_only function attribute H.J. Lu
  1 sibling, 0 replies; 22+ messages in thread
From: H.J. Lu @ 2021-04-14 22:39 UTC (permalink / raw)
  To: gcc-patches
  Cc: Uros Bizjak, Jakub Jelinek, Bernhard Reutner-Fischer,
	Martin Sebor, Richard Biener

Move the OPTION_MASK_* macros to i386-common.h so that they can be used
in the x86 backend.

	* common/config/i386/i386-common.c (OPTION_MASK_*): Move to ...
	* common/config/i386/i386-common.h: Here.  New file.
	* config/i386/i386.h: Include common/config/i386/i386-common.h.
---
 gcc/common/config/i386/i386-common.c | 297 -------------------------
 gcc/common/config/i386/i386-common.h | 315 +++++++++++++++++++++++++++
 gcc/config/i386/i386.h               |   1 +
 3 files changed, 316 insertions(+), 297 deletions(-)
 create mode 100644 gcc/common/config/i386/i386-common.h

diff --git a/gcc/common/config/i386/i386-common.c b/gcc/common/config/i386/i386-common.c
index 1e6c1590ac4..37ff47bd676 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -29,303 +29,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "flags.h"
 
-/* Define a set of ISAs which are available when a given ISA is
-   enabled.  MMX and SSE ISAs are handled separately.  */
-
-#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
-#define OPTION_MASK_ISA_3DNOW_SET \
-  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
-#define OPTION_MASK_ISA_3DNOW_A_SET \
-  (OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_3DNOW_SET)
-
-#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
-#define OPTION_MASK_ISA_SSE2_SET \
-  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
-#define OPTION_MASK_ISA_SSE3_SET \
-  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_SSSE3_SET \
-  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
-#define OPTION_MASK_ISA_SSE4_1_SET \
-  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
-#define OPTION_MASK_ISA_SSE4_2_SET \
-  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
-#define OPTION_MASK_ISA_AVX_SET \
-  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET \
-   | OPTION_MASK_ISA_XSAVE_SET)
-#define OPTION_MASK_ISA_FMA_SET \
-  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
-#define OPTION_MASK_ISA_AVX2_SET \
-  (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX_SET)
-#define OPTION_MASK_ISA_FXSR_SET OPTION_MASK_ISA_FXSR
-#define OPTION_MASK_ISA_XSAVE_SET OPTION_MASK_ISA_XSAVE
-#define OPTION_MASK_ISA_XSAVEOPT_SET \
-  (OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_XSAVE_SET)
-#define OPTION_MASK_ISA_AVX512F_SET \
-  (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX2_SET)
-#define OPTION_MASK_ISA_AVX512CD_SET \
-  (OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512PF_SET \
-  (OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512ER_SET \
-  (OPTION_MASK_ISA_AVX512ER | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512DQ_SET \
-  (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512BW_SET \
-  (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512VL_SET \
-  (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512IFMA_SET \
-  (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512VBMI_SET \
-  (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET)
-#define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS
-#define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW
-#define OPTION_MASK_ISA_AVX512VBMI2_SET \
-  (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512VNNI_SET \
-  (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI
-#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \
-  (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA_AVX512BITALG_SET \
-  (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
-#define OPTION_MASK_ISA2_AVX512BF16_SET OPTION_MASK_ISA2_AVX512BF16
-#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
-#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
-#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
-#define OPTION_MASK_ISA_ADX_SET OPTION_MASK_ISA_ADX
-#define OPTION_MASK_ISA_PREFETCHWT1_SET OPTION_MASK_ISA_PREFETCHWT1
-#define OPTION_MASK_ISA_CLFLUSHOPT_SET OPTION_MASK_ISA_CLFLUSHOPT
-#define OPTION_MASK_ISA_XSAVES_SET \
-  (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_XSAVE_SET)
-#define OPTION_MASK_ISA_XSAVEC_SET \
-  (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_XSAVE_SET)
-#define OPTION_MASK_ISA_CLWB_SET OPTION_MASK_ISA_CLWB
-#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_SET OPTION_MASK_ISA2_AVX512VP2INTERSECT
-#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
-#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
-#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
-
-/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
-   as -msse4.2.  */
-#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
-
-#define OPTION_MASK_ISA_SSE4A_SET \
-  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
-#define OPTION_MASK_ISA_FMA4_SET \
-  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
-   | OPTION_MASK_ISA_AVX_SET)
-#define OPTION_MASK_ISA_XOP_SET \
-  (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
-#define OPTION_MASK_ISA_LWP_SET \
-  OPTION_MASK_ISA_LWP
-
-/* AES, SHA and PCLMUL need SSE2 because they use xmm registers.  */
-#define OPTION_MASK_ISA_AES_SET \
-  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_SHA_SET \
-  (OPTION_MASK_ISA_SHA | OPTION_MASK_ISA_SSE2_SET)
-#define OPTION_MASK_ISA_PCLMUL_SET \
-  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
-
-#define OPTION_MASK_ISA_ABM_SET \
-  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
-
-#define OPTION_MASK_ISA2_PCONFIG_SET OPTION_MASK_ISA2_PCONFIG
-#define OPTION_MASK_ISA2_WBNOINVD_SET OPTION_MASK_ISA2_WBNOINVD
-#define OPTION_MASK_ISA2_SGX_SET OPTION_MASK_ISA2_SGX
-#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
-#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
-#define OPTION_MASK_ISA_LZCNT_SET OPTION_MASK_ISA_LZCNT
-#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
-#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
-#define OPTION_MASK_ISA2_CX16_SET OPTION_MASK_ISA2_CX16
-#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
-#define OPTION_MASK_ISA2_MOVBE_SET OPTION_MASK_ISA2_MOVBE
-#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
-
-#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
-#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
-#define OPTION_MASK_ISA2_PTWRITE_SET OPTION_MASK_ISA2_PTWRITE
-#define OPTION_MASK_ISA_F16C_SET \
-  (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
-#define OPTION_MASK_ISA2_MWAITX_SET OPTION_MASK_ISA2_MWAITX
-#define OPTION_MASK_ISA2_CLZERO_SET OPTION_MASK_ISA2_CLZERO
-#define OPTION_MASK_ISA_PKU_SET OPTION_MASK_ISA_PKU
-#define OPTION_MASK_ISA2_RDPID_SET OPTION_MASK_ISA2_RDPID
-#define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
-#define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
-#define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
-#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
-#define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
-#define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
-#define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
-#define OPTION_MASK_ISA2_CLDEMOTE_SET OPTION_MASK_ISA2_CLDEMOTE
-#define OPTION_MASK_ISA2_ENQCMD_SET OPTION_MASK_ISA2_ENQCMD
-#define OPTION_MASK_ISA2_SERIALIZE_SET OPTION_MASK_ISA2_SERIALIZE
-#define OPTION_MASK_ISA2_TSXLDTRK_SET OPTION_MASK_ISA2_TSXLDTRK
-#define OPTION_MASK_ISA2_UINTR_SET OPTION_MASK_ISA2_UINTR
-#define OPTION_MASK_ISA2_HRESET_SET OPTION_MASK_ISA2_HRESET
-#define OPTION_MASK_ISA2_KL_SET OPTION_MASK_ISA2_KL
-#define OPTION_MASK_ISA2_WIDEKL_SET \
-  (OPTION_MASK_ISA2_WIDEKL | OPTION_MASK_ISA2_KL_SET)
-
-/* Define a set of ISAs which aren't available when a given ISA is
-   disabled.  MMX and SSE ISAs are handled separately.  */
-
-#define OPTION_MASK_ISA_MMX_UNSET \
-  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
-#define OPTION_MASK_ISA_3DNOW_UNSET \
-  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
-#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
-
-#define OPTION_MASK_ISA_SSE_UNSET \
-  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
-#define OPTION_MASK_ISA_SSE2_UNSET \
-  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
-#define OPTION_MASK_ISA_SSE3_UNSET \
-  (OPTION_MASK_ISA_SSE3 \
-   | OPTION_MASK_ISA_SSSE3_UNSET \
-   | OPTION_MASK_ISA_SSE4A_UNSET )
-#define OPTION_MASK_ISA_SSSE3_UNSET \
-  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
-#define OPTION_MASK_ISA_SSE4_1_UNSET \
-  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
-#define OPTION_MASK_ISA_SSE4_2_UNSET \
-  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
-#define OPTION_MASK_ISA_AVX_UNSET \
-  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
-   | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
-   | OPTION_MASK_ISA_AVX2_UNSET )
-#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
-#define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
-#define OPTION_MASK_ISA_XSAVE_UNSET \
-  (OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET \
-   | OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET \
-   | OPTION_MASK_ISA_AVX_UNSET)
-#define OPTION_MASK_ISA2_XSAVE_UNSET OPTION_MASK_ISA2_AMX_TILE_UNSET
-#define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
-#define OPTION_MASK_ISA_AVX2_UNSET \
-  (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
-#define OPTION_MASK_ISA2_AVX2_UNSET \
-  (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
-#define OPTION_MASK_ISA_AVX512F_UNSET \
-  (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
-   | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
-   | OPTION_MASK_ISA_AVX512DQ_UNSET | OPTION_MASK_ISA_AVX512BW_UNSET \
-   | OPTION_MASK_ISA_AVX512VL_UNSET | OPTION_MASK_ISA_AVX512IFMA_UNSET \
-   | OPTION_MASK_ISA_AVX512VBMI2_UNSET \
-   | OPTION_MASK_ISA_AVX512VNNI_UNSET \
-   | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET \
-   | OPTION_MASK_ISA_AVX512BITALG_UNSET)
-#define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD
-#define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF
-#define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER
-#define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ
-#define OPTION_MASK_ISA_AVX512BW_UNSET \
-  (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET)
-#define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL
-#define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA
-#define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI
-#define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS
-#define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW
-#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
-#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
-#define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI
-#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
-#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
-#define OPTION_MASK_ISA2_AVX512BF16_UNSET OPTION_MASK_ISA2_AVX512BF16
-#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
-#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
-#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
-#define OPTION_MASK_ISA_ADX_UNSET OPTION_MASK_ISA_ADX
-#define OPTION_MASK_ISA_PREFETCHWT1_UNSET OPTION_MASK_ISA_PREFETCHWT1
-#define OPTION_MASK_ISA_CLFLUSHOPT_UNSET OPTION_MASK_ISA_CLFLUSHOPT
-#define OPTION_MASK_ISA_XSAVEC_UNSET OPTION_MASK_ISA_XSAVEC
-#define OPTION_MASK_ISA_XSAVES_UNSET OPTION_MASK_ISA_XSAVES
-#define OPTION_MASK_ISA_CLWB_UNSET OPTION_MASK_ISA_CLWB
-#define OPTION_MASK_ISA2_MWAITX_UNSET OPTION_MASK_ISA2_MWAITX
-#define OPTION_MASK_ISA2_CLZERO_UNSET OPTION_MASK_ISA2_CLZERO
-#define OPTION_MASK_ISA_PKU_UNSET OPTION_MASK_ISA_PKU
-#define OPTION_MASK_ISA2_RDPID_UNSET OPTION_MASK_ISA2_RDPID
-#define OPTION_MASK_ISA_GFNI_UNSET OPTION_MASK_ISA_GFNI
-#define OPTION_MASK_ISA_SHSTK_UNSET OPTION_MASK_ISA_SHSTK
-#define OPTION_MASK_ISA2_VAES_UNSET OPTION_MASK_ISA2_VAES
-#define OPTION_MASK_ISA_VPCLMULQDQ_UNSET OPTION_MASK_ISA_VPCLMULQDQ
-#define OPTION_MASK_ISA_MOVDIRI_UNSET OPTION_MASK_ISA_MOVDIRI
-#define OPTION_MASK_ISA2_MOVDIR64B_UNSET OPTION_MASK_ISA2_MOVDIR64B
-#define OPTION_MASK_ISA2_WAITPKG_UNSET OPTION_MASK_ISA2_WAITPKG
-#define OPTION_MASK_ISA2_CLDEMOTE_UNSET OPTION_MASK_ISA2_CLDEMOTE
-#define OPTION_MASK_ISA2_ENQCMD_UNSET OPTION_MASK_ISA2_ENQCMD
-#define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
-#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
-#define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
-#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
-#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
-#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
-#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
-#define OPTION_MASK_ISA2_HRESET_UNSET OPTION_MASK_ISA2_HRESET
-#define OPTION_MASK_ISA2_KL_UNSET \
-  (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
-#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
-
-/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should the same
-   as -mno-sse4.1. */
-#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
-
-#define OPTION_MASK_ISA_SSE4A_UNSET \
-  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
-
-#define OPTION_MASK_ISA_FMA4_UNSET \
-  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
-#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
-#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
-
-#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
-#define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
-#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
-#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
-#define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
-#define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
-#define OPTION_MASK_ISA2_SGX_UNSET OPTION_MASK_ISA2_SGX
-#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
-#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
-#define OPTION_MASK_ISA_LZCNT_UNSET OPTION_MASK_ISA_LZCNT
-#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
-#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
-#define OPTION_MASK_ISA2_CX16_UNSET OPTION_MASK_ISA2_CX16
-#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
-#define OPTION_MASK_ISA2_MOVBE_UNSET OPTION_MASK_ISA2_MOVBE
-#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
-
-#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
-#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
-#define OPTION_MASK_ISA2_PTWRITE_UNSET OPTION_MASK_ISA2_PTWRITE
-#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
-
-#define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
-  (OPTION_MASK_ISA_MMX_UNSET \
-   | OPTION_MASK_ISA_SSE_UNSET)
-
-#define OPTION_MASK_ISA2_AVX512F_UNSET \
-  (OPTION_MASK_ISA2_AVX512BF16_UNSET \
-   | OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
-   | OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
-   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
-#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
-  (OPTION_MASK_ISA2_AVX512F_UNSET)
-#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
-#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
-#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
-#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
-#define OPTION_MASK_ISA2_SSSE3_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
-#define OPTION_MASK_ISA2_SSE3_UNSET OPTION_MASK_ISA2_SSSE3_UNSET
-#define OPTION_MASK_ISA2_SSE2_UNSET \
-  (OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET)
-#define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET
-
-#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET
-
 /* Set 1 << value as value of -malign-FLAG option.  */
 
 static void
diff --git a/gcc/common/config/i386/i386-common.h b/gcc/common/config/i386/i386-common.h
new file mode 100644
index 00000000000..4a7fd290bcf
--- /dev/null
+++ b/gcc/common/config/i386/i386-common.h
@@ -0,0 +1,315 @@
+/* IA-32 common macros.
+   Copyright (C) 2021 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* Define a set of ISAs which are available when a given ISA is
+   enabled.  MMX and SSE ISAs are handled separately.  */
+
+#define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
+#define OPTION_MASK_ISA_3DNOW_SET \
+  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
+#define OPTION_MASK_ISA_3DNOW_A_SET \
+  (OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_3DNOW_SET)
+
+#define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
+#define OPTION_MASK_ISA_SSE2_SET \
+  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
+#define OPTION_MASK_ISA_SSE3_SET \
+  (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SSSE3_SET \
+  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_SSE4_1_SET \
+  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
+#define OPTION_MASK_ISA_SSE4_2_SET \
+  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
+#define OPTION_MASK_ISA_AVX_SET \
+  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET \
+   | OPTION_MASK_ISA_XSAVE_SET)
+#define OPTION_MASK_ISA_FMA_SET \
+  (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_AVX2_SET \
+  (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_FXSR_SET OPTION_MASK_ISA_FXSR
+#define OPTION_MASK_ISA_XSAVE_SET OPTION_MASK_ISA_XSAVE
+#define OPTION_MASK_ISA_XSAVEOPT_SET \
+  (OPTION_MASK_ISA_XSAVEOPT | OPTION_MASK_ISA_XSAVE_SET)
+#define OPTION_MASK_ISA_AVX512F_SET \
+  (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX2_SET)
+#define OPTION_MASK_ISA_AVX512CD_SET \
+  (OPTION_MASK_ISA_AVX512CD | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512PF_SET \
+  (OPTION_MASK_ISA_AVX512PF | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512ER_SET \
+  (OPTION_MASK_ISA_AVX512ER | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512DQ_SET \
+  (OPTION_MASK_ISA_AVX512DQ | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512BW_SET \
+  (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512VL_SET \
+  (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512IFMA_SET \
+  (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512VBMI_SET \
+  (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET)
+#define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS
+#define OPTION_MASK_ISA2_AVX5124VNNIW_SET OPTION_MASK_ISA2_AVX5124VNNIW
+#define OPTION_MASK_ISA_AVX512VBMI2_SET \
+  (OPTION_MASK_ISA_AVX512VBMI2 | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512VNNI_SET \
+  (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA2_AVXVNNI_SET OPTION_MASK_ISA2_AVXVNNI
+#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_SET \
+  (OPTION_MASK_ISA_AVX512VPOPCNTDQ | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA_AVX512BITALG_SET \
+  (OPTION_MASK_ISA_AVX512BITALG | OPTION_MASK_ISA_AVX512F_SET)
+#define OPTION_MASK_ISA2_AVX512BF16_SET OPTION_MASK_ISA2_AVX512BF16
+#define OPTION_MASK_ISA_RTM_SET OPTION_MASK_ISA_RTM
+#define OPTION_MASK_ISA_PRFCHW_SET OPTION_MASK_ISA_PRFCHW
+#define OPTION_MASK_ISA_RDSEED_SET OPTION_MASK_ISA_RDSEED
+#define OPTION_MASK_ISA_ADX_SET OPTION_MASK_ISA_ADX
+#define OPTION_MASK_ISA_PREFETCHWT1_SET OPTION_MASK_ISA_PREFETCHWT1
+#define OPTION_MASK_ISA_CLFLUSHOPT_SET OPTION_MASK_ISA_CLFLUSHOPT
+#define OPTION_MASK_ISA_XSAVES_SET \
+  (OPTION_MASK_ISA_XSAVES | OPTION_MASK_ISA_XSAVE_SET)
+#define OPTION_MASK_ISA_XSAVEC_SET \
+  (OPTION_MASK_ISA_XSAVEC | OPTION_MASK_ISA_XSAVE_SET)
+#define OPTION_MASK_ISA_CLWB_SET OPTION_MASK_ISA_CLWB
+#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_SET OPTION_MASK_ISA2_AVX512VP2INTERSECT
+#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
+#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
+#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
+
+/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
+   as -msse4.2.  */
+#define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
+
+#define OPTION_MASK_ISA_SSE4A_SET \
+  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
+#define OPTION_MASK_ISA_FMA4_SET \
+  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_SSE4A_SET \
+   | OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA_XOP_SET \
+  (OPTION_MASK_ISA_XOP | OPTION_MASK_ISA_FMA4_SET)
+#define OPTION_MASK_ISA_LWP_SET \
+  OPTION_MASK_ISA_LWP
+
+/* AES, SHA and PCLMUL need SSE2 because they use xmm registers.  */
+#define OPTION_MASK_ISA_AES_SET \
+  (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_SHA_SET \
+  (OPTION_MASK_ISA_SHA | OPTION_MASK_ISA_SSE2_SET)
+#define OPTION_MASK_ISA_PCLMUL_SET \
+  (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
+
+#define OPTION_MASK_ISA_ABM_SET \
+  (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
+
+#define OPTION_MASK_ISA2_PCONFIG_SET OPTION_MASK_ISA2_PCONFIG
+#define OPTION_MASK_ISA2_WBNOINVD_SET OPTION_MASK_ISA2_WBNOINVD
+#define OPTION_MASK_ISA2_SGX_SET OPTION_MASK_ISA2_SGX
+#define OPTION_MASK_ISA_BMI_SET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_SET OPTION_MASK_ISA_BMI2
+#define OPTION_MASK_ISA_LZCNT_SET OPTION_MASK_ISA_LZCNT
+#define OPTION_MASK_ISA_TBM_SET OPTION_MASK_ISA_TBM
+#define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
+#define OPTION_MASK_ISA2_CX16_SET OPTION_MASK_ISA2_CX16
+#define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA2_MOVBE_SET OPTION_MASK_ISA2_MOVBE
+#define OPTION_MASK_ISA_CRC32_SET OPTION_MASK_ISA_CRC32
+
+#define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA2_PTWRITE_SET OPTION_MASK_ISA2_PTWRITE
+#define OPTION_MASK_ISA_F16C_SET \
+  (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
+#define OPTION_MASK_ISA2_MWAITX_SET OPTION_MASK_ISA2_MWAITX
+#define OPTION_MASK_ISA2_CLZERO_SET OPTION_MASK_ISA2_CLZERO
+#define OPTION_MASK_ISA_PKU_SET OPTION_MASK_ISA_PKU
+#define OPTION_MASK_ISA2_RDPID_SET OPTION_MASK_ISA2_RDPID
+#define OPTION_MASK_ISA_GFNI_SET OPTION_MASK_ISA_GFNI
+#define OPTION_MASK_ISA_SHSTK_SET OPTION_MASK_ISA_SHSTK
+#define OPTION_MASK_ISA2_VAES_SET OPTION_MASK_ISA2_VAES
+#define OPTION_MASK_ISA_VPCLMULQDQ_SET OPTION_MASK_ISA_VPCLMULQDQ
+#define OPTION_MASK_ISA_MOVDIRI_SET OPTION_MASK_ISA_MOVDIRI
+#define OPTION_MASK_ISA2_MOVDIR64B_SET OPTION_MASK_ISA2_MOVDIR64B
+#define OPTION_MASK_ISA2_WAITPKG_SET OPTION_MASK_ISA2_WAITPKG
+#define OPTION_MASK_ISA2_CLDEMOTE_SET OPTION_MASK_ISA2_CLDEMOTE
+#define OPTION_MASK_ISA2_ENQCMD_SET OPTION_MASK_ISA2_ENQCMD
+#define OPTION_MASK_ISA2_SERIALIZE_SET OPTION_MASK_ISA2_SERIALIZE
+#define OPTION_MASK_ISA2_TSXLDTRK_SET OPTION_MASK_ISA2_TSXLDTRK
+#define OPTION_MASK_ISA2_UINTR_SET OPTION_MASK_ISA2_UINTR
+#define OPTION_MASK_ISA2_HRESET_SET OPTION_MASK_ISA2_HRESET
+#define OPTION_MASK_ISA2_KL_SET OPTION_MASK_ISA2_KL
+#define OPTION_MASK_ISA2_WIDEKL_SET \
+  (OPTION_MASK_ISA2_WIDEKL | OPTION_MASK_ISA2_KL_SET)
+
+/* Define a set of ISAs which aren't available when a given ISA is
+   disabled.  MMX and SSE ISAs are handled separately.  */
+
+#define OPTION_MASK_ISA_MMX_UNSET \
+  (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
+#define OPTION_MASK_ISA_3DNOW_UNSET \
+  (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
+#define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
+
+#define OPTION_MASK_ISA_SSE_UNSET \
+  (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
+#define OPTION_MASK_ISA_SSE2_UNSET \
+  (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
+#define OPTION_MASK_ISA_SSE3_UNSET \
+  (OPTION_MASK_ISA_SSE3 \
+   | OPTION_MASK_ISA_SSSE3_UNSET \
+   | OPTION_MASK_ISA_SSE4A_UNSET )
+#define OPTION_MASK_ISA_SSSE3_UNSET \
+  (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
+#define OPTION_MASK_ISA_SSE4_1_UNSET \
+  (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
+#define OPTION_MASK_ISA_SSE4_2_UNSET \
+  (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
+#define OPTION_MASK_ISA_AVX_UNSET \
+  (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET \
+   | OPTION_MASK_ISA_FMA4_UNSET | OPTION_MASK_ISA_F16C_UNSET \
+   | OPTION_MASK_ISA_AVX2_UNSET )
+#define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
+#define OPTION_MASK_ISA_FXSR_UNSET OPTION_MASK_ISA_FXSR
+#define OPTION_MASK_ISA_XSAVE_UNSET \
+  (OPTION_MASK_ISA_XSAVE | OPTION_MASK_ISA_XSAVEOPT_UNSET \
+   | OPTION_MASK_ISA_XSAVES_UNSET | OPTION_MASK_ISA_XSAVEC_UNSET \
+   | OPTION_MASK_ISA_AVX_UNSET)
+#define OPTION_MASK_ISA2_XSAVE_UNSET OPTION_MASK_ISA2_AMX_TILE_UNSET
+#define OPTION_MASK_ISA_XSAVEOPT_UNSET OPTION_MASK_ISA_XSAVEOPT
+#define OPTION_MASK_ISA_AVX2_UNSET \
+  (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
+#define OPTION_MASK_ISA2_AVX2_UNSET \
+  (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
+#define OPTION_MASK_ISA_AVX512F_UNSET \
+  (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
+   | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
+   | OPTION_MASK_ISA_AVX512DQ_UNSET | OPTION_MASK_ISA_AVX512BW_UNSET \
+   | OPTION_MASK_ISA_AVX512VL_UNSET | OPTION_MASK_ISA_AVX512IFMA_UNSET \
+   | OPTION_MASK_ISA_AVX512VBMI2_UNSET \
+   | OPTION_MASK_ISA_AVX512VNNI_UNSET \
+   | OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET \
+   | OPTION_MASK_ISA_AVX512BITALG_UNSET)
+#define OPTION_MASK_ISA_AVX512CD_UNSET OPTION_MASK_ISA_AVX512CD
+#define OPTION_MASK_ISA_AVX512PF_UNSET OPTION_MASK_ISA_AVX512PF
+#define OPTION_MASK_ISA_AVX512ER_UNSET OPTION_MASK_ISA_AVX512ER
+#define OPTION_MASK_ISA_AVX512DQ_UNSET OPTION_MASK_ISA_AVX512DQ
+#define OPTION_MASK_ISA_AVX512BW_UNSET \
+  (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET)
+#define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL
+#define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA
+#define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI
+#define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS
+#define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW
+#define OPTION_MASK_ISA_AVX512VBMI2_UNSET OPTION_MASK_ISA_AVX512VBMI2
+#define OPTION_MASK_ISA_AVX512VNNI_UNSET OPTION_MASK_ISA_AVX512VNNI
+#define OPTION_MASK_ISA2_AVXVNNI_UNSET OPTION_MASK_ISA2_AVXVNNI
+#define OPTION_MASK_ISA_AVX512VPOPCNTDQ_UNSET OPTION_MASK_ISA_AVX512VPOPCNTDQ
+#define OPTION_MASK_ISA_AVX512BITALG_UNSET OPTION_MASK_ISA_AVX512BITALG
+#define OPTION_MASK_ISA2_AVX512BF16_UNSET OPTION_MASK_ISA2_AVX512BF16
+#define OPTION_MASK_ISA_RTM_UNSET OPTION_MASK_ISA_RTM
+#define OPTION_MASK_ISA_PRFCHW_UNSET OPTION_MASK_ISA_PRFCHW
+#define OPTION_MASK_ISA_RDSEED_UNSET OPTION_MASK_ISA_RDSEED
+#define OPTION_MASK_ISA_ADX_UNSET OPTION_MASK_ISA_ADX
+#define OPTION_MASK_ISA_PREFETCHWT1_UNSET OPTION_MASK_ISA_PREFETCHWT1
+#define OPTION_MASK_ISA_CLFLUSHOPT_UNSET OPTION_MASK_ISA_CLFLUSHOPT
+#define OPTION_MASK_ISA_XSAVEC_UNSET OPTION_MASK_ISA_XSAVEC
+#define OPTION_MASK_ISA_XSAVES_UNSET OPTION_MASK_ISA_XSAVES
+#define OPTION_MASK_ISA_CLWB_UNSET OPTION_MASK_ISA_CLWB
+#define OPTION_MASK_ISA2_MWAITX_UNSET OPTION_MASK_ISA2_MWAITX
+#define OPTION_MASK_ISA2_CLZERO_UNSET OPTION_MASK_ISA2_CLZERO
+#define OPTION_MASK_ISA_PKU_UNSET OPTION_MASK_ISA_PKU
+#define OPTION_MASK_ISA2_RDPID_UNSET OPTION_MASK_ISA2_RDPID
+#define OPTION_MASK_ISA_GFNI_UNSET OPTION_MASK_ISA_GFNI
+#define OPTION_MASK_ISA_SHSTK_UNSET OPTION_MASK_ISA_SHSTK
+#define OPTION_MASK_ISA2_VAES_UNSET OPTION_MASK_ISA2_VAES
+#define OPTION_MASK_ISA_VPCLMULQDQ_UNSET OPTION_MASK_ISA_VPCLMULQDQ
+#define OPTION_MASK_ISA_MOVDIRI_UNSET OPTION_MASK_ISA_MOVDIRI
+#define OPTION_MASK_ISA2_MOVDIR64B_UNSET OPTION_MASK_ISA2_MOVDIR64B
+#define OPTION_MASK_ISA2_WAITPKG_UNSET OPTION_MASK_ISA2_WAITPKG
+#define OPTION_MASK_ISA2_CLDEMOTE_UNSET OPTION_MASK_ISA2_CLDEMOTE
+#define OPTION_MASK_ISA2_ENQCMD_UNSET OPTION_MASK_ISA2_ENQCMD
+#define OPTION_MASK_ISA2_SERIALIZE_UNSET OPTION_MASK_ISA2_SERIALIZE
+#define OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET OPTION_MASK_ISA2_AVX512VP2INTERSECT
+#define OPTION_MASK_ISA2_TSXLDTRK_UNSET OPTION_MASK_ISA2_TSXLDTRK
+#define OPTION_MASK_ISA2_AMX_TILE_UNSET OPTION_MASK_ISA2_AMX_TILE
+#define OPTION_MASK_ISA2_AMX_INT8_UNSET OPTION_MASK_ISA2_AMX_INT8
+#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
+#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
+#define OPTION_MASK_ISA2_HRESET_UNSET OPTION_MASK_ISA2_HRESET
+#define OPTION_MASK_ISA2_KL_UNSET \
+  (OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
+#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
+
+/* SSE4 includes both SSE4.1 and SSE4.2.  -mno-sse4 should be the same
+   as -mno-sse4.1.  */
+#define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
+
+#define OPTION_MASK_ISA_SSE4A_UNSET \
+  (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_FMA4_UNSET)
+
+#define OPTION_MASK_ISA_FMA4_UNSET \
+  (OPTION_MASK_ISA_FMA4 | OPTION_MASK_ISA_XOP_UNSET)
+#define OPTION_MASK_ISA_XOP_UNSET OPTION_MASK_ISA_XOP
+#define OPTION_MASK_ISA_LWP_UNSET OPTION_MASK_ISA_LWP
+
+#define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
+#define OPTION_MASK_ISA_SHA_UNSET OPTION_MASK_ISA_SHA
+#define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
+#define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
+#define OPTION_MASK_ISA2_PCONFIG_UNSET OPTION_MASK_ISA2_PCONFIG
+#define OPTION_MASK_ISA2_WBNOINVD_UNSET OPTION_MASK_ISA2_WBNOINVD
+#define OPTION_MASK_ISA2_SGX_UNSET OPTION_MASK_ISA2_SGX
+#define OPTION_MASK_ISA_BMI_UNSET OPTION_MASK_ISA_BMI
+#define OPTION_MASK_ISA_BMI2_UNSET OPTION_MASK_ISA_BMI2
+#define OPTION_MASK_ISA_LZCNT_UNSET OPTION_MASK_ISA_LZCNT
+#define OPTION_MASK_ISA_TBM_UNSET OPTION_MASK_ISA_TBM
+#define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
+#define OPTION_MASK_ISA2_CX16_UNSET OPTION_MASK_ISA2_CX16
+#define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
+#define OPTION_MASK_ISA2_MOVBE_UNSET OPTION_MASK_ISA2_MOVBE
+#define OPTION_MASK_ISA_CRC32_UNSET OPTION_MASK_ISA_CRC32
+
+#define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
+#define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA2_PTWRITE_UNSET OPTION_MASK_ISA2_PTWRITE
+#define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
+
+#define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
+  (OPTION_MASK_ISA_MMX_UNSET \
+   | OPTION_MASK_ISA_SSE_UNSET)
+
+#define OPTION_MASK_ISA2_AVX512F_UNSET \
+  (OPTION_MASK_ISA2_AVX512BF16_UNSET \
+   | OPTION_MASK_ISA2_AVX5124FMAPS_UNSET \
+   | OPTION_MASK_ISA2_AVX5124VNNIW_UNSET \
+   | OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
+#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
+  (OPTION_MASK_ISA2_AVX512F_UNSET)
+#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
+#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
+#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
+#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
+#define OPTION_MASK_ISA2_SSSE3_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
+#define OPTION_MASK_ISA2_SSE3_UNSET OPTION_MASK_ISA2_SSSE3_UNSET
+#define OPTION_MASK_ISA2_SSE2_UNSET \
+  (OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET)
+#define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET
+
+#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 97700d797a7..cd5aed30325 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -2575,6 +2575,7 @@ constexpr wide_int_bitmask PTA_KNM = PTA_KNL | PTA_AVX5124VNNIW
 
 #include "insn-attr-common.h"
 
+#include "common/config/i386/i386-common.h"
 #include "common/config/i386/i386-cpuinfo.h"
 
 class pta
-- 
2.30.2


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-14 22:39 [PATCH v4 0/2] x86: Add general_regs_only function attribute H.J. Lu
  2021-04-14 22:39 ` [PATCH v4 1/2] x86: Move OPTION_MASK_* to i386-common.h H.J. Lu
@ 2021-04-14 22:39 ` H.J. Lu
  2021-04-21  7:30   ` Uros Bizjak
  2021-04-21 17:09   ` Martin Sebor
  1 sibling, 2 replies; 22+ messages in thread
From: H.J. Lu @ 2021-04-14 22:39 UTC (permalink / raw)
  To: gcc-patches
  Cc: Uros Bizjak, Jakub Jelinek, Bernhard Reutner-Fischer,
	Martin Sebor, Richard Biener

commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
Author: H.J. Lu <hjl.tools@gmail.com>
Date:   Fri Aug 21 09:42:49 2020 -0700

    x86: Add target("general-regs-only") function attribute

is incomplete since it is impossible to call integer intrinsics from
a function with the general-regs-only target attribute.

1. Add a general_regs_only function attribute to inform the compiler that
a function uses only general purpose registers.  When making inlining
decisions on such functions, non-GPR compiler options are excluded.
2. Add the general_regs_only attribute to x86 intrinsics which use only
general purpose registers.

gcc/

	PR target/99744
	* config/i386/i386-options.c (ix86_attribute_table): Add
	general_regs_only.
	* config/i386/i386.c (ix86_can_inline_p): Exclude non-integer
	target options if callee has general_regs_only attribute.
	* config/i386/adxintrin.h: Add general_regs_only attribute to
	intrinsics which use only general purpose registers.
	* config/i386/bmiintrin.h: Likewise.
	* config/i386/bmi2intrin.h: Likewise.
	* config/i386/cetintrin.h: Likewise.
	* config/i386/cldemoteintrin.h: Likewise.
	* config/i386/clflushoptintrin.h: Likewise.
	* config/i386/clwbintrin.h: Likewise.
	* config/i386/clzerointrin.h: Likewise.
	* config/i386/enqcmdintrin.h: Likewise.
	* config/i386/fxsrintrin.h: Likewise.
	* config/i386/hresetintrin.h: Likewise.
	* config/i386/ia32intrin.h: Likewise.
	* config/i386/lwpintrin.h: Likewise.
	* config/i386/lzcntintrin.h: Likewise.
	* config/i386/movdirintrin.h: Likewise.
	* config/i386/mwaitxintrin.h: Likewise.
	* config/i386/pconfigintrin.h: Likewise.
	* config/i386/pkuintrin.h: Likewise.
	* config/i386/popcntintrin.h: Likewise.
	* config/i386/rdseedintrin.h: Likewise.
	* config/i386/rtmintrin.h: Likewise.
	* config/i386/serializeintrin.h: Likewise.
	* config/i386/sgxintrin.h: Likewise.
	* config/i386/tbmintrin.h: Likewise.
	* config/i386/tsxldtrkintrin.h: Likewise.
	* config/i386/uintrintrin.h: Likewise.
	* config/i386/waitpkgintrin.h: Likewise.
	* config/i386/wbnoinvdintrin.h: Likewise.
	* config/i386/x86gprintrin.h: Likewise.
	* config/i386/xsavecintrin.h: Likewise.
	* config/i386/xsaveintrin.h: Likewise.
	* config/i386/xsaveoptintrin.h: Likewise.
	* config/i386/xsavesintrin.h: Likewise.
	* config/i386/xtestintrin.h: Likewise.
	* doc/extend.texi: Document general_regs_only function attribute.

gcc/testsuite/

	PR target/99744
	* gcc.target/i386/pr99744-3.c: New test.
	* gcc.target/i386/pr99744-4.c: Likewise.
---
 gcc/config/i386/adxintrin.h               |  18 +-
 gcc/config/i386/bmi2intrin.h              |  24 +-
 gcc/config/i386/bmiintrin.h               |  92 ++++--
 gcc/config/i386/cetintrin.h               |  33 +-
 gcc/config/i386/cldemoteintrin.h          |   3 +-
 gcc/config/i386/clflushoptintrin.h        |   3 +-
 gcc/config/i386/clwbintrin.h              |   3 +-
 gcc/config/i386/clzerointrin.h            |   4 +-
 gcc/config/i386/enqcmdintrin.h            |   6 +-
 gcc/config/i386/fxsrintrin.h              |  12 +-
 gcc/config/i386/hresetintrin.h            |   3 +-
 gcc/config/i386/i386-options.c            |   2 +
 gcc/config/i386/i386.c                    |  29 +-
 gcc/config/i386/ia32intrin.h              |  82 +++--
 gcc/config/i386/lwpintrin.h               |  24 +-
 gcc/config/i386/lzcntintrin.h             |  20 +-
 gcc/config/i386/movdirintrin.h            |   9 +-
 gcc/config/i386/mwaitxintrin.h            |   8 +-
 gcc/config/i386/pconfigintrin.h           |   3 +-
 gcc/config/i386/pkuintrin.h               |   6 +-
 gcc/config/i386/popcntintrin.h            |   8 +-
 gcc/config/i386/rdseedintrin.h            |   9 +-
 gcc/config/i386/rtmintrin.h               |   9 +-
 gcc/config/i386/serializeintrin.h         |   8 +-
 gcc/config/i386/sgxintrin.h               |   9 +-
 gcc/config/i386/tbmintrin.h               |  80 +++--
 gcc/config/i386/tsxldtrkintrin.h          |   6 +-
 gcc/config/i386/uintrintrin.h             |  12 +-
 gcc/config/i386/waitpkgintrin.h           |   9 +-
 gcc/config/i386/wbnoinvdintrin.h          |   3 +-
 gcc/config/i386/x86gprintrin.h            |  45 ++-
 gcc/config/i386/xsavecintrin.h            |   6 +-
 gcc/config/i386/xsaveintrin.h             |  18 +-
 gcc/config/i386/xsaveoptintrin.h          |   6 +-
 gcc/config/i386/xsavesintrin.h            |  12 +-
 gcc/config/i386/xtestintrin.h             |   3 +-
 gcc/doc/extend.texi                       |   5 +
 gcc/testsuite/gcc.target/i386/pr99744-3.c |  13 +
 gcc/testsuite/gcc.target/i386/pr99744-4.c | 352 ++++++++++++++++++++++
 39 files changed, 818 insertions(+), 179 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-4.c

diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h
index e514e741f02..74e3df18dce 100644
--- a/gcc/config/i386/adxintrin.h
+++ b/gcc/config/i386/adxintrin.h
@@ -29,7 +29,8 @@
 #define _ADXINTRIN_H_INCLUDED
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _subborrow_u32 (unsigned char __CF, unsigned int __X,
 		unsigned int __Y, unsigned int *__P)
 {
@@ -37,7 +38,8 @@ _subborrow_u32 (unsigned char __CF, unsigned int __X,
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _addcarry_u32 (unsigned char __CF, unsigned int __X,
 	       unsigned int __Y, unsigned int *__P)
 {
@@ -45,7 +47,8 @@ _addcarry_u32 (unsigned char __CF, unsigned int __X,
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _addcarryx_u32 (unsigned char __CF, unsigned int __X,
 		unsigned int __Y, unsigned int *__P)
 {
@@ -54,7 +57,8 @@ _addcarryx_u32 (unsigned char __CF, unsigned int __X,
 
 #ifdef __x86_64__
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _subborrow_u64 (unsigned char __CF, unsigned long long __X,
 		unsigned long long __Y, unsigned long long *__P)
 {
@@ -62,7 +66,8 @@ _subborrow_u64 (unsigned char __CF, unsigned long long __X,
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _addcarry_u64 (unsigned char __CF, unsigned long long __X,
 	       unsigned long long __Y, unsigned long long *__P)
 {
@@ -70,7 +75,8 @@ _addcarry_u64 (unsigned char __CF, unsigned long long __X,
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _addcarryx_u64 (unsigned char __CF, unsigned long long __X,
 		unsigned long long __Y, unsigned long long *__P)
 {
diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
index 6b23e4e98a1..7f64e5a8ff1 100644
--- a/gcc/config/i386/bmi2intrin.h
+++ b/gcc/config/i386/bmi2intrin.h
@@ -35,21 +35,24 @@
 #endif /* __BMI2__ */
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _bzhi_u32 (unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_bzhi_si (__X, __Y);
 }
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _pdep_u32 (unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_pdep_si (__X, __Y);
 }
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _pext_u32 (unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_pext_si (__X, __Y);
@@ -58,28 +61,32 @@ _pext_u32 (unsigned int __X, unsigned int __Y)
 #ifdef  __x86_64__
 
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _bzhi_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_bzhi_di (__X, __Y);
 }
 
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _pdep_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_pdep_di (__X, __Y);
 }
 
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _pext_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_pext_di (__X, __Y);
 }
 
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 	   unsigned long long *__P)
 {
@@ -91,7 +98,8 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
 #else /* !__x86_64__ */
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
 {
   unsigned long long __res = (unsigned long long) __X * __Y;
diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
index 439d81cba11..18b5d7b0734 100644
--- a/gcc/config/i386/bmiintrin.h
+++ b/gcc/config/i386/bmiintrin.h
@@ -34,73 +34,97 @@
 #define __DISABLE_BMI__
 #endif /* __BMI__ */
 
-extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __tzcnt_u16 (unsigned short __X)
 {
   return __builtin_ia32_tzcnt_u16 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __andn_u32 (unsigned int __X, unsigned int __Y)
 {
   return ~__X & __Y;
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bextr_u32 (unsigned int __X, unsigned int __Y)
 {
   return __builtin_ia32_bextr_u32 (__X, __Y);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
 {
   return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsi_u32 (unsigned int __X)
 {
   return __X & -__X;
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _blsi_u32 (unsigned int __X)
 {
   return __blsi_u32 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsmsk_u32 (unsigned int __X)
 {
   return __X ^ (__X - 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _blsmsk_u32 (unsigned int __X)
 {
   return __blsmsk_u32 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsr_u32 (unsigned int __X)
 {
   return __X & (__X - 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _blsr_u32 (unsigned int __X)
 {
   return __blsr_u32 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __tzcnt_u32 (unsigned int __X)
 {
   return __builtin_ia32_tzcnt_u32 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _tzcnt_u32 (unsigned int __X)
 {
   return __builtin_ia32_tzcnt_u32 (__X);
@@ -108,67 +132,89 @@ _tzcnt_u32 (unsigned int __X)
 
 
 #ifdef  __x86_64__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __andn_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return ~__X & __Y;
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bextr_u64 (unsigned long long __X, unsigned long long __Y)
 {
   return __builtin_ia32_bextr_u64 (__X, __Y);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
 {
   return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsi_u64 (unsigned long long __X)
 {
   return __X & -__X;
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _blsi_u64 (unsigned long long __X)
 {
   return __blsi_u64 (__X);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsmsk_u64 (unsigned long long __X)
 {
   return __X ^ (__X - 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _blsmsk_u64 (unsigned long long __X)
 {
   return __blsmsk_u64 (__X);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsr_u64 (unsigned long long __X)
 {
   return __X & (__X - 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _blsr_u64 (unsigned long long __X)
 {
   return __blsr_u64 (__X);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __tzcnt_u64 (unsigned long long __X)
 {
   return __builtin_ia32_tzcnt_u64 (__X);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _tzcnt_u64 (unsigned long long __X)
 {
   return __builtin_ia32_tzcnt_u64 (__X);
diff --git a/gcc/config/i386/cetintrin.h b/gcc/config/i386/cetintrin.h
index 803c6283bec..145bd3ce7d2 100644
--- a/gcc/config/i386/cetintrin.h
+++ b/gcc/config/i386/cetintrin.h
@@ -36,14 +36,16 @@
 
 #ifdef __x86_64__
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _get_ssp (void)
 {
   return __builtin_ia32_rdsspq ();
 }
 #else
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _get_ssp (void)
 {
   return __builtin_ia32_rdsspd ();
@@ -51,7 +53,8 @@ _get_ssp (void)
 #endif
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _inc_ssp (unsigned int __B)
 {
 #ifdef __x86_64__
@@ -62,21 +65,24 @@ _inc_ssp (unsigned int __B)
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _saveprevssp (void)
 {
   __builtin_ia32_saveprevssp ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rstorssp (void *__B)
 {
   __builtin_ia32_rstorssp (__B);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wrssd (unsigned int __B, void *__C)
 {
   __builtin_ia32_wrssd (__B, __C);
@@ -84,7 +90,8 @@ _wrssd (unsigned int __B, void *__C)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wrssq (unsigned long long __B, void *__C)
 {
   __builtin_ia32_wrssq (__B, __C);
@@ -92,7 +99,8 @@ _wrssq (unsigned long long __B, void *__C)
 #endif
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wrussd (unsigned int __B, void *__C)
 {
   __builtin_ia32_wrussd (__B, __C);
@@ -100,7 +108,8 @@ _wrussd (unsigned int __B, void *__C)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wrussq (unsigned long long __B, void *__C)
 {
   __builtin_ia32_wrussq (__B, __C);
@@ -108,14 +117,16 @@ _wrussq (unsigned long long __B, void *__C)
 #endif
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _setssbsy (void)
 {
   __builtin_ia32_setssbsy ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _clrssbsy (void *__B)
 {
   __builtin_ia32_clrssbsy (__B);
diff --git a/gcc/config/i386/cldemoteintrin.h b/gcc/config/i386/cldemoteintrin.h
index 67dddaf2b89..897a2db9e41 100644
--- a/gcc/config/i386/cldemoteintrin.h
+++ b/gcc/config/i386/cldemoteintrin.h
@@ -34,7 +34,8 @@
 #define __DISABLE_CLDEMOTE__
 #endif /* __CLDEMOTE__ */
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _cldemote (void *__A)
 {
   __builtin_ia32_cldemote (__A);
diff --git a/gcc/config/i386/clflushoptintrin.h b/gcc/config/i386/clflushoptintrin.h
index d8b55762158..3bd91d00681 100644
--- a/gcc/config/i386/clflushoptintrin.h
+++ b/gcc/config/i386/clflushoptintrin.h
@@ -35,7 +35,8 @@
 #endif /* __CLFLUSHOPT__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_clflushopt (void *__A)
 {
   __builtin_ia32_clflushopt (__A);
diff --git a/gcc/config/i386/clwbintrin.h b/gcc/config/i386/clwbintrin.h
index 21134429a40..2ff40066ef9 100644
--- a/gcc/config/i386/clwbintrin.h
+++ b/gcc/config/i386/clwbintrin.h
@@ -35,7 +35,8 @@
 #endif /* __CLWB__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_clwb (void *__A)
 {
   __builtin_ia32_clwb (__A);
diff --git a/gcc/config/i386/clzerointrin.h b/gcc/config/i386/clzerointrin.h
index f9095160409..12930e387c3 100644
--- a/gcc/config/i386/clzerointrin.h
+++ b/gcc/config/i386/clzerointrin.h
@@ -30,7 +30,9 @@
 #define __DISABLE_CLZERO__
 #endif /* __CLZERO__ */
 
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_clzero (void * __I)
 {
   __builtin_ia32_clzero (__I);
diff --git a/gcc/config/i386/enqcmdintrin.h b/gcc/config/i386/enqcmdintrin.h
index 2518df18db1..7f3d769c23f 100644
--- a/gcc/config/i386/enqcmdintrin.h
+++ b/gcc/config/i386/enqcmdintrin.h
@@ -35,14 +35,16 @@
 #endif /* __ENQCMD__ */
 
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _enqcmd (void * __P, const void * __Q)
 {
   return __builtin_ia32_enqcmd (__P, __Q);
 }
 
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _enqcmds (void * __P, const void * __Q)
 {
   return __builtin_ia32_enqcmds (__P, __Q);
diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h
index fd2e538eb9c..a80654968eb 100644
--- a/gcc/config/i386/fxsrintrin.h
+++ b/gcc/config/i386/fxsrintrin.h
@@ -35,14 +35,16 @@
 #endif /* __FXSR__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _fxsave (void *__P)
 {
   __builtin_ia32_fxsave (__P);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _fxrstor (void *__P)
 {
   __builtin_ia32_fxrstor (__P);
@@ -50,14 +52,16 @@ _fxrstor (void *__P)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _fxsave64 (void *__P)
 {
   __builtin_ia32_fxsave64 (__P);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _fxrstor64 (void *__P)
 {
   __builtin_ia32_fxrstor64 (__P);
diff --git a/gcc/config/i386/hresetintrin.h b/gcc/config/i386/hresetintrin.h
index 500618825c9..eba09a9010f 100644
--- a/gcc/config/i386/hresetintrin.h
+++ b/gcc/config/i386/hresetintrin.h
@@ -35,7 +35,8 @@
 #endif /* __HRESET__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _hreset (unsigned int __EAX)
 {
   __builtin_ia32_hreset (__EAX);
diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
index 91da2849c49..559f9357811 100644
--- a/gcc/config/i386/i386-options.c
+++ b/gcc/config/i386/i386-options.c
@@ -3961,6 +3961,8 @@ const struct attribute_spec ix86_attribute_table[] =
     ix86_handle_fentry_name, NULL },
   { "cf_check", 0, 0, true, false, false, false,
     ix86_handle_fndecl_attribute, NULL },
+  { "general_regs_only", 0, 0, true, false, false, false,
+    ix86_handle_fndecl_attribute, NULL },
 
   /* End element.  */
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 7c41302c75b..201a001e95a 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -553,7 +553,7 @@ ix86_can_inline_p (tree caller, tree callee)
 
   /* Changes of those flags can be tolerated for always inlines. Lets hope
      user knows what he is doing.  */
-  const unsigned HOST_WIDE_INT always_inline_safe_mask
+  unsigned HOST_WIDE_INT always_inline_safe_mask
 	 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
 	    | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
 	    | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
@@ -579,13 +579,32 @@ ix86_can_inline_p (tree caller, tree callee)
 			    DECL_ATTRIBUTES (callee)));
 
   cgraph_node *callee_node = cgraph_node::get (callee);
+
+  HOST_WIDE_INT callee_integer_isa_flags
+    = callee_opts->x_ix86_isa_flags;
+  HOST_WIDE_INT callee_integer_isa_flags2
+    = callee_opts->x_ix86_isa_flags2;
+
+  if (lookup_attribute ("general_regs_only",
+			DECL_ATTRIBUTES (callee)))
+    {
+      /* For a general_regs_only function, the callee's
+	 integer ISA options should be a subset of the caller's
+	 integer ISA options.  */
+      always_inline_safe_mask |= MASK_80387;
+      callee_integer_isa_flags
+	&= ~OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET;
+      callee_integer_isa_flags2
+	&= ~OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET;
+    }
+
   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
      function can inline a SSE2 function but a SSE2 function can't inline
      a SSE4 function.  */
-  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
-       != callee_opts->x_ix86_isa_flags)
-      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
-	  != callee_opts->x_ix86_isa_flags2))
+  if (((caller_opts->x_ix86_isa_flags & callee_integer_isa_flags)
+       != callee_integer_isa_flags)
+      || ((caller_opts->x_ix86_isa_flags2 & callee_integer_isa_flags2)
+	  != callee_integer_isa_flags2))
     ret = false;
 
   /* See if we have the same non-isa options.  */
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index 591394076cc..908eb44b0d7 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -27,7 +27,8 @@
 
 /* 32bit bsf */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bsfd (int __X)
 {
   return __builtin_ctz (__X);
@@ -35,7 +36,8 @@ __bsfd (int __X)
 
 /* 32bit bsr */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bsrd (int __X)
 {
   return __builtin_ia32_bsrsi (__X);
@@ -43,7 +45,8 @@ __bsrd (int __X)
 
 /* 32bit bswap */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bswapd (int __X)
 {
   return __builtin_bswap32 (__X);
@@ -88,7 +91,8 @@ __crc32d (unsigned int __C, unsigned int __V)
 
 /* 32bit popcnt */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __popcntd (unsigned int __X)
 {
   return __builtin_popcount (__X);
@@ -98,7 +102,8 @@ __popcntd (unsigned int __X)
 
 /* rdpmc */
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rdpmc (int __S)
 {
   return __builtin_ia32_rdpmc (__S);
@@ -107,18 +112,31 @@ __rdpmc (int __S)
 #endif /* __iamcu__ */
 
 /* rdtsc */
-#define __rdtsc()		__builtin_ia32_rdtsc ()
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
+__rdtsc (void)
+{
+  return __builtin_ia32_rdtsc ();
+}
 
 #ifndef __iamcu__
 
 /* rdtscp */
-#define __rdtscp(a)		__builtin_ia32_rdtscp (a)
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
+__rdtscp (unsigned int *__A)
+{
+  return __builtin_ia32_rdtscp (__A);
+}
 
 #endif /* __iamcu__ */
 
 /* 8bit rol */
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rolb (unsigned char __X, int __C)
 {
   return __builtin_ia32_rolqi (__X, __C);
@@ -126,7 +144,8 @@ __rolb (unsigned char __X, int __C)
 
 /* 16bit rol */
 extern __inline unsigned short
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rolw (unsigned short __X, int __C)
 {
   return __builtin_ia32_rolhi (__X, __C);
@@ -134,7 +153,8 @@ __rolw (unsigned short __X, int __C)
 
 /* 32bit rol */
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rold (unsigned int __X, int __C)
 {
   __C &= 31;
@@ -143,7 +163,8 @@ __rold (unsigned int __X, int __C)
 
 /* 8bit ror */
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rorb (unsigned char __X, int __C)
 {
   return __builtin_ia32_rorqi (__X, __C);
@@ -151,7 +172,8 @@ __rorb (unsigned char __X, int __C)
 
 /* 16bit ror */
 extern __inline unsigned short
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rorw (unsigned short __X, int __C)
 {
   return __builtin_ia32_rorhi (__X, __C);
@@ -159,7 +181,8 @@ __rorw (unsigned short __X, int __C)
 
 /* 32bit ror */
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rord (unsigned int __X, int __C)
 {
   __C &= 31;
@@ -168,7 +191,8 @@ __rord (unsigned int __X, int __C)
 
 /* Pause */
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __pause (void)
 {
   __builtin_ia32_pause ();
@@ -177,7 +201,8 @@ __pause (void)
 #ifdef __x86_64__
 /* 64bit bsf */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bsfq (long long __X)
 {
   return __builtin_ctzll (__X);
@@ -185,7 +210,8 @@ __bsfq (long long __X)
 
 /* 64bit bsr */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bsrq (long long __X)
 {
   return __builtin_ia32_bsrdi (__X);
@@ -193,7 +219,8 @@ __bsrq (long long __X)
 
 /* 64bit bswap */
 extern __inline long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bswapq (long long __X)
 {
   return __builtin_bswap64 (__X);
@@ -220,7 +247,8 @@ __crc32q (unsigned long long __C, unsigned long long __V)
 
 /* 64bit popcnt */
 extern __inline long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __popcntq (unsigned long long __X)
 {
   return __builtin_popcountll (__X);
@@ -228,7 +256,8 @@ __popcntq (unsigned long long __X)
 
 /* 64bit rol */
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rolq (unsigned long long __X, int __C)
 {
   __C &= 63;
@@ -237,7 +266,8 @@ __rolq (unsigned long long __X, int __C)
 
 /* 64bit ror */
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __rorq (unsigned long long __X, int __C)
 {
   __C &= 63;
@@ -246,7 +276,8 @@ __rorq (unsigned long long __X, int __C)
 
 /* Read flags register */
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __readeflags (void)
 {
   return __builtin_ia32_readeflags_u64 ();
@@ -254,7 +285,8 @@ __readeflags (void)
 
 /* Write flags register */
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __writeeflags (unsigned long long __X)
 {
   __builtin_ia32_writeeflags_u64 (__X);
@@ -266,7 +298,8 @@ __writeeflags (unsigned long long __X)
 
 /* Read flags register */
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __readeflags (void)
 {
   return __builtin_ia32_readeflags_u32 ();
@@ -274,7 +307,8 @@ __readeflags (void)
 
 /* Write flags register */
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __writeeflags (unsigned int __X)
 {
   __builtin_ia32_writeeflags_u32 (__X);
diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
index 1a7465b2f22..893a4313a68 100644
--- a/gcc/config/i386/lwpintrin.h
+++ b/gcc/config/i386/lwpintrin.h
@@ -34,27 +34,35 @@
 #define __DISABLE_LWP__
 #endif /* __LWP__ */
 
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __llwpcb (void *__pcbAddress)
 {
   __builtin_ia32_llwpcb (__pcbAddress);
 }
 
-extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void *
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __slwpcb (void)
 {
   return __builtin_ia32_slwpcb ();
 }
 
 #ifdef __OPTIMIZE__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
 {
   __builtin_ia32_lwpval32 (__data2, __data1, __flags);
 }
 
 #ifdef __x86_64__
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lwpval64 (unsigned long long __data2, unsigned int __data1,
 	    unsigned int __flags)
 {
@@ -74,14 +82,18 @@ __lwpval64 (unsigned long long __data2, unsigned int __data1,
 
 
 #ifdef __OPTIMIZE__
-extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
 {
   return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
 }
 
 #ifdef __x86_64__
-extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lwpins64 (unsigned long long __data2, unsigned int __data1,
 	    unsigned int __flags)
 {
diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h
index cfa2719c044..864bdf67698 100644
--- a/gcc/config/i386/lzcntintrin.h
+++ b/gcc/config/i386/lzcntintrin.h
@@ -35,32 +35,42 @@
 #define __DISABLE_LZCNT__
 #endif /* __LZCNT__ */
 
-extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lzcnt16 (unsigned short __X)
 {
   return __builtin_ia32_lzcnt_u16 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lzcnt32 (unsigned int __X)
 {
   return __builtin_ia32_lzcnt_u32 (__X);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _lzcnt_u32 (unsigned int __X)
 {
   return __builtin_ia32_lzcnt_u32 (__X);
 }
 
 #ifdef __x86_64__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __lzcnt64 (unsigned long long __X)
 {
   return __builtin_ia32_lzcnt_u64 (__X);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _lzcnt_u64 (unsigned long long __X)
 {
   return __builtin_ia32_lzcnt_u64 (__X);
diff --git a/gcc/config/i386/movdirintrin.h b/gcc/config/i386/movdirintrin.h
index c50fe40b937..e6ba84f39c8 100644
--- a/gcc/config/i386/movdirintrin.h
+++ b/gcc/config/i386/movdirintrin.h
@@ -35,14 +35,16 @@
 #endif /* __MOVDIRI__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _directstoreu_u32 (void * __P, unsigned int __A)
 {
   __builtin_ia32_directstoreu_u32 ((unsigned int *)__P, __A);
 }
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _directstoreu_u64 (void * __P, unsigned long long __A)
 {
   __builtin_ia32_directstoreu_u64 ((unsigned long long *)__P, __A);
@@ -61,7 +63,8 @@ _directstoreu_u64 (void * __P, unsigned long long __A)
 #endif /* __MOVDIR64B__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _movdir64b (void * __P, const void * __Q)
 {
   __builtin_ia32_movdir64b (__P, __Q);
diff --git a/gcc/config/i386/mwaitxintrin.h b/gcc/config/i386/mwaitxintrin.h
index ad8afba4c28..0c9505bb2f6 100644
--- a/gcc/config/i386/mwaitxintrin.h
+++ b/gcc/config/i386/mwaitxintrin.h
@@ -30,13 +30,17 @@
 #define __DISABLE_MWAITX__
 #endif /* __MWAITX__ */
 
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
 {
   __builtin_ia32_monitorx (__P, __E, __H);
 }
 
-extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
 {
   __builtin_ia32_mwaitx (__E, __H, __C);
diff --git a/gcc/config/i386/pconfigintrin.h b/gcc/config/i386/pconfigintrin.h
index 5346cbd78cb..f8f6279c586 100644
--- a/gcc/config/i386/pconfigintrin.h
+++ b/gcc/config/i386/pconfigintrin.h
@@ -47,7 +47,8 @@
 	: "cc")
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _pconfig_u32 (const unsigned int __L, size_t __D[])
 {
   enum __pconfig_type
diff --git a/gcc/config/i386/pkuintrin.h b/gcc/config/i386/pkuintrin.h
index cd5638fa035..6e59617a0ce 100644
--- a/gcc/config/i386/pkuintrin.h
+++ b/gcc/config/i386/pkuintrin.h
@@ -35,14 +35,16 @@
 #endif /* __PKU__ */
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdpkru_u32 (void)
 {
   return __builtin_ia32_rdpkru ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wrpkru (unsigned int __key)
 {
   __builtin_ia32_wrpkru (__key);
diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
index 84876562640..640de9db733 100644
--- a/gcc/config/i386/popcntintrin.h
+++ b/gcc/config/i386/popcntintrin.h
@@ -31,14 +31,18 @@
 #endif /* __POPCNT__ */
 
 /* Calculate a number of bits set to 1.  */
-extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_popcnt_u32 (unsigned int __X)
 {
   return __builtin_popcount (__X);
 }
 
 #ifdef __x86_64__
-extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _mm_popcnt_u64 (unsigned long long __X)
 {
   return __builtin_popcountll (__X);
diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h
index 1badab7018c..0dc5fadce6a 100644
--- a/gcc/config/i386/rdseedintrin.h
+++ b/gcc/config/i386/rdseedintrin.h
@@ -36,14 +36,16 @@
 
 
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdseed16_step (unsigned short *__p)
 {
   return __builtin_ia32_rdseed_hi_step (__p);
 }
 
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdseed32_step (unsigned int *__p)
 {
   return __builtin_ia32_rdseed_si_step (__p);
@@ -51,7 +53,8 @@ _rdseed32_step (unsigned int *__p)
 
 #ifdef __x86_64__
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdseed64_step (unsigned long long *__p)
 {
   return __builtin_ia32_rdseed_di_step (__p);
diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h
index 5b2ac767737..33aadcfec61 100644
--- a/gcc/config/i386/rtmintrin.h
+++ b/gcc/config/i386/rtmintrin.h
@@ -46,7 +46,8 @@
 /* Start an RTM code region.  Return _XBEGIN_STARTED on success and the
    abort condition otherwise.  */
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xbegin (void)
 {
   return __builtin_ia32_xbegin ();
@@ -57,7 +58,8 @@ _xbegin (void)
    commit fails, then control is transferred to the outermost transaction
    fallback handler.  */
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xend (void)
 {
   __builtin_ia32_xend ();
@@ -67,7 +69,8 @@ _xend (void)
    outermost transaction fallback handler with the abort condition IMM.  */
 #ifdef __OPTIMIZE__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xabort (const unsigned int __imm)
 {
   __builtin_ia32_xabort (__imm);
diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h
index e280250b198..dd27e6c7a81 100644
--- a/gcc/config/i386/serializeintrin.h
+++ b/gcc/config/i386/serializeintrin.h
@@ -34,7 +34,13 @@
 #define __DISABLE_SERIALIZE__
 #endif /* __SERIALIZE__ */
 
-#define _serialize()	__builtin_ia32_serialize ()
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
+_serialize (void)
+{
+  __builtin_ia32_serialize ();
+}
 
 #ifdef __DISABLE_SERIALIZE__
 #undef __DISABLE_SERIALIZE__
diff --git a/gcc/config/i386/sgxintrin.h b/gcc/config/i386/sgxintrin.h
index 152be6a37ed..264214af972 100644
--- a/gcc/config/i386/sgxintrin.h
+++ b/gcc/config/i386/sgxintrin.h
@@ -108,7 +108,8 @@
 	   : "cc")
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _encls_u32 (const unsigned int __L, size_t __D[])
 {
   enum __encls_type
@@ -175,7 +176,8 @@ _encls_u32 (const unsigned int __L, size_t __D[])
 }
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _enclu_u32 (const unsigned int __L, size_t __D[])
 {
   enum __enclu_type
@@ -218,7 +220,8 @@ _enclu_u32 (const unsigned int __L, size_t __D[])
 }
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _enclv_u32 (const unsigned int __L, size_t __D[])
 {
   enum __enclv_type
diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
index 971d1f36aff..bc9d3269515 100644
--- a/gcc/config/i386/tbmintrin.h
+++ b/gcc/config/i386/tbmintrin.h
@@ -35,7 +35,9 @@
 #endif /* __TBM__ */
 
 #ifdef __OPTIMIZE__
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bextri_u32 (unsigned int __X, const unsigned int __I)
 {
   return __builtin_ia32_bextri_u32 (__X, __I);
@@ -46,55 +48,73 @@ __bextri_u32 (unsigned int __X, const unsigned int __I)
 					    (unsigned int)(I)))
 #endif /*__OPTIMIZE__ */
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcfill_u32 (unsigned int __X)
 {
   return __X & (__X + 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blci_u32 (unsigned int __X)
 {
   return __X | ~(__X + 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcic_u32 (unsigned int __X)
 {
   return ~__X & (__X + 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcmsk_u32 (unsigned int __X)
 {
   return __X ^ (__X + 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcs_u32 (unsigned int __X)
 {
   return __X | (__X + 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsfill_u32 (unsigned int __X)
 {
   return __X | (__X - 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsic_u32 (unsigned int __X)
 {
   return ~__X | (__X - 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __t1mskc_u32 (unsigned int __X)
 {
   return ~__X | (__X + 1);
 }
 
-extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __tzmsk_u32 (unsigned int __X)
 {
   return ~__X & (__X - 1);
@@ -104,7 +124,9 @@ __tzmsk_u32 (unsigned int __X)
 
 #ifdef __x86_64__
 #ifdef __OPTIMIZE__
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __bextri_u64 (unsigned long long __X, const unsigned int __I)
 {
   return __builtin_ia32_bextri_u64 (__X, __I);
@@ -115,55 +137,73 @@ __bextri_u64 (unsigned long long __X, const unsigned int __I)
 						  (unsigned long long)(I)))
 #endif /*__OPTIMIZE__ */
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcfill_u64 (unsigned long long __X)
 {
   return __X & (__X + 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blci_u64 (unsigned long long __X)
 {
   return __X | ~(__X + 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcic_u64 (unsigned long long __X)
 {
   return ~__X & (__X + 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcmsk_u64 (unsigned long long __X)
 {
   return __X ^ (__X + 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blcs_u64 (unsigned long long __X)
 {
   return __X | (__X + 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsfill_u64 (unsigned long long __X)
 {
   return __X | (__X - 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __blsic_u64 (unsigned long long __X)
 {
   return ~__X | (__X - 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __t1mskc_u64 (unsigned long long __X)
 {
   return ~__X | (__X + 1);
 }
 
-extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 __tzmsk_u64 (unsigned long long __X)
 {
   return ~__X & (__X - 1);
diff --git a/gcc/config/i386/tsxldtrkintrin.h b/gcc/config/i386/tsxldtrkintrin.h
index bb42a8e89b9..32a0b87c43a 100644
--- a/gcc/config/i386/tsxldtrkintrin.h
+++ b/gcc/config/i386/tsxldtrkintrin.h
@@ -35,14 +35,16 @@
 #endif /* __TSXLDTRK__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsusldtrk (void)
 {
   __builtin_ia32_xsusldtrk ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xresldtrk (void)
 {
   __builtin_ia32_xresldtrk ();
diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h
index 2ff0cce9b49..d424bc22ba8 100644
--- a/gcc/config/i386/uintrintrin.h
+++ b/gcc/config/i386/uintrintrin.h
@@ -47,28 +47,32 @@ struct __uintr_frame
 };
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _clui (void)
 {
   __builtin_ia32_clui ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _stui (void)
 {
   __builtin_ia32_stui ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _senduipi (unsigned long long __R)
 {
   __builtin_ia32_senduipi (__R);
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _testui (void)
 {
   return __builtin_ia32_testui ();
diff --git a/gcc/config/i386/waitpkgintrin.h b/gcc/config/i386/waitpkgintrin.h
index a7a4d6a927d..a2d7b004545 100644
--- a/gcc/config/i386/waitpkgintrin.h
+++ b/gcc/config/i386/waitpkgintrin.h
@@ -35,21 +35,24 @@
 #endif /* __WAITPKG__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _umonitor (void *__A)
 {
   __builtin_ia32_umonitor (__A);
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _umwait (unsigned int __A, unsigned long long __B)
 {
   return __builtin_ia32_umwait (__A, __B);
 }
 
 extern __inline unsigned char
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _tpause (unsigned int __A, unsigned long long __B)
 {
   return __builtin_ia32_tpause (__A, __B);
diff --git a/gcc/config/i386/wbnoinvdintrin.h b/gcc/config/i386/wbnoinvdintrin.h
index 71dc1b6accb..6ba9ca01f27 100644
--- a/gcc/config/i386/wbnoinvdintrin.h
+++ b/gcc/config/i386/wbnoinvdintrin.h
@@ -35,7 +35,8 @@
 #endif /* __WBNOINVD__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wbnoinvd (void)
 {
   __builtin_ia32_wbnoinvd ();
diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
index ceda501252c..4289ff66cfd 100644
--- a/gcc/config/i386/x86gprintrin.h
+++ b/gcc/config/i386/x86gprintrin.h
@@ -95,7 +95,8 @@
 #include <hresetintrin.h>
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _wbinvd (void)
 {
   __builtin_ia32_wbinvd ();
@@ -107,14 +108,16 @@ _wbinvd (void)
 #define __DISABLE_RDRND__
 #endif /* __RDRND__ */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdrand16_step (unsigned short *__P)
 {
   return __builtin_ia32_rdrand16_step (__P);
 }
 
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdrand32_step (unsigned int *__P)
 {
   return __builtin_ia32_rdrand32_step (__P);
@@ -130,7 +133,8 @@ _rdrand32_step (unsigned int *__P)
 #define __DISABLE_RDPID__
 #endif /* __RDPID__ */
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdpid_u32 (void)
 {
   return __builtin_ia32_rdpid ();
@@ -148,56 +152,64 @@ _rdpid_u32 (void)
 #define __DISABLE_FSGSBASE__
 #endif /* __FSGSBASE__ */
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _readfsbase_u32 (void)
 {
   return __builtin_ia32_rdfsbase32 ();
 }
 
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _readfsbase_u64 (void)
 {
   return __builtin_ia32_rdfsbase64 ();
 }
 
 extern __inline unsigned int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _readgsbase_u32 (void)
 {
   return __builtin_ia32_rdgsbase32 ();
 }
 
 extern __inline unsigned long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _readgsbase_u64 (void)
 {
   return __builtin_ia32_rdgsbase64 ();
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _writefsbase_u32 (unsigned int __B)
 {
   __builtin_ia32_wrfsbase32 (__B);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _writefsbase_u64 (unsigned long long __B)
 {
   __builtin_ia32_wrfsbase64 (__B);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _writegsbase_u32 (unsigned int __B)
 {
   __builtin_ia32_wrgsbase32 (__B);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _writegsbase_u64 (unsigned long long __B)
 {
   __builtin_ia32_wrgsbase64 (__B);
@@ -213,7 +225,8 @@ _writegsbase_u64 (unsigned long long __B)
 #define __DISABLE_RDRND__
 #endif /* __RDRND__ */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _rdrand64_step (unsigned long long *__P)
 {
   return __builtin_ia32_rdrand64_step (__P);
@@ -233,7 +246,8 @@ _rdrand64_step (unsigned long long *__P)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _ptwrite64 (unsigned long long __B)
 {
   __builtin_ia32_ptwrite64 (__B);
@@ -241,7 +255,8 @@ _ptwrite64 (unsigned long long __B)
 #endif /* __x86_64__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _ptwrite32 (unsigned __B)
 {
   __builtin_ia32_ptwrite32 (__B);
diff --git a/gcc/config/i386/xsavecintrin.h b/gcc/config/i386/xsavecintrin.h
index 45751a087bb..d0739cbd1cc 100644
--- a/gcc/config/i386/xsavecintrin.h
+++ b/gcc/config/i386/xsavecintrin.h
@@ -35,7 +35,8 @@
 #endif /* __XSAVEC__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsavec (void *__P, long long __M)
 {
   __builtin_ia32_xsavec (__P, __M);
@@ -43,7 +44,8 @@ _xsavec (void *__P, long long __M)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsavec64 (void *__P, long long __M)
 {
   __builtin_ia32_xsavec64 (__P, __M);
diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h
index 56e6a1e527b..50d174fa2b0 100644
--- a/gcc/config/i386/xsaveintrin.h
+++ b/gcc/config/i386/xsaveintrin.h
@@ -35,28 +35,32 @@
 #endif /* __XSAVE__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsave (void *__P, long long __M)
 {
   __builtin_ia32_xsave (__P, __M);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xrstor (void *__P, long long __M)
 {
   __builtin_ia32_xrstor (__P, __M);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsetbv (unsigned int __A, long long __V)
 {
   __builtin_ia32_xsetbv (__A, __V);
 }
 
 extern __inline long long
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xgetbv (unsigned int __A)
 {
   return __builtin_ia32_xgetbv (__A);
@@ -64,14 +68,16 @@ _xgetbv (unsigned int __A)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsave64 (void *__P, long long __M)
 {
   __builtin_ia32_xsave64 (__P, __M);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xrstor64 (void *__P, long long __M)
 {
   __builtin_ia32_xrstor64 (__P, __M);
diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h
index ba076cea51a..b5c25f94f95 100644
--- a/gcc/config/i386/xsaveoptintrin.h
+++ b/gcc/config/i386/xsaveoptintrin.h
@@ -35,7 +35,8 @@
 #endif /* __XSAVEOPT__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsaveopt (void *__P, long long __M)
 {
   __builtin_ia32_xsaveopt (__P, __M);
@@ -43,7 +44,8 @@ _xsaveopt (void *__P, long long __M)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsaveopt64 (void *__P, long long __M)
 {
   __builtin_ia32_xsaveopt64 (__P, __M);
diff --git a/gcc/config/i386/xsavesintrin.h b/gcc/config/i386/xsavesintrin.h
index 969835fed64..27cec8370ad 100644
--- a/gcc/config/i386/xsavesintrin.h
+++ b/gcc/config/i386/xsavesintrin.h
@@ -35,14 +35,16 @@
 #endif /* __XSAVES__ */
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsaves (void *__P, long long __M)
 {
   __builtin_ia32_xsaves (__P, __M);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xrstors (void *__P, long long __M)
 {
   __builtin_ia32_xrstors (__P, __M);
@@ -50,14 +52,16 @@ _xrstors (void *__P, long long __M)
 
 #ifdef __x86_64__
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xrstors64 (void *__P, long long __M)
 {
   __builtin_ia32_xrstors64 (__P, __M);
 }
 
 extern __inline void
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xsaves64 (void *__P, long long __M)
 {
   __builtin_ia32_xsaves64 (__P, __M);
diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h
index 39d18af6536..0eae87a1d43 100644
--- a/gcc/config/i386/xtestintrin.h
+++ b/gcc/config/i386/xtestintrin.h
@@ -37,7 +37,8 @@
 /* Return non-zero if the instruction executes inside an RTM or HLE code
    region.  Return zero otherwise.   */
 extern __inline int
-__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__attribute__((__gnu_inline__, __always_inline__, __artificial__,
+	       __general_regs_only__))
 _xtest (void)
 {
   return __builtin_ia32_xtest ();
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 1ddafb3ff2c..7111eca62ff 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
 of the section to record function entry instrumentation calls in when
 enabled with @option{-pg -mrecord-mcount}
 
+@item general_regs_only
+@cindex @code{general_regs_only} function attribute, x86
+The @code{general_regs_only} attribute informs the compiler that the
+function uses only general-purpose registers.
+
 @end table
 
 @node Xstormy16 Function Attributes
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-3.c b/gcc/testsuite/gcc.target/i386/pr99744-3.c
new file mode 100644
index 00000000000..6c505816ceb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-serialize" } */
+
+#include <x86intrin.h>
+
+__attribute__ ((target("general-regs-only")))
+void
+foo1 (void)
+{
+  _serialize ();
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-4.c b/gcc/testsuite/gcc.target/i386/pr99744-4.c
new file mode 100644
index 00000000000..a17d4a2139b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-4.c
@@ -0,0 +1,352 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdir64b -mmovdiri -mmwaitx -mpconfig -mpku -mpopcnt -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -msgx -mshstk -mtbm -mtsxldtrk -mxsave -mxsavec -mxsaveopt -mxsaves -mwaitpkg -mwbnoinvd" } */
+/* { dg-additional-options "-muintr" { target { ! ia32 } } }  */
+
+/* Test calling GPR intrinsics from functions with general-regs-only
+   target attribute.  */
+
+#include <x86gprintrin.h>
+
+#define _CONCAT(x,y) x ## y
+
+#define test_0(func, type)						\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (void)						\
+  { return func (); }
+
+#define test_0_i1(func, type, imm)					\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (void)						\
+  { return func (imm); }
+
+#define test_1(func, type, op1_type)					\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A)					\
+  { return func (A); }
+
+#define test_1_i1(func, type, op1_type, imm)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A)					\
+  { return func (A, imm); }
+
+#define test_2(func, type, op1_type, op2_type)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B); }
+
+#define test_2_i1(func, type, op1_type, op2_type, imm)			\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B, imm); }
+
+#define test_3(func, type, op1_type, op2_type, op3_type)		\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)		\
+  { return func (A, B, C); }
+
+#define test_4(func, type, op1_type, op2_type, op3_type, op4_type)	\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C,		\
+			  op4_type D)					\
+  { return func (A, B, C, D); }
+
+/* ia32intrin.h  */
+test_1 (__bsfd, int, int)
+test_1 (__bsrd, int, int)
+test_1 (__bswapd, int, int)
+test_1 (__popcntd, int, unsigned int)
+test_2 (__rolb, unsigned char, unsigned char, int)
+test_2 (__rolw, unsigned short, unsigned short, int)
+test_2 (__rold, unsigned int, unsigned int, int)
+test_2 (__rorb, unsigned char, unsigned char, int)
+test_2 (__rorw, unsigned short, unsigned short, int)
+test_2 (__rord, unsigned int, unsigned int, int)
+
+#ifndef __iamcu__
+/* ia32intrin.h  */
+test_1 (__rdpmc, unsigned long long, int)
+test_0 (__rdtsc, unsigned long long)
+test_1 (__rdtscp, unsigned long long, unsigned int *)
+test_0 (__pause, void)
+
+/* adxintrin.h */
+test_4 (_subborrow_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+test_4 (_addcarry_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+test_4 (_addcarryx_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+
+/* bmiintrin.h */
+test_1 (__tzcnt_u16, unsigned short, unsigned short)
+test_2 (__andn_u32, unsigned int, unsigned int, unsigned int)
+test_2 (__bextr_u32, unsigned int, unsigned int, unsigned int)
+test_3 (_bextr_u32, unsigned int, unsigned int, unsigned int,
+	unsigned int)
+test_1 (__blsi_u32, unsigned int, unsigned int)
+test_1 (_blsi_u32, unsigned int, unsigned int)
+test_1 (__blsmsk_u32, unsigned int, unsigned int)
+test_1 (_blsmsk_u32, unsigned int, unsigned int)
+test_1 (__blsr_u32, unsigned int, unsigned int)
+test_1 (_blsr_u32, unsigned int, unsigned int)
+test_1 (__tzcnt_u32, unsigned int, unsigned int)
+test_1 (_tzcnt_u32, unsigned int, unsigned int)
+
+/* bmi2intrin.h */
+test_2 (_bzhi_u32, unsigned int, unsigned int, unsigned int)
+test_2 (_pdep_u32, unsigned int, unsigned int, unsigned int)
+test_2 (_pext_u32, unsigned int, unsigned int, unsigned int)
+
+/* cetintrin.h */
+test_1 (_inc_ssp, void, unsigned int)
+test_0 (_saveprevssp, void)
+test_1 (_rstorssp, void, void *)
+test_2 (_wrssd, void, unsigned int, void *)
+test_2 (_wrussd, void, unsigned int, void *)
+test_0 (_setssbsy, void)
+test_1 (_clrssbsy, void, void *)
+
+/* cldemoteintrin.h */
+test_1 (_cldemote, void, void *)
+
+/* clflushoptintrin.h */
+test_1 (_mm_clflushopt, void, void *)
+
+/* clwbintrin.h */
+test_1 (_mm_clwb, void, void *)
+
+/* clzerointrin.h */
+test_1 (_mm_clzero, void, void *)
+
+/* enqcmdintrin.h */
+test_2 (_enqcmd, int, void *, const void *)
+test_2 (_enqcmds, int, void *, const void *)
+
+/* fxsrintrin.h */
+test_1 (_fxsave, void, void *)
+test_1 (_fxrstor, void, void *)
+
+/* hresetintrin.h */
+test_1 (_hreset, void, unsigned int)
+
+/* lzcntintrin.h */
+test_1 (__lzcnt16, unsigned short, unsigned short)
+test_1 (__lzcnt32, unsigned int, unsigned int)
+test_1 (_lzcnt_u32, unsigned int, unsigned int)
+
+/* lwpintrin.h */
+test_1 (__llwpcb, void, void *)
+test_0 (__slwpcb, void *)
+test_2_i1 (__lwpval32, void, unsigned int, unsigned int, 1)
+test_2_i1 (__lwpins32, unsigned char, unsigned int, unsigned int, 1)
+
+/* movdirintrin.h */
+test_2 (_directstoreu_u32, void, void *, unsigned int)
+test_2 (_movdir64b, void, void *, const void *)
+
+/* mwaitxintrin.h */
+test_3 (_mm_monitorx, void, void const *, unsigned int, unsigned int)
+test_3 (_mm_mwaitx, void, unsigned int, unsigned int, unsigned int)
+
+/* pconfigintrin.h */
+test_2 (_pconfig_u32, unsigned int, const unsigned int, size_t *)
+
+/* pkuintrin.h */
+test_0 (_rdpkru_u32, unsigned int)
+test_1 (_wrpkru, void, unsigned int)
+
+/* popcntintrin.h */
+test_1 (_mm_popcnt_u32, int, unsigned int)
+
+/* rdseedintrin.h */
+test_1 (_rdseed16_step, int, unsigned short *)
+test_1 (_rdseed32_step, int, unsigned int *)
+
+/* rtmintrin.h */
+test_0 (_xbegin, unsigned int)
+test_0 (_xend, void)
+test_0_i1 (_xabort, void, 1)
+
+/* sgxintrin.h */
+test_2 (_encls_u32, unsigned int, const unsigned int, size_t *)
+test_2 (_enclu_u32, unsigned int, const unsigned int, size_t *)
+test_2 (_enclv_u32, unsigned int, const unsigned int, size_t *)
+
+/* tbmintrin.h */
+test_1_i1 (__bextri_u32, unsigned int, unsigned int, 1)
+test_1 (__blcfill_u32, unsigned int, unsigned int)
+test_1 (__blci_u32, unsigned int, unsigned int)
+test_1 (__blcic_u32, unsigned int, unsigned int)
+test_1 (__blcmsk_u32, unsigned int, unsigned int)
+test_1 (__blcs_u32, unsigned int, unsigned int)
+test_1 (__blsfill_u32, unsigned int, unsigned int)
+test_1 (__blsic_u32, unsigned int, unsigned int)
+test_1 (__t1mskc_u32, unsigned int, unsigned int)
+test_1 (__tzmsk_u32, unsigned int, unsigned int)
+
+/* tsxldtrkintrin.h */
+test_0 (_xsusldtrk, void)
+test_0 (_xresldtrk, void)
+
+/* x86gprintrin.h */
+test_1 (_ptwrite32, void, unsigned int)
+test_1 (_rdrand16_step, int, unsigned short *)
+test_1 (_rdrand32_step, int, unsigned int *)
+test_0 (_wbinvd, void)
+
+/* xtestintrin.h */
+test_0 (_xtest, int)
+
+/* xsaveintrin.h */
+test_2 (_xsave, void, void *, long long)
+test_2 (_xrstor, void, void *, long long)
+test_2 (_xsetbv, void, unsigned int, long long)
+test_1 (_xgetbv, long long, unsigned int)
+
+/* xsavecintrin.h */
+test_2 (_xsavec, void, void *, long long)
+
+/* xsaveoptintrin.h */
+test_2 (_xsaveopt, void, void *, long long)
+
+/* xsavesintrin.h */
+test_2 (_xsaves, void, void *, long long)
+test_2 (_xrstors, void, void *, long long)
+
+/* wbnoinvdintrin.h */
+test_0 (_wbnoinvd, void)
+
+#ifdef __x86_64__
+/* adxintrin.h */
+test_4 (_subborrow_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+test_4 (_addcarry_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+test_4 (_addcarryx_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+
+/* bmiintrin.h */
+test_2 (__andn_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (__bextr_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_3 (_bextr_u64, unsigned long long, unsigned long long,
+	unsigned long long, unsigned long long)
+test_1 (__blsi_u64, unsigned long long, unsigned long long)
+test_1 (_blsi_u64, unsigned long long, unsigned long long)
+test_1 (__blsmsk_u64, unsigned long long, unsigned long long)
+test_1 (_blsmsk_u64, unsigned long long, unsigned long long)
+test_1 (__blsr_u64, unsigned long long, unsigned long long)
+test_1 (_blsr_u64, unsigned long long, unsigned long long)
+test_1 (__tzcnt_u64, unsigned long long, unsigned long long)
+test_1 (_tzcnt_u64, unsigned long long, unsigned long long)
+
+/* bmi2intrin.h */
+test_2 (_bzhi_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (_pdep_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (_pext_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_3 (_mulx_u64, unsigned long long, unsigned long long,
+	unsigned long long, unsigned long long *)
+
+/* cetintrin.h */
+test_0 (_get_ssp, unsigned long long)
+test_2 (_wrssq, void, unsigned long long, void *)
+test_2 (_wrussq, void, unsigned long long, void *)
+
+/* fxsrintrin.h */
+test_1 (_fxsave64, void, void *)
+test_1 (_fxrstor64, void, void *)
+
+/* ia32intrin.h  */
+test_1 (__bsfq, int, long long)
+test_1 (__bsrq, int, long long)
+test_1 (__bswapq, long long, long long)
+test_1 (__popcntq, long long, unsigned long long)
+test_2 (__rolq, unsigned long long, unsigned long long, int)
+test_2 (__rorq, unsigned long long, unsigned long long, int)
+test_0 (__readeflags, unsigned long long)
+test_1 (__writeeflags, void, unsigned int)
+
+/* lzcntintrin.h */
+test_1 (__lzcnt64, unsigned long long, unsigned long long)
+test_1 (_lzcnt_u64, unsigned long long, unsigned long long)
+
+/* lwpintrin.h */
+test_2_i1 (__lwpval64, void, unsigned long long, unsigned int, 1)
+test_2_i1 (__lwpins64, unsigned char, unsigned long long,
+	   unsigned int, 1)
+
+/* movdirintrin.h */
+test_2 (_directstoreu_u64, void, void *, unsigned long long)
+
+/* popcntintrin.h */
+test_1 (_mm_popcnt_u64, long long, unsigned long long)
+
+/* rdseedintrin.h */
+test_1 (_rdseed64_step, int, unsigned long long *)
+
+/* tbmintrin.h */
+test_1_i1 (__bextri_u64, unsigned long long, unsigned long long, 1)
+test_1 (__blcfill_u64, unsigned long long, unsigned long long)
+test_1 (__blci_u64, unsigned long long, unsigned long long)
+test_1 (__blcic_u64, unsigned long long, unsigned long long)
+test_1 (__blcmsk_u64, unsigned long long, unsigned long long)
+test_1 (__blcs_u64, unsigned long long, unsigned long long)
+test_1 (__blsfill_u64, unsigned long long, unsigned long long)
+test_1 (__blsic_u64, unsigned long long, unsigned long long)
+test_1 (__t1mskc_u64, unsigned long long, unsigned long long)
+test_1 (__tzmsk_u64, unsigned long long, unsigned long long)
+
+/* uintrintrin.h */
+test_0 (_clui, void)
+test_1 (_senduipi, void, unsigned long long)
+test_0 (_stui, void)
+test_0 (_testui, unsigned char)
+
+/* x86gprintrin.h */
+test_1 (_ptwrite64, void, unsigned long long)
+test_0 (_readfsbase_u32, unsigned int)
+test_0 (_readfsbase_u64, unsigned long long)
+test_0 (_readgsbase_u32, unsigned int)
+test_0 (_readgsbase_u64, unsigned long long)
+test_1 (_rdrand64_step, int, unsigned long long *)
+test_1 (_writefsbase_u32, void, unsigned int)
+test_1 (_writefsbase_u64, void, unsigned long long)
+test_1 (_writegsbase_u32, void, unsigned int)
+test_1 (_writegsbase_u64, void, unsigned long long)
+
+/* xsaveintrin.h */
+test_2 (_xsave64, void, void *, long long)
+test_2 (_xrstor64, void, void *, long long)
+
+/* xsavecintrin.h */
+test_2 (_xsavec64, void, void *, long long)
+
+/* xsaveoptintrin.h */
+test_2 (_xsaveopt64, void, void *, long long)
+
+/* xsavesintrin.h */
+test_2 (_xsaves64, void, void *, long long)
+test_2 (_xrstors64, void, void *, long long)
+
+/* waitpkgintrin.h */
+test_1 (_umonitor, void, void *)
+test_2 (_umwait, unsigned char, unsigned int, unsigned long long)
+test_2 (_tpause, unsigned char, unsigned int, unsigned long long)
+
+#else /* !__x86_64__ */
+/* bmi2intrin.h */
+test_3 (_mulx_u32, unsigned int, unsigned int, unsigned int,
+	unsigned int *)
+
+/* cetintrin.h */
+test_0 (_get_ssp, unsigned int)
+#endif /* __x86_64__ */
+
+#endif
-- 
2.30.2


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-14 22:39 ` [PATCH v4 2/2] x86: Add general_regs_only function attribute H.J. Lu
@ 2021-04-21  7:30   ` Uros Bizjak
  2021-04-21 13:47     ` H.J. Lu
  2021-04-21 16:54     ` Martin Sebor
  2021-04-21 17:09   ` Martin Sebor
  1 sibling, 2 replies; 22+ messages in thread
From: Uros Bizjak @ 2021-04-21  7:30 UTC (permalink / raw)
  To: H.J. Lu
  Cc: gcc-patches, Jakub Jelinek, Bernhard Reutner-Fischer,
	Martin Sebor, Richard Biener

On Thu, Apr 15, 2021 at 12:39 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
> Author: H.J. Lu <hjl.tools@gmail.com>
> Date:   Fri Aug 21 09:42:49 2020 -0700
>
>     x86: Add target("general-regs-only") function attribute
>
> is incomplete since it is impossible to call integer intrinsics from
> a function with general-regs-only target attribute.
>
> 1. Add general_regs_only function attribute to inform the compiler that
> functions use only general purpose registers.  When making inlining
> decisions on such functions, non-GPR compiler options are excluded.
> 2. Add general_regs_only attribute to x86 intrinsics which use only
> general purpose registers.

I'd like to ask Richard and Jakub if they agree with the approach.

On a related note, can we declare default attributes like clang does, e.g.:

/* Define the default attributes for the functions.  */
#define __DEFAULT_FN_ATTRS \
  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
#define __DEFAULT_FN_ATTRS_GRO \
  __attribute__((__gnu_inline__, __always_inline__, \
		 __general_regs_only__, __artificial__))

and use these defines throughout header files?

Uros.

>
> gcc/
>
>         PR target/99744
>         * config/i386/i386-options.c (ix86_attribute_table): Add
>         general_regs_only.
>         * config/i386/i386.c (ix86_can_inline_p): Exclude non-integer
>         target options if callee has general_regs_only attribute.
>         * config/i386/adxintrin.h: Add general_regs_only attribute to
>         intrinsics which use only general purpose registers.
>         * config/i386/bmiintrin.h: Likewise.
>         * config/i386/bmi2intrin.h: Likewise.
>         * config/i386/cetintrin.h: Likewise.
>         * config/i386/cldemoteintrin.h: Likewise.
>         * config/i386/clflushoptintrin.h: Likewise.
>         * config/i386/clwbintrin.h: Likewise.
>         * config/i386/clzerointrin.h: Likewise.
>         * config/i386/enqcmdintrin.h: Likewise.
>         * config/i386/fxsrintrin.h: Likewise.
>         * config/i386/hresetintrin.h: Likewise.
>         * config/i386/ia32intrin.h: Likewise.
>         * config/i386/lwpintrin.h: Likewise.
>         * config/i386/lzcntintrin.h: Likewise.
>         * config/i386/movdirintrin.h: Likewise.
>         * config/i386/mwaitxintrin.h: Likewise.
>         * config/i386/pconfigintrin.h: Likewise.
>         * config/i386/pkuintrin.h: Likewise.
>         * config/i386/popcntintrin.h: Likewise.
>         * config/i386/rdseedintrin.h: Likewise.
>         * config/i386/rtmintrin.h: Likewise.
>         * config/i386/serializeintrin.h: Likewise.
>         * config/i386/sgxintrin.h: Likewise.
>         * config/i386/tbmintrin.h: Likewise.
>         * config/i386/tsxldtrkintrin.h: Likewise.
>         * config/i386/uintrintrin.h: Likewise.
>         * config/i386/waitpkgintrin.h: Likewise.
>         * config/i386/wbnoinvdintrin.h: Likewise.
>         * config/i386/x86gprintrin.h: Likewise.
>         * config/i386/xsavecintrin.h: Likewise.
>         * config/i386/xsaveintrin.h: Likewise.
>         * config/i386/xsaveoptintrin.h: Likewise.
>         * config/i386/xsavesintrin.h: Likewise.
>         * config/i386/xtestintrin.h: Likewise.
>         * doc/extend.texi: Document general_regs_only function attribute.
>
> gcc/testsuite/
>
>         PR target/99744
>         * gcc.target/i386/pr99744-3.c: New test.
>         * gcc.target/i386/pr99744-4.c: Likewise.
> ---
>  gcc/config/i386/adxintrin.h               |  18 +-
>  gcc/config/i386/bmi2intrin.h              |  24 +-
>  gcc/config/i386/bmiintrin.h               |  92 ++++--
>  gcc/config/i386/cetintrin.h               |  33 +-
>  gcc/config/i386/cldemoteintrin.h          |   3 +-
>  gcc/config/i386/clflushoptintrin.h        |   3 +-
>  gcc/config/i386/clwbintrin.h              |   3 +-
>  gcc/config/i386/clzerointrin.h            |   4 +-
>  gcc/config/i386/enqcmdintrin.h            |   6 +-
>  gcc/config/i386/fxsrintrin.h              |  12 +-
>  gcc/config/i386/hresetintrin.h            |   3 +-
>  gcc/config/i386/i386-options.c            |   2 +
>  gcc/config/i386/i386.c                    |  29 +-
>  gcc/config/i386/ia32intrin.h              |  82 +++--
>  gcc/config/i386/lwpintrin.h               |  24 +-
>  gcc/config/i386/lzcntintrin.h             |  20 +-
>  gcc/config/i386/movdirintrin.h            |   9 +-
>  gcc/config/i386/mwaitxintrin.h            |   8 +-
>  gcc/config/i386/pconfigintrin.h           |   3 +-
>  gcc/config/i386/pkuintrin.h               |   6 +-
>  gcc/config/i386/popcntintrin.h            |   8 +-
>  gcc/config/i386/rdseedintrin.h            |   9 +-
>  gcc/config/i386/rtmintrin.h               |   9 +-
>  gcc/config/i386/serializeintrin.h         |   8 +-
>  gcc/config/i386/sgxintrin.h               |   9 +-
>  gcc/config/i386/tbmintrin.h               |  80 +++--
>  gcc/config/i386/tsxldtrkintrin.h          |   6 +-
>  gcc/config/i386/uintrintrin.h             |  12 +-
>  gcc/config/i386/waitpkgintrin.h           |   9 +-
>  gcc/config/i386/wbnoinvdintrin.h          |   3 +-
>  gcc/config/i386/x86gprintrin.h            |  45 ++-
>  gcc/config/i386/xsavecintrin.h            |   6 +-
>  gcc/config/i386/xsaveintrin.h             |  18 +-
>  gcc/config/i386/xsaveoptintrin.h          |   6 +-
>  gcc/config/i386/xsavesintrin.h            |  12 +-
>  gcc/config/i386/xtestintrin.h             |   3 +-
>  gcc/doc/extend.texi                       |   5 +
>  gcc/testsuite/gcc.target/i386/pr99744-3.c |  13 +
>  gcc/testsuite/gcc.target/i386/pr99744-4.c | 352 ++++++++++++++++++++++
>  39 files changed, 818 insertions(+), 179 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-3.c
>  create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-4.c
>
> diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h
> index e514e741f02..74e3df18dce 100644
> --- a/gcc/config/i386/adxintrin.h
> +++ b/gcc/config/i386/adxintrin.h
> @@ -29,7 +29,8 @@
>  #define _ADXINTRIN_H_INCLUDED
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _subborrow_u32 (unsigned char __CF, unsigned int __X,
>                 unsigned int __Y, unsigned int *__P)
>  {
> @@ -37,7 +38,8 @@ _subborrow_u32 (unsigned char __CF, unsigned int __X,
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _addcarry_u32 (unsigned char __CF, unsigned int __X,
>                unsigned int __Y, unsigned int *__P)
>  {
> @@ -45,7 +47,8 @@ _addcarry_u32 (unsigned char __CF, unsigned int __X,
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _addcarryx_u32 (unsigned char __CF, unsigned int __X,
>                 unsigned int __Y, unsigned int *__P)
>  {
> @@ -54,7 +57,8 @@ _addcarryx_u32 (unsigned char __CF, unsigned int __X,
>
>  #ifdef __x86_64__
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _subborrow_u64 (unsigned char __CF, unsigned long long __X,
>                 unsigned long long __Y, unsigned long long *__P)
>  {
> @@ -62,7 +66,8 @@ _subborrow_u64 (unsigned char __CF, unsigned long long __X,
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _addcarry_u64 (unsigned char __CF, unsigned long long __X,
>                unsigned long long __Y, unsigned long long *__P)
>  {
> @@ -70,7 +75,8 @@ _addcarry_u64 (unsigned char __CF, unsigned long long __X,
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _addcarryx_u64 (unsigned char __CF, unsigned long long __X,
>                 unsigned long long __Y, unsigned long long *__P)
>  {
> diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
> index 6b23e4e98a1..7f64e5a8ff1 100644
> --- a/gcc/config/i386/bmi2intrin.h
> +++ b/gcc/config/i386/bmi2intrin.h
> @@ -35,21 +35,24 @@
>  #endif /* __BMI2__ */
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _bzhi_u32 (unsigned int __X, unsigned int __Y)
>  {
>    return __builtin_ia32_bzhi_si (__X, __Y);
>  }
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _pdep_u32 (unsigned int __X, unsigned int __Y)
>  {
>    return __builtin_ia32_pdep_si (__X, __Y);
>  }
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _pext_u32 (unsigned int __X, unsigned int __Y)
>  {
>    return __builtin_ia32_pext_si (__X, __Y);
> @@ -58,28 +61,32 @@ _pext_u32 (unsigned int __X, unsigned int __Y)
>  #ifdef  __x86_64__
>
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _bzhi_u64 (unsigned long long __X, unsigned long long __Y)
>  {
>    return __builtin_ia32_bzhi_di (__X, __Y);
>  }
>
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _pdep_u64 (unsigned long long __X, unsigned long long __Y)
>  {
>    return __builtin_ia32_pdep_di (__X, __Y);
>  }
>
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _pext_u64 (unsigned long long __X, unsigned long long __Y)
>  {
>    return __builtin_ia32_pext_di (__X, __Y);
>  }
>
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mulx_u64 (unsigned long long __X, unsigned long long __Y,
>            unsigned long long *__P)
>  {
> @@ -91,7 +98,8 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
>  #else /* !__x86_64__ */
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
>  {
>    unsigned long long __res = (unsigned long long) __X * __Y;
> diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
> index 439d81cba11..18b5d7b0734 100644
> --- a/gcc/config/i386/bmiintrin.h
> +++ b/gcc/config/i386/bmiintrin.h
> @@ -34,73 +34,97 @@
>  #define __DISABLE_BMI__
>  #endif /* __BMI__ */
>
> -extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned short
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __tzcnt_u16 (unsigned short __X)
>  {
>    return __builtin_ia32_tzcnt_u16 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __andn_u32 (unsigned int __X, unsigned int __Y)
>  {
>    return ~__X & __Y;
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bextr_u32 (unsigned int __X, unsigned int __Y)
>  {
>    return __builtin_ia32_bextr_u32 (__X, __Y);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
>  {
>    return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsi_u32 (unsigned int __X)
>  {
>    return __X & -__X;
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _blsi_u32 (unsigned int __X)
>  {
>    return __blsi_u32 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsmsk_u32 (unsigned int __X)
>  {
>    return __X ^ (__X - 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _blsmsk_u32 (unsigned int __X)
>  {
>    return __blsmsk_u32 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsr_u32 (unsigned int __X)
>  {
>    return __X & (__X - 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _blsr_u32 (unsigned int __X)
>  {
>    return __blsr_u32 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __tzcnt_u32 (unsigned int __X)
>  {
>    return __builtin_ia32_tzcnt_u32 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _tzcnt_u32 (unsigned int __X)
>  {
>    return __builtin_ia32_tzcnt_u32 (__X);
> @@ -108,67 +132,89 @@ _tzcnt_u32 (unsigned int __X)
>
>
>  #ifdef  __x86_64__
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __andn_u64 (unsigned long long __X, unsigned long long __Y)
>  {
>    return ~__X & __Y;
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bextr_u64 (unsigned long long __X, unsigned long long __Y)
>  {
>    return __builtin_ia32_bextr_u64 (__X, __Y);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
>  {
>    return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsi_u64 (unsigned long long __X)
>  {
>    return __X & -__X;
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _blsi_u64 (unsigned long long __X)
>  {
>    return __blsi_u64 (__X);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsmsk_u64 (unsigned long long __X)
>  {
>    return __X ^ (__X - 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _blsmsk_u64 (unsigned long long __X)
>  {
>    return __blsmsk_u64 (__X);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsr_u64 (unsigned long long __X)
>  {
>    return __X & (__X - 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _blsr_u64 (unsigned long long __X)
>  {
>    return __blsr_u64 (__X);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __tzcnt_u64 (unsigned long long __X)
>  {
>    return __builtin_ia32_tzcnt_u64 (__X);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _tzcnt_u64 (unsigned long long __X)
>  {
>    return __builtin_ia32_tzcnt_u64 (__X);
> diff --git a/gcc/config/i386/cetintrin.h b/gcc/config/i386/cetintrin.h
> index 803c6283bec..145bd3ce7d2 100644
> --- a/gcc/config/i386/cetintrin.h
> +++ b/gcc/config/i386/cetintrin.h
> @@ -36,14 +36,16 @@
>
>  #ifdef __x86_64__
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _get_ssp (void)
>  {
>    return __builtin_ia32_rdsspq ();
>  }
>  #else
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _get_ssp (void)
>  {
>    return __builtin_ia32_rdsspd ();
> @@ -51,7 +53,8 @@ _get_ssp (void)
>  #endif
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _inc_ssp (unsigned int __B)
>  {
>  #ifdef __x86_64__
> @@ -62,21 +65,24 @@ _inc_ssp (unsigned int __B)
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _saveprevssp (void)
>  {
>    __builtin_ia32_saveprevssp ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rstorssp (void *__B)
>  {
>    __builtin_ia32_rstorssp (__B);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wrssd (unsigned int __B, void *__C)
>  {
>    __builtin_ia32_wrssd (__B, __C);
> @@ -84,7 +90,8 @@ _wrssd (unsigned int __B, void *__C)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wrssq (unsigned long long __B, void *__C)
>  {
>    __builtin_ia32_wrssq (__B, __C);
> @@ -92,7 +99,8 @@ _wrssq (unsigned long long __B, void *__C)
>  #endif
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wrussd (unsigned int __B, void *__C)
>  {
>    __builtin_ia32_wrussd (__B, __C);
> @@ -100,7 +108,8 @@ _wrussd (unsigned int __B, void *__C)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wrussq (unsigned long long __B, void *__C)
>  {
>    __builtin_ia32_wrussq (__B, __C);
> @@ -108,14 +117,16 @@ _wrussq (unsigned long long __B, void *__C)
>  #endif
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _setssbsy (void)
>  {
>    __builtin_ia32_setssbsy ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _clrssbsy (void *__B)
>  {
>    __builtin_ia32_clrssbsy (__B);
> diff --git a/gcc/config/i386/cldemoteintrin.h b/gcc/config/i386/cldemoteintrin.h
> index 67dddaf2b89..897a2db9e41 100644
> --- a/gcc/config/i386/cldemoteintrin.h
> +++ b/gcc/config/i386/cldemoteintrin.h
> @@ -34,7 +34,8 @@
>  #define __DISABLE_CLDEMOTE__
>  #endif /* __CLDEMOTE__ */
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _cldemote (void *__A)
>  {
>    __builtin_ia32_cldemote (__A);
> diff --git a/gcc/config/i386/clflushoptintrin.h b/gcc/config/i386/clflushoptintrin.h
> index d8b55762158..3bd91d00681 100644
> --- a/gcc/config/i386/clflushoptintrin.h
> +++ b/gcc/config/i386/clflushoptintrin.h
> @@ -35,7 +35,8 @@
>  #endif /* __CLFLUSHOPT__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_clflushopt (void *__A)
>  {
>    __builtin_ia32_clflushopt (__A);
> diff --git a/gcc/config/i386/clwbintrin.h b/gcc/config/i386/clwbintrin.h
> index 21134429a40..2ff40066ef9 100644
> --- a/gcc/config/i386/clwbintrin.h
> +++ b/gcc/config/i386/clwbintrin.h
> @@ -35,7 +35,8 @@
>  #endif /* __CLWB__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_clwb (void *__A)
>  {
>    __builtin_ia32_clwb (__A);
> diff --git a/gcc/config/i386/clzerointrin.h b/gcc/config/i386/clzerointrin.h
> index f9095160409..12930e387c3 100644
> --- a/gcc/config/i386/clzerointrin.h
> +++ b/gcc/config/i386/clzerointrin.h
> @@ -30,7 +30,9 @@
>  #define __DISABLE_CLZERO__
>  #endif /* __CLZERO__ */
>
> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_clzero (void * __I)
>  {
>    __builtin_ia32_clzero (__I);
> diff --git a/gcc/config/i386/enqcmdintrin.h b/gcc/config/i386/enqcmdintrin.h
> index 2518df18db1..7f3d769c23f 100644
> --- a/gcc/config/i386/enqcmdintrin.h
> +++ b/gcc/config/i386/enqcmdintrin.h
> @@ -35,14 +35,16 @@
>  #endif /* __ENQCMD__ */
>
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _enqcmd (void * __P, const void * __Q)
>  {
>    return __builtin_ia32_enqcmd (__P, __Q);
>  }
>
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _enqcmds (void * __P, const void * __Q)
>  {
>    return __builtin_ia32_enqcmds (__P, __Q);
> diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h
> index fd2e538eb9c..a80654968eb 100644
> --- a/gcc/config/i386/fxsrintrin.h
> +++ b/gcc/config/i386/fxsrintrin.h
> @@ -35,14 +35,16 @@
>  #endif /* __FXSR__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _fxsave (void *__P)
>  {
>    __builtin_ia32_fxsave (__P);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _fxrstor (void *__P)
>  {
>    __builtin_ia32_fxrstor (__P);
> @@ -50,14 +52,16 @@ _fxrstor (void *__P)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _fxsave64 (void *__P)
>  {
>    __builtin_ia32_fxsave64 (__P);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _fxrstor64 (void *__P)
>  {
>    __builtin_ia32_fxrstor64 (__P);
> diff --git a/gcc/config/i386/hresetintrin.h b/gcc/config/i386/hresetintrin.h
> index 500618825c9..eba09a9010f 100644
> --- a/gcc/config/i386/hresetintrin.h
> +++ b/gcc/config/i386/hresetintrin.h
> @@ -35,7 +35,8 @@
>  #endif /* __HRESET__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _hreset (unsigned int __EAX)
>  {
>    __builtin_ia32_hreset (__EAX);
> diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
> index 91da2849c49..559f9357811 100644
> --- a/gcc/config/i386/i386-options.c
> +++ b/gcc/config/i386/i386-options.c
> @@ -3961,6 +3961,8 @@ const struct attribute_spec ix86_attribute_table[] =
>      ix86_handle_fentry_name, NULL },
>    { "cf_check", 0, 0, true, false, false, false,
>      ix86_handle_fndecl_attribute, NULL },
> +  { "general_regs_only", 0, 0, true, false, false, false,
> +    ix86_handle_fndecl_attribute, NULL },
>
>    /* End element.  */
>    { NULL, 0, 0, false, false, false, false, NULL, NULL }
> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
> index 7c41302c75b..201a001e95a 100644
> --- a/gcc/config/i386/i386.c
> +++ b/gcc/config/i386/i386.c
> @@ -553,7 +553,7 @@ ix86_can_inline_p (tree caller, tree callee)
>
>    /* Changes of those flags can be tolerated for always inlines. Lets hope
>       user knows what he is doing.  */
> -  const unsigned HOST_WIDE_INT always_inline_safe_mask
> +  unsigned HOST_WIDE_INT always_inline_safe_mask
>          = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
>             | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
>             | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
> @@ -579,13 +579,32 @@ ix86_can_inline_p (tree caller, tree callee)
>                             DECL_ATTRIBUTES (callee)));
>
>    cgraph_node *callee_node = cgraph_node::get (callee);
> +
> +  HOST_WIDE_INT callee_integer_isa_flags
> +    = callee_opts->x_ix86_isa_flags;
> +  HOST_WIDE_INT callee_integer_isa_flags2
> +    = callee_opts->x_ix86_isa_flags2;
> +
> +  if (lookup_attribute ("general_regs_only",
> +                       DECL_ATTRIBUTES (callee)))
> +    {
> +      /* For general purpose register only function, callee's
> +        integer ISA options should be a subset of the caller's
> +        integer ISA options.  */
> +      always_inline_safe_mask |= MASK_80387;
> +      callee_integer_isa_flags
> +       &= ~OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET;
> +      callee_integer_isa_flags2
> +       &= ~OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET;
> +    }
> +
>    /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
>       function can inline a SSE2 function but a SSE2 function can't inline
>       a SSE4 function.  */
> -  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
> -       != callee_opts->x_ix86_isa_flags)
> -      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
> -         != callee_opts->x_ix86_isa_flags2))
> +  if (((caller_opts->x_ix86_isa_flags & callee_integer_isa_flags)
> +       != callee_integer_isa_flags)
> +      || ((caller_opts->x_ix86_isa_flags2 & callee_integer_isa_flags2)
> +         != callee_integer_isa_flags2))
>      ret = false;
>
>    /* See if we have the same non-isa options.  */
> diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
> index 591394076cc..908eb44b0d7 100644
> --- a/gcc/config/i386/ia32intrin.h
> +++ b/gcc/config/i386/ia32intrin.h
> @@ -27,7 +27,8 @@
>
>  /* 32bit bsf */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bsfd (int __X)
>  {
>    return __builtin_ctz (__X);
> @@ -35,7 +36,8 @@ __bsfd (int __X)
>
>  /* 32bit bsr */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bsrd (int __X)
>  {
>    return __builtin_ia32_bsrsi (__X);
> @@ -43,7 +45,8 @@ __bsrd (int __X)
>
>  /* 32bit bswap */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bswapd (int __X)
>  {
>    return __builtin_bswap32 (__X);
> @@ -88,7 +91,8 @@ __crc32d (unsigned int __C, unsigned int __V)
>
>  /* 32bit popcnt */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __popcntd (unsigned int __X)
>  {
>    return __builtin_popcount (__X);
> @@ -98,7 +102,8 @@ __popcntd (unsigned int __X)
>
>  /* rdpmc */
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rdpmc (int __S)
>  {
>    return __builtin_ia32_rdpmc (__S);
> @@ -107,18 +112,31 @@ __rdpmc (int __S)
>  #endif /* __iamcu__ */
>
>  /* rdtsc */
> -#define __rdtsc()              __builtin_ia32_rdtsc ()
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
> +__rdtsc (void)
> +{
> +  return __builtin_ia32_rdtsc ();
> +}
>
>  #ifndef __iamcu__
>
>  /* rdtscp */
> -#define __rdtscp(a)            __builtin_ia32_rdtscp (a)
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
> +__rdtscp (unsigned int *__A)
> +{
> +  return __builtin_ia32_rdtscp (__A);
> +}
>
>  #endif /* __iamcu__ */
>
>  /* 8bit rol */
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rolb (unsigned char __X, int __C)
>  {
>    return __builtin_ia32_rolqi (__X, __C);
> @@ -126,7 +144,8 @@ __rolb (unsigned char __X, int __C)
>
>  /* 16bit rol */
>  extern __inline unsigned short
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rolw (unsigned short __X, int __C)
>  {
>    return __builtin_ia32_rolhi (__X, __C);
> @@ -134,7 +153,8 @@ __rolw (unsigned short __X, int __C)
>
>  /* 32bit rol */
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rold (unsigned int __X, int __C)
>  {
>    __C &= 31;
> @@ -143,7 +163,8 @@ __rold (unsigned int __X, int __C)
>
>  /* 8bit ror */
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rorb (unsigned char __X, int __C)
>  {
>    return __builtin_ia32_rorqi (__X, __C);
> @@ -151,7 +172,8 @@ __rorb (unsigned char __X, int __C)
>
>  /* 16bit ror */
>  extern __inline unsigned short
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rorw (unsigned short __X, int __C)
>  {
>    return __builtin_ia32_rorhi (__X, __C);
> @@ -159,7 +181,8 @@ __rorw (unsigned short __X, int __C)
>
>  /* 32bit ror */
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rord (unsigned int __X, int __C)
>  {
>    __C &= 31;
> @@ -168,7 +191,8 @@ __rord (unsigned int __X, int __C)
>
>  /* Pause */
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __pause (void)
>  {
>    __builtin_ia32_pause ();
> @@ -177,7 +201,8 @@ __pause (void)
>  #ifdef __x86_64__
>  /* 64bit bsf */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bsfq (long long __X)
>  {
>    return __builtin_ctzll (__X);
> @@ -185,7 +210,8 @@ __bsfq (long long __X)
>
>  /* 64bit bsr */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bsrq (long long __X)
>  {
>    return __builtin_ia32_bsrdi (__X);
> @@ -193,7 +219,8 @@ __bsrq (long long __X)
>
>  /* 64bit bswap */
>  extern __inline long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bswapq (long long __X)
>  {
>    return __builtin_bswap64 (__X);
> @@ -220,7 +247,8 @@ __crc32q (unsigned long long __C, unsigned long long __V)
>
>  /* 64bit popcnt */
>  extern __inline long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __popcntq (unsigned long long __X)
>  {
>    return __builtin_popcountll (__X);
> @@ -228,7 +256,8 @@ __popcntq (unsigned long long __X)
>
>  /* 64bit rol */
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rolq (unsigned long long __X, int __C)
>  {
>    __C &= 63;
> @@ -237,7 +266,8 @@ __rolq (unsigned long long __X, int __C)
>
>  /* 64bit ror */
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __rorq (unsigned long long __X, int __C)
>  {
>    __C &= 63;
> @@ -246,7 +276,8 @@ __rorq (unsigned long long __X, int __C)
>
>  /* Read flags register */
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __readeflags (void)
>  {
>    return __builtin_ia32_readeflags_u64 ();
> @@ -254,7 +285,8 @@ __readeflags (void)
>
>  /* Write flags register */
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __writeeflags (unsigned long long __X)
>  {
>    __builtin_ia32_writeeflags_u64 (__X);
> @@ -266,7 +298,8 @@ __writeeflags (unsigned long long __X)
>
>  /* Read flags register */
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __readeflags (void)
>  {
>    return __builtin_ia32_readeflags_u32 ();
> @@ -274,7 +307,8 @@ __readeflags (void)
>
>  /* Write flags register */
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __writeeflags (unsigned int __X)
>  {
>    __builtin_ia32_writeeflags_u32 (__X);
> diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
> index 1a7465b2f22..893a4313a68 100644
> --- a/gcc/config/i386/lwpintrin.h
> +++ b/gcc/config/i386/lwpintrin.h
> @@ -34,27 +34,35 @@
>  #define __DISABLE_LWP__
>  #endif /* __LWP__ */
>
> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __llwpcb (void *__pcbAddress)
>  {
>    __builtin_ia32_llwpcb (__pcbAddress);
>  }
>
> -extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void *
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __slwpcb (void)
>  {
>    return __builtin_ia32_slwpcb ();
>  }
>
>  #ifdef __OPTIMIZE__
> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
>  {
>    __builtin_ia32_lwpval32 (__data2, __data1, __flags);
>  }
>
>  #ifdef __x86_64__
> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lwpval64 (unsigned long long __data2, unsigned int __data1,
>             unsigned int __flags)
>  {
> @@ -74,14 +82,18 @@ __lwpval64 (unsigned long long __data2, unsigned int __data1,
>
>
>  #ifdef __OPTIMIZE__
> -extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned char
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
>  {
>    return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
>  }
>
>  #ifdef __x86_64__
> -extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned char
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lwpins64 (unsigned long long __data2, unsigned int __data1,
>             unsigned int __flags)
>  {
> diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h
> index cfa2719c044..864bdf67698 100644
> --- a/gcc/config/i386/lzcntintrin.h
> +++ b/gcc/config/i386/lzcntintrin.h
> @@ -35,32 +35,42 @@
>  #define __DISABLE_LZCNT__
>  #endif /* __LZCNT__ */
>
> -extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned short
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lzcnt16 (unsigned short __X)
>  {
>    return __builtin_ia32_lzcnt_u16 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lzcnt32 (unsigned int __X)
>  {
>    return __builtin_ia32_lzcnt_u32 (__X);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _lzcnt_u32 (unsigned int __X)
>  {
>    return __builtin_ia32_lzcnt_u32 (__X);
>  }
>
>  #ifdef __x86_64__
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __lzcnt64 (unsigned long long __X)
>  {
>    return __builtin_ia32_lzcnt_u64 (__X);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _lzcnt_u64 (unsigned long long __X)
>  {
>    return __builtin_ia32_lzcnt_u64 (__X);
> diff --git a/gcc/config/i386/movdirintrin.h b/gcc/config/i386/movdirintrin.h
> index c50fe40b937..e6ba84f39c8 100644
> --- a/gcc/config/i386/movdirintrin.h
> +++ b/gcc/config/i386/movdirintrin.h
> @@ -35,14 +35,16 @@
>  #endif /* __MOVDIRI__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _directstoreu_u32 (void * __P, unsigned int __A)
>  {
>    __builtin_ia32_directstoreu_u32 ((unsigned int *)__P, __A);
>  }
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _directstoreu_u64 (void * __P, unsigned long long __A)
>  {
>    __builtin_ia32_directstoreu_u64 ((unsigned long long *)__P, __A);
> @@ -61,7 +63,8 @@ _directstoreu_u64 (void * __P, unsigned long long __A)
>  #endif /* __MOVDIR64B__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _movdir64b (void * __P, const void * __Q)
>  {
>    __builtin_ia32_movdir64b (__P, __Q);
> diff --git a/gcc/config/i386/mwaitxintrin.h b/gcc/config/i386/mwaitxintrin.h
> index ad8afba4c28..0c9505bb2f6 100644
> --- a/gcc/config/i386/mwaitxintrin.h
> +++ b/gcc/config/i386/mwaitxintrin.h
> @@ -30,13 +30,17 @@
>  #define __DISABLE_MWAITX__
>  #endif /* __MWAITX__ */
>
> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
>  {
>    __builtin_ia32_monitorx (__P, __E, __H);
>  }
>
> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
>  {
>    __builtin_ia32_mwaitx (__E, __H, __C);
> diff --git a/gcc/config/i386/pconfigintrin.h b/gcc/config/i386/pconfigintrin.h
> index 5346cbd78cb..f8f6279c586 100644
> --- a/gcc/config/i386/pconfigintrin.h
> +++ b/gcc/config/i386/pconfigintrin.h
> @@ -47,7 +47,8 @@
>         : "cc")
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _pconfig_u32 (const unsigned int __L, size_t __D[])
>  {
>    enum __pconfig_type
> diff --git a/gcc/config/i386/pkuintrin.h b/gcc/config/i386/pkuintrin.h
> index cd5638fa035..6e59617a0ce 100644
> --- a/gcc/config/i386/pkuintrin.h
> +++ b/gcc/config/i386/pkuintrin.h
> @@ -35,14 +35,16 @@
>  #endif /* __PKU__ */
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdpkru_u32 (void)
>  {
>    return __builtin_ia32_rdpkru ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wrpkru (unsigned int __key)
>  {
>    __builtin_ia32_wrpkru (__key);
> diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
> index 84876562640..640de9db733 100644
> --- a/gcc/config/i386/popcntintrin.h
> +++ b/gcc/config/i386/popcntintrin.h
> @@ -31,14 +31,18 @@
>  #endif /* __POPCNT__ */
>
>  /* Calculate a number of bits set to 1.  */
> -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_popcnt_u32 (unsigned int __X)
>  {
>    return __builtin_popcount (__X);
>  }
>
>  #ifdef __x86_64__
> -extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _mm_popcnt_u64 (unsigned long long __X)
>  {
>    return __builtin_popcountll (__X);
> diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h
> index 1badab7018c..0dc5fadce6a 100644
> --- a/gcc/config/i386/rdseedintrin.h
> +++ b/gcc/config/i386/rdseedintrin.h
> @@ -36,14 +36,16 @@
>
>
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdseed16_step (unsigned short *__p)
>  {
>    return __builtin_ia32_rdseed_hi_step (__p);
>  }
>
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdseed32_step (unsigned int *__p)
>  {
>    return __builtin_ia32_rdseed_si_step (__p);
> @@ -51,7 +53,8 @@ _rdseed32_step (unsigned int *__p)
>
>  #ifdef __x86_64__
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdseed64_step (unsigned long long *__p)
>  {
>    return __builtin_ia32_rdseed_di_step (__p);
> diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h
> index 5b2ac767737..33aadcfec61 100644
> --- a/gcc/config/i386/rtmintrin.h
> +++ b/gcc/config/i386/rtmintrin.h
> @@ -46,7 +46,8 @@
>  /* Start an RTM code region.  Return _XBEGIN_STARTED on success and the
>     abort condition otherwise.  */
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xbegin (void)
>  {
>    return __builtin_ia32_xbegin ();
> @@ -57,7 +58,8 @@ _xbegin (void)
>     commit fails, then control is transferred to the outermost transaction
>     fallback handler.  */
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xend (void)
>  {
>    __builtin_ia32_xend ();
> @@ -67,7 +69,8 @@ _xend (void)
>     outermost transaction fallback handler with the abort condition IMM.  */
>  #ifdef __OPTIMIZE__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xabort (const unsigned int __imm)
>  {
>    __builtin_ia32_xabort (__imm);
> diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h
> index e280250b198..dd27e6c7a81 100644
> --- a/gcc/config/i386/serializeintrin.h
> +++ b/gcc/config/i386/serializeintrin.h
> @@ -34,7 +34,13 @@
>  #define __DISABLE_SERIALIZE__
>  #endif /* __SERIALIZE__ */
>
> -#define _serialize()   __builtin_ia32_serialize ()
> +extern __inline void
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
> +_serialize (void)
> +{
> +  __builtin_ia32_serialize ();
> +}
>
>  #ifdef __DISABLE_SERIALIZE__
>  #undef __DISABLE_SERIALIZE__
> diff --git a/gcc/config/i386/sgxintrin.h b/gcc/config/i386/sgxintrin.h
> index 152be6a37ed..264214af972 100644
> --- a/gcc/config/i386/sgxintrin.h
> +++ b/gcc/config/i386/sgxintrin.h
> @@ -108,7 +108,8 @@
>            : "cc")
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _encls_u32 (const unsigned int __L, size_t __D[])
>  {
>    enum __encls_type
> @@ -175,7 +176,8 @@ _encls_u32 (const unsigned int __L, size_t __D[])
>  }
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _enclu_u32 (const unsigned int __L, size_t __D[])
>  {
>    enum __enclu_type
> @@ -218,7 +220,8 @@ _enclu_u32 (const unsigned int __L, size_t __D[])
>  }
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _enclv_u32 (const unsigned int __L, size_t __D[])
>  {
>    enum __enclv_type
> diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
> index 971d1f36aff..bc9d3269515 100644
> --- a/gcc/config/i386/tbmintrin.h
> +++ b/gcc/config/i386/tbmintrin.h
> @@ -35,7 +35,9 @@
>  #endif /* __TBM__ */
>
>  #ifdef __OPTIMIZE__
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bextri_u32 (unsigned int __X, const unsigned int __I)
>  {
>    return __builtin_ia32_bextri_u32 (__X, __I);
> @@ -46,55 +48,73 @@ __bextri_u32 (unsigned int __X, const unsigned int __I)
>                                             (unsigned int)(I)))
>  #endif /*__OPTIMIZE__ */
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcfill_u32 (unsigned int __X)
>  {
>    return __X & (__X + 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blci_u32 (unsigned int __X)
>  {
>    return __X | ~(__X + 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcic_u32 (unsigned int __X)
>  {
>    return ~__X & (__X + 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcmsk_u32 (unsigned int __X)
>  {
>    return __X ^ (__X + 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcs_u32 (unsigned int __X)
>  {
>    return __X | (__X + 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsfill_u32 (unsigned int __X)
>  {
>    return __X | (__X - 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsic_u32 (unsigned int __X)
>  {
>    return ~__X | (__X - 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __t1mskc_u32 (unsigned int __X)
>  {
>    return ~__X | (__X + 1);
>  }
>
> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned int
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __tzmsk_u32 (unsigned int __X)
>  {
>    return ~__X & (__X - 1);
> @@ -104,7 +124,9 @@ __tzmsk_u32 (unsigned int __X)
>
>  #ifdef __x86_64__
>  #ifdef __OPTIMIZE__
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __bextri_u64 (unsigned long long __X, const unsigned int __I)
>  {
>    return __builtin_ia32_bextri_u64 (__X, __I);
> @@ -115,55 +137,73 @@ __bextri_u64 (unsigned long long __X, const unsigned int __I)
>                                                   (unsigned long long)(I)))
>  #endif /*__OPTIMIZE__ */
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcfill_u64 (unsigned long long __X)
>  {
>    return __X & (__X + 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blci_u64 (unsigned long long __X)
>  {
>    return __X | ~(__X + 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcic_u64 (unsigned long long __X)
>  {
>    return ~__X & (__X + 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcmsk_u64 (unsigned long long __X)
>  {
>    return __X ^ (__X + 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blcs_u64 (unsigned long long __X)
>  {
>    return __X | (__X + 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsfill_u64 (unsigned long long __X)
>  {
>    return __X | (__X - 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __blsic_u64 (unsigned long long __X)
>  {
>    return ~__X | (__X - 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __t1mskc_u64 (unsigned long long __X)
>  {
>    return ~__X | (__X + 1);
>  }
>
> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +extern __inline unsigned long long
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  __tzmsk_u64 (unsigned long long __X)
>  {
>    return ~__X & (__X - 1);
> diff --git a/gcc/config/i386/tsxldtrkintrin.h b/gcc/config/i386/tsxldtrkintrin.h
> index bb42a8e89b9..32a0b87c43a 100644
> --- a/gcc/config/i386/tsxldtrkintrin.h
> +++ b/gcc/config/i386/tsxldtrkintrin.h
> @@ -35,14 +35,16 @@
>  #endif /* __TSXLDTRK__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsusldtrk (void)
>  {
>    __builtin_ia32_xsusldtrk ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xresldtrk (void)
>  {
>    __builtin_ia32_xresldtrk ();
> diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h
> index 2ff0cce9b49..d424bc22ba8 100644
> --- a/gcc/config/i386/uintrintrin.h
> +++ b/gcc/config/i386/uintrintrin.h
> @@ -47,28 +47,32 @@ struct __uintr_frame
>  };
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _clui (void)
>  {
>    __builtin_ia32_clui ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _stui (void)
>  {
>    __builtin_ia32_stui ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _senduipi (unsigned long long __R)
>  {
>    __builtin_ia32_senduipi (__R);
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _testui (void)
>  {
>    return __builtin_ia32_testui ();
> diff --git a/gcc/config/i386/waitpkgintrin.h b/gcc/config/i386/waitpkgintrin.h
> index a7a4d6a927d..a2d7b004545 100644
> --- a/gcc/config/i386/waitpkgintrin.h
> +++ b/gcc/config/i386/waitpkgintrin.h
> @@ -35,21 +35,24 @@
>  #endif /* __WAITPKG__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _umonitor (void *__A)
>  {
>    __builtin_ia32_umonitor (__A);
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _umwait (unsigned int __A, unsigned long long __B)
>  {
>    return __builtin_ia32_umwait (__A, __B);
>  }
>
>  extern __inline unsigned char
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _tpause (unsigned int __A, unsigned long long __B)
>  {
>    return __builtin_ia32_tpause (__A, __B);
> diff --git a/gcc/config/i386/wbnoinvdintrin.h b/gcc/config/i386/wbnoinvdintrin.h
> index 71dc1b6accb..6ba9ca01f27 100644
> --- a/gcc/config/i386/wbnoinvdintrin.h
> +++ b/gcc/config/i386/wbnoinvdintrin.h
> @@ -35,7 +35,8 @@
>  #endif /* __WBNOINVD__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wbnoinvd (void)
>  {
>    __builtin_ia32_wbnoinvd ();
> diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
> index ceda501252c..4289ff66cfd 100644
> --- a/gcc/config/i386/x86gprintrin.h
> +++ b/gcc/config/i386/x86gprintrin.h
> @@ -95,7 +95,8 @@
>  #include <hresetintrin.h>
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _wbinvd (void)
>  {
>    __builtin_ia32_wbinvd ();
> @@ -107,14 +108,16 @@ _wbinvd (void)
>  #define __DISABLE_RDRND__
>  #endif /* __RDRND__ */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdrand16_step (unsigned short *__P)
>  {
>    return __builtin_ia32_rdrand16_step (__P);
>  }
>
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdrand32_step (unsigned int *__P)
>  {
>    return __builtin_ia32_rdrand32_step (__P);
> @@ -130,7 +133,8 @@ _rdrand32_step (unsigned int *__P)
>  #define __DISABLE_RDPID__
>  #endif /* __RDPID__ */
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdpid_u32 (void)
>  {
>    return __builtin_ia32_rdpid ();
> @@ -148,56 +152,64 @@ _rdpid_u32 (void)
>  #define __DISABLE_FSGSBASE__
>  #endif /* __FSGSBASE__ */
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _readfsbase_u32 (void)
>  {
>    return __builtin_ia32_rdfsbase32 ();
>  }
>
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _readfsbase_u64 (void)
>  {
>    return __builtin_ia32_rdfsbase64 ();
>  }
>
>  extern __inline unsigned int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _readgsbase_u32 (void)
>  {
>    return __builtin_ia32_rdgsbase32 ();
>  }
>
>  extern __inline unsigned long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _readgsbase_u64 (void)
>  {
>    return __builtin_ia32_rdgsbase64 ();
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _writefsbase_u32 (unsigned int __B)
>  {
>    __builtin_ia32_wrfsbase32 (__B);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _writefsbase_u64 (unsigned long long __B)
>  {
>    __builtin_ia32_wrfsbase64 (__B);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _writegsbase_u32 (unsigned int __B)
>  {
>    __builtin_ia32_wrgsbase32 (__B);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _writegsbase_u64 (unsigned long long __B)
>  {
>    __builtin_ia32_wrgsbase64 (__B);
> @@ -213,7 +225,8 @@ _writegsbase_u64 (unsigned long long __B)
>  #define __DISABLE_RDRND__
>  #endif /* __RDRND__ */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _rdrand64_step (unsigned long long *__P)
>  {
>    return __builtin_ia32_rdrand64_step (__P);
> @@ -233,7 +246,8 @@ _rdrand64_step (unsigned long long *__P)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _ptwrite64 (unsigned long long __B)
>  {
>    __builtin_ia32_ptwrite64 (__B);
> @@ -241,7 +255,8 @@ _ptwrite64 (unsigned long long __B)
>  #endif /* __x86_64__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _ptwrite32 (unsigned __B)
>  {
>    __builtin_ia32_ptwrite32 (__B);
> diff --git a/gcc/config/i386/xsavecintrin.h b/gcc/config/i386/xsavecintrin.h
> index 45751a087bb..d0739cbd1cc 100644
> --- a/gcc/config/i386/xsavecintrin.h
> +++ b/gcc/config/i386/xsavecintrin.h
> @@ -35,7 +35,8 @@
>  #endif /* __XSAVEC__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsavec (void *__P, long long __M)
>  {
>    __builtin_ia32_xsavec (__P, __M);
> @@ -43,7 +44,8 @@ _xsavec (void *__P, long long __M)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsavec64 (void *__P, long long __M)
>  {
>    __builtin_ia32_xsavec64 (__P, __M);
> diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h
> index 56e6a1e527b..50d174fa2b0 100644
> --- a/gcc/config/i386/xsaveintrin.h
> +++ b/gcc/config/i386/xsaveintrin.h
> @@ -35,28 +35,32 @@
>  #endif /* __XSAVE__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsave (void *__P, long long __M)
>  {
>    __builtin_ia32_xsave (__P, __M);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xrstor (void *__P, long long __M)
>  {
>    __builtin_ia32_xrstor (__P, __M);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsetbv (unsigned int __A, long long __V)
>  {
>    __builtin_ia32_xsetbv (__A, __V);
>  }
>
>  extern __inline long long
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xgetbv (unsigned int __A)
>  {
>    return __builtin_ia32_xgetbv (__A);
> @@ -64,14 +68,16 @@ _xgetbv (unsigned int __A)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsave64 (void *__P, long long __M)
>  {
>    __builtin_ia32_xsave64 (__P, __M);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xrstor64 (void *__P, long long __M)
>  {
>    __builtin_ia32_xrstor64 (__P, __M);
> diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h
> index ba076cea51a..b5c25f94f95 100644
> --- a/gcc/config/i386/xsaveoptintrin.h
> +++ b/gcc/config/i386/xsaveoptintrin.h
> @@ -35,7 +35,8 @@
>  #endif /* __XSAVEOPT__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsaveopt (void *__P, long long __M)
>  {
>    __builtin_ia32_xsaveopt (__P, __M);
> @@ -43,7 +44,8 @@ _xsaveopt (void *__P, long long __M)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsaveopt64 (void *__P, long long __M)
>  {
>    __builtin_ia32_xsaveopt64 (__P, __M);
> diff --git a/gcc/config/i386/xsavesintrin.h b/gcc/config/i386/xsavesintrin.h
> index 969835fed64..27cec8370ad 100644
> --- a/gcc/config/i386/xsavesintrin.h
> +++ b/gcc/config/i386/xsavesintrin.h
> @@ -35,14 +35,16 @@
>  #endif /* __XSAVES__ */
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsaves (void *__P, long long __M)
>  {
>    __builtin_ia32_xsaves (__P, __M);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xrstors (void *__P, long long __M)
>  {
>    __builtin_ia32_xrstors (__P, __M);
> @@ -50,14 +52,16 @@ _xrstors (void *__P, long long __M)
>
>  #ifdef __x86_64__
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xrstors64 (void *__P, long long __M)
>  {
>    __builtin_ia32_xrstors64 (__P, __M);
>  }
>
>  extern __inline void
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xsaves64 (void *__P, long long __M)
>  {
>    __builtin_ia32_xsaves64 (__P, __M);
> diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h
> index 39d18af6536..0eae87a1d43 100644
> --- a/gcc/config/i386/xtestintrin.h
> +++ b/gcc/config/i386/xtestintrin.h
> @@ -37,7 +37,8 @@
>  /* Return non-zero if the instruction executes inside an RTM or HLE code
>     region.  Return zero otherwise.   */
>  extern __inline int
> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
> +              __general_regs_only__))
>  _xtest (void)
>  {
>    return __builtin_ia32_xtest ();
> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 1ddafb3ff2c..7111eca62ff 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
>  of the section to record function entry instrumentation calls in when
>  enabled with @option{-pg -mrecord-mcount}
>
> +@item general_regs_only
> +@cindex @code{general_regs_only} function attribute, x86
> +The @code{general_regs_only} attribute on functions is used to
> +inform the compiler that functions use only general purpose registers.
> +
>  @end table
>
>  @node Xstormy16 Function Attributes
> diff --git a/gcc/testsuite/gcc.target/i386/pr99744-3.c b/gcc/testsuite/gcc.target/i386/pr99744-3.c
> new file mode 100644
> index 00000000000..6c505816ceb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr99744-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mno-serialize" } */
> +
> +#include <x86intrin.h>
> +
> +__attribute__ ((target("general-regs-only")))
> +void
> +foo1 (void)
> +{
> +  _serialize ();
> +}
> +
> +/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr99744-4.c b/gcc/testsuite/gcc.target/i386/pr99744-4.c
> new file mode 100644
> index 00000000000..a17d4a2139b
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/i386/pr99744-4.c
> @@ -0,0 +1,352 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdir64b -mmovdiri -mmwaitx -mpconfig -mpku -mpopcnt -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -msgx -mshstk -mtbm -mtsxldtrk -mxsave -mxsavec -mxsaveopt -mxsaves -mwaitpkg -mwbnoinvd" } */
> +/* { dg-additional-options "-muintr" { target { ! ia32 } } }  */
> +
> +/* Test calling GPR intrinsics from functions with general-regs-only
> +   target attribute.  */
> +
> +#include <x86gprintrin.h>
> +
> +#define _CONCAT(x,y) x ## y
> +
> +#define test_0(func, type)                                             \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (void)                                                \
> +  { return func (); }
> +
> +#define test_0_i1(func, type, imm)                                     \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (void)                                                \
> +  { return func (imm); }
> +
> +#define test_1(func, type, op1_type)                                   \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (op1_type A)                                  \
> +  { return func (A); }
> +
> +#define test_1_i1(func, type, op1_type, imm)                           \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (op1_type A)                                  \
> +  { return func (A, imm); }
> +
> +#define test_2(func, type, op1_type, op2_type)                         \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (op1_type A, op2_type B)                      \
> +  { return func (A, B); }
> +
> +#define test_2_i1(func, type, op1_type, op2_type, imm)                 \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (op1_type A, op2_type B)                      \
> +  { return func (A, B, imm); }
> +
> +#define test_3(func, type, op1_type, op2_type, op3_type)               \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)          \
> +  { return func (A, B, C); }
> +
> +#define test_4(func, type, op1_type, op2_type, op3_type, op4_type)     \
> +  __attribute__ ((target("general-regs-only")))                                \
> +  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C,          \
> +                         op4_type D)                                   \
> +  { return func (A, B, C, D); }
> +
> +/* ia32intrin.h  */
> +test_1 (__bsfd, int, int)
> +test_1 (__bsrd, int, int)
> +test_1 (__bswapd, int, int)
> +test_1 (__popcntd, int, unsigned int)
> +test_2 (__rolb, unsigned char, unsigned char, int)
> +test_2 (__rolw, unsigned short, unsigned short, int)
> +test_2 (__rold, unsigned int, unsigned int, int)
> +test_2 (__rorb, unsigned char, unsigned char, int)
> +test_2 (__rorw, unsigned short, unsigned short, int)
> +test_2 (__rord, unsigned int, unsigned int, int)
> +
> +#ifndef __iamcu__
> +/* ia32intrin.h  */
> +test_1 (__rdpmc, unsigned long long, int)
> +test_0 (__rdtsc, unsigned long long)
> +test_1 (__rdtscp, unsigned long long, unsigned int *)
> +test_0 (__pause, void)
> +
> +/* adxintrin.h */
> +test_4 (_subborrow_u32, unsigned char, unsigned char, unsigned int,
> +       unsigned int, unsigned int *)
> +test_4 (_addcarry_u32, unsigned char, unsigned char, unsigned int,
> +       unsigned int, unsigned int *)
> +test_4 (_addcarryx_u32, unsigned char, unsigned char, unsigned int,
> +       unsigned int, unsigned int *)
> +
> +/* bmiintrin.h */
> +test_1 (__tzcnt_u16, unsigned short, unsigned short)
> +test_2 (__andn_u32, unsigned int, unsigned int, unsigned int)
> +test_2 (__bextr_u32, unsigned int, unsigned int, unsigned int)
> +test_3 (_bextr_u32, unsigned int, unsigned int, unsigned int,
> +       unsigned int)
> +test_1 (__blsi_u32, unsigned int, unsigned int)
> +test_1 (_blsi_u32, unsigned int, unsigned int)
> +test_1 (__blsmsk_u32, unsigned int, unsigned int)
> +test_1 (_blsmsk_u32, unsigned int, unsigned int)
> +test_1 (__blsr_u32, unsigned int, unsigned int)
> +test_1 (_blsr_u32, unsigned int, unsigned int)
> +test_1 (__tzcnt_u32, unsigned int, unsigned int)
> +test_1 (_tzcnt_u32, unsigned int, unsigned int)
> +
> +/* bmi2intrin.h */
> +test_2 (_bzhi_u32, unsigned int, unsigned int, unsigned int)
> +test_2 (_pdep_u32, unsigned int, unsigned int, unsigned int)
> +test_2 (_pext_u32, unsigned int, unsigned int, unsigned int)
> +
> +/* cetintrin.h */
> +test_1 (_inc_ssp, void, unsigned int)
> +test_0 (_saveprevssp, void)
> +test_1 (_rstorssp, void, void *)
> +test_2 (_wrssd, void, unsigned int, void *)
> +test_2 (_wrussd, void, unsigned int, void *)
> +test_0 (_setssbsy, void)
> +test_1 (_clrssbsy, void, void *)
> +
> +/* cldemoteintrin.h */
> +test_1 (_cldemote, void, void *)
> +
> +/* clflushoptintrin.h */
> +test_1 (_mm_clflushopt, void, void *)
> +
> +/* clwbintrin.h */
> +test_1 (_mm_clwb, void, void *)
> +
> +/* clzerointrin.h */
> +test_1 (_mm_clzero, void, void *)
> +
> +/* enqcmdintrin.h */
> +test_2 (_enqcmd, int, void *, const void *)
> +test_2 (_enqcmds, int, void *, const void *)
> +
> +/* fxsrintrin.h */
> +test_1 (_fxsave, void, void *)
> +test_1 (_fxrstor, void, void *)
> +
> +/* hresetintrin.h */
> +test_1 (_hreset, void, unsigned int)
> +
> +/* lzcntintrin.h */
> +test_1 (__lzcnt16, unsigned short, unsigned short)
> +test_1 (__lzcnt32, unsigned int, unsigned int)
> +test_1 (_lzcnt_u32, unsigned int, unsigned int)
> +
> +/* lwpintrin.h */
> +test_1 (__llwpcb, void, void *)
> +test_0 (__slwpcb, void *)
> +test_2_i1 (__lwpval32, void, unsigned int, unsigned int, 1)
> +test_2_i1 (__lwpins32, unsigned char, unsigned int, unsigned int, 1)
> +
> +/* movdirintrin.h */
> +test_2 (_directstoreu_u32, void, void *, unsigned int)
> +test_2 (_movdir64b, void, void *, const void *)
> +
> +/* mwaitxintrin.h */
> +test_3 (_mm_monitorx, void, void const *, unsigned int, unsigned int)
> +test_3 (_mm_mwaitx, void, unsigned int, unsigned int, unsigned int)
> +
> +/* pconfigintrin.h */
> +test_2 (_pconfig_u32, unsigned int, const unsigned int, size_t *)
> +
> +/* pkuintrin.h */
> +test_0 (_rdpkru_u32, unsigned int)
> +test_1 (_wrpkru, void, unsigned int)
> +
> +/* popcntintrin.h */
> +test_1 (_mm_popcnt_u32, int, unsigned int)
> +
> +/* rdseedintrin.h */
> +test_1 (_rdseed16_step, int, unsigned short *)
> +test_1 (_rdseed32_step, int, unsigned int *)
> +
> +/* rtmintrin.h */
> +test_0 (_xbegin, unsigned int)
> +test_0 (_xend, void)
> +test_0_i1 (_xabort, void, 1)
> +
> +/* sgxintrin.h */
> +test_2 (_encls_u32, unsigned int, const unsigned int, size_t *)
> +test_2 (_enclu_u32, unsigned int, const unsigned int, size_t *)
> +test_2 (_enclv_u32, unsigned int, const unsigned int, size_t *)
> +
> +/* tbmintrin.h */
> +test_1_i1 (__bextri_u32, unsigned int, unsigned int, 1)
> +test_1 (__blcfill_u32, unsigned int, unsigned int)
> +test_1 (__blci_u32, unsigned int, unsigned int)
> +test_1 (__blcic_u32, unsigned int, unsigned int)
> +test_1 (__blcmsk_u32, unsigned int, unsigned int)
> +test_1 (__blcs_u32, unsigned int, unsigned int)
> +test_1 (__blsfill_u32, unsigned int, unsigned int)
> +test_1 (__blsic_u32, unsigned int, unsigned int)
> +test_1 (__t1mskc_u32, unsigned int, unsigned int)
> +test_1 (__tzmsk_u32, unsigned int, unsigned int)
> +
> +/* tsxldtrkintrin.h */
> +test_0 (_xsusldtrk, void)
> +test_0 (_xresldtrk, void)
> +
> +/* x86gprintrin.h */
> +test_1 (_ptwrite32, void, unsigned int)
> +test_1 (_rdrand16_step, int, unsigned short *)
> +test_1 (_rdrand32_step, int, unsigned int *)
> +test_0 (_wbinvd, void)
> +
> +/* xtestintrin.h */
> +test_0 (_xtest, int)
> +
> +/* xsaveintrin.h */
> +test_2 (_xsave, void, void *, long long)
> +test_2 (_xrstor, void, void *, long long)
> +test_2 (_xsetbv, void, unsigned int, long long)
> +test_1 (_xgetbv, long long, unsigned int)
> +
> +/* xsavecintrin.h */
> +test_2 (_xsavec, void, void *, long long)
> +
> +/* xsaveoptintrin.h */
> +test_2 (_xsaveopt, void, void *, long long)
> +
> +/* xsavesintrin.h */
> +test_2 (_xsaves, void, void *, long long)
> +test_2 (_xrstors, void, void *, long long)
> +
> +/* wbnoinvdintrin.h */
> +test_0 (_wbnoinvd, void)
> +
> +#ifdef __x86_64__
> +/* adxintrin.h */
> +test_4 (_subborrow_u64, unsigned char, unsigned char,
> +       unsigned long long, unsigned long long,
> +       unsigned long long *)
> +test_4 (_addcarry_u64, unsigned char, unsigned char,
> +       unsigned long long, unsigned long long,
> +       unsigned long long *)
> +test_4 (_addcarryx_u64, unsigned char, unsigned char,
> +       unsigned long long, unsigned long long,
> +       unsigned long long *)
> +
> +/* bmiintrin.h */
> +test_2 (__andn_u64, unsigned long long, unsigned long long,
> +       unsigned long long)
> +test_2 (__bextr_u64, unsigned long long, unsigned long long,
> +       unsigned long long)
> +test_3 (_bextr_u64, unsigned long long, unsigned long long,
> +       unsigned long long, unsigned long long)
> +test_1 (__blsi_u64, unsigned long long, unsigned long long)
> +test_1 (_blsi_u64, unsigned long long, unsigned long long)
> +test_1 (__blsmsk_u64, unsigned long long, unsigned long long)
> +test_1 (_blsmsk_u64, unsigned long long, unsigned long long)
> +test_1 (__blsr_u64, unsigned long long, unsigned long long)
> +test_1 (_blsr_u64, unsigned long long, unsigned long long)
> +test_1 (__tzcnt_u64, unsigned long long, unsigned long long)
> +test_1 (_tzcnt_u64, unsigned long long, unsigned long long)
> +
> +/* bmi2intrin.h */
> +test_2 (_bzhi_u64, unsigned long long, unsigned long long,
> +       unsigned long long)
> +test_2 (_pdep_u64, unsigned long long, unsigned long long,
> +       unsigned long long)
> +test_2 (_pext_u64, unsigned long long, unsigned long long,
> +       unsigned long long)
> +test_3 (_mulx_u64, unsigned long long, unsigned long long,
> +       unsigned long long, unsigned long long *)
> +
> +/* cetintrin.h */
> +test_0 (_get_ssp, unsigned long long)
> +test_2 (_wrssq, void, unsigned long long, void *)
> +test_2 (_wrussq, void, unsigned long long, void *)
> +
> +/* fxsrintrin.h */
> +test_1 (_fxsave64, void, void *)
> +test_1 (_fxrstor64, void, void *)
> +
> +/* ia32intrin.h  */
> +test_1 (__bsfq, int, long long)
> +test_1 (__bsrq, int, long long)
> +test_1 (__bswapq, long long, long long)
> +test_1 (__popcntq, long long, unsigned long long)
> +test_2 (__rolq, unsigned long long, unsigned long long, int)
> +test_2 (__rorq, unsigned long long, unsigned long long, int)
> +test_0 (__readeflags, unsigned long long)
> +test_1 (__writeeflags, void, unsigned int)
> +
> +/* lzcntintrin.h */
> +test_1 (__lzcnt64, unsigned long long, unsigned long long)
> +test_1 (_lzcnt_u64, unsigned long long, unsigned long long)
> +
> +/* lwpintrin.h */
> +test_2_i1 (__lwpval64, void, unsigned long long, unsigned int, 1)
> +test_2_i1 (__lwpins64, unsigned char, unsigned long long,
> +          unsigned int, 1)
> +
> +/* movdirintrin.h */
> +test_2 (_directstoreu_u64, void, void *, unsigned long long)
> +
> +/* popcntintrin.h */
> +test_1 (_mm_popcnt_u64, long long, unsigned long long)
> +
> +/* rdseedintrin.h */
> +test_1 (_rdseed64_step, int, unsigned long long *)
> +
> +/* tbmintrin.h */
> +test_1_i1 (__bextri_u64, unsigned long long, unsigned long long, 1)
> +test_1 (__blcfill_u64, unsigned long long, unsigned long long)
> +test_1 (__blci_u64, unsigned long long, unsigned long long)
> +test_1 (__blcic_u64, unsigned long long, unsigned long long)
> +test_1 (__blcmsk_u64, unsigned long long, unsigned long long)
> +test_1 (__blcs_u64, unsigned long long, unsigned long long)
> +test_1 (__blsfill_u64, unsigned long long, unsigned long long)
> +test_1 (__blsic_u64, unsigned long long, unsigned long long)
> +test_1 (__t1mskc_u64, unsigned long long, unsigned long long)
> +test_1 (__tzmsk_u64, unsigned long long, unsigned long long)
> +
> +/* uintrintrin.h */
> +test_0 (_clui, void)
> +test_1 (_senduipi, void, unsigned long long)
> +test_0 (_stui, void)
> +test_0 (_testui, unsigned char)
> +
> +/* x86gprintrin.h */
> +test_1 (_ptwrite64, void, unsigned long long)
> +test_0 (_readfsbase_u32, unsigned int)
> +test_0 (_readfsbase_u64, unsigned long long)
> +test_0 (_readgsbase_u32, unsigned int)
> +test_0 (_readgsbase_u64, unsigned long long)
> +test_1 (_rdrand64_step, int, unsigned long long *)
> +test_1 (_writefsbase_u32, void, unsigned int)
> +test_1 (_writefsbase_u64, void, unsigned long long)
> +test_1 (_writegsbase_u32, void, unsigned int)
> +test_1 (_writegsbase_u64, void, unsigned long long)
> +
> +/* xsaveintrin.h */
> +test_2 (_xsave64, void, void *, long long)
> +test_2 (_xrstor64, void, void *, long long)
> +
> +/* xsavecintrin.h */
> +test_2 (_xsavec64, void, void *, long long)
> +
> +/* xsaveoptintrin.h */
> +test_2 (_xsaveopt64, void, void *, long long)
> +
> +/* xsavesintrin.h */
> +test_2 (_xsaves64, void, void *, long long)
> +test_2 (_xrstors64, void, void *, long long)
> +
> +/* waitpkgintrin.h */
> +test_1 (_umonitor, void, void *)
> +test_2 (_umwait, unsigned char, unsigned int, unsigned long long)
> +test_2 (_tpause, unsigned char, unsigned int, unsigned long long)
> +
> +#else /* !__x86_64__ */
> +/* bmi2intrin.h */
> +test_3 (_mulx_u32, unsigned int, unsigned int, unsigned int,
> +       unsigned int *)
> +
> +/* cetintrin.h */
> +test_0 (_get_ssp, unsigned int)
> +#endif /* __x86_64__ */
> +
> +#endif
> --
> 2.30.2
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-21  7:30   ` Uros Bizjak
@ 2021-04-21 13:47     ` H.J. Lu
  2021-04-21 16:54     ` Martin Sebor
  1 sibling, 0 replies; 22+ messages in thread
From: H.J. Lu @ 2021-04-21 13:47 UTC (permalink / raw)
  To: Uros Bizjak
  Cc: gcc-patches, Jakub Jelinek, Bernhard Reutner-Fischer,
	Martin Sebor, Richard Biener

On Wed, Apr 21, 2021 at 12:30 AM Uros Bizjak <ubizjak@gmail.com> wrote:
>
> On Thu, Apr 15, 2021 at 12:39 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
> > Author: H.J. Lu <hjl.tools@gmail.com>
> > Date:   Fri Aug 21 09:42:49 2020 -0700
> >
> >     x86: Add target("general-regs-only") function attribute
> >
> > is incomplete since it is impossible to call integer intrinsics from
> > a function with general-regs-only target attribute.
> >
> > 1. Add general_regs_only function attribute to inform the compiler that
> > functions use only general purpose registers.  When making inlining
> > decisions on such functions, non-GPR compiler options are excluded.
> > 2. Add general_regs_only attribute to x86 intrinsics which use only
> > general purpose registers.
>
> I'd like to ask Richard and Jakub if they agree with the approach.

Here is the v5 patch:

https://gcc.gnu.org/pipermail/gcc-patches/2021-April/568407.html

Richard, Jakub,  do they look good to you?

> On a related note, can we declare default attributes like clang does, e.g.:
>
> /* Define the default attributes for the functions.  */
> #define __DEFAULT_FN_ATTRS __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))

This can be defined in x86intrin.h.

> #define __DEFAULT_FN_ATTRS_GRO __attribute__((__gnu_inline__,
> __always_inline__, __general_regs_only__, __artificial__))

This can be defined in x86gprintrin.h.

> and use these defines throughout header files?
>
> Uros.
>

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-21  7:30   ` Uros Bizjak
  2021-04-21 13:47     ` H.J. Lu
@ 2021-04-21 16:54     ` Martin Sebor
  1 sibling, 0 replies; 22+ messages in thread
From: Martin Sebor @ 2021-04-21 16:54 UTC (permalink / raw)
  To: Uros Bizjak, H.J. Lu
  Cc: gcc-patches, Jakub Jelinek, Bernhard Reutner-Fischer, Richard Biener

On 4/21/21 1:30 AM, Uros Bizjak wrote:
> On Thu, Apr 15, 2021 at 12:39 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>>
>> commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
>> Author: H.J. Lu <hjl.tools@gmail.com>
>> Date:   Fri Aug 21 09:42:49 2020 -0700
>>
>>      x86: Add target("general-regs-only") function attribute
>>
>> is incomplete since it is impossible to call integer intrinsics from
>> a function with general-regs-only target attribute.
>>
>> 1. Add general_regs_only function attribute to inform the compiler that
>> functions use only general purpose registers.  When making inlining
>> decisions on such functions, non-GPR compiler options are excluded.
>> 2. Add general_regs_only attribute to x86 intrinsics which use only
>> general purpose registers.
> 
> I'd like to ask Richard and Jakub if they agree with the approach.
> 
> On a related note, can we declare default attributes like clang does, e.g.:
> 
> /* Define the default attributes for the functions.  */
> #define __DEFAULT_FN_ATTRS __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> #define __DEFAULT_FN_ATTRS_GRO __attribute__((__gnu_inline__,
> __always_inline__, __general_regs_only__, __artificial__))
> 
> and use these defines throughout header files?

FWIW, the sequence of attributes contributes measurably to
the amount of time it takes to parse all the declarations.  Since
they're the same for most of the thousands of functions declared
in these headers it might be worth considering either adding
a new attribute that "expands" to all of these, or using some
other mechanism to speed things up.  (This came up recently
in pr100099.)

Martin

> 
> Uros.
> 
>>
>> gcc/
>>
>>          PR target/99744
>>          * config/i386/i386-options.c (ix86_attribute_table): Add
>>          general_regs_only.
>>          * config/i386/i386.c (ix86_can_inline_p): Exclude non-integer
>>          target options if callee has general_regs_only attribute.
>>          * config/i386/adxintrin.h: Add general_regs_only attribute to
>>          intrinsics which use only general purpose registers.
>>          * config/i386/bmiintrin.h: Likewise.
>>          * config/i386/bmi2intrin.h: Likewise.
>>          * config/i386/cetintrin.h: Likewise.
>>          * config/i386/cldemoteintrin.h: Likewise.
>>          * config/i386/clflushoptintrin.h: Likewise.
>>          * config/i386/clwbintrin.h: Likewise.
>>          * config/i386/clzerointrin.h: Likewise.
>>          * config/i386/enqcmdintrin.h: Likewise.
>>          * config/i386/fxsrintrin.h: Likewise.
>>          * config/i386/hresetintrin.h: Likewise.
>>          * config/i386/ia32intrin.h: Likewise.
>>          * config/i386/lwpintrin.h: Likewise.
>>          * config/i386/lzcntintrin.h: Likewise.
>>          * config/i386/movdirintrin.h: Likewise.
>>          * config/i386/mwaitxintrin.h: Likewise.
>>          * config/i386/pconfigintrin.h: Likewise.
>>          * config/i386/pkuintrin.h: Likewise.
>>          * config/i386/popcntintrin.h: Likewise.
>>          * config/i386/rdseedintrin.h: Likewise.
>>          * config/i386/rtmintrin.h: Likewise.
>>          * config/i386/serializeintrin.h: Likewise.
>>          * config/i386/sgxintrin.h: Likewise.
>>          * config/i386/tbmintrin.h: Likewise.
>>          * config/i386/tsxldtrkintrin.h: Likewise.
>>          * config/i386/uintrintrin.h: Likewise.
>>          * config/i386/waitpkgintrin.h: Likewise.
>>          * config/i386/wbnoinvdintrin.h: Likewise.
>>          * config/i386/x86gprintrin.h: Likewise.
>>          * config/i386/xsavecintrin.h: Likewise.
>>          * config/i386/xsaveintrin.h: Likewise.
>>          * config/i386/xsaveoptintrin.h: Likewise.
>>          * config/i386/xsavesintrin.h: Likewise.
>>          * config/i386/xtestintrin.h: Likewise.
>>          * doc/extend.texi: Document general_regs_only function attribute.
>>
>> gcc/testsuite/
>>
>>          PR target/99744
>>          * gcc.target/i386/pr99744-3.c: New test.
>>          * gcc.target/i386/pr99744-4.c: Likewise.
>> ---
>>   gcc/config/i386/adxintrin.h               |  18 +-
>>   gcc/config/i386/bmi2intrin.h              |  24 +-
>>   gcc/config/i386/bmiintrin.h               |  92 ++++--
>>   gcc/config/i386/cetintrin.h               |  33 +-
>>   gcc/config/i386/cldemoteintrin.h          |   3 +-
>>   gcc/config/i386/clflushoptintrin.h        |   3 +-
>>   gcc/config/i386/clwbintrin.h              |   3 +-
>>   gcc/config/i386/clzerointrin.h            |   4 +-
>>   gcc/config/i386/enqcmdintrin.h            |   6 +-
>>   gcc/config/i386/fxsrintrin.h              |  12 +-
>>   gcc/config/i386/hresetintrin.h            |   3 +-
>>   gcc/config/i386/i386-options.c            |   2 +
>>   gcc/config/i386/i386.c                    |  29 +-
>>   gcc/config/i386/ia32intrin.h              |  82 +++--
>>   gcc/config/i386/lwpintrin.h               |  24 +-
>>   gcc/config/i386/lzcntintrin.h             |  20 +-
>>   gcc/config/i386/movdirintrin.h            |   9 +-
>>   gcc/config/i386/mwaitxintrin.h            |   8 +-
>>   gcc/config/i386/pconfigintrin.h           |   3 +-
>>   gcc/config/i386/pkuintrin.h               |   6 +-
>>   gcc/config/i386/popcntintrin.h            |   8 +-
>>   gcc/config/i386/rdseedintrin.h            |   9 +-
>>   gcc/config/i386/rtmintrin.h               |   9 +-
>>   gcc/config/i386/serializeintrin.h         |   8 +-
>>   gcc/config/i386/sgxintrin.h               |   9 +-
>>   gcc/config/i386/tbmintrin.h               |  80 +++--
>>   gcc/config/i386/tsxldtrkintrin.h          |   6 +-
>>   gcc/config/i386/uintrintrin.h             |  12 +-
>>   gcc/config/i386/waitpkgintrin.h           |   9 +-
>>   gcc/config/i386/wbnoinvdintrin.h          |   3 +-
>>   gcc/config/i386/x86gprintrin.h            |  45 ++-
>>   gcc/config/i386/xsavecintrin.h            |   6 +-
>>   gcc/config/i386/xsaveintrin.h             |  18 +-
>>   gcc/config/i386/xsaveoptintrin.h          |   6 +-
>>   gcc/config/i386/xsavesintrin.h            |  12 +-
>>   gcc/config/i386/xtestintrin.h             |   3 +-
>>   gcc/doc/extend.texi                       |   5 +
>>   gcc/testsuite/gcc.target/i386/pr99744-3.c |  13 +
>>   gcc/testsuite/gcc.target/i386/pr99744-4.c | 352 ++++++++++++++++++++++
>>   39 files changed, 818 insertions(+), 179 deletions(-)
>>   create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-3.c
>>   create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-4.c
>>
>> diff --git a/gcc/config/i386/adxintrin.h b/gcc/config/i386/adxintrin.h
>> index e514e741f02..74e3df18dce 100644
>> --- a/gcc/config/i386/adxintrin.h
>> +++ b/gcc/config/i386/adxintrin.h
>> @@ -29,7 +29,8 @@
>>   #define _ADXINTRIN_H_INCLUDED
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _subborrow_u32 (unsigned char __CF, unsigned int __X,
>>                  unsigned int __Y, unsigned int *__P)
>>   {
>> @@ -37,7 +38,8 @@ _subborrow_u32 (unsigned char __CF, unsigned int __X,
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _addcarry_u32 (unsigned char __CF, unsigned int __X,
>>                 unsigned int __Y, unsigned int *__P)
>>   {
>> @@ -45,7 +47,8 @@ _addcarry_u32 (unsigned char __CF, unsigned int __X,
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _addcarryx_u32 (unsigned char __CF, unsigned int __X,
>>                  unsigned int __Y, unsigned int *__P)
>>   {
>> @@ -54,7 +57,8 @@ _addcarryx_u32 (unsigned char __CF, unsigned int __X,
>>
>>   #ifdef __x86_64__
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _subborrow_u64 (unsigned char __CF, unsigned long long __X,
>>                  unsigned long long __Y, unsigned long long *__P)
>>   {
>> @@ -62,7 +66,8 @@ _subborrow_u64 (unsigned char __CF, unsigned long long __X,
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _addcarry_u64 (unsigned char __CF, unsigned long long __X,
>>                 unsigned long long __Y, unsigned long long *__P)
>>   {
>> @@ -70,7 +75,8 @@ _addcarry_u64 (unsigned char __CF, unsigned long long __X,
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _addcarryx_u64 (unsigned char __CF, unsigned long long __X,
>>                  unsigned long long __Y, unsigned long long *__P)
>>   {
>> diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
>> index 6b23e4e98a1..7f64e5a8ff1 100644
>> --- a/gcc/config/i386/bmi2intrin.h
>> +++ b/gcc/config/i386/bmi2intrin.h
>> @@ -35,21 +35,24 @@
>>   #endif /* __BMI2__ */
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _bzhi_u32 (unsigned int __X, unsigned int __Y)
>>   {
>>     return __builtin_ia32_bzhi_si (__X, __Y);
>>   }
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _pdep_u32 (unsigned int __X, unsigned int __Y)
>>   {
>>     return __builtin_ia32_pdep_si (__X, __Y);
>>   }
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _pext_u32 (unsigned int __X, unsigned int __Y)
>>   {
>>     return __builtin_ia32_pext_si (__X, __Y);
>> @@ -58,28 +61,32 @@ _pext_u32 (unsigned int __X, unsigned int __Y)
>>   #ifdef  __x86_64__
>>
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _bzhi_u64 (unsigned long long __X, unsigned long long __Y)
>>   {
>>     return __builtin_ia32_bzhi_di (__X, __Y);
>>   }
>>
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _pdep_u64 (unsigned long long __X, unsigned long long __Y)
>>   {
>>     return __builtin_ia32_pdep_di (__X, __Y);
>>   }
>>
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _pext_u64 (unsigned long long __X, unsigned long long __Y)
>>   {
>>     return __builtin_ia32_pext_di (__X, __Y);
>>   }
>>
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mulx_u64 (unsigned long long __X, unsigned long long __Y,
>>             unsigned long long *__P)
>>   {
>> @@ -91,7 +98,8 @@ _mulx_u64 (unsigned long long __X, unsigned long long __Y,
>>   #else /* !__x86_64__ */
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
>>   {
>>     unsigned long long __res = (unsigned long long) __X * __Y;
>> diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
>> index 439d81cba11..18b5d7b0734 100644
>> --- a/gcc/config/i386/bmiintrin.h
>> +++ b/gcc/config/i386/bmiintrin.h
>> @@ -34,73 +34,97 @@
>>   #define __DISABLE_BMI__
>>   #endif /* __BMI__ */
>>
>> -extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned short
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __tzcnt_u16 (unsigned short __X)
>>   {
>>     return __builtin_ia32_tzcnt_u16 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __andn_u32 (unsigned int __X, unsigned int __Y)
>>   {
>>     return ~__X & __Y;
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bextr_u32 (unsigned int __X, unsigned int __Y)
>>   {
>>     return __builtin_ia32_bextr_u32 (__X, __Y);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
>>   {
>>     return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsi_u32 (unsigned int __X)
>>   {
>>     return __X & -__X;
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _blsi_u32 (unsigned int __X)
>>   {
>>     return __blsi_u32 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsmsk_u32 (unsigned int __X)
>>   {
>>     return __X ^ (__X - 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _blsmsk_u32 (unsigned int __X)
>>   {
>>     return __blsmsk_u32 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsr_u32 (unsigned int __X)
>>   {
>>     return __X & (__X - 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _blsr_u32 (unsigned int __X)
>>   {
>>     return __blsr_u32 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __tzcnt_u32 (unsigned int __X)
>>   {
>>     return __builtin_ia32_tzcnt_u32 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _tzcnt_u32 (unsigned int __X)
>>   {
>>     return __builtin_ia32_tzcnt_u32 (__X);
>> @@ -108,67 +132,89 @@ _tzcnt_u32 (unsigned int __X)
>>
>>
>>   #ifdef  __x86_64__
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __andn_u64 (unsigned long long __X, unsigned long long __Y)
>>   {
>>     return ~__X & __Y;
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bextr_u64 (unsigned long long __X, unsigned long long __Y)
>>   {
>>     return __builtin_ia32_bextr_u64 (__X, __Y);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
>>   {
>>     return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsi_u64 (unsigned long long __X)
>>   {
>>     return __X & -__X;
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _blsi_u64 (unsigned long long __X)
>>   {
>>     return __blsi_u64 (__X);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsmsk_u64 (unsigned long long __X)
>>   {
>>     return __X ^ (__X - 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _blsmsk_u64 (unsigned long long __X)
>>   {
>>     return __blsmsk_u64 (__X);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsr_u64 (unsigned long long __X)
>>   {
>>     return __X & (__X - 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _blsr_u64 (unsigned long long __X)
>>   {
>>     return __blsr_u64 (__X);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __tzcnt_u64 (unsigned long long __X)
>>   {
>>     return __builtin_ia32_tzcnt_u64 (__X);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _tzcnt_u64 (unsigned long long __X)
>>   {
>>     return __builtin_ia32_tzcnt_u64 (__X);
>> diff --git a/gcc/config/i386/cetintrin.h b/gcc/config/i386/cetintrin.h
>> index 803c6283bec..145bd3ce7d2 100644
>> --- a/gcc/config/i386/cetintrin.h
>> +++ b/gcc/config/i386/cetintrin.h
>> @@ -36,14 +36,16 @@
>>
>>   #ifdef __x86_64__
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _get_ssp (void)
>>   {
>>     return __builtin_ia32_rdsspq ();
>>   }
>>   #else
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _get_ssp (void)
>>   {
>>     return __builtin_ia32_rdsspd ();
>> @@ -51,7 +53,8 @@ _get_ssp (void)
>>   #endif
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _inc_ssp (unsigned int __B)
>>   {
>>   #ifdef __x86_64__
>> @@ -62,21 +65,24 @@ _inc_ssp (unsigned int __B)
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _saveprevssp (void)
>>   {
>>     __builtin_ia32_saveprevssp ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rstorssp (void *__B)
>>   {
>>     __builtin_ia32_rstorssp (__B);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wrssd (unsigned int __B, void *__C)
>>   {
>>     __builtin_ia32_wrssd (__B, __C);
>> @@ -84,7 +90,8 @@ _wrssd (unsigned int __B, void *__C)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wrssq (unsigned long long __B, void *__C)
>>   {
>>     __builtin_ia32_wrssq (__B, __C);
>> @@ -92,7 +99,8 @@ _wrssq (unsigned long long __B, void *__C)
>>   #endif
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wrussd (unsigned int __B, void *__C)
>>   {
>>     __builtin_ia32_wrussd (__B, __C);
>> @@ -100,7 +108,8 @@ _wrussd (unsigned int __B, void *__C)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wrussq (unsigned long long __B, void *__C)
>>   {
>>     __builtin_ia32_wrussq (__B, __C);
>> @@ -108,14 +117,16 @@ _wrussq (unsigned long long __B, void *__C)
>>   #endif
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _setssbsy (void)
>>   {
>>     __builtin_ia32_setssbsy ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _clrssbsy (void *__B)
>>   {
>>     __builtin_ia32_clrssbsy (__B);
>> diff --git a/gcc/config/i386/cldemoteintrin.h b/gcc/config/i386/cldemoteintrin.h
>> index 67dddaf2b89..897a2db9e41 100644
>> --- a/gcc/config/i386/cldemoteintrin.h
>> +++ b/gcc/config/i386/cldemoteintrin.h
>> @@ -34,7 +34,8 @@
>>   #define __DISABLE_CLDEMOTE__
>>   #endif /* __CLDEMOTE__ */
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _cldemote (void *__A)
>>   {
>>     __builtin_ia32_cldemote (__A);
>> diff --git a/gcc/config/i386/clflushoptintrin.h b/gcc/config/i386/clflushoptintrin.h
>> index d8b55762158..3bd91d00681 100644
>> --- a/gcc/config/i386/clflushoptintrin.h
>> +++ b/gcc/config/i386/clflushoptintrin.h
>> @@ -35,7 +35,8 @@
>>   #endif /* __CLFLUSHOPT__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_clflushopt (void *__A)
>>   {
>>     __builtin_ia32_clflushopt (__A);
>> diff --git a/gcc/config/i386/clwbintrin.h b/gcc/config/i386/clwbintrin.h
>> index 21134429a40..2ff40066ef9 100644
>> --- a/gcc/config/i386/clwbintrin.h
>> +++ b/gcc/config/i386/clwbintrin.h
>> @@ -35,7 +35,8 @@
>>   #endif /* __CLWB__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_clwb (void *__A)
>>   {
>>     __builtin_ia32_clwb (__A);
>> diff --git a/gcc/config/i386/clzerointrin.h b/gcc/config/i386/clzerointrin.h
>> index f9095160409..12930e387c3 100644
>> --- a/gcc/config/i386/clzerointrin.h
>> +++ b/gcc/config/i386/clzerointrin.h
>> @@ -30,7 +30,9 @@
>>   #define __DISABLE_CLZERO__
>>   #endif /* __CLZERO__ */
>>
>> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_clzero (void * __I)
>>   {
>>     __builtin_ia32_clzero (__I);
>> diff --git a/gcc/config/i386/enqcmdintrin.h b/gcc/config/i386/enqcmdintrin.h
>> index 2518df18db1..7f3d769c23f 100644
>> --- a/gcc/config/i386/enqcmdintrin.h
>> +++ b/gcc/config/i386/enqcmdintrin.h
>> @@ -35,14 +35,16 @@
>>   #endif /* __ENQCMD__ */
>>
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _enqcmd (void * __P, const void * __Q)
>>   {
>>     return __builtin_ia32_enqcmd (__P, __Q);
>>   }
>>
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _enqcmds (void * __P, const void * __Q)
>>   {
>>     return __builtin_ia32_enqcmds (__P, __Q);
>> diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h
>> index fd2e538eb9c..a80654968eb 100644
>> --- a/gcc/config/i386/fxsrintrin.h
>> +++ b/gcc/config/i386/fxsrintrin.h
>> @@ -35,14 +35,16 @@
>>   #endif /* __FXSR__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _fxsave (void *__P)
>>   {
>>     __builtin_ia32_fxsave (__P);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _fxrstor (void *__P)
>>   {
>>     __builtin_ia32_fxrstor (__P);
>> @@ -50,14 +52,16 @@ _fxrstor (void *__P)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _fxsave64 (void *__P)
>>   {
>>     __builtin_ia32_fxsave64 (__P);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _fxrstor64 (void *__P)
>>   {
>>     __builtin_ia32_fxrstor64 (__P);
>> diff --git a/gcc/config/i386/hresetintrin.h b/gcc/config/i386/hresetintrin.h
>> index 500618825c9..eba09a9010f 100644
>> --- a/gcc/config/i386/hresetintrin.h
>> +++ b/gcc/config/i386/hresetintrin.h
>> @@ -35,7 +35,8 @@
>>   #endif /* __HRESET__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _hreset (unsigned int __EAX)
>>   {
>>     __builtin_ia32_hreset (__EAX);
>> diff --git a/gcc/config/i386/i386-options.c b/gcc/config/i386/i386-options.c
>> index 91da2849c49..559f9357811 100644
>> --- a/gcc/config/i386/i386-options.c
>> +++ b/gcc/config/i386/i386-options.c
>> @@ -3961,6 +3961,8 @@ const struct attribute_spec ix86_attribute_table[] =
>>       ix86_handle_fentry_name, NULL },
>>     { "cf_check", 0, 0, true, false, false, false,
>>       ix86_handle_fndecl_attribute, NULL },
>> +  { "general_regs_only", 0, 0, true, false, false, false,
>> +    ix86_handle_fndecl_attribute, NULL },
>>
>>     /* End element.  */
>>     { NULL, 0, 0, false, false, false, false, NULL, NULL }
>> diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
>> index 7c41302c75b..201a001e95a 100644
>> --- a/gcc/config/i386/i386.c
>> +++ b/gcc/config/i386/i386.c
>> @@ -553,7 +553,7 @@ ix86_can_inline_p (tree caller, tree callee)
>>
>>     /* Changes of those flags can be tolerated for always inlines. Lets hope
>>        user knows what he is doing.  */
>> -  const unsigned HOST_WIDE_INT always_inline_safe_mask
>> +  unsigned HOST_WIDE_INT always_inline_safe_mask
>>           = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
>>              | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
>>              | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
>> @@ -579,13 +579,32 @@ ix86_can_inline_p (tree caller, tree callee)
>>                              DECL_ATTRIBUTES (callee)));
>>
>>     cgraph_node *callee_node = cgraph_node::get (callee);
>> +
>> +  HOST_WIDE_INT callee_integer_isa_flags
>> +    = callee_opts->x_ix86_isa_flags;
>> +  HOST_WIDE_INT callee_integer_isa_flags2
>> +    = callee_opts->x_ix86_isa_flags2;
>> +
>> +  if (lookup_attribute ("general_regs_only",
>> +                       DECL_ATTRIBUTES (callee)))
>> +    {
>> +      /* For general purpose register only function, callee's
>> +        integer ISA options should be a subset of the caller's
>> +        integer ISA options.  */
>> +      always_inline_safe_mask |= MASK_80387;
>> +      callee_integer_isa_flags
>> +       &= ~OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET;
>> +      callee_integer_isa_flags2
>> +       &= ~OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET;
>> +    }
>> +
>>     /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
>>        function can inline a SSE2 function but a SSE2 function can't inline
>>        a SSE4 function.  */
>> -  if (((caller_opts->x_ix86_isa_flags & callee_opts->x_ix86_isa_flags)
>> -       != callee_opts->x_ix86_isa_flags)
>> -      || ((caller_opts->x_ix86_isa_flags2 & callee_opts->x_ix86_isa_flags2)
>> -         != callee_opts->x_ix86_isa_flags2))
>> +  if (((caller_opts->x_ix86_isa_flags & callee_integer_isa_flags)
>> +       != callee_integer_isa_flags)
>> +      || ((caller_opts->x_ix86_isa_flags2 & callee_integer_isa_flags2)
>> +         != callee_integer_isa_flags2))
>>       ret = false;
>>
>>     /* See if we have the same non-isa options.  */
>> diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
>> index 591394076cc..908eb44b0d7 100644
>> --- a/gcc/config/i386/ia32intrin.h
>> +++ b/gcc/config/i386/ia32intrin.h
>> @@ -27,7 +27,8 @@
>>
>>   /* 32bit bsf */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bsfd (int __X)
>>   {
>>     return __builtin_ctz (__X);
>> @@ -35,7 +36,8 @@ __bsfd (int __X)
>>
>>   /* 32bit bsr */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bsrd (int __X)
>>   {
>>     return __builtin_ia32_bsrsi (__X);
>> @@ -43,7 +45,8 @@ __bsrd (int __X)
>>
>>   /* 32bit bswap */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bswapd (int __X)
>>   {
>>     return __builtin_bswap32 (__X);
>> @@ -88,7 +91,8 @@ __crc32d (unsigned int __C, unsigned int __V)
>>
>>   /* 32bit popcnt */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __popcntd (unsigned int __X)
>>   {
>>     return __builtin_popcount (__X);
>> @@ -98,7 +102,8 @@ __popcntd (unsigned int __X)
>>
>>   /* rdpmc */
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rdpmc (int __S)
>>   {
>>     return __builtin_ia32_rdpmc (__S);
>> @@ -107,18 +112,31 @@ __rdpmc (int __S)
>>   #endif /* __iamcu__ */
>>
>>   /* rdtsc */
>> -#define __rdtsc()              __builtin_ia32_rdtsc ()
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>> +__rdtsc (void)
>> +{
>> +  return __builtin_ia32_rdtsc ();
>> +}
>>
>>   #ifndef __iamcu__
>>
>>   /* rdtscp */
>> -#define __rdtscp(a)            __builtin_ia32_rdtscp (a)
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>> +__rdtscp (unsigned int *__A)
>> +{
>> +  return __builtin_ia32_rdtscp (__A);
>> +}
>>
>>   #endif /* __iamcu__ */
>>
>>   /* 8bit rol */
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rolb (unsigned char __X, int __C)
>>   {
>>     return __builtin_ia32_rolqi (__X, __C);
>> @@ -126,7 +144,8 @@ __rolb (unsigned char __X, int __C)
>>
>>   /* 16bit rol */
>>   extern __inline unsigned short
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rolw (unsigned short __X, int __C)
>>   {
>>     return __builtin_ia32_rolhi (__X, __C);
>> @@ -134,7 +153,8 @@ __rolw (unsigned short __X, int __C)
>>
>>   /* 32bit rol */
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rold (unsigned int __X, int __C)
>>   {
>>     __C &= 31;
>> @@ -143,7 +163,8 @@ __rold (unsigned int __X, int __C)
>>
>>   /* 8bit ror */
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rorb (unsigned char __X, int __C)
>>   {
>>     return __builtin_ia32_rorqi (__X, __C);
>> @@ -151,7 +172,8 @@ __rorb (unsigned char __X, int __C)
>>
>>   /* 16bit ror */
>>   extern __inline unsigned short
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rorw (unsigned short __X, int __C)
>>   {
>>     return __builtin_ia32_rorhi (__X, __C);
>> @@ -159,7 +181,8 @@ __rorw (unsigned short __X, int __C)
>>
>>   /* 32bit ror */
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rord (unsigned int __X, int __C)
>>   {
>>     __C &= 31;
>> @@ -168,7 +191,8 @@ __rord (unsigned int __X, int __C)
>>
>>   /* Pause */
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __pause (void)
>>   {
>>     __builtin_ia32_pause ();
>> @@ -177,7 +201,8 @@ __pause (void)
>>   #ifdef __x86_64__
>>   /* 64bit bsf */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bsfq (long long __X)
>>   {
>>     return __builtin_ctzll (__X);
>> @@ -185,7 +210,8 @@ __bsfq (long long __X)
>>
>>   /* 64bit bsr */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bsrq (long long __X)
>>   {
>>     return __builtin_ia32_bsrdi (__X);
>> @@ -193,7 +219,8 @@ __bsrq (long long __X)
>>
>>   /* 64bit bswap */
>>   extern __inline long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bswapq (long long __X)
>>   {
>>     return __builtin_bswap64 (__X);
>> @@ -220,7 +247,8 @@ __crc32q (unsigned long long __C, unsigned long long __V)
>>
>>   /* 64bit popcnt */
>>   extern __inline long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __popcntq (unsigned long long __X)
>>   {
>>     return __builtin_popcountll (__X);
>> @@ -228,7 +256,8 @@ __popcntq (unsigned long long __X)
>>
>>   /* 64bit rol */
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rolq (unsigned long long __X, int __C)
>>   {
>>     __C &= 63;
>> @@ -237,7 +266,8 @@ __rolq (unsigned long long __X, int __C)
>>
>>   /* 64bit ror */
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __rorq (unsigned long long __X, int __C)
>>   {
>>     __C &= 63;
>> @@ -246,7 +276,8 @@ __rorq (unsigned long long __X, int __C)
>>
>>   /* Read flags register */
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __readeflags (void)
>>   {
>>     return __builtin_ia32_readeflags_u64 ();
>> @@ -254,7 +285,8 @@ __readeflags (void)
>>
>>   /* Write flags register */
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __writeeflags (unsigned long long __X)
>>   {
>>     __builtin_ia32_writeeflags_u64 (__X);
>> @@ -266,7 +298,8 @@ __writeeflags (unsigned long long __X)
>>
>>   /* Read flags register */
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __readeflags (void)
>>   {
>>     return __builtin_ia32_readeflags_u32 ();
>> @@ -274,7 +307,8 @@ __readeflags (void)
>>
>>   /* Write flags register */
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __writeeflags (unsigned int __X)
>>   {
>>     __builtin_ia32_writeeflags_u32 (__X);
>> diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
>> index 1a7465b2f22..893a4313a68 100644
>> --- a/gcc/config/i386/lwpintrin.h
>> +++ b/gcc/config/i386/lwpintrin.h
>> @@ -34,27 +34,35 @@
>>   #define __DISABLE_LWP__
>>   #endif /* __LWP__ */
>>
>> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __llwpcb (void *__pcbAddress)
>>   {
>>     __builtin_ia32_llwpcb (__pcbAddress);
>>   }
>>
>> -extern __inline void * __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void *
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __slwpcb (void)
>>   {
>>     return __builtin_ia32_slwpcb ();
>>   }
>>
>>   #ifdef __OPTIMIZE__
>> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lwpval32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
>>   {
>>     __builtin_ia32_lwpval32 (__data2, __data1, __flags);
>>   }
>>
>>   #ifdef __x86_64__
>> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lwpval64 (unsigned long long __data2, unsigned int __data1,
>>              unsigned int __flags)
>>   {
>> @@ -74,14 +82,18 @@ __lwpval64 (unsigned long long __data2, unsigned int __data1,
>>
>>
>>   #ifdef __OPTIMIZE__
>> -extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned char
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lwpins32 (unsigned int __data2, unsigned int __data1, unsigned int __flags)
>>   {
>>     return __builtin_ia32_lwpins32 (__data2, __data1, __flags);
>>   }
>>
>>   #ifdef __x86_64__
>> -extern __inline unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned char
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lwpins64 (unsigned long long __data2, unsigned int __data1,
>>              unsigned int __flags)
>>   {
>> diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h
>> index cfa2719c044..864bdf67698 100644
>> --- a/gcc/config/i386/lzcntintrin.h
>> +++ b/gcc/config/i386/lzcntintrin.h
>> @@ -35,32 +35,42 @@
>>   #define __DISABLE_LZCNT__
>>   #endif /* __LZCNT__ */
>>
>> -extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned short
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lzcnt16 (unsigned short __X)
>>   {
>>     return __builtin_ia32_lzcnt_u16 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lzcnt32 (unsigned int __X)
>>   {
>>     return __builtin_ia32_lzcnt_u32 (__X);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _lzcnt_u32 (unsigned int __X)
>>   {
>>     return __builtin_ia32_lzcnt_u32 (__X);
>>   }
>>
>>   #ifdef __x86_64__
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __lzcnt64 (unsigned long long __X)
>>   {
>>     return __builtin_ia32_lzcnt_u64 (__X);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _lzcnt_u64 (unsigned long long __X)
>>   {
>>     return __builtin_ia32_lzcnt_u64 (__X);
>> diff --git a/gcc/config/i386/movdirintrin.h b/gcc/config/i386/movdirintrin.h
>> index c50fe40b937..e6ba84f39c8 100644
>> --- a/gcc/config/i386/movdirintrin.h
>> +++ b/gcc/config/i386/movdirintrin.h
>> @@ -35,14 +35,16 @@
>>   #endif /* __MOVDIRI__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _directstoreu_u32 (void * __P, unsigned int __A)
>>   {
>>     __builtin_ia32_directstoreu_u32 ((unsigned int *)__P, __A);
>>   }
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _directstoreu_u64 (void * __P, unsigned long long __A)
>>   {
>>     __builtin_ia32_directstoreu_u64 ((unsigned long long *)__P, __A);
>> @@ -61,7 +63,8 @@ _directstoreu_u64 (void * __P, unsigned long long __A)
>>   #endif /* __MOVDIR64B__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _movdir64b (void * __P, const void * __Q)
>>   {
>>     __builtin_ia32_movdir64b (__P, __Q);
>> diff --git a/gcc/config/i386/mwaitxintrin.h b/gcc/config/i386/mwaitxintrin.h
>> index ad8afba4c28..0c9505bb2f6 100644
>> --- a/gcc/config/i386/mwaitxintrin.h
>> +++ b/gcc/config/i386/mwaitxintrin.h
>> @@ -30,13 +30,17 @@
>>   #define __DISABLE_MWAITX__
>>   #endif /* __MWAITX__ */
>>
>> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_monitorx (void const * __P, unsigned int __E, unsigned int __H)
>>   {
>>     __builtin_ia32_monitorx (__P, __E, __H);
>>   }
>>
>> -extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_mwaitx (unsigned int __E, unsigned int __H, unsigned int __C)
>>   {
>>     __builtin_ia32_mwaitx (__E, __H, __C);
>> diff --git a/gcc/config/i386/pconfigintrin.h b/gcc/config/i386/pconfigintrin.h
>> index 5346cbd78cb..f8f6279c586 100644
>> --- a/gcc/config/i386/pconfigintrin.h
>> +++ b/gcc/config/i386/pconfigintrin.h
>> @@ -47,7 +47,8 @@
>>          : "cc")
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _pconfig_u32 (const unsigned int __L, size_t __D[])
>>   {
>>     enum __pconfig_type
>> diff --git a/gcc/config/i386/pkuintrin.h b/gcc/config/i386/pkuintrin.h
>> index cd5638fa035..6e59617a0ce 100644
>> --- a/gcc/config/i386/pkuintrin.h
>> +++ b/gcc/config/i386/pkuintrin.h
>> @@ -35,14 +35,16 @@
>>   #endif /* __PKU__ */
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdpkru_u32 (void)
>>   {
>>     return __builtin_ia32_rdpkru ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wrpkru (unsigned int __key)
>>   {
>>     __builtin_ia32_wrpkru (__key);
>> diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
>> index 84876562640..640de9db733 100644
>> --- a/gcc/config/i386/popcntintrin.h
>> +++ b/gcc/config/i386/popcntintrin.h
>> @@ -31,14 +31,18 @@
>>   #endif /* __POPCNT__ */
>>
>>   /* Calculate a number of bits set to 1.  */
>> -extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_popcnt_u32 (unsigned int __X)
>>   {
>>     return __builtin_popcount (__X);
>>   }
>>
>>   #ifdef __x86_64__
>> -extern __inline long long  __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _mm_popcnt_u64 (unsigned long long __X)
>>   {
>>     return __builtin_popcountll (__X);
>> diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h
>> index 1badab7018c..0dc5fadce6a 100644
>> --- a/gcc/config/i386/rdseedintrin.h
>> +++ b/gcc/config/i386/rdseedintrin.h
>> @@ -36,14 +36,16 @@
>>
>>
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdseed16_step (unsigned short *__p)
>>   {
>>     return __builtin_ia32_rdseed_hi_step (__p);
>>   }
>>
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdseed32_step (unsigned int *__p)
>>   {
>>     return __builtin_ia32_rdseed_si_step (__p);
>> @@ -51,7 +53,8 @@ _rdseed32_step (unsigned int *__p)
>>
>>   #ifdef __x86_64__
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdseed64_step (unsigned long long *__p)
>>   {
>>     return __builtin_ia32_rdseed_di_step (__p);
>> diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h
>> index 5b2ac767737..33aadcfec61 100644
>> --- a/gcc/config/i386/rtmintrin.h
>> +++ b/gcc/config/i386/rtmintrin.h
>> @@ -46,7 +46,8 @@
>>   /* Start an RTM code region.  Return _XBEGIN_STARTED on success and the
>>      abort condition otherwise.  */
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xbegin (void)
>>   {
>>     return __builtin_ia32_xbegin ();
>> @@ -57,7 +58,8 @@ _xbegin (void)
>>      commit fails, then control is transferred to the outermost transaction
>>      fallback handler.  */
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xend (void)
>>   {
>>     __builtin_ia32_xend ();
>> @@ -67,7 +69,8 @@ _xend (void)
>>      outermost transaction fallback handler with the abort condition IMM.  */
>>   #ifdef __OPTIMIZE__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xabort (const unsigned int __imm)
>>   {
>>     __builtin_ia32_xabort (__imm);
>> diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h
>> index e280250b198..dd27e6c7a81 100644
>> --- a/gcc/config/i386/serializeintrin.h
>> +++ b/gcc/config/i386/serializeintrin.h
>> @@ -34,7 +34,13 @@
>>   #define __DISABLE_SERIALIZE__
>>   #endif /* __SERIALIZE__ */
>>
>> -#define _serialize()   __builtin_ia32_serialize ()
>> +extern __inline void
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>> +_serialize (void)
>> +{
>> +  __builtin_ia32_serialize ();
>> +}
>>
>>   #ifdef __DISABLE_SERIALIZE__
>>   #undef __DISABLE_SERIALIZE__
>> diff --git a/gcc/config/i386/sgxintrin.h b/gcc/config/i386/sgxintrin.h
>> index 152be6a37ed..264214af972 100644
>> --- a/gcc/config/i386/sgxintrin.h
>> +++ b/gcc/config/i386/sgxintrin.h
>> @@ -108,7 +108,8 @@
>>             : "cc")
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _encls_u32 (const unsigned int __L, size_t __D[])
>>   {
>>     enum __encls_type
>> @@ -175,7 +176,8 @@ _encls_u32 (const unsigned int __L, size_t __D[])
>>   }
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _enclu_u32 (const unsigned int __L, size_t __D[])
>>   {
>>     enum __enclu_type
>> @@ -218,7 +220,8 @@ _enclu_u32 (const unsigned int __L, size_t __D[])
>>   }
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _enclv_u32 (const unsigned int __L, size_t __D[])
>>   {
>>     enum __enclv_type
>> diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
>> index 971d1f36aff..bc9d3269515 100644
>> --- a/gcc/config/i386/tbmintrin.h
>> +++ b/gcc/config/i386/tbmintrin.h
>> @@ -35,7 +35,9 @@
>>   #endif /* __TBM__ */
>>
>>   #ifdef __OPTIMIZE__
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bextri_u32 (unsigned int __X, const unsigned int __I)
>>   {
>>     return __builtin_ia32_bextri_u32 (__X, __I);
>> @@ -46,55 +48,73 @@ __bextri_u32 (unsigned int __X, const unsigned int __I)
>>                                              (unsigned int)(I)))
>>   #endif /*__OPTIMIZE__ */
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcfill_u32 (unsigned int __X)
>>   {
>>     return __X & (__X + 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blci_u32 (unsigned int __X)
>>   {
>>     return __X | ~(__X + 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcic_u32 (unsigned int __X)
>>   {
>>     return ~__X & (__X + 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcmsk_u32 (unsigned int __X)
>>   {
>>     return __X ^ (__X + 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcs_u32 (unsigned int __X)
>>   {
>>     return __X | (__X + 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsfill_u32 (unsigned int __X)
>>   {
>>     return __X | (__X - 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsic_u32 (unsigned int __X)
>>   {
>>     return ~__X | (__X - 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __t1mskc_u32 (unsigned int __X)
>>   {
>>     return ~__X | (__X + 1);
>>   }
>>
>> -extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned int
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __tzmsk_u32 (unsigned int __X)
>>   {
>>     return ~__X & (__X - 1);
>> @@ -104,7 +124,9 @@ __tzmsk_u32 (unsigned int __X)
>>
>>   #ifdef __x86_64__
>>   #ifdef __OPTIMIZE__
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __bextri_u64 (unsigned long long __X, const unsigned int __I)
>>   {
>>     return __builtin_ia32_bextri_u64 (__X, __I);
>> @@ -115,55 +137,73 @@ __bextri_u64 (unsigned long long __X, const unsigned int __I)
>>                                                    (unsigned long long)(I)))
>>   #endif /*__OPTIMIZE__ */
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcfill_u64 (unsigned long long __X)
>>   {
>>     return __X & (__X + 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blci_u64 (unsigned long long __X)
>>   {
>>     return __X | ~(__X + 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcic_u64 (unsigned long long __X)
>>   {
>>     return ~__X & (__X + 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcmsk_u64 (unsigned long long __X)
>>   {
>>     return __X ^ (__X + 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blcs_u64 (unsigned long long __X)
>>   {
>>     return __X | (__X + 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsfill_u64 (unsigned long long __X)
>>   {
>>     return __X | (__X - 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __blsic_u64 (unsigned long long __X)
>>   {
>>     return ~__X | (__X - 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __t1mskc_u64 (unsigned long long __X)
>>   {
>>     return ~__X | (__X + 1);
>>   }
>>
>> -extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +extern __inline unsigned long long
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   __tzmsk_u64 (unsigned long long __X)
>>   {
>>     return ~__X & (__X - 1);
>> diff --git a/gcc/config/i386/tsxldtrkintrin.h b/gcc/config/i386/tsxldtrkintrin.h
>> index bb42a8e89b9..32a0b87c43a 100644
>> --- a/gcc/config/i386/tsxldtrkintrin.h
>> +++ b/gcc/config/i386/tsxldtrkintrin.h
>> @@ -35,14 +35,16 @@
>>   #endif /* __TSXLDTRK__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsusldtrk (void)
>>   {
>>     __builtin_ia32_xsusldtrk ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xresldtrk (void)
>>   {
>>     __builtin_ia32_xresldtrk ();
>> diff --git a/gcc/config/i386/uintrintrin.h b/gcc/config/i386/uintrintrin.h
>> index 2ff0cce9b49..d424bc22ba8 100644
>> --- a/gcc/config/i386/uintrintrin.h
>> +++ b/gcc/config/i386/uintrintrin.h
>> @@ -47,28 +47,32 @@ struct __uintr_frame
>>   };
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _clui (void)
>>   {
>>     __builtin_ia32_clui ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _stui (void)
>>   {
>>     __builtin_ia32_stui ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _senduipi (unsigned long long __R)
>>   {
>>     __builtin_ia32_senduipi (__R);
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _testui (void)
>>   {
>>     return __builtin_ia32_testui ();
>> diff --git a/gcc/config/i386/waitpkgintrin.h b/gcc/config/i386/waitpkgintrin.h
>> index a7a4d6a927d..a2d7b004545 100644
>> --- a/gcc/config/i386/waitpkgintrin.h
>> +++ b/gcc/config/i386/waitpkgintrin.h
>> @@ -35,21 +35,24 @@
>>   #endif /* __WAITPKG__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _umonitor (void *__A)
>>   {
>>     __builtin_ia32_umonitor (__A);
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _umwait (unsigned int __A, unsigned long long __B)
>>   {
>>     return __builtin_ia32_umwait (__A, __B);
>>   }
>>
>>   extern __inline unsigned char
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _tpause (unsigned int __A, unsigned long long __B)
>>   {
>>     return __builtin_ia32_tpause (__A, __B);
>> diff --git a/gcc/config/i386/wbnoinvdintrin.h b/gcc/config/i386/wbnoinvdintrin.h
>> index 71dc1b6accb..6ba9ca01f27 100644
>> --- a/gcc/config/i386/wbnoinvdintrin.h
>> +++ b/gcc/config/i386/wbnoinvdintrin.h
>> @@ -35,7 +35,8 @@
>>   #endif /* __WBNOINVD__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wbnoinvd (void)
>>   {
>>     __builtin_ia32_wbnoinvd ();
>> diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
>> index ceda501252c..4289ff66cfd 100644
>> --- a/gcc/config/i386/x86gprintrin.h
>> +++ b/gcc/config/i386/x86gprintrin.h
>> @@ -95,7 +95,8 @@
>>   #include <hresetintrin.h>
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _wbinvd (void)
>>   {
>>     __builtin_ia32_wbinvd ();
>> @@ -107,14 +108,16 @@ _wbinvd (void)
>>   #define __DISABLE_RDRND__
>>   #endif /* __RDRND__ */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdrand16_step (unsigned short *__P)
>>   {
>>     return __builtin_ia32_rdrand16_step (__P);
>>   }
>>
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdrand32_step (unsigned int *__P)
>>   {
>>     return __builtin_ia32_rdrand32_step (__P);
>> @@ -130,7 +133,8 @@ _rdrand32_step (unsigned int *__P)
>>   #define __DISABLE_RDPID__
>>   #endif /* __RDPID__ */
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdpid_u32 (void)
>>   {
>>     return __builtin_ia32_rdpid ();
>> @@ -148,56 +152,64 @@ _rdpid_u32 (void)
>>   #define __DISABLE_FSGSBASE__
>>   #endif /* __FSGSBASE__ */
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _readfsbase_u32 (void)
>>   {
>>     return __builtin_ia32_rdfsbase32 ();
>>   }
>>
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _readfsbase_u64 (void)
>>   {
>>     return __builtin_ia32_rdfsbase64 ();
>>   }
>>
>>   extern __inline unsigned int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _readgsbase_u32 (void)
>>   {
>>     return __builtin_ia32_rdgsbase32 ();
>>   }
>>
>>   extern __inline unsigned long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _readgsbase_u64 (void)
>>   {
>>     return __builtin_ia32_rdgsbase64 ();
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _writefsbase_u32 (unsigned int __B)
>>   {
>>     __builtin_ia32_wrfsbase32 (__B);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _writefsbase_u64 (unsigned long long __B)
>>   {
>>     __builtin_ia32_wrfsbase64 (__B);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _writegsbase_u32 (unsigned int __B)
>>   {
>>     __builtin_ia32_wrgsbase32 (__B);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _writegsbase_u64 (unsigned long long __B)
>>   {
>>     __builtin_ia32_wrgsbase64 (__B);
>> @@ -213,7 +225,8 @@ _writegsbase_u64 (unsigned long long __B)
>>   #define __DISABLE_RDRND__
>>   #endif /* __RDRND__ */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _rdrand64_step (unsigned long long *__P)
>>   {
>>     return __builtin_ia32_rdrand64_step (__P);
>> @@ -233,7 +246,8 @@ _rdrand64_step (unsigned long long *__P)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _ptwrite64 (unsigned long long __B)
>>   {
>>     __builtin_ia32_ptwrite64 (__B);
>> @@ -241,7 +255,8 @@ _ptwrite64 (unsigned long long __B)
>>   #endif /* __x86_64__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _ptwrite32 (unsigned __B)
>>   {
>>     __builtin_ia32_ptwrite32 (__B);
>> diff --git a/gcc/config/i386/xsavecintrin.h b/gcc/config/i386/xsavecintrin.h
>> index 45751a087bb..d0739cbd1cc 100644
>> --- a/gcc/config/i386/xsavecintrin.h
>> +++ b/gcc/config/i386/xsavecintrin.h
>> @@ -35,7 +35,8 @@
>>   #endif /* __XSAVEC__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsavec (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsavec (__P, __M);
>> @@ -43,7 +44,8 @@ _xsavec (void *__P, long long __M)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsavec64 (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsavec64 (__P, __M);
>> diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h
>> index 56e6a1e527b..50d174fa2b0 100644
>> --- a/gcc/config/i386/xsaveintrin.h
>> +++ b/gcc/config/i386/xsaveintrin.h
>> @@ -35,28 +35,32 @@
>>   #endif /* __XSAVE__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsave (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsave (__P, __M);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xrstor (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xrstor (__P, __M);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsetbv (unsigned int __A, long long __V)
>>   {
>>     __builtin_ia32_xsetbv (__A, __V);
>>   }
>>
>>   extern __inline long long
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xgetbv (unsigned int __A)
>>   {
>>     return __builtin_ia32_xgetbv (__A);
>> @@ -64,14 +68,16 @@ _xgetbv (unsigned int __A)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsave64 (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsave64 (__P, __M);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xrstor64 (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xrstor64 (__P, __M);
>> diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h
>> index ba076cea51a..b5c25f94f95 100644
>> --- a/gcc/config/i386/xsaveoptintrin.h
>> +++ b/gcc/config/i386/xsaveoptintrin.h
>> @@ -35,7 +35,8 @@
>>   #endif /* __XSAVEOPT__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsaveopt (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsaveopt (__P, __M);
>> @@ -43,7 +44,8 @@ _xsaveopt (void *__P, long long __M)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsaveopt64 (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsaveopt64 (__P, __M);
>> diff --git a/gcc/config/i386/xsavesintrin.h b/gcc/config/i386/xsavesintrin.h
>> index 969835fed64..27cec8370ad 100644
>> --- a/gcc/config/i386/xsavesintrin.h
>> +++ b/gcc/config/i386/xsavesintrin.h
>> @@ -35,14 +35,16 @@
>>   #endif /* __XSAVES__ */
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsaves (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsaves (__P, __M);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xrstors (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xrstors (__P, __M);
>> @@ -50,14 +52,16 @@ _xrstors (void *__P, long long __M)
>>
>>   #ifdef __x86_64__
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xrstors64 (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xrstors64 (__P, __M);
>>   }
>>
>>   extern __inline void
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xsaves64 (void *__P, long long __M)
>>   {
>>     __builtin_ia32_xsaves64 (__P, __M);
>> diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h
>> index 39d18af6536..0eae87a1d43 100644
>> --- a/gcc/config/i386/xtestintrin.h
>> +++ b/gcc/config/i386/xtestintrin.h
>> @@ -37,7 +37,8 @@
>>   /* Return non-zero if the instruction executes inside an RTM or HLE code
>>      region.  Return zero otherwise.   */
>>   extern __inline int
>> -__attribute__((__gnu_inline__, __always_inline__, __artificial__))
>> +__attribute__((__gnu_inline__, __always_inline__, __artificial__,
>> +              __general_regs_only__))
>>   _xtest (void)
>>   {
>>     return __builtin_ia32_xtest ();
>> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
>> index 1ddafb3ff2c..7111eca62ff 100644
>> --- a/gcc/doc/extend.texi
>> +++ b/gcc/doc/extend.texi
>> @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
>>   of the section to record function entry instrumentation calls in when
>>   enabled with @option{-pg -mrecord-mcount}
>>
>> +@item general_regs_only
>> +@cindex @code{general_regs_only} function attribute, x86
>> +The @code{general_regs_only} attribute on functions is used to
>> +inform the compiler that functions use only general purpose registers.
>> +
>>   @end table
>>
>>   @node Xstormy16 Function Attributes
>> diff --git a/gcc/testsuite/gcc.target/i386/pr99744-3.c b/gcc/testsuite/gcc.target/i386/pr99744-3.c
>> new file mode 100644
>> index 00000000000..6c505816ceb
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/pr99744-3.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -mno-serialize" } */
>> +
>> +#include <x86intrin.h>
>> +
>> +__attribute__ ((target("general-regs-only")))
>> +void
>> +foo1 (void)
>> +{
>> +  _serialize ();
>> +}
>> +
>> +/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
>> diff --git a/gcc/testsuite/gcc.target/i386/pr99744-4.c b/gcc/testsuite/gcc.target/i386/pr99744-4.c
>> new file mode 100644
>> index 00000000000..a17d4a2139b
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/i386/pr99744-4.c
>> @@ -0,0 +1,352 @@
>> +/* { dg-do compile } */
>> +/* { dg-options "-O2 -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdir64b -mmovdiri -mmwaitx -mpconfig -mpku -mpopcnt -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -msgx -mshstk -mtbm -mtsxldtrk -mxsave -mxsavec -mxsaveopt -mxsaves -mwaitpkg -mwbnoinvd" } */
>> +/* { dg-additional-options "-muintr" { target { ! ia32 } } }  */
>> +
>> +/* Test calling GPR intrinsics from functions with general-regs-only
>> +   target attribute.  */
>> +
>> +#include <x86gprintrin.h>
>> +
>> +#define _CONCAT(x,y) x ## y
>> +
>> +#define test_0(func, type)                                             \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (void)                                                \
>> +  { return func (); }
>> +
>> +#define test_0_i1(func, type, imm)                                     \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (void)                                                \
>> +  { return func (imm); }
>> +
>> +#define test_1(func, type, op1_type)                                   \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (op1_type A)                                  \
>> +  { return func (A); }
>> +
>> +#define test_1_i1(func, type, op1_type, imm)                           \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (op1_type A)                                  \
>> +  { return func (A, imm); }
>> +
>> +#define test_2(func, type, op1_type, op2_type)                         \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (op1_type A, op2_type B)                      \
>> +  { return func (A, B); }
>> +
>> +#define test_2_i1(func, type, op1_type, op2_type, imm)                 \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (op1_type A, op2_type B)                      \
>> +  { return func (A, B, imm); }
>> +
>> +#define test_3(func, type, op1_type, op2_type, op3_type)               \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)          \
>> +  { return func (A, B, C); }
>> +
>> +#define test_4(func, type, op1_type, op2_type, op3_type, op4_type)     \
>> +  __attribute__ ((target("general-regs-only")))                                \
>> +  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C,          \
>> +                         op4_type D)                                   \
>> +  { return func (A, B, C, D); }
>> +
>> +/* ia32intrin.h  */
>> +test_1 (__bsfd, int, int)
>> +test_1 (__bsrd, int, int)
>> +test_1 (__bswapd, int, int)
>> +test_1 (__popcntd, int, unsigned int)
>> +test_2 (__rolb, unsigned char, unsigned char, int)
>> +test_2 (__rolw, unsigned short, unsigned short, int)
>> +test_2 (__rold, unsigned int, unsigned int, int)
>> +test_2 (__rorb, unsigned char, unsigned char, int)
>> +test_2 (__rorw, unsigned short, unsigned short, int)
>> +test_2 (__rord, unsigned int, unsigned int, int)
>> +
>> +#ifndef __iamcu__
>> +/* ia32intrin.h  */
>> +test_1 (__rdpmc, unsigned long long, int)
>> +test_0 (__rdtsc, unsigned long long)
>> +test_1 (__rdtscp, unsigned long long, unsigned int *)
>> +test_0 (__pause, void)
>> +
>> +/* adxintrin.h */
>> +test_4 (_subborrow_u32, unsigned char, unsigned char, unsigned int,
>> +       unsigned int, unsigned int *)
>> +test_4 (_addcarry_u32, unsigned char, unsigned char, unsigned int,
>> +       unsigned int, unsigned int *)
>> +test_4 (_addcarryx_u32, unsigned char, unsigned char, unsigned int,
>> +       unsigned int, unsigned int *)
>> +
>> +/* bmiintrin.h */
>> +test_1 (__tzcnt_u16, unsigned short, unsigned short)
>> +test_2 (__andn_u32, unsigned int, unsigned int, unsigned int)
>> +test_2 (__bextr_u32, unsigned int, unsigned int, unsigned int)
>> +test_3 (_bextr_u32, unsigned int, unsigned int, unsigned int,
>> +       unsigned int)
>> +test_1 (__blsi_u32, unsigned int, unsigned int)
>> +test_1 (_blsi_u32, unsigned int, unsigned int)
>> +test_1 (__blsmsk_u32, unsigned int, unsigned int)
>> +test_1 (_blsmsk_u32, unsigned int, unsigned int)
>> +test_1 (__blsr_u32, unsigned int, unsigned int)
>> +test_1 (_blsr_u32, unsigned int, unsigned int)
>> +test_1 (__tzcnt_u32, unsigned int, unsigned int)
>> +test_1 (_tzcnt_u32, unsigned int, unsigned int)
>> +
>> +/* bmi2intrin.h */
>> +test_2 (_bzhi_u32, unsigned int, unsigned int, unsigned int)
>> +test_2 (_pdep_u32, unsigned int, unsigned int, unsigned int)
>> +test_2 (_pext_u32, unsigned int, unsigned int, unsigned int)
>> +
>> +/* cetintrin.h */
>> +test_1 (_inc_ssp, void, unsigned int)
>> +test_0 (_saveprevssp, void)
>> +test_1 (_rstorssp, void, void *)
>> +test_2 (_wrssd, void, unsigned int, void *)
>> +test_2 (_wrussd, void, unsigned int, void *)
>> +test_0 (_setssbsy, void)
>> +test_1 (_clrssbsy, void, void *)
>> +
>> +/* cldemoteintrin.h */
>> +test_1 (_cldemote, void, void *)
>> +
>> +/* clflushoptintrin.h */
>> +test_1 (_mm_clflushopt, void, void *)
>> +
>> +/* clwbintrin.h */
>> +test_1 (_mm_clwb, void, void *)
>> +
>> +/* clzerointrin.h */
>> +test_1 (_mm_clzero, void, void *)
>> +
>> +/* enqcmdintrin.h */
>> +test_2 (_enqcmd, int, void *, const void *)
>> +test_2 (_enqcmds, int, void *, const void *)
>> +
>> +/* fxsrintrin.h */
>> +test_1 (_fxsave, void, void *)
>> +test_1 (_fxrstor, void, void *)
>> +
>> +/* hresetintrin.h */
>> +test_1 (_hreset, void, unsigned int)
>> +
>> +/* lzcntintrin.h */
>> +test_1 (__lzcnt16, unsigned short, unsigned short)
>> +test_1 (__lzcnt32, unsigned int, unsigned int)
>> +test_1 (_lzcnt_u32, unsigned int, unsigned int)
>> +
>> +/* lwpintrin.h */
>> +test_1 (__llwpcb, void, void *)
>> +test_0 (__slwpcb, void *)
>> +test_2_i1 (__lwpval32, void, unsigned int, unsigned int, 1)
>> +test_2_i1 (__lwpins32, unsigned char, unsigned int, unsigned int, 1)
>> +
>> +/* movdirintrin.h */
>> +test_2 (_directstoreu_u32, void, void *, unsigned int)
>> +test_2 (_movdir64b, void, void *, const void *)
>> +
>> +/* mwaitxintrin.h */
>> +test_3 (_mm_monitorx, void, void const *, unsigned int, unsigned int)
>> +test_3 (_mm_mwaitx, void, unsigned int, unsigned int, unsigned int)
>> +
>> +/* pconfigintrin.h */
>> +test_2 (_pconfig_u32, unsigned int, const unsigned int, size_t *)
>> +
>> +/* pkuintrin.h */
>> +test_0 (_rdpkru_u32, unsigned int)
>> +test_1 (_wrpkru, void, unsigned int)
>> +
>> +/* popcntintrin.h */
>> +test_1 (_mm_popcnt_u32, int, unsigned int)
>> +
>> +/* rdseedintrin.h */
>> +test_1 (_rdseed16_step, int, unsigned short *)
>> +test_1 (_rdseed32_step, int, unsigned int *)
>> +
>> +/* rtmintrin.h */
>> +test_0 (_xbegin, unsigned int)
>> +test_0 (_xend, void)
>> +test_0_i1 (_xabort, void, 1)
>> +
>> +/* sgxintrin.h */
>> +test_2 (_encls_u32, unsigned int, const unsigned int, size_t *)
>> +test_2 (_enclu_u32, unsigned int, const unsigned int, size_t *)
>> +test_2 (_enclv_u32, unsigned int, const unsigned int, size_t *)
>> +
>> +/* tbmintrin.h */
>> +test_1_i1 (__bextri_u32, unsigned int, unsigned int, 1)
>> +test_1 (__blcfill_u32, unsigned int, unsigned int)
>> +test_1 (__blci_u32, unsigned int, unsigned int)
>> +test_1 (__blcic_u32, unsigned int, unsigned int)
>> +test_1 (__blcmsk_u32, unsigned int, unsigned int)
>> +test_1 (__blcs_u32, unsigned int, unsigned int)
>> +test_1 (__blsfill_u32, unsigned int, unsigned int)
>> +test_1 (__blsic_u32, unsigned int, unsigned int)
>> +test_1 (__t1mskc_u32, unsigned int, unsigned int)
>> +test_1 (__tzmsk_u32, unsigned int, unsigned int)
>> +
>> +/* tsxldtrkintrin.h */
>> +test_0 (_xsusldtrk, void)
>> +test_0 (_xresldtrk, void)
>> +
>> +/* x86gprintrin.h */
>> +test_1 (_ptwrite32, void, unsigned int)
>> +test_1 (_rdrand16_step, int, unsigned short *)
>> +test_1 (_rdrand32_step, int, unsigned int *)
>> +test_0 (_wbinvd, void)
>> +
>> +/* xtestintrin.h */
>> +test_0 (_xtest, int)
>> +
>> +/* xsaveintrin.h */
>> +test_2 (_xsave, void, void *, long long)
>> +test_2 (_xrstor, void, void *, long long)
>> +test_2 (_xsetbv, void, unsigned int, long long)
>> +test_1 (_xgetbv, long long, unsigned int)
>> +
>> +/* xsavecintrin.h */
>> +test_2 (_xsavec, void, void *, long long)
>> +
>> +/* xsaveoptintrin.h */
>> +test_2 (_xsaveopt, void, void *, long long)
>> +
>> +/* xsavesintrin.h */
>> +test_2 (_xsaves, void, void *, long long)
>> +test_2 (_xrstors, void, void *, long long)
>> +
>> +/* wbnoinvdintrin.h */
>> +test_0 (_wbnoinvd, void)
>> +
>> +#ifdef __x86_64__
>> +/* adxintrin.h */
>> +test_4 (_subborrow_u64, unsigned char, unsigned char,
>> +       unsigned long long, unsigned long long,
>> +       unsigned long long *)
>> +test_4 (_addcarry_u64, unsigned char, unsigned char,
>> +       unsigned long long, unsigned long long,
>> +       unsigned long long *)
>> +test_4 (_addcarryx_u64, unsigned char, unsigned char,
>> +       unsigned long long, unsigned long long,
>> +       unsigned long long *)
>> +
>> +/* bmiintrin.h */
>> +test_2 (__andn_u64, unsigned long long, unsigned long long,
>> +       unsigned long long)
>> +test_2 (__bextr_u64, unsigned long long, unsigned long long,
>> +       unsigned long long)
>> +test_3 (_bextr_u64, unsigned long long, unsigned long long,
>> +       unsigned long long, unsigned long long)
>> +test_1 (__blsi_u64, unsigned long long, unsigned long long)
>> +test_1 (_blsi_u64, unsigned long long, unsigned long long)
>> +test_1 (__blsmsk_u64, unsigned long long, unsigned long long)
>> +test_1 (_blsmsk_u64, unsigned long long, unsigned long long)
>> +test_1 (__blsr_u64, unsigned long long, unsigned long long)
>> +test_1 (_blsr_u64, unsigned long long, unsigned long long)
>> +test_1 (__tzcnt_u64, unsigned long long, unsigned long long)
>> +test_1 (_tzcnt_u64, unsigned long long, unsigned long long)
>> +
>> +/* bmi2intrin.h */
>> +test_2 (_bzhi_u64, unsigned long long, unsigned long long,
>> +       unsigned long long)
>> +test_2 (_pdep_u64, unsigned long long, unsigned long long,
>> +       unsigned long long)
>> +test_2 (_pext_u64, unsigned long long, unsigned long long,
>> +       unsigned long long)
>> +test_3 (_mulx_u64, unsigned long long, unsigned long long,
>> +       unsigned long long, unsigned long long *)
>> +
>> +/* cetintrin.h */
>> +test_0 (_get_ssp, unsigned long long)
>> +test_2 (_wrssq, void, unsigned long long, void *)
>> +test_2 (_wrussq, void, unsigned long long, void *)
>> +
>> +/* fxsrintrin.h */
>> +test_1 (_fxsave64, void, void *)
>> +test_1 (_fxrstor64, void, void *)
>> +
>> +/* ia32intrin.h  */
>> +test_1 (__bsfq, int, long long)
>> +test_1 (__bsrq, int, long long)
>> +test_1 (__bswapq, long long, long long)
>> +test_1 (__popcntq, long long, unsigned long long)
>> +test_2 (__rolq, unsigned long long, unsigned long long, int)
>> +test_2 (__rorq, unsigned long long, unsigned long long, int)
>> +test_0 (__readeflags, unsigned long long)
>> +test_1 (__writeeflags, void, unsigned int)
>> +
>> +/* lzcntintrin.h */
>> +test_1 (__lzcnt64, unsigned long long, unsigned long long)
>> +test_1 (_lzcnt_u64, unsigned long long, unsigned long long)
>> +
>> +/* lwpintrin.h */
>> +test_2_i1 (__lwpval64, void, unsigned long long, unsigned int, 1)
>> +test_2_i1 (__lwpins64, unsigned char, unsigned long long,
>> +          unsigned int, 1)
>> +
>> +/* movdirintrin.h */
>> +test_2 (_directstoreu_u64, void, void *, unsigned long long)
>> +
>> +/* popcntintrin.h */
>> +test_1 (_mm_popcnt_u64, long long, unsigned long long)
>> +
>> +/* rdseedintrin.h */
>> +test_1 (_rdseed64_step, int, unsigned long long *)
>> +
>> +/* tbmintrin.h */
>> +test_1_i1 (__bextri_u64, unsigned long long, unsigned long long, 1)
>> +test_1 (__blcfill_u64, unsigned long long, unsigned long long)
>> +test_1 (__blci_u64, unsigned long long, unsigned long long)
>> +test_1 (__blcic_u64, unsigned long long, unsigned long long)
>> +test_1 (__blcmsk_u64, unsigned long long, unsigned long long)
>> +test_1 (__blcs_u64, unsigned long long, unsigned long long)
>> +test_1 (__blsfill_u64, unsigned long long, unsigned long long)
>> +test_1 (__blsic_u64, unsigned long long, unsigned long long)
>> +test_1 (__t1mskc_u64, unsigned long long, unsigned long long)
>> +test_1 (__tzmsk_u64, unsigned long long, unsigned long long)
>> +
>> +/* uintrintrin.h */
>> +test_0 (_clui, void)
>> +test_1 (_senduipi, void, unsigned long long)
>> +test_0 (_stui, void)
>> +test_0 (_testui, unsigned char)
>> +
>> +/* x86gprintrin.h */
>> +test_1 (_ptwrite64, void, unsigned long long)
>> +test_0 (_readfsbase_u32, unsigned int)
>> +test_0 (_readfsbase_u64, unsigned long long)
>> +test_0 (_readgsbase_u32, unsigned int)
>> +test_0 (_readgsbase_u64, unsigned long long)
>> +test_1 (_rdrand64_step, int, unsigned long long *)
>> +test_1 (_writefsbase_u32, void, unsigned int)
>> +test_1 (_writefsbase_u64, void, unsigned long long)
>> +test_1 (_writegsbase_u32, void, unsigned int)
>> +test_1 (_writegsbase_u64, void, unsigned long long)
>> +
>> +/* xsaveintrin.h */
>> +test_2 (_xsave64, void, void *, long long)
>> +test_2 (_xrstor64, void, void *, long long)
>> +
>> +/* xsavecintrin.h */
>> +test_2 (_xsavec64, void, void *, long long)
>> +
>> +/* xsaveoptintrin.h */
>> +test_2 (_xsaveopt64, void, void *, long long)
>> +
>> +/* xsavesintrin.h */
>> +test_2 (_xsaves64, void, void *, long long)
>> +test_2 (_xrstors64, void, void *, long long)
>> +
>> +/* waitpkgintrin.h */
>> +test_1 (_umonitor, void, void *)
>> +test_2 (_umwait, unsigned char, unsigned int, unsigned long long)
>> +test_2 (_tpause, unsigned char, unsigned int, unsigned long long)
>> +
>> +#else /* !__x86_64__ */
>> +/* bmi2intrin.h */
>> +test_3 (_mulx_u32, unsigned int, unsigned int, unsigned int,
>> +       unsigned int *)
>> +
>> +/* cetintrin.h */
>> +test_0 (_get_ssp, unsigned int)
>> +#endif /* __x86_64__ */
>> +
>> +#endif
>> --
>> 2.30.2
>>


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-14 22:39 ` [PATCH v4 2/2] x86: Add general_regs_only function attribute H.J. Lu
  2021-04-21  7:30   ` Uros Bizjak
@ 2021-04-21 17:09   ` Martin Sebor
  2021-04-21 20:58     ` H.J. Lu
  1 sibling, 1 reply; 22+ messages in thread
From: Martin Sebor @ 2021-04-21 17:09 UTC (permalink / raw)
  To: H.J. Lu, gcc-patches
  Cc: Uros Bizjak, Jakub Jelinek, Bernhard Reutner-Fischer, Richard Biener

On 4/14/21 4:39 PM, H.J. Lu wrote:
> commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
> Author: H.J. Lu <hjl.tools@gmail.com>
> Date:   Fri Aug 21 09:42:49 2020 -0700
> 
>      x86: Add target("general-regs-only") function attribute
> 
> is incomplete since it is impossible to call integer intrinsics from
> a function with general-regs-only target attribute.
> 
> 1. Add general_regs_only function attribute to inform the compiler that
> functions use only general purpose registers.  When making inlining
> decisions on such functions, non-GPR compiler options are excluded.
> 2. Add general_regs_only attribute to x86 intrinsics which use only
> general purpose registers.
> 
...
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
>   of the section to record function entry instrumentation calls in when
>   enabled with @option{-pg -mrecord-mcount}
>   
> +@item general_regs_only
> +@cindex @code{general_regs_only} function attribute, x86
> +The @code{general_regs_only} attribute on functions is used to
> +inform the compiler that functions use only general purpose registers.

I'll just reiterate basically the same comment as before: it's not
clear from the very brief description above what the requirements
are for using the attribute.  I'm guessing it can be applied to
any function (inline or otherwise) but only has any effect when
the function is actually inlined and otherwise doesn't constrain
what the function can do.  (Whatever the constraints are, I think
the manual should spell them out, and likewise for its effects.)

Similarly it's not clear what should be expected when the function
does use some other register.  Ideally, I think GCC would check and
issue a nice error message whether or not the function is inlined
or called.  I suspect that might only be possible for inline
functions that are actually called and for which the back end must
emit code.

Other than that, I'd suggest improving the phrasing a bit:

   The @code{general_regs_only} function attribute indicates that
   the function uses only general purpose registers... [text
   explaining constraints and errors follows].

Martin

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-21 17:09   ` Martin Sebor
@ 2021-04-21 20:58     ` H.J. Lu
  2021-04-21 23:23       ` Martin Sebor
  0 siblings, 1 reply; 22+ messages in thread
From: H.J. Lu @ 2021-04-21 20:58 UTC (permalink / raw)
  To: Martin Sebor
  Cc: gcc-patches, Uros Bizjak, Jakub Jelinek,
	Bernhard Reutner-Fischer, Richard Biener

On Wed, Apr 21, 2021 at 10:09 AM Martin Sebor <msebor@gmail.com> wrote:
>
> On 4/14/21 4:39 PM, H.J. Lu wrote:
> > commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
> > Author: H.J. Lu <hjl.tools@gmail.com>
> > Date:   Fri Aug 21 09:42:49 2020 -0700
> >
> >      x86: Add target("general-regs-only") function attribute
> >
> > is incomplete since it is impossible to call integer intrinsics from
> > a function with general-regs-only target attribute.
> >
> > 1. Add general_regs_only function attribute to inform the compiler that
> > functions use only general purpose registers.  When making inlining
> > decisions on such functions, non-GPR compiler options are excluded.
> > 2. Add general_regs_only attribute to x86 intrinsics which use only
> > general purpose registers.
> >
> ...
> > --- a/gcc/doc/extend.texi
> > +++ b/gcc/doc/extend.texi
> > @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
> >   of the section to record function entry instrumentation calls in when
> >   enabled with @option{-pg -mrecord-mcount}
> >
> > +@item general_regs_only
> > +@cindex @code{general_regs_only} function attribute, x86
> > +The @code{general_regs_only} attribute on functions is used to
> > +inform the compiler that functions use only general purpose registers.
>
> I'll just reiterate basically the same comment as before: it's not
> clear from the very brief description above what the requirements
> are for using the attribute.  I'm guessing it can be applied to
> any function (inline or otherwise) but only has any effect when
> the function is actually inlined and otherwise doesn't constrain
> what the function can do.  (Whatever the constraints are, I think
> the manual should spell them out, and likewise for its effects.)

That is correct.

> Similarly it's not clear what should be expected when the function
> does use some other register.  Ideally, I think GCC would check and
> issue a nice error message whether or not the function is inlined
> or called.  I suspect that might only be possible for inline
> functions that are actually called and for which the back end must
> emit code.

This is what GCC does today:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99744

> Other than that, I'd suggest to improve the phrasing a bit:
>
>    The @code{general_regs_only} function attribute indicates that
>    the function uses only general purpose registers... [text
>    explaining constraints and errors follows].
>
> Martin

How about this

@item general_regs_only
@cindex @code{general_regs_only} function attribute, x86
The @code{general_regs_only} attribute on functions is used to inform
the compiler that functions use only general purpose registers.  It
can be used together with the @code{always_inline} attribute to avoid
inlining failure when there is a mismatch in compiler vector options.

Thanks.

-- 
H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-21 20:58     ` H.J. Lu
@ 2021-04-21 23:23       ` Martin Sebor
  2021-04-22  1:01         ` H.J. Lu
  0 siblings, 1 reply; 22+ messages in thread
From: Martin Sebor @ 2021-04-21 23:23 UTC (permalink / raw)
  To: H.J. Lu
  Cc: gcc-patches, Uros Bizjak, Jakub Jelinek,
	Bernhard Reutner-Fischer, Richard Biener

On 4/21/21 2:58 PM, H.J. Lu wrote:
> On Wed, Apr 21, 2021 at 10:09 AM Martin Sebor <msebor@gmail.com> wrote:
>>
>> On 4/14/21 4:39 PM, H.J. Lu wrote:
>>> commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
>>> Author: H.J. Lu <hjl.tools@gmail.com>
>>> Date:   Fri Aug 21 09:42:49 2020 -0700
>>>
>>>       x86: Add target("general-regs-only") function attribute
>>>
>>> is incomplete since it is impossible to call integer intrinsics from
>>> a function with general-regs-only target attribute.
>>>
>>> 1. Add general_regs_only function attribute to inform the compiler that
>>> functions use only general purpose registers.  When making inlining
>>> decisions on such functions, non-GPR compiler options are excluded.
>>> 2. Add general_regs_only attribute to x86 intrinsics which use only
>>> general purpose registers.
>>>
>> ...
>>> --- a/gcc/doc/extend.texi
>>> +++ b/gcc/doc/extend.texi
>>> @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
>>>    of the section to record function entry instrumentation calls in when
>>>    enabled with @option{-pg -mrecord-mcount}
>>>
>>> +@item general_regs_only
>>> +@cindex @code{general_regs_only} function attribute, x86
>>> +The @code{general_regs_only} attribute on functions is used to
>>> +inform the compiler that functions use only general purpose registers.
>>
>> I'll just reiterate basically the same comment as before: it's not
>> clear from the very brief description above what the requirements
>> are for using the attribute.  I'm guessing it can be applied to
>> any function (inline or otherwise) but only has any effect when
>> the function is actually inlined and otherwise doesn't constrain
>> what the function can do.  (Whatever the constraints are, I think
>> the manual should spell them out, and likewise for its effects.)
> 
> That is correct.
> 
>> Similarly it's not clear what should be expected when the function
>> does use some other register.  Ideally, I think GCC would check and
>> issue a nice error message whether or not the function is inlined
>> or called.  I suspect that might only be possible for inline
>> functions that are actually called and for which the back end must
>> emit code.
> 
> This is what GCC does today:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99744

Yes, that's the rather obscure error I think I commented on before
and suggested should be improved.  Based on pr99744-3.c I don't think
this has changed in the improved patch.

> 
>> Other than that, I'd suggest to improve the phrasing a bit:
>>
>>     The @code{general_regs_only} function attribute indicates that
>>     the function uses only general purpose registers... [text
>>     explaining constraints and errors follows].
>>
>> Martin
> 
> How about this
> 
> @item general_regs_only
> @cindex @code{general_regs_only} function attribute, x86
> The @code{general_regs_only} attribute on functions is used to inform
> the compiler that functions use only general purpose registers.  It
> can be used together with the @code{always_inline} attribute to avoid
> inlining failure when there is a mismatch in compiler vector options.

Without an article the part "that functions use only general purpose
registers" is unclear and/or grammatically incorrect.  What functions?
If the function the attribute is applied to, it needs an article, e.g.,
"the function" or "a function", and singular.  (Otherwise it could be
read as talking about the functions called from the one with
the attribute, or some other functions altogether).

I tried to correct that above but, if you prefer, the following would
be closer to your phrasing but more correct/accurate:

   The @code{general_regs_only} function attribute informs
   the compiler that the function uses only general purpose
   registers.

I don't understand what the second sentence is trying to say, and
without a better error message for the problem in PR 99744, I suspect
few users will either.  I am suggesting to explain in the text you
are adding, under what conditions inlining might fail without
the attribute, and what effect the attribute has on the function
that prevents the inlining failure.

(If we can't explain what the effect is then I wonder why
the attribute is being added at all instead of teaching GCC to
always behave as if the attribute were there when its absence
would otherwise lead to an error.)

Martin

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-21 23:23       ` Martin Sebor
@ 2021-04-22  1:01         ` H.J. Lu
  2021-04-22  8:27           ` Richard Biener
  2021-04-22  9:02           ` Jakub Jelinek
  0 siblings, 2 replies; 22+ messages in thread
From: H.J. Lu @ 2021-04-22  1:01 UTC (permalink / raw)
  To: Martin Sebor
  Cc: gcc-patches, Uros Bizjak, Jakub Jelinek,
	Bernhard Reutner-Fischer, Richard Biener

On Wed, Apr 21, 2021 at 4:24 PM Martin Sebor <msebor@gmail.com> wrote:
>
> On 4/21/21 2:58 PM, H.J. Lu wrote:
> > On Wed, Apr 21, 2021 at 10:09 AM Martin Sebor <msebor@gmail.com> wrote:
> >>
> >> On 4/14/21 4:39 PM, H.J. Lu wrote:
> >>> commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
> >>> Author: H.J. Lu <hjl.tools@gmail.com>
> >>> Date:   Fri Aug 21 09:42:49 2020 -0700
> >>>
> >>>       x86: Add target("general-regs-only") function attribute
> >>>
> >>> is incomplete since it is impossible to call integer intrinsics from
> >>> a function with general-regs-only target attribute.
> >>>
> >>> 1. Add general_regs_only function attribute to inform the compiler that
> >>> functions use only general purpose registers.  When making inlining
> >>> decisions on such functions, non-GPR compiler options are excluded.
> >>> 2. Add general_regs_only attribute to x86 intrinsics which use only
> >>> general purpose registers.
> >>>
> >> ...
> >>> --- a/gcc/doc/extend.texi
> >>> +++ b/gcc/doc/extend.texi
> >>> @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
> >>>    of the section to record function entry instrumentation calls in when
> >>>    enabled with @option{-pg -mrecord-mcount}
> >>>
> >>> +@item general_regs_only
> >>> +@cindex @code{general_regs_only} function attribute, x86
> >>> +The @code{general_regs_only} attribute on functions is used to
> >>> +inform the compiler that functions use only general purpose registers.
> >>
> >> I'll just reiterate basically the same comment as before: it's not
> >> clear from the very brief description above what the requirements
> >> are for using the attribute.  I'm guessing it can be applied to
> >> any function (inline or otherwise) but only has any effect when
> >> the function is actually inlined and otherwise doesn't constrain
> >> what the function can do.  (Whatever the constraints are, I think
> >> the manual should spell them out, and likewise for its effects.)
> >
> > That is correct.
> >
> >> Similarly it's not clear what should be expected when the function
> >> does use some other register.  Ideally, I think GCC would check and
> >> issue a nice error message whether or not the function is inlined
> >> or called.  I suspect that might only be possible for inline
> >> functions that are actually called and for which the back end must
> >> emit code.
> >
> > This is what GCC does today:
> >
> > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99744
>
> Yes, that's the rather obscure error I think I commented on before
> and suggested should be improved.  Based on r99744-3.c I don't think
> this has changed in the improved patch.

My goal is to fix the inline failures, not to improve the compiler error
message.

> >
> >> Other than that, I'd suggest to improve the phrasing a bit:
> >>
> >>     The @code{general_regs_only} function attribute indicates that
> >>     the function uses only general purpose registers... [text
> >>     explaining constraints and errors follows].
> >>
> >> Martin
> >
> > How about this
> >
> > @item general_regs_only
> > @cindex @code{general_regs_only} function attribute, x86
> > The @code{general_regs_only} attribute on functions is used to inform
> > the compiler that functions use only general purpose registers.  It
> > can be used together with the @code{always_inline} attribute to avoid
> > inlining failure when there is a mismatch in compiler vector options.
>
> Without an article the part "that functions use only general purpose
> registers" is unclear and/or grammatically incorrect.  What functions?
> If the function the attribute is applied to, it needs an article, e.g.,
> "the function" or "a function", and singular.  (Otherwise it could be
> read as talking about the functions called from the one with
> the attribute, or some other functions altogether).
>
> I tried to correct that above but, if you prefer, the following would
> be closer to your phrasing but more correct/accurate:
>
>    The @code{general_regs_only} function attribute informs
>    the compiler that the function uses only general purpose
>    registers.
>
> I don't understand what the second sentence is trying to say, and
> without a better error message for the problem in r99744, I suspect
> few users will either.  I am suggesting to explain in the text you
> are adding, under what conditions inlining might fail without
> the attribute, and what effect the attribute has on the function
> that prevents the inlining failure.

How about this?

@item general_regs_only
@cindex @code{general_regs_only} function attribute, x86
The @code{general_regs_only} function attribute informs the compiler
that the function uses only general purpose registers.  When the
compiler inlines a function with the @code{always_inline} attribute,
target-specific compilation options may lead to inline failures.
The @code{general_regs_only} attribute, if applicable, can be used
together with the @code{always_inline} attribute to reduce inlining
failures.

> (If we can't explain what the effect is then I wonder why
> the attribute is being added at all instead of teaching GCC to
> always behave as if the attribute were there when its absence
> would otherwise lead to an error.)
>

Inlining an always_inline function into a function that doesn't
support the ISA needed by the always_inline function should fail.
But when inlining the always_inline function, the compiler doesn't
know if GPR instructions are sufficient for the always_inline function.
The general_regs_only attribute informs the compiler that the function
uses only general purpose registers.  There is no other way for the
compiler to deduce such info at this stage.

-- 
H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22  1:01         ` H.J. Lu
@ 2021-04-22  8:27           ` Richard Biener
  2021-04-22  9:02           ` Jakub Jelinek
  1 sibling, 0 replies; 22+ messages in thread
From: Richard Biener @ 2021-04-22  8:27 UTC (permalink / raw)
  To: H.J. Lu
  Cc: Martin Sebor, GCC Patches, Uros Bizjak, Jakub Jelinek,
	Bernhard Reutner-Fischer

On Thu, Apr 22, 2021 at 3:01 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Wed, Apr 21, 2021 at 4:24 PM Martin Sebor <msebor@gmail.com> wrote:
> >
> > On 4/21/21 2:58 PM, H.J. Lu wrote:
> > > On Wed, Apr 21, 2021 at 10:09 AM Martin Sebor <msebor@gmail.com> wrote:
> > >>
> > >> On 4/14/21 4:39 PM, H.J. Lu wrote:
> > >>> commit 87c753ac241f25d222d46ba1ac66ceba89d6a200
> > >>> Author: H.J. Lu <hjl.tools@gmail.com>
> > >>> Date:   Fri Aug 21 09:42:49 2020 -0700
> > >>>
> > >>>       x86: Add target("general-regs-only") function attribute
> > >>>
> > >>> is incomplete since it is impossible to call integer intrinsics from
> > >>> a function with general-regs-only target attribute.
> > >>>
> > >>> 1. Add general_regs_only function attribute to inform the compiler that
> > >>> functions use only general purpose registers.  When making inlining
> > >>> decisions on such functions, non-GPR compiler options are excluded.
> > >>> 2. Add general_regs_only attribute to x86 intrinsics which use only
> > >>> general purpose registers.
> > >>>
> > >> ...
> > >>> --- a/gcc/doc/extend.texi
> > >>> +++ b/gcc/doc/extend.texi
> > >>> @@ -7066,6 +7066,11 @@ On x86 targets, the @code{fentry_section} attribute sets the name
> > >>>    of the section to record function entry instrumentation calls in when
> > >>>    enabled with @option{-pg -mrecord-mcount}
> > >>>
> > >>> +@item general_regs_only
> > >>> +@cindex @code{general_regs_only} function attribute, x86
> > >>> +The @code{general_regs_only} attribute on functions is used to
> > >>> +inform the compiler that functions use only general purpose registers.
> > >>
> > >> I'll just reiterate basically the same comment as before: it's not
> > >> clear from the very brief description above what the requirements
> > >> are for using the attribute.  I'm guessing it can be applied to
> > >> any function (inline or otherwise) but only has any effect when
> > >> the function is actually inlined and otherwise doesn't constrain
> > >> what the function can do.  (Whatever the constraints are, I think
> > >> the manual should spell them out, and likewise for its effects.)
> > >
> > > That is correct.
> > >
> > >> Similarly it's not clear what should be expected when the function
> > >> does use some other register.  Ideally, I think GCC would check and
> > >> issue a nice error message whether or not the function is inlined
> > >> or called.  I suspect that might only be possible for inline
> > >> functions that are actually called and for which the back end must
> > >> emit code.
> > >
> > > This is what GCC does today:
> > >
> > > https://gcc.gnu.org/bugzilla/show_bug.cgi?id=99744
> >
> > Yes, that's the rather obscure error I think I commented on before
> > and suggested should be improved.  Based on r99744-3.c I don't think
> > this has changed in the improved patch.
>
> My goal is to fix the inline failures, not to improve the compiler error
> message.
>
> > >
> > >> Other than that, I'd suggest to improve the phrasing a bit:
> > >>
> > >>     The @code{general_regs_only} function attribute indicates that
> > >>     the function uses only general purpose registers... [text
> > >>     explaining constraints and errors follows].
> > >>
> > >> Martin
> > >
> > > How about this
> > >
> > > @item general_regs_only
> > > @cindex @code{general_regs_only} function attribute, x86
> > > The @code{general_regs_only} attribute on functions is used to inform
> > > the compiler that functions use only general purpose registers.  It
> > > can be used together with the @code{always_inline} attribute to avoid
> > > inlining failure when there is a mismatch in compiler vector options.
> >
> > Without an article the part "that functions use only general purpose
> > registers" is unclear and/or grammatically incorrect.  What functions?
> > If the function the attribute is applied to, it needs an article, e.g.,
> > "the function" or "a function", and singular.  (Otherwise it could be
> > read as talking about the functions called from the one with
> > the attribute, or some other functions altogether).
> >
> > I tried to correct that above but, if you prefer, the following would
> > be closer to your phrasing but more correct/accurate:
> >
> >    The @code{general_regs_only} function attribute informs
> >    the compiler that the function uses only general purpose
> >    registers.
> >
> > I don't understand what the second sentence is trying to say, and
> > without a better error message for the problem in r99744, I suspect
> > few users will either.  I am suggesting to explain in the text you
> > are adding, under what conditions inlining might fail without
> > the attribute, and what effect the attribute has on the function
> > that prevents the inlining failure.
>
> How about this?
>
> @item general_regs_only
> @cindex @code{general_regs_only} function attribute, x86
> The @code{general_regs_only} function attribute informs the compiler
> that the function uses only general purpose registers.  When the
> compiler inlines a function with the @code{always_inline} attribute,
> target-specific compilation options may lead to inline failures.
> The @code{general_regs_only} attribute, if applicable, can be used
> together with the @code{always_inline} attribute to reduce inlining
> failure.
>
> > (If we can't explain what the effect is then I wonder why
> > the attribute is being added at all instead of teaching GCC to
> > always behave as if the attribute were there when its absence
> > would otherwise lead to an error.)
> >
>
> Inlining an always_inline function into a function, which doesn't
> support the ISA needed by the always_inline function,  should fail.
> But when inlining the always_inline function, the compiler doesn't
> know if GPR instructions are sufficient for the always_inline function.
> The general_regs_only informs the compiler that the function uses
> only general purpose registers.   There are no other ways for the
> compiler to deduce such info at this stage.

Is placing this attribute on a function definition which ends up
using non-GPR regs invoking undefined behavior?  That is,
can IRA/LRA assume xmm regs need no saving around calls
to such functions?  Do we need to diagnose non-GPR uses
(late, of course)?

I'm not sure we're going to solve the always-inline issues with
this new attribute - for example BMI intrinsics won't use
non-GPR regs but then a function using those and marked
as general-regs-only should still not be inlined into a
function doing the BMI cpuid check.

You document the attribute only affects always_inline
decisions, this makes it even more special.

I still think we should do the same for target attributes
as we do for optimize attributes - always_inline should
ignore any option differences and do what it is documented
(inline):

"@item always_inline
@cindex @code{always_inline} function attribute
Generally, functions are not inlined unless optimization is specified.
For functions declared inline, this attribute inlines the function
independent of any restrictions that otherwise apply to inlining.
Failure to inline such a function is diagnosed as an error.
Note that if such a function is called indirectly the compiler may
or may not inline it depending on optimization level and a failure
to inline an indirect call may or may not be diagnosed."

nowhere does this suggest that inlining is not done if
the caller is AVX2 but the callee is not.

Richard.

> --
> H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22  1:01         ` H.J. Lu
  2021-04-22  8:27           ` Richard Biener
@ 2021-04-22  9:02           ` Jakub Jelinek
  2021-04-22 11:23             ` Richard Biener
  1 sibling, 1 reply; 22+ messages in thread
From: Jakub Jelinek @ 2021-04-22  9:02 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Martin Sebor, gcc-patches

On Wed, Apr 21, 2021 at 06:01:07PM -0700, H.J. Lu via Gcc-patches wrote:
> How about this?
> 
> @item general_regs_only
> @cindex @code{general_regs_only} function attribute, x86
> The @code{general_regs_only} function attribute informs the compiler
> that the function uses only general purpose registers.  When the
> compiler inlines a function with the @code{always_inline} attribute,
> target-specific compilation options may lead to inline failures.
> The @code{general_regs_only} attribute, if applicable, can be used
> together with the @code{always_inline} attribute to reduce inlining
> failure.

I don't really like this attribute.
It is very specific to what you want to solve and doesn't address the
general problem, that always_inline means different things in different
code, and that is a problem for many targets, not just one.

As has been written, in some cases it means inline always, error
whenever it is called indirectly which can't be optimized into a direct
call that can be inlined and error whenever the inlining fails for other
reasons.

Another case, e.g. in the glibc fortify wrappers, is inline always
when the call is direct or an indirect call can be optimized into a direct
call and error when the inlining fails, but support indirect calls without
errors.

Another case, which is most of the x86/aarch64/arm etc. intrinsics, is
inline always unless there is a target mismatch (roughly what is
actually implemented).

Because from the always_inline attribute it is impossible to determine which
one of those it is (for the indirect calls the rule could be
gnu_inline extern inline means indirect calls are ok, anything else means
indirect calls are bad), we need new syntax to distinguish those cases.

general_regs_only attribute doesn't seem to be it, e.g. for the glibc
fortify wrappers cases I don't see why we should forbid using floating point
in such inlines.

So IMHO we need some new attribute for one of those, or optional parameter
to always_inline.

For the intrinsic case, ideal would be if we could record which ISA flags
(or more generally which options) are required and which are not.  Either
have some syntax where those would be explicitly specified in attribute (but
frankly that would be a maintenance nightmare), or derive those from
surrounding pragmas.  Right now we have those wrapped in
#ifndef __AVX2__
#pragma GCC push_options
#pragma GCC target("avx2")
#define __DISABLE_AVX2__
#endif /* __AVX2__ */

...

#ifdef __DISABLE_AVX2__
#undef __DISABLE_AVX2__
#pragma GCC pop_options
#endif /* __DISABLE_AVX2__ */

The question is if the pragma GCC target right now behaves incrementally
or not, whether
#pragma GCC target("avx2")
adds -mavx2 to options if it was missing before and nothing otherwise, or if
it switches other options off.  If it is incremental, we could e.g. try to
use the second least significant bit of global_options_set.x_* to mean
this option has been set explicitly by some surrounding #pragma GCC target.
The normal tests - global_options_set.x_flag_whatever could still work
fine because they wouldn't care if the option was explicit from anywhere
(command line or GCC target or target attribute) and just & 2 would mean
it was explicit from pragma GCC target; though there is the case of
bitfields... And then the inlining decision could check the & 2 flags to
see what is required and what is just from command line.
Or we can have some other pragma GCC that would be like target but would
have flags that are explicit (and could e.g. be more restricted, to ISA
options only), and let those be used in addition to #pragma GCC target.

	Jakub


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22  9:02           ` Jakub Jelinek
@ 2021-04-22 11:23             ` Richard Biener
  2021-04-22 11:57               ` H.J. Lu
  2021-04-22 12:22               ` Jakub Jelinek
  0 siblings, 2 replies; 22+ messages in thread
From: Richard Biener @ 2021-04-22 11:23 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: H.J. Lu, GCC Patches

On Thu, Apr 22, 2021 at 12:30 PM Jakub Jelinek via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Wed, Apr 21, 2021 at 06:01:07PM -0700, H.J. Lu via Gcc-patches wrote:
> > How about this?
> >
> > @item general_regs_only
> > @cindex @code{general_regs_only} function attribute, x86
> > The @code{general_regs_only} function attribute informs the compiler
> > that the function uses only general purpose registers.  When the
> > compiler inlines a function with the @code{always_inline} attribute,
> > target-specific compilation options may lead to inline failures.
> > The @code{general_regs_only} attribute, if applicable, can be used
> > together with the @code{always_inline} attribute to reduce inlining
> > failure.
>
> I don't really like this attribute.
> It is very specific to what you want to solve and doesn't address the
> general problem, that always_inline means different things in different
> code, and that is a problem for many targets, not just one.
>
> As has been written, in some cases it means inline always, error
> whenever it is called indirectly which can't be optimized into a direct
> call that can be inlined and error whenever the inlining fails for other
> reasons.
>
> Another case, e.g. in the glibc fortify wrappers, is inline always
> when the call is direct or an indirect call can be optimized into a direct
> call and error when the inlining fails, but support indirect calls without
> errors.
>
> Another case, which is most of the x86/aarch64/arm etc. intrinsics, is
> inline always unless there is a target mismatch (roughly what is
> actually implemented).
>
> Because from the always_inline attribute it is impossible to determine which
> one of those it is (for the indirect calls the rule could be
> gnu_inline extern inline means indirect calls are ok, anything else means
> indirect calls are bad), we need new syntax to distinguish those cases.
>
> general_regs_only attribute doesn't seem to be it, e.g. for the glibc
> fortify wrappers cases I don't see why we should forbid using floating point
> in such inlines.
>
> So IMHO we need some new attribute for one of those, or optional parameter
> to always_inline.
>
> For the intrinsic case, ideal would be if we could record which ISA flags
> (or more generally which options) are required and which are not.  Either
> have some syntax where those would be explicitly specified in attribute (but
> frankly that would be a maintenance nightmare), or derive those from
> surrounding pragmas.  Right now we have those wrapped in
> #ifndef __AVX2__
> #pragma GCC push_options
> #pragma GCC target("avx2")
> #define __DISABLE_AVX2__
> #endif /* __AVX2__ */
>
> ...
>
> #ifdef __DISABLE_AVX2__
> #undef __DISABLE_AVX2__
> #pragma GCC pop_options
> #endif /* __DISABLE_AVX2__ */
>
> The question is if the pragma GCC target right now behaves incrementally
> or not, whether
> #pragma GCC target("avx2")
> adds -mavx2 to options if it was missing before and nothing otherwise, or if
> it switches other options off.  If it is incremental, we could e.g. try to
> use the second least significant bit of global_options_set.x_* to mean
> this option has been set explicitly by some surrounding #pragma GCC target.
> The normal tests - global_options_set.x_flag_whatever could still work
> fine because they wouldn't care if the option was explicit from anywhere
> (command line or GCC target or target attribute) and just & 2 would mean
> it was explicit from pragma GCC target; though there is the case of
> bitfields... And then the inlining decision could check the & 2 flags to
> see what is required and what is just from command line.
> Or we can have some other pragma GCC that would be like target but would
> have flags that are explicit (and could e.g. be more restricted, to ISA
> options only), and let those be used in addition to #pragma GCC target.

I'm still curious as to what you think will break if always-inline does what
it is documented to do.

Richard.

>         Jakub
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22 11:23             ` Richard Biener
@ 2021-04-22 11:57               ` H.J. Lu
  2021-04-22 12:16                 ` Richard Biener
  2021-04-22 12:22               ` Jakub Jelinek
  1 sibling, 1 reply; 22+ messages in thread
From: H.J. Lu @ 2021-04-22 11:57 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jakub Jelinek, GCC Patches

On Thu, Apr 22, 2021 at 4:23 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, Apr 22, 2021 at 12:30 PM Jakub Jelinek via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Wed, Apr 21, 2021 at 06:01:07PM -0700, H.J. Lu via Gcc-patches wrote:
> > > How about this?
> > >
> > > @item general_regs_only
> > > @cindex @code{general_regs_only} function attribute, x86
> > > The @code{general_regs_only} function attribute informs the compiler
> > > that the function uses only general purpose registers.  When the
> > > compiler inlines a function with the @code{always_inline} attribute,
> > > target-specific compilation options may lead to inline failures.
> > > The @code{general_regs_only} attribute, if applicable, can be used
> > > together with the @code{always_inline} attribute to reduce inlining
> > > failures.
> >
> > I don't really like this attribute.
> > It is very specific to what you want to solve and doesn't address the
> > general problem, that always_inline means different things in different
> > code, and that is a problem for many targets, not just one.
> >
> > As has been written, in some cases it means inline always, error
> > whenever it is called indirectly which can't be optimized into a direct
> > call that can be inlined and error whenever the inlining fails for other
> > reasons.
> >
> > Another case, e.g. in the glibc fortify wrappers, is inline always
> > when the call is direct or an indirect call can be optimized into a direct
> > call and error when the inlining fails, but support indirect calls without
> > errors.
> >
> > Another case, which is most of the x86/aarch64/arm etc. intrinsics, is
> > inline always unless there is a target mismatch (roughly what is
> > actually implemented).
> >
> > Because from the always_inline attribute it is impossible to determine which
> > one of those it is (for the indirect calls the rule could be
> > gnu_inline extern inline means indirect calls are ok, anything else means
> > indirect calls are bad), we need new syntax to distinguish those cases.
> >
> > general_regs_only attribute doesn't seem to be it, e.g. for the glibc
> > fortify wrappers cases I don't see why we should forbid using floating point
> > in such inlines.
> >
> > So IMHO we need some new attribute for one of those, or optional parameter
> > to always_inline.
> >
> > For the intrinsic case, ideal would be if we could record which ISA flags
> > (or more generally which options) are required and which are not.  Either
> > have some syntax where those would be explicitly specified in attribute (but
> > frankly that would be a maintenance nightmare), or derive those from
> > surrounding pragmas.  Right now we have those wrapped in
> > #ifndef __AVX2__
> > #pragma GCC push_options
> > #pragma GCC target("avx2")
> > #define __DISABLE_AVX2__
> > #endif /* __AVX2__ */
> >
> > ...
> >
> > #ifdef __DISABLE_AVX2__
> > #undef __DISABLE_AVX2__
> > #pragma GCC pop_options
> > #endif /* __DISABLE_AVX2__ */
> >
> > The question is if the pragma GCC target right now behaves incrementally
> > or not, whether
> > #pragma GCC target("avx2")
> > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > it switches other options off.  If it is incremental, we could e.g. try to
> > use the second least significant bit of global_options_set.x_* to mean
> > this option has been set explicitly by some surrounding #pragma GCC target.
> > The normal tests - global_options_set.x_flag_whatever could still work
> > fine because they wouldn't care if the option was explicit from anywhere
> > (command line or GCC target or target attribute) and just & 2 would mean
> > it was explicit from pragma GCC target; though there is the case of
> > bitfields... And then the inlining decision could check the & 2 flags to
> > see what is required and what is just from command line.
> > Or we can have some other pragma GCC that would be like target but would
> > have flags that are explicit (and could e.g. be more restricted, to ISA
> > options only), and let those be used in addition to #pragma GCC target.
>
> I'm still curious as to what you think will break if always-inline does what
> it is documented to do.

No wrong code.  But the compiler will generate a different error message
at a later stage if the ISA for the intrinsic isn't enabled.

-- 
H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22 11:57               ` H.J. Lu
@ 2021-04-22 12:16                 ` Richard Biener
  0 siblings, 0 replies; 22+ messages in thread
From: Richard Biener @ 2021-04-22 12:16 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Jakub Jelinek, GCC Patches

On Thu, Apr 22, 2021 at 1:58 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Apr 22, 2021 at 4:23 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Thu, Apr 22, 2021 at 12:30 PM Jakub Jelinek via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Wed, Apr 21, 2021 at 06:01:07PM -0700, H.J. Lu via Gcc-patches wrote:
> > > > How about this?
> > > >
> > > > @item general_regs_only
> > > > @cindex @code{general_regs_only} function attribute, x86
> > > > The @code{general_regs_only} function attribute informs the compiler
> > > > that the function uses only general purpose registers.  When the
> > > > compiler inlines a function with the @code{always_inline} attribute,
> > > > target-specific compilation options may lead to inline failures.
> > > > The @code{general_regs_only} attribute, if applicable, can be used
> > > > together with the @code{always_inline} attribute to reduce inlining
> > > > failure.
> > >
> > > I don't really like this attribute.
> > > It is very specific to what you want to solve and doesn't address the
> > > general problem, that always_inline means different things in different
> > > code, and that is a problem for many targets, not just one.
> > >
> > > As has been written, in some cases it means inline always, error
> > > whenever it is called indirectly which can't be optimized into a direct
> > > call that can be inlined and error whenever the inlining fails for other
> > > reasons.
> > >
> > > Another case, e.g. in the glibc fortify wrappers, is inline always
> > > when the call is direct or an indirect call can be optimized into a direct
> > > call and error when the inlining fails, but support indirect calls without
> > > errors.
> > >
> > > Another case, which is most of the x86/aarch64/arm etc. intrinsics, is
> > > inline always unless there is a target mismatch (roughly what is
> > > actually implemented).
> > >
> > > Because from the always_inline attribute it is impossible to determine which
> > > one of those it is (for the indirect calls the rule could be
> > > gnu_inline extern inline means indirect calls are ok, anything else means
> > > indirect calls are bad), we need new syntax to distinguish those cases.
> > >
> > > general_regs_only attribute doesn't seem to be it, e.g. for the glibc
> > > fortify wrappers cases I don't see why we should forbid using floating point
> > > in such inlines.
> > >
> > > So IMHO we need some new attribute for one of those, or optional parameter
> > > to always_inline.
> > >
> > > For the intrinsic case, ideal would be if we could record which ISA flags
> > > (or more generally which options) are required and which are not.  Either
> > > have some syntax where those would be explicitly specified in attribute (but
> > > frankly that would be a maintenance nightmare), or derive those from
> > > surrounding pragmas.  Right now we have those wrapped in
> > > #ifndef __AVX2__
> > > #pragma GCC push_options
> > > #pragma GCC target("avx2")
> > > #define __DISABLE_AVX2__
> > > #endif /* __AVX2__ */
> > >
> > > ...
> > >
> > > #ifdef __DISABLE_AVX2__
> > > #undef __DISABLE_AVX2__
> > > #pragma GCC pop_options
> > > #endif /* __DISABLE_AVX2__ */
> > >
> > > The question is if the pragma GCC target right now behaves incrementally
> > > or not, whether
> > > #pragma GCC target("avx2")
> > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > it switches other options off.  If it is incremental, we could e.g. try to
> > > use the second least significant bit of global_options_set.x_* to mean
> > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > The normal tests - global_options_set.x_flag_whatever could still work
> > > fine because they wouldn't care if the option was explicit from anywhere
> > > (command line or GCC target or target attribute) and just & 2 would mean
> > > it was explicit from pragma GCC target; though there is the case of
> > > bitfields... And then the inlining decision could check the & 2 flags to
> > > see what is required and what is just from command line.
> > > Or we can have some other pragma GCC that would be like target but would
> > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > options only, and let those use in addition to #pragma GCC target.
> >
> > I'm still curious as to what you think will break if always-inline does what
> > it is documented to do.
>
> No wrong code.  But the compiler will generate a different error message
> at the later stage if the ISA for the intrinsic isn't enabled.

But all issues at hand we're trying to fix are rejections of _valid_ code.

Improving diagnostics for invalid code should be secondary to
properly accepting valid code.

There's nothing perfect here - for diagnostics we might want to consider
an __attribute__((requires_isa(a,b,c))) which says that calling a function
with such attribute requires the ISAs in the list to be active.  Target
code can then iterate over calls _before_ inlining to emit diagnostics.

Note that the target flags in effect on the callee are not the same as
what would be documented as required - if the CU is compiled
with -mavx2 then even SSE intrinsics will have AVX2 "enabled"
I think:

#include <xmmintrin.h>

__m128 __attribute__((target("no-avx,sse2"))) foo(__m128 A, __m128 B)
{
  return _mm_sub_ss (A, B);
}

> gcc-10 -S t.c -mavx2
In file included from t.c:1:
t.c: In function 'foo':
/usr/lib64/gcc/x86_64-suse-linux/10/include/xmmintrin.h:134:1: error:
inlining failed in call to 'always_inline' '_mm_sub_ss': target
specific option mismatch
  134 | _mm_sub_ss (__m128 __A, __m128 __B)
      | ^~~~~~~~~~
t.c:5:10: note: called from here
    5 |   return _mm_sub_ss (A, B);
      |          ^~~~~~~~~~~~~~~~~

the above is obviously bogus (_mm_sub_ss inherited AVX2).

Richard.

> --
> H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22 11:23             ` Richard Biener
  2021-04-22 11:57               ` H.J. Lu
@ 2021-04-22 12:22               ` Jakub Jelinek
  2021-04-22 12:52                 ` Richard Biener
  1 sibling, 1 reply; 22+ messages in thread
From: Jakub Jelinek @ 2021-04-22 12:22 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches

On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > The question is if the pragma GCC target right now behaves incrementally
> > or not, whether
> > #pragma GCC target("avx2")
> > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > it switches other options off.  If it is incremental, we could e.g. try to
> > use the second least significant bit of global_options_set.x_* to mean
> > this option has been set explicitly by some surrounding #pragma GCC target.
> > The normal tests - global_options_set.x_flag_whatever could still work
> > fine because they wouldn't care if the option was explicit from anywhere
> > (command line or GCC target or target attribute) and just & 2 would mean
> > it was explicit from pragma GCC target; though there is the case of
> > bitfields... And then the inlining decision could check the & 2 flags to
> > see what is required and what is just from command line.
> > Or we can have some other pragma GCC that would be like target but would
> > have flags that are explicit (and could e.g. be more restricted, to ISA
> > options only, and let those use in addition to #pragma GCC target.
> 
> I'm still curious as to what you think will break if always-inline does what
> it is documented to do.

We will silently accept calling intrinsics that must be used only in certain
ISA contexts, which will lead to people writing non-portable code.

So -O2 -mno-avx
#include <x86intrin.h>

void
foo (__m256 *x)
{
  x[0] = _mm256_sub_ps (x[1], x[2]);
}
etc. will now be accepted when it shouldn't be.
clang rejects it like gcc with:
1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
  x[0] = _mm256_sub_ps (x[1], x[2]);
         ^

Note, if I do:
#include <x86intrin.h>

__attribute__((target ("no-sse3"))) void
foo (__m256 *x)
{
  x[0] = _mm256_sub_ps (x[1], x[2]);
}
and compile
clang -S -O2 -mavx2 1.c
1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
  x[0] = _mm256_sub_ps (x[1], x[2]);
         ^
then from the error message it seems that unlike GCC, clang remembers
the exact target features that are needed for the intrinsics and checks just
those.
Though, looking at the preprocessed source, seems it uses
static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
_mm256_sub_ps(__m256 __a, __m256 __b)
{
  return (__m256)((__v8sf)__a-(__v8sf)__b);
}
and not target pragmas.

Anyway, if we tweak our intrinsic headers so that
-#ifndef __AVX__
 #pragma GCC push_options
 #pragma GCC target("avx")
-#define __DISABLE_AVX__
-#endif /* __AVX__ */

...
-#ifdef __DISABLE_AVX__
-#undef __DISABLE_AVX__
 #pragma GCC pop_options
-#endif /* __DISABLE_AVX__ */
and do the opts_set->x_* & 2 stuff on explicit options coming out of
target/optimize pragmas and attributes, perhaps we don't even need
to introduce a new attribute and can handle everything magically:

1) if it is gnu_inline extern inline, allow indirect calls, otherwise
disallow them for always_inline functions
2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
stuff
This will keep both intrinsics and glibc fortify macros working fine
in all the needed use cases.

	Jakub


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22 12:22               ` Jakub Jelinek
@ 2021-04-22 12:52                 ` Richard Biener
  2021-04-22 12:55                   ` Richard Biener
  0 siblings, 1 reply; 22+ messages in thread
From: Richard Biener @ 2021-04-22 12:52 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: GCC Patches

On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > > The question is if the pragma GCC target right now behaves incrementally
> > > or not, whether
> > > #pragma GCC target("avx2")
> > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > it switches other options off.  If it is incremental, we could e.g. try to
> > > use the second least significant bit of global_options_set.x_* to mean
> > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > The normal tests - global_options_set.x_flag_whatever could still work
> > > fine because they wouldn't care if the option was explicit from anywhere
> > > (command line or GCC target or target attribute) and just & 2 would mean
> > > it was explicit from pragma GCC target; though there is the case of
> > > bitfields... And then the inlining decision could check the & 2 flags to
> > > see what is required and what is just from command line.
> > > Or we can have some other pragma GCC that would be like target but would
> > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > options only, and let those use in addition to #pragma GCC target.
> >
> > I'm still curious as to what you think will break if always-inline does what
> > it is documented to do.
>
> We will silently accept calling intrinsics that must be used only in certain
> ISA contexts, which will lead to people writing non-portable code.
>
> So -O2 -mno-avx
> #include <x86intrin.h>
>
> void
> foo (__m256 *x)
> {
>   x[0] = _mm256_sub_ps (x[1], x[2]);
> }
> etc. will now be accepted when it shouldn't be.
> clang rejects it like gcc with:
> 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
>   x[0] = _mm256_sub_ps (x[1], x[2]);
>          ^
>
> Note, if I do:
> #include <x86intrin.h>
>
> __attribute__((target ("no-sse3"))) void
> foo (__m256 *x)
> {
>   x[0] = _mm256_sub_ps (x[1], x[2]);
> }
> and compile
> clang -S -O2 -mavx2 1.c
> 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
>   x[0] = _mm256_sub_ps (x[1], x[2]);
>          ^
> then from the error message it seems that unlike GCC, clang remembers
> the exact target features that are needed for the intrinsics and checks just
> those.
> Though, looking at the preprocessed source, seems it uses
> static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
> _mm256_sub_ps(__m256 __a, __m256 __b)
> {
>   return (__m256)((__v8sf)__a-(__v8sf)__b);
> }
> and not target pragmas.
>
> Anyway, if we tweak our intrinsic headers so that
> -#ifndef __AVX__
>  #pragma GCC push_options
>  #pragma GCC target("avx")
> -#define __DISABLE_AVX__
> -#endif /* __AVX__ */
>
> ...
> -#ifdef __DISABLE_AVX__
> -#undef __DISABLE_AVX__
>  #pragma GCC pop_options
> -#endif /* __DISABLE_AVX__ */
> and do the opts_set->x_* & 2 stuff on explicit options coming out of
> target/optimize pragmas and attributes, perhaps we don't even need
> to introduce a new attribute and can handle everything magically:
>
> 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> disallow them for always_inline functions

There are a lot of intrinsics using extern inline __gnu_inline though...

> 2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
> stuff
> This will keep both intrinsics and glibc fortify macros working fine
> in all the needed use cases.

Yes, see my example in the other mail.

I think before we add any new attributes we should sort out the
current mess, eventually adding some testcases for the desired
diagnostics.

Richard.

>         Jakub
>

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v4 2/2] x86: Add general_regs_only function attribute
  2021-04-22 12:52                 ` Richard Biener
@ 2021-04-22 12:55                   ` Richard Biener
  2021-07-18  1:45                     ` [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only") H.J. Lu
  0 siblings, 1 reply; 22+ messages in thread
From: Richard Biener @ 2021-04-22 12:55 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: GCC Patches

On Thu, Apr 22, 2021 at 2:52 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
> >
> > On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > > > The question is if the pragma GCC target right now behaves incrementally
> > > > or not, whether
> > > > #pragma GCC target("avx2")
> > > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > > it switches other options off.  If it is incremental, we could e.g. try to
> > > > use the second least significant bit of global_options_set.x_* to mean
> > > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > > The normal tests - global_options_set.x_flag_whatever could still work
> > > > fine because they wouldn't care if the option was explicit from anywhere
> > > > (command line or GCC target or target attribute) and just & 2 would mean
> > > > it was explicit from pragma GCC target; though there is the case of
> > > > bitfields... And then the inlining decision could check the & 2 flags to
> > > > see what is required and what is just from command line.
> > > > Or we can have some other pragma GCC that would be like target but would
> > > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > > options only, and let those use in addition to #pragma GCC target.
> > >
> > > I'm still curious as to what you think will break if always-inline does what
> > > it is documented to do.
> >
> > We will silently accept calling intrinsics that must be used only in certain
> > ISA contexts, which will lead to people writing non-portable code.
> >
> > So -O2 -mno-avx
> > #include <x86intrin.h>
> >
> > void
> > foo (__m256 *x)
> > {
> >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > }
> > etc. will now be accepted when it shouldn't be.
> > clang rejects it like gcc with:
> > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> >   x[0] = _mm256_sub_ps (x[1], x[2]);
> >          ^
> >
> > Note, if I do:
> > #include <x86intrin.h>
> >
> > __attribute__((target ("no-sse3"))) void
> > foo (__m256 *x)
> > {
> >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > }
> > and compile
> > clang -S -O2 -mavx2 1.c
> > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> >   x[0] = _mm256_sub_ps (x[1], x[2]);
> >          ^
> > then from the error message it seems that unlike GCC, clang remembers
> > the exact target features that are needed for the intrinsics and checks just
> > those.
> > Though, looking at the preprocessed source, seems it uses
> > static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
> > _mm256_sub_ps(__m256 __a, __m256 __b)
> > {
> >   return (__m256)((__v8sf)__a-(__v8sf)__b);
> > }
> > and not target pragmas.
> >
> > Anyway, if we tweak our intrinsic headers so that
> > -#ifndef __AVX__
> >  #pragma GCC push_options
> >  #pragma GCC target("avx")
> > -#define __DISABLE_AVX__
> > -#endif /* __AVX__ */
> >
> > ...
> > -#ifdef __DISABLE_AVX__
> > -#undef __DISABLE_AVX__
> >  #pragma GCC pop_options
> > -#endif /* __DISABLE_AVX__ */
> > and do the opts_set->x_* & 2 stuff on explicit options coming out of
> > target/optimize pragmas and attributes, perhaps we don't even need
> > to introduce a new attribute and can handle everything magically:

Oh, and any such changes will likely interact with Martin's ideas to rework
how optimize and target attributes work (aka adding on top of the
command-line options).  That is, attribute target will then not be enough
to remember the exact set of needed ISA features (as opposed to what
likely clang implements?)

> > 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> > disallow them for always_inline functions
>
> There are a lot of intrinsics using extern inline __gnu_inline though...
>
> > 2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
> > stuff
> > This will keep both intrinsics and glibc fortify macros working fine
> > in all the needed use cases.
>
> Yes, see my example in the other mail.
>
> I think before we add any new attributes we should sort out the
> current mess, eventually adding some testcases for desired
> diagnostic.
>
> Richard.
>
> >         Jakub
> >

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only")
  2021-04-22 12:55                   ` Richard Biener
@ 2021-07-18  1:45                     ` H.J. Lu
  2021-07-31 15:35                       ` PING^1 " H.J. Lu
  2021-08-03 11:47                       ` Richard Biener
  0 siblings, 2 replies; 22+ messages in thread
From: H.J. Lu @ 2021-07-18  1:45 UTC (permalink / raw)
  To: Richard Biener, Uros Bizjak, Hongyu Wang; +Cc: Jakub Jelinek, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 5564 bytes --]

On Thu, Apr 22, 2021 at 7:30 AM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> On Thu, Apr 22, 2021 at 2:52 PM Richard Biener
> <richard.guenther@gmail.com> wrote:
> >
> > On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
> > >
> > > On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > > > > The question is if the pragma GCC target right now behaves incrementally
> > > > > or not, whether
> > > > > #pragma GCC target("avx2")
> > > > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > > > it switches other options off.  If it is incremental, we could e.g. try to
> > > > > use the second least significant bit of global_options_set.x_* to mean
> > > > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > > > The normal tests - global_options_set.x_flag_whatever could still work
> > > > > fine because they wouldn't care if the option was explicit from anywhere
> > > > > (command line or GCC target or target attribute) and just & 2 would mean
> > > > > it was explicit from pragma GCC target; though there is the case of
> > > > > bitfields... And then the inlining decision could check the & 2 flags to
> > > > > see what is required and what is just from command line.
> > > > > Or we can have some other pragma GCC that would be like target but would
> > > > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > > > options only, and let those use in addition to #pragma GCC target.
> > > >
> > > > I'm still curious as to what you think will break if always-inline does what
> > > > it is documented to do.
> > >
> > > We will silently accept calling intrinsics that must be used only in certain
> > > ISA contexts, which will lead to people writing non-portable code.
> > >
> > > So -O2 -mno-avx
> > > #include <x86intrin.h>
> > >
> > > void
> > > foo (__m256 *x)
> > > {
> > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > }
> > > etc. will now be accepted when it shouldn't be.
> > > clang rejects it like gcc with:
> > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > >          ^
> > >
> > > Note, if I do:
> > > #include <x86intrin.h>
> > >
> > > __attribute__((target ("no-sse3"))) void
> > > foo (__m256 *x)
> > > {
> > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > }
> > > and compile
> > > clang -S -O2 -mavx2 1.c
> > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > >          ^
> > > then from the error message it seems that unlike GCC, clang remembers
> > > the exact target features that are needed for the intrinsics and checks just
> > > those.
> > > Though, looking at the preprocessed source, seems it uses
> > > static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
> > > _mm256_sub_ps(__m256 __a, __m256 __b)
> > > {
> > >   return (__m256)((__v8sf)__a-(__v8sf)__b);
> > > }
> > > and not target pragmas.
> > >
> > > Anyway, if we tweak our intrinsic headers so that
> > > -#ifndef __AVX__
> > >  #pragma GCC push_options
> > >  #pragma GCC target("avx")
> > > -#define __DISABLE_AVX__
> > > -#endif /* __AVX__ */
> > >
> > > ...
> > > -#ifdef __DISABLE_AVX__
> > > -#undef __DISABLE_AVX__
> > >  #pragma GCC pop_options
> > > -#endif /* __DISABLE_AVX__ */
> > > and do the opts_set->x_* & 2 stuff on explicit options coming out of
> > > target/optimize pragmas and attributes, perhaps we don't even need
> > > to introduce a new attribute and can handle everything magically:
>
> Oh, and any such changes will likely interact with Martin's ideas to rework
> how optimize and target attributes work (aka adding on top of the
> command-line options).  That is, attribute target will then not be enough
> to remember the exact set of needed ISA features (as opposed to what
> likely clang implements?)
>
> > > 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> > > disallow them for always_inline functions
> >
> > There are a lot of intrinsics using extern inline __gnu_inline though...
> >
> > > 2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
> > > stuff
> > > This will keep both intrinsics and glibc fortify macros working fine
> > > in all the needed use cases.
> >
> > Yes, see my example in the other mail.
> >
> > I think before we add any new attributes we should sort out the
> > current mess, eventually adding some testcases for desired
> > diagnostic.
> >
> > Richard.
> >
> > >         Jakub

Here is the v5 patch:

1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add

 #if defined __MMX__ || defined __SSE__
 #pragma GCC push_options
 #pragma GCC target("general-regs-only")
 #define __DISABLE_GENERAL_REGS_ONLY__
 #endif

and

 #ifdef __DISABLE_GENERAL_REGS_ONLY__
 #undef __DISABLE_GENERAL_REGS_ONLY__
 #pragma GCC pop_options
 #endif /* __DISABLE_GENERAL_REGS_ONLY__ */

to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
functions with __attribute__ ((target("general-regs-only"))).
2. When checking the always_inline attribute, if the callee only uses GPRs,
ignore MASK_80387, since enabling MASK_80387 in the caller has no impact on
inlining the callee.

OK for master?

Thanks.

-- 
H.J.

[-- Attachment #2: v5-0001-x86gprintrin.h-Add-pragma-GCC-target-general-regs.patch --]
[-- Type: application/x-patch, Size: 21599 bytes --]

^ permalink raw reply	[flat|nested] 22+ messages in thread

* PING^1 [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only")
  2021-07-18  1:45                     ` [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only") H.J. Lu
@ 2021-07-31 15:35                       ` H.J. Lu
  2021-08-03 11:47                       ` Richard Biener
  1 sibling, 0 replies; 22+ messages in thread
From: H.J. Lu @ 2021-07-31 15:35 UTC (permalink / raw)
  To: Richard Biener, Uros Bizjak, Hongyu Wang, Hongtao Liu
  Cc: Jakub Jelinek, GCC Patches

On Sat, Jul 17, 2021 at 6:45 PM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Apr 22, 2021 at 7:30 AM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Thu, Apr 22, 2021 at 2:52 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
> > > >
> > > > On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > > > > > The question is if the pragma GCC target right now behaves incrementally
> > > > > > or not, whether
> > > > > > #pragma GCC target("avx2")
> > > > > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > > > > it switches other options off.  If it is incremental, we could e.g. try to
> > > > > > use the second least significant bit of global_options_set.x_* to mean
> > > > > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > > > > The normal tests - global_options_set.x_flag_whatever could still work
> > > > > > fine because they wouldn't care if the option was explicit from anywhere
> > > > > > (command line or GCC target or target attribute) and just & 2 would mean
> > > > > > it was explicit from pragma GCC target; though there is the case of
> > > > > > bitfields... And then the inlining decision could check the & 2 flags to
> > > > > > see what is required and what is just from command line.
> > > > > > Or we can have some other pragma GCC that would be like target but would
> > > > > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > > > > options only, and let those use in addition to #pragma GCC target.
> > > > >
> > > > > I'm still curious as to what you think will break if always-inline does what
> > > > > it is documented to do.
> > > >
> > > > We will silently accept calling intrinsics that must be used only in certain
> > > > ISA contexts, which will lead to people writing non-portable code.
> > > >
> > > > So -O2 -mno-avx
> > > > #include <x86intrin.h>
> > > >
> > > > void
> > > > foo (__m256 *x)
> > > > {
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > }
> > > > etc. will now be accepted when it shouldn't be.
> > > > clang rejects it like gcc with:
> > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > >          ^
> > > >
> > > > Note, if I do:
> > > > #include <x86intrin.h>
> > > >
> > > > __attribute__((target ("no-sse3"))) void
> > > > foo (__m256 *x)
> > > > {
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > }
> > > > and compile
> > > > clang -S -O2 -mavx2 1.c
> > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > >          ^
> > > > then from the error message it seems that unlike GCC, clang remembers
> > > > the exact target features that are needed for the intrinsics and checks just
> > > > those.
> > > > Though, looking at the preprocessed source, seems it uses
> > > > static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
> > > > _mm256_sub_ps(__m256 __a, __m256 __b)
> > > > {
> > > >   return (__m256)((__v8sf)__a-(__v8sf)__b);
> > > > }
> > > > and not target pragmas.
> > > >
> > > > Anyway, if we tweak our intrinsic headers so that
> > > > -#ifndef __AVX__
> > > >  #pragma GCC push_options
> > > >  #pragma GCC target("avx")
> > > > -#define __DISABLE_AVX__
> > > > -#endif /* __AVX__ */
> > > >
> > > > ...
> > > > -#ifdef __DISABLE_AVX__
> > > > -#undef __DISABLE_AVX__
> > > >  #pragma GCC pop_options
> > > > -#endif /* __DISABLE_AVX__ */
> > > > and do the opts_set->x_* & 2 stuff on explicit options coming out of
> > > > target/optimize pragmas and attributes, perhaps we don't even need
> > > > to introduce a new attribute and can handle everything magically:
> >
> > Oh, and any such changes will likely interact with Martins ideas to rework
> > how optimize and target attributes work (aka adding ontop of the
> > commandline options).  That is, attribute target will then not be enough
> > to remember the exact set of needed ISA features (as opposed to what
> > likely clang implements?)
> >
> > > > 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> > > > disallow them for always_inline functions
> > >
> > > There are a lot of intrinsics using extern inline __gnu_inline though...
> > >
> > > > 2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
> > > > stuff
> > > > This will keep both intrinsics and glibc fortify macros working fine
> > > > in all the needed use cases.
> > >
> > > Yes, see my example in the other mail.
> > >
> > > I think before we add any new attributes we should sort out the
> > > current mess, eventually adding some testcases for desired
> > > diagnostic.
> > >
> > > Richard.
> > >
> > > >         Jakub
>
> Here is the v5 patch:
>
> 1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add
>
>  #if defined __MMX__ || defined __SSE__
>  #pragma GCC push_options
>  #pragma GCC target("general-regs-only")
>  #define __DISABLE_GENERAL_REGS_ONLY__
>  #endif
>
> and
>
>  #ifdef __DISABLE_GENERAL_REGS_ONLY__
>  #undef __DISABLE_GENERAL_REGS_ONLY__
>  #pragma GCC pop_options
>  #endif /* __DISABLE_GENERAL_REGS_ONLY__ */
>
> to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
> functions with __attribute__ ((target("general-regs-only"))).
> 2. When checking always_inline attribute, if callee only uses GPRs,
> ignore MASK_80387 since enable MASK_80387 in caller has no impact on
> callee inline.
>
> OK for master?
>

PING.

-- 
H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only")
  2021-07-18  1:45                     ` [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only") H.J. Lu
  2021-07-31 15:35                       ` PING^1 " H.J. Lu
@ 2021-08-03 11:47                       ` Richard Biener
  2021-08-03 14:45                         ` [PATCH v6] " H.J. Lu
  1 sibling, 1 reply; 22+ messages in thread
From: Richard Biener @ 2021-08-03 11:47 UTC (permalink / raw)
  To: H.J. Lu; +Cc: Uros Bizjak, Hongyu Wang, Jakub Jelinek, GCC Patches

On Sun, Jul 18, 2021 at 3:46 AM H.J. Lu <hjl.tools@gmail.com> wrote:
>
> On Thu, Apr 22, 2021 at 7:30 AM Richard Biener via Gcc-patches
> <gcc-patches@gcc.gnu.org> wrote:
> >
> > On Thu, Apr 22, 2021 at 2:52 PM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> > >
> > > On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
> > > >
> > > > On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > > > > > The question is if the pragma GCC target right now behaves incrementally
> > > > > > or not, whether
> > > > > > #pragma GCC target("avx2")
> > > > > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > > > > it switches other options off.  If it is incremental, we could e.g. try to
> > > > > > use the second least significant bit of global_options_set.x_* to mean
> > > > > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > > > > The normal tests - global_options_set.x_flag_whatever could still work
> > > > > > fine because they wouldn't care if the option was explicit from anywhere
> > > > > > (command line or GCC target or target attribute) and just & 2 would mean
> > > > > > it was explicit from pragma GCC target; though there is the case of
> > > > > > bitfields... And then the inlining decision could check the & 2 flags to
> > > > > > see what is required and what is just from command line.
> > > > > > Or we can have some other pragma GCC that would be like target but would
> > > > > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > > > > options only, and let those use in addition to #pragma GCC target.
> > > > >
> > > > > I'm still curious as to what you think will break if always-inline does what
> > > > > it is documented to do.
> > > >
> > > > We will silently accept calling intrinsics that must be used only in certain
> > > > ISA contexts, which will lead to people writing non-portable code.
> > > >
> > > > So -O2 -mno-avx
> > > > #include <x86intrin.h>
> > > >
> > > > void
> > > > foo (__m256 *x)
> > > > {
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > }
> > > > etc. will now be accepted when it shouldn't be.
> > > > clang rejects it like gcc with:
> > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > >          ^
> > > >
> > > > Note, if I do:
> > > > #include <x86intrin.h>
> > > >
> > > > __attribute__((target ("no-sse3"))) void
> > > > foo (__m256 *x)
> > > > {
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > }
> > > > and compile
> > > > clang -S -O2 -mavx2 1.c
> > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > >          ^
> > > > then from the error message it seems that unlike GCC, clang remembers
> > > > the exact target features that are needed for the intrinsics and checks just
> > > > those.
> > > > Though, looking at the preprocessed source, seems it uses
> > > > static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
> > > > _mm256_sub_ps(__m256 __a, __m256 __b)
> > > > {
> > > >   return (__m256)((__v8sf)__a-(__v8sf)__b);
> > > > }
> > > > and not target pragmas.
> > > >
> > > > Anyway, if we tweak our intrinsic headers so that
> > > > -#ifndef __AVX__
> > > >  #pragma GCC push_options
> > > >  #pragma GCC target("avx")
> > > > -#define __DISABLE_AVX__
> > > > -#endif /* __AVX__ */
> > > >
> > > > ...
> > > > -#ifdef __DISABLE_AVX__
> > > > -#undef __DISABLE_AVX__
> > > >  #pragma GCC pop_options
> > > > -#endif /* __DISABLE_AVX__ */
> > > > and do the opts_set->x_* & 2 stuff on explicit options coming out of
> > > > target/optimize pragmas and attributes, perhaps we don't even need
> > > > to introduce a new attribute and can handle everything magically:
> >
> > Oh, and any such changes will likely interact with Martins ideas to rework
> > how optimize and target attributes work (aka adding ontop of the
> > commandline options).  That is, attribute target will then not be enough
> > to remember the exact set of needed ISA features (as opposed to what
> > likely clang implements?)
> >
> > > > 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> > > > disallow them for always_inline functions
> > >
> > > There are a lot of intrinsics using extern inline __gnu_inline though...
> > >
> > > > 2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
> > > > stuff
> > > > This will keep both intrinsics and glibc fortify macros working fine
> > > > in all the needed use cases.
> > >
> > > Yes, see my example in the other mail.
> > >
> > > I think before we add any new attributes we should sort out the
> > > current mess, eventually adding some testcases for desired
> > > diagnostic.
> > >
> > > Richard.
> > >
> > > >         Jakub
>
> Here is the v5 patch:
>
> 1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add
>
>  #if defined __MMX__ || defined __SSE__
>  #pragma GCC push_options
>  #pragma GCC target("general-regs-only")
>  #define __DISABLE_GENERAL_REGS_ONLY__
>  #endif
>
> and
>
>  #ifdef __DISABLE_GENERAL_REGS_ONLY__
>  #undef __DISABLE_GENERAL_REGS_ONLY__
>  #pragma GCC pop_options
>  #endif /* __DISABLE_GENERAL_REGS_ONLY__ */
>
> to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
> functions with __attribute__ ((target("general-regs-only"))).
> 2. When checking always_inline attribute, if callee only uses GPRs,
> ignore MASK_80387 since enable MASK_80387 in caller has no impact on
> callee inline.
>
> OK for master?

+
+#include <x86intrin.h>
+
+#include <x86intrin.h>
+

there are some cases like the above - intentional?

Otherwise I guess I can live with this, hopefully things won't break.

In the end it's a quite narrow solution to a subpart of the overall
issue of course.

Thus OK unless any other stakeholder has comments.

Thanks,
Richard.

> Thanks.
>
> --
> H.J.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v6] <x86gprintrin.h>: Add pragma GCC target("general-regs-only")
  2021-08-03 11:47                       ` Richard Biener
@ 2021-08-03 14:45                         ` H.J. Lu
  0 siblings, 0 replies; 22+ messages in thread
From: H.J. Lu @ 2021-08-03 14:45 UTC (permalink / raw)
  To: Richard Biener; +Cc: Uros Bizjak, Hongyu Wang, Jakub Jelinek, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 6728 bytes --]

On Tue, Aug 3, 2021 at 4:47 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Sun, Jul 18, 2021 at 3:46 AM H.J. Lu <hjl.tools@gmail.com> wrote:
> >
> > On Thu, Apr 22, 2021 at 7:30 AM Richard Biener via Gcc-patches
> > <gcc-patches@gcc.gnu.org> wrote:
> > >
> > > On Thu, Apr 22, 2021 at 2:52 PM Richard Biener
> > > <richard.guenther@gmail.com> wrote:
> > > >
> > > > On Thu, Apr 22, 2021 at 2:22 PM Jakub Jelinek <jakub@redhat.com> wrote:
> > > > >
> > > > > On Thu, Apr 22, 2021 at 01:23:20PM +0200, Richard Biener via Gcc-patches wrote:
> > > > > > > The question is if the pragma GCC target right now behaves incrementally
> > > > > > > or not, whether
> > > > > > > #pragma GCC target("avx2")
> > > > > > > adds -mavx2 to options if it was missing before and nothing otherwise, or if
> > > > > > > it switches other options off.  If it is incremental, we could e.g. try to
> > > > > > > use the second least significant bit of global_options_set.x_* to mean
> > > > > > > this option has been set explicitly by some surrounding #pragma GCC target.
> > > > > > > The normal tests - global_options_set.x_flag_whatever could still work
> > > > > > > fine because they wouldn't care if the option was explicit from anywhere
> > > > > > > (command line or GCC target or target attribute) and just & 2 would mean
> > > > > > > it was explicit from pragma GCC target; though there is the case of
> > > > > > > bitfields... And then the inlining decision could check the & 2 flags to
> > > > > > > see what is required and what is just from command line.
> > > > > > > Or we can have some other pragma GCC that would be like target but would
> > > > > > > have flags that are explicit (and could e.g. be more restricted, to ISA
> > > > > > > options only, and let those use in addition to #pragma GCC target.
> > > > > >
> > > > > > I'm still curious as to what you think will break if always-inline does what
> > > > > > it is documented to do.
> > > > >
> > > > > We will silently accept calling intrinsics that must be used only in certain
> > > > > ISA contexts, which will lead to people writing non-portable code.
> > > > >
> > > > > So -O2 -mno-avx
> > > > > #include <x86intrin.h>
> > > > >
> > > > > void
> > > > > foo (__m256 *x)
> > > > > {
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > > }
> > > > > etc. will now be accepted when it shouldn't be.
> > > > > clang rejects it like gcc with:
> > > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > >          ^
> > > > >
> > > > > Note, if I do:
> > > > > #include <x86intrin.h>
> > > > >
> > > > > __attribute__((target ("no-sse3"))) void
> > > > > foo (__m256 *x)
> > > > > {
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > > }
> > > > > and compile
> > > > > clang -S -O2 -mavx2 1.c
> > > > > 1.c:6:10: error: always_inline function '_mm256_sub_ps' requires target feature 'avx', but would be inlined into function 'foo' that is compiled without support for 'avx'
> > > > >   x[0] = _mm256_sub_ps (x[1], x[2]);
> > > > >          ^
> > > > > then from the error message it seems that unlike GCC, clang remembers
> > > > > the exact target features that are needed for the intrinsics and checks just
> > > > > those.
> > > > > Though, looking at the preprocessed source, seems it uses
> > > > > static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
> > > > > _mm256_sub_ps(__m256 __a, __m256 __b)
> > > > > {
> > > > >   return (__m256)((__v8sf)__a-(__v8sf)__b);
> > > > > }
> > > > > and not target pragmas.
> > > > >
> > > > > Anyway, if we tweak our intrinsic headers so that
> > > > > -#ifndef __AVX__
> > > > >  #pragma GCC push_options
> > > > >  #pragma GCC target("avx")
> > > > > -#define __DISABLE_AVX__
> > > > > -#endif /* __AVX__ */
> > > > >
> > > > > ...
> > > > > -#ifdef __DISABLE_AVX__
> > > > > -#undef __DISABLE_AVX__
> > > > >  #pragma GCC pop_options
> > > > > -#endif /* __DISABLE_AVX__ */
> > > > > and do the opts_set->x_* & 2 stuff on explicit options coming out of
> > > > > target/optimize pragmas and attributes, perhaps we don't even need
> > > > > to introduce a new attribute and can handle everything magically:
> > >
> > > Oh, and any such changes will likely interact with Martins ideas to rework
> > > how optimize and target attributes work (aka adding ontop of the
> > > commandline options).  That is, attribute target will then not be enough
> > > to remember the exact set of needed ISA features (as opposed to what
> > > likely clang implements?)
> > >
> > > > > 1) if it is gnu_inline extern inline, allow indirect calls, otherwise
> > > > > disallow them for always_inline functions
> > > >
> > > > There are a lot of intrinsics using extern inline __gnu_inline though...
> > > >
> > > > > 2) for the isa flags and option mismatches, only disallow opts_set->x_* & 2
> > > > > stuff
> > > > > This will keep both intrinsics and glibc fortify macros working fine
> > > > > in all the needed use cases.
> > > >
> > > > Yes, see my example in the other mail.
> > > >
> > > > I think before we add any new attributes we should sort out the
> > > > current mess, eventually adding some testcases for desired
> > > > diagnostic.
> > > >
> > > > Richard.
> > > >
> > > > >         Jakub
> >
> > Here is the v5 patch:
> >
> > 1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add
> >
> >  #if defined __MMX__ || defined __SSE__
> >  #pragma GCC push_options
> >  #pragma GCC target("general-regs-only")
> >  #define __DISABLE_GENERAL_REGS_ONLY__
> >  #endif
> >
> > and
> >
> >  #ifdef __DISABLE_GENERAL_REGS_ONLY__
> >  #undef __DISABLE_GENERAL_REGS_ONLY__
> >  #pragma GCC pop_options
> >  #endif /* __DISABLE_GENERAL_REGS_ONLY__ */
> >
> > to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
> > functions with __attribute__ ((target("general-regs-only"))).
> > 2. When checking always_inline attribute, if callee only uses GPRs,
> > ignore MASK_80387 since enable MASK_80387 in caller has no impact on
> > callee inline.
> >
> > OK for master?
>
> +
> +#include <x86intrin.h>
> +
> +#include <x86intrin.h>
> +
>
> there are some cases like the above - intentional?

Fixed in the v6 patch.

> Otherwise I guess I can live with this, hopefully things won't break.
>
> In the end it's a quite narrow solution to a subpart of the overall
> issue of course.
>
> Thus OK unless any other stakeholder has comments.

Thanks.  I will wait for a few days before committing.

> Thanks,
> Richard.
>
> > Thanks.
> >
> > --
> > H.J.



-- 
H.J.

[-- Attachment #2: v6-0001-x86gprintrin.h-Add-pragma-GCC-target-general-regs.patch --]
[-- Type: text/x-patch, Size: 21547 bytes --]

From ef93867816e73a0aae1c526cf8d7999d5a15b6f9 Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Sat, 17 Jul 2021 07:44:45 -0700
Subject: [PATCH v6] <x86gprintrin.h>: Add pragma GCC
 target("general-regs-only")

1. Intrinsics in <x86gprintrin.h> only require GPR ISAs.  Add

 #if defined __MMX__ || defined __SSE__
 #pragma GCC push_options
 #pragma GCC target("general-regs-only")
 #define __DISABLE_GENERAL_REGS_ONLY__
 #endif

and

 #ifdef __DISABLE_GENERAL_REGS_ONLY__
 #undef __DISABLE_GENERAL_REGS_ONLY__
 #pragma GCC pop_options
 #endif /* __DISABLE_GENERAL_REGS_ONLY__ */

to <x86gprintrin.h> to disable non-GPR ISAs so that they can be used in
functions with __attribute__ ((target("general-regs-only"))).
2. When checking the always_inline attribute, if the callee only uses
GPRs, ignore MASK_80387, since enabling MASK_80387 in the caller has no
impact on inlining the callee.

gcc/

	PR target/99744
	* config/i386/i386.c (ix86_can_inline_p): Ignore MASK_80387 if
	callee only uses GPRs.
	* config/i386/ia32intrin.h: Revert commit 5463cee2770.
	* config/i386/serializeintrin.h: Revert commit 71958f740f1.
	* config/i386/x86gprintrin.h: Add
	#pragma GCC target("general-regs-only") and #pragma GCC pop_options
	to disable non-GPR ISAs.

gcc/testsuite/

	PR target/99744
	* gcc.target/i386/pr99744-3.c: New test.
	* gcc.target/i386/pr99744-4.c: Likewise.
	* gcc.target/i386/pr99744-5.c: Likewise.
	* gcc.target/i386/pr99744-6.c: Likewise.
	* gcc.target/i386/pr99744-7.c: Likewise.
	* gcc.target/i386/pr99744-8.c: Likewise.
---
 gcc/config/i386/i386.c                    |   6 +-
 gcc/config/i386/ia32intrin.h              |  14 +-
 gcc/config/i386/serializeintrin.h         |   7 +-
 gcc/config/i386/x86gprintrin.h            |  11 +
 gcc/testsuite/gcc.target/i386/pr99744-3.c |  13 +
 gcc/testsuite/gcc.target/i386/pr99744-4.c | 357 ++++++++++++++++++++++
 gcc/testsuite/gcc.target/i386/pr99744-5.c |  25 ++
 gcc/testsuite/gcc.target/i386/pr99744-6.c |  23 ++
 gcc/testsuite/gcc.target/i386/pr99744-7.c |  12 +
 gcc/testsuite/gcc.target/i386/pr99744-8.c |  13 +
 10 files changed, 477 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/pr99744-8.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index ec0690876b7..aea224ab235 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -554,7 +554,7 @@ ix86_can_inline_p (tree caller, tree callee)
 
   /* Changes of those flags can be tolerated for always inlines. Lets hope
      user knows what he is doing.  */
-  const unsigned HOST_WIDE_INT always_inline_safe_mask
+  unsigned HOST_WIDE_INT always_inline_safe_mask
 	 = (MASK_USE_8BIT_IDIV | MASK_ACCUMULATE_OUTGOING_ARGS
 	    | MASK_NO_ALIGN_STRINGOPS | MASK_AVX256_SPLIT_UNALIGNED_LOAD
 	    | MASK_AVX256_SPLIT_UNALIGNED_STORE | MASK_CLD
@@ -579,6 +579,10 @@ ix86_can_inline_p (tree caller, tree callee)
        && lookup_attribute ("always_inline",
 			    DECL_ATTRIBUTES (callee)));
 
+  /* If callee only uses GPRs, ignore MASK_80387.  */
+  if (TARGET_GENERAL_REGS_ONLY_P (callee_opts->x_ix86_target_flags))
+    always_inline_safe_mask |= MASK_80387;
+
   cgraph_node *callee_node = cgraph_node::get (callee);
   /* Callee's isa options should be a subset of the caller's, i.e. a SSE4
      function can inline a SSE2 function but a SSE2 function can't inline
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index 5422b0fc9e0..df99220ee4f 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -107,12 +107,22 @@ __rdpmc (int __S)
 #endif /* __iamcu__ */
 
 /* rdtsc */
-#define __rdtsc()		__builtin_ia32_rdtsc ()
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtsc (void)
+{
+  return __builtin_ia32_rdtsc ();
+}
 
 #ifndef __iamcu__
 
 /* rdtscp */
-#define __rdtscp(a)		__builtin_ia32_rdtscp (a)
+extern __inline unsigned long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__rdtscp (unsigned int *__A)
+{
+  return __builtin_ia32_rdtscp (__A);
+}
 
 #endif /* __iamcu__ */
 
diff --git a/gcc/config/i386/serializeintrin.h b/gcc/config/i386/serializeintrin.h
index e280250b198..89b5b94ea9b 100644
--- a/gcc/config/i386/serializeintrin.h
+++ b/gcc/config/i386/serializeintrin.h
@@ -34,7 +34,12 @@
 #define __DISABLE_SERIALIZE__
 #endif /* __SERIALIZE__ */
 
-#define _serialize()	__builtin_ia32_serialize ()
+extern __inline void
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_serialize (void)
+{
+  __builtin_ia32_serialize ();
+}
 
 #ifdef __DISABLE_SERIALIZE__
 #undef __DISABLE_SERIALIZE__
diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h
index 7793032ba90..b7fefa780a6 100644
--- a/gcc/config/i386/x86gprintrin.h
+++ b/gcc/config/i386/x86gprintrin.h
@@ -24,6 +24,12 @@
 #ifndef _X86GPRINTRIN_H_INCLUDED
 #define _X86GPRINTRIN_H_INCLUDED
 
+#if defined __MMX__ || defined __SSE__
+#pragma GCC push_options
+#pragma GCC target("general-regs-only")
+#define __DISABLE_GENERAL_REGS_ONLY__
+#endif
+
 #include <ia32intrin.h>
 
 #ifndef __iamcu__
@@ -255,4 +261,9 @@ _ptwrite32 (unsigned __B)
 
 #endif /* __iamcu__ */
 
+#ifdef __DISABLE_GENERAL_REGS_ONLY__
+#undef __DISABLE_GENERAL_REGS_ONLY__
+#pragma GCC pop_options
+#endif /* __DISABLE_GENERAL_REGS_ONLY__ */
+
 #endif /* _X86GPRINTRIN_H_INCLUDED.  */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-3.c b/gcc/testsuite/gcc.target/i386/pr99744-3.c
new file mode 100644
index 00000000000..6c505816ceb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-3.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mno-serialize" } */
+
+#include <x86intrin.h>
+
+__attribute__ ((target("general-regs-only")))
+void
+foo1 (void)
+{
+  _serialize ();
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-4.c b/gcc/testsuite/gcc.target/i386/pr99744-4.c
new file mode 100644
index 00000000000..9196e62d955
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-4.c
@@ -0,0 +1,357 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -mcrc32 -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdir64b -mmovdiri -mmwaitx -mpconfig -mpku -mpopcnt -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -msgx -mshstk -mtbm -mtsxldtrk -mxsave -mxsavec -mxsaveopt -mxsaves -mwaitpkg -mwbnoinvd" } */
+/* { dg-additional-options "-muintr" { target { ! ia32 } } }  */
+
+/* Test calling GPR intrinsics from functions with the general-regs-only
+   target attribute.  */
+
+#include <x86gprintrin.h>
+
+#define _CONCAT(x,y) x ## y
+
+#define test_0(func, type)						\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (void)						\
+  { return func (); }
+
+#define test_0_i1(func, type, imm)					\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (void)						\
+  { return func (imm); }
+
+#define test_1(func, type, op1_type)					\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A)					\
+  { return func (A); }
+
+#define test_1_i1(func, type, op1_type, imm)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A)					\
+  { return func (A, imm); }
+
+#define test_2(func, type, op1_type, op2_type)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B); }
+
+#define test_2_i1(func, type, op1_type, op2_type, imm)			\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B, imm); }
+
+#define test_3(func, type, op1_type, op2_type, op3_type)		\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)		\
+  { return func (A, B, C); }
+
+#define test_4(func, type, op1_type, op2_type, op3_type, op4_type)	\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C,		\
+			  op4_type D)					\
+  { return func (A, B, C, D); }
+
+/* ia32intrin.h  */
+test_1 (__bsfd, int, int)
+test_1 (__bsrd, int, int)
+test_1 (__bswapd, int, int)
+test_1 (__popcntd, int, unsigned int)
+test_2 (__rolb, unsigned char, unsigned char, int)
+test_2 (__rolw, unsigned short, unsigned short, int)
+test_2 (__rold, unsigned int, unsigned int, int)
+test_2 (__rorb, unsigned char, unsigned char, int)
+test_2 (__rorw, unsigned short, unsigned short, int)
+test_2 (__rord, unsigned int, unsigned int, int)
+
+#ifndef __iamcu__
+/* adxintrin.h */
+test_4 (_subborrow_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+test_4 (_addcarry_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+test_4 (_addcarryx_u32, unsigned char, unsigned char, unsigned int,
+	unsigned int, unsigned int *)
+
+/* bmiintrin.h */
+test_1 (__tzcnt_u16, unsigned short, unsigned short)
+test_2 (__andn_u32, unsigned int, unsigned int, unsigned int)
+test_2 (__bextr_u32, unsigned int, unsigned int, unsigned int)
+test_3 (_bextr_u32, unsigned int, unsigned int, unsigned int,
+	unsigned int)
+test_1 (__blsi_u32, unsigned int, unsigned int)
+test_1 (_blsi_u32, unsigned int, unsigned int)
+test_1 (__blsmsk_u32, unsigned int, unsigned int)
+test_1 (_blsmsk_u32, unsigned int, unsigned int)
+test_1 (__blsr_u32, unsigned int, unsigned int)
+test_1 (_blsr_u32, unsigned int, unsigned int)
+test_1 (__tzcnt_u32, unsigned int, unsigned int)
+test_1 (_tzcnt_u32, unsigned int, unsigned int)
+
+/* bmi2intrin.h */
+test_2 (_bzhi_u32, unsigned int, unsigned int, unsigned int)
+test_2 (_pdep_u32, unsigned int, unsigned int, unsigned int)
+test_2 (_pext_u32, unsigned int, unsigned int, unsigned int)
+
+/* cetintrin.h */
+test_1 (_inc_ssp, void, unsigned int)
+test_0 (_saveprevssp, void)
+test_1 (_rstorssp, void, void *)
+test_2 (_wrssd, void, unsigned int, void *)
+test_2 (_wrussd, void, unsigned int, void *)
+test_0 (_setssbsy, void)
+test_1 (_clrssbsy, void, void *)
+
+/* cldemoteintrin.h */
+test_1 (_cldemote, void, void *)
+
+/* clflushoptintrin.h */
+test_1 (_mm_clflushopt, void, void *)
+
+/* clwbintrin.h */
+test_1 (_mm_clwb, void, void *)
+
+/* clzerointrin.h */
+test_1 (_mm_clzero, void, void *)
+
+/* enqcmdintrin.h */
+test_2 (_enqcmd, int, void *, const void *)
+test_2 (_enqcmds, int, void *, const void *)
+
+/* fxsrintrin.h */
+test_1 (_fxsave, void, void *)
+test_1 (_fxrstor, void, void *)
+
+/* hresetintrin.h */
+test_1 (_hreset, void, unsigned int)
+
+/* ia32intrin.h  */
+test_2 (__crc32b, unsigned int, unsigned char, unsigned char)
+test_2 (__crc32w, unsigned int, unsigned short, unsigned short)
+test_2 (__crc32d, unsigned int, unsigned int, unsigned int)
+test_1 (__rdpmc, unsigned long long, int)
+test_0 (__rdtsc, unsigned long long)
+test_1 (__rdtscp, unsigned long long, unsigned int *)
+test_0 (__pause, void)
+
+/* lzcntintrin.h */
+test_1 (__lzcnt16, unsigned short, unsigned short)
+test_1 (__lzcnt32, unsigned int, unsigned int)
+test_1 (_lzcnt_u32, unsigned int, unsigned int)
+
+/* lwpintrin.h */
+test_1 (__llwpcb, void, void *)
+test_0 (__slwpcb, void *)
+test_2_i1 (__lwpval32, void, unsigned int, unsigned int, 1)
+test_2_i1 (__lwpins32, unsigned char, unsigned int, unsigned int, 1)
+
+/* movdirintrin.h */
+test_2 (_directstoreu_u32, void, void *, unsigned int)
+test_2 (_movdir64b, void, void *, const void *)
+
+/* mwaitxintrin.h */
+test_3 (_mm_monitorx, void, void const *, unsigned int, unsigned int)
+test_3 (_mm_mwaitx, void, unsigned int, unsigned int, unsigned int)
+
+/* pconfigintrin.h */
+test_2 (_pconfig_u32, unsigned int, const unsigned int, size_t *)
+
+/* pkuintrin.h */
+test_0 (_rdpkru_u32, unsigned int)
+test_1 (_wrpkru, void, unsigned int)
+
+/* popcntintrin.h */
+test_1 (_mm_popcnt_u32, int, unsigned int)
+
+/* rdseedintrin.h */
+test_1 (_rdseed16_step, int, unsigned short *)
+test_1 (_rdseed32_step, int, unsigned int *)
+
+/* rtmintrin.h */
+test_0 (_xbegin, unsigned int)
+test_0 (_xend, void)
+test_0_i1 (_xabort, void, 1)
+
+/* sgxintrin.h */
+test_2 (_encls_u32, unsigned int, const unsigned int, size_t *)
+test_2 (_enclu_u32, unsigned int, const unsigned int, size_t *)
+test_2 (_enclv_u32, unsigned int, const unsigned int, size_t *)
+
+/* tbmintrin.h */
+test_1_i1 (__bextri_u32, unsigned int, unsigned int, 1)
+test_1 (__blcfill_u32, unsigned int, unsigned int)
+test_1 (__blci_u32, unsigned int, unsigned int)
+test_1 (__blcic_u32, unsigned int, unsigned int)
+test_1 (__blcmsk_u32, unsigned int, unsigned int)
+test_1 (__blcs_u32, unsigned int, unsigned int)
+test_1 (__blsfill_u32, unsigned int, unsigned int)
+test_1 (__blsic_u32, unsigned int, unsigned int)
+test_1 (__t1mskc_u32, unsigned int, unsigned int)
+test_1 (__tzmsk_u32, unsigned int, unsigned int)
+
+/* tsxldtrkintrin.h */
+test_0 (_xsusldtrk, void)
+test_0 (_xresldtrk, void)
+
+/* x86gprintrin.h */
+test_1 (_ptwrite32, void, unsigned int)
+test_1 (_rdrand16_step, int, unsigned short *)
+test_1 (_rdrand32_step, int, unsigned int *)
+test_0 (_wbinvd, void)
+
+/* xtestintrin.h */
+test_0 (_xtest, int)
+
+/* xsaveintrin.h */
+test_2 (_xsave, void, void *, long long)
+test_2 (_xrstor, void, void *, long long)
+test_2 (_xsetbv, void, unsigned int, long long)
+test_1 (_xgetbv, long long, unsigned int)
+
+/* xsavecintrin.h */
+test_2 (_xsavec, void, void *, long long)
+
+/* xsaveoptintrin.h */
+test_2 (_xsaveopt, void, void *, long long)
+
+/* xsavesintrin.h */
+test_2 (_xsaves, void, void *, long long)
+test_2 (_xrstors, void, void *, long long)
+
+/* wbnoinvdintrin.h */
+test_0 (_wbnoinvd, void)
+
+#ifdef __x86_64__
+/* adxintrin.h */
+test_4 (_subborrow_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+test_4 (_addcarry_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+test_4 (_addcarryx_u64, unsigned char, unsigned char,
+	unsigned long long, unsigned long long,
+	unsigned long long *)
+
+/* bmiintrin.h */
+test_2 (__andn_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (__bextr_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_3 (_bextr_u64, unsigned long long, unsigned long long,
+	unsigned long long, unsigned long long)
+test_1 (__blsi_u64, unsigned long long, unsigned long long)
+test_1 (_blsi_u64, unsigned long long, unsigned long long)
+test_1 (__blsmsk_u64, unsigned long long, unsigned long long)
+test_1 (_blsmsk_u64, unsigned long long, unsigned long long)
+test_1 (__blsr_u64, unsigned long long, unsigned long long)
+test_1 (_blsr_u64, unsigned long long, unsigned long long)
+test_1 (__tzcnt_u64, unsigned long long, unsigned long long)
+test_1 (_tzcnt_u64, unsigned long long, unsigned long long)
+
+/* bmi2intrin.h */
+test_2 (_bzhi_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (_pdep_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_2 (_pext_u64, unsigned long long, unsigned long long,
+	unsigned long long)
+test_3 (_mulx_u64, unsigned long long, unsigned long long,
+	unsigned long long, unsigned long long *)
+
+/* cetintrin.h */
+test_0 (_get_ssp, unsigned long long)
+test_2 (_wrssq, void, unsigned long long, void *)
+test_2 (_wrussq, void, unsigned long long, void *)
+
+/* fxsrintrin.h */
+test_1 (_fxsave64, void, void *)
+test_1 (_fxrstor64, void, void *)
+
+/* ia32intrin.h  */
+test_1 (__bsfq, int, long long)
+test_1 (__bsrq, int, long long)
+test_1 (__bswapq, long long, long long)
+test_2 (__crc32q, unsigned long long, unsigned long long,
+	unsigned long long)
+test_1 (__popcntq, long long, unsigned long long)
+test_2 (__rolq, unsigned long long, unsigned long long, int)
+test_2 (__rorq, unsigned long long, unsigned long long, int)
+test_0 (__readeflags, unsigned long long)
+test_1 (__writeeflags, void, unsigned int)
+
+/* lzcntintrin.h */
+test_1 (__lzcnt64, unsigned long long, unsigned long long)
+test_1 (_lzcnt_u64, unsigned long long, unsigned long long)
+
+/* lwpintrin.h */
+test_2_i1 (__lwpval64, void, unsigned long long, unsigned int, 1)
+test_2_i1 (__lwpins64, unsigned char, unsigned long long,
+	   unsigned int, 1)
+
+/* movdirintrin.h */
+test_2 (_directstoreu_u64, void, void *, unsigned long long)
+
+/* popcntintrin.h */
+test_1 (_mm_popcnt_u64, long long, unsigned long long)
+
+/* rdseedintrin.h */
+test_1 (_rdseed64_step, int, unsigned long long *)
+
+/* tbmintrin.h */
+test_1_i1 (__bextri_u64, unsigned long long, unsigned long long, 1)
+test_1 (__blcfill_u64, unsigned long long, unsigned long long)
+test_1 (__blci_u64, unsigned long long, unsigned long long)
+test_1 (__blcic_u64, unsigned long long, unsigned long long)
+test_1 (__blcmsk_u64, unsigned long long, unsigned long long)
+test_1 (__blcs_u64, unsigned long long, unsigned long long)
+test_1 (__blsfill_u64, unsigned long long, unsigned long long)
+test_1 (__blsic_u64, unsigned long long, unsigned long long)
+test_1 (__t1mskc_u64, unsigned long long, unsigned long long)
+test_1 (__tzmsk_u64, unsigned long long, unsigned long long)
+
+/* uintrintrin.h */
+test_0 (_clui, void)
+test_1 (_senduipi, void, unsigned long long)
+test_0 (_stui, void)
+test_0 (_testui, unsigned char)
+
+/* x86gprintrin.h */
+test_1 (_ptwrite64, void, unsigned long long)
+test_0 (_readfsbase_u32, unsigned int)
+test_0 (_readfsbase_u64, unsigned long long)
+test_0 (_readgsbase_u32, unsigned int)
+test_0 (_readgsbase_u64, unsigned long long)
+test_1 (_rdrand64_step, int, unsigned long long *)
+test_1 (_writefsbase_u32, void, unsigned int)
+test_1 (_writefsbase_u64, void, unsigned long long)
+test_1 (_writegsbase_u32, void, unsigned int)
+test_1 (_writegsbase_u64, void, unsigned long long)
+
+/* xsaveintrin.h */
+test_2 (_xsave64, void, void *, long long)
+test_2 (_xrstor64, void, void *, long long)
+
+/* xsavecintrin.h */
+test_2 (_xsavec64, void, void *, long long)
+
+/* xsaveoptintrin.h */
+test_2 (_xsaveopt64, void, void *, long long)
+
+/* xsavesintrin.h */
+test_2 (_xsaves64, void, void *, long long)
+test_2 (_xrstors64, void, void *, long long)
+
+/* waitpkgintrin.h */
+test_1 (_umonitor, void, void *)
+test_2 (_umwait, unsigned char, unsigned int, unsigned long long)
+test_2 (_tpause, unsigned char, unsigned int, unsigned long long)
+
+#else /* !__x86_64__ */
+/* bmi2intrin.h */
+test_3 (_mulx_u32, unsigned int, unsigned int, unsigned int,
+	unsigned int *)
+
+/* cetintrin.h */
+test_0 (_get_ssp, unsigned int)
+#endif /* __x86_64__ */
+
+#endif /* __iamcu__ */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-5.c b/gcc/testsuite/gcc.target/i386/pr99744-5.c
new file mode 100644
index 00000000000..9e40e5ef428
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-5.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mmwait" } */
+
+/* Test calling MWAIT intrinsics from functions with the general-regs-only
+   target attribute.  */
+
+#include <x86gprintrin.h>
+
+#define _CONCAT(x,y) x ## y
+
+#define test_2(func, type, op1_type, op2_type)				\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B)			\
+  { return func (A, B); }
+
+#define test_3(func, type, op1_type, op2_type, op3_type)		\
+  __attribute__ ((target("general-regs-only")))				\
+  type _CONCAT(do_,func) (op1_type A, op2_type B, op3_type C)		\
+  { return func (A, B, C); }
+
+#ifndef __iamcu__
+/* mwaitintrin.h */
+test_3 (_mm_monitor, void, void const *, unsigned int, unsigned int)
+test_2 (_mm_mwait, void, unsigned int, unsigned int)
+#endif /* __iamcu__ */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-6.c b/gcc/testsuite/gcc.target/i386/pr99744-6.c
new file mode 100644
index 00000000000..4025918a9c9
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-6.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+#include <x86intrin.h>
+
+extern unsigned long long int curr_deadline;
+extern void bar (void);
+
+void
+foo1 (void)
+{
+  if (__rdtsc () < curr_deadline)
+    return;
+  bar ();
+}
+
+void
+foo2 (unsigned int *p)
+{
+  if (__rdtscp (p) < curr_deadline)
+    return;
+  bar ();
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-7.c b/gcc/testsuite/gcc.target/i386/pr99744-7.c
new file mode 100644
index 00000000000..30b7ca05966
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-7.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-options "-O -mno-avx -Wno-psabi" } */
+
+#include <x86intrin.h>
+
+void
+foo (__m256 *x)
+{
+  x[0] = _mm256_sub_ps (x[1], x[2]);
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
diff --git a/gcc/testsuite/gcc.target/i386/pr99744-8.c b/gcc/testsuite/gcc.target/i386/pr99744-8.c
new file mode 100644
index 00000000000..115183eede6
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr99744-8.c
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-options "-O -Wno-psabi" } */
+
+#include <x86intrin.h>
+
+__attribute__((target ("no-avx")))
+void
+foo (__m256 *x)
+{
+  x[0] = _mm256_sub_ps (x[1], x[2]);
+}
+
+/* { dg-error "target specific option mismatch" "" { target *-*-* } 0 } */
-- 
2.31.1


^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2021-08-03 14:46 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-04-14 22:39 [PATCH v4 0/2] x86: Add general_regs_only function attribute H.J. Lu
2021-04-14 22:39 ` [PATCH v4 1/2] x86: Move OPTION_MASK_* to i386-common.h H.J. Lu
2021-04-14 22:39 ` [PATCH v4 2/2] x86: Add general_regs_only function attribute H.J. Lu
2021-04-21  7:30   ` Uros Bizjak
2021-04-21 13:47     ` H.J. Lu
2021-04-21 16:54     ` Martin Sebor
2021-04-21 17:09   ` Martin Sebor
2021-04-21 20:58     ` H.J. Lu
2021-04-21 23:23       ` Martin Sebor
2021-04-22  1:01         ` H.J. Lu
2021-04-22  8:27           ` Richard Biener
2021-04-22  9:02           ` Jakub Jelinek
2021-04-22 11:23             ` Richard Biener
2021-04-22 11:57               ` H.J. Lu
2021-04-22 12:16                 ` Richard Biener
2021-04-22 12:22               ` Jakub Jelinek
2021-04-22 12:52                 ` Richard Biener
2021-04-22 12:55                   ` Richard Biener
2021-07-18  1:45                     ` [PATCH v5] <x86gprintrin.h>: Add pragma GCC target("general-regs-only") H.J. Lu
2021-07-31 15:35                       ` PING^1 " H.J. Lu
2021-08-03 11:47                       ` Richard Biener
2021-08-03 14:45                         ` [PATCH v6] " H.J. Lu

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).