From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga18.intel.com (mga18.intel.com [134.134.136.126]) by sourceware.org (Postfix) with ESMTPS id C2EA338582A6 for ; Fri, 14 Oct 2022 07:54:56 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org C2EA338582A6 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=intel.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1665734096; x=1697270096; h=from:to:cc:subject:date:message-id:in-reply-to: references; bh=GR/UauJVuxZYk0KN0v8IYtmSQGbUrITqNrVGtDwtskw=; b=akqIQ68x6Ki8ko/NgjAsXGgznvsSmjcON0OudNisJHEUdklKy1xwNTHO piqY2wSkljTCuB0+mdCOQmxxR7v0auztrOO7FfL8vkSv2OmQj9PzF22B2 0XwMCrS7ItgvuaNwC73G4jm9PqcMg+G1vUHIqAf0ZtNVhmfmXHtQ33dnO rOU0p8XbROLGTJ66AuOFi5CnS76sOkiV55yG+NWRghksVL0BrtnB/atu0 sKNBF585+acovsGTrOpI00ezDSfG6sH0J+5yxDoRLQmKoNhmfC7r9/J6u 8IrhhsRQiAyC8M7dTfi7iDjgxDaiP0QhX7UQaM4tXRaPdK1CtgQn6D44x w==; X-IronPort-AV: E=McAfee;i="6500,9779,10499"; a="288597861" X-IronPort-AV: E=Sophos;i="5.95,182,1661842800"; d="scan'208";a="288597861" Received: from orsmga002.jf.intel.com ([10.7.209.21]) by orsmga106.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 14 Oct 2022 00:54:56 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6500,9779,10499"; a="627488383" X-IronPort-AV: E=Sophos;i="5.95,182,1661842800"; d="scan'208";a="627488383" Received: from shvmail03.sh.intel.com ([10.239.245.20]) by orsmga002.jf.intel.com with ESMTP; 14 Oct 2022 00:54:48 -0700 Received: from shliclel320.sh.intel.com (shliclel320.sh.intel.com [10.239.240.127]) by shvmail03.sh.intel.com (Postfix) with ESMTP id DA0C21009C8C; Fri, 14 Oct 2022 15:54:47 +0800 (CST) From: Haochen Jiang To: gcc-patches@gcc.gnu.org Cc: hongtao.liu@intel.com, ubizjak@gmail.com, Hongyu Wang Subject: [PATCH 1/6] Support Intel AVX-IFMA Date: Fri, 14 Oct 2022 15:54:40 +0800 Message-Id: <20221014075445.7938-2-haochen.jiang@intel.com> X-Mailer: git-send-email 2.18.1 In-Reply-To: <20221014075445.7938-1-haochen.jiang@intel.com> References: <20221014075445.7938-1-haochen.jiang@intel.com> X-Spam-Status: No, score=-12.0 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,KAM_SHORT,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: From: Hongyu Wang gcc/ * common/config/i386/i386-common.cc (OPTION_MASK_ISA_AVXIFMA_SET, OPTION_MASK_ISA2_AVXIFMA_UNSET, OPTION_MASK_ISA2_AVX2_UNSET): New macro. (ix86_handle_option): Handle -mavxifma. * commmon/config/i386/i386-cpuinfo.h (processor_types): Add FEATURE_AVXIFMA. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for avxifma. * common/config/i386/cpuinfo.h (get_available_features): Detect avxifma. * config.gcc: Add avxifmaintrin.h * config/i386/avxifmaintrin.h: New. * config/i386/cpuid.h (bit_AVXIFMA): New. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __AVXIFMA__. * config/i386/i386-options.cc (isa2_opts): Add -mavxifma. (ix86_valid_target_attribute_inner_p): Handle avxifma. * config/i386/i386.h (TARGET_AVXIFMA, TARGET_AVXIFMA_P, PTA_AVXIFMA): New. * config/i386/i386.opt: Add option -mavxifma. * config/i386/immintrin.h: Inculde avxifmaintrin.h. * config/i386/sse.md (vpamdd52): Remove. (avx_vpmadd52_, vpamdd52, vpamdd52_maskz_1): New define_insn. * doc/invoke.texi: Document -mavxifma. * doc/extend.texi: Document avxifma. * doc/sourcebuild.text: Document target avxifma. gcc/testsuite/ * gcc.target/i386/avx512ifma-vpmaddhuq-1.c: Remane.. * gcc.target/i386/avx512ifma-vpmaddhuq-1a.c: To this. * gcc.target/i386/avx512ifma-vpmaddluq-1.c: Ditto. * gcc.target/i386/avx512ifma-vpmaddluq-1a.c: Ditto. * gcc.target/i386/avx512vl-vpmaddhuq-2.c: Ditto. * gcc.target/i386/avx512vl-vpmaddhuq-2a.c: Ditto. * gcc.target/i386/avx512vl-vpmaddluq-2.c: Ditto. * gcc.target/i386/avx512vl-vpmaddluq-2a.c: Ditto. * gcc.target/i386/avx-check.h: Add avxifma check. * gcc.target/i386/avx512ifma-vpmaddhuq-1b.c: New Test. * gcc.target/i386/avx512ifma-vpmaddluq-1b.c: Ditto. * gcc.target/i386/avx512vl-vpmaddhuq-2b.c: Ditto. * gcc.target/i386/avx512vl-vpmaddluq-2b.c: Ditto. * gcc.target/i386/avx-ifma-1.c: Ditto. * gcc.target/i386/avx-ifma-vpmaddhuq-2.c: Ditto. * gcc.target/i386/avx-ifma-vpmaddluq-2.c: Ditto. * gcc.target/i386/sse-12.c: Add -mavxifma. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * g++.dg/other/i386-2.C: Ditto. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/builtin_target.c: Detect avxifma. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * lib/target-supports.exp (check_effective_target_avxifma): New. --- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.cc | 20 ++++- gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 3 +- gcc/config/i386/avxifmaintrin.h | 78 +++++++++++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin.def | 6 ++ gcc/config/i386/i386-c.cc | 2 + gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-options.cc | 4 +- gcc/config/i386/i386.opt | 5 ++ gcc/config/i386/immintrin.h | 2 + gcc/config/i386/sse.md | 42 +++++++++- gcc/doc/extend.texi | 5 ++ gcc/doc/invoke.texi | 9 ++- gcc/doc/sourcebuild.texi | 3 + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/avx-check.h | 6 +- gcc/testsuite/gcc.target/i386/avx-ifma-1.c | 20 +++++ .../gcc.target/i386/avx-ifma-vpmaddhuq-2.c | 72 +++++++++++++++++ .../gcc.target/i386/avx-ifma-vpmaddluq-2.c | 61 +++++++++++++++ ...pmaddhuq-1.c => avx512ifma-vpmaddhuq-1a.c} | 0 .../gcc.target/i386/avx512ifma-vpmaddhuq-1b.c | 33 ++++++++ ...pmaddluq-1.c => avx512ifma-vpmaddluq-1a.c} | 0 .../gcc.target/i386/avx512ifma-vpmaddluq-1b.c | 33 ++++++++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/sse-12.c | 2 +- gcc/testsuite/gcc.target/i386/sse-13.c | 2 +- gcc/testsuite/gcc.target/i386/sse-14.c | 2 +- gcc/testsuite/gcc.target/i386/sse-22.c | 4 +- gcc/testsuite/gcc.target/i386/sse-23.c | 2 +- gcc/testsuite/lib/target-supports.exp | 12 +++ 34 files changed, 423 insertions(+), 17 deletions(-) create mode 100644 gcc/config/i386/avxifmaintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c rename gcc/testsuite/gcc.target/i386/{avx512ifma-vpmaddhuq-1.c => avx512ifma-vpmaddhuq-1a.c} (100%) create mode 100644 gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c rename gcc/testsuite/gcc.target/i386/{avx512ifma-vpmaddluq-1.c => avx512ifma-vpmaddluq-1a.c} (100%) create mode 100644 gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index b5c1b21e554..9bb21c6cacc 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -793,6 +793,8 @@ get_available_features (struct __processor_model *cpu_model, { if (eax & bit_AVXVNNI) set_feature (FEATURE_AVXVNNI); + if (eax & bit_AVXIFMA) + set_feature (FEATURE_AVXIFMA); } if (avx512_usable) { diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index d6a68dc9b1d..4de7906b247 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -76,6 +76,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_AVX512F_SET) #define OPTION_MASK_ISA_AVX512IFMA_SET \ (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512F_SET) +#define OPTION_MASK_ISA2_AVXIFMA_SET OPTION_MASK_ISA2_AVXIFMA #define OPTION_MASK_ISA_AVX512VBMI_SET \ (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512BW_SET) #define OPTION_MASK_ISA2_AVX5124FMAPS_SET OPTION_MASK_ISA2_AVX5124FMAPS @@ -212,7 +213,8 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA_AVX2_UNSET \ (OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET) #define OPTION_MASK_ISA2_AVX2_UNSET \ - (OPTION_MASK_ISA2_AVXVNNI_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET) + (OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \ + | OPTION_MASK_ISA2_AVX512F_UNSET) #define OPTION_MASK_ISA_AVX512F_UNSET \ (OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \ | OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \ @@ -230,6 +232,7 @@ along with GCC; see the file COPYING3. If not see (OPTION_MASK_ISA_AVX512BW | OPTION_MASK_ISA_AVX512VBMI_UNSET) #define OPTION_MASK_ISA_AVX512VL_UNSET OPTION_MASK_ISA_AVX512VL #define OPTION_MASK_ISA_AVX512IFMA_UNSET OPTION_MASK_ISA_AVX512IFMA +#define OPTION_MASK_ISA2_AVXIFMA_UNSET OPTION_MASK_ISA2_AVXIFMA #define OPTION_MASK_ISA_AVX512VBMI_UNSET OPTION_MASK_ISA_AVX512VBMI #define OPTION_MASK_ISA2_AVX5124FMAPS_UNSET OPTION_MASK_ISA2_AVX5124FMAPS #define OPTION_MASK_ISA2_AVX5124VNNIW_UNSET OPTION_MASK_ISA2_AVX5124VNNIW @@ -1124,6 +1127,21 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mavxifma: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXIFMA_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXIFMA_SET; + opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET; + opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVXIFMA_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVXIFMA_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index 643fbd97378..968f9a56a6c 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -240,6 +240,7 @@ enum processor_features FEATURE_X86_64_V2, FEATURE_X86_64_V3, FEATURE_X86_64_V4, + FEATURE_AVXIFMA, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index 2d0646a68f8..b05b4bb8f0d 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -175,4 +175,5 @@ ISA_NAMES_TABLE_START ISA_NAMES_TABLE_ENTRY("x86-64-v2", FEATURE_X86_64_V2, P_X86_64_V2, NULL) ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL) ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL) + ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index 8d5972fecf7..12365abbf86 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -421,7 +421,8 @@ i[34567]86-*-* | x86_64-*-*) tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h amxbf16intrin.h x86gprintrin.h uintrintrin.h hresetintrin.h keylockerintrin.h avxvnniintrin.h - mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h" + mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h + avxifmaintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/avxifmaintrin.h b/gcc/config/i386/avxifmaintrin.h new file mode 100644 index 00000000000..8f512c3ecb0 --- /dev/null +++ b/gcc/config/i386/avxifmaintrin.h @@ -0,0 +1,78 @@ +/* Copyright (C) 2020 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +#ifndef _IMMINTRIN_H_INCLUDED +#error "Never use directly; include instead." +#endif + +#ifndef _AVXIFMAINTRIN_H_INCLUDED +#define _AVXIFMAINTRIN_H_INCLUDED + +#ifndef __AVXIFMA__ +#pragma GCC push_options +#pragma GCC target("avxifma") +#define __DISABLE_AVXIFMA__ +#endif /* __AVXIFMA__ */ + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52lo_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_avx_vpmadd52luq128 ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z); +} + +extern __inline __m128i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd52hi_avx_epu64 (__m128i __X, __m128i __Y, __m128i __Z) +{ + return (__m128i) __builtin_ia32_avx_vpmadd52huq128 ((__v2di) __X, + (__v2di) __Y, + (__v2di) __Z); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52lo_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_avx_vpmadd52luq256 ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z); +} + +extern __inline __m256i +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +_mm256_madd52hi_avx_epu64 (__m256i __X, __m256i __Y, __m256i __Z) +{ + return (__m256i) __builtin_ia32_avx_vpmadd52huq256 ((__v4di) __X, + (__v4di) __Y, + (__v4di) __Z); +} + +#ifdef __DISABLE_AVXIFMA__ +#undef __DISABLE_AVXIFMA__ +#pragma GCC pop_options +#endif /* __DISABLE_AVXIFMA__ */ + +#endif /* _AVXIFMAINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index a4c2fed7eda..9885699efd5 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -28,6 +28,7 @@ #define bit_AVXVNNI (1 << 4) #define bit_AVX512BF16 (1 << 5) #define bit_HRESET (1 << 22) +#define bit_AVXIFMA (1 << 23) /* %ecx */ #define bit_SSE3 (1 << 0) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index dea52a28d28..4a89099a00f 100644 --- a/gcc/config/i386/i386-builtin.def +++ b/gcc/config/i386/i386-builtin.def @@ -2499,6 +2499,12 @@ BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_mask, "__builtin_ia32_vpmadd52huq128_mask", IX86_BUILTIN_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) BDESC (OPTION_MASK_ISA_AVX512IFMA | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpamdd52huqv2di_maskz, "__builtin_ia32_vpmadd52huq128_maskz", IX86_BUILTIN_VPMADD52HUQ128_MASKZ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI_UQI) +/* AVX_IFMA */ +BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52luq_v4di, "__builtin_ia32_avx_vpmadd52luq256", IX86_BUINTIN_AVX_VPMADD52LUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52huq_v4di, "__builtin_ia32_avx_vpmadd52huq256", IX86_BUINTIN_AVX_VPMADD52HUQ256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_V4DI) +BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52luq_v2di, "__builtin_ia32_avx_vpmadd52luq128", IX86_BUINTIN_AVX_VPMADD52LUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) +BDESC (0, OPTION_MASK_ISA2_AVXIFMA, CODE_FOR_avx_vpmadd52huq_v2di, "__builtin_ia32_avx_vpmadd52huq128", IX86_BUINTIN_AVX_VPMADD52HUQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_V2DI) + /* AVX512VBMI */ BDESC (OPTION_MASK_ISA_AVX512VBMI, 0, CODE_FOR_vpmultishiftqbv64qi_mask, "__builtin_ia32_vpmultishiftqb512_mask", IX86_BUILTIN_VPMULTISHIFTQB512, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI_V64QI_UDI) BDESC (OPTION_MASK_ISA_AVX512VBMI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpmultishiftqbv32qi_mask, "__builtin_ia32_vpmultishiftqb256_mask", IX86_BUILTIN_VPMULTISHIFTQB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI_V32QI_USI) diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index eb0e3b36a76..3494ec035d5 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -633,6 +633,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__WIDEKL__"); if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNI) def_or_undef (parse_in, "__AVXVNNI__"); + if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA) + def_or_undef (parse_in, "__AVXIFMA__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 83659d0bea4..6e0254ce418 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -109,3 +109,4 @@ DEF_PTA(KL) DEF_PTA(WIDEKL) DEF_PTA(AVXVNNI) DEF_PTA(AVX512FP16) +DEF_PTA(AVXIFMA) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index acb2291e70f..5facb64c2a8 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -226,7 +226,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mkl", OPTION_MASK_ISA2_KL }, { "-mwidekl", OPTION_MASK_ISA2_WIDEKL }, { "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI }, - { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 } + { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, + { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA } }; static struct ix86_target_opts isa_opts[] = { @@ -1072,6 +1073,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("hreset", OPT_mhreset), IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni), IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16), + IX86_ATTR_ISA ("avxifma", OPT_mavxifma), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 0dbaacb57ed..36e28b7063d 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1214,3 +1214,8 @@ Do not use GOT to access external symbols. -param=x86-stlf-window-ninsns= Target Joined UInteger Var(x86_stlf_window_ninsns) Init(64) Param Instructions number above which STFL stall penalty can be compensated. + +mavxifma +Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save +Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and +AVXIFMA built-in functions and code generation. diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h index 6afd78c2b6f..e9d4e975243 100644 --- a/gcc/config/i386/immintrin.h +++ b/gcc/config/i386/immintrin.h @@ -44,6 +44,8 @@ #include +#include + #include #include diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 076064f97e6..331347569ea 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -27867,6 +27867,19 @@ (define_int_attr vpmadd52type [(UNSPEC_VPMADD52LUQ "luq") (UNSPEC_VPMADD52HUQ "huq")]) +(define_insn "avx_vpmadd52_" + [(set (match_operand:VI8_AVX2 0 "register_operand" "=x") + (unspec:VI8_AVX2 + [(match_operand:VI8_AVX2 1 "register_operand" "0") + (match_operand:VI8_AVX2 2 "register_operand" "x") + (match_operand:VI8_AVX2 3 "nonimmediate_operand" "xm")] + VPMADD52))] + "TARGET_AVXIFMA" + "%{vex%} vpmadd52\t{%3, %2, %0|%0, %2, %3}" + [(set_attr "type" "ssemuladd") + (set_attr "prefix" "vex") + (set_attr "mode" "")]) + (define_expand "vpamdd52huq_maskz" [(match_operand:VI8_AVX512VL 0 "register_operand") (match_operand:VI8_AVX512VL 1 "register_operand") @@ -27895,7 +27908,7 @@ DONE; }) -(define_insn "vpamdd52" +(define_insn "vpamdd52" [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") (unspec:VI8_AVX512VL [(match_operand:VI8_AVX512VL 1 "register_operand" "0") @@ -27903,7 +27916,32 @@ (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] VPMADD52))] "TARGET_AVX512IFMA" - "vpmadd52\t{%3, %2, %0|%0, %2, %3}" +{ + if ( <=32 + && TARGET_AVXIFMA + && !EXT_REX_SSE_REG_P (operands[1]) + && !EXT_REX_SSE_REG_P (operands[2]) + && !EXT_REX_SSE_REG_P (operands[3])) + return "%{vex%} vpmadd52\t{%3, %2, %0|%0, %2, %3}"; + else + return "vpmadd52\t{%3, %2, %0|%0, %2, %3}"; +} + [(set_attr "type" "ssemuladd") + (set_attr "prefix" "maybe_evex") + (set_attr "mode" "")]) + +(define_insn "vpamdd52_maskz_1" + [(set (match_operand:VI8_AVX512VL 0 "register_operand" "=v") + (vec_merge:VI8_AVX512VL + (unspec:VI8_AVX512VL + [(match_operand:VI8_AVX512VL 1 "register_operand" "0") + (match_operand:VI8_AVX512VL 2 "register_operand" "v") + (match_operand:VI8_AVX512VL 3 "nonimmediate_operand" "vm")] + VPMADD52) + (match_operand:VI8_AVX512VL 4 "const0_operand" "C") + (match_operand: 5 "register_operand" "Yk")))] + "TARGET_AVX512IFMA" + "vpmadd52\t{%3, %2, %0%{%5%}%{z%}|%0%{%5%}%{z%}, %2, %3}" [(set_attr "type" "ssemuladd") (set_attr "prefix" "evex") (set_attr "mode" "")]) diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index cfbe32afce9..edecf5c0070 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7060,6 +7060,11 @@ Enable/disable the generation of the WIDEKL instructions. @cindex @code{target("avxvnni")} function attribute, x86 Enable/disable the generation of the AVXVNNI instructions. +@item avxifma +@itemx no-avxifma +@cindex @code{target("avxifma")} function attribute, x86 +Enable/disable the generation of the AVXIFMA instructions. + @item cld @itemx no-cld @cindex @code{target("cld")} function attribute, x86 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index a9ecc4426a4..886fc1d0164 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options. -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol --mavx512fp16 @gol +-mavx512fp16 -mavxifma @gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -32893,6 +32893,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. @need 200 @itemx -mwidekl @opindex mwidekl +@need 200 +@itemx -mavxifma +@opindex mavxifma These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, @@ -32902,8 +32905,8 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP, XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, -UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16 -or CLDEMOTE extended instruction sets. Each has a corresponding +UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16, +AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding @option{-mno-} option to disable use of these instructions. These extensions are also available as built-in functions: see diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index c81e2ffd43a..0173acf4a65 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2490,6 +2490,9 @@ Target supports the execution of @code{avx512f} instructions. @item avx512vp2intersect Target supports the execution of @code{avx512vp2intersect} instructions. +@item avxifma +Target supports the execution of @code{avxifma} instructions. + @item amx_tile Target supports the execution of @code{amx-tile} instructions. diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index fba3d1ac684..5388606779b 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index 5cc0fa83457..86cedd3d32f 100644 --- a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/avx-check.h b/gcc/testsuite/gcc.target/i386/avx-check.h index 7ddca9d7b80..24ee6ab4efd 100644 --- a/gcc/testsuite/gcc.target/i386/avx-check.h +++ b/gcc/testsuite/gcc.target/i386/avx-check.h @@ -22,7 +22,11 @@ main () /* Run AVX test only if host has AVX support. */ if (((ecx & (bit_AVX | bit_OSXSAVE)) == (bit_AVX | bit_OSXSAVE)) - && avx_os_support ()) + && avx_os_support () +#ifdef AVXIFMA + && __builtin_cpu_supports ("avxifma") +#endif + ) { do_test (); #ifdef DEBUG diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-1.c b/gcc/testsuite/gcc.target/i386/avx-ifma-1.c new file mode 100644 index 00000000000..6388373123c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-1.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ + +#include + +volatile __m256i x,y,z; +volatile __m128i x_,y_,z_; + +void extern +avxifma_test (void) +{ + x = _mm256_madd52hi_avx_epu64 (x, y, z); + x = _mm256_madd52lo_avx_epu64 (x, y, z); + x_ = _mm_madd52hi_avx_epu64 (x_, y_, z_); + x_ = _mm_madd52lo_avx_epu64 (x_, y_, z_); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c new file mode 100644 index 00000000000..c9efee33091 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddhuq-2.c @@ -0,0 +1,72 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxifma" } */ +/* { dg-require-effective-target avxifma } */ +#define AVXIFMA +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +void +CALC (long long *r, long long *s1, long long *s2, long long *s3, int size) +{ + int i; + long long a,b; + + for (i = 0; i < size; i++) + { + /* Simulate higher 52 bits out of 104 bit, + by shifting opernads with 0 in lower 26 bits. */ + a = s2[i] >> 26; + b = s3[i] >> 26; + r[i] = a * b + s1[i]; + } +} + +void +TEST (void) +{ + union256i_q src1_256, src2_256, dst_256; + union128i_q src1_128, src2_128, dst_128; + long long dst_ref_256[4], dst_ref_128[2]; + int i; + + for (i = 0; i < 4; i++) + { + src1_256.a[i] = 15 + 3467 * i; + src2_256.a[i] = 9217 + i; + src1_256.a[i] = src1_256.a[i] << 26; + src2_256.a[i] = src2_256.a[i] << 26; + src1_256.a[i] &= ((1LL << 52) - 1); + src2_256.a[i] &= ((1LL << 52) - 1); + dst_256.a[i] = -1; + } + + for (i = 0; i < 2; i++) + { + src1_128.a[i] = 16 + 3467 * i; + src2_128.a[i] = 9127 + i; + src1_128.a[i] = src1_128.a[i] << 26; + src2_128.a[i] = src2_128.a[i] << 26; + src1_128.a[i] &= ((1LL << 52) - 1); + src2_128.a[i] &= ((1LL << 52) - 1); + dst_128.a[i] = -1; + } + + CALC (dst_ref_256, dst_256.a, src1_256.a, src2_256.a, 4); + dst_256.x = _mm256_madd52hi_avx_epu64 (dst_256.x, src1_256.x, src2_256.x); + if (check_union256i_q (dst_256, dst_ref_256)) + abort (); + + CALC (dst_ref_128, dst_128.a, src1_128.a, src2_128.a, 2); + dst_128.x = _mm_madd52hi_avx_epu64 (dst_128.x, src1_128.x, src2_128.x); + if (check_union128i_q (dst_128, dst_ref_128)) + abort (); + +} + diff --git a/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c new file mode 100644 index 00000000000..600978ea9ad --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ifma-vpmaddluq-2.c @@ -0,0 +1,61 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -mavxifma" } */ +/* { dg-require-effective-target avxifma } */ +#define AVXIFMA +#ifndef CHECK +#define CHECK "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK + +void +CALC (unsigned long long *r, unsigned long long *s1, + unsigned long long *s2, unsigned long long *s3, + int size) +{ + int i; + + for (i = 0; i < size; i++) + { + r[i] = s2[i] * s3[i] + s1[i]; + } +} + +void +TEST (void) +{ + union256i_q src1_256, src2_256, dst_256; + union128i_q src1_128, src2_128, dst_128; + unsigned long long dst_ref_256[4], dst_ref_128[2]; + int i; + + for (i = 0; i < 4; i++) + { + src1_256.a[i] = 3450 * i; + src2_256.a[i] = 7863 * i; + dst_256.a[i] = 117; + } + + for (i = 0; i < 2; i++) + { + src1_128.a[i] = 3540 * i; + src2_128.a[i] = 7683 * i; + dst_128.a[i] = 117; + } + + CALC (dst_ref_256, dst_256.a, src1_256.a, src2_256.a, 4); + dst_256.x = _mm256_madd52lo_avx_epu64 (dst_256.x, src1_256.x, src2_256.x); + if (check_union256i_q (dst_256, dst_ref_256)) + abort (); + + CALC (dst_ref_128, dst_128.a, src1_128.a, src2_128.a, 2); + dst_128.x = _mm_madd52lo_avx_epu64 (dst_128.x, src1_128.x, src2_128.x); + if (check_union128i_q (dst_128, dst_ref_128)) + abort (); + +} + diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1a.c similarity index 100% rename from gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1.c rename to gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1a.c diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c new file mode 100644 index 00000000000..67e94baa01b --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddhuq-1b.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -mavx512vl -mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52huq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ + +#include + +volatile __m512i _x1, _y1, _z1; +volatile __m256i _x2, _y2, _z2; +volatile __m128i _x3, _y3, _z3; + +void extern +avx512ifma_test (void) +{ + _x3 = _mm_madd52hi_epu64 (_x3, _y3, _z3); + _x3 = _mm_mask_madd52hi_epu64 (_x3, 2, _y3, _z3); + _x3 = _mm_maskz_madd52hi_epu64 (2, _x3, _y3, _z3); + _x2 = _mm256_madd52hi_epu64 (_x2, _y2, _z2); + _x2 = _mm256_mask_madd52hi_epu64 (_x2, 3, _y2, _z2); + _x2 = _mm256_maskz_madd52hi_epu64 (3, _x2, _y2, _z2); + _x1 = _mm512_madd52hi_epu64 (_x1, _y1, _z1); + _x1 = _mm512_mask_madd52hi_epu64 (_x1, 3, _y1, _z1); + _x1 = _mm512_maskz_madd52hi_epu64 (3, _x1, _y1, _z1); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1a.c similarity index 100% rename from gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1.c rename to gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1a.c diff --git a/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c new file mode 100644 index 00000000000..4b8ea27f403 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512ifma-vpmaddluq-1b.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512ifma -mavx512vl -mavxifma -O2" } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\[^\n\]*%xmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "\{vex\} vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\[^\n\]*%ymm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+" 3 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\[^\{\]" 1 } } */ +/* { dg-final { scan-assembler-times "vpmadd52luq\[ \\t\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\[^\n\]*%zmm\[0-9\]+\{%k\[1-7\]\}\{z\}" 1 } } */ + +#include + +volatile __m512i _x1, _y1, _z1; +volatile __m256i _x2, _y2, _z2; +volatile __m128i _x3, _y3, _z3; + +void extern +avx512ifma_test (void) +{ + _x3 = _mm_madd52lo_epu64 (_x3, _y3, _z3); + _x3 = _mm_mask_madd52lo_epu64 (_x3, 2, _y3, _z3); + _x3 = _mm_maskz_madd52lo_epu64 (2, _x3, _y3, _z3); + _x2 = _mm256_madd52lo_epu64 (_x2, _y2, _z2); + _x2 = _mm256_mask_madd52lo_epu64 (_x2, 3, _y2, _z2); + _x2 = _mm256_maskz_madd52lo_epu64 (3, _x2, _y2, _z2); + _x1 = _mm512_madd52lo_epu64 (_x1, _y1, _z1); + _x1 = _mm512_mask_madd52lo_epu64 (_x1, 3, _y1, _z1); + _x1 = _mm512_maskz_madd52lo_epu64 (3, _x1, _y1, _z1); +} diff --git a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index b76dddb86a2..466555c0d06 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -80,6 +80,7 @@ extern void test_keylocker (void) __attribute__((__target__("kl"))); extern void test_widekl (void) __attribute__((__target__("widekl"))); extern void test_avxvnni (void) __attribute__((__target__("avxvnni"))); extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); +extern void test_avxifma (void) __attribute__((__target__("avxifma"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -161,6 +162,7 @@ extern void test_no_keylocker (void) __attribute__((__target__("no-kl"))); extern void test_no_widekl (void) __attribute__((__target__("no-widekl"))); extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni"))); extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16"))); +extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-12.c b/gcc/testsuite/gcc.target/i386/sse-12.c index 375d4d1b4de..fde56261d8f 100644 --- a/gcc/testsuite/gcc.target/i386/sse-12.c +++ b/gcc/testsuite/gcc.target/i386/sse-12.c @@ -3,7 +3,7 @@ popcntintrin.h gfniintrin.h and mm_malloc.h are usable with -O -std=c89 -pedantic-errors. */ /* { dg-do compile } */ -/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni" } */ +/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index e285c307d00..bb29555babe 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index f41493b93f3..f2701ddaaf9 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16" } */ +/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */ /* { dg-add-options bind_pic_locally } */ #include diff --git a/gcc/testsuite/gcc.target/i386/sse-22.c b/gcc/testsuite/gcc.target/i386/sse-22.c index 31492ef3697..3d196975b1e 100644 --- a/gcc/testsuite/gcc.target/i386/sse-22.c +++ b/gcc/testsuite/gcc.target/i386/sse-22.c @@ -103,7 +103,7 @@ #ifndef DIFFERENT_PRAGMAS -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma") #endif /* Following intrinsics require immediate arguments. They @@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) /* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */ #ifdef DIFFERENT_PRAGMAS -#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") +#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma") #endif #include test_1 (_cvtss_sh, unsigned short, float, 1) diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index f71a7b29157..d3a233f90fc 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -843,6 +843,6 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16") +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma") #include diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index fdd88e6a516..69de3b96bfc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9506,6 +9506,18 @@ proc check_effective_target_avxvnni { } { } "-mavxvnni" ] } +# Return 1 if avxifma instructions can be compiled. +proc check_effective_target_avxifma { } { + return [check_no_compiler_messages avxifma object { + typedef long long __v4di __attribute__ ((__vector_size__ (32))); + __v4di + _mm256_maddlo_avx_epu64 (__v4di __X, __v4di __Y, __v4di __Z) + { + return __builtin_ia32_avx_vpmadd52luq256 (__X, __Y, __Z); + } + } "-O0 -mavxifma" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.18.1