From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from mga02.intel.com (mga02.intel.com [134.134.136.20]) by sourceware.org (Postfix) with ESMTPS id E4EC03857C42 for ; Mon, 24 Oct 2022 09:01:33 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.4.1 sourceware.org E4EC03857C42 Authentication-Results: sourceware.org; dmarc=pass (p=none dis=none) header.from=intel.com Authentication-Results: sourceware.org; spf=pass smtp.mailfrom=intel.com DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/simple; d=intel.com; i=@intel.com; q=dns/txt; s=Intel; t=1666602094; x=1698138094; h=from:to:cc:subject:date:message-id:in-reply-to: references; bh=otop8yurvlE7WsYKl/KWVAE1JLVIybYk8pc5/nKYirE=; b=LhkDfgP5/QhOadkO8m9rcCOjCFqzxpqa/JepzpohwrZI9xxr0ONydlsK rM9h/ty3ZOUBjfvd0K2mKWkdZO1E/dY5br2csa34uzBKCE3Me6tk7c3CJ IV22THVCPSOVJZ6V+GYJ0EIgK+gIlhsLiQE5mWJb3M1NrEIDRRlSTrTJQ yn2WrPA4d+Ek+nKObK2l0tprTJWYTgNo9YlRgc/zaOPinEUrgfzxHoJgJ HAkB079oPVfKomI4w6J1MSex7kLkbXAI5uCX3swSJRLsohnxdbPE7aR6x Ku7MS6CSBpxpVTiBW8Svr8ptSN0FICQuKU/z1rsEO3TsUij4Oa+bWVfDe Q==; X-IronPort-AV: E=McAfee;i="6500,9779,10509"; a="294770840" X-IronPort-AV: E=Sophos;i="5.95,207,1661842800"; d="scan'208";a="294770840" Received: from fmsmga006.fm.intel.com ([10.253.24.20]) by orsmga101.jf.intel.com with ESMTP/TLS/ECDHE-RSA-AES256-GCM-SHA384; 24 Oct 2022 02:01:32 -0700 X-ExtLoop1: 1 X-IronPort-AV: E=McAfee;i="6500,9779,10509"; a="876374752" X-IronPort-AV: E=Sophos;i="5.95,207,1661842800"; d="scan'208";a="876374752" Received: from shvmail03.sh.intel.com ([10.239.245.20]) by fmsmga006.fm.intel.com with ESMTP; 24 Oct 2022 02:01:26 -0700 Received: from shliclel320.sh.intel.com (shliclel320.sh.intel.com [10.239.240.127]) by shvmail03.sh.intel.com (Postfix) with ESMTP id 9FE2A100569D; Mon, 24 Oct 2022 17:01:25 +0800 (CST) From: Haochen Jiang To: gcc-patches@gcc.gnu.org Cc: hongtao.liu@intel.com, ubizjak@gmail.com Subject: [PATCH] Support Intel CMPccXADD Date: Mon, 24 Oct 2022 17:01:25 +0800 Message-Id: 
<20221024090125.16371-1-haochen.jiang@intel.com> X-Mailer: git-send-email 2.18.1 In-Reply-To: <20221014075445.7938-6-haochen.jiang@intel.com> References: <20221014075445.7938-6-haochen.jiang@intel.com> X-Spam-Status: No, score=-11.4 required=5.0 tests=BAYES_00,DKIMWL_WL_HIGH,DKIM_SIGNED,DKIM_VALID,DKIM_VALID_AU,DKIM_VALID_EF,GIT_PATCH_0,KAM_SHORT,RCVD_IN_MSPIKE_H3,RCVD_IN_MSPIKE_WL,SPF_HELO_NONE,SPF_NONE,TXREP autolearn=ham autolearn_force=no version=3.4.6 X-Spam-Checker-Version: SpamAssassin 3.4.6 (2021-04-09) on server2.sourceware.org List-Id: Hi all, I just refined the CMPccXADD patch so that the order of the enum in the intrinsic header is aligned with the opcode encoding of the condition codes. Ok for trunk? BRs, Haochen gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_available_features): Detect cmpccxadd. * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_CMPCCXADD_SET, OPTION_MASK_ISA2_CMPCCXADD_UNSET): New. (ix86_handle_option): Handle -mcmpccxadd. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_CMPCCXADD. * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for cmpccxadd. * config.gcc: Add cmpccxaddintrin.h. * config/i386/cpuid.h (bit_CMPCCXADD): New. * config/i386/i386-builtin-types.def: Add DEF_FUNCTION_TYPE(INT, PINT, INT, INT, INT) and DEF_FUNCTION_TYPE(LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT). * config/i386/i386-builtin.def (BDESC): Add new builtins. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __CMPCCXADD__. * config/i386/i386-expand.cc (ix86_expand_special_args_builtin): Add new local variable to indicate constant position. Handle INT_FTYPE_PINT_INT_INT_INT and LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT. * config/i386/i386-isa.def (CMPCCXADD): Add DEF_PTA(CMPCCXADD). * config/i386/i386-options.cc (isa2_opts): Add -mcmpccxadd. (ix86_valid_target_attribute_inner_p): Handle cmpccxadd. * config/i386/i386.opt: Add option -mcmpccxadd. * config/i386/sync.md (UNSPECV_CMPCCXADD): New. (@cmpccxadd_<mode>_1): New define_insn. (cmpccxadd_<mode>): New define_expand. 
* config/i386/x86gprintrin.h: Include cmpccxaddintrin.h. * doc/extend.texi: Document cmpccxadd. * doc/invoke.texi: Document -mcmpccxadd. * doc/sourcebuild.texi: Document target cmpccxadd. * config/i386/cmpccxaddintrin.h: New file. gcc/testsuite/ChangeLog: * g++.dg/other/i386-2.C: Add -mcmpccxadd. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/avx-1.c: Add builtin define for enum. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * gcc.target/i386/sse-13.c: Add builtin define for enum. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/x86gprintrin-1.c: Add -mcmpccxadd for 64 bit target. * gcc.target/i386/x86gprintrin-2.c: Add -mcmpccxadd for 64 bit target. Add builtin define for enum. * gcc.target/i386/x86gprintrin-3.c: Add -mcmpccxadd for 64 bit target. * gcc.target/i386/x86gprintrin-4.c: Add -mcmpccxadd for 64 bit target. * gcc.target/i386/x86gprintrin-5.c: Add -mcmpccxadd for 64 bit target. Add builtin define for enum. * gcc.target/i386/cmpccxadd-1.c: New test. * gcc.target/i386/cmpccxadd-2.c: New test. * lib/target-supports.exp (check_effective_target_cmpccxadd): New proc. 
--- gcc/common/config/i386/cpuinfo.h | 2 + gcc/common/config/i386/i386-common.cc | 15 ++ gcc/common/config/i386/i386-cpuinfo.h | 1 + gcc/common/config/i386/i386-isas.h | 1 + gcc/config.gcc | 3 +- gcc/config/i386/cmpccxaddintrin.h | 89 +++++++++++ gcc/config/i386/cpuid.h | 1 + gcc/config/i386/i386-builtin-types.def | 4 + gcc/config/i386/i386-builtin.def | 4 + gcc/config/i386/i386-c.cc | 2 + gcc/config/i386/i386-expand.cc | 22 ++- gcc/config/i386/i386-isa.def | 1 + gcc/config/i386/i386-options.cc | 4 +- gcc/config/i386/i386.opt | 5 + gcc/config/i386/sync.md | 42 ++++++ gcc/config/i386/x86gprintrin.h | 2 + gcc/doc/extend.texi | 5 + gcc/doc/invoke.texi | 10 +- gcc/doc/sourcebuild.texi | 3 + gcc/testsuite/g++.dg/other/i386-2.C | 2 +- gcc/testsuite/g++.dg/other/i386-3.C | 2 +- gcc/testsuite/gcc.target/i386/avx-1.c | 4 + gcc/testsuite/gcc.target/i386/cmpccxadd-1.c | 61 ++++++++ gcc/testsuite/gcc.target/i386/cmpccxadd-2.c | 138 ++++++++++++++++++ gcc/testsuite/gcc.target/i386/funcspec-56.inc | 2 + gcc/testsuite/gcc.target/i386/sse-13.c | 6 +- gcc/testsuite/gcc.target/i386/sse-23.c | 6 +- .../gcc.target/i386/x86gprintrin-1.c | 2 +- .../gcc.target/i386/x86gprintrin-2.c | 6 +- .../gcc.target/i386/x86gprintrin-3.c | 2 +- .../gcc.target/i386/x86gprintrin-4.c | 2 +- .../gcc.target/i386/x86gprintrin-5.c | 6 +- gcc/testsuite/lib/target-supports.exp | 10 ++ 33 files changed, 450 insertions(+), 15 deletions(-) create mode 100644 gcc/config/i386/cmpccxaddintrin.h create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-1.c create mode 100644 gcc/testsuite/gcc.target/i386/cmpccxadd-2.c diff --git a/gcc/common/config/i386/cpuinfo.h b/gcc/common/config/i386/cpuinfo.h index 77d7e315640..49979ca3f1d 100644 --- a/gcc/common/config/i386/cpuinfo.h +++ b/gcc/common/config/i386/cpuinfo.h @@ -803,6 +803,8 @@ get_available_features (struct __processor_model *cpu_model, __cpuid_count (7, 1, eax, ebx, ecx, edx); if (eax & bit_HRESET) set_feature (FEATURE_HRESET); + if (eax & bit_CMPCCXADD) + 
set_feature(FEATURE_CMPCCXADD); if (avx_usable) { if (eax & bit_AVXVNNI) diff --git a/gcc/common/config/i386/i386-common.cc b/gcc/common/config/i386/i386-common.cc index 20bec7f6dfc..f7ce18f20f2 100644 --- a/gcc/common/config/i386/i386-common.cc +++ b/gcc/common/config/i386/i386-common.cc @@ -110,6 +110,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16 #define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8 #define OPTION_MASK_ISA2_AVXNECONVERT_SET OPTION_MASK_ISA2_AVXNECONVERT +#define OPTION_MASK_ISA2_CMPCCXADD_SET OPTION_MASK_ISA2_CMPCCXADD /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same as -msse4.2. */ @@ -283,6 +284,7 @@ along with GCC; see the file COPYING3. If not see #define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL #define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8 #define OPTION_MASK_ISA2_AVXNECONVERT_UNSET OPTION_MASK_ISA2_AVXNECONVERT +#define OPTION_MASK_ISA2_CMPCCXADD_UNSET OPTION_MASK_ISA2_CMPCCXADD /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same as -mno-sse4.1. 
*/ @@ -1181,6 +1183,19 @@ ix86_handle_option (struct gcc_options *opts, } return true; + case OPT_mcmpccxadd: + if (value) + { + opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_CMPCCXADD_SET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_CMPCCXADD_SET; + } + else + { + opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_CMPCCXADD_UNSET; + opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_CMPCCXADD_UNSET; + } + return true; + case OPT_mfma: if (value) { diff --git a/gcc/common/config/i386/i386-cpuinfo.h b/gcc/common/config/i386/i386-cpuinfo.h index ad0cb6fe38d..5f8970257fa 100644 --- a/gcc/common/config/i386/i386-cpuinfo.h +++ b/gcc/common/config/i386/i386-cpuinfo.h @@ -244,6 +244,7 @@ enum processor_features FEATURE_AVXIFMA, FEATURE_AVXVNNIINT8, FEATURE_AVXNECONVERT, + FEATURE_CMPCCXADD, CPU_FEATURE_MAX }; diff --git a/gcc/common/config/i386/i386-isas.h b/gcc/common/config/i386/i386-isas.h index bceaee589ee..3035e4a8186 100644 --- a/gcc/common/config/i386/i386-isas.h +++ b/gcc/common/config/i386/i386-isas.h @@ -180,4 +180,5 @@ ISA_NAMES_TABLE_START P_NONE, "-mavxvnniint8") ISA_NAMES_TABLE_ENTRY("avxneconvert", FEATURE_AVXNECONVERT, P_NONE, "-mavxneconvert") + ISA_NAMES_TABLE_ENTRY("cmpccxadd", FEATURE_CMPCCXADD, P_NONE, "-mcmpccxadd") ISA_NAMES_TABLE_END diff --git a/gcc/config.gcc b/gcc/config.gcc index 41a42006be8..7a6b5ffd032 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -422,7 +422,8 @@ i[34567]86-*-* | x86_64-*-*) amxbf16intrin.h x86gprintrin.h uintrintrin.h hresetintrin.h keylockerintrin.h avxvnniintrin.h mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h - avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h" + avxifmaintrin.h avxvnniint8intrin.h avxneconvertintrin.h + cmpccxaddintrin.h" ;; ia64-*-*) extra_headers=ia64intrin.h diff --git a/gcc/config/i386/cmpccxaddintrin.h b/gcc/config/i386/cmpccxaddintrin.h new file mode 100644 index 00000000000..6d1b05397c6 --- /dev/null +++ b/gcc/config/i386/cmpccxaddintrin.h @@ -0,0 +1,89 @@ +/* Copyright 
(C) 2012-2021 Free Software Foundation, Inc. + + This file is part of GCC. + + GCC is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + GCC is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#ifndef _X86GPRINTRIN_H_INCLUDED +#error "Never use <cmpccxaddintrin.h> directly; include <x86gprintrin.h> instead." +#endif + +#ifndef _CMPCCXADDINTRIN_H_INCLUDED +#define _CMPCCXADDINTRIN_H_INCLUDED + +#ifdef __x86_64__ + +#ifndef __CMPCCXADD__ +#pragma GCC push_options +#pragma GCC target("cmpccxadd") +#define __DISABLE_CMPCCXADD__ +#endif /* __CMPCCXADD__ */ + +typedef enum { + _CMPCCX_O, /* Overflow. */ + _CMPCCX_NO, /* No overflow. */ + _CMPCCX_B, /* Below. */ + _CMPCCX_NB, /* Not below. */ + _CMPCCX_Z, /* Zero. */ + _CMPCCX_NZ, /* Not zero. */ + _CMPCCX_BE, /* Below or equal. */ + _CMPCCX_NBE, /* Neither below nor equal. */ + _CMPCCX_S, /* Sign. */ + _CMPCCX_NS, /* No sign. */ + _CMPCCX_P, /* Parity. */ + _CMPCCX_NP, /* No parity. */ + _CMPCCX_L, /* Less. */ + _CMPCCX_NL, /* Not less. */ + _CMPCCX_LE, /* Less or equal. */ + _CMPCCX_NLE, /* Neither less nor equal. 
*/ +} _CMPCCX_ENUM; + +#ifdef __OPTIMIZE__ +extern __inline int +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__cmpccxadd_epi32 (int *__A, int __B, int __C, const _CMPCCX_ENUM __D) +{ + return __builtin_ia32_cmpccxadd (__A, __B, __C, __D); +} + +extern __inline long long +__attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) +__cmpccxadd_epi64 (long long *__A, long long __B, long long __C, + const _CMPCCX_ENUM __D) +{ + return __builtin_ia32_cmpccxadd64 (__A, __B, __C, __D); +} +#else +#define __cmpccxadd_epi32(A,B,C,D) \ +__builtin_ia32_cmpccxadd((int *) (A), (int) (B), (int) (C), \ + (_CMPCCX_ENUM)(D)) +#define __cmpccxadd_epi64(A,B,C,D) \ +__builtin_ia32_cmpccxadd64((long long *) (A), (long long) (B), \ + (long long) (C), (_CMPCCX_ENUM)(D)) +#endif + +#ifdef __DISABLE_CMPCCXADD__ +#undef __DISABLE_CMPCCXADD__ +#pragma GCC pop_options +#endif /* __DISABLE_CMPCCXADD__ */ + +#endif + +#endif /* _CMPCCXADDINTRIN_H_INCLUDED */ diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h index 18bbc0cb7be..19c0d033921 100644 --- a/gcc/config/i386/cpuid.h +++ b/gcc/config/i386/cpuid.h @@ -27,6 +27,7 @@ /* %eax */ #define bit_AVXVNNI (1 << 4) #define bit_AVX512BF16 (1 << 5) +#define bit_CMPCCXADD (1 << 7) #define bit_HRESET (1 << 22) #define bit_AVXIFMA (1 << 23) diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index c87ff356556..7e5c1b52ae1 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -1409,3 +1409,7 @@ DEF_FUNCTION_TYPE (V4SF, PCV8HF) DEF_FUNCTION_TYPE (V8SF, PCV16HF) DEF_FUNCTION_TYPE (V4SF, PCV8BF) DEF_FUNCTION_TYPE (V8SF, PCV16BF) + +# CMPccXADD builtins +DEF_FUNCTION_TYPE (INT, PINT, INT, INT, INT) +DEF_FUNCTION_TYPE (LONGLONG, PLONGLONG, LONGLONG, LONGLONG, INT) diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def index 73d2aa7be2b..adbbb12efc8 100644 --- a/gcc/config/i386/i386-builtin.def +++ 
b/gcc/config/i386/i386-builtin.def @@ -288,6 +288,10 @@ BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneobf162ps_v16bf, "__built BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneoph2ps_v8hf, "__builtin_ia32_vcvtneoph2ps128", IX86_BUILTIN_VCVTNEOPH2PS128, UNKNOWN, (int) V4SF_FTYPE_PCV8HF) BDESC (0, OPTION_MASK_ISA2_AVXNECONVERT, CODE_FOR_vcvtneoph2ps_v16hf, "__builtin_ia32_vcvtneoph2ps256", IX86_BUILTIN_VCVTNEOPH2PS256, UNKNOWN, (int) V8SF_FTYPE_PCV16HF) +/* CMPCCXADD */ +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_si, "__builtin_ia32_cmpccxadd", IX86_BUILTIN_CMPCCXADD, UNKNOWN, (int) INT_FTYPE_PINT_INT_INT_INT) +BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_CMPCCXADD, CODE_FOR_cmpccxadd_di, "__builtin_ia32_cmpccxadd64", IX86_BUILTIN_CMPCCXADD64, UNKNOWN, (int) LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT) + /* AVX512BW */ BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv32hi_mask, "__builtin_ia32_loaddquhi512_mask", IX86_BUILTIN_LOADDQUHI512_MASK, UNKNOWN, (int) V32HI_FTYPE_PCSHORT_V32HI_USI) BDESC (OPTION_MASK_ISA_AVX512BW, 0, CODE_FOR_avx512bw_loadv64qi_mask, "__builtin_ia32_loaddquqi512_mask", IX86_BUILTIN_LOADDQUQI512_MASK, UNKNOWN, (int) V64QI_FTYPE_PCCHAR_V64QI_UDI) diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc index fa195e739a6..818cfd77491 100644 --- a/gcc/config/i386/i386-c.cc +++ b/gcc/config/i386/i386-c.cc @@ -646,6 +646,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag, def_or_undef (parse_in, "__AVXVNNIINT8__"); if (isa_flag2 & OPTION_MASK_ISA2_AVXNECONVERT) def_or_undef (parse_in, "__AVXNECONVERT__"); + if (isa_flag2 & OPTION_MASK_ISA2_CMPCCXADD) + def_or_undef (parse_in, "__CMPCCXADD__"); if (TARGET_IAMCU) { def_or_undef (parse_in, "__iamcu"); diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index a4b3323237d..f5397f4f45e 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -11862,8 +11862,9 @@ 
ix86_expand_special_args_builtin (const struct builtin_description *d, tree arg; rtx pat, op; unsigned int i, nargs, arg_adjust, memory; + unsigned int constant = 100; bool aligned_mem = false; - rtx xops[3]; + rtx xops[4]; enum insn_code icode = d->icode; const struct insn_data_d *insn_p = &insn_data[icode]; machine_mode tmode = insn_p->operand[0].mode; @@ -12154,6 +12155,13 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, klass = load; memory = 0; break; + case INT_FTYPE_PINT_INT_INT_INT: + case LONGLONG_FTYPE_PLONGLONG_LONGLONG_LONGLONG_INT: + nargs = 4; + klass = load; + memory = 0; + constant = 3; + break; default: gcc_unreachable (); } @@ -12219,6 +12227,15 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, if (MEM_ALIGN (op) < align) set_mem_align (op, align); } + else if (i == constant) + { + /* This must be the constant. */ + if (!insn_p->operand[nargs].predicate(op, SImode)) + { + error ("the fourth argument must be one of enum %qs", "_CMPCCX_ENUM"); + return const0_rtx; + } + } else { /* This must be register. 
*/ @@ -12260,6 +12277,9 @@ ix86_expand_special_args_builtin (const struct builtin_description *d, case 3: pat = GEN_FCN (icode) (target, xops[0], xops[1], xops[2]); break; + case 4: + pat = GEN_FCN (icode) (target, xops[0], xops[1], xops[2], xops[3]); + break; default: gcc_unreachable (); } diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def index 4ea3f96f69f..7ffc73ba23e 100644 --- a/gcc/config/i386/i386-isa.def +++ b/gcc/config/i386/i386-isa.def @@ -112,3 +112,4 @@ DEF_PTA(AVX512FP16) DEF_PTA(AVXIFMA) DEF_PTA(AVXVNNIINT8) DEF_PTA(AVXNECONVERT) +DEF_PTA(CMPCCXADD) diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc index ef9c888980c..38e3fd6b8c7 100644 --- a/gcc/config/i386/i386-options.cc +++ b/gcc/config/i386/i386-options.cc @@ -230,7 +230,8 @@ static struct ix86_target_opts isa2_opts[] = { "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 }, { "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }, { "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }, - { "-mavxneconvert", OPTION_MASK_ISA2_AVXNECONVERT } + { "-mavxneconvert", OPTION_MASK_ISA2_AVXNECONVERT }, + { "-mcmpccxadd", OPTION_MASK_ISA2_CMPCCXADD } }; static struct ix86_target_opts isa_opts[] = { @@ -1080,6 +1081,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[], IX86_ATTR_ISA ("avxifma", OPT_mavxifma), IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8), IX86_ATTR_ISA ("avxneconvert", OPT_mavxneconvert), + IX86_ATTR_ISA ("cmpccxadd", OPT_mcmpccxadd), /* enum options */ IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_), diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt index 6e07b89ac4c..c4a3bdcf960 100644 --- a/gcc/config/i386/i386.opt +++ b/gcc/config/i386/i386.opt @@ -1229,3 +1229,8 @@ mavxneconvert Target Mask(ISA2_AVXNECONVERT) Var(ix86_isa_flags2) Save Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and AVXNECONVERT build-in functions and code generation. 
+ +mcmpccxadd +Target Mask(ISA2_CMPCCXADD) Var(ix86_isa_flags2) Save +Support CMPCCXADD built-in functions and +code generation. diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index 92634d538cb..2b6f2f4c826 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -37,6 +37,9 @@ UNSPECV_CMPXCHG UNSPECV_XCHG UNSPECV_LOCK + + ;; For CMPccXADD support + UNSPECV_CMPCCXADD ]) (define_expand "sse2_lfence" @@ -1061,3 +1064,42 @@ (any_logic:SWI (match_dup 0) (match_dup 1)))] "" "lock{%;} %K2{}\t{%1, %0|%0, %1}") + +;; CMPccXADD. The cc[] table below is indexed by _CMPCCX_ENUM order. + +(define_insn "@cmpccxadd_<mode>_1" + [(set (match_operand:SWI48x 1 "register_operand" "+r") + (match_operand:SWI48x 0 "memory_operand" "+m")) + (set (match_dup 0) + (unspec_volatile:SWI48x + [(match_dup 0) + (match_dup 1) + (match_operand:SWI48x 2 "register_operand" "r") + (match_operand:SI 3 "const_0_to_15_operand" "n")] + UNSPECV_CMPCCXADD)) + (clobber (reg:CC FLAGS_REG))] + "TARGET_CMPCCXADD && TARGET_64BIT" +{ + char buf[128]; + const char *ops = "cmp%sxadd\t{%%2, %%1, %%0|%%0, %%1, %%2}"; + char const *cc[16] = {"o", "no", "b", "nb", "z", "nz", "be", "nbe", + "s", "ns", "p", "np", "l", "nl", "le", "nle"}; + + snprintf (buf, sizeof (buf), ops, cc[INTVAL (operands[3])]); + output_asm_insn (buf, operands); + return ""; +}) + +(define_expand "cmpccxadd_<mode>" + [(match_operand:SWI48x 0 "register_operand") + (match_operand:SWI48x 1 "memory_operand") + (match_operand:SWI48x 2 "register_operand") + (match_operand:SWI48x 3 "register_operand") + (match_operand:SI 4 "const_0_to_15_operand")] + "TARGET_CMPCCXADD && TARGET_64BIT" +{ + emit_insn (gen_cmpccxadd_1 (<MODE>mode, operands[1], operands[2], + operands[3], operands[4])); + emit_move_insn (operands[0], operands[2]); + DONE; +}) diff --git a/gcc/config/i386/x86gprintrin.h b/gcc/config/i386/x86gprintrin.h index e0be01d5e78..a84fbe9137d 100644 --- a/gcc/config/i386/x86gprintrin.h +++ b/gcc/config/i386/x86gprintrin.h @@ -52,6 
+52,8 @@ #include +#include + #include #include diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi index 79ed891bd28..aff345fdc80 100644 --- a/gcc/doc/extend.texi +++ b/gcc/doc/extend.texi @@ -7075,6 +7075,11 @@ Enable/disable the generation of the AVXVNNIINT8 instructions. @cindex @code{target("avxneconvert")} function attribute, x86 Enable/disable the generation of the AVXNECONVERT instructions. +@item cmpccxadd +@itemx no-cmpccxadd +@cindex @code{target("cmpccxadd")} function attribute, x86 +Enable/disable the generation of the CMPccXADD instructions. + @item cld @itemx no-cld @cindex @code{target("cld")} function attribute, x86 diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 83bc97eaf6d..a63910a1fe1 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options. -mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol -mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol -mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol --mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert @gol +-mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd @gol -mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol -minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol -mkl -mwidekl @gol @@ -32922,6 +32922,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}. 
@need 200 @itemx -mavxneconvert @opindex mavxneconvert +@need 200 +@itemx -mcmpccxadd +@opindex mcmpccxadd These switches enable the use of instructions in the MMX, SSE, SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF, AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA, @@ -32932,8 +32935,9 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2, GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16, ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE, UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16, -AVXIFMA, AVXVNNIINT8, AVXNECONVERT or CLDEMOTE extended instruction sets. Each -has a corresponding @option{-mno-} option to disable use of these instructions. +AVXIFMA, AVXVNNIINT8, AVXNECONVERT, CMPCCXADD or CLDEMOTE extended instruction +sets. Each has a corresponding @option{-mno-} option to disable use of these +instructions. These extensions are also available as built-in functions: see @ref{x86 Built-in Functions}, for details of the functions enabled and diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi index a12175b6498..714595d33bf 100644 --- a/gcc/doc/sourcebuild.texi +++ b/gcc/doc/sourcebuild.texi @@ -2511,6 +2511,9 @@ Target supports the execution of @code{amx-bf16} instructions. @item cell_hw Test system can execute AltiVec and Cell PPU instructions. +@item cmpccxadd +Target supports the execution of @code{cmpccxadd} instructions. + @item coldfire_fpu Target uses a ColdFire FPU. 
diff --git a/gcc/testsuite/g++.dg/other/i386-2.C b/gcc/testsuite/g++.dg/other/i386-2.C index dd3e71f25ed..f7dbbbbf619 100644 --- a/gcc/testsuite/g++.dg/other/i386-2.C +++ b/gcc/testsuite/g++.dg/other/i386-2.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */ +/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/g++.dg/other/i386-3.C b/gcc/testsuite/g++.dg/other/i386-3.C index cd7045cc4e4..2ac5d9f2df5 100644 --- 
a/gcc/testsuite/g++.dg/other/i386-3.C +++ b/gcc/testsuite/g++.dg/other/i386-3.C @@ -1,5 +1,5 @@ /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ -/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */ +/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd" } */ /* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h, xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h, diff --git a/gcc/testsuite/gcc.target/i386/avx-1.c b/gcc/testsuite/gcc.target/i386/avx-1.c index 154e7b3b107..051a1b59b5b 100644 --- a/gcc/testsuite/gcc.target/i386/avx-1.c +++ b/gcc/testsuite/gcc.target/i386/avx-1.c @@ -835,6 +835,10 @@ #define 
__builtin_ia32_bextri_u32(X, Y) __builtin_ia32_bextri_u32 (X, 1) #define __builtin_ia32_bextri_u64(X, Y) __builtin_ia32_bextri_u64 (X, 1) +/* cmpccxadd.h */ +#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1) +#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1) + #include #include #include diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c new file mode 100644 index 00000000000..699ed9b2dc2 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-1.c @@ -0,0 +1,61 @@ +/* { dg-do compile { target { ! ia32 } } } */ +/* { dg-options "-O2 -mcmpccxadd" } */ +/* { dg-final { scan-assembler-times "cmpbexadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpbxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmplexadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmplxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnbexadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnbxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnlexadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnlxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnoxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnpxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnsxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpnzxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpoxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmppxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpsxadd\[ \\t\]" 2 } } */ +/* { dg-final { scan-assembler-times "cmpzxadd\[ \\t\]" 2 } } */ +#include + +int *a; +int b, c; +long long *d; +long long e, f; + +void extern +cmpccxadd_test(void) +{ + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_BE); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_BE); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_B); + e = 
__cmpccxadd_epi64 (d, e, f, _CMPCCX_B); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_LE); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_LE); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_L); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_L); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NBE); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NBE); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NB); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NB); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NLE); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NLE); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NL); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NL); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NO); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NO); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NP); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NP); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NS); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NS); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_NZ); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_NZ); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_O); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_O); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_P); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_P); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_S); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_S); + b = __cmpccxadd_epi32 (a, b, c, _CMPCCX_Z); + e = __cmpccxadd_epi64 (d, e, f, _CMPCCX_Z); +} diff --git a/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c b/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c new file mode 100644 index 00000000000..76d17803fbb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/cmpccxadd-2.c @@ -0,0 +1,138 @@ +/* { dg-do run { target { ! 
ia32 } } } */ +/* { dg-options "-O2 -mcmpccxadd" } */ +/* { dg-require-effective-target cmpccxadd } */ + +#include +#include + +int +main() +{ + if (!__builtin_cpu_supports("cmpccxadd")) + return 0; + + int srcdest1[16] = { 1,1,1,1,2,1,2,1,1,2,2,2,-2147483648,4,1,1 }; + int srcdest2[16] = { 1,2,1,2,1,1,1,1,1,1,1,1,1,1,2,1 }; + int src3[16] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }; + int _srcdest1[16], _srcdest2[16], res[16], cond[16]; + long long srcdest1_64[16] = { 1,1,1,1,2,1,2,1,1,2,2,2,-9223372036854775807LL-1,4,1,1 }; + long long srcdest2_64[16] = { 1,2,1,2,1,1,1,1,1,1,1,1,1,1,2,1 }; + long long src3_64[16] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1 }; + long long _srcdest1_64[16], _srcdest2_64[16], res_64[16], cond_64[16]; + + int tmp2[16]; + long long tmp2_64[16]; + + int cf[16] = { 0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + int of[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0 }; + int sf[16] = { 0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0 }; + int zf[16] = { 1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,1 }; + int af[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; + int pf[16] = { 0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0 }; + + for (int i = 0; i < 16; i++) + { + tmp2[i] = srcdest1[i] + src3[i]; + tmp2_64[i] = srcdest1_64[i] + src3_64[i]; + } + + cond[0] = (cf[0] || zf[0]) == 1 ? 1 : 0; + cond[1] = cf[1] == 1 ? 1 : 0; + cond[2] = (((sf[2] && !of[2]) || (!sf[2] && of[2])) || zf[2]) == 1 ? 1 : 0; + cond[3] = ((sf[3] && !of[3]) || (!sf[3] && of[3])) == 1 ? 1 : 0; + cond[4] = (cf[4] || zf[4]) == 0 ? 1 : 0; + cond[5] = cf[5] == 0 ? 1 : 0; + cond[6] = (((sf[6] && !of[6]) || (!sf[6] && of[6])) || zf[6]) == 0 ? 1 : 0; + cond[7] = ((sf[7] && !of[7]) || (!sf[7] && of[7])) == 0 ? 1 : 0; + cond[8] = of[8] == 0 ? 1 : 0; + cond[9] = pf[9] == 0 ? 1 : 0; + cond[10] = sf[10] == 0 ? 1 : 0; + cond[11] = zf[11] == 0 ? 1 : 0; + cond[12] = of[12] == 1 ? 1 : 0; + cond[13] = pf[13] == 1 ? 1 : 0; + cond[14] = sf[14] == 1 ? 1 : 0; + cond[15] = zf[15] == 1 ? 1 : 0; + + cond_64[0] = (cf[0] || zf[0]) == 1 ? 1 : 0; + cond_64[1] = cf[1] == 1 ? 
1 : 0; + cond_64[2] = (((sf[2] && !of[2]) || (!sf[2] && of[2])) || zf[2]) == 1 ? 1 : 0; + cond_64[3] = ((sf[3] && !of[3]) || (!sf[3] && of[3])) == 1 ? 1 : 0; + cond_64[4] = (cf[4] || zf[4]) == 0 ? 1 : 0; + cond_64[5] = cf[5] == 0 ? 1 : 0; + cond_64[6] = (((sf[6] && !of[6]) || (!sf[6] && of[6])) || zf[6]) == 0 ? 1 : 0; + cond_64[7] = ((sf[7] && !of[7]) || (!sf[7] && of[7])) == 0 ? 1 : 0; + cond_64[8] = of[8] == 0 ? 1 : 0; + cond_64[9] = pf[9] == 0 ? 1 : 0; + cond_64[10] = sf[10] == 0 ? 1 : 0; + cond_64[11] = zf[11] == 0 ? 1 : 0; + cond_64[12] = of[12] == 1 ? 1 : 0; + cond_64[13] = pf[13] == 1 ? 1 : 0; + cond_64[14] = sf[14] == 1 ? 1 : 0; + cond_64[15] = zf[15] == 1 ? 1 : 0; + + for (int i = 0; i < 16; i++) + { + if (cond[i] == 1) + { + _srcdest1[i] = tmp2[i]; + } + else + { + _srcdest1[i] = srcdest1[i]; + } + if (cond_64[i] == 1) + { + _srcdest1_64[i] = tmp2_64[i]; + } + else + { + _srcdest1_64[i] = srcdest1_64[i]; + } + _srcdest2[i] = srcdest1[i]; + _srcdest2_64[i] = srcdest1_64[i]; + } + + res[0] = __cmpccxadd_epi32 (&srcdest1[0], srcdest2[0], src3[0], _CMPCCX_BE); + res[1] = __cmpccxadd_epi32 (&srcdest1[1], srcdest2[1], src3[1], _CMPCCX_B); + res[2] = __cmpccxadd_epi32 (&srcdest1[2], srcdest2[2], src3[2], _CMPCCX_LE); + res[3] = __cmpccxadd_epi32 (&srcdest1[3], srcdest2[3], src3[3], _CMPCCX_L); + res[4] = __cmpccxadd_epi32 (&srcdest1[4], srcdest2[4], src3[4], _CMPCCX_NBE); + res[5] = __cmpccxadd_epi32 (&srcdest1[5], srcdest2[5], src3[5], _CMPCCX_NB); + res[6] = __cmpccxadd_epi32 (&srcdest1[6], srcdest2[6], src3[6], _CMPCCX_NLE); + res[7] = __cmpccxadd_epi32 (&srcdest1[7], srcdest2[7], src3[7], _CMPCCX_NL); + res[8] = __cmpccxadd_epi32 (&srcdest1[8], srcdest2[8], src3[8], _CMPCCX_NO); + res[9] = __cmpccxadd_epi32 (&srcdest1[9], srcdest2[9], src3[9], _CMPCCX_NP); + res[10] = __cmpccxadd_epi32 (&srcdest1[10], srcdest2[10], src3[10], _CMPCCX_NS); + res[11] = __cmpccxadd_epi32 (&srcdest1[11], srcdest2[11], src3[11], _CMPCCX_NZ); + res[12] = __cmpccxadd_epi32 
(&srcdest1[12], srcdest2[12], src3[12], _CMPCCX_O); + res[13] = __cmpccxadd_epi32 (&srcdest1[13], srcdest2[13], src3[13], _CMPCCX_P); + res[14] = __cmpccxadd_epi32 (&srcdest1[14], srcdest2[14], src3[14], _CMPCCX_S); + res[15] = __cmpccxadd_epi32 (&srcdest1[15], srcdest2[15], src3[15], _CMPCCX_Z); + + res_64[0] = __cmpccxadd_epi64(&srcdest1_64[0], srcdest2_64[0], src3_64[0], _CMPCCX_BE); + res_64[1] = __cmpccxadd_epi64(&srcdest1_64[1], srcdest2_64[1], src3_64[1], _CMPCCX_B); + res_64[2] = __cmpccxadd_epi64(&srcdest1_64[2], srcdest2_64[2], src3_64[2], _CMPCCX_LE); + res_64[3] = __cmpccxadd_epi64(&srcdest1_64[3], srcdest2_64[3], src3_64[3], _CMPCCX_L); + res_64[4] = __cmpccxadd_epi64(&srcdest1_64[4], srcdest2_64[4], src3_64[4], _CMPCCX_NBE); + res_64[5] = __cmpccxadd_epi64(&srcdest1_64[5], srcdest2_64[5], src3_64[5], _CMPCCX_NB); + res_64[6] = __cmpccxadd_epi64(&srcdest1_64[6], srcdest2_64[6], src3_64[6], _CMPCCX_NLE); + res_64[7] = __cmpccxadd_epi64(&srcdest1_64[7], srcdest2_64[7], src3_64[7], _CMPCCX_NL); + res_64[8] = __cmpccxadd_epi64(&srcdest1_64[8], srcdest2_64[8], src3_64[8], _CMPCCX_NO); + res_64[9] = __cmpccxadd_epi64(&srcdest1_64[9], srcdest2_64[9], src3_64[9], _CMPCCX_NP); + res_64[10] = __cmpccxadd_epi64(&srcdest1_64[10], srcdest2_64[10], src3_64[10], _CMPCCX_NS); + res_64[11] = __cmpccxadd_epi64(&srcdest1_64[11], srcdest2_64[11], src3_64[11], _CMPCCX_NZ); + res_64[12] = __cmpccxadd_epi64(&srcdest1_64[12], srcdest2_64[12], src3_64[12], _CMPCCX_O); + res_64[13] = __cmpccxadd_epi64(&srcdest1_64[13], srcdest2_64[13], src3_64[13], _CMPCCX_P); + res_64[14] = __cmpccxadd_epi64(&srcdest1_64[14], srcdest2_64[14], src3_64[14], _CMPCCX_S); + res_64[15] = __cmpccxadd_epi64(&srcdest1_64[15], srcdest2_64[15], src3_64[15], _CMPCCX_Z); + + for (int i = 0; i < 16; i++) + { + if ((srcdest1[i] != _srcdest1[i]) || (res[i] != _srcdest2[i])) + abort(); + if ((srcdest1_64[i] != _srcdest1_64[i]) || (res_64[i] != _srcdest2_64[i])) + abort(); + } + + return 0; +} diff --git 
a/gcc/testsuite/gcc.target/i386/funcspec-56.inc b/gcc/testsuite/gcc.target/i386/funcspec-56.inc index 5655c5b1919..ab748fad994 100644 --- a/gcc/testsuite/gcc.target/i386/funcspec-56.inc +++ b/gcc/testsuite/gcc.target/i386/funcspec-56.inc @@ -83,6 +83,7 @@ extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16"))); extern void test_avxifma (void) __attribute__((__target__("avxifma"))); extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8"))); extern void test_avxneconvert (void) __attribute__((__target__("avxneconvert"))); +extern void test_cmpccxadd (void) __attribute__((__target__("cmpccxadd"))); extern void test_no_sgx (void) __attribute__((__target__("no-sgx"))); extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps"))); @@ -167,6 +168,7 @@ extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16" extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma"))); extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint8"))); extern void test_no_avxneconvert (void) __attribute__((__target__("no-avxneconvert"))); +extern void test_no_cmpccxadd (void) __attribute__((__target__("no-cmpccxadd"))); extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona"))); extern void test_arch_core2 (void) __attribute__((__target__("arch=core2"))); diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index b9cdfb690d1..e947b4347f4 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves 
-mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert" } */ +/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8 -mavxneconvert -mcmpccxadd" } */ /* { dg-add-options bind_pic_locally } */ #include @@ -842,4 +842,8 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1) +/* cmpccxaddintrin.h */ +#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1) +#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1) + #include diff --git a/gcc/testsuite/gcc.target/i386/sse-23.c b/gcc/testsuite/gcc.target/i386/sse-23.c index 898dde80c8f..757ba9c9a7d 100644 --- a/gcc/testsuite/gcc.target/i386/sse-23.c +++ b/gcc/testsuite/gcc.target/i386/sse-23.c @@ -843,6 +843,10 @@ #define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1) #define __builtin_ia32_vpclmulqdq_v8di(A, B, C) 
__builtin_ia32_vpclmulqdq_v8di(A, B, 1) -#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert") +/* cmpccxaddintrin.h */ +#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1) +#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1) + +#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8,avxneconvert,cmpccxadd") #include diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c index 293be094b78..76de89d0cb7 100644 --- a/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c +++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-1.c @@ -1,7 +1,7 @@ /* Test that <x86gprintrin.h> is usable with -O -std=c89 -pedantic-errors. 
*/ /* { dg-do compile } */ /* { dg-options "-O -std=c89 -pedantic-errors -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */ -/* { dg-additional-options "-muintr" { target { ! ia32 } } } */ +/* { dg-additional-options "-mcmpccxadd -muintr" { target { ! ia32 } } } */ #include <x86gprintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c index c6330275746..aefad77f864 100644 --- a/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c +++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-2.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O2 -Werror-implicit-function-declaration -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */ /* { dg-add-options bind_pic_locally } */ -/* { dg-additional-options "-muintr" { target { ! ia32 } } } */ +/* { dg-additional-options "-mcmpccxadd -muintr" { target { ! ia32 } } } */ /* Test that the intrinsics in <x86gprintrin.h> compile with optimization. 
All of them are defined as inline functions that reference the proper @@ -28,4 +28,8 @@ /* rtmintrin.h */ #define __builtin_ia32_xabort(N) __builtin_ia32_xabort(1) +/* cmpccxaddintrin.h */ +#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1) +#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1) + #include <x86gprintrin.h> diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c index 3a7e1f4a10d..261c9180aa0 100644 --- a/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c +++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-3.c @@ -1,7 +1,7 @@ /* { dg-do compile } */ /* { dg-options "-O0 -Werror-implicit-function-declaration -march=x86-64 -madx -mbmi -mbmi2 -mcldemote -mclflushopt -mclwb -mclzero -menqcmd -mfsgsbase -mfxsr -mhreset -mlzcnt -mlwp -mmovdiri -mmwaitx -mpconfig -mpopcnt -mpku -mptwrite -mrdpid -mrdrnd -mrdseed -mrtm -mserialize -msgx -mshstk -mtbm -mtsxldtrk -mwaitpkg -mwbnoinvd -mxsave -mxsavec -mxsaveopt -mxsaves -mno-sse -mno-mmx" } */ /* { dg-add-options bind_pic_locally } */ -/* { dg-additional-options "-muintr" { target { ! ia32 } } } */ +/* { dg-additional-options "-mcmpccxadd -muintr" { target { ! ia32 } } } */ /* Test that the intrinsics in <x86gprintrin.h> compile without optimization. 
All of them are defined as inline functions that reference the proper diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c index d8a6126e5dc..7f76b870934 100644 --- a/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c +++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-4.c @@ -15,7 +15,7 @@ #ifndef DIFFERENT_PRAGMAS #ifdef __x86_64__ -#pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,uintr,xsaveopt") +#pragma GCC target ("adx,bmi,bmi2,cmpccxadd,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,uintr,xsaveopt") #else #pragma GCC target ("adx,bmi,bmi2,fsgsbase,fxsr,hreset,lwp,lzcnt,popcnt,rdrnd,rdseed,tbm,rtm,serialize,tsxldtrk,xsaveopt") #endif diff --git a/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c b/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c index 9ef66fdad54..54d826c4f46 100644 --- a/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c +++ b/gcc/testsuite/gcc.target/i386/x86gprintrin-5.c @@ -27,8 +27,12 @@ /* rtmintrin.h */ #define __builtin_ia32_xabort(M) __builtin_ia32_xabort(1) +/* cmpccxaddintrin.h */ +#define __builtin_ia32_cmpccxadd(A, B, C, D) __builtin_ia32_cmpccxadd(A, B, C, 1) +#define __builtin_ia32_cmpccxadd64(A, B, C, D) __builtin_ia32_cmpccxadd64(A, B, C, 1) + #ifdef __x86_64__ -#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,uintr,xsavec,xsaveopt,xsaves,wbnoinvd") +#pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,cmpccxadd,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,uintr,xsavec,xsaveopt,xsaves,wbnoinvd") #else #pragma GCC target ("adx,bmi,bmi2,clflushopt,clwb,clzero,enqcmd,fsgsbase,fxsr,hreset,lwp,lzcnt,mwaitx,pconfig,pku,popcnt,rdpid,rdrnd,rdseed,tbm,rtm,serialize,sgx,tsxldtrk,xsavec,xsaveopt,xsaves,wbnoinvd") #endif 
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index 2d6054f191a..4ba4a106fea 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -9558,6 +9558,16 @@ proc check_effective_target_avxneconvert { } { } "-O0 -mavxneconvert" ] } +# Return 1 if cmpccxadd instructions can be compiled. +proc check_effective_target_cmpccxadd { } { + return [check_no_compiler_messages cmpccxadd object { + int _cmpccxadd_epi32 (int *__A, int __B, int __C, const int __D) + { + return (int)__builtin_ia32_cmpccxadd (__A, __B, __C, 1); + } + } "-mcmpccxadd" ] +} + # Return 1 if sse instructions can be compiled. proc check_effective_target_sse { } { return [check_no_compiler_messages sse object { -- 2.18.1